mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-25 21:03:18 +00:00
Tessellation (#1528)
* shader_recompiler: Tessellation WIP * fix compiler errors after merge DONT MERGE set log file to /dev/null DONT MERGE linux pthread bb fix save work DONT MERGE dump ir save more work fix mistake with ES shader skip list add input patch control points dynamic state random stuff * WIP Tessellation partial implementation. Squash commits * test: make local/tcs use attr arrays * attr arrays in TCS/TES * dont define empty attr arrays * switch to special opcodes for tess tcs/tes reads and tcs writes * impl tcs/tes read attr insts * rebase fix * save some work * save work probably broken and slow * put Vertex LogicalStage after TCS and TES to fix bindings * more refactors * refactor pattern matching and optimize modulos (disabled) * enable modulo opt * copyright * rebase fixes * remove some prints * remove some stuff * Add TCS/TES support for shader patching and use LogicalStage * refactor and handle wider DS instructions * get rid of GetAttributes for special tess constants reads. Immediately replace some upon seeing readconstbuffer. Gets rid of some extra passes over IR * stop relying on GNMX HsConstants struct. Change runtime_info.hs_info and some regs * delete some more stuff * update comments for current implementation * some cleanup * uint error * more cleanup * remove patch control points dynamic state (because runtime_info already depends on it) * fix potential problem with determining passthrough --------- Co-authored-by: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com>
This commit is contained in:
parent
3e22622508
commit
3c0c921ef5
54 changed files with 2146 additions and 189 deletions
|
@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) {
|
|||
return "VertexId";
|
||||
case Attribute::InstanceId:
|
||||
return "InstanceId";
|
||||
case Attribute::PrimitiveId:
|
||||
return "PrimitiveId";
|
||||
case Attribute::FragCoord:
|
||||
return "FragCoord";
|
||||
case Attribute::IsFrontFace:
|
||||
|
@ -114,6 +116,16 @@ std::string NameOf(Attribute attribute) {
|
|||
return "LocalInvocationId";
|
||||
case Attribute::LocalInvocationIndex:
|
||||
return "LocalInvocationIndex";
|
||||
case Attribute::InvocationId:
|
||||
return "InvocationId";
|
||||
case Attribute::PatchVertices:
|
||||
return "PatchVertices";
|
||||
case Attribute::TessellationEvaluationPointU:
|
||||
return "TessellationEvaluationPointU";
|
||||
case Attribute::TessellationEvaluationPointV:
|
||||
return "TessellationEvaluationPointV";
|
||||
case Attribute::PackedHullInvocationInfo:
|
||||
return "PackedHullInvocationInfo";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -72,8 +72,13 @@ enum class Attribute : u64 {
|
|||
LocalInvocationId = 75,
|
||||
LocalInvocationIndex = 76,
|
||||
FragCoord = 77,
|
||||
InstanceId0 = 78, // step rate 0
|
||||
InstanceId1 = 79, // step rate 1
|
||||
InstanceId0 = 78, // step rate 0
|
||||
InstanceId1 = 79, // step rate 1
|
||||
InvocationId = 80, // TCS id in output patch and instanced geometry shader id
|
||||
PatchVertices = 81,
|
||||
TessellationEvaluationPointU = 82,
|
||||
TessellationEvaluationPointV = 83,
|
||||
PackedHullInvocationInfo = 84, // contains patch id within the VGT and invocation ID
|
||||
Max,
|
||||
};
|
||||
|
||||
|
@ -85,6 +90,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept {
|
|||
return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
|
||||
}
|
||||
|
||||
// Returns true when the attribute is one of the two tessellation-coordinate
// components (the U/V evaluation point read by the domain shader).
constexpr bool IsTessCoord(Attribute attribute) noexcept {
    return attribute == Attribute::TessellationEvaluationPointU ||
           attribute == Attribute::TessellationEvaluationPointV;
}
|
||||
|
||||
// Returns true when the attribute is one of the 32 generic parameter slots.
constexpr bool IsParam(Attribute attribute) noexcept {
    return !(attribute < Attribute::Param0 || attribute > Attribute::Param31);
}
|
||||
|
|
|
@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size
|
|||
return fmt::format("{}", arg.VectorReg());
|
||||
case Type::Attribute:
|
||||
return fmt::format("{}", arg.Attribute());
|
||||
case Type::Patch:
|
||||
return fmt::format("{}", arg.Patch());
|
||||
default:
|
||||
return "<unknown immediate type>";
|
||||
}
|
||||
|
|
|
@ -266,8 +266,8 @@ void IREmitter::SetM0(const U32& value) {
|
|||
Inst(Opcode::SetM0, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
||||
|
@ -278,6 +278,24 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
|
|||
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
|
||||
}
|
||||
|
||||
// Emits a read of one component of a TCS/TES input attribute, where all three
// indices (vertex within the patch, attribute slot, component) are runtime values.
F32 IREmitter::GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
                                       const U32& comp_index) {
    return Inst<F32>(IR::Opcode::GetTessGenericAttribute, vertex_index, attr_index, comp_index);
}
|
||||
|
||||
// Emits a write of one component of a TCS output control-point attribute.
// The control-point array index is implicit (InvocationId) and supplied by
// the backend, so only attribute and component indices are passed.
void IREmitter::SetTcsGenericAttribute(const F32& value, const U32& attr_index,
                                       const U32& comp_index) {
    Inst(Opcode::SetTcsGenericAttribute, value, attr_index, comp_index);
}
|
||||
|
||||
// Emits a read of a per-patch value (tess factor or generic PatchConst).
F32 IREmitter::GetPatch(Patch patch) {
    return Inst<F32>(Opcode::GetPatch, patch);
}
|
||||
|
||||
// Emits a write of a per-patch value (tess factor or generic PatchConst).
void IREmitter::SetPatch(Patch patch, const F32& value) {
    Inst(Opcode::SetPatch, patch, value);
}
|
||||
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 32:
|
||||
|
@ -552,6 +570,19 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
|
|||
}
|
||||
}
|
||||
|
||||
// Builds a composite from a runtime-sized span of elements by dispatching to
// the fixed-arity overloads. Only 2-4 elements are supported; anything else
// (including empty or single-element spans) is unreachable.
Value IREmitter::CompositeConstruct(std::span<const Value> elements) {
    const size_t count = elements.size();
    if (count == 2) {
        return CompositeConstruct(elements[0], elements[1]);
    }
    if (count == 3) {
        return CompositeConstruct(elements[0], elements[1], elements[2]);
    }
    if (count == 4) {
        return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]);
    }
    UNREACHABLE_MSG("Composite construct with greater than 4 elements");
}
|
||||
|
||||
Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
|
||||
const auto read{[&](Opcode opcode, size_t limit) -> Value {
|
||||
if (element >= limit) {
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/condition.h"
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
@ -80,10 +81,18 @@ public:
|
|||
|
||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
|
||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
|
||||
IR::Value index = IR::Value(u32(0u)));
|
||||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||
|
||||
[[nodiscard]] F32 GetTessGenericAttribute(const U32& vertex_index, const U32& attr_index,
|
||||
const U32& comp_index);
|
||||
void SetTcsGenericAttribute(const F32& value, const U32& attr_index, const U32& comp_index);
|
||||
|
||||
[[nodiscard]] F32 GetPatch(Patch patch);
|
||||
void SetPatch(Patch patch, const F32& value);
|
||||
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
|
||||
|
@ -138,6 +147,8 @@ public:
|
|||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
|
||||
const Value& e4);
|
||||
[[nodiscard]] Value CompositeConstruct(std::span<const Value> values);
|
||||
|
||||
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
|
||||
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
|
||||
|
||||
|
@ -335,6 +346,7 @@ private:
|
|||
template <typename T = Value, typename... Args>
|
||||
T Inst(Opcode op, Args... args) {
|
||||
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
|
||||
it->SetParent(block);
|
||||
return T{Value{&*it}};
|
||||
}
|
||||
|
||||
|
@ -352,6 +364,7 @@ private:
|
|||
u32 raw_flags{};
|
||||
std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
|
||||
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
|
||||
it->SetParent(block);
|
||||
return T{Value{&*it}};
|
||||
}
|
||||
};
|
||||
|
|
|
@ -52,6 +52,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::Discard:
|
||||
case Opcode::DiscardCond:
|
||||
case Opcode::SetAttribute:
|
||||
case Opcode::SetTcsGenericAttribute:
|
||||
case Opcode::SetPatch:
|
||||
case Opcode::StoreBufferU32:
|
||||
case Opcode::StoreBufferU32x2:
|
||||
case Opcode::StoreBufferU32x3:
|
||||
|
|
|
@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque};
|
|||
constexpr Type ScalarReg{Type::ScalarReg};
|
||||
constexpr Type VectorReg{Type::VectorReg};
|
||||
constexpr Type Attribute{Type::Attribute};
|
||||
constexpr Type SystemValue{Type::SystemValue};
|
||||
constexpr Type Patch{Type::Patch};
|
||||
constexpr Type U1{Type::U1};
|
||||
constexpr Type U8{Type::U8};
|
||||
constexpr Type U16{Type::U16};
|
||||
|
|
|
@ -60,6 +60,10 @@ OPCODE(SetGotoVariable, Void, U32,
|
|||
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
|
||||
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
||||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||
OPCODE(GetPatch, F32, Patch, )
|
||||
OPCODE(SetPatch, Void, Patch, F32, )
|
||||
OPCODE(GetTessGenericAttribute, F32, U32, U32, U32, )
|
||||
OPCODE(SetTcsGenericAttribute, Void, F32, U32, U32, )
|
||||
|
||||
// Flags
|
||||
OPCODE(GetScc, U1, Void, )
|
||||
|
|
|
@ -216,6 +216,18 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void FoldMul(IR::Block& block, IR::Inst& inst) {
|
||||
if (!FoldCommutative<T>(inst, [](T a, T b) { return a * b; })) {
|
||||
return;
|
||||
}
|
||||
const IR::Value rhs{inst.Arg(1)};
|
||||
if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
|
||||
inst.ReplaceUsesWithAndRemove(IR::Value(0u));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void FoldCmpClass(IR::Block& block, IR::Inst& inst) {
|
||||
ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
|
||||
const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
|
||||
|
@ -292,7 +304,19 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
|
||||
return;
|
||||
case IR::Opcode::IMul32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
||||
FoldMul<u32>(block, inst);
|
||||
return;
|
||||
case IR::Opcode::UDiv32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
|
||||
ASSERT_MSG(b != 0, "Folding UDiv32 with divisor 0");
|
||||
return a / b;
|
||||
});
|
||||
return;
|
||||
case IR::Opcode::UMod32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) {
|
||||
ASSERT_MSG(b != 0, "Folding UMod32 with modulo 0");
|
||||
return a % b;
|
||||
});
|
||||
return;
|
||||
case IR::Opcode::FPCmpClass32:
|
||||
FoldCmpClass(block, inst);
|
||||
|
|
4
src/shader_recompiler/ir/passes/constant_propogation.h
Normal file
4
src/shader_recompiler/ir/passes/constant_propogation.h
Normal file
|
@ -0,0 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
744
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Normal file
744
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Normal file
|
@ -0,0 +1,744 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/pattern_matching.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
/**
|
||||
* Tessellation shaders pass outputs to the next shader using LDS.
|
||||
* The Hull shader stage receives input control points stored in LDS.
|
||||
*
|
||||
* These passes attempt to resolve LDS accesses to attribute accesses and correctly
|
||||
* write to the tessellation factor tables.
|
||||
*
|
||||
* The LDS layout is:
|
||||
* - TCS inputs for patch 0
|
||||
* - TCS inputs for patch 1
|
||||
* - TCS inputs for patch 2
|
||||
* - ...
|
||||
* - TCS outputs for patch 0
|
||||
* - TCS outputs for patch 1
|
||||
* - TCS outputs for patch 2
|
||||
* - ...
|
||||
* - PatchConst TCS outputs for patch 0
|
||||
* - PatchConst TCS outputs for patch 1
|
||||
* - PatchConst TCS outputs for patch 2
|
||||
*
|
||||
*
|
||||
* If the Hull stage does not write any new control points the driver will
|
||||
* optimize LDS layout so input and output control point spaces overlap.
|
||||
* (Passthrough)
|
||||
*
|
||||
* The gnm driver requires a V# holding special constants to be bound
|
||||
* for reads by the shader.
|
||||
* The Hull and Domain shaders read values from this buffer which
|
||||
* contain size and offset information required to address input, output,
|
||||
* or PatchConst attributes within the current patch.
|
||||
* See the TessellationDataConstantBuffer struct to see the layout of this V#.
|
||||
*
|
||||
* Tessellation factors are stored to a special tessellation factor V# that is automatically bound
|
||||
* by the driver. This is the input to the fixed function tessellator that actually subdivides the
|
||||
* domain. We translate these to writes to SPIR-V builtins for tessellation factors in the Hull
|
||||
* shader.
|
||||
* The offset into the tess factor buffer determines which factor the shader is writing.
|
||||
* Additionally, most hull shaders seem to redundantly write tess factors to PatchConst
|
||||
* attributes, even if dead in the domain shader. We just treat these as generic PatchConst writes.
|
||||
*
|
||||
* LDS reads in the Hull shader can be from input control points, and in the Domain shader can
|
||||
* be hs output control points (output from the perspective of the Hull shader) and patchconst
|
||||
* values.
|
||||
* LDS stores in the Hull shader can either be output control point writes or per-patch
|
||||
* (PatchConst) data writes. The Domain shader exports attributes using EXP instructions, unless its
|
||||
* followed by the geometry stage (but we haven't seen this yet), so nothing special there.
|
||||
* The address calculations can vary significantly and can't be easily pattern matched. We are at
|
||||
* the mercy of instruction selection the ps4 compiler wanted to use.
|
||||
* Generally though, they could look something like this:
|
||||
* Input control point:
|
||||
* addr = PatchIdInVgt * input_cp_stride * #input_cp_per_patch + index * input_cp_stride
|
||||
* + attr# * 16 + component
|
||||
* Output control point:
|
||||
* addr = #patches * input_cp_stride * #input_cp_per_patch
|
||||
* + PatchIdInVgt * output_patch_stride + InvocationID * output_cp_stride
|
||||
+ attr# * 16 + component
|
||||
* Per patch output:
|
||||
* addr = #patches * input_cp_stride * #cp_per_input_patch
|
||||
* + #patches * output_patch_stride
|
||||
* + PatchIdInVgt * per_patch_output_stride + attr# * 16 + component
|
||||
*
|
||||
* output_patch_stride and output_cp_stride are usually compile time constants in the gcn
|
||||
*
|
||||
* Hull shaders can probably also read output control points corresponding to other threads, like
|
||||
* shared memory (but we haven't seen this yet).
|
||||
* ^ This is an UNREACHABLE for now. We may need to insert additional barriers if this happens.
|
||||
* They should also be able to read PatchConst values,
|
||||
* although not sure if this happens in practice.
|
||||
*
|
||||
* To determine which type of attribute (input, output, patchconst) is referenced, we check the users of
|
||||
* TessConstants V# reads to deduce which type of attribute a given load/store to LDS
|
||||
* is touching.
|
||||
*
|
||||
* In the Hull shader, both the PatchId within the VGT group (PatchIdInVgt) and the output control
|
||||
* point id (InvocationId) are packed in VGPR1 by the driver like
|
||||
* V1 = InvocationId << 8 | PatchIdInVgt
|
||||
* The shader typically uses V_BFE_(U|S)32 to extract them. We use the starting bit_pos to determine
|
||||
* which is which.
|
||||
*
|
||||
* This pass does not attempt to deduce the exact attribute referenced in a LDS load/store.
|
||||
* Instead, it feeds the address in the LDS load/store to the get/set Insts we use for TCS in/out's,
|
||||
* TES in's, and PatchConst in/out's.
|
||||
*
|
||||
* TCS/TES Input attributes:
|
||||
* We define input attributes using an array in the shader roughly like this:
|
||||
* // equivalent GLSL in TCS
|
||||
* layout (location = 0) in vec4 in_attrs[][NUM_INPUT_ATTRIBUTES];
|
||||
*
|
||||
* Here the NUM_INPUT_ATTRIBUTES is derived from the ls_stride member of the TessConstants V#.
|
||||
* We divide ls_stride (in bytes) by 16 to get the number of vec4 attributes.
|
||||
* For TES, the number of attributes comes from hs_cp_stride / 16.
|
||||
* The first (outer) dimension is unsized but corresponds to the number of vertices in the hs input
|
||||
* patch (for Hull) or the hs output patch (for Domain).
|
||||
*
|
||||
* For input reads in TCS or TES, we emit SPIR-V like:
|
||||
* float value = in_attrs[addr / ls_stride][(addr % ls_stride) >> 4][(addr & 0xF) >> 2];
|
||||
*
|
||||
* For output writes, we assume the control point index is InvocationId, since high level languages
|
||||
* impose that restriction (although maybe it's technically possible on hardware). So SPIR-V looks
|
||||
* like this:
|
||||
* layout (location = 0) in vec4 in_attrs[][NUM_OUTPUT_ATTRIBUTES];
|
||||
* out_attrs[InvocationId][(addr % hs_cp_stride) >> 4][(addr & 0xF) >> 2] = value;
|
||||
*
|
||||
* NUM_OUTPUT_ATTRIBUTES is derived by hs_cp_stride / 16, so it can link with the TES in_attrs
|
||||
* variable.
|
||||
*
|
||||
* Another challenge is the fact that the GCN shader needs to address attributes from LDS as a whole
|
||||
* which contains the attributes from many patches. On the other hand, higher level shading
|
||||
* languages restrict attribute access to the patch of the current thread, which is naturally a
|
||||
* restriction in SPIR-V also.
|
||||
* The addresses the ps4 compiler generates for loads/stores and the fact that LDS holds many
|
||||
* patches' attributes are just implementation details of the ps4 driver/compiler. To deal with
|
||||
* this, we can replace certain TessConstant V# reads with 0, which only contribute to the base
|
||||
* address of the current patch's attributes in LDS and not the indexes within the local patch.
|
||||
*
|
||||
* (A perfect implementation might need emulation of the VGTs in mesh/compute, loading/storing
|
||||
* attributes to buffers and not caring about whether they are hs input, hs output, or patchconst
|
||||
* attributes)
|
||||
*
|
||||
*/
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace Shader::Optimiation::PatternMatching;
|
||||
|
||||
// Records the location of the TessellationDataConstantBuffer V# in `info`,
// reads the constants into `tess_constants`, and initializes the current
// stage's runtime info from them.
// Fix: dropped the redundant trailing `return;` at the end of this void
// function (readability-redundant-control-flow).
static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset,
                              Shader::Info& info, Shader::RuntimeInfo& runtime_info,
                              TessellationDataConstantBuffer& tess_constants) {
    info.tess_consts_ptr_base = sharp_ptr_base;
    info.tess_consts_dword_offset = sharp_dword_offset;
    info.ReadTessConstantBuffer(tess_constants);
    if (info.l_stage == LogicalStage::TessellationControl) {
        runtime_info.hs_info.InitFromTessConstants(tess_constants);
    } else {
        // NOTE(review): non-TCS callers update vs_info here — presumably the
        // domain stage is mapped onto the vertex runtime info; confirm.
        runtime_info.vs_info.InitFromTessConstants(tess_constants);
    }
}
|
||||
|
||||
// Where the TessellationDataConstantBuffer V# lives in user data: either
// directly in user-data SGPRs (ptr_base == ScalarReg::Max, dword_off is the
// starting SGPR) or loaded indirectly through a pointer held in user data
// (ptr_base is the SGPR pair, dword_off the offset of the V#).
struct TessSharpLocation {
    IR::ScalarReg ptr_base;
    u32 dword_off;
};
|
||||
|
||||
// Tries to locate the tess-constants V# that a ReadConstBuffer instruction
// reads through, by pattern-matching the shape of its handle operand.
// Returns empty if the handle matches neither recognized pattern.
std::optional<TessSharpLocation> FindTessConstantSharp(IR::Inst* read_const_buffer) {
    IR::Value sharp_ptr_base;
    IR::Value sharp_dword_offset;

    // NOTE(review): `rv` appears unused in this function.
    IR::Value rv = IR::Value{read_const_buffer};
    IR::Value handle = read_const_buffer->Arg(0);

    // Pattern 1: the V#'s dwords come straight from user-data SGPRs.
    if (M_COMPOSITECONSTRUCTU32X4(M_GETUSERDATA(MatchImm(sharp_dword_offset)), MatchIgnore(),
                                  MatchIgnore(), MatchIgnore())
            .Match(handle)) {
        return TessSharpLocation{.ptr_base = IR::ScalarReg::Max,
                                 .dword_off = static_cast<u32>(sharp_dword_offset.ScalarReg())};
    // Pattern 2: the V# is fetched via ReadConst through a 64-bit pointer
    // held in a user-data SGPR pair.
    } else if (M_COMPOSITECONSTRUCTU32X4(
                   M_READCONST(M_COMPOSITECONSTRUCTU32X2(M_GETUSERDATA(MatchImm(sharp_ptr_base)),
                                                         MatchIgnore()),
                               MatchImm(sharp_dword_offset)),
                   MatchIgnore(), MatchIgnore(), MatchIgnore())
                   .Match(handle)) {
        return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(),
                                 .dword_off = sharp_dword_offset.U32()};
    }
    return {};
}
|
||||
|
||||
// Walker that helps deduce what type of attribute a DS instruction is reading
|
||||
// or writing, which could be an input control point, output control point,
|
||||
// or per-patch constant (PatchConst).
|
||||
// For certain ReadConstBuffer instructions using the tess constants V#,, we visit the users
|
||||
// recursively and increment a counter on the Load/WriteShared users.
|
||||
// Namely NumPatch (from m_hsNumPatch), HsOutputBase (m_hsOutputBase),
|
||||
// and PatchConstBase (m_patchConstBase).
|
||||
// In addr calculations, the term NumPatch * ls_stride * #input_cp_in_patch
|
||||
// is used as an addend to skip the region for input control points, and similarly
|
||||
// NumPatch * hs_cp_stride * #output_cp_in_patch is used to skip the region
|
||||
// for output control points.
|
||||
//
|
||||
// TODO: this will break if AMD compiler used distributive property like
|
||||
// TcsNumPatches * (ls_stride * #input_cp_in_patch + hs_cp_stride * #output_cp_in_patch)
|
||||
class TessConstantUseWalker {
public:
    // Walks all users of a ReadConstBuffer of one of the special tess
    // constants and accumulates a counter in the flags of every
    // Load/WriteShared instruction reached. HsNumPatch and HsOutputBase each
    // contribute 1, PatchConstBase contributes 2; the resulting counter is
    // later decoded by GetAttributeRegionKind to classify the LDS access.
    void MarkTessAttributeUsers(IR::Inst* read_const_buffer, TessConstantAttribute attr) {
        u32 inc;
        switch (attr) {
        case TessConstantAttribute::HsNumPatch:
        case TessConstantAttribute::HsOutputBase:
            inc = 1;
            break;
        case TessConstantAttribute::PatchConstBase:
            inc = 2;
            break;
        default:
            UNREACHABLE();
        }

        for (IR::Use use : read_const_buffer->Uses()) {
            MarkTessAttributeUsersHelper(use, inc);
        }

        // Advance the walk id so the phi bookkeeping below can distinguish
        // this walk from previous ones.
        ++seq_num;
    }

private:
    // Recursive helper: bumps the counter on shared-memory accesses, and uses
    // per-phi sequence numbers to terminate on phi cycles.
    void MarkTessAttributeUsersHelper(IR::Use use, u32 inc) {
        IR::Inst* inst = use.user;

        switch (use.user->GetOpcode()) {
        case IR::Opcode::LoadSharedU32:
        case IR::Opcode::LoadSharedU64:
        case IR::Opcode::LoadSharedU128:
        case IR::Opcode::WriteSharedU32:
        case IR::Opcode::WriteSharedU64:
        case IR::Opcode::WriteSharedU128: {
            u32 counter = inst->Flags<u32>();
            inst->SetFlags<u32>(counter + inc);
            // Stop here
            return;
        }
        case IR::Opcode::Phi: {
            // Packed into the instruction's 32-bit flags word.
            struct PhiCounter {
                u16 seq_num;
                u8 unique_edge;
                u8 counter;
            };

            PhiCounter count = inst->Flags<PhiCounter>();
            ASSERT_MSG(count.counter == 0 || count.unique_edge == use.operand);
            // the point of seq_num is to tell us if we've already traversed this
            // phi on the current walk. Alternatively we could keep a set of phi's
            // seen on the current walk. This is to handle phi cycles
            if (count.seq_num == 0) {
                // First time we've encountered this phi
                count.seq_num = seq_num;
                // Mark the phi as having been traversed originally through this edge
                count.unique_edge = use.operand;
                count.counter = inc;
            } else if (count.seq_num < seq_num) {
                count.seq_num = seq_num;
                // For now, assume we are visiting this phi via the same edge
                // as on other walks. If not, some dataflow analysis might be necessary
                ASSERT(count.unique_edge == use.operand);
                count.counter += inc;
            } else {
                // count.seq_num == seq_num
                // there's a cycle, and we've already been here on this walk
                return;
            }
            inst->SetFlags<PhiCounter>(count);
            break;
        }
        default:
            break;
        }

        // Not a terminal user: keep walking this instruction's own users.
        for (IR::Use use : inst->Uses()) {
            MarkTessAttributeUsersHelper(use, inc);
        }
    }

    // Current walk id; starts at 1 so a phi's zero-initialized seq_num means
    // "never visited".
    u32 seq_num{1u};
};
|
||||
|
||||
enum class AttributeRegion : u32 { InputCP, OutputCP, PatchConst };
|
||||
|
||||
// Decodes the counter accumulated by TessConstantUseWalker on a
// Load/WriteShared instruction into the LDS region it addresses.
// A zero counter means the address never mixed in a region base, i.e. an
// input control point. In a passthrough hull shader the output region
// overlaps the input region, so a counter of 1 already means PatchConst.
static AttributeRegion GetAttributeRegionKind(IR::Inst* ring_access, const Shader::Info& info,
                                              const Shader::RuntimeInfo& runtime_info) {
    const u32 counter = ring_access->Flags<u32>();
    if (counter == 0) {
        return AttributeRegion::InputCP;
    }
    const bool hs_passthrough = info.l_stage == LogicalStage::TessellationControl &&
                                runtime_info.hs_info.IsPassthrough();
    if (hs_passthrough) {
        ASSERT(counter <= 1);
        return AttributeRegion::PatchConst;
    }
    ASSERT(counter <= 2);
    return AttributeRegion(counter);
}
|
||||
|
||||
// Conservatively determines whether `term` is a multiple of `stride`:
// true for the literal stride itself, for a 24-bit bitfield extract of a
// divisible value, or for a product with at least one divisible factor.
static bool IsDivisibleByStride(IR::Value term, u32 stride) {
    IR::Value lhs, rhs;
    if (MatchU32(stride).Match(term)) {
        return true;
    }
    if (M_BITFIELDUEXTRACT(MatchValue(lhs), MatchU32(0), MatchU32(24)).Match(term) ||
        M_BITFIELDSEXTRACT(MatchValue(lhs), MatchU32(0), MatchU32(24)).Match(term)) {
        return IsDivisibleByStride(lhs, stride);
    }
    if (M_IMUL32(MatchValue(lhs), MatchValue(rhs)).Match(term)) {
        return IsDivisibleByStride(lhs, stride) || IsDivisibleByStride(rhs, stride);
    }
    return false;
}
|
||||
|
||||
// Return true if we can eliminate any addends
|
||||
// Recursively splits a sum and collects, into `addends`, the terms that are
// NOT provably divisible by `stride` (the ones that survive the modulo).
// Returns true if any term was eliminated.
static bool TryOptimizeAddendInModulo(IR::Value addend, u32 stride, std::vector<IR::U32>& addends) {
    IR::Value lhs, rhs;
    if (M_IADD32(MatchValue(lhs), MatchValue(rhs)).Match(addend)) {
        // Both sides must be processed unconditionally so every surviving
        // term is collected; do not short-circuit.
        const bool left_opt = TryOptimizeAddendInModulo(lhs, stride, addends);
        const bool right_opt = TryOptimizeAddendInModulo(rhs, stride, addends);
        return left_opt || right_opt;
    }
    if (IsDivisibleByStride(addend, stride)) {
        // Divisible term: contributes nothing modulo stride, drop it.
        return true;
    }
    addends.push_back(IR::U32{addend});
    return false;
}
|
||||
|
||||
// In calculation (a + b + ...) % stride
|
||||
// Use this fact
|
||||
// (a + b) mod N = (a mod N + b mod N) mod N
|
||||
// If any addend is divisible by stride, then we can replace it with 0 in the attribute
|
||||
// or component index calculation
|
||||
// In calculation (a + b + ...) % stride
// Use this fact
// (a + b) mod N = (a mod N + b mod N) mod N
// If any addend is divisible by stride, then we can replace it with 0 in the
// attribute or component index calculation. Returns the original address
// unchanged when nothing could be eliminated.
static IR::U32 TryOptimizeAddressModulo(IR::U32 addr, u32 stride, IR::IREmitter& ir) {
    std::vector<IR::U32> surviving_terms;
    if (!TryOptimizeAddendInModulo(addr, stride, surviving_terms)) {
        return addr;
    }
    // Rebuild the address from only the terms that matter modulo stride.
    IR::U32 reduced = ir.Imm32(0);
    for (const IR::U32& term : surviving_terms) {
        reduced = ir.IAdd(reduced, term);
    }
    return reduced;
}
|
||||
|
||||
// TODO: can optimize div in control point index similarly to mod
|
||||
|
||||
// Read a TCS input (InputCP region) or TES input (OutputCP region)
|
||||
// Read a TCS input (InputCP region) or TES input (OutputCP region).
// Splits a byte address in LDS into (control point, attribute, component)
// indices: the quotient by `stride` selects the control point, the remainder
// is split into a vec4 slot (bytes / 16) and a component (bytes % 16 / 4).
static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmitter& ir,
                                      u32 off_dw) {
    if (off_dw > 0) {
        addr = ir.IAdd(addr, ir.Imm32(off_dw));
    }
    const IR::U32 cp_index = ir.IDiv(addr, ir.Imm32(stride));
    // Drop addends divisible by the stride before taking the modulo.
    const IR::U32 patch_local = TryOptimizeAddressModulo(addr, stride, ir);
    const IR::U32 attr_slot =
        ir.ShiftRightLogical(ir.IMod(patch_local, ir.Imm32(stride)), ir.Imm32(4u));
    const IR::U32 component =
        ir.ShiftRightLogical(ir.BitwiseAnd(patch_local, ir.Imm32(0xFU)), ir.Imm32(2u));
    return ir.GetTessGenericAttribute(cp_index, attr_slot, component);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
|
||||
const Info& info = program.info;
|
||||
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::StoreBufferU32x2:
|
||||
case IR::Opcode::StoreBufferU32x3:
|
||||
case IR::Opcode::StoreBufferU32x4: {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto GetValue = [&](IR::Value data) -> IR::F32 {
|
||||
if (auto* inst = data.TryInstRecursive();
|
||||
inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) {
|
||||
return IR::F32{inst->Arg(0)};
|
||||
}
|
||||
return ir.BitCast<IR::F32, IR::U32>(IR::U32{data});
|
||||
};
|
||||
const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
|
||||
IR::U32 index = IR::U32{inst.Arg(1)};
|
||||
ASSERT(index.IsImmediate());
|
||||
const u32 gcn_factor_idx = (info.inst_offset.Value() + index.U32()) >> 2;
|
||||
|
||||
const IR::Value data = inst.Arg(2);
|
||||
auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch {
|
||||
// The hull outputs tess factors in different formats depending on the shader.
|
||||
// For triangle domains, it seems to pack the entries into 4 consecutive floats,
|
||||
// with the 3 edge factors followed by the 1 interior factor.
|
||||
// For quads, it does 4 edge factors then 2 interior.
|
||||
// There is a tess factor stride member of the GNMX hull constants struct in
|
||||
// a hull program shader binary archive, but this doesn't seem to be
|
||||
// communicated to the driver.
|
||||
// The layout seems to be implied by the type of the abstract domain.
|
||||
switch (runtime_info.hs_info.tess_type) {
|
||||
case AmdGpu::TessellationType::Quad:
|
||||
ASSERT(gcn_factor_idx < 6);
|
||||
return IR::PatchFactor(gcn_factor_idx);
|
||||
case AmdGpu::TessellationType::Triangle:
|
||||
ASSERT(gcn_factor_idx < 4);
|
||||
if (gcn_factor_idx == 3) {
|
||||
return IR::Patch::TessellationLodInteriorU;
|
||||
}
|
||||
return IR::PatchFactor(gcn_factor_idx);
|
||||
default:
|
||||
// Point domain types haven't been seen so far
|
||||
UNREACHABLE_MSG("Unhandled tess type");
|
||||
}
|
||||
};
|
||||
|
||||
inst.Invalidate();
|
||||
if (num_dwords == 1) {
|
||||
ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data));
|
||||
break;
|
||||
}
|
||||
auto* inst = data.TryInstRecursive();
|
||||
ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
|
||||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 ||
|
||||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4));
|
||||
for (s32 i = 0; i < num_dwords; i++) {
|
||||
ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(inst->Arg(i)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::WriteSharedU128: {
|
||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
|
||||
? 1
|
||||
: (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
|
||||
const IR::U32 addr{inst.Arg(0)};
|
||||
const IR::U32 data{inst.Arg(1).Resolve()};
|
||||
|
||||
const auto SetOutput = [&](IR::U32 addr, IR::U32 value, AttributeRegion output_kind,
|
||||
u32 off_dw) {
|
||||
const IR::F32 data_component = ir.BitCast<IR::F32, IR::U32>(value);
|
||||
|
||||
if (output_kind == AttributeRegion::OutputCP) {
|
||||
if (off_dw > 0) {
|
||||
addr = ir.IAdd(addr, ir.Imm32(off_dw));
|
||||
}
|
||||
u32 stride = runtime_info.hs_info.hs_output_cp_stride;
|
||||
// Invocation ID array index is implicit, handled by SPIRV backend
|
||||
const IR::U32 addr_for_attrs = TryOptimizeAddressModulo(addr, stride, ir);
|
||||
const IR::U32 attr_index = ir.ShiftRightLogical(
|
||||
ir.IMod(addr_for_attrs, ir.Imm32(stride)), ir.Imm32(4u));
|
||||
const IR::U32 comp_index = ir.ShiftRightLogical(
|
||||
ir.BitwiseAnd(addr_for_attrs, ir.Imm32(0xFU)), ir.Imm32(2u));
|
||||
ir.SetTcsGenericAttribute(data_component, attr_index, comp_index);
|
||||
} else {
|
||||
ASSERT(output_kind == AttributeRegion::PatchConst);
|
||||
ASSERT_MSG(addr.IsImmediate(), "patch addr non imm, inst {}",
|
||||
fmt::ptr(addr.Inst()));
|
||||
ir.SetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw), data_component);
|
||||
}
|
||||
};
|
||||
|
||||
AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
|
||||
if (num_dwords == 1) {
|
||||
SetOutput(addr, data, region, 0);
|
||||
} else {
|
||||
for (auto i = 0; i < num_dwords; i++) {
|
||||
SetOutput(addr, IR::U32{data.Inst()->Arg(i)}, region, i);
|
||||
}
|
||||
}
|
||||
inst.Invalidate();
|
||||
break;
|
||||
}
|
||||
|
||||
case IR::Opcode::LoadSharedU32: {
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::LoadSharedU128:
|
||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const IR::U32 addr{inst.Arg(0)};
|
||||
AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
|
||||
const u32 num_dwords = opcode == IR::Opcode::LoadSharedU32
|
||||
? 1
|
||||
: (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
|
||||
ASSERT_MSG(region == AttributeRegion::InputCP,
|
||||
"Unhandled read of output or patchconst attribute in hull shader");
|
||||
IR::Value attr_read;
|
||||
if (num_dwords == 1) {
|
||||
attr_read = ir.BitCast<IR::U32>(
|
||||
ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, 0));
|
||||
} else {
|
||||
boost::container::static_vector<IR::Value, 4> read_components;
|
||||
for (auto i = 0; i < num_dwords; i++) {
|
||||
const IR::F32 component =
|
||||
ReadTessInputComponent(addr, runtime_info.hs_info.ls_stride, ir, i);
|
||||
read_components.push_back(ir.BitCast<IR::U32>(component));
|
||||
}
|
||||
attr_read = ir.CompositeConstruct(read_components);
|
||||
}
|
||||
inst.ReplaceUsesWithAndRemove(attr_read);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (runtime_info.hs_info.IsPassthrough()) {
|
||||
// Copy input attributes to output attributes, indexed by InvocationID
|
||||
// Passthrough should imply that input and output patches have same number of vertices
|
||||
IR::Block* entry_block = *program.blocks.begin();
|
||||
auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) {
|
||||
return inst.GetOpcode() == IR::Opcode::Prologue;
|
||||
});
|
||||
ASSERT(it != entry_block->end());
|
||||
++it;
|
||||
ASSERT(it != entry_block->end());
|
||||
++it;
|
||||
// Prologue
|
||||
// SetExec #true
|
||||
// <- insert here
|
||||
// ...
|
||||
IR::IREmitter ir{*entry_block, it};
|
||||
|
||||
ASSERT(runtime_info.hs_info.ls_stride % 16 == 0);
|
||||
u32 num_attributes = runtime_info.hs_info.ls_stride / 16;
|
||||
const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId);
|
||||
for (u32 attr_no = 0; attr_no < num_attributes; attr_no++) {
|
||||
for (u32 comp = 0; comp < 4; comp++) {
|
||||
IR::F32 attr_read =
|
||||
ir.GetTessGenericAttribute(invocation_id, ir.Imm32(attr_no), ir.Imm32(comp));
|
||||
// InvocationId is implicit index for output control point writes
|
||||
ir.SetTcsGenericAttribute(attr_read, ir.Imm32(attr_no), ir.Imm32(comp));
|
||||
}
|
||||
}
|
||||
// We could wrap the rest of the program in an if stmt
|
||||
// CopyInputAttrsToOutputs(); // psuedocode
|
||||
// if (InvocationId == 0) {
|
||||
// PatchConstFunction();
|
||||
// }
|
||||
// But as long as we treat invocation ID as 0 for all threads, shouldn't matter functionally
|
||||
}
|
||||
}
|
||||
|
||||
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
    Info& info = program.info;

    // Rewrites LDS loads in the domain shader as reads of hull shader results:
    // either per-control-point output attributes or per-patch constants.
    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            switch (opcode) {
            case IR::Opcode::LoadSharedU32:
            case IR::Opcode::LoadSharedU64:
            case IR::Opcode::LoadSharedU128: {
                IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
                const IR::U32 addr{inst.Arg(0)};
                const AttributeRegion region = GetAttributeRegionKind(&inst, info, runtime_info);
                const u32 dword_count =
                    opcode == IR::Opcode::LoadSharedU32
                        ? 1
                        : (opcode == IR::Opcode::LoadSharedU64 ? 2 : 4);
                // Reads one dword-sized component, either from the output control
                // point region or from the patch constant region.
                const auto read_component = [&](IR::U32 base_addr, u32 off_dw) -> IR::F32 {
                    if (region == AttributeRegion::OutputCP) {
                        return ReadTessInputComponent(
                            base_addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw);
                    }
                    ASSERT(region == AttributeRegion::PatchConst);
                    return ir.GetPatch(IR::PatchGeneric((base_addr.U32() >> 2) + off_dw));
                };
                IR::Value replacement;
                if (dword_count == 1) {
                    replacement = ir.BitCast<IR::U32>(read_component(addr, 0));
                } else {
                    boost::container::static_vector<IR::Value, 4> components;
                    for (u32 i = 0; i < dword_count; i++) {
                        components.push_back(ir.BitCast<IR::U32>(read_component(addr, i)));
                    }
                    replacement = ir.CompositeConstruct(components);
                }
                inst.ReplaceUsesWithAndRemove(replacement);
                break;
            }
            default:
                break;
            }
        }
    }
}
|
||||
|
||||
// Run before either hull or domain transform
|
||||
// Run before either hull or domain transform.
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
    TessellationDataConstantBuffer tess_constants;
    Shader::Info& info = program.info;

    // Pass 1: locate the TessellationDataConstantBuffer V# by following every
    // shared-memory access back to the ReadConstBuffer feeding its address.
    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const bool found_tess_sharp = [&]() -> bool {
                switch (inst.GetOpcode()) {
                case IR::Opcode::LoadSharedU32:
                case IR::Opcode::LoadSharedU64:
                case IR::Opcode::LoadSharedU128:
                case IR::Opcode::WriteSharedU32:
                case IR::Opcode::WriteSharedU64:
                case IR::Opcode::WriteSharedU128: {
                    IR::Value addr = inst.Arg(0);
                    const auto read_const_buffer = IR::BreadthFirstSearch(
                        addr, [](IR::Inst* candidate) -> std::optional<IR::Inst*> {
                            if (candidate->GetOpcode() == IR::Opcode::ReadConstBuffer) {
                                return candidate;
                            }
                            return std::nullopt;
                        });
                    if (!read_const_buffer) {
                        return false;
                    }
                    const auto sharp_loc = FindTessConstantSharp(read_const_buffer.value());
                    if (!sharp_loc) {
                        UNREACHABLE_MSG("Failed to match tess constant sharp");
                    }
                    if (info.tess_consts_dword_offset >= 0) {
                        // A ReadConstBuffer contributing to an LDS address is not
                        // necessarily a TessConstants V# read; verify all matches
                        // agree. Could improve on this somehow.
                        ASSERT_MSG(static_cast<s32>(sharp_loc->dword_off) ==
                                           info.tess_consts_dword_offset &&
                                       sharp_loc->ptr_base == info.tess_consts_ptr_base,
                                   "TessConstants V# is ambiguous");
                    }
                    InitTessConstants(sharp_loc->ptr_base, static_cast<s32>(sharp_loc->dword_off),
                                      info, runtime_info, tess_constants);
                    return true;
                }
                default:
                    return false;
                }
            }();

            if (found_tess_sharp) {
                break;
            }
        }
    }

    ASSERT(info.tess_consts_dword_offset >= 0);

    // Pass 2: fold reads of the TessConstants V# into compile-time constants and
    // tag users of the base/offset members so later passes can strip them.
    TessConstantUseWalker walker;

    for (IR::Block* block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::ReadConstBuffer) {
                continue;
            }
            const auto sharp_loc = FindTessConstantSharp(&inst);
            if (!sharp_loc || sharp_loc->ptr_base != info.tess_consts_ptr_base ||
                sharp_loc->dword_off != info.tess_consts_dword_offset) {
                continue;
            }
            // This instruction reads from the TessConstants V#.
            const IR::Value index = inst.Arg(1);
            ASSERT_MSG(index.IsImmediate(), "Tessellation constant read with dynamic index");
            const u32 off_dw = index.U32();
            ASSERT(off_dw <= static_cast<u32>(TessConstantAttribute::FirstEdgeTessFactorIndex));

            const auto attr = static_cast<TessConstantAttribute>(off_dw);
            switch (attr) {
            case TessConstantAttribute::LsStride:
                // If not, we may need to make this runtime state for TES.
                ASSERT(info.l_stage == LogicalStage::TessellationControl);
                inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.ls_stride));
                break;
            case TessConstantAttribute::HsCpStride:
                inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.hs_cp_stride));
                break;
            case TessConstantAttribute::HsNumPatch:
            case TessConstantAttribute::HsOutputBase:
            case TessConstantAttribute::PatchConstBase:
                walker.MarkTessAttributeUsers(&inst, attr);
                // Zeroing these bases keeps indexing local to the current patch in
                // the recompiled Vulkan shader. This assumes they only contribute
                // to address calculations for in/out attributes in the original
                // GCN shader. See the explanation for why V2 is set to 0 when
                // emitting the prologue.
                inst.ReplaceUsesWithAndRemove(IR::Value(0u));
                break;
            case TessConstantAttribute::PatchConstSize:
            case TessConstantAttribute::PatchOutputSize:
            case TessConstantAttribute::OffChipTessellationFactorThreshold:
            case TessConstantAttribute::FirstEdgeTessFactorIndex:
                // May need to replace PatchConstSize and PatchOutputSize with 0.
                break;
            default:
                UNREACHABLE_MSG("Read past end of TessConstantsBuffer");
            }
        }
    }

    // Pass 3: pattern matching needed until dynamic indexing of PatchConst
    // attributes and tess factors is supported. PatchConst could become a single
    // vec4 array like in/out attrs; tess factors are less clear.
    if (info.l_stage != LogicalStage::TessellationControl) {
        return;
    }
    // Replace the BitFieldUExtracts on V1 (patch id within the VGT packed
    // together with the output control point id).
    for (IR::Block* block : program.blocks) {
        for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) {
            IR::Inst& inst = *it;
            if (M_BITFIELDUEXTRACT(
                    M_GETATTRIBUTEU32(MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
                                      MatchIgnore()),
                    MatchU32(0), MatchU32(8))
                    .Match(IR::Value{&inst})) {
                // This is the patch id within the VGT, not the actual PrimitiveId
                // in the draw.
                inst.ReplaceUsesWithAndRemove(IR::Value(0u));
            } else if (M_BITFIELDUEXTRACT(
                           M_GETATTRIBUTEU32(
                               MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
                               MatchIgnore()),
                           MatchU32(8), MatchU32(5))
                           .Match(IR::Value{&inst})) {
                IR::IREmitter ir(*block, it);
                IR::Value replacement;
                if (runtime_info.hs_info.IsPassthrough()) {
                    // Deal with annoying pattern in BB where an InvocationID use
                    // makes no sense (in the address calculation for a patchconst
                    // or tess factor write).
                    replacement = ir.Imm32(0);
                } else {
                    replacement = ir.GetAttributeU32(IR::Attribute::InvocationId);
                }
                inst.ReplaceUsesWithAndRemove(replacement);
            }
        }
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
|
@ -18,5 +18,8 @@ void CollectShaderInfoPass(IR::Program& program);
|
|||
void LowerSharedMemToRegisters(IR::Program& program);
|
||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||
Stage stage);
|
||||
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
|
@ -23,12 +25,45 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||
};
|
||||
|
||||
switch (stage) {
|
||||
case Stage::Local: {
|
||||
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::WriteSharedU32: {
|
||||
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
|
||||
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
|
||||
|
||||
u32 offset = 0;
|
||||
const auto* addr = inst.Arg(0).InstRecursive();
|
||||
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
ASSERT(addr->Arg(1).IsImmediate());
|
||||
offset = addr->Arg(1).U32();
|
||||
}
|
||||
IR::Value data = inst.Arg(1).Resolve();
|
||||
for (s32 i = 0; i < num_components; i++) {
|
||||
const auto attrib = IR::Attribute::Param0 + (offset / 16);
|
||||
const auto comp = (offset / 4) % 4;
|
||||
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
|
||||
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
|
||||
offset += 4;
|
||||
}
|
||||
inst.Invalidate();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
case Stage::Export: {
|
||||
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::StoreBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.system_coherent || !info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -61,12 +96,13 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::LoadBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.system_coherent || !info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
||||
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
|
||||
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
|
||||
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
||||
const auto bucket = offset.Resolve().U32() / 256u;
|
||||
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
||||
|
@ -80,7 +116,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||
break;
|
||||
}
|
||||
case IR::Opcode::StoreBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!buffer_info.system_coherent || !buffer_info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::GetUserData:
|
||||
info.ud_mask.Set(inst.Arg(0).ScalarReg());
|
||||
break;
|
||||
case IR::Opcode::SetPatch: {
|
||||
const auto patch = inst.Arg(0).Patch();
|
||||
if (patch <= IR::Patch::TessellationLodBottom) {
|
||||
info.stores_tess_level_outer = true;
|
||||
} else if (patch <= IR::Patch::TessellationLodInteriorV) {
|
||||
info.stores_tess_level_inner = true;
|
||||
} else {
|
||||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::GetPatch: {
|
||||
const auto patch = inst.Arg(0).Patch();
|
||||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
|
|
28
src/shader_recompiler/ir/patch.cpp
Normal file
28
src/shader_recompiler/ir/patch.cpp
Normal file
|
@ -0,0 +1,28 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
std::string NameOf(Patch patch) {
    // The first six Patch values are the fixed tessellation-factor slots and
    // have individual names; everything past them is a generic patch component
    // named by its index relative to Component0.
    static constexpr const char* kFactorNames[] = {
        "TessellationLodLeft",      "TessellationLodTop",
        "TessellationLodRight",     "TessellationLodBottom",
        "TessellationLodInteriorU", "TessellationLodInteriorV",
    };
    const auto raw = static_cast<u32>(patch);
    if (patch <= Patch::TessellationLodInteriorV) {
        return kFactorNames[raw];
    }
    return fmt::format("Component{}", raw - static_cast<u32>(Patch::Component0));
}
|
||||
|
||||
} // namespace Shader::IR
|
173
src/shader_recompiler/ir/patch.h
Normal file
173
src/shader_recompiler/ir/patch.h
Normal file
|
@ -0,0 +1,173 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
enum class Patch : u64 {
|
||||
TessellationLodLeft,
|
||||
TessellationLodTop,
|
||||
TessellationLodRight,
|
||||
TessellationLodBottom,
|
||||
TessellationLodInteriorU,
|
||||
TessellationLodInteriorV,
|
||||
Component0,
|
||||
Component1,
|
||||
Component2,
|
||||
Component3,
|
||||
Component4,
|
||||
Component5,
|
||||
Component6,
|
||||
Component7,
|
||||
Component8,
|
||||
Component9,
|
||||
Component10,
|
||||
Component11,
|
||||
Component12,
|
||||
Component13,
|
||||
Component14,
|
||||
Component15,
|
||||
Component16,
|
||||
Component17,
|
||||
Component18,
|
||||
Component19,
|
||||
Component20,
|
||||
Component21,
|
||||
Component22,
|
||||
Component23,
|
||||
Component24,
|
||||
Component25,
|
||||
Component26,
|
||||
Component27,
|
||||
Component28,
|
||||
Component29,
|
||||
Component30,
|
||||
Component31,
|
||||
Component32,
|
||||
Component33,
|
||||
Component34,
|
||||
Component35,
|
||||
Component36,
|
||||
Component37,
|
||||
Component38,
|
||||
Component39,
|
||||
Component40,
|
||||
Component41,
|
||||
Component42,
|
||||
Component43,
|
||||
Component44,
|
||||
Component45,
|
||||
Component46,
|
||||
Component47,
|
||||
Component48,
|
||||
Component49,
|
||||
Component50,
|
||||
Component51,
|
||||
Component52,
|
||||
Component53,
|
||||
Component54,
|
||||
Component55,
|
||||
Component56,
|
||||
Component57,
|
||||
Component58,
|
||||
Component59,
|
||||
Component60,
|
||||
Component61,
|
||||
Component62,
|
||||
Component63,
|
||||
Component64,
|
||||
Component65,
|
||||
Component66,
|
||||
Component67,
|
||||
Component68,
|
||||
Component69,
|
||||
Component70,
|
||||
Component71,
|
||||
Component72,
|
||||
Component73,
|
||||
Component74,
|
||||
Component75,
|
||||
Component76,
|
||||
Component77,
|
||||
Component78,
|
||||
Component79,
|
||||
Component80,
|
||||
Component81,
|
||||
Component82,
|
||||
Component83,
|
||||
Component84,
|
||||
Component85,
|
||||
Component86,
|
||||
Component87,
|
||||
Component88,
|
||||
Component89,
|
||||
Component90,
|
||||
Component91,
|
||||
Component92,
|
||||
Component93,
|
||||
Component94,
|
||||
Component95,
|
||||
Component96,
|
||||
Component97,
|
||||
Component98,
|
||||
Component99,
|
||||
Component100,
|
||||
Component101,
|
||||
Component102,
|
||||
Component103,
|
||||
Component104,
|
||||
Component105,
|
||||
Component106,
|
||||
Component107,
|
||||
Component108,
|
||||
Component109,
|
||||
Component110,
|
||||
Component111,
|
||||
Component112,
|
||||
Component113,
|
||||
Component114,
|
||||
Component115,
|
||||
Component116,
|
||||
Component117,
|
||||
Component118,
|
||||
Component119,
|
||||
};
|
||||
static_assert(static_cast<u64>(Patch::Component119) == 125);
|
||||
|
||||
// Returns true when the patch location is a generic component rather than one
// of the fixed tessellation factor slots.
constexpr bool IsGeneric(Patch patch) noexcept {
    const auto raw = static_cast<u64>(patch);
    return raw >= static_cast<u64>(Patch::Component0) &&
           raw <= static_cast<u64>(Patch::Component119);
}
|
||||
|
||||
// Maps a tess factor index straight onto the leading fixed Patch slots
// (0 == TessellationLodLeft, ..., 5 == TessellationLodInteriorV).
constexpr Patch PatchFactor(u32 index) {
    return Patch{index};
}
|
||||
|
||||
// Maps a generic component index onto the ComponentN range of Patch.
constexpr Patch PatchGeneric(u32 index) {
    const u32 base = static_cast<u32>(Patch::Component0);
    return static_cast<Patch>(base + index);
}
|
||||
|
||||
// Attribute index of a generic patch component; each attribute spans four
// consecutive ComponentN slots.
constexpr u32 GenericPatchIndex(Patch patch) {
    const u32 offset = static_cast<u32>(patch) - static_cast<u32>(Patch::Component0);
    return offset / 4;
}
|
||||
|
||||
// Component (x/y/z/w) of a generic patch attribute within its vec4 slot.
constexpr u32 GenericPatchElement(Patch patch) {
    const u32 offset = static_cast<u32>(patch) - static_cast<u32>(Patch::Component0);
    return offset % 4;
}
|
||||
|
||||
[[nodiscard]] std::string NameOf(Patch patch);
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<Shader::IR::Patch> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(const Shader::IR::Patch patch, format_context& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch));
|
||||
}
|
||||
};
|
127
src/shader_recompiler/ir/pattern_matching.h
Normal file
127
src/shader_recompiler/ir/pattern_matching.h
Normal file
|
@ -0,0 +1,127 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
|
||||
namespace Shader::Optimiation::PatternMatching {
|
||||
|
||||
// Attempt at pattern matching for Insts and Values
|
||||
// Needs improvement, mostly a convenience
|
||||
|
||||
template <typename Derived>
|
||||
struct MatchObject {
|
||||
inline bool Match(IR::Value v) {
|
||||
return static_cast<Derived*>(this)->Match(v);
|
||||
}
|
||||
};
|
||||
|
||||
// Matches any value unconditionally and captures it into the bound reference.
struct MatchValue : MatchObject<MatchValue> {
    MatchValue(IR::Value& return_val_) : capture(return_val_) {}

    inline bool Match(IR::Value value) {
        capture = value;
        return true;
    }

private:
    // Reference to caller storage; must outlive the matcher.
    IR::Value& capture;
};
|
||||
|
||||
// Matches any value without inspecting or capturing it.
struct MatchIgnore : MatchObject<MatchIgnore> {
    MatchIgnore() = default;

    inline bool Match(IR::Value) {
        return true;
    }
};
|
||||
|
||||
// Matches only immediate values, capturing the immediate on success.
struct MatchImm : MatchObject<MatchImm> {
    MatchImm(IR::Value& v) : capture(v) {}

    inline bool Match(IR::Value value) {
        if (value.IsImmediate()) {
            capture = value;
            return true;
        }
        return false;
    }

private:
    // Reference to caller storage; must outlive the matcher.
    IR::Value& capture;
};
|
||||
|
||||
// Matches a value that is exactly the given IR attribute.
struct MatchAttribute : MatchObject<MatchAttribute> {
    MatchAttribute(IR::Attribute attribute_) : expected(attribute_) {}

    inline bool Match(IR::Value value) {
        if (value.Type() != IR::Type::Attribute) {
            return false;
        }
        return value.Attribute() == expected;
    }

private:
    IR::Attribute expected;
};
|
||||
|
||||
// Matches a U32 immediate with the exact given value.
struct MatchU32 : MatchObject<MatchU32> {
    MatchU32(u32 imm_) : expected(imm_) {}

    inline bool Match(IR::Value value) {
        if (!value.IsImmediate() || value.Type() != IR::Type::U32) {
            return false;
        }
        return value.U32() == expected;
    }

private:
    u32 expected;
};
|
||||
|
||||
template <IR::Opcode opcode, typename... Args>
|
||||
struct MatchInstObject : MatchObject<MatchInstObject<opcode>> {
|
||||
static_assert(sizeof...(Args) == IR::NumArgsOf(opcode));
|
||||
MatchInstObject(Args&&... args) : pattern(std::forward_as_tuple(args...)) {}
|
||||
|
||||
inline bool Match(IR::Value v) {
|
||||
IR::Inst* inst = v.TryInstRecursive();
|
||||
if (!inst || inst->GetOpcode() != opcode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool matched = true;
|
||||
|
||||
[&]<std::size_t... Is>(std::index_sequence<Is...>) {
|
||||
((matched = matched && std::get<Is>(pattern).Match(inst->Arg(Is))), ...);
|
||||
}(std::make_index_sequence<sizeof...(Args)>{});
|
||||
|
||||
return matched;
|
||||
}
|
||||
|
||||
private:
|
||||
using MatchArgs = std::tuple<Args&...>;
|
||||
MatchArgs pattern;
|
||||
};
|
||||
|
||||
template <IR::Opcode opcode, typename... Args>
|
||||
inline auto MakeInstPattern(Args&&... args) {
|
||||
return MatchInstObject<opcode, Args...>(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// Convenience wrappers, one per commonly matched opcode.
// TODO: probably a simpler way of doing this (e.g. variable templates).
#define M_READCONST(...) MakeInstPattern<IR::Opcode::ReadConst>(__VA_ARGS__)
#define M_GETUSERDATA(...) MakeInstPattern<IR::Opcode::GetUserData>(__VA_ARGS__)
#define M_BITFIELDUEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldUExtract>(__VA_ARGS__)
#define M_BITFIELDSEXTRACT(...) MakeInstPattern<IR::Opcode::BitFieldSExtract>(__VA_ARGS__)
#define M_GETATTRIBUTEU32(...) MakeInstPattern<IR::Opcode::GetAttributeU32>(__VA_ARGS__)
#define M_UMOD32(...) MakeInstPattern<IR::Opcode::UMod32>(__VA_ARGS__)
#define M_SHIFTRIGHTLOGICAL32(...) MakeInstPattern<IR::Opcode::ShiftRightLogical32>(__VA_ARGS__)
#define M_IADD32(...) MakeInstPattern<IR::Opcode::IAdd32>(__VA_ARGS__)
#define M_IMUL32(...) MakeInstPattern<IR::Opcode::IMul32>(__VA_ARGS__)
#define M_BITWISEAND32(...) MakeInstPattern<IR::Opcode::BitwiseAnd32>(__VA_ARGS__)
#define M_GETTESSGENERICATTRIBUTE(...) \
    MakeInstPattern<IR::Opcode::GetTessGenericAttribute>(__VA_ARGS__)
#define M_SETTCSGENERICATTRIBUTE(...) \
    MakeInstPattern<IR::Opcode::SetTcsGenericAttribute>(__VA_ARGS__)
#define M_COMPOSITECONSTRUCTU32X2(...) \
    MakeInstPattern<IR::Opcode::CompositeConstructU32x2>(__VA_ARGS__)
#define M_COMPOSITECONSTRUCTU32X4(...) \
    MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(__VA_ARGS__)
|
||||
|
||||
} // namespace Shader::Optimiation::PatternMatching
|
|
@ -49,7 +49,8 @@ union BufferInstInfo {
|
|||
BitField<0, 1, u32> index_enable;
|
||||
BitField<1, 1, u32> offset_enable;
|
||||
BitField<2, 12, u32> inst_offset;
|
||||
BitField<14, 1, u32> ring_access; // global + system coherency
|
||||
BitField<14, 1, u32> system_coherent;
|
||||
BitField<15, 1, u32> globally_coherent;
|
||||
};
|
||||
|
||||
enum class ScalarReg : u32 {
|
||||
|
|
|
@ -15,7 +15,7 @@ enum class Type {
|
|||
ScalarReg = 1 << 1,
|
||||
VectorReg = 1 << 2,
|
||||
Attribute = 1 << 3,
|
||||
SystemValue = 1 << 4,
|
||||
Patch = 1 << 4,
|
||||
U1 = 1 << 5,
|
||||
U8 = 1 << 6,
|
||||
U16 = 1 << 7,
|
||||
|
|
|
@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {}
|
|||
|
||||
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
|
||||
|
||||
Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {}
|
||||
|
||||
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
|
||||
|
||||
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/type.h"
|
||||
|
||||
|
@ -34,6 +35,7 @@ public:
|
|||
explicit Value(IR::ScalarReg reg) noexcept;
|
||||
explicit Value(IR::VectorReg reg) noexcept;
|
||||
explicit Value(IR::Attribute value) noexcept;
|
||||
explicit Value(IR::Patch patch) noexcept;
|
||||
explicit Value(bool value) noexcept;
|
||||
explicit Value(u8 value) noexcept;
|
||||
explicit Value(u16 value) noexcept;
|
||||
|
@ -56,6 +58,7 @@ public:
|
|||
[[nodiscard]] IR::ScalarReg ScalarReg() const;
|
||||
[[nodiscard]] IR::VectorReg VectorReg() const;
|
||||
[[nodiscard]] IR::Attribute Attribute() const;
|
||||
[[nodiscard]] IR::Patch Patch() const;
|
||||
[[nodiscard]] bool U1() const;
|
||||
[[nodiscard]] u8 U8() const;
|
||||
[[nodiscard]] u16 U16() const;
|
||||
|
@ -75,6 +78,7 @@ private:
|
|||
IR::ScalarReg sreg;
|
||||
IR::VectorReg vreg;
|
||||
IR::Attribute attribute;
|
||||
IR::Patch patch;
|
||||
bool imm_u1;
|
||||
u8 imm_u8;
|
||||
u16 imm_u16;
|
||||
|
@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const {
|
|||
return attribute;
|
||||
}
|
||||
|
||||
inline IR::Patch Value::Patch() const {
|
||||
DEBUG_ASSERT(type == Type::Patch);
|
||||
return patch;
|
||||
}
|
||||
|
||||
inline bool Value::U1() const {
|
||||
if (IsIdentity()) {
|
||||
return inst->Arg(0).U1();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue