shader_recompiler: Better branch detection + more opcodes

2025-07-09 02:26:21 +00:00 · 2024-06-01 20:25:31 +03:00 · 2024-06-01 20:25:31 +03:00 · 02a50265f8
commit 02a50265f8
parent f624f7749c
31 changed files with 772 additions and 120 deletions
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@ -119,6 +119,14 @@ U32 IREmitter::GetUserData(IR::ScalarReg reg) {
    return Inst<U32>(Opcode::GetUserData, reg);
 }

+// Emits a GetThreadBitScalarReg instruction for scalar register `reg` and
+// returns its result as a U1 value.
+U1 IREmitter::GetThreadBitScalarReg(IR::ScalarReg reg) {
+    return Inst<U1>(Opcode::GetThreadBitScalarReg, reg);
+}
+
+// Emits a SetThreadBitScalarReg instruction whose operands are the scalar
+// register `reg` and the U1 `value`. Nothing is returned to the caller.
+void IREmitter::SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value) {
+    Inst(Opcode::SetThreadBitScalarReg, reg, value);
+}
+
 template <>
 U32 IREmitter::GetScalarReg(IR::ScalarReg reg) {
    return Inst<U32>(Opcode::GetScalarRegister, reg);
@ -196,6 +204,10 @@ U32 IREmitter::GetVccLo() {
    return Inst<U32>(Opcode::GetVccLo);
 }

+// Emits a GetVccHi instruction (no operands) and returns its U32 result.
+U32 IREmitter::GetVccHi() {
+    return Inst<U32>(Opcode::GetVccHi);
+}
+
 // Emits a SetScc instruction with the U1 `value` as its only operand.
 void IREmitter::SetScc(const U1& value) {
    Inst(Opcode::SetScc, value);
 }
@ -212,6 +224,10 @@ void IREmitter::SetVccLo(const U32& value) {
    Inst(Opcode::SetVccLo, value);
 }

+// Emits a SetVccHi instruction with the U32 `value` as its only operand.
+void IREmitter::SetVccHi(const U32& value) {
+    Inst(Opcode::SetVccHi, value);
+}
+
 // Emits a GetAttribute instruction for `attribute`; the component index
 // `comp` is passed as an immediate built with Imm32. Returns the F32 result.
 F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
    return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
 }
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@ -43,7 +43,9 @@ public:
    void Epilogue();
    void Discard();

-    U32 GetUserData(IR::ScalarReg reg);
+    [[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
+    [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
+    void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);

    template <typename T = U32>
    [[nodiscard]] T GetScalarReg(IR::ScalarReg reg);
@ -59,10 +61,12 @@ public:
    [[nodiscard]] U1 GetExec();
    [[nodiscard]] U1 GetVcc();
    [[nodiscard]] U32 GetVccLo();
+    [[nodiscard]] U32 GetVccHi();
    void SetScc(const U1& value);
    void SetExec(const U1& value);
    void SetVcc(const U1& value);
    void SetVccLo(const U32& value);
+    void SetVccHi(const U32& value);

    [[nodiscard]] U1 Condition(IR::Condition cond);

--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@ -21,6 +21,8 @@ OPCODE(ReadConstBufferU32,                                  U32,            Opaq

 // Context getters/setters
 OPCODE(GetUserData,                                         U32,            ScalarReg,                                                                      )
+OPCODE(GetThreadBitScalarReg,                               U1,             ScalarReg,                                                                      )
+OPCODE(SetThreadBitScalarReg,                               Void,           ScalarReg,      U1,                                                             )
 OPCODE(GetScalarRegister,                                   U32,            ScalarReg,                                                                      )
 OPCODE(SetScalarRegister,                                   Void,           ScalarReg,      U32,                                                            )
 OPCODE(GetVectorRegister,                                   U32,            VectorReg,                                                                      )
@ -36,10 +38,12 @@ OPCODE(GetScc,                                             U1,             Void,
 OPCODE(GetExec,                                            U1,             Void,                                                                            )
 OPCODE(GetVcc,                                             U1,             Void,                                                                            )
 OPCODE(GetVccLo,                                           U32,            Void,                                                                            )
+OPCODE(GetVccHi,                                           U32,            Void,                                                                            )
 OPCODE(SetScc,                                             Void,           U1,                                                                              )
 OPCODE(SetExec,                                            Void,           U1,                                                                              )
 OPCODE(SetVcc,                                             Void,           U1,                                                                              )
 OPCODE(SetVccLo,                                           Void,           U32,                                                                             )
+OPCODE(SetVccHi,                                           Void,           U32,                                                                             )

 // Undefined
 OPCODE(UndefU1,                                             U1,                                                                                             )
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@ -206,9 +206,12 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
    const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
    IR::U32 address = ir.Imm32(dword_offset);
    if (inst_info.index_enable && inst_info.offset_enable) {
-        UNREACHABLE();
+        const IR::U32 offset{ir.CompositeExtract(inst.Arg(1), 0)};
+        const IR::U32 index{ir.CompositeExtract(inst.Arg(1), 1)};
+        address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
+        address = ir.IAdd(address, ir.ShiftRightLogical(offset, ir.Imm32(2)));
    } else if (inst_info.index_enable) {
-        IR::U32 index{inst.Arg(1)};
+        const IR::U32 index{inst.Arg(1)};
        address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
    } else if (inst_info.offset_enable) {
        const IR::U32 offset{inst.Arg(1)};
@ -216,6 +219,17 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
    inst.SetArg(1, address);
 }

+// Builds the coordinate composite for a cube image: subtracts 1.5 from `s`
+// and `t` (both treated as F32) and packs the results together with `z`,
+// which is passed through unchanged.
+IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
+                         const IR::Value& z) {
+    // The x and y coordinates need to be fixed up, because the s and t
+    // coordinates are scaled and have 1.5 added to them by v_madak_f32.
+    // The scale value is already forced to 1.0 when handling v_cubema_f32,
+    // so subtracting 1.5 here recovers the original value.
+    const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
+    const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
+    return ir.CompositeConstruct(x, y, z);
+}
+
 void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
    IR::Inst* producer = inst.Arg(0).InstRecursive();
    ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
@ -256,8 +270,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1)), body->Arg(2)};
        case AmdGpu::ImageType::Color2DArray:
        case AmdGpu::ImageType::Color3D:
-        case AmdGpu::ImageType::Cube:
            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
+        case AmdGpu::ImageType::Cube:
+            return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
        default:
            UNREACHABLE();
        }
@ -276,6 +291,7 @@ void ResourceTrackingPass(IR::Program& program) {
    // Most of the time it is float so that is the default. This pass detects float buffer loads
    // combined with bitcasts and patches them to be integer loads.
    for (IR::Block* const block : program.post_order_blocks) {
+        break;
        for (IR::Inst& inst : block->Instructions()) {
            if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) {
                continue;
--- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
+++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
@ -32,6 +32,7 @@ struct SccFlagTag : FlagTag {};
 struct ExecFlagTag : FlagTag {};
 struct VccFlagTag : FlagTag {};
 struct VccLoTag : FlagTag {};
+// Empty tag type used as the SSA variable key for SetVccHi / GetVccHi.
+struct VccHiTag : FlagTag {};

 struct GotoVariable : FlagTag {
    GotoVariable() = default;
@ -43,7 +44,7 @@ struct GotoVariable : FlagTag {
 };

 using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
-                             VccFlagTag, VccLoTag>;
+                             VccFlagTag, VccLoTag, VccHiTag>;
 using ValueMap = std::unordered_map<IR::Block*, IR::Value>;

 struct DefTable {
@ -89,6 +90,13 @@ struct DefTable {
        vcc_lo_flag.insert_or_assign(block, value);
    }

+    // Returns the value recorded for the VccHi variable in `block`.
+    // Note: operator[] on the unordered_map default-constructs (and stores)
+    // an IR::Value entry when `block` has no definition yet.
+    const IR::Value& Def(IR::Block* block, VccHiTag) {
+        return vcc_hi_flag[block];
+    }
+    // Records `value` as the VccHi definition for `block`, overwriting any
+    // definition previously stored for that block.
+    void SetDef(IR::Block* block, VccHiTag, const IR::Value& value) {
+        vcc_hi_flag.insert_or_assign(block, value);
+    }
+
    // Returns the value recorded for the Vcc flag in `block`.
    // Note: operator[] on the unordered_map default-constructs (and stores)
    // an IR::Value entry when `block` has no definition yet.
    const IR::Value& Def(IR::Block* block, VccFlagTag) {
        return vcc_flag[block];
    }
@ -101,6 +109,7 @@ struct DefTable {
    ValueMap exec_flag;
    ValueMap vcc_flag;
    ValueMap vcc_lo_flag;
+    ValueMap vcc_hi_flag;
 };

 IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
@ -111,6 +120,14 @@ IR::Opcode UndefOpcode(IR::VectorReg) noexcept {
    return IR::Opcode::UndefU32;
 }

+// Undefined-value opcode for the VccLo variable: UndefU32.
+// VccLoTag derives from FlagTag, so this exact-match overload is chosen over
+// the FlagTag overload, which returns UndefU1.
+IR::Opcode UndefOpcode(const VccLoTag&) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
+// Undefined-value opcode for the VccHi variable: UndefU32.
+// VccHiTag derives from FlagTag, so this exact-match overload is chosen over
+// the FlagTag overload, which returns UndefU1.
+IR::Opcode UndefOpcode(const VccHiTag&) noexcept {
+    return IR::Opcode::UndefU32;
+}
+
 // Undefined-value opcode for any tag derived from FlagTag that has no more
 // specific overload: UndefU1.
 IR::Opcode UndefOpcode(const FlagTag&) noexcept {
    return IR::Opcode::UndefU1;
 }
@ -281,6 +298,7 @@ private:
 void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    const IR::Opcode opcode{inst.GetOpcode()};
    switch (opcode) {
+    case IR::Opcode::SetThreadBitScalarReg:
    case IR::Opcode::SetScalarRegister: {
        const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
        pass.WriteVariable(reg, block, inst.Arg(1));
@ -306,6 +324,10 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    case IR::Opcode::SetVccLo:
        pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
        break;
+    case IR::Opcode::SetVccHi:
+        pass.WriteVariable(VccHiTag{}, block, inst.Arg(0));
+        break;
+    case IR::Opcode::GetThreadBitScalarReg:
    case IR::Opcode::GetScalarRegister: {
        const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
        inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
@ -331,6 +353,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
    case IR::Opcode::GetVccLo:
        inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
        break;
+    case IR::Opcode::GetVccHi:
+        inst.ReplaceUsesWith(pass.ReadVariable(VccHiTag{}, block));
+        break;
    default:
        break;
    }
--- a/src/shader_recompiler/ir/value.h
+++ b/src/shader_recompiler/ir/value.h
@ -219,6 +219,7 @@ using U64 = TypedValue<Type::U64>;
 using F16 = TypedValue<Type::F16>;
 using F32 = TypedValue<Type::F32>;
 using F64 = TypedValue<Type::F64>;
+// TypedValue alias whose type mask is the union of Type::U1, Type::U32 and Type::F32.
+using U1U32F32 = TypedValue<Type::U1 | Type::U32 | Type::F32>;
 using U32F32 = TypedValue<Type::U32 | Type::F32>;
 using U32U64 = TypedValue<Type::U32 | Type::U64>;
 using F32F64 = TypedValue<Type::F32 | Type::F64>;