Merge branch 'main' into shader_recompiler/format

2025-07-04 08:06:20 +00:00 · 2024-08-29 10:18:12 +03:00 · 2024-08-29 10:18:12 +03:00 · 18e95ae4c0
commit 18e95ae4c0
parent 3a8a666df0 e1382b43c8
138 changed files with 24513 additions and 1242 deletions
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@ -179,6 +179,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
    spv::ExecutionModel execution_model{};
    ctx.AddCapability(spv::Capability::Image1D);
    ctx.AddCapability(spv::Capability::Sampled1D);
+    ctx.AddCapability(spv::Capability::ImageQuery);
    if (info.uses_fp16) {
        ctx.AddCapability(spv::Capability::Float16);
        ctx.AddCapability(spv::Capability::Int16);
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@ -102,7 +102,7 @@ Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
    return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicXor);
 }

-Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+// Emits the 32-bit buffer atomic swap by forwarding all arguments to BufferAtomicU32
+// with Sirit's OpAtomicExchange as the atomic operation.
+Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
 }

--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@ -305,7 +305,7 @@ static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
    case AmdGpu::NumberFormat::Float:
        return value;
    default:
-        UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
+        UNREACHABLE_MSG("Unsupported number format for conversion: {}",
                        magic_enum::enum_name(format));
    }
 }
@ -478,7 +478,7 @@ static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat fo
    case AmdGpu::NumberFormat::Float:
        return value;
    default:
-        UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
+        UNREACHABLE_MSG("Unsupported number format for conversion: {}",
                        magic_enum::enum_name(format));
    }
 }
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@ -91,7 +91,7 @@ Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
 Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-Id EmitBufferAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
 Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
@ -286,6 +286,7 @@ Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
 Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
 Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@ -139,6 +139,13 @@ Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    return result;
 }

+// 64-bit counterpart of EmitBitwiseAnd32: emits OpBitwiseAnd typed as ctx.U64 and hands the
+// result to SetZeroFlag and SetSignFlag before returning it.
+Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
+    const Id anded = ctx.OpBitwiseAnd(ctx.U64, a, b);
+    SetZeroFlag(ctx, inst, anded);
+    SetSignFlag(ctx, inst, anded);
+    return anded;
+}
+
 Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)};
    SetZeroFlag(ctx, inst, result);
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@ -405,6 +405,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
        image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
        return spv::ImageFormat::Rg16f;
    }
+    if (image.GetDataFmt() == AmdGpu::DataFormat::Format16_16 &&
+        image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm) {
+        return spv::ImageFormat::Rg16Snorm;
+    }
    if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8 &&
        image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) {
        return spv::ImageFormat::Rg8;
--- a/src/shader_recompiler/frontend/control_flow_graph.cpp
+++ b/src/shader_recompiler/frontend/control_flow_graph.cpp
@ -21,8 +21,13 @@ struct Compare {
    }
 };

-static IR::Condition MakeCondition(Opcode opcode) {
-    switch (opcode) {
+static IR::Condition MakeCondition(const GcnInst& inst) {
+    if (inst.IsCmpx()) {
+        ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
+        return IR::Condition::Execnz;
+    }
+
+    switch (inst.opcode) {
    case Opcode::S_CBRANCH_SCC0:
        return IR::Condition::Scc0;
    case Opcode::S_CBRANCH_SCC1:
@ -37,7 +42,6 @@ static IR::Condition MakeCondition(Opcode opcode) {
        return IR::Condition::Execnz;
    case Opcode::S_AND_SAVEEXEC_B64:
    case Opcode::S_ANDN2_B64:
-    case Opcode::V_CMPX_NE_U32:
        return IR::Condition::Execnz;
    default:
        return IR::Condition::True;
@ -94,7 +98,8 @@ void CFG::EmitDivergenceLabels() {
               // While this instruction does not save EXEC it is often used paired
               // with SAVEEXEC to mask the threads that didn't pass the condition
               // of initial branch.
-               inst.opcode == Opcode::S_ANDN2_B64 || inst.opcode == Opcode::V_CMPX_NE_U32;
+               (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
+               inst.opcode == Opcode::V_CMPX_NE_U32;
    };
    const auto is_close_scope = [](const GcnInst& inst) {
        // Closing an EXEC scope can be either a branch instruction
@ -104,7 +109,8 @@ void CFG::EmitDivergenceLabels() {
               // Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
               // Those instructions need to be wrapped in the condition as well so allow branch
               // as end scope instruction.
-               inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ANDN2_B64;
+               inst.opcode == Opcode::S_CBRANCH_EXECZ ||
+               (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
    };

    // Since we will be adding new labels, avoid iterating those as well.
@ -171,7 +177,7 @@ void CFG::EmitBlocks() {
        block->begin_index = GetIndex(start);
        block->end_index = end_index;
        block->end_inst = end_inst;
-        block->cond = MakeCondition(end_inst.opcode);
+        block->cond = MakeCondition(end_inst);
        blocks.insert(*block);
    }
 }
--- a/src/shader_recompiler/frontend/instruction.cpp
+++ b/src/shader_recompiler/frontend/instruction.cpp
@ -47,4 +47,18 @@ bool GcnInst::IsConditionalBranch() const {
    return false;
 }

+// Reports whether this instruction's opcode falls inside one of the V_CMPX_* / V_CMPSX_*
+// opcode ranges listed below (all bounds inclusive).
+bool GcnInst::IsCmpx() const {
+    // Inclusive range test against this instruction's opcode.
+    const auto within = [this](Opcode first, Opcode last) {
+        return opcode >= first && opcode <= last;
+    };
+    return within(Opcode::V_CMPX_F_F32, Opcode::V_CMPX_T_F32) ||
+           within(Opcode::V_CMPX_F_F64, Opcode::V_CMPX_T_F64) ||
+           within(Opcode::V_CMPSX_F_F32, Opcode::V_CMPSX_T_F32) ||
+           within(Opcode::V_CMPSX_F_F64, Opcode::V_CMPSX_T_F64) ||
+           within(Opcode::V_CMPX_F_I32, Opcode::V_CMPX_CLASS_F32) ||
+           within(Opcode::V_CMPX_F_I64, Opcode::V_CMPX_CLASS_F64) ||
+           within(Opcode::V_CMPX_F_U32, Opcode::V_CMPX_T_U32) ||
+           within(Opcode::V_CMPX_F_U64, Opcode::V_CMPX_T_U64);
+}
+
 } // namespace Shader::Gcn
--- a/src/shader_recompiler/frontend/instruction.h
+++ b/src/shader_recompiler/frontend/instruction.h
@ -203,6 +203,7 @@ struct GcnInst {
    bool IsUnconditionalBranch() const;
    bool IsConditionalBranch() const;
    bool IsFork() const;
+    bool IsCmpx() const;
 };

 } // namespace Shader::Gcn
--- a/src/shader_recompiler/frontend/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/structured_control_flow.cpp
@ -3,6 +3,7 @@

 #include <algorithm>
 #include <memory>
+#include <optional>
 #include <string>
 #include <unordered_map>
 #include <utility>
--- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
@ -6,96 +6,150 @@
 namespace Shader::Gcn {

 void Translator::EmitScalarAlu(const GcnInst& inst) {
+    // SOPC and SOPK instructions are dispatched by encoding to their own translators;
+    // every other scalar ALU instruction is dispatched by opcode below.
+    if (inst.encoding == InstEncoding::SOPC) {
+        EmitSOPC(inst);
+        return;
+    }
+    if (inst.encoding == InstEncoding::SOPK) {
+        EmitSOPK(inst);
+        return;
+    }
+
+    switch (inst.opcode) {
+    case Opcode::S_MOV_B32:
+        return S_MOV(inst);
+    case Opcode::S_MUL_I32:
+        return S_MUL_I32(inst);
+    case Opcode::S_AND_SAVEEXEC_B64:
+        return S_AND_SAVEEXEC_B64(inst);
+    case Opcode::S_MOV_B64:
+        return S_MOV_B64(inst);
+    case Opcode::S_OR_B64:
+        return S_OR_B64(NegateMode::None, false, inst);
+    case Opcode::S_NOR_B64:
+        return S_OR_B64(NegateMode::Result, false, inst);
+    case Opcode::S_XOR_B64:
+        return S_OR_B64(NegateMode::None, true, inst);
+    case Opcode::S_ORN2_B64:
+        return S_OR_B64(NegateMode::Src1, false, inst);
+    case Opcode::S_AND_B64:
+        return S_AND_B64(NegateMode::None, inst);
+    case Opcode::S_NAND_B64:
+        return S_AND_B64(NegateMode::Result, inst);
+    case Opcode::S_ANDN2_B64:
+        return S_AND_B64(NegateMode::Src1, inst);
+    case Opcode::S_NOT_B64:
+        return S_NOT_B64(inst);
+    case Opcode::S_ADD_I32:
+        return S_ADD_I32(inst);
+    case Opcode::S_AND_B32:
+        return S_AND_B32(inst);
+    case Opcode::S_ASHR_I32:
+        return S_ASHR_I32(inst);
+    case Opcode::S_OR_B32:
+        return S_OR_B32(inst);
+    case Opcode::S_LSHL_B32:
+        return S_LSHL_B32(inst);
+    case Opcode::S_LSHR_B32:
+        return S_LSHR_B32(inst);
+    case Opcode::S_CSELECT_B32:
+        return S_CSELECT_B32(inst);
+    case Opcode::S_CSELECT_B64:
+        return S_CSELECT_B64(inst);
+    case Opcode::S_BFE_U32:
+        return S_BFE_U32(inst);
+    case Opcode::S_BFM_B32:
+        return S_BFM_B32(inst);
+    case Opcode::S_BREV_B32:
+        return S_BREV_B32(inst);
+    case Opcode::S_ADD_U32:
+        return S_ADD_U32(inst);
+    case Opcode::S_ADDC_U32:
+        return S_ADDC_U32(inst);
+    case Opcode::S_SUB_U32:
+    case Opcode::S_SUB_I32:
+        return S_SUB_U32(inst);
+    case Opcode::S_MIN_U32:
+        return S_MIN_U32(inst);
+    case Opcode::S_MAX_U32:
+        return S_MAX_U32(inst);
+    case Opcode::S_WQM_B64:
+        // Recognised but intentionally emits nothing.
+        return;
+    default:
+        LogMissingOpcode(inst);
+    }
+}
+
+void Translator::EmitSOPC(const GcnInst& inst) {
    switch (inst.opcode) {
-    case Opcode::S_MOVK_I32:
-        return S_MOVK(inst);
-    case Opcode::S_MOV_B32:
-        return S_MOV(inst);
-    case Opcode::S_MUL_I32:
-        return S_MUL_I32(inst);
-    case Opcode::S_AND_SAVEEXEC_B64:
-        return S_AND_SAVEEXEC_B64(inst);
-    case Opcode::S_MOV_B64:
-        return S_MOV_B64(inst);
-    case Opcode::S_CMP_LT_U32:
-        return S_CMP(ConditionOp::LT, false, inst);
-    case Opcode::S_CMP_LE_U32:
-        return S_CMP(ConditionOp::LE, false, inst);
-    case Opcode::S_CMP_LG_U32:
-        return S_CMP(ConditionOp::LG, false, inst);
-    case Opcode::S_CMP_LT_I32:
-        return S_CMP(ConditionOp::LT, true, inst);
+    case Opcode::S_CMP_EQ_I32:
+        return S_CMP(ConditionOp::EQ, true, inst);
    case Opcode::S_CMP_LG_I32:
        return S_CMP(ConditionOp::LG, true, inst);
    case Opcode::S_CMP_GT_I32:
        return S_CMP(ConditionOp::GT, true, inst);
-    case Opcode::S_CMP_LE_I32:
-        return S_CMP(ConditionOp::LE, true, inst);
    case Opcode::S_CMP_GE_I32:
        return S_CMP(ConditionOp::GE, true, inst);
-    case Opcode::S_CMP_EQ_I32:
-        return S_CMP(ConditionOp::EQ, true, inst);
+    case Opcode::S_CMP_LT_I32:
+        return S_CMP(ConditionOp::LT, true, inst);
+    case Opcode::S_CMP_LE_I32:
+        return S_CMP(ConditionOp::LE, true, inst);
+
    case Opcode::S_CMP_EQ_U32:
        return S_CMP(ConditionOp::EQ, false, inst);
-    case Opcode::S_CMP_GE_U32:
-        return S_CMP(ConditionOp::GE, false, inst);
+    case Opcode::S_CMP_LG_U32:
+        return S_CMP(ConditionOp::LG, false, inst);
    case Opcode::S_CMP_GT_U32:
        return S_CMP(ConditionOp::GT, false, inst);
-    case Opcode::S_OR_B64:
-        return S_OR_B64(NegateMode::None, false, inst);
-    case Opcode::S_NOR_B64:
-        return S_OR_B64(NegateMode::Result, false, inst);
-    case Opcode::S_XOR_B64:
-        return S_OR_B64(NegateMode::None, true, inst);
-    case Opcode::S_ORN2_B64:
-        return S_OR_B64(NegateMode::Src1, false, inst);
-    case Opcode::S_AND_B64:
-        return S_AND_B64(NegateMode::None, inst);
-    case Opcode::S_NAND_B64:
-        return S_AND_B64(NegateMode::Result, inst);
-    case Opcode::S_ANDN2_B64:
-        return S_AND_B64(NegateMode::Src1, inst);
-    case Opcode::S_NOT_B64:
-        return S_NOT_B64(inst);
-    case Opcode::S_ADD_I32:
-        return S_ADD_I32(inst);
-    case Opcode::S_AND_B32:
-        return S_AND_B32(inst);
-    case Opcode::S_ASHR_I32:
-        return S_ASHR_I32(inst);
-    case Opcode::S_OR_B32:
-        return S_OR_B32(inst);
-    case Opcode::S_LSHL_B32:
-        return S_LSHL_B32(inst);
-    case Opcode::S_LSHR_B32:
-        return S_LSHR_B32(inst);
-    case Opcode::S_CSELECT_B32:
-        return S_CSELECT_B32(inst);
-    case Opcode::S_CSELECT_B64:
-        return S_CSELECT_B64(inst);
-    case Opcode::S_BFE_U32:
-        return S_BFE_U32(inst);
-    case Opcode::S_BFM_B32:
-        return S_BFM_B32(inst);
-    case Opcode::S_BREV_B32:
-        return S_BREV_B32(inst);
-    case Opcode::S_ADD_U32:
-        return S_ADD_U32(inst);
-    case Opcode::S_ADDC_U32:
-        return S_ADDC_U32(inst);
+    case Opcode::S_CMP_GE_U32:
+        return S_CMP(ConditionOp::GE, false, inst);
+    case Opcode::S_CMP_LT_U32:
+        return S_CMP(ConditionOp::LT, false, inst);
+    case Opcode::S_CMP_LE_U32:
+        return S_CMP(ConditionOp::LE, false, inst);
+    default:
+        LogMissingOpcode(inst);
+    }
+}
+
+void Translator::EmitSOPK(const GcnInst& inst) {
+    switch (inst.opcode) {
+    case Opcode::S_MOVK_I32:
+        return S_MOVK(inst);
+
+    case Opcode::S_CMPK_EQ_I32:
+        return S_CMPK(ConditionOp::EQ, true, inst);
+    case Opcode::S_CMPK_LG_I32:
+        return S_CMPK(ConditionOp::LG, true, inst);
+    case Opcode::S_CMPK_GT_I32:
+        return S_CMPK(ConditionOp::GT, true, inst);
+    case Opcode::S_CMPK_GE_I32:
+        return S_CMPK(ConditionOp::GE, true, inst);
+    case Opcode::S_CMPK_LT_I32:
+        return S_CMPK(ConditionOp::LT, true, inst);
+    case Opcode::S_CMPK_LE_I32:
+        return S_CMPK(ConditionOp::LE, true, inst);
+
+    case Opcode::S_CMPK_EQ_U32:
+        return S_CMPK(ConditionOp::EQ, false, inst);
+    case Opcode::S_CMPK_LG_U32:
+        return S_CMPK(ConditionOp::LG, false, inst);
+    case Opcode::S_CMPK_GT_U32:
+        return S_CMPK(ConditionOp::GT, false, inst);
+    case Opcode::S_CMPK_GE_U32:
+        return S_CMPK(ConditionOp::GE, false, inst);
+    case Opcode::S_CMPK_LT_U32:
+        return S_CMPK(ConditionOp::LT, false, inst);
+    case Opcode::S_CMPK_LE_U32:
+        return S_CMPK(ConditionOp::LE, false, inst);
+
    case Opcode::S_ADDK_I32:
        return S_ADDK_I32(inst);
    case Opcode::S_MULK_I32:
        return S_MULK_I32(inst);
-    case Opcode::S_SUB_U32:
-    case Opcode::S_SUB_I32:
-        return S_SUB_U32(inst);
-    case Opcode::S_MIN_U32:
-        return S_MIN_U32(inst);
-    case Opcode::S_MAX_U32:
-        return S_MAX_U32(inst);
-    case Opcode::S_WQM_B64:
-        break;
    default:
        LogMissingOpcode(inst);
    }
@ -152,6 +206,31 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
    ir.SetScc(result);
 }

+// Translates the S_CMPK_* family: compares the scalar register named by dst[0] against the
+// 16-bit immediate of the SOPK encoding and stores the boolean outcome in SCC.
+//   cond      - comparison to perform (EQ, LG, GT, GE, LT, LE).
+//   is_signed - true for the *_I32 opcodes, false for the *_U32 opcodes.
+//   inst      - decoded instruction; control.sopk.simm holds the immediate.
+void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
+    // Reduce the immediate to its raw 16 bits first so the result does not depend on how the
+    // decoder stores the field, then widen it the way the opcode requires: sign-extension for
+    // the signed compares, zero-extension for the unsigned ones.
+    const s32 raw_simm = inst.control.sopk.simm;
+    const s32 low16 = raw_simm & 0xFFFF;
+    const s32 simm16 = (is_signed && (low16 & 0x8000) != 0) ? low16 - 0x10000 : low16;
+    const IR::U32 lhs = GetSrc(inst.dst[0]);
+    const IR::U32 rhs = ir.Imm32(simm16);
+    const IR::U1 result = [&] {
+        switch (cond) {
+        case ConditionOp::EQ:
+            return ir.IEqual(lhs, rhs);
+        case ConditionOp::LG:
+            return ir.INotEqual(lhs, rhs);
+        case ConditionOp::GT:
+            return ir.IGreaterThan(lhs, rhs, is_signed);
+        case ConditionOp::GE:
+            return ir.IGreaterThanEqual(lhs, rhs, is_signed);
+        case ConditionOp::LT:
+            return ir.ILessThan(lhs, rhs, is_signed);
+        case ConditionOp::LE:
+            return ir.ILessThanEqual(lhs, rhs, is_signed);
+        default:
+            UNREACHABLE();
+        }
+    }();
+    ir.SetScc(result);
+}
+
 void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
    // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
    // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@ -69,6 +69,10 @@ public:
    void EmitScalarAlu(const GcnInst& inst);
    void EmitVectorAlu(const GcnInst& inst);

+    // Instruction encodings
+    void EmitSOPC(const GcnInst& inst);
+    void EmitSOPK(const GcnInst& inst);
+
    // Scalar ALU
    void S_MOVK(const GcnInst& inst);
    void S_MOV(const GcnInst& inst);
@ -98,6 +102,7 @@ public:
    void S_ADDK_I32(const GcnInst& inst);
    void S_MAX_U32(const GcnInst& inst);
    void S_MIN_U32(const GcnInst& inst);
+    void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);

    // Scalar Memory
    void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@ -116,6 +121,7 @@ public:
    void V_AND_B32(const GcnInst& inst);
    void V_LSHLREV_B32(const GcnInst& inst);
    void V_LSHL_B32(const GcnInst& inst);
+    void V_LSHL_B64(const GcnInst& inst);
    void V_ADD_I32(const GcnInst& inst);
    void V_ADDC_U32(const GcnInst& inst);
    void V_CVT_F32_I32(const GcnInst& inst);
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@ -11,6 +11,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
        return V_LSHLREV_B32(inst);
    case Opcode::V_LSHL_B32:
        return V_LSHL_B32(inst);
+    case Opcode::V_LSHL_B64:
+        return V_LSHL_B64(inst);
    case Opcode::V_BFREV_B32:
        return V_BFREV_B32(inst);
    case Opcode::V_BFE_U32:
@ -280,6 +282,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
        return V_CMP_U32(ConditionOp::GT, true, false, inst);
    case Opcode::V_CMP_LT_I32:
        return V_CMP_U32(ConditionOp::LT, true, false, inst);
+    case Opcode::V_CMPX_GT_I32:
+        return V_CMP_U32(ConditionOp::GT, true, true, inst);
    case Opcode::V_CMPX_LT_I32:
        return V_CMP_U32(ConditionOp::LT, true, true, inst);
    case Opcode::V_CMPX_F_U32:
@ -305,7 +309,6 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
        return V_MBCNT_U32_B32(true, inst);
    case Opcode::V_MBCNT_HI_U32_B32:
        return V_MBCNT_U32_B32(false, inst);
-
    case Opcode::V_NOP:
        return;
    default:
@ -389,6 +392,16 @@ void Translator::V_LSHL_B32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
 }

+// Partial V_LSHL_B64 translation: only the case where both sources are the immediate 0 is
+// accepted (asserted below); the two consecutive destination VGPRs are then set to 0.
+void Translator::V_LSHL_B64(const GcnInst& inst) {
+    const IR::U64 src0{GetSrc64(inst.src[0])};
+    const IR::U64 src1{GetSrc64(inst.src[1])};
+    const auto is_zero_imm = [](const IR::U64& operand) {
+        return operand.IsImmediate() && operand.U64() == 0;
+    };
+    ASSERT_MSG(is_zero_imm(src0) && is_zero_imm(src1),
+               "V_LSHL_B64 with non-zero src0 or src1 is not supported");
+
+    const IR::VectorReg dst_lo{inst.dst[0].code};
+    const IR::U32 zero{ir.Imm32(0)};
+    ir.SetVectorReg(dst_lo, zero);
+    ir.SetVectorReg(dst_lo + 1, ir.Imm32(0));
+}
+
 void Translator::V_ADD_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@ -94,6 +94,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {

    case Opcode::TBUFFER_STORE_FORMAT_X:
        return BUFFER_STORE_FORMAT(1, true, true, inst);
+    case Opcode::TBUFFER_STORE_FORMAT_XY:
+        return BUFFER_STORE_FORMAT(2, true, true, inst);
    case Opcode::TBUFFER_STORE_FORMAT_XYZ:
        return BUFFER_STORE_FORMAT(3, true, true, inst);

@ -109,6 +111,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
        // Buffer atomic operations
    case Opcode::BUFFER_ATOMIC_ADD:
        return BUFFER_ATOMIC(AtomicOp::Add, inst);
+    case Opcode::BUFFER_ATOMIC_SWAP:
+        return BUFFER_ATOMIC(AtomicOp::Swap, inst);
    default:
        LogMissingOpcode(inst);
    }
@ -474,7 +478,7 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
    const IR::Value original_val = [&] {
        switch (op) {
        case AtomicOp::Swap:
-            return ir.BufferAtomicExchange(handle, address, vdata_val, info);
+            return ir.BufferAtomicSwap(handle, address, vdata_val, info);
        case AtomicOp::Add:
            return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
        case AtomicOp::Smin:
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@ -404,9 +404,9 @@ Value IREmitter::BufferAtomicXor(const Value& handle, const Value& address, cons
    return Inst(Opcode::BufferAtomicXor32, Flags{info}, handle, address, value);
 }

-Value IREmitter::BufferAtomicExchange(const Value& handle, const Value& address, const Value& value,
-                                      BufferInstInfo info) {
-    return Inst(Opcode::BufferAtomicExchange32, Flags{info}, handle, address, value);
+// Emits a BufferAtomicSwap32 IR instruction carrying `info` as its flags, with handle,
+// address and value as its operands, and returns the instruction's result value.
+Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, const Value& value,
+                                  BufferInstInfo info) {
+    return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
 }

 void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
@ -1115,8 +1115,18 @@ U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
    }
 }

-U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
-    return Inst<U32>(Opcode::BitwiseAnd32, a, b);
+// Bitwise AND of two operands of the same type. U32 operands emit BitwiseAnd32 and U64
+// operands emit BitwiseAnd64; mismatching operand types are reported via UNREACHABLE_MSG and
+// any other type is passed to ThrowInvalidType.
+U32U64 IREmitter::BitwiseAnd(const U32U64& a, const U32U64& b) {
+    const auto lhs_type = a.Type();
+    const auto rhs_type = b.Type();
+    if (lhs_type != rhs_type) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs_type, rhs_type);
+    }
+    switch (lhs_type) {
+    case Type::U64:
+        return Inst<U64>(Opcode::BitwiseAnd64, a, b);
+    case Type::U32:
+        return Inst<U32>(Opcode::BitwiseAnd32, a, b);
+    default:
+        ThrowInvalidType(lhs_type);
+    }
+}

 U32U64 IREmitter::BitwiseOr(const U32U64& a, const U32U64& b) {
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@ -115,8 +115,8 @@ public:
                                       const Value& value, BufferInstInfo info);
    [[nodiscard]] Value BufferAtomicXor(const Value& handle, const Value& address,
                                        const Value& value, BufferInstInfo info);
-    [[nodiscard]] Value BufferAtomicExchange(const Value& handle, const Value& address,
-                                             const Value& value, BufferInstInfo info);
+    [[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
+                                         const Value& value, BufferInstInfo info);

    [[nodiscard]] U32 LaneId();
    [[nodiscard]] U32 WarpId();
@ -195,7 +195,7 @@ public:
    [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
    [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
    [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
-    [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
+    [[nodiscard]] U32U64 BitwiseAnd(const U32U64& a, const U32U64& b);
    [[nodiscard]] U32U64 BitwiseOr(const U32U64& a, const U32U64& b);
    [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
    [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@ -70,7 +70,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
    case Opcode::BufferAtomicAnd32:
    case Opcode::BufferAtomicOr32:
    case Opcode::BufferAtomicXor32:
-    case Opcode::BufferAtomicExchange32:
+    case Opcode::BufferAtomicSwap32:
    case Opcode::WriteSharedU128:
    case Opcode::WriteSharedU64:
    case Opcode::WriteSharedU32:
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@ -95,7 +95,7 @@ OPCODE(StoreBufferFormatF32x4,                              Void,           Opaq
 OPCODE(StoreBufferU32,                                      Void,           Opaque,         Opaque,         U32,                                            )

 // Buffer atomic operations
-OPCODE(BufferAtomicIAdd32,									U32,			Opaque,			Opaque,			U32											)
+OPCODE(BufferAtomicIAdd32,									U32,			Opaque,			Opaque,			U32												)
 OPCODE(BufferAtomicSMin32,                                  U32,            Opaque,			Opaque,			U32												)
 OPCODE(BufferAtomicUMin32,                                  U32,            Opaque,			Opaque,			U32												)
 OPCODE(BufferAtomicSMax32,                                  U32,            Opaque,			Opaque,			U32												)
@ -105,7 +105,7 @@ OPCODE(BufferAtomicDec32,                                   U32,            Opaq
 OPCODE(BufferAtomicAnd32,                                   U32,            Opaque,			Opaque,			U32,											)
 OPCODE(BufferAtomicOr32,                                    U32,            Opaque,			Opaque,			U32,											)
 OPCODE(BufferAtomicXor32,                                   U32,            Opaque,			Opaque,			U32,											)
-OPCODE(BufferAtomicExchange32,                              U32,            Opaque,			Opaque,			U32,											)
+OPCODE(BufferAtomicSwap32,									U32,            Opaque,			Opaque,			U32,											)

 // Vector utility
 OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                                            )
@ -260,6 +260,7 @@ OPCODE(ShiftRightLogical64,                                 U64,            U64,
 OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                                            )
 OPCODE(ShiftRightArithmetic64,                              U64,            U64,            U32,                                                            )
 OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                                            )
+OPCODE(BitwiseAnd64,                                        U64,            U64,            U64,                                                            )
 OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                                            )
 OPCODE(BitwiseOr64,                                         U64,            U64,            U64,                                                            )
 OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                                            )
--- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
@ -352,9 +352,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
    case IR::Opcode::BitwiseAnd32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
        return;
+    case IR::Opcode::BitwiseAnd64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a & b; });
+        return;
    case IR::Opcode::BitwiseOr32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
        return;
+    case IR::Opcode::BitwiseOr64:
+        FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a | b; });
+        return;
    case IR::Opcode::BitwiseXor32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
        return;
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@ -32,7 +32,7 @@ bool IsBufferAtomic(const IR::Inst& inst) {
    case IR::Opcode::BufferAtomicAnd32:
    case IR::Opcode::BufferAtomicOr32:
    case IR::Opcode::BufferAtomicXor32:
-    case IR::Opcode::BufferAtomicExchange32:
+    case IR::Opcode::BufferAtomicSwap32:
        return true;
    default:
        return false;
@ -136,6 +136,7 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
    case IR::Opcode::ReadConstBufferU32:
    case IR::Opcode::StoreBufferU32:
    case IR::Opcode::BufferAtomicIAdd32:
+    case IR::Opcode::BufferAtomicSwap32:
        return IR::Type::U32;
    default:
        UNREACHABLE();
@ -246,10 +247,7 @@ public:
                return true;
            }
            // Samplers with different bindings might still be the same.
-            const auto old_sharp =
-                info.ReadUd<AmdGpu::Sampler>(existing.sgpr_base, existing.dword_offset);
-            const auto new_sharp = info.ReadUd<AmdGpu::Sampler>(desc.sgpr_base, desc.dword_offset);
-            return old_sharp == new_sharp;
+            return existing.GetSsharp(info) == desc.GetSsharp(info);
        })};
        return index;
    }
@ -295,10 +293,11 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
        return not_found;
    }

-    // The bits range is for lods
+    // The bit range is for LODs (note that the constants are changed after the constant
+    // propagation pass)
    const auto* prod0_arg0 = prod0->Arg(0).InstRecursive();
    if (prod0_arg0->GetOpcode() != IR::Opcode::BitFieldUExtract ||
-        prod0_arg0->Arg(1).InstRecursive()->Arg(0).U32() != 0x0008000cu) {
+        !(prod0_arg0->Arg(1).IsIdentity() && prod0_arg0->Arg(1).U32() == 12) ||
+        !(prod0_arg0->Arg(2).IsIdentity() && prod0_arg0->Arg(2).U32() == 8)) {
        return not_found;
    }