From 9eae6b57ceb27cbc009740e5335130665d3aa333 Mon Sep 17 00:00:00 2001 From: nickci2002 <58965309+nickci2002@users.noreply.github.com> Date: Wed, 2 Jul 2025 12:22:30 -0400 Subject: [PATCH] V_CMP_EQ_U64 support (#3153) * Added V_CMP_EQ_U64 shader opcode support and added 64-bit relational operators (<,>,<=,>=) * Fixed clang-format crying because I typed xargs clang-format instead of xargs clang-format-19 * Replaced V_CMP_EQ_U64 code to match V_CMP_U32 to test * Updated V_CMP_U64 for future addons --- .../backend/spirv/emit_spirv_instructions.h | 18 ++++--- .../backend/spirv/emit_spirv_integer.cpp | 36 ++++++++++--- .../frontend/translate/translate.h | 4 +- .../frontend/translate/vector_alu.cpp | 53 +++++++++---------- src/shader_recompiler/ir/ir_emitter.cpp | 44 ++++++++++++--- src/shader_recompiler/ir/ir_emitter.h | 6 +-- src/shader_recompiler/ir/opcodes.inc | 18 ++++--- .../ir/passes/constant_propagation_pass.cpp | 30 ++++++++--- 8 files changed, 145 insertions(+), 64 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 15a8fd99b..08ea2c1cd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -406,14 +406,20 @@ Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 1a995354d..ddc1e7574 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -371,19 +371,35 @@ Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpIEqual(ctx.U1[1], lhs, rhs); } -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs); } -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThanEqual64(EmitContext& ctx, 
Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs); } -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitULessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitSGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs); } -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThan(ctx.U1[1], lhs, rhs); } @@ -395,11 +411,19 @@ Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpINotEqual(ctx.U1[1], lhs, rhs); } -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs); } -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitUGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index ece334bcd..b5bfec344 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -20,7 +20,7 @@ namespace Shader::Gcn { enum class ConditionOp : u32 { F, EQ, - LG, + LG, // NE GT, GE, LT, @@ -230,7 +230,7 @@ public: // VOPC void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst); void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); - void V_CMP_NE_U64(const GcnInst& inst); + void V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); void V_CMP_CLASS_F32(const GcnInst& inst); // VOP3a diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 3b88e4dec..448622f0e 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -327,8 +327,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CMP_U32(ConditionOp::TRU, false, true, inst); // V_CMP_{OP8}_U64 + case Opcode::V_CMP_EQ_U64: + return V_CMP_U64(ConditionOp::EQ, false, false, inst); case Opcode::V_CMP_NE_U64: - return V_CMP_NE_U64(inst); + return V_CMP_U64(ConditionOp::LG, false, false, inst); case Opcode::V_CMP_CLASS_F32: return V_CMP_CLASS_F32(inst); @@ -996,39 +998,32 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const } } -void Translator::V_CMP_NE_U64(const GcnInst& inst) { - const auto get_src = [&](const InstOperand& operand) { - switch (operand.field) { - case OperandField::VccLo: - return ir.GetVcc(); - case OperandField::ExecLo: - return ir.GetExec(); - case OperandField::ScalarGPR: - return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); - case OperandField::ConstZero: - return ir.Imm1(false); +void 
Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) { + const IR::U64 src0{GetSrc64(inst.src[0])}; + const IR::U64 src1{GetSrc64(inst.src[1])}; + const IR::U1 result = [&] { + switch (op) { + case ConditionOp::EQ: + return ir.IEqual(src0, src1); + case ConditionOp::LG: // NE + return ir.INotEqual(src0, src1); default: - UNREACHABLE(); + UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op)); } - }; - const IR::U1 src0{get_src(inst.src[0])}; - auto op = [&inst, this](auto x) { - switch (inst.src[1].field) { - case OperandField::ConstZero: - return x; - case OperandField::SignedConstIntNeg: - return ir.LogicalNot(x); - default: - UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field)); - } - }; + }(); + + if (is_signed) { + UNREACHABLE_MSG("V_CMP_U64 with signed integers is not supported"); + } + if (set_exec) { + UNREACHABLE_MSG("Exec setting for V_CMP_U64 is not supported"); + } + switch (inst.dst[1].field) { case OperandField::VccLo: - ir.SetVcc(op(src0)); - break; + return ir.SetVcc(result); case OperandField::ScalarGPR: - ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0)); - break; + return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result); default: UNREACHABLE(); } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 2497864c0..1f30a9565 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1712,12 +1712,32 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { } } -U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); +U1 IREmitter::ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) { + if (lhs.Type() != rhs.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SLessThanEqual32 : Opcode::ULessThanEqual32, lhs, rhs); + case Type::U64: + return Inst(is_signed ? Opcode::SLessThanEqual64 : Opcode::ULessThanEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } } -U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); +U1 IREmitter::IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) { + if (lhs.Type() != rhs.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SGreaterThan32 : Opcode::UGreaterThan32, lhs, rhs); + case Type::U64: + return Inst(is_signed ? Opcode::SGreaterThan64 : Opcode::UGreaterThan64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) { @@ -1734,8 +1754,20 @@ U1 IREmitter::INotEqual(const U32U64& lhs, const U32U64& rhs) { } } -U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); +U1 IREmitter::IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed) { + if (lhs.Type() != rhs.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(is_signed ? 
Opcode::SGreaterThanEqual32 : Opcode::UGreaterThanEqual32, lhs, + rhs); + case Type::U64: + return Inst(is_signed ? Opcode::SGreaterThanEqual64 : Opcode::UGreaterThanEqual64, lhs, + rhs); + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::LogicalOr(const U1& a, const U1& b) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 9e2f79978..119e3752e 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -299,10 +299,10 @@ public: [[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); - [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); - [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 ILessThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThan(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 INotEqual(const U32U64& lhs, const U32U64& rhs); - [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThanEqual(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 7fc514de9..d177017f2 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -382,14 +382,20 @@ OPCODE(ULessThan32, U1, U32, OPCODE(ULessThan64, U1, U64, U64, ) OPCODE(IEqual32, U1, U32, U32, ) OPCODE(IEqual64, U1, U64, U64, ) -OPCODE(SLessThanEqual, U1, U32, U32, ) -OPCODE(ULessThanEqual, U1, U32, U32, ) -OPCODE(SGreaterThan, U1, U32, U32, ) -OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(SLessThanEqual32, U1, U32, U32, ) +OPCODE(SLessThanEqual64, U1, U64, U64, ) +OPCODE(ULessThanEqual32, U1, U32, U32, ) +OPCODE(ULessThanEqual64, U1, U64, U64, ) +OPCODE(SGreaterThan32, U1, U32, U32, ) +OPCODE(SGreaterThan64, U1, U64, U64, ) +OPCODE(UGreaterThan32, U1, U32, U32, ) +OPCODE(UGreaterThan64, U1, U64, U64, ) OPCODE(INotEqual32, U1, U32, U32, ) OPCODE(INotEqual64, U1, U64, U64, ) -OPCODE(SGreaterThanEqual, U1, U32, U32, ) -OPCODE(UGreaterThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual32, U1, U32, U32, ) +OPCODE(SGreaterThanEqual64, U1, U64, U64, ) +OPCODE(UGreaterThanEqual32, U1, U32, U32, ) +OPCODE(UGreaterThanEqual64, U1, U64, U64, ) // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 5c66b1115..2a39d3a2e 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -381,24 +381,42 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::ULessThan64: FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; }); return; - case IR::Opcode::SLessThanEqual: + case IR::Opcode::SLessThanEqual32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); return; - case IR::Opcode::ULessThanEqual: + case IR::Opcode::SLessThanEqual64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a <= b; }); + return; + case IR::Opcode::ULessThanEqual32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; }); return; - case IR::Opcode::SGreaterThan: + case 
IR::Opcode::ULessThanEqual64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a <= b; }); + return; + case IR::Opcode::SGreaterThan32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; }); return; - case IR::Opcode::UGreaterThan: + case IR::Opcode::SGreaterThan64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a > b; }); + return; + case IR::Opcode::UGreaterThan32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; }); return; - case IR::Opcode::SGreaterThanEqual: + case IR::Opcode::UGreaterThan64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a > b; }); + return; + case IR::Opcode::SGreaterThanEqual32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; }); return; - case IR::Opcode::UGreaterThanEqual: + case IR::Opcode::SGreaterThanEqual64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a >= b; }); + return; + case IR::Opcode::UGreaterThanEqual32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); return; + case IR::Opcode::UGreaterThanEqual64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a >= b; }); + return; case IR::Opcode::IEqual32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); return;
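
Follow-up sketch (not part of the diff above): with ILessThan, ILessThanEqual, IGreaterThan and IGreaterThanEqual now accepting U32U64 operands, the remaining V_CMP_{OP8}_U64 comparisons could be routed through the same V_CMP_U64 helper. The fragment below is a minimal sketch under two assumptions: that the GCN opcode table already declares V_CMP_LT_U64, V_CMP_LE_U64, V_CMP_GT_U64 and V_CMP_GE_U64 (the "V_CMP_{OP8}_U64" grouping comment suggests the group exists, but verify the enum), and that ConditionOp defines LT, LE, GT and GE as it does for the 32-bit compares.

    // Sketch, not in this patch: extra cases for Translator::EmitVectorAlu,
    // next to the existing V_CMP_EQ_U64 / V_CMP_NE_U64 cases.
    case Opcode::V_CMP_LT_U64:
        return V_CMP_U64(ConditionOp::LT, false, false, inst);
    case Opcode::V_CMP_LE_U64:
        return V_CMP_U64(ConditionOp::LE, false, false, inst);
    case Opcode::V_CMP_GT_U64:
        return V_CMP_U64(ConditionOp::GT, false, false, inst);
    case Opcode::V_CMP_GE_U64:
        return V_CMP_U64(ConditionOp::GE, false, false, inst);

    // Matching arms for the condition switch inside Translator::V_CMP_U64,
    // forwarding to the IR helpers that now dispatch on U32 vs U64 operands.
    // is_signed stays false for the *_U64 forms; a signed *_I64 group would
    // pass true and reuse the same arms.
    case ConditionOp::LT:
        return ir.ILessThan(src0, src1, is_signed);
    case ConditionOp::LE:
        return ir.ILessThanEqual(src0, src1, is_signed);
    case ConditionOp::GT:
        return ir.IGreaterThan(src0, src1, is_signed);
    case ConditionOp::GE:
        return ir.IGreaterThanEqual(src0, src1, is_signed);

No further backend work should be needed for these cases: the SPIR-V emitters, opcodes.inc entries and constant-folding rules for the 64-bit relational operators are already added by this patch.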
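
If the VOPC X variants (V_CMPX_*_U64), which also write EXEC, ever need to go through this path, the set_exec guard could follow the shape of the 32-bit compare helpers instead of bailing out. A sketch, assuming IREmitter exposes SetExec alongside the GetExec and SetVcc calls already used in vector_alu.cpp:

    // Sketch, not in this patch: honour set_exec in Translator::V_CMP_U64
    // instead of hitting UNREACHABLE_MSG.
    if (set_exec) {
        ir.SetExec(result); // assumed IREmitter::SetExec, mirroring SetVcc above
    }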