From b1f74660df317cd3b396353599608b0cb3b5375f Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 27 Dec 2024 06:46:07 -0800 Subject: [PATCH] shader_recompiler: Implement S_BCNT1_I32_B64 and S_FF1_I32_B64 (#1889) * shader_recompiler: Implement S_BCNT1_I32_B64 * shader_recompiler: Implement S_FF1_I32_B64 * shader_recompiler: Implement IEqual for 64-bit. * shader_recompiler: Fix immediate type in S_FF1_I32_B32 --- .../backend/spirv/emit_spirv_instructions.h | 5 +++- .../backend/spirv/emit_spirv_integer.cpp | 14 +++++++++- .../frontend/translate/scalar_alu.cpp | 17 ++++++++++++ .../frontend/translate/translate.h | 2 ++ src/shader_recompiler/ir/ir_emitter.cpp | 26 +++++++++++++++---- src/shader_recompiler/ir/ir_emitter.h | 4 +-- src/shader_recompiler/ir/opcodes.inc | 5 +++- .../ir/passes/constant_propagation_pass.cpp | 5 +++- .../ir/passes/resource_tracking_pass.cpp | 2 +- 9 files changed, 68 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 2f606eb45..85bed589b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -304,10 +304,12 @@ Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); Id EmitBitReverse32(EmitContext& ctx, Id value); Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitCount64(EmitContext& ctx, Id value); Id EmitBitwiseNot32(EmitContext& ctx, Id value); Id EmitFindSMsb32(EmitContext& ctx, Id value); Id EmitFindUMsb32(EmitContext& ctx, Id value); Id EmitFindILsb32(EmitContext& ctx, Id value); +Id EmitFindILsb64(EmitContext& ctx, Id value); Id EmitSMin32(EmitContext& ctx, Id a, Id b); Id EmitUMin32(EmitContext& ctx, Id a, Id b); Id EmitSMax32(EmitContext& ctx, Id a, Id b); @@ -318,7 +320,8 @@ Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs); Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 02af92385..def1f816e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -201,6 +201,10 @@ Id EmitBitCount32(EmitContext& ctx, Id value) { return ctx.OpBitCount(ctx.U32[1], value); } +Id EmitBitCount64(EmitContext& ctx, Id value) { + return ctx.OpBitCount(ctx.U64, value); +} + Id EmitBitwiseNot32(EmitContext& ctx, Id value) { return ctx.OpNot(ctx.U32[1], value); } @@ -217,6 +221,10 @@ Id EmitFindILsb32(EmitContext& ctx, Id value) { return ctx.OpFindILsb(ctx.U32[1], value); } +Id EmitFindILsb64(EmitContext& ctx, Id value) { + return ctx.OpFindILsb(ctx.U64, value); +} + Id EmitSMin32(EmitContext& ctx, Id a, Id b) { return ctx.OpSMin(ctx.U32[1], a, b); } @@ -277,7 +285,11 @@ Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpULessThan(ctx.U1[1], lhs, rhs); } -Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpIEqual(ctx.U1[1], lhs, rhs); +} + +Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpIEqual(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index f96fd0f40..3a2b01a90 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -100,8 +100,12 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BREV_B32(inst); case Opcode::S_BCNT1_I32_B32: return S_BCNT1_I32_B32(inst); + case Opcode::S_BCNT1_I32_B64: + return S_BCNT1_I32_B64(inst); case Opcode::S_FF1_I32_B32: return S_FF1_I32_B32(inst); + case Opcode::S_FF1_I32_B64: + return S_FF1_I32_B64(inst); case Opcode::S_AND_SAVEEXEC_B64: return S_SAVEEXEC_B64(NegateMode::None, false, inst); case Opcode::S_ORN2_SAVEEXEC_B64: @@ -585,12 +589,25 @@ void Translator::S_BCNT1_I32_B32(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { + const IR::U32 result = ir.BitCount(GetSrc64(inst.src[0])); + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_FF1_I32_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))}; SetDst(inst.dst[0], result); } +void Translator::S_FF1_I32_B64(const GcnInst& inst) { + const IR::U64 src0{GetSrc64(inst.src[0])}; + const IR::U32 result{ + ir.Select(ir.IEqual(src0, ir.Imm64(u64(0))), ir.Imm32(-1), ir.FindILsb(src0))}; + SetDst(inst.dst[0], result); +} + void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) { // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index fd4d8d86a..e8584ec2f 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -111,7 +111,9 @@ public: void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); void S_BCNT1_I32_B32(const GcnInst& inst); + void S_BCNT1_I32_B64(const GcnInst& inst); void S_FF1_I32_B32(const GcnInst& inst); + void S_FF1_I32_B64(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); void S_ABS_I32(const GcnInst& inst); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index c241ec984..c9d97679f 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1273,8 +1273,15 @@ U32 IREmitter::BitReverse(const U32& value) { return Inst(Opcode::BitReverse32, value); } -U32 IREmitter::BitCount(const U32& value) { - return Inst(Opcode::BitCount32, value); +U32 IREmitter::BitCount(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::BitCount32, value); + case Type::U64: + return Inst(Opcode::BitCount64, value); + default: + ThrowInvalidType(value.Type()); + } } U32 IREmitter::BitwiseNot(const U32& value) { @@ -1289,8 +1296,15 @@ U32 IREmitter::FindUMsb(const U32& value) { return Inst(Opcode::FindUMsb32, value); } -U32 IREmitter::FindILsb(const U32& value) { - return Inst(Opcode::FindILsb32, value); +U32 IREmitter::FindILsb(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::FindILsb32, value); + case Type::U64: + return Inst(Opcode::FindILsb64, value); + default: + ThrowInvalidType(value.Type()); + } } U32 IREmitter::SMin(const U32& a, const U32& b) { @@ -1345,7 +1359,9 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { } switch (lhs.Type()) { case Type::U32: - return Inst(Opcode::IEqual, lhs, rhs); + return Inst(Opcode::IEqual32, lhs, rhs); + case Type::U64: + return Inst(Opcode::IEqual64, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 4cf44107e..4679a0133 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -229,12 +229,12 @@ public: [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, bool is_signed = false); [[nodiscard]] U32 BitReverse(const U32& value); - [[nodiscard]] U32 BitCount(const U32& value); + [[nodiscard]] U32 BitCount(const U32U64& value); [[nodiscard]] U32 BitwiseNot(const U32& value); [[nodiscard]] U32 FindSMsb(const U32& value); [[nodiscard]] U32 FindUMsb(const U32& value); - [[nodiscard]] U32 FindILsb(const U32& value); + [[nodiscard]] U32 FindILsb(const U32U64& value); [[nodiscard]] U32 SMin(const U32& a, const U32& b); [[nodiscard]] U32 UMin(const U32& a, const U32& b); [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index aafd43ea8..cf2c3b67e 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -284,11 +284,13 @@ OPCODE(BitFieldSExtract, U32, U32, OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) OPCODE(BitReverse32, U32, U32, ) OPCODE(BitCount32, U32, U32, ) +OPCODE(BitCount64, U32, U64, ) OPCODE(BitwiseNot32, U32, U32, ) OPCODE(FindSMsb32, U32, U32, ) OPCODE(FindUMsb32, U32, U32, ) OPCODE(FindILsb32, U32, U32, ) +OPCODE(FindILsb64, U32, U64, ) OPCODE(SMin32, U32, U32, U32, ) OPCODE(UMin32, U32, U32, U32, ) OPCODE(SMax32, U32, U32, U32, ) @@ -299,7 +301,8 @@ OPCODE(SLessThan32, U1, U32, OPCODE(SLessThan64, U1, U64, U64, ) OPCODE(ULessThan32, U1, U32, U32, ) OPCODE(ULessThan64, U1, U64, U64, ) -OPCODE(IEqual, U1, U32, U32, ) +OPCODE(IEqual32, U1, U32, U32, ) +OPCODE(IEqual64, U1, U64, U64, ) OPCODE(SLessThanEqual, U1, U32, U32, ) OPCODE(ULessThanEqual, U1, U32, U32, ) OPCODE(SGreaterThan, U1, U32, U32, ) diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index 16b07e1a1..fcf2f7d9f 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -391,9 +391,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::UGreaterThanEqual: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); return; - case IR::Opcode::IEqual: + case IR::Opcode::IEqual32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); return; + case IR::Opcode::IEqual64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a == b; }); + return; case IR::Opcode::INotEqual: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); return; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index db1a2edd2..e6d23bfe7 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -249,7 +249,7 @@ std::pair TryDisableAnisoLod0(const IR::Inst* inst) { // Select should be based on zero check const auto* prod0 = inst->Arg(0).InstRecursive(); - if (prod0->GetOpcode() != IR::Opcode::IEqual || + if (prod0->GetOpcode() != IR::Opcode::IEqual32 || !(prod0->Arg(1).IsImmediate() && prod0->Arg(1).U32() == 0u)) { return not_found; }