From 8bc30270c853635885fffdca9f2ef757ea7ef484 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Thu, 10 Jul 2025 21:52:56 +0300 Subject: [PATCH] shader_recompiler: Implement ff1 with subgroup ops (#3225) --- externals/sirit | 2 +- .../backend/spirv/emit_spirv_instructions.h | 2 ++ .../backend/spirv/emit_spirv_warp.cpp | 8 ++++++++ .../frontend/translate/scalar_alu.cpp | 5 +++-- src/shader_recompiler/ir/ir_emitter.cpp | 8 ++++++++ src/shader_recompiler/ir/ir_emitter.h | 2 ++ src/shader_recompiler/ir/opcodes.inc | 2 ++ .../ir/passes/readlane_elimination_pass.cpp | 12 +----------- 8 files changed, 27 insertions(+), 14 deletions(-) diff --git a/externals/sirit b/externals/sirit index 6b450704f..b4eccb336 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 6b450704f6fedb9413d0c89a9eb59d028eb1e6c0 +Subproject commit b4eccb336f1b1169af48dac1e04015985af86e3e diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index f3dd9b2ea..74c94754d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -531,6 +531,8 @@ Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); Id EmitReadFirstLane(EmitContext& ctx, Id value); Id EmitReadLane(EmitContext& ctx, Id value, Id lane); Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); +Id EmitBallot(EmitContext& ctx, Id bit); +Id EmitBallotFindLsb(EmitContext& ctx, Id mask); Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding); Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 20fb83fa6..951c76001 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -34,4 +34,12 @@ Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) { return ctx.u32_zero_value; } +Id EmitBallot(EmitContext& ctx, Id bit) { + return ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), bit); +} + +Id EmitBallotFindLsb(EmitContext& ctx, Id mask) { + return ctx.OpGroupNonUniformBallotFindLSB(ctx.U32[1], SubgroupScope(ctx), mask); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 276b55567..e3134c300 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -680,8 +680,9 @@ void Translator::S_FF1_I32_B32(const GcnInst& inst) { } void Translator::S_FF1_I32_B64(const GcnInst& inst) { - const IR::U64 src0{GetSrc64(inst.src[0])}; - const IR::U32 result{ir.FindILsb(src0)}; + ASSERT(inst.src[0].field == OperandField::ScalarGPR); + const IR::U32 result{ + ir.BallotFindLsb(ir.Ballot(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))))}; SetDst(inst.dst[0], result); } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index b88e1a17d..4997145d7 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -660,6 +660,14 @@ U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& la return Inst(Opcode::WriteLane, value, write_value, lane); } +Value IREmitter::Ballot(const U1& bit) { + return Inst(Opcode::Ballot, bit); +} + +U32 IREmitter::BallotFindLsb(const Value& mask) { + return Inst(Opcode::BallotFindLsb, mask); +} + F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { if (a.Type() != b.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index d9e5aab7a..6055df565 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -176,6 +176,8 @@ public: [[nodiscard]] U32 ReadFirstLane(const U32& value); [[nodiscard]] U32 ReadLane(const U32& value, const U32& lane); [[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane); + [[nodiscard]] Value Ballot(const U1& bit); + [[nodiscard]] U32 BallotFindLsb(const Value& mask); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 08dcec458..747a27e35 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -472,5 +472,7 @@ OPCODE(QuadShuffle, U32, U32, OPCODE(ReadFirstLane, U32, U32, ) OPCODE(ReadLane, U32, U32, U32 ) OPCODE(WriteLane, U32, U32, U32, U32 ) +OPCODE(Ballot, U32x4, U1, ) +OPCODE(BallotFindLsb, U32, U32x4, ) OPCODE(DataAppend, U32, U32, U32 ) OPCODE(DataConsume, U32, U32, U32 ) diff --git a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp index 3378d785f..d6586bda0 100644 --- a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp +++ b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp @@ -95,17 +95,7 @@ void ReadLaneEliminationPass(IR::Program& program) { if (inst.GetOpcode() != IR::Opcode::ReadLane) { continue; } - - // Check for the following pattern and replace it with ReadFirstLane - // s_ff1_i32_b64 sgpr, exec - // v_readlane_b32 sdst, vgpr, sgpr - if (const auto lane = inst.Arg(1); !lane.IsImmediate()) { - if (lane.InstRecursive()->GetOpcode() == IR::Opcode::FindILsb64) { - const auto value = inst.Arg(0); - inst.ReplaceOpcode(IR::Opcode::ReadFirstLane); - inst.ClearArgs(); - inst.SetArg(0, value); - } + if (!inst.Arg(1).IsImmediate()) { continue; }