shader_recompiler: Implement ff1 with subgroup ops (#3225)

This commit is contained in:
TheTurtle 2025-07-10 21:52:56 +03:00 committed by GitHub
parent 88abb93669
commit 8bc30270c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 27 additions and 14 deletions

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 6b450704f6fedb9413d0c89a9eb59d028eb1e6c0 Subproject commit b4eccb336f1b1169af48dac1e04015985af86e3e

View file

@ -531,6 +531,8 @@ Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
Id EmitReadFirstLane(EmitContext& ctx, Id value); Id EmitReadFirstLane(EmitContext& ctx, Id value);
Id EmitReadLane(EmitContext& ctx, Id value, Id lane); Id EmitReadLane(EmitContext& ctx, Id value, Id lane);
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
Id EmitBallot(EmitContext& ctx, Id bit);
Id EmitBallotFindLsb(EmitContext& ctx, Id mask);
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding); Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding); Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);

View file

@ -34,4 +34,12 @@ Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {
return ctx.u32_zero_value; return ctx.u32_zero_value;
} }
// Emits a subgroup ballot for the Ballot IR opcode.
// `bit` is the per-invocation predicate; the result is requested as ctx.U32[4]
// (a four-component 32-bit unsigned vector, the result type SPIR-V requires for
// OpGroupNonUniformBallot) at the scope returned by SubgroupScope(ctx).
Id EmitBallot(EmitContext& ctx, Id bit) {
return ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), bit);
}
// Emits OpGroupNonUniformBallotFindLSB for the BallotFindLsb IR opcode.
// `mask` is passed through as the ballot value and the result is requested as
// a scalar ctx.U32[1], at the scope returned by SubgroupScope(ctx).
// NOTE(review): the SPIR-V specification leaves the result undefined when no
// bit of the ballot is set; confirm callers never rely on a "not found" value
// (the hardware s_ff1 instruction this backs has a defined result for zero).
Id EmitBallotFindLsb(EmitContext& ctx, Id mask) {
return ctx.OpGroupNonUniformBallotFindLSB(ctx.U32[1], SubgroupScope(ctx), mask);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View file

@ -680,8 +680,9 @@ void Translator::S_FF1_I32_B32(const GcnInst& inst) {
} }
void Translator::S_FF1_I32_B64(const GcnInst& inst) { void Translator::S_FF1_I32_B64(const GcnInst& inst) {
const IR::U64 src0{GetSrc64(inst.src[0])}; ASSERT(inst.src[0].field == OperandField::ScalarGPR);
const IR::U32 result{ir.FindILsb(src0)}; const IR::U32 result{
ir.BallotFindLsb(ir.Ballot(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))))};
SetDst(inst.dst[0], result); SetDst(inst.dst[0], result);
} }

View file

@ -660,6 +660,14 @@ U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& la
return Inst<U32>(Opcode::WriteLane, value, write_value, lane); return Inst<U32>(Opcode::WriteLane, value, write_value, lane);
} }
// Appends a Ballot instruction taking the single-bit predicate `bit`.
// The result is returned as an untyped Value; the opcode table declares the
// Ballot result as U32x4.
Value IREmitter::Ballot(const U1& bit) {
return Inst(Opcode::Ballot, bit);
}
// Appends a BallotFindLsb instruction over `mask` and returns its U32 result.
// The opcode table declares the operand as U32x4, i.e. the value produced by
// Ballot().
U32 IREmitter::BallotFindLsb(const Value& mask) {
return Inst<U32>(Opcode::BallotFindLsb, mask);
}
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) { F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
if (a.Type() != b.Type()) { if (a.Type() != b.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());

View file

@ -176,6 +176,8 @@ public:
[[nodiscard]] U32 ReadFirstLane(const U32& value); [[nodiscard]] U32 ReadFirstLane(const U32& value);
[[nodiscard]] U32 ReadLane(const U32& value, const U32& lane); [[nodiscard]] U32 ReadLane(const U32& value, const U32& lane);
[[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane); [[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane);
[[nodiscard]] Value Ballot(const U1& bit);
[[nodiscard]] U32 BallotFindLsb(const Value& mask);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);

View file

@ -472,5 +472,7 @@ OPCODE(QuadShuffle, U32, U32,
OPCODE(ReadFirstLane, U32, U32, ) OPCODE(ReadFirstLane, U32, U32, )
OPCODE(ReadLane, U32, U32, U32 ) OPCODE(ReadLane, U32, U32, U32 )
OPCODE(WriteLane, U32, U32, U32, U32 ) OPCODE(WriteLane, U32, U32, U32, U32 )
OPCODE(Ballot, U32x4, U1, )
OPCODE(BallotFindLsb, U32, U32x4, )
OPCODE(DataAppend, U32, U32, U32 ) OPCODE(DataAppend, U32, U32, U32 )
OPCODE(DataConsume, U32, U32, U32 ) OPCODE(DataConsume, U32, U32, U32 )

View file

@ -95,17 +95,7 @@ void ReadLaneEliminationPass(IR::Program& program) {
if (inst.GetOpcode() != IR::Opcode::ReadLane) { if (inst.GetOpcode() != IR::Opcode::ReadLane) {
continue; continue;
} }
if (!inst.Arg(1).IsImmediate()) {
// Check for the following pattern and replace it with ReadFirstLane
// s_ff1_i32_b64 sgpr, exec
// v_readlane_b32 sdst, vgpr, sgpr
if (const auto lane = inst.Arg(1); !lane.IsImmediate()) {
if (lane.InstRecursive()->GetOpcode() == IR::Opcode::FindILsb64) {
const auto value = inst.Arg(0);
inst.ReplaceOpcode(IR::Opcode::ReadFirstLane);
inst.ClearArgs();
inst.SetArg(0, value);
}
continue; continue;
} }