ir_passes: Integrate DS barriers in block (#2020)

This commit is contained in:
TheTurtle 2025-01-02 22:52:10 +02:00 committed by GitHub
parent 67c531298a
commit dcc662ff1a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 52 additions and 26 deletions

View file

@ -8,6 +8,54 @@
namespace Shader::Optimization {
static void EmitBarrierInBlock(IR::Block* block) {
// This is inteded to insert a barrier when shared memory write and read
// occur in the same basic block. Also checks if branch depth is zero as
// we don't want to insert barrier in potentially divergent code.
bool emit_barrier_on_write = false;
bool emit_barrier_on_read = false;
const auto emit_barrier = [block](bool& emit_cond, IR::Inst& inst) {
if (emit_cond) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
ir.Barrier();
emit_cond = false;
}
};
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::LoadSharedU32 ||
inst.GetOpcode() == IR::Opcode::LoadSharedU64) {
emit_barrier(emit_barrier_on_read, inst);
emit_barrier_on_write = true;
}
if (inst.GetOpcode() == IR::Opcode::WriteSharedU32 ||
inst.GetOpcode() == IR::Opcode::WriteSharedU64) {
emit_barrier(emit_barrier_on_write, inst);
emit_barrier_on_read = true;
}
}
}
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
// Insert a barrier after divergent conditional blocks.
// This avoids potential softlocks and crashes when some threads
// initialize shared memory and others read from it.
const IR::U1 cond = data.if_node.cond;
const auto insert_barrier =
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
return true;
}
return std::nullopt;
});
if (insert_barrier) {
IR::Block* const merge = data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
}
}
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
if (!program.info.uses_shared || !profile.needs_lds_barriers) {
return;
@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
--branch_depth;
continue;
}
if (node.type != Type::If) {
if (node.type == Type::If && branch_depth++ == 0) {
EmitBarrierInMergeBlock(node.data);
continue;
}
u32 curr_depth = branch_depth++;
if (curr_depth != 0) {
continue;
}
const IR::U1 cond = node.data.if_node.cond;
const auto insert_barrier =
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
return true;
}
return std::nullopt;
});
if (insert_barrier) {
IR::Block* const merge = node.data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
if (node.type == Type::Block && branch_depth == 0) {
EmitBarrierInBlock(node.data.block);
}
}
}