ir: Add heuristic based LDS barrier pass (#1801)

* ir: Add heuristic based LDS barrier pass

* Attempts to insert barriers after zero-depth divergent conditional blocks in shaders that use shared memory

* lds_barriers: Limit to nvidia

* Intel has historically had problems with CS barriers; will debug another time
This commit is contained in:
TheTurtle 2024-12-19 10:18:28 +02:00 committed by GitHub
parent adf4b635f7
commit 188eebb92a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 55 additions and 0 deletions

View file

@ -6,6 +6,10 @@
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
// Forward declaration of Profile, which SharedMemoryBarrierPass below takes by const
// reference, so only the name needs to be known here.
namespace Shader {
struct Profile;
}
namespace Shader::Optimization {
void SsaRewritePass(IR::BlockList& program);
@ -21,5 +25,6 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
// Heuristic pass: for every If node that is not nested inside another If and whose condition
// depends on a LocalInvocationId attribute read, emits a barrier at the first non-phi position
// of that If's merge block. Does nothing unless program.info.uses_shared and
// profile.needs_lds_barriers are both set.
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile);
} // namespace Shader::Optimization

View file

@ -0,0 +1,46 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"
namespace Shader::Optimization {
// Heuristic pass that emits a barrier after every outermost (depth zero) conditional whose
// condition is derived from the local invocation id. The program is left untouched unless it
// uses shared memory and the profile requests LDS barriers.
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
    const bool pass_enabled = program.info.uses_shared && profile.needs_lds_barriers;
    if (!pass_enabled) {
        return;
    }
    using NodeType = IR::AbstractSyntaxNode::Type;
    // Number of If nodes that are currently open while walking the syntax list.
    u32 open_ifs{};
    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
        switch (node.type) {
        case NodeType::EndIf:
            --open_ifs;
            break;
        case NodeType::If: {
            // Only conditionals that are not nested inside another If are considered.
            const bool is_outermost = open_ifs == 0;
            ++open_ifs;
            if (!is_outermost) {
                break;
            }
            // Search the instructions feeding the condition for a LocalInvocationId read.
            const IR::U1 condition = node.data.if_node.cond;
            const auto found = IR::BreadthFirstSearch(
                condition, [](IR::Inst* inst) -> std::optional<bool> {
                    if (inst->GetOpcode() != IR::Opcode::GetAttributeU32) {
                        return std::nullopt;
                    }
                    if (inst->Arg(0).Attribute() != IR::Attribute::LocalInvocationId) {
                        return std::nullopt;
                    }
                    return true;
                });
            if (!found) {
                break;
            }
            // Emit the barrier in the merge block, skipping past any leading phi instructions.
            IR::Block* const merge_block = node.data.if_node.merge;
            auto first_non_phi =
                std::ranges::find_if_not(merge_block->Instructions(), IR::IsPhi);
            IR::IREmitter emitter{*merge_block, first_non_phi};
            emitter.Barrier();
            break;
        }
        default:
            break;
        }
    }
}
} // namespace Shader::Optimization

View file

@ -27,6 +27,7 @@ struct Profile {
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};
bool needs_manual_interpolation{};
bool needs_lds_barriers{};
u64 min_ssbo_alignment{};
};

View file

@ -91,6 +91,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::CollectShaderInfoPass(program);
Shader::Optimization::SharedMemoryBarrierPass(program, profile);
return program;
}