texture_cache: Implement color<->depth copies (#3079)

* texture_cache: Implement color to depth copies and vice versa

* ir_passes: Adjust shared memory barrier pass to cover more cases

* texture_cache: Remove unused code

* review comment
This commit is contained in:
TheTurtle 2025-06-11 11:34:37 +03:00 committed by GitHub
parent fc4fd0107d
commit dedf6de2ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 157 additions and 54 deletions

View file

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <unordered_set>
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
@ -51,11 +52,14 @@ static void EmitBarrierInBlock(IR::Block* block) {
}
}
using NodeSet = std::unordered_set<const IR::Block*>;
// Inserts a barrier after divergent conditional blocks to avoid undefined
// behavior when some threads write and others read from shared memory.
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data,
NodeSet& divergence_end, u32& divergence_depth) {
const IR::U1 cond = data.if_node.cond;
const auto insert_barrier =
const auto is_divergent_cond =
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
@ -63,11 +67,15 @@ static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
}
return std::nullopt;
});
if (insert_barrier) {
IR::Block* const merge = data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
if (is_divergent_cond) {
if (divergence_depth == 0) {
IR::Block* const merge = data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
}
++divergence_depth;
divergence_end.emplace(data.if_node.merge);
}
}
@ -89,19 +97,22 @@ void SharedMemoryBarrierPass(IR::Program& program, const RuntimeInfo& runtime_in
return;
}
using Type = IR::AbstractSyntaxNode::Type;
u32 branch_depth{};
u32 divergence_depth{};
NodeSet divergence_end;
for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
if (node.type == Type::EndIf) {
--branch_depth;
if (divergence_end.contains(node.data.end_if.merge)) {
--divergence_depth;
}
continue;
}
// Check if branch depth is zero; we don't want to insert a barrier in potentially
// divergent code.
if (node.type == Type::If && branch_depth++ == 0) {
EmitBarrierInMergeBlock(node.data);
if (node.type == Type::If) {
EmitBarrierInMergeBlock(node.data, divergence_end, divergence_depth);
continue;
}
if (node.type == Type::Block && branch_depth == 0) {
if (node.type == Type::Block && divergence_depth == 0) {
EmitBarrierInBlock(node.data.block);
}
}