mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-14 08:12:16 +00:00
* basic impl * minor improvements * clang * more clang * improvements requested by squidbus
430 lines
17 KiB
C++
430 lines
17 KiB
C++
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include <algorithm>
|
|
#include <unordered_map>
|
|
#include "common/assert.h"
|
|
#include "common/logging/log.h"
|
|
#include "shader_recompiler/frontend/control_flow_graph.h"
|
|
|
|
namespace Shader::Gcn {
|
|
|
|
struct Compare {
|
|
bool operator()(const Block& lhs, u32 rhs) const noexcept {
|
|
return lhs.begin < rhs;
|
|
}
|
|
|
|
bool operator()(u32 lhs, const Block& rhs) const noexcept {
|
|
return lhs < rhs.begin;
|
|
}
|
|
|
|
bool operator()(const Block& lhs, const Block& rhs) const noexcept {
|
|
return lhs.begin < rhs.begin;
|
|
}
|
|
};
|
|
|
|
static IR::Condition MakeCondition(const GcnInst& inst) {
|
|
if (inst.IsCmpx()) {
|
|
return IR::Condition::Execnz;
|
|
}
|
|
|
|
switch (inst.opcode) {
|
|
case Opcode::S_CBRANCH_SCC0:
|
|
return IR::Condition::Scc0;
|
|
case Opcode::S_CBRANCH_SCC1:
|
|
return IR::Condition::Scc1;
|
|
case Opcode::S_CBRANCH_VCCZ:
|
|
return IR::Condition::Vccz;
|
|
case Opcode::S_CBRANCH_VCCNZ:
|
|
return IR::Condition::Vccnz;
|
|
case Opcode::S_CBRANCH_EXECZ:
|
|
return IR::Condition::Execz;
|
|
case Opcode::S_CBRANCH_EXECNZ:
|
|
return IR::Condition::Execnz;
|
|
default:
|
|
return IR::Condition::True;
|
|
}
|
|
}
|
|
|
|
static bool IgnoresExecMask(const GcnInst& inst) {
|
|
// EXEC mask does not affect scalar instructions or branches.
|
|
switch (inst.category) {
|
|
case InstCategory::ScalarALU:
|
|
case InstCategory::ScalarMemory:
|
|
case InstCategory::FlowControl:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
// Read/Write Lane instructions are not affected either.
|
|
switch (inst.opcode) {
|
|
case Opcode::V_READLANE_B32:
|
|
case Opcode::V_WRITELANE_B32:
|
|
case Opcode::V_READFIRSTLANE_B32:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static std::optional<u32> ResolveSetPcTarget(std::span<const GcnInst> list, u32 setpc_index,
|
|
std::span<const u32> pc_map) {
|
|
if (setpc_index < 3) {
|
|
return std::nullopt;
|
|
}
|
|
|
|
const auto& getpc = list[setpc_index - 3];
|
|
const auto& arith = list[setpc_index - 2];
|
|
const auto& setpc = list[setpc_index];
|
|
|
|
if (getpc.opcode != Opcode::S_GETPC_B64 ||
|
|
!(arith.opcode == Opcode::S_ADD_U32 || arith.opcode == Opcode::S_SUB_U32) ||
|
|
setpc.opcode != Opcode::S_SETPC_B64)
|
|
return std::nullopt;
|
|
|
|
if (getpc.dst[0].code != setpc.src[0].code || arith.dst[0].code != setpc.src[0].code)
|
|
return std::nullopt;
|
|
|
|
if (arith.src_count < 2 || arith.src[1].field != OperandField::LiteralConst)
|
|
return std::nullopt;
|
|
|
|
const u32 imm = arith.src[1].code;
|
|
|
|
const s32 signed_offset =
|
|
(arith.opcode == Opcode::S_ADD_U32) ? static_cast<s32>(imm) : -static_cast<s32>(imm);
|
|
|
|
const u32 base_pc = pc_map[setpc_index - 3] + getpc.length;
|
|
|
|
const u32 result_pc = static_cast<u32>(static_cast<s32>(base_pc) + signed_offset);
|
|
LOG_DEBUG(Render_Recompiler, "SetPC target: {} + {} = {}", base_pc, signed_offset, result_pc);
|
|
return result_pc & ~0x3u;
|
|
}
|
|
|
|
static constexpr size_t LabelReserveSize = 32;
|
|
|
|
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
|
|
: block_pool{block_pool_}, inst_list{inst_list_} {
|
|
index_to_pc.resize(inst_list.size() + 1);
|
|
labels.reserve(LabelReserveSize);
|
|
EmitLabels();
|
|
EmitBlocks();
|
|
LinkBlocks();
|
|
SplitDivergenceScopes();
|
|
}
|
|
|
|
void CFG::EmitLabels() {
|
|
// Always set a label at entry point.
|
|
u32 pc = 0;
|
|
AddLabel(pc);
|
|
|
|
// Iterate instruction list and add labels to branch targets.
|
|
for (u32 i = 0; i < inst_list.size(); i++) {
|
|
index_to_pc[i] = pc;
|
|
const GcnInst inst = inst_list[i];
|
|
if (inst.IsUnconditionalBranch()) {
|
|
u32 target = inst.BranchTarget(pc);
|
|
if (inst.opcode == Opcode::S_SETPC_B64) {
|
|
if (auto t = ResolveSetPcTarget(inst_list, i, index_to_pc)) {
|
|
target = *t;
|
|
} else {
|
|
ASSERT_MSG(
|
|
false,
|
|
"S_SETPC_B64 without a resolvable offset at PC {:#x} (Index {}): Involved "
|
|
"instructions not recognized or invalid pattern",
|
|
pc, i);
|
|
}
|
|
}
|
|
AddLabel(target);
|
|
// Emit this label so that the block ends with the branching instruction
|
|
AddLabel(pc + inst.length);
|
|
} else if (inst.IsConditionalBranch()) {
|
|
const u32 true_label = inst.BranchTarget(pc);
|
|
const u32 false_label = pc + inst.length;
|
|
AddLabel(true_label);
|
|
AddLabel(false_label);
|
|
} else if (inst.opcode == Opcode::S_ENDPGM) {
|
|
const u32 next_label = pc + inst.length;
|
|
AddLabel(next_label);
|
|
}
|
|
|
|
pc += inst.length;
|
|
}
|
|
index_to_pc[inst_list.size()] = pc;
|
|
|
|
// Sort labels to make sure block insertion is correct.
|
|
std::ranges::sort(labels);
|
|
}
|
|
|
|
void CFG::SplitDivergenceScopes() {
|
|
const auto is_open_scope = [](const GcnInst& inst) {
|
|
// An open scope instruction is an instruction that modifies EXEC
|
|
// but also saves the previous value to restore later. This indicates
|
|
// we are entering a scope.
|
|
return inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
|
|
// While this instruction does not save EXEC it is often used paired
|
|
// with SAVEEXEC to mask the threads that didn't pass the condition
|
|
// of initial branch.
|
|
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
|
|
inst.IsCmpx();
|
|
};
|
|
const auto is_close_scope = [](const GcnInst& inst) {
|
|
// Closing an EXEC scope can be either a branch instruction
|
|
// (typical case when S_AND_SAVEEXEC_B64 is right before a branch)
|
|
// or by a move instruction to EXEC that restores the backup.
|
|
return (inst.opcode == Opcode::S_MOV_B64 && inst.dst[0].field == OperandField::ExecLo) ||
|
|
// Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
|
|
// Those instructions need to be wrapped in the condition as well so allow branch
|
|
// as end scope instruction.
|
|
inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ENDPGM ||
|
|
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
|
|
};
|
|
|
|
for (auto blk = blocks.begin(); blk != blocks.end(); blk++) {
|
|
auto next_blk = std::next(blk);
|
|
s32 curr_begin = -1;
|
|
for (size_t index = blk->begin_index; index <= blk->end_index; index++) {
|
|
const auto& inst = inst_list[index];
|
|
const bool is_close = is_close_scope(inst);
|
|
if ((is_close || index == blk->end_index) && curr_begin != -1) {
|
|
// If there are no instructions inside scope don't do anything.
|
|
if (index - curr_begin == 1) {
|
|
curr_begin = -1;
|
|
continue;
|
|
}
|
|
// If all instructions in the scope ignore exec masking, we shouldn't insert a
|
|
// scope.
|
|
const auto start = inst_list.begin() + curr_begin + 1;
|
|
if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
|
|
// Determine the first instruction affected by the exec mask.
|
|
do {
|
|
++curr_begin;
|
|
} while (IgnoresExecMask(inst_list[curr_begin]));
|
|
|
|
// Determine the last instruction affected by the exec mask.
|
|
s32 curr_end = index;
|
|
while (IgnoresExecMask(inst_list[curr_end])) {
|
|
--curr_end;
|
|
}
|
|
|
|
// Create a new block for the divergence scope.
|
|
Block* block = block_pool.Create();
|
|
block->begin = index_to_pc[curr_begin];
|
|
block->end = index_to_pc[curr_end];
|
|
block->begin_index = curr_begin;
|
|
block->end_index = curr_end;
|
|
block->end_inst = inst_list[curr_end];
|
|
blocks.insert_before(next_blk, *block);
|
|
|
|
// If we are inside the parent block, make an epilogue block and jump to it.
|
|
if (curr_end != blk->end_index) {
|
|
Block* epi_block = block_pool.Create();
|
|
epi_block->begin = index_to_pc[curr_end + 1];
|
|
epi_block->end = blk->end;
|
|
epi_block->begin_index = curr_end + 1;
|
|
epi_block->end_index = blk->end_index;
|
|
epi_block->end_inst = blk->end_inst;
|
|
epi_block->cond = blk->cond;
|
|
epi_block->end_class = blk->end_class;
|
|
epi_block->branch_true = blk->branch_true;
|
|
epi_block->branch_false = blk->branch_false;
|
|
blocks.insert_before(next_blk, *epi_block);
|
|
|
|
// Have divergence block always jump to epilogue block.
|
|
block->cond = IR::Condition::True;
|
|
block->branch_true = epi_block;
|
|
block->branch_false = nullptr;
|
|
|
|
// If the parent block fails to enter divergence block make it jump to
|
|
// epilogue too
|
|
blk->branch_false = epi_block;
|
|
} else {
|
|
// No epilogue block is needed since the divergence block
|
|
// also ends the parent block. Inherit the end condition.
|
|
auto& parent_blk = *blk;
|
|
ASSERT(blk->cond == IR::Condition::True && blk->branch_true);
|
|
block->cond = IR::Condition::True;
|
|
block->branch_true = blk->branch_true;
|
|
block->branch_false = nullptr;
|
|
|
|
// If the parent block didn't enter the divergence scope
|
|
// have it jump directly to the next one
|
|
blk->branch_false = blk->branch_true;
|
|
}
|
|
|
|
// Shrink parent block to end right before curr_begin
|
|
// and make it jump to divergence block
|
|
--curr_begin;
|
|
blk->end = index_to_pc[curr_begin];
|
|
blk->end_index = curr_begin;
|
|
blk->end_inst = inst_list[curr_begin];
|
|
blk->cond = IR::Condition::Execnz;
|
|
blk->end_class = EndClass::Branch;
|
|
blk->branch_true = block;
|
|
}
|
|
// Reset scope begin.
|
|
curr_begin = -1;
|
|
}
|
|
// Mark a potential start of an exec scope.
|
|
if (is_open_scope(inst)) {
|
|
curr_begin = index;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void CFG::EmitBlocks() {
|
|
for (auto it = labels.cbegin(); it != labels.cend(); ++it) {
|
|
const Label start = *it;
|
|
const auto next_it = std::next(it);
|
|
const bool is_last = (next_it == labels.cend());
|
|
if (is_last) {
|
|
// Last label is special.
|
|
return;
|
|
}
|
|
// The end label is the start instruction of next block.
|
|
// The end instruction of this block is the previous one.
|
|
const Label end = *next_it;
|
|
const size_t end_index = GetIndex(end) - 1;
|
|
const auto& end_inst = inst_list[end_index];
|
|
|
|
// Insert block between the labels using the last instruction
|
|
// as an indicator for branching type.
|
|
Block* block = block_pool.Create();
|
|
block->begin = start;
|
|
block->end = end;
|
|
block->begin_index = GetIndex(start);
|
|
block->end_index = end_index;
|
|
block->end_inst = end_inst;
|
|
block->cond = MakeCondition(end_inst);
|
|
blocks.insert(*block);
|
|
}
|
|
}
|
|
|
|
void CFG::LinkBlocks() {
|
|
const auto get_block = [this](u32 address) {
|
|
auto it = blocks.find(address, Compare{});
|
|
ASSERT_MSG(it != blocks.cend() && it->begin == address);
|
|
return &*it;
|
|
};
|
|
|
|
for (auto it = blocks.begin(); it != blocks.end(); it++) {
|
|
auto& block = *it;
|
|
const auto end_inst{block.end_inst};
|
|
|
|
// If the block doesn't end with a branch we simply
|
|
// need to link with the next block.
|
|
if (!end_inst.IsTerminateInstruction()) {
|
|
auto* next_block = get_block(block.end);
|
|
++next_block->num_predecessors;
|
|
|
|
block.branch_true = next_block;
|
|
block.end_class = EndClass::Branch;
|
|
continue;
|
|
}
|
|
|
|
// Find the branch targets from the instruction and link the blocks.
|
|
// Note: Block end address is one instruction after end_inst.
|
|
const u32 branch_pc = block.end - end_inst.length;
|
|
u32 target_pc = 0;
|
|
if (end_inst.opcode == Opcode::S_SETPC_B64) {
|
|
auto tgt = ResolveSetPcTarget(inst_list, block.end_index, index_to_pc);
|
|
ASSERT_MSG(tgt,
|
|
"S_SETPC_B64 without a resolvable offset at PC {:#x} (Index {}): Involved "
|
|
"instructions not recognized or invalid pattern",
|
|
branch_pc, block.end_index);
|
|
target_pc = *tgt;
|
|
} else {
|
|
target_pc = end_inst.BranchTarget(branch_pc);
|
|
}
|
|
|
|
if (end_inst.IsUnconditionalBranch()) {
|
|
auto* target_block = get_block(target_pc);
|
|
++target_block->num_predecessors;
|
|
|
|
block.branch_true = target_block;
|
|
block.end_class = EndClass::Branch;
|
|
} else if (end_inst.IsConditionalBranch()) {
|
|
auto* target_block = get_block(target_pc);
|
|
++target_block->num_predecessors;
|
|
|
|
auto* end_block = get_block(block.end);
|
|
++end_block->num_predecessors;
|
|
|
|
block.branch_true = target_block;
|
|
block.branch_false = end_block;
|
|
block.end_class = EndClass::Branch;
|
|
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
|
|
const auto& prev_inst = inst_list[block.end_index - 1];
|
|
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
|
|
if (prev_inst.control.exp.target != 9) {
|
|
block.end_class = EndClass::Kill;
|
|
} else if (const auto& exec_mask = inst_list[block.end_index - 2];
|
|
exec_mask.src[0].field == OperandField::ConstZero) {
|
|
block.end_class = EndClass::Kill;
|
|
} else {
|
|
block.end_class = EndClass::Exit;
|
|
}
|
|
} else {
|
|
block.end_class = EndClass::Exit;
|
|
}
|
|
} else {
|
|
UNREACHABLE();
|
|
}
|
|
}
|
|
}
|
|
|
|
std::string CFG::Dot() const {
|
|
int node_uid{0};
|
|
|
|
const auto name_of = [](const Block& block) { return fmt::format("\"{:#x}\"", block.begin); };
|
|
|
|
std::string dot{"digraph shader {\n"};
|
|
dot += fmt::format("\tsubgraph cluster_{} {{\n", 0);
|
|
dot += fmt::format("\t\tnode [style=filled];\n");
|
|
for (const Block& block : blocks) {
|
|
const std::string name{name_of(block)};
|
|
const auto add_branch = [&](Block* branch, bool add_label) {
|
|
dot += fmt::format("\t\t{}->{}", name, name_of(*branch));
|
|
if (add_label && block.cond != IR::Condition::True &&
|
|
block.cond != IR::Condition::False) {
|
|
dot += fmt::format(" [label=\"{}\"]", block.cond);
|
|
}
|
|
dot += '\n';
|
|
};
|
|
dot += fmt::format("\t\t{};\n", name);
|
|
switch (block.end_class) {
|
|
case EndClass::Branch:
|
|
if (block.cond != IR::Condition::False) {
|
|
add_branch(block.branch_true, true);
|
|
}
|
|
if (block.cond != IR::Condition::True) {
|
|
add_branch(block.branch_false, false);
|
|
}
|
|
break;
|
|
case EndClass::Exit:
|
|
dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
|
dot +=
|
|
fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid);
|
|
++node_uid;
|
|
break;
|
|
case EndClass::Kill:
|
|
dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
|
dot +=
|
|
fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", node_uid);
|
|
++node_uid;
|
|
break;
|
|
}
|
|
}
|
|
dot += "\t\tlabel = \"main\";\n\t}\n";
|
|
if (blocks.empty()) {
|
|
dot += "Start;\n";
|
|
} else {
|
|
dot += fmt::format("\tStart -> {};\n", name_of(*blocks.begin()));
|
|
}
|
|
dot += fmt::format("\tStart [shape=diamond];\n");
|
|
dot += "}\n";
|
|
return dot;
|
|
}
|
|
|
|
} // namespace Shader::Gcn
|