// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>
#include <optional>

#include <fmt/format.h>

#include "common/assert.h"
#include "common/logging/log.h"
#include "shader_recompiler/frontend/control_flow_graph.h"

namespace Shader::Gcn {

struct Compare {
    bool operator()(const Block& lhs, u32 rhs) const noexcept {
        return lhs.begin < rhs;
    }

    bool operator()(u32 lhs, const Block& rhs) const noexcept {
        return lhs < rhs.begin;
    }

    bool operator()(const Block& lhs, const Block& rhs) const noexcept {
        return lhs.begin < rhs.begin;
    }
};

static IR::Condition MakeCondition(const GcnInst& inst) {
    if (inst.IsCmpx()) {
        return IR::Condition::Execnz;
    }

    switch (inst.opcode) {
    case Opcode::S_CBRANCH_SCC0:
        return IR::Condition::Scc0;
    case Opcode::S_CBRANCH_SCC1:
        return IR::Condition::Scc1;
    case Opcode::S_CBRANCH_VCCZ:
        return IR::Condition::Vccz;
    case Opcode::S_CBRANCH_VCCNZ:
        return IR::Condition::Vccnz;
    case Opcode::S_CBRANCH_EXECZ:
        return IR::Condition::Execz;
    case Opcode::S_CBRANCH_EXECNZ:
        return IR::Condition::Execnz;
    default:
        return IR::Condition::True;
    }
}

static bool IgnoresExecMask(const GcnInst& inst) {
    // The EXEC mask does not affect scalar instructions or branches.
    switch (inst.category) {
    case InstCategory::ScalarALU:
    case InstCategory::ScalarMemory:
    case InstCategory::FlowControl:
        return true;
    default:
        break;
    }
    // Read/Write Lane instructions are not affected either.
    switch (inst.opcode) {
    case Opcode::V_READLANE_B32:
    case Opcode::V_WRITELANE_B32:
    case Opcode::V_READFIRSTLANE_B32:
        return true;
    default:
        break;
    }
    return false;
}

static std::optional<u32> ResolveSetPcTarget(std::span<const GcnInst> list, u32 setpc_index,
                                             std::span<const u32> pc_map) {
    if (setpc_index < 3) {
        return std::nullopt;
    }
    const auto& getpc = list[setpc_index - 3];
    const auto& arith = list[setpc_index - 2];
    const auto& setpc = list[setpc_index];
    if (getpc.opcode != Opcode::S_GETPC_B64 ||
        !(arith.opcode == Opcode::S_ADD_U32 || arith.opcode == Opcode::S_SUB_U32) ||
        setpc.opcode != Opcode::S_SETPC_B64) {
        return std::nullopt;
    }
    if (getpc.dst[0].code != setpc.src[0].code || arith.dst[0].code != setpc.src[0].code) {
        return std::nullopt;
    }
    if (arith.src_count < 2 || arith.src[1].field != OperandField::LiteralConst) {
        return std::nullopt;
    }
    const u32 imm = arith.src[1].code;
    const s32 signed_offset =
        (arith.opcode == Opcode::S_ADD_U32) ? static_cast<s32>(imm) : -static_cast<s32>(imm);
    const u32 base_pc = pc_map[setpc_index - 3] + getpc.length;
    const u32 result_pc = static_cast<u32>(static_cast<s32>(base_pc) + signed_offset);
    LOG_DEBUG(Render_Recompiler, "SetPC target: {} + {} = {}", base_pc, signed_offset, result_pc);
    return result_pc & ~0x3u;
}

static constexpr size_t LabelReserveSize = 32;

CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
    : block_pool{block_pool_}, inst_list{inst_list_} {
    index_to_pc.resize(inst_list.size() + 1);
    labels.reserve(LabelReserveSize);
    EmitLabels();
    EmitBlocks();
    LinkBlocks();
    SplitDivergenceScopes();
}

void CFG::EmitLabels() {
    // Always set a label at the entry point.
    u32 pc = 0;
    AddLabel(pc);
    // Iterate the instruction list and add labels to branch targets.
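    // Worked example (hypothetical PCs, not taken from a real shader): a 4-byte
    // S_CBRANCH_SCC1 at PC 0x08 with taken target 0x20 adds labels at 0x20 (taken path)
    // and 0x0c (fall-through); a 4-byte S_ENDPGM at 0x20 adds a label at 0x24. Together
    // with the entry label this yields the block boundaries [0x0, 0x0c), [0x0c, 0x20)
    // and [0x20, 0x24) that EmitBlocks() later turns into blocks.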
    for (u32 i = 0; i < inst_list.size(); i++) {
        index_to_pc[i] = pc;
        const GcnInst inst = inst_list[i];
        if (inst.IsUnconditionalBranch()) {
            u32 target = inst.BranchTarget(pc);
            if (inst.opcode == Opcode::S_SETPC_B64) {
                if (auto t = ResolveSetPcTarget(inst_list, i, index_to_pc)) {
                    target = *t;
                } else {
                    ASSERT_MSG(false,
                               "S_SETPC_B64 without a resolvable offset at PC {:#x} (Index {}): "
                               "Involved instructions not recognized or invalid pattern",
                               pc, i);
                }
            }
            AddLabel(target);
            // Emit this label so that the block ends with the branching instruction.
            AddLabel(pc + inst.length);
        } else if (inst.IsConditionalBranch()) {
            const u32 true_label = inst.BranchTarget(pc);
            const u32 false_label = pc + inst.length;
            AddLabel(true_label);
            AddLabel(false_label);
        } else if (inst.opcode == Opcode::S_ENDPGM) {
            const u32 next_label = pc + inst.length;
            AddLabel(next_label);
        }
        pc += inst.length;
    }
    index_to_pc[inst_list.size()] = pc;

    // Sort labels to make sure block insertion is correct.
    std::ranges::sort(labels);
}

void CFG::SplitDivergenceScopes() {
    const auto is_open_scope = [](const GcnInst& inst) {
        // An open scope instruction is an instruction that modifies EXEC
        // but also saves the previous value to restore later. This indicates
        // we are entering a scope.
        return inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
               // While this instruction does not save EXEC it is often used paired
               // with SAVEEXEC to mask the threads that didn't pass the condition
               // of the initial branch.
               (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
               inst.IsCmpx();
    };
    const auto is_close_scope = [](const GcnInst& inst) {
        // Closing an EXEC scope can be done either by a branch instruction
        // (typical case when S_AND_SAVEEXEC_B64 is right before a branch)
        // or by a move instruction to EXEC that restores the backup.
        return (inst.opcode == Opcode::S_MOV_B64 && inst.dst[0].field == OperandField::ExecLo) ||
               // Sometimes the compiler might insert instructions between the SAVEEXEC and the
               // branch. Those instructions need to be wrapped in the condition as well, so allow
               // a branch as an end-of-scope instruction.
               inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ENDPGM ||
               (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
    };
    for (auto blk = blocks.begin(); blk != blocks.end(); blk++) {
        auto next_blk = std::next(blk);
        s32 curr_begin = -1;
        for (size_t index = blk->begin_index; index <= blk->end_index; index++) {
            const auto& inst = inst_list[index];
            const bool is_close = is_close_scope(inst);
            if ((is_close || index == blk->end_index) && curr_begin != -1) {
                // If there are no instructions inside the scope, don't do anything.
                if (index - curr_begin == 1) {
                    curr_begin = -1;
                    continue;
                }
                // If all instructions in the scope ignore exec masking, we shouldn't insert a
                // scope.
                const auto start = inst_list.begin() + curr_begin + 1;
                if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
                    // Determine the first instruction affected by the exec mask.
                    do {
                        ++curr_begin;
                    } while (IgnoresExecMask(inst_list[curr_begin]));

                    // Determine the last instruction affected by the exec mask.
                    s32 curr_end = index;
                    while (IgnoresExecMask(inst_list[curr_end])) {
                        --curr_end;
                    }

                    // Create a new block for the divergence scope.
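                    // Sketch of the split performed below (indices refer to inst_list,
                    // PCs come from index_to_pc):
                    //
                    //   before: [ parent: begin_index .. end_index ]
                    //   after:  [ parent .. curr_begin-1 ]
                    //             |-- Execnz --> [ scope: curr_begin .. curr_end ]
                    //             |-- else ----> [ epilogue: curr_end+1 .. end_index ]
                    //
                    // The divergence block always jumps to the epilogue, which inherits the
                    // parent's original terminator. If the scope already ends the parent block,
                    // no epilogue is made and branch_false targets the parent's old successor.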
                    Block* block = block_pool.Create();
                    block->begin = index_to_pc[curr_begin];
                    block->end = index_to_pc[curr_end];
                    block->begin_index = curr_begin;
                    block->end_index = curr_end;
                    block->end_inst = inst_list[curr_end];
                    blocks.insert_before(next_blk, *block);

                    // If we are inside the parent block, make an epilogue block and jump to it.
                    if (curr_end != blk->end_index) {
                        Block* epi_block = block_pool.Create();
                        epi_block->begin = index_to_pc[curr_end + 1];
                        epi_block->end = blk->end;
                        epi_block->begin_index = curr_end + 1;
                        epi_block->end_index = blk->end_index;
                        epi_block->end_inst = blk->end_inst;
                        epi_block->cond = blk->cond;
                        epi_block->end_class = blk->end_class;
                        epi_block->branch_true = blk->branch_true;
                        epi_block->branch_false = blk->branch_false;
                        blocks.insert_before(next_blk, *epi_block);

                        // Have the divergence block always jump to the epilogue block.
                        block->cond = IR::Condition::True;
                        block->branch_true = epi_block;
                        block->branch_false = nullptr;

                        // If the parent block fails to enter the divergence block,
                        // make it jump to the epilogue too.
                        blk->branch_false = epi_block;
                    } else {
                        // No epilogue block is needed since the divergence block
                        // also ends the parent block. Inherit the end condition.
                        auto& parent_blk = *blk;
                        ASSERT(parent_blk.cond == IR::Condition::True && parent_blk.branch_true);
                        block->cond = IR::Condition::True;
                        block->branch_true = blk->branch_true;
                        block->branch_false = nullptr;

                        // If the parent block didn't enter the divergence scope,
                        // have it jump directly to the next one.
                        blk->branch_false = blk->branch_true;
                    }

                    // Shrink the parent block to end right before curr_begin
                    // and make it jump to the divergence block.
                    --curr_begin;
                    blk->end = index_to_pc[curr_begin];
                    blk->end_index = curr_begin;
                    blk->end_inst = inst_list[curr_begin];
                    blk->cond = IR::Condition::Execnz;
                    blk->end_class = EndClass::Branch;
                    blk->branch_true = block;
                }
                // Reset scope begin.
                curr_begin = -1;
            }
            // Mark a potential start of an exec scope.
            if (is_open_scope(inst)) {
                curr_begin = index;
            }
        }
    }
}

void CFG::EmitBlocks() {
    for (auto it = labels.cbegin(); it != labels.cend(); ++it) {
        const Label start = *it;
        const auto next_it = std::next(it);
        const bool is_last = (next_it == labels.cend());
        if (is_last) {
            // The last label is special: it only marks the end of the previous block,
            // so no block starts there.
            return;
        }
        // The end label is the start instruction of the next block.
        // The end instruction of this block is the previous one.
        const Label end = *next_it;
        const size_t end_index = GetIndex(end) - 1;
        const auto& end_inst = inst_list[end_index];

        // Insert a block between the labels using the last instruction
        // as an indicator for the branching type.
        Block* block = block_pool.Create();
        block->begin = start;
        block->end = end;
        block->begin_index = GetIndex(start);
        block->end_index = end_index;
        block->end_inst = end_inst;
        block->cond = MakeCondition(end_inst);
        blocks.insert(*block);
    }
}

void CFG::LinkBlocks() {
    const auto get_block = [this](u32 address) {
        auto it = blocks.find(address, Compare{});
        ASSERT_MSG(it != blocks.cend() && it->begin == address);
        return &*it;
    };

    for (auto it = blocks.begin(); it != blocks.end(); it++) {
        auto& block = *it;
        const auto end_inst{block.end_inst};
        // If the block doesn't end with a branch we simply
        // need to link with the next block.
        if (!end_inst.IsTerminateInstruction()) {
            auto* next_block = get_block(block.end);
            ++next_block->num_predecessors;

            block.branch_true = next_block;
            block.end_class = EndClass::Branch;
            continue;
        }

        // Find the branch targets from the instruction and link the blocks.
        // Note: Block end address is one instruction after end_inst.
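        // The only indirect branch handled here is the GETPC/ADD/SETPC idiom, e.g.
        // (hypothetical registers and literal; an S_ADDC_U32 for the upper half usually
        // sits in between and is not inspected by ResolveSetPcTarget):
        //
        //   s_getpc_b64  s[0:1]        ; base = PC of the instruction after s_getpc_b64
        //   s_add_u32    s0, s0, lit   ; literal byte offset (negated for s_sub_u32)
        //   s_addc_u32   s1, s1, 0
        //   s_setpc_b64  s[0:1]        ; branch to base + offset
        //
        // Anything that does not match this shape triggers the assert below.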
        const u32 branch_pc = block.end - end_inst.length;
        u32 target_pc = 0;
        if (end_inst.opcode == Opcode::S_SETPC_B64) {
            auto tgt = ResolveSetPcTarget(inst_list, block.end_index, index_to_pc);
            ASSERT_MSG(tgt,
                       "S_SETPC_B64 without a resolvable offset at PC {:#x} (Index {}): Involved "
                       "instructions not recognized or invalid pattern",
                       branch_pc, block.end_index);
            target_pc = *tgt;
        } else {
            target_pc = end_inst.BranchTarget(branch_pc);
        }

        if (end_inst.IsUnconditionalBranch()) {
            auto* target_block = get_block(target_pc);
            ++target_block->num_predecessors;

            block.branch_true = target_block;
            block.end_class = EndClass::Branch;
        } else if (end_inst.IsConditionalBranch()) {
            auto* target_block = get_block(target_pc);
            ++target_block->num_predecessors;

            auto* end_block = get_block(block.end);
            ++end_block->num_predecessors;

            block.branch_true = target_block;
            block.branch_false = end_block;
            block.end_class = EndClass::Branch;
        } else if (end_inst.opcode == Opcode::S_ENDPGM) {
            const auto& prev_inst = inst_list[block.end_index - 1];
            if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
                if (prev_inst.control.exp.target != 9) {
                    block.end_class = EndClass::Kill;
                } else if (const auto& exec_mask = inst_list[block.end_index - 2];
                           exec_mask.src[0].field == OperandField::ConstZero) {
                    block.end_class = EndClass::Kill;
                } else {
                    block.end_class = EndClass::Exit;
                }
            } else {
                block.end_class = EndClass::Exit;
            }
        } else {
            UNREACHABLE();
        }
    }
}

std::string CFG::Dot() const {
    int node_uid{0};

    const auto name_of = [](const Block& block) { return fmt::format("\"{:#x}\"", block.begin); };

    std::string dot{"digraph shader {\n"};
    dot += fmt::format("\tsubgraph cluster_{} {{\n", 0);
    dot += fmt::format("\t\tnode [style=filled];\n");
    for (const Block& block : blocks) {
        const std::string name{name_of(block)};
        const auto add_branch = [&](Block* branch, bool add_label) {
            dot += fmt::format("\t\t{}->{}", name, name_of(*branch));
            if (add_label && block.cond != IR::Condition::True &&
                block.cond != IR::Condition::False) {
                dot += fmt::format(" [label=\"{}\"]", block.cond);
            }
            dot += '\n';
        };
        dot += fmt::format("\t\t{};\n", name);
        switch (block.end_class) {
        case EndClass::Branch:
            if (block.cond != IR::Condition::False) {
                add_branch(block.branch_true, true);
            }
            if (block.cond != IR::Condition::True) {
                add_branch(block.branch_false, false);
            }
            break;
        case EndClass::Exit:
            dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
            dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n",
                               node_uid);
            ++node_uid;
            break;
        case EndClass::Kill:
            dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
            dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
                               node_uid);
            ++node_uid;
            break;
        }
    }
    dot += "\t\tlabel = \"main\";\n\t}\n";
    if (blocks.empty()) {
        dot += "Start;\n";
    } else {
        dot += fmt::format("\tStart -> {};\n", name_of(*blocks.begin()));
    }
    dot += fmt::format("\tStart [shape=diamond];\n");
    dot += "}\n";
    return dot;
}

} // namespace Shader::Gcn
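
// For reference, CFG::Dot() emits a Graphviz digraph shaped roughly like the hand-written
// sample below (block names are the block start PCs; this is illustrative, not output
// captured from a real shader). Pipe it through `dot -Tsvg` to visualize the block layout
// while debugging the recompiler frontend.
//
//   digraph shader {
//       subgraph cluster_0 {
//           node [style=filled];
//           "0x0";
//           "0x0"->"0x10" [label="Scc1"]
//           "0x0"->"0x8"
//           "0x8";
//           "0x8"->N0;
//           N0 [label="Exit"][shape=square][style=stripped];
//           "0x10";
//           "0x10"->N1;
//           N1 [label="Kill"][shape=square][style=stripped];
//           label = "main";
//       }
//       Start -> "0x0";
//       Start [shape=diamond];
//   }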