video: Import new shader recompiler + display a triangle (#142)

This commit is contained in:
TheTurtle 2024-05-22 01:35:12 +03:00 committed by GitHub
parent 8cf64a33b2
commit 8730968385
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
103 changed files with 17793 additions and 729 deletions

View file

@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
class Block;
struct AbstractSyntaxNode {
enum class Type {
Block,
If,
EndIf,
Loop,
Repeat,
Break,
Return,
Unreachable,
};
union Data {
Block* block;
struct {
U1 cond;
Block* body;
Block* merge;
} if_node;
struct {
Block* merge;
} end_if;
struct {
Block* body;
Block* continue_block;
Block* merge;
} loop;
struct {
U1 cond;
Block* loop_header;
Block* merge;
} repeat;
struct {
U1 cond;
Block* merge;
Block* skip;
} break_node;
};
Data data{};
Type type{};
};
using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
} // namespace Shader::IR

View file

@ -0,0 +1,115 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <fmt/format.h>
#include "shader_recompiler/ir/attribute.h"
namespace Shader::IR {
bool IsParam(Attribute attribute) noexcept {
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
}
std::string NameOf(Attribute attribute) {
switch (attribute) {
case Attribute::RenderTarget0:
return "RenderTarget0";
case Attribute::RenderTarget1:
return "RenderTarget1";
case Attribute::RenderTarget2:
return "RenderTarget2";
case Attribute::RenderTarget3:
return "RenderTarget3";
case Attribute::RenderTarget4:
return "RenderTarget4";
case Attribute::RenderTarget5:
return "RenderTarget5";
case Attribute::RenderTarget6:
return "RenderTarget6";
case Attribute::RenderTarget7:
return "RenderTarget7";
case Attribute::Depth:
return "Depth";
case Attribute::Null:
return "Null";
case Attribute::Position0:
return "Position0";
case Attribute::Position1:
return "Position1";
case Attribute::Position2:
return "Position2";
case Attribute::Position3:
return "Position3";
case Attribute::Param0:
return "Param0";
case Attribute::Param1:
return "Param1";
case Attribute::Param2:
return "Param2";
case Attribute::Param3:
return "Param3";
case Attribute::Param4:
return "Param4";
case Attribute::Param5:
return "Param5";
case Attribute::Param6:
return "Param6";
case Attribute::Param7:
return "Param7";
case Attribute::Param8:
return "Param8";
case Attribute::Param9:
return "Param9";
case Attribute::Param10:
return "Param10";
case Attribute::Param11:
return "Param11";
case Attribute::Param12:
return "Param12";
case Attribute::Param13:
return "Param13";
case Attribute::Param14:
return "Param14";
case Attribute::Param15:
return "Param15";
case Attribute::Param16:
return "Param16";
case Attribute::Param17:
return "Param17";
case Attribute::Param18:
return "Param18";
case Attribute::Param19:
return "Param19";
case Attribute::Param20:
return "Param20";
case Attribute::Param21:
return "Param21";
case Attribute::Param22:
return "Param22";
case Attribute::Param23:
return "Param23";
case Attribute::Param24:
return "Param24";
case Attribute::Param25:
return "Param25";
case Attribute::Param26:
return "Param26";
case Attribute::Param27:
return "Param27";
case Attribute::Param28:
return "Param28";
case Attribute::Param29:
return "Param29";
case Attribute::Param30:
return "Param30";
case Attribute::Param31:
return "Param31";
case Attribute::VertexId:
return "VertexId";
default:
break;
}
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
}
} // namespace Shader::IR

View file

@ -0,0 +1,105 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <fmt/format.h>
#include "common/types.h"
#include "shader_recompiler/exception.h"
namespace Shader::IR {
enum class Attribute : u64 {
// Export targets
RenderTarget0 = 0,
RenderTarget1 = 1,
RenderTarget2 = 2,
RenderTarget3 = 3,
RenderTarget4 = 4,
RenderTarget5 = 5,
RenderTarget6 = 6,
RenderTarget7 = 7,
Depth = 8,
Null = 9,
Position0 = 12,
Position1 = 13,
Position2 = 14,
Position3 = 15,
Param0 = 32,
Param1 = 33,
Param2 = 34,
Param3 = 35,
Param4 = 36,
Param5 = 37,
Param6 = 38,
Param7 = 39,
Param8 = 40,
Param9 = 41,
Param10 = 42,
Param11 = 43,
Param12 = 44,
Param13 = 45,
Param14 = 46,
Param15 = 47,
Param16 = 48,
Param17 = 49,
Param18 = 50,
Param19 = 51,
Param20 = 52,
Param21 = 53,
Param22 = 54,
Param23 = 55,
Param24 = 56,
Param25 = 57,
Param26 = 58,
Param27 = 59,
Param28 = 60,
Param29 = 61,
Param30 = 62,
Param31 = 63,
// System values
ClipDistance = 64,
CullDistance = 65,
RenderTargetId = 66,
ViewportId = 67,
VertexId = 68,
PrimitiveId = 69,
InstanceId = 70,
IsFrontFace = 71,
SampleIndex = 72,
GlobalInvocationId = 73,
WorkgroupId = 74,
LocalInvocationId = 75,
LocalInvocationIndex = 76,
FragCoord = 77,
};
constexpr size_t EXP_NUM_POS = 4;
constexpr size_t EXP_NUM_PARAM = 32;
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
[[nodiscard]] std::string NameOf(Attribute attribute);
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
const int result{static_cast<int>(attr) + num};
if (result > static_cast<int>(Attribute::Param31)) {
throw LogicError("Overflow on register arithmetic");
}
if (result < static_cast<int>(Attribute::Param0)) {
throw LogicError("Underflow on register arithmetic");
}
return static_cast<Attribute>(result);
}
} // namespace Shader::IR
template <>
struct fmt::formatter<Shader::IR::Attribute> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(const Shader::IR::Attribute& attribute, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute));
}
};

View file

@ -0,0 +1,149 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <initializer_list>
#include <map>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
Block::~Block() = default;
void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
PrependNewInst(end(), op, args);
}
Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) {
Inst* const inst{inst_pool->Create(base_inst)};
return instructions.insert(insertion_point, *inst);
}
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args, u32 flags) {
Inst* const inst{inst_pool->Create(op, flags)};
const auto result_it{instructions.insert(insertion_point, *inst)};
if (inst->NumArgs() != args.size()) {
throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
}
std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
inst->SetArg(index, arg);
++index;
});
return result_it;
}
void Block::AddBranch(Block* block) {
if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
throw LogicError("Successor already inserted");
}
if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
throw LogicError("Predecessor already inserted");
}
imm_successors.push_back(block);
block->imm_predecessors.push_back(this);
}
static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
Block* block) {
if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) {
return fmt::format("{{Block ${}}}", it->second);
}
return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
}
static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
const Inst* inst) {
const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)};
if (is_inserted) {
++inst_index;
}
return it->second;
}
static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
const Value& arg) {
if (arg.IsEmpty()) {
return "<null>";
}
if (!arg.IsImmediate() || arg.IsIdentity()) {
return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
}
switch (arg.Type()) {
case Type::U1:
return fmt::format("#{}", arg.U1() ? "true" : "false");
case Type::U8:
return fmt::format("#{}", arg.U8());
case Type::U16:
return fmt::format("#{}", arg.U16());
case Type::U32:
return fmt::format("#{}", arg.U32());
case Type::U64:
return fmt::format("#{}", arg.U64());
case Type::F32:
return fmt::format("#{}", arg.F32());
case Type::ScalarReg:
return fmt::format("{}", arg.ScalarReg());
case Type::VectorReg:
return fmt::format("{}", arg.VectorReg());
case Type::Attribute:
return fmt::format("{}", arg.Attribute());
default:
return "<unknown immediate type>";
}
}
std::string DumpBlock(const Block& block) {
size_t inst_index{0};
std::map<const Inst*, size_t> inst_to_index;
return DumpBlock(block, {}, inst_to_index, inst_index);
}
std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index,
std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) {
std::string ret{"Block"};
if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
ret += fmt::format(" ${}", it->second);
}
ret += '\n';
for (const Inst& inst : block) {
const Opcode op{inst.GetOpcode()};
ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
if (TypeOf(op) != Type::Void) {
ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
} else {
ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
}
const size_t arg_count{inst.NumArgs()};
for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
const Value arg{inst.Arg(arg_index)};
const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)};
ret += arg_index != 0 ? ", " : " ";
if (op == Opcode::Phi) {
ret += fmt::format("[ {}, {} ]", arg_str,
BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
} else {
ret += arg_str;
}
if (op != Opcode::Phi) {
const Type actual_type{arg.Type()};
const Type expected_type{ArgTypeOf(op, arg_index)};
if (!AreTypesCompatible(actual_type, expected_type)) {
ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
}
}
}
if (TypeOf(op) != Type::Void) {
ret += fmt::format(" (uses: {})\n", inst.UseCount());
} else {
ret += '\n';
}
}
return ret;
}
} // namespace Shader::IR

View file

@ -0,0 +1,180 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <initializer_list>
#include <map>
#include <span>
#include <vector>
#include <boost/intrusive/list.hpp>
#include "common/types.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::IR {
class Block {
public:
using InstructionList = boost::intrusive::list<Inst>;
using size_type = InstructionList::size_type;
using iterator = InstructionList::iterator;
using const_iterator = InstructionList::const_iterator;
using reverse_iterator = InstructionList::reverse_iterator;
using const_reverse_iterator = InstructionList::const_reverse_iterator;
explicit Block(ObjectPool<Inst>& inst_pool_);
~Block();
Block(const Block&) = delete;
Block& operator=(const Block&) = delete;
Block(Block&&) = default;
Block& operator=(Block&&) = default;
/// Appends a new instruction to the end of this basic block.
void AppendNewInst(Opcode op, std::initializer_list<Value> args);
/// Prepends a copy of an instruction to this basic block before the insertion point.
iterator PrependNewInst(iterator insertion_point, const Inst& base_inst);
/// Prepends a new instruction to this basic block before the insertion point.
iterator PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args = {}, u32 flags = 0);
/// Adds a new branch to this basic block.
void AddBranch(Block* block);
/// Gets a mutable reference to the instruction list for this basic block.
[[nodiscard]] InstructionList& Instructions() noexcept {
return instructions;
}
/// Gets an immutable reference to the instruction list for this basic block.
[[nodiscard]] const InstructionList& Instructions() const noexcept {
return instructions;
}
/// Gets an immutable span to the immediate predecessors.
[[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept {
return imm_predecessors;
}
/// Gets an immutable span to the immediate successors.
[[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
return imm_successors;
}
/// Intrusively store the host definition of this instruction.
template <typename T>
void SetDefinition(T def) {
definition = std::bit_cast<u32>(def);
}
/// Return the intrusively stored host definition of this instruction.
template <typename T>
[[nodiscard]] T Definition() const noexcept {
return std::bit_cast<T>(definition);
}
void SsaSeal() noexcept {
is_ssa_sealed = true;
}
[[nodiscard]] bool IsSsaSealed() const noexcept {
return is_ssa_sealed;
}
[[nodiscard]] bool empty() const {
return instructions.empty();
}
[[nodiscard]] size_type size() const {
return instructions.size();
}
[[nodiscard]] Inst& front() {
return instructions.front();
}
[[nodiscard]] const Inst& front() const {
return instructions.front();
}
[[nodiscard]] Inst& back() {
return instructions.back();
}
[[nodiscard]] const Inst& back() const {
return instructions.back();
}
[[nodiscard]] iterator begin() {
return instructions.begin();
}
[[nodiscard]] const_iterator begin() const {
return instructions.begin();
}
[[nodiscard]] iterator end() {
return instructions.end();
}
[[nodiscard]] const_iterator end() const {
return instructions.end();
}
[[nodiscard]] reverse_iterator rbegin() {
return instructions.rbegin();
}
[[nodiscard]] const_reverse_iterator rbegin() const {
return instructions.rbegin();
}
[[nodiscard]] reverse_iterator rend() {
return instructions.rend();
}
[[nodiscard]] const_reverse_iterator rend() const {
return instructions.rend();
}
[[nodiscard]] const_iterator cbegin() const {
return instructions.cbegin();
}
[[nodiscard]] const_iterator cend() const {
return instructions.cend();
}
[[nodiscard]] const_reverse_iterator crbegin() const {
return instructions.crbegin();
}
[[nodiscard]] const_reverse_iterator crend() const {
return instructions.crend();
}
/// Intrusively store the value of a register in the block.
std::array<Value, NumScalarRegs> ssa_sreg_values;
std::array<Value, NumVectorRegs> ssa_vreg_values;
private:
/// Memory pool for instruction list
ObjectPool<Inst>* inst_pool;
/// List of instructions in this block
InstructionList instructions;
/// Block immediate predecessors
std::vector<Block*> imm_predecessors;
/// Block immediate successors
std::vector<Block*> imm_successors;
/// Intrusively store if the block is sealed in the SSA pass.
bool is_ssa_sealed{false};
/// Intrusively stored host definition of this block.
u32 definition{};
};
using BlockList = std::vector<Block*>;
[[nodiscard]] std::string DumpBlock(const Block& block);
[[nodiscard]] std::string DumpBlock(const Block& block,
const std::map<const Block*, size_t>& block_to_index,
std::map<const Inst*, size_t>& inst_to_index,
size_t& inst_index);
} // namespace Shader::IR

View file

@ -0,0 +1,50 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <fmt/format.h>
#include "common/types.h"
namespace Shader::IR {
enum class Condition : u32 {
False,
True,
Scc0,
Scc1,
Vccz,
Vccnz,
Execz,
Execnz,
};
constexpr std::string_view NameOf(Condition condition) {
switch (condition) {
case Condition::False:
return "False";
case Condition::True:
return "True";
case Condition::Scc0:
return "Scc0";
case Condition::Scc1:
return "Scc1";
case Condition::Vccz:
return "Vccz";
case Condition::Vccnz:
return "Vccnz";
case Condition::Execz:
return "Execz";
case Condition::Execnz:
return "Execnz";
}
}
} // namespace Shader::IR
template <>
struct fmt::formatter<Shader::IR::Condition> : formatter<std::string_view> {
auto format(const Shader::IR::Condition& cond, format_context& ctx) const {
return formatter<string_view>::format(NameOf(cond), ctx);
}
};

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,250 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstring>
#include <type_traits>
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/condition.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
class IREmitter {
public:
explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
explicit IREmitter(Block& block_, Block::iterator insertion_point_)
: block{&block_}, insertion_point{insertion_point_} {}
Block* block;
[[nodiscard]] U1 Imm1(bool value) const;
[[nodiscard]] U8 Imm8(u8 value) const;
[[nodiscard]] U16 Imm16(u16 value) const;
[[nodiscard]] U32 Imm32(u32 value) const;
[[nodiscard]] U32 Imm32(s32 value) const;
[[nodiscard]] F32 Imm32(f32 value) const;
[[nodiscard]] U64 Imm64(u64 value) const;
[[nodiscard]] U64 Imm64(s64 value) const;
[[nodiscard]] F64 Imm64(f64 value) const;
template <typename Dest, typename Source>
[[nodiscard]] Dest BitCast(const Source& value);
U1 ConditionRef(const U1& value);
void Reference(const Value& value);
void PhiMove(IR::Inst& phi, const Value& value);
void Prologue();
void Epilogue();
template <typename T = U32>
[[nodiscard]] T GetScalarReg(IR::ScalarReg reg);
template <typename T = U32>
[[nodiscard]] T GetVectorReg(IR::VectorReg reg);
void SetScalarReg(IR::ScalarReg reg, const U32F32& value);
void SetVectorReg(IR::VectorReg reg, const U32F32& value);
[[nodiscard]] U1 GetGotoVariable(u32 id);
void SetGotoVariable(u32 id, const U1& value);
[[nodiscard]] U1 GetVcc();
void SetVcc(const U1& value);
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp);
void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp);
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);
[[nodiscard]] U32 ReadConst(const U64& address, const U32& offset);
template <typename T = U32>
[[nodiscard]] T ReadConstBuffer(const Value& handle, const U32& index, const U32& offset);
[[nodiscard]] U1 GetZeroFromOp(const Value& op);
[[nodiscard]] U1 GetSignFromOp(const Value& op);
[[nodiscard]] U1 GetCarryFromOp(const Value& op);
[[nodiscard]] U1 GetOverflowFromOp(const Value& op);
[[nodiscard]] U1 GetSparseFromOp(const Value& op);
[[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
const Value& e4);
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
[[nodiscard]] Value Select(const U1& condition, const Value& true_value,
const Value& false_value);
[[nodiscard]] U64 PackUint2x32(const Value& vector);
[[nodiscard]] Value UnpackUint2x32(const U64& value);
[[nodiscard]] U32 PackFloat2x16(const Value& vector);
[[nodiscard]] Value UnpackFloat2x16(const U32& value);
[[nodiscard]] U32 PackHalf2x16(const Value& vector);
[[nodiscard]] Value UnpackHalf2x16(const U32& value);
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
[[nodiscard]] F32F64 FPAbs(const F32F64& value);
[[nodiscard]] F32F64 FPNeg(const F32F64& value);
[[nodiscard]] F32F64 FPAbsNeg(const F32F64& value, bool abs, bool neg);
[[nodiscard]] F32 FPCos(const F32& value);
[[nodiscard]] F32 FPSin(const F32& value);
[[nodiscard]] F32 FPExp2(const F32& value);
[[nodiscard]] F32 FPLog2(const F32& value);
[[nodiscard]] F32F64 FPRecip(const F32F64& value);
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
[[nodiscard]] F32 FPSqrt(const F32& value);
[[nodiscard]] F32F64 FPSaturate(const F32F64& value);
[[nodiscard]] F32F64 FPClamp(const F32F64& value, const F32F64& min_value,
const F32F64& max_value);
[[nodiscard]] F32F64 FPRoundEven(const F32F64& value);
[[nodiscard]] F32F64 FPFloor(const F32F64& value);
[[nodiscard]] F32F64 FPCeil(const F32F64& value);
[[nodiscard]] F32F64 FPTrunc(const F32F64& value);
[[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPLessThanEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPGreaterThanEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPLessThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPIsNan(const F32F64& value);
[[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
[[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
[[nodiscard]] U32U64 INeg(const U32U64& value);
[[nodiscard]] U32 IAbs(const U32& value);
[[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
[[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
[[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
[[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
[[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
[[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
[[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
const U32& count);
[[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
bool is_signed = false);
[[nodiscard]] U32 BitReverse(const U32& value);
[[nodiscard]] U32 BitCount(const U32& value);
[[nodiscard]] U32 BitwiseNot(const U32& value);
[[nodiscard]] U32 FindSMsb(const U32& value);
[[nodiscard]] U32 FindUMsb(const U32& value);
[[nodiscard]] U32 SMin(const U32& a, const U32& b);
[[nodiscard]] U32 UMin(const U32& a, const U32& b);
[[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
[[nodiscard]] U32 SMax(const U32& a, const U32& b);
[[nodiscard]] U32 UMax(const U32& a, const U32& b);
[[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
[[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
[[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);
[[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
[[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
[[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
[[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
[[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
[[nodiscard]] U1 LogicalNot(const U1& value);
[[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F32F64& value);
[[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F32F64& value);
[[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F32F64& value);
[[nodiscard]] F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value);
[[nodiscard]] F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value);
[[nodiscard]] F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
const Value& value);
[[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
const F32& bias, const Value& offset,
const F32& lod_clamp, TextureInstInfo info);
[[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
const F32& lod, const Value& offset,
TextureInstInfo info);
[[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
const F32& dref, const F32& bias,
const Value& offset, const F32& lod_clamp,
TextureInstInfo info);
[[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
const F32& dref, const F32& lod,
const Value& offset, TextureInstInfo info);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips);
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
const IR::U1& skip_mips, TextureInstInfo info);
[[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
TextureInstInfo info);
[[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
const Value& offset2, TextureInstInfo info);
[[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
const Value& offset, const Value& offset2, const F32& dref,
TextureInstInfo info);
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
const U32& lod, const U32& multisampling, TextureInstInfo info);
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
const Value& derivatives, const Value& offset,
const F32& lod_clamp, TextureInstInfo info);
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
TextureInstInfo info);
private:
IR::Block::iterator insertion_point;
template <typename T = Value, typename... Args>
T Inst(Opcode op, Args... args) {
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
return T{Value{&*it}};
}
template <typename T>
requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>)
struct Flags {
Flags() = default;
Flags(T proxy_) : proxy{proxy_} {}
T proxy;
};
template <typename T = Value, typename FlagType, typename... Args>
T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
u32 raw_flags{};
std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
return T{Value{&*it}};
}
};
} // namespace Shader::IR

View file

@ -0,0 +1,167 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <memory>
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/type.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
if (op == Opcode::Phi) {
std::construct_at(&phi_args);
} else {
std::construct_at(&args);
}
}
Inst::Inst(const Inst& base) : op{base.op}, flags{base.flags} {
if (base.op == Opcode::Phi) {
throw NotImplementedException("Copying phi node");
}
std::construct_at(&args);
const size_t num_args{base.NumArgs()};
for (size_t index = 0; index < num_args; ++index) {
SetArg(index, base.Arg(index));
}
}
Inst::~Inst() {
if (op == Opcode::Phi) {
std::destroy_at(&phi_args);
} else {
std::destroy_at(&args);
}
}
bool Inst::MayHaveSideEffects() const noexcept {
switch (op) {
case Opcode::ConditionRef:
case Opcode::Reference:
case Opcode::PhiMove:
case Opcode::Prologue:
case Opcode::Epilogue:
// case Opcode::Join:
// case Opcode::Barrier:
// case Opcode::WorkgroupMemoryBarrier:
// case Opcode::DeviceMemoryBarrier:
// case Opcode::EmitVertex:
// case Opcode::EndPrimitive:
case Opcode::SetAttribute:
// case Opcode::SetFragColor:
// case Opcode::SetFragDepth:
return true;
default:
return false;
}
}
bool Inst::AreAllArgsImmediates() const {
if (op == Opcode::Phi) {
throw LogicError("Testing for all arguments are immediates on phi instruction");
}
return std::all_of(args.begin(), args.begin() + NumArgs(),
[](const IR::Value& value) { return value.IsImmediate(); });
}
IR::Type Inst::Type() const {
return TypeOf(op);
}
void Inst::SetArg(size_t index, Value value) {
if (index >= NumArgs()) {
throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
}
const IR::Value arg{Arg(index)};
if (!arg.IsImmediate()) {
UndoUse(arg);
}
if (!value.IsImmediate()) {
Use(value);
}
if (op == Opcode::Phi) {
phi_args[index].second = value;
} else {
args[index] = value;
}
}
Block* Inst::PhiBlock(size_t index) const {
if (op != Opcode::Phi) {
throw LogicError("{} is not a Phi instruction", op);
}
if (index >= phi_args.size()) {
throw InvalidArgument("Out of bounds argument index {} in phi instruction");
}
return phi_args[index].first;
}
void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
if (!value.IsImmediate()) {
Use(value);
}
phi_args.emplace_back(predecessor, value);
}
void Inst::Invalidate() {
ClearArgs();
ReplaceOpcode(Opcode::Void);
}
void Inst::ClearArgs() {
if (op == Opcode::Phi) {
for (auto& pair : phi_args) {
IR::Value& value{pair.second};
if (!value.IsImmediate()) {
UndoUse(value);
}
}
phi_args.clear();
} else {
for (auto& value : args) {
if (!value.IsImmediate()) {
UndoUse(value);
}
}
// Reset arguments to null
// std::memset was measured to be faster on MSVC than std::ranges:fill
std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
}
}
void Inst::ReplaceUsesWith(Value replacement) {
Invalidate();
ReplaceOpcode(Opcode::Identity);
if (!replacement.IsImmediate()) {
Use(replacement);
}
args[0] = replacement;
}
void Inst::ReplaceOpcode(IR::Opcode opcode) {
if (opcode == IR::Opcode::Phi) {
throw LogicError("Cannot transition into Phi");
}
if (op == Opcode::Phi) {
// Transition out of phi arguments into non-phi
std::destroy_at(&phi_args);
std::construct_at(&args);
}
op = opcode;
}
void Inst::Use(const Value& value) {
Inst* const inst{value.Inst()};
++inst->use_count;
}
void Inst::UndoUse(const Value& value) {
Inst* const inst{value.Inst()};
--inst->use_count;
}
} // namespace Shader::IR

View file

@ -0,0 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/opcodes.h"
namespace Shader::IR {
std::string_view NameOf(Opcode op) {
return Detail::META_TABLE[static_cast<size_t>(op)].name;
}
} // namespace Shader::IR

View file

@ -0,0 +1,107 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <array>
#include <fmt/format.h>
#include "common/types.h"
#include "shader_recompiler/ir/type.h"
namespace Shader::IR {
enum class Opcode {
#define OPCODE(name, ...) name,
#include "opcodes.inc"
#undef OPCODE
};
namespace Detail {
struct OpcodeMeta {
std::string_view name;
Type type;
std::array<Type, 5> arg_types;
};
// using enum Type;
constexpr Type Void{Type::Void};
constexpr Type Opaque{Type::Opaque};
constexpr Type ScalarReg{Type::ScalarReg};
constexpr Type VectorReg{Type::VectorReg};
constexpr Type Attribute{Type::Attribute};
constexpr Type SystemValue{Type::SystemValue};
constexpr Type U1{Type::U1};
constexpr Type U8{Type::U8};
constexpr Type U16{Type::U16};
constexpr Type U32{Type::U32};
constexpr Type U64{Type::U64};
constexpr Type F16{Type::F16};
constexpr Type F32{Type::F32};
constexpr Type F64{Type::F64};
constexpr Type U32x2{Type::U32x2};
constexpr Type U32x3{Type::U32x3};
constexpr Type U32x4{Type::U32x4};
constexpr Type F16x2{Type::F16x2};
constexpr Type F16x3{Type::F16x3};
constexpr Type F16x4{Type::F16x4};
constexpr Type F32x2{Type::F32x2};
constexpr Type F32x3{Type::F32x3};
constexpr Type F32x4{Type::F32x4};
constexpr Type F64x2{Type::F64x2};
constexpr Type F64x3{Type::F64x3};
constexpr Type F64x4{Type::F64x4};
constexpr OpcodeMeta META_TABLE[]{
#define OPCODE(name_token, type_token, ...) \
{ \
.name{#name_token}, \
.type = type_token, \
.arg_types{__VA_ARGS__}, \
},
#include "opcodes.inc"
#undef OPCODE
};
constexpr size_t CalculateNumArgsOf(Opcode op) {
const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
return static_cast<size_t>(
std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
}
constexpr u8 NUM_ARGS[]{
#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
#include "opcodes.inc"
#undef OPCODE
};
} // namespace Detail
/// Get return type of an opcode
[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
return Detail::META_TABLE[static_cast<size_t>(op)].type;
}
/// Get the number of arguments an opcode accepts
[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
}
/// Get the required type of an argument of an opcode
[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
}
/// Get the name of an opcode
[[nodiscard]] std::string_view NameOf(Opcode op);
} // namespace Shader::IR
template <>
struct fmt::formatter<Shader::IR::Opcode> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
template <typename FormatContext>
auto format(const Shader::IR::Opcode& op, FormatContext& ctx) const {
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
}
};

View file

@ -0,0 +1,247 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg4 type, ...
OPCODE(Phi, Opaque, )
OPCODE(Identity, Opaque, Opaque, )
OPCODE(Void, Void, )
OPCODE(ConditionRef, U1, U1, )
OPCODE(Reference, Void, Opaque, )
OPCODE(PhiMove, Void, Opaque, Opaque, )
// Special operations
OPCODE(Prologue, Void, )
OPCODE(Epilogue, Void, )
// Constant memory operations
OPCODE(ReadConst, U32, U64, U32, )
OPCODE(ReadConstBuffer, U32, Opaque, U32, U32 )
OPCODE(ReadConstBufferF32, F32, Opaque, U32, U32 )
// Context getters/setters
OPCODE(GetScalarRegister, U32, ScalarReg, )
OPCODE(SetScalarRegister, Void, ScalarReg, U32, )
OPCODE(GetVectorRegister, U32, VectorReg, )
OPCODE(SetVectorRegister, Void, VectorReg, U32, )
OPCODE(GetGotoVariable, U1, U32, )
OPCODE(SetGotoVariable, Void, U32, U1, )
OPCODE(GetAttribute, F32, Attribute, U32, )
OPCODE(GetAttributeU32, U32, Attribute, U32, )
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
// Flags
//OPCODE(GetScc, U1, Void, )
OPCODE(GetVcc, U1, Void, )
//OPCODE(SetScc, Void, U1, )
OPCODE(SetVcc, Void, U1, )
// Undefined
OPCODE(UndefU1, U1, )
OPCODE(UndefU8, U8, )
OPCODE(UndefU16, U16, )
OPCODE(UndefU32, U32, )
OPCODE(UndefU64, U64, )
// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
// Select operations
OPCODE(SelectU1, U1, U1, U1, U1, )
OPCODE(SelectU8, U8, U1, U8, U8, )
OPCODE(SelectU16, U16, U1, U16, U16, )
OPCODE(SelectU32, U32, U1, U32, U32, )
OPCODE(SelectU64, U64, U1, U64, U64, )
OPCODE(SelectF32, F32, U1, F32, F32, )
OPCODE(SelectF64, F64, U1, F64, F64, )
// Bitwise conversions
OPCODE(BitCastU16F16, U16, F16, )
OPCODE(BitCastU32F32, U32, F32, )
OPCODE(BitCastU64F64, U64, F64, )
OPCODE(BitCastF16U16, F16, U16, )
OPCODE(BitCastF32U32, F32, U32, )
OPCODE(BitCastF64U64, F64, U64, )
OPCODE(PackUint2x32, U64, U32x2, )
OPCODE(UnpackUint2x32, U32x2, U64, )
OPCODE(PackFloat2x16, U32, F16x2, )
OPCODE(UnpackFloat2x16, F16x2, U32, )
OPCODE(PackHalf2x16, U32, F32x2, )
OPCODE(UnpackHalf2x16, F32x2, U32, )
// Floating-point operations
OPCODE(FPAbs32, F32, F32, )
OPCODE(FPAbs64, F64, F64, )
OPCODE(FPAdd32, F32, F32, F32, )
OPCODE(FPAdd64, F64, F64, F64, )
OPCODE(FPFma32, F32, F32, F32, F32, )
OPCODE(FPFma64, F64, F64, F64, F64, )
OPCODE(FPMax32, F32, F32, F32, )
OPCODE(FPMax64, F64, F64, F64, )
OPCODE(FPMin32, F32, F32, F32, )
OPCODE(FPMin64, F64, F64, F64, )
OPCODE(FPMul32, F32, F32, F32, )
OPCODE(FPMul64, F64, F64, F64, )
OPCODE(FPNeg32, F32, F32, )
OPCODE(FPNeg64, F64, F64, )
OPCODE(FPRecip32, F32, F32, )
OPCODE(FPRecip64, F64, F64, )
OPCODE(FPRecipSqrt32, F32, F32, )
OPCODE(FPRecipSqrt64, F64, F64, )
OPCODE(FPSqrt, F32, F32, )
OPCODE(FPSin, F32, F32, )
OPCODE(FPExp2, F32, F32, )
OPCODE(FPCos, F32, F32, )
OPCODE(FPLog2, F32, F32, )
OPCODE(FPSaturate32, F32, F32, )
OPCODE(FPSaturate64, F64, F64, )
OPCODE(FPClamp32, F32, F32, F32, F32, )
OPCODE(FPClamp64, F64, F64, F64, F64, )
OPCODE(FPRoundEven32, F32, F32, )
OPCODE(FPRoundEven64, F64, F64, )
OPCODE(FPFloor32, F32, F32, )
OPCODE(FPFloor64, F64, F64, )
OPCODE(FPCeil32, F32, F32, )
OPCODE(FPCeil64, F64, F64, )
OPCODE(FPTrunc32, F32, F32, )
OPCODE(FPTrunc64, F64, F64, )
OPCODE(FPOrdEqual32, U1, F32, F32, )
OPCODE(FPOrdEqual64, U1, F64, F64, )
OPCODE(FPUnordEqual32, U1, F32, F32, )
OPCODE(FPUnordEqual64, U1, F64, F64, )
OPCODE(FPOrdNotEqual32, U1, F32, F32, )
OPCODE(FPOrdNotEqual64, U1, F64, F64, )
OPCODE(FPUnordNotEqual32, U1, F32, F32, )
OPCODE(FPUnordNotEqual64, U1, F64, F64, )
OPCODE(FPOrdLessThan32, U1, F32, F32, )
OPCODE(FPOrdLessThan64, U1, F64, F64, )
OPCODE(FPUnordLessThan32, U1, F32, F32, )
OPCODE(FPUnordLessThan64, U1, F64, F64, )
OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
OPCODE(FPIsNan32, U1, F32, )
OPCODE(FPIsNan64, U1, F64, )
// Integer operations
OPCODE(IAdd32, U32, U32, U32, )
OPCODE(IAdd64, U64, U64, U64, )
OPCODE(ISub32, U32, U32, U32, )
OPCODE(ISub64, U64, U64, U64, )
OPCODE(IMul32, U32, U32, U32, )
OPCODE(SDiv32, U32, U32, U32, )
OPCODE(UDiv32, U32, U32, U32, )
OPCODE(INeg32, U32, U32, )
OPCODE(INeg64, U64, U64, )
OPCODE(IAbs32, U32, U32, )
OPCODE(ShiftLeftLogical32, U32, U32, U32, )
OPCODE(ShiftLeftLogical64, U64, U64, U32, )
OPCODE(ShiftRightLogical32, U32, U32, U32, )
OPCODE(ShiftRightLogical64, U64, U64, U32, )
OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
OPCODE(BitwiseAnd32, U32, U32, U32, )
OPCODE(BitwiseOr32, U32, U32, U32, )
OPCODE(BitwiseXor32, U32, U32, U32, )
OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
OPCODE(BitReverse32, U32, U32, )
OPCODE(BitCount32, U32, U32, )
OPCODE(BitwiseNot32, U32, U32, )
OPCODE(FindSMsb32, U32, U32, )
OPCODE(FindUMsb32, U32, U32, )
OPCODE(SMin32, U32, U32, U32, )
OPCODE(UMin32, U32, U32, U32, )
OPCODE(SMax32, U32, U32, U32, )
OPCODE(UMax32, U32, U32, U32, )
OPCODE(SClamp32, U32, U32, U32, U32, )
OPCODE(UClamp32, U32, U32, U32, U32, )
OPCODE(SLessThan, U1, U32, U32, )
OPCODE(ULessThan, U1, U32, U32, )
OPCODE(IEqual, U1, U32, U32, )
OPCODE(SLessThanEqual, U1, U32, U32, )
OPCODE(ULessThanEqual, U1, U32, U32, )
OPCODE(SGreaterThan, U1, U32, U32, )
OPCODE(UGreaterThan, U1, U32, U32, )
OPCODE(INotEqual, U1, U32, U32, )
OPCODE(SGreaterThanEqual, U1, U32, U32, )
OPCODE(UGreaterThanEqual, U1, U32, U32, )
// Logical operations
OPCODE(LogicalOr, U1, U1, U1, )
OPCODE(LogicalAnd, U1, U1, U1, )
OPCODE(LogicalXor, U1, U1, U1, )
OPCODE(LogicalNot, U1, U1, )
// Conversion operations
OPCODE(ConvertS32F32, U32, F32, )
OPCODE(ConvertS32F64, U32, F64, )
OPCODE(ConvertU32F32, U32, F32, )
OPCODE(ConvertF16F32, F16, F32, )
OPCODE(ConvertF32F16, F32, F16, )
OPCODE(ConvertF32F64, F32, F64, )
OPCODE(ConvertF64F32, F64, F32, )
OPCODE(ConvertF32S32, F32, U32, )
OPCODE(ConvertF32U32, F32, U32, )
OPCODE(ConvertF64S32, F64, U32, )
OPCODE(ConvertF64U32, F64, U32, )
// Image operations
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )

View file

@ -0,0 +1,403 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <bit>
#include <optional>
#include <type_traits>
#include "common/func_traits.h"
#include "shader_recompiler/ir/basic_block.h"
namespace Shader::Optimization {
template <typename T>
[[nodiscard]] T Arg(const IR::Value& value) {
if constexpr (std::is_same_v<T, bool>) {
return value.U1();
} else if constexpr (std::is_same_v<T, u32>) {
return value.U32();
} else if constexpr (std::is_same_v<T, s32>) {
return static_cast<s32>(value.U32());
} else if constexpr (std::is_same_v<T, f32>) {
return value.F32();
} else if constexpr (std::is_same_v<T, u64>) {
return value.U64();
}
}
template <typename Func, size_t... I>
IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) {
using Traits = Common::LambdaTraits<decltype(func)>;
return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)};
}
template <typename T, typename ImmFn>
bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) {
const IR::Value lhs{inst.Arg(0)};
const IR::Value rhs{inst.Arg(1)};
const bool is_lhs_immediate{lhs.IsImmediate()};
const bool is_rhs_immediate{rhs.IsImmediate()};
if (is_lhs_immediate && is_rhs_immediate) {
const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))};
inst.ReplaceUsesWith(IR::Value{result});
return false;
}
if (is_lhs_immediate && !is_rhs_immediate) {
IR::Inst* const rhs_inst{rhs.InstRecursive()};
if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) {
const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))};
inst.SetArg(0, rhs_inst->Arg(0));
inst.SetArg(1, IR::Value{combined});
} else {
// Normalize
inst.SetArg(0, rhs);
inst.SetArg(1, lhs);
}
}
if (!is_lhs_immediate && is_rhs_immediate) {
const IR::Inst* const lhs_inst{lhs.InstRecursive()};
if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) {
const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))};
inst.SetArg(0, lhs_inst->Arg(0));
inst.SetArg(1, IR::Value{combined});
}
}
return true;
}
template <typename Func>
bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
if (!inst.AreAllArgsImmediates() /*|| inst.HasAssociatedPseudoOperation()*/) {
return false;
}
using Indices = std::make_index_sequence<Common::LambdaTraits<decltype(func)>::NUM_ARGS>;
inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{}));
return true;
}
template <IR::Opcode op, typename Dest, typename Source>
void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
const IR::Value value{inst.Arg(0)};
if (value.IsImmediate()) {
inst.ReplaceUsesWith(IR::Value{std::bit_cast<Dest>(Arg<Source>(value))});
return;
}
IR::Inst* const arg_inst{value.InstRecursive()};
if (arg_inst->GetOpcode() == reverse) {
inst.ReplaceUsesWith(arg_inst->Arg(0));
return;
}
if constexpr (op == IR::Opcode::BitCastF32U32) {
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
// Replace the bitcast with a typed constant buffer read
inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
inst.SetArg(0, arg_inst->Arg(0));
inst.SetArg(1, arg_inst->Arg(1));
return;
}
}
}
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
IR::Opcode construct, u32 first_index) {
IR::Inst* const inst{inst_value.InstRecursive()};
if (inst->GetOpcode() == construct) {
return inst->Arg(first_index);
}
if (inst->GetOpcode() != insert) {
return std::nullopt;
}
IR::Value value_index{inst->Arg(2)};
if (!value_index.IsImmediate()) {
return std::nullopt;
}
const u32 second_index{value_index.U32()};
if (first_index != second_index) {
IR::Value value_composite{inst->Arg(0)};
if (value_composite.IsImmediate()) {
return std::nullopt;
}
return FoldCompositeExtractImpl(value_composite, insert, construct, first_index);
}
return inst->Arg(1);
}
void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) {
const IR::Value value_1{inst.Arg(0)};
const IR::Value value_2{inst.Arg(1)};
if (value_1.IsImmediate()) {
return;
}
if (!value_2.IsImmediate()) {
return;
}
const u32 first_index{value_2.U32()};
const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)};
if (!result) {
return;
}
inst.ReplaceUsesWith(*result);
}
void FoldConvert(IR::Inst& inst, IR::Opcode opposite) {
const IR::Value value{inst.Arg(0)};
if (value.IsImmediate()) {
return;
}
IR::Inst* const producer{value.InstRecursive()};
if (producer->GetOpcode() == opposite) {
inst.ReplaceUsesWith(producer->Arg(0));
}
}
void FoldLogicalAnd(IR::Inst& inst) {
if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) {
return;
}
const IR::Value rhs{inst.Arg(1)};
if (rhs.IsImmediate()) {
if (rhs.U1()) {
inst.ReplaceUsesWith(inst.Arg(0));
} else {
inst.ReplaceUsesWith(IR::Value{false});
}
}
}
void FoldSelect(IR::Inst& inst) {
const IR::Value cond{inst.Arg(0)};
if (cond.IsImmediate()) {
inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2));
}
}
void FoldLogicalOr(IR::Inst& inst) {
if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) {
return;
}
const IR::Value rhs{inst.Arg(1)};
if (rhs.IsImmediate()) {
if (rhs.U1()) {
inst.ReplaceUsesWith(IR::Value{true});
} else {
inst.ReplaceUsesWith(inst.Arg(0));
}
}
}
void FoldLogicalNot(IR::Inst& inst) {
const IR::U1 value{inst.Arg(0)};
if (value.IsImmediate()) {
inst.ReplaceUsesWith(IR::Value{!value.U1()});
return;
}
IR::Inst* const arg{value.InstRecursive()};
if (arg->GetOpcode() == IR::Opcode::LogicalNot) {
inst.ReplaceUsesWith(arg->Arg(0));
}
}
void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
const IR::Value value{inst.Arg(0)};
if (value.IsImmediate()) {
return;
}
IR::Inst* const arg_inst{value.InstRecursive()};
if (arg_inst->GetOpcode() == reverse) {
inst.ReplaceUsesWith(arg_inst->Arg(0));
return;
}
}
template <typename T>
void FoldAdd(IR::Block& block, IR::Inst& inst) {
if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) {
return;
}
const IR::Value rhs{inst.Arg(1)};
if (rhs.IsImmediate() && Arg<T>(rhs) == 0) {
inst.ReplaceUsesWith(inst.Arg(0));
return;
}
}
template <u32 idx>
bool IsArgImm(const IR::Inst& inst, u32 imm) {
const IR::Value& arg = inst.Arg(idx);
return arg.IsImmediate() && arg.U32() == imm;
};
void FoldBooleanConvert(IR::Inst& inst) {
// Eliminate pattern
// %4 = <some bool>
// %5 = SelectU32 %4, #1, #0 (uses: 2)
// %8 = INotEqual %5, #0 (uses: 1)
if (!IsArgImm<1>(inst, 0)) {
return;
}
IR::Inst* prod = inst.Arg(0).TryInstRecursive();
if (!prod || prod->GetOpcode() != IR::Opcode::SelectU32) {
return;
}
if (IsArgImm<1>(*prod, 1) && IsArgImm<2>(*prod, 0)) {
inst.ReplaceUsesWith(prod->Arg(0));
}
}
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::IAdd32:
return FoldAdd<u32>(block, inst);
case IR::Opcode::IMul32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
return;
case IR::Opcode::ShiftRightArithmetic32:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
return;
case IR::Opcode::BitCastF32U32:
return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32);
case IR::Opcode::BitCastU32F32:
return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32);
case IR::Opcode::PackHalf2x16:
return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
case IR::Opcode::UnpackHalf2x16:
return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
case IR::Opcode::PackFloat2x16:
return FoldInverseFunc(inst, IR::Opcode::UnpackFloat2x16);
case IR::Opcode::UnpackFloat2x16:
return FoldInverseFunc(inst, IR::Opcode::PackFloat2x16);
case IR::Opcode::SelectU1:
case IR::Opcode::SelectU8:
case IR::Opcode::SelectU16:
case IR::Opcode::SelectU32:
case IR::Opcode::SelectU64:
case IR::Opcode::SelectF32:
case IR::Opcode::SelectF64:
return FoldSelect(inst);
case IR::Opcode::FPNeg32:
FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
return;
case IR::Opcode::LogicalAnd:
return FoldLogicalAnd(inst);
case IR::Opcode::LogicalOr:
return FoldLogicalOr(inst);
case IR::Opcode::LogicalNot:
return FoldLogicalNot(inst);
case IR::Opcode::SLessThan:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; });
return;
case IR::Opcode::ULessThan:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; });
return;
case IR::Opcode::SLessThanEqual:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; });
return;
case IR::Opcode::ULessThanEqual:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; });
return;
case IR::Opcode::SGreaterThan:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; });
return;
case IR::Opcode::UGreaterThan:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; });
return;
case IR::Opcode::SGreaterThanEqual:
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; });
return;
case IR::Opcode::UGreaterThanEqual:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
return;
case IR::Opcode::IEqual:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
return;
case IR::Opcode::INotEqual:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
FoldBooleanConvert(inst);
return;
case IR::Opcode::BitwiseAnd32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; });
return;
case IR::Opcode::BitwiseOr32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; });
return;
case IR::Opcode::BitwiseXor32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; });
return;
case IR::Opcode::BitFieldUExtract:
FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) {
if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) {
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract,
base, shift, count);
}
return (base >> shift) & ((1U << count) - 1);
});
return;
case IR::Opcode::BitFieldSExtract:
FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
const size_t left_shift{32 - back_shift};
const size_t right_shift{static_cast<size_t>(32 - count)};
if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) {
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
base, shift, count);
}
return static_cast<u32>((base << left_shift) >> right_shift);
});
return;
case IR::Opcode::BitFieldInsert:
FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) {
if (bits >= 32 || offset >= 32) {
throw LogicError("Undefined result in {}({}, {}, {}, {})",
IR::Opcode::BitFieldInsert, base, insert, offset, bits);
}
return (base & ~(~(~0u << bits) << offset)) | (insert << offset);
});
return;
case IR::Opcode::CompositeExtractU32x2:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2,
IR::Opcode::CompositeInsertU32x2);
case IR::Opcode::CompositeExtractU32x3:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3,
IR::Opcode::CompositeInsertU32x3);
case IR::Opcode::CompositeExtractU32x4:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4,
IR::Opcode::CompositeInsertU32x4);
case IR::Opcode::CompositeExtractF32x2:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2,
IR::Opcode::CompositeInsertF32x2);
case IR::Opcode::CompositeExtractF32x3:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3,
IR::Opcode::CompositeInsertF32x3);
case IR::Opcode::CompositeExtractF32x4:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4,
IR::Opcode::CompositeInsertF32x4);
case IR::Opcode::CompositeExtractF16x2:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2,
IR::Opcode::CompositeInsertF16x2);
case IR::Opcode::CompositeExtractF16x3:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3,
IR::Opcode::CompositeInsertF16x3);
case IR::Opcode::CompositeExtractF16x4:
return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4,
IR::Opcode::CompositeInsertF16x4);
case IR::Opcode::ConvertF32F16:
return FoldConvert(inst, IR::Opcode::ConvertF16F32);
case IR::Opcode::ConvertF16F32:
return FoldConvert(inst, IR::Opcode::ConvertF32F16);
default:
break;
}
}
void ConstantPropagationPass(IR::BlockList& program) {
const auto end{program.rend()};
for (auto it = program.rbegin(); it != end; ++it) {
IR::Block* const block{*it};
for (IR::Inst& inst : block->Instructions()) {
ConstantPropagation(*block, inst);
}
}
}
} // namespace Shader::Optimization

View file

@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/ir/basic_block.h"
namespace Shader::Optimization {
void SsaRewritePass(IR::BlockList& program);
void IdentityRemovalPass(IR::BlockList& program);
void DeadCodeEliminationPass(IR::BlockList& program);
void ConstantPropagationPass(IR::BlockList& program);
void ResourceTrackingPass(IR::BlockList& program);
} // namespace Shader::Optimization

View file

@ -0,0 +1,131 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <bit>
#include <optional>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader::Optimization {
namespace {
struct SharpLocation {
IR::ScalarReg eud_ptr;
u32 index_dwords;
auto operator<=>(const SharpLocation&) const = default;
};
bool IsResourceInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::ReadConstBufferF32:
case IR::Opcode::ImageSampleExplicitLod:
case IR::Opcode::ImageSampleImplicitLod:
case IR::Opcode::ImageSampleDrefExplicitLod:
case IR::Opcode::ImageSampleDrefImplicitLod:
case IR::Opcode::ImageFetch:
case IR::Opcode::ImageGather:
case IR::Opcode::ImageGatherDref:
case IR::Opcode::ImageQueryDimensions:
case IR::Opcode::ImageQueryLod:
case IR::Opcode::ImageGradient:
case IR::Opcode::ImageRead:
case IR::Opcode::ImageWrite:
return true;
default:
return false;
}
}
/*class Descriptors {
public:
explicit Descriptors(TextureDescriptors& texture_descriptors_)
: texture_descriptors{texture_descriptors_} {}
u32 Add(const TextureDescriptor& desc) {
const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
desc.has_secondary == existing.has_secondary &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
desc.shift_left == existing.shift_left &&
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
desc.secondary_shift_left == existing.secondary_shift_left &&
desc.count == existing.count && desc.size_shift == existing.size_shift;
})};
// TODO: Read this from TIC
texture_descriptors[index].is_multisample |= desc.is_multisample;
return index;
}
private:
template <typename Descriptors, typename Descriptor, typename Func>
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
// TODO: Handle arrays
const auto it{std::ranges::find_if(descriptors, pred)};
if (it != descriptors.end()) {
return static_cast<u32>(std::distance(descriptors.begin(), it));
}
descriptors.push_back(desc);
return static_cast<u32>(descriptors.size()) - 1;
}
TextureDescriptors& texture_descriptors;
};*/
} // Anonymous namespace
SharpLocation TrackSharp(const IR::Value& handle) {
IR::Inst* inst = handle.InstRecursive();
if (inst->GetOpcode() == IR::Opcode::GetScalarRegister) {
return SharpLocation{
.eud_ptr = IR::ScalarReg::Max,
.index_dwords = inst->Arg(0).U32(),
};
}
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, "Sharp load not from constant memory");
// Retrieve offset from base.
IR::Inst* addr = inst->Arg(0).InstRecursive();
u32 dword_offset = addr->Arg(1).U32();
addr = addr->Arg(0).InstRecursive();
ASSERT_MSG(addr->Arg(1).IsImmediate(), "Bindless not supported");
dword_offset += addr->Arg(1).U32() >> 2;
// Retrieve SGPR that holds sbase
inst = addr->Arg(0).InstRecursive()->Arg(0).InstRecursive();
ASSERT_MSG(inst->GetOpcode() == IR::Opcode::GetScalarRegister,
"Nested resource loads not supported");
const IR::ScalarReg base = inst->Arg(0).ScalarReg();
// Return retrieved location.
return SharpLocation{
.eud_ptr = base,
.index_dwords = dword_offset,
};
}
void ResourceTrackingPass(IR::BlockList& program) {
for (IR::Block* const block : program) {
for (IR::Inst& inst : block->Instructions()) {
if (!IsResourceInstruction(inst)) {
continue;
}
printf("ff\n");
IR::Inst* producer = inst.Arg(0).InstRecursive();
const auto loc = TrackSharp(producer->Arg(0));
fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords,
loc.index_dwords + 4, loc.eud_ptr != IR::ScalarReg::Max);
}
}
}
} // namespace Shader::Optimization

View file

@ -0,0 +1,408 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// This file implements the SSA rewriting algorithm proposed in
//
// Simple and Efficient Construction of Static Single Assignment Form.
// Braun M., Buchwald S., Hack S., Leiba R., Mallon C., Zwinkau A. (2013)
// In: Jhala R., De Bosschere K. (eds)
// Compiler Construction. CC 2013.
// Lecture Notes in Computer Science, vol 7791.
// Springer, Berlin, Heidelberg
//
// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
//
#include <map>
#include <span>
#include <unordered_map>
#include <variant>
#include <vector>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::Optimization {
namespace {
struct FlagTag {
auto operator<=>(const FlagTag&) const noexcept = default;
};
struct ZeroFlagTag : FlagTag {};
struct SignFlagTag : FlagTag {};
struct CarryFlagTag : FlagTag {};
struct OverflowFlagTag : FlagTag {};
struct VccFlagTag : FlagTag {};
struct GotoVariable : FlagTag {
GotoVariable() = default;
explicit GotoVariable(u32 index_) : index{index_} {}
auto operator<=>(const GotoVariable&) const noexcept = default;
u32 index;
};
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, ZeroFlagTag, SignFlagTag, CarryFlagTag,
OverflowFlagTag, GotoVariable, VccFlagTag>;
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
struct DefTable {
const IR::Value& Def(IR::Block* block, IR::ScalarReg variable) {
return block->ssa_sreg_values[RegIndex(variable)];
}
void SetDef(IR::Block* block, IR::ScalarReg variable, const IR::Value& value) {
block->ssa_sreg_values[RegIndex(variable)] = value;
}
const IR::Value& Def(IR::Block* block, IR::VectorReg variable) {
return block->ssa_vreg_values[RegIndex(variable)];
}
void SetDef(IR::Block* block, IR::VectorReg variable, const IR::Value& value) {
block->ssa_vreg_values[RegIndex(variable)] = value;
}
const IR::Value& Def(IR::Block* block, GotoVariable variable) {
return goto_vars[variable.index][block];
}
void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) {
goto_vars[variable.index].insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
return zero_flag[block];
}
void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
zero_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, SignFlagTag) {
return sign_flag[block];
}
void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
sign_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, CarryFlagTag) {
return carry_flag[block];
}
void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
carry_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
return overflow_flag[block];
}
void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
overflow_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, VccFlagTag) {
return vcc_flag[block];
}
void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) {
vcc_flag.insert_or_assign(block, value);
}
std::unordered_map<u32, ValueMap> goto_vars;
ValueMap indirect_branch_var;
ValueMap zero_flag;
ValueMap sign_flag;
ValueMap carry_flag;
ValueMap overflow_flag;
ValueMap vcc_flag;
};
IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(IR::VectorReg) noexcept {
return IR::Opcode::UndefU32;
}
IR::Opcode UndefOpcode(const FlagTag&) noexcept {
return IR::Opcode::UndefU1;
}
enum class Status {
Start,
SetValue,
PreparePhiArgument,
PushPhiArgument,
};
template <typename Type>
struct ReadState {
ReadState(IR::Block* block_) : block{block_} {}
ReadState() = default;
IR::Block* block{};
IR::Value result{};
IR::Inst* phi{};
IR::Block* const* pred_it{};
IR::Block* const* pred_end{};
Status pc{Status::Start};
};
class Pass {
public:
template <typename Type>
void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) {
current_def.SetDef(block, variable, value);
}
template <typename Type>
IR::Value ReadVariable(Type variable, IR::Block* root_block) {
boost::container::small_vector<ReadState<Type>, 64> stack{
ReadState<Type>(nullptr),
ReadState<Type>(root_block),
};
const auto prepare_phi_operand = [&] {
if (stack.back().pred_it == stack.back().pred_end) {
IR::Inst* const phi{stack.back().phi};
IR::Block* const block{stack.back().block};
const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))};
stack.pop_back();
stack.back().result = result;
WriteVariable(variable, block, result);
} else {
IR::Block* const imm_pred{*stack.back().pred_it};
stack.back().pc = Status::PushPhiArgument;
stack.emplace_back(imm_pred);
}
};
do {
IR::Block* const block{stack.back().block};
switch (stack.back().pc) {
case Status::Start: {
if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) {
stack.back().result = def;
} else if (!block->IsSsaSealed()) {
// Incomplete CFG
IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
incomplete_phis[block].insert_or_assign(variable, phi);
stack.back().result = IR::Value{&*phi};
} else if (const std::span imm_preds = block->ImmPredecessors();
imm_preds.size() == 1) {
// Optimize the common case of one predecessor: no phi needed
stack.back().pc = Status::SetValue;
stack.emplace_back(imm_preds.front());
break;
} else {
// Break potential cycles with operandless phi
IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)};
phi->SetFlags(IR::TypeOf(UndefOpcode(variable)));
WriteVariable(variable, block, IR::Value{phi});
stack.back().phi = phi;
stack.back().pred_it = imm_preds.data();
stack.back().pred_end = imm_preds.data() + imm_preds.size();
prepare_phi_operand();
break;
}
}
[[fallthrough]];
case Status::SetValue: {
const IR::Value result{stack.back().result};
WriteVariable(variable, block, result);
stack.pop_back();
stack.back().result = result;
break;
}
case Status::PushPhiArgument: {
IR::Inst* const phi{stack.back().phi};
phi->AddPhiOperand(*stack.back().pred_it, stack.back().result);
++stack.back().pred_it;
}
[[fallthrough]];
case Status::PreparePhiArgument:
prepare_phi_operand();
break;
}
} while (stack.size() > 1);
return stack.back().result;
}
void SealBlock(IR::Block* block) {
const auto it{incomplete_phis.find(block)};
if (it != incomplete_phis.end()) {
for (auto& pair : it->second) {
auto& variant{pair.first};
auto& phi{pair.second};
std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant);
}
}
block->SsaSeal();
}
private:
template <typename Type>
IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) {
for (IR::Block* const imm_pred : block->ImmPredecessors()) {
phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred));
}
return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable));
}
IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) {
IR::Value same;
const size_t num_args{phi.NumArgs()};
for (size_t arg_index = 0; arg_index < num_args; ++arg_index) {
const IR::Value& op{phi.Arg(arg_index)};
if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) {
// Unique value or self-reference
continue;
}
if (!same.IsEmpty()) {
// The phi merges at least two values: not trivial
return IR::Value{&phi};
}
same = op;
}
// Remove the phi node from the block, it will be reinserted
IR::Block::InstructionList& list{block->Instructions()};
list.erase(IR::Block::InstructionList::s_iterator_to(phi));
// Find the first non-phi instruction and use it as an insertion point
IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)};
if (same.IsEmpty()) {
// The phi is unreachable or in the start block
// Insert an undefined instruction and make it the phi node replacement
// The "phi" node reinsertion point is specified after this instruction
reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode);
same = IR::Value{&*reinsert_point};
++reinsert_point;
}
// Reinsert the phi node and reroute all its uses to the "same" value
list.insert(reinsert_point, phi);
phi.ReplaceUsesWith(same);
// TODO: Try to recursively remove all phi users, which might have become trivial
return same;
}
std::unordered_map<IR::Block*, std::map<Variant, IR::Inst*>> incomplete_phis;
DefTable current_def;
};
void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
const IR::Opcode opcode{inst.GetOpcode()};
switch (opcode) {
case IR::Opcode::SetScalarRegister: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
pass.WriteVariable(reg, block, inst.Arg(1));
break;
}
case IR::Opcode::SetVectorRegister: {
const IR::VectorReg reg{inst.Arg(0).VectorReg()};
pass.WriteVariable(reg, block, inst.Arg(1));
break;
}
case IR::Opcode::SetGotoVariable:
pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
break;
case IR::Opcode::SetVcc:
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
break;
// case IR::Opcode::SetSFlag:
// pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
// break;
// case IR::Opcode::SetCFlag:
// pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
// break;
// case IR::Opcode::SetOFlag:
// pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
// break;
case IR::Opcode::GetScalarRegister: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
break;
}
case IR::Opcode::GetVectorRegister: {
const IR::VectorReg reg{inst.Arg(0).VectorReg()};
inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
break;
}
case IR::Opcode::GetGotoVariable:
inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
break;
case IR::Opcode::GetVcc:
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
break;
// case IR::Opcode::GetSFlag:
// inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
// break;
// case IR::Opcode::GetCFlag:
// inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
// break;
// case IR::Opcode::GetOFlag:
// inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
// break;
default:
break;
}
}
void VisitBlock(Pass& pass, IR::Block* block) {
for (IR::Inst& inst : block->Instructions()) {
VisitInst(pass, block, inst);
}
pass.SealBlock(block);
}
} // Anonymous namespace
void SsaRewritePass(IR::BlockList& program) {
Pass pass;
const auto end{program.rend()};
for (auto block = program.rbegin(); block != end; ++block) {
VisitBlock(pass, *block);
}
}
void IdentityRemovalPass(IR::BlockList& program) {
std::vector<IR::Inst*> to_invalidate;
for (IR::Block* const block : program) {
for (auto inst = block->begin(); inst != block->end();) {
const size_t num_args{inst->NumArgs()};
for (size_t i = 0; i < num_args; ++i) {
IR::Value arg;
while ((arg = inst->Arg(i)).IsIdentity()) {
inst->SetArg(i, arg.Inst()->Arg(0));
}
}
if (inst->GetOpcode() == IR::Opcode::Identity ||
inst->GetOpcode() == IR::Opcode::Void) {
to_invalidate.push_back(&*inst);
inst = block->Instructions().erase(inst);
} else {
++inst;
}
}
}
for (IR::Inst* const inst : to_invalidate) {
inst->Invalidate();
}
}
void DeadCodeEliminationPass(IR::BlockList& program) {
// We iterate over the instructions in reverse order.
// This is because removing an instruction reduces the number of uses for earlier instructions.
for (IR::Block* const block : program) {
auto it{block->end()};
while (it != block->begin()) {
--it;
if (!it->HasUses() && !it->MayHaveSideEffects()) {
it->Invalidate();
it = block->Instructions().erase(it);
}
}
}
}
} // namespace Shader::Optimization

View file

@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <boost/container/flat_set.hpp>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/post_order.h"
namespace Shader::IR {
BlockList PostOrder(const AbstractSyntaxNode& root) {
boost::container::small_vector<Block*, 16> block_stack;
boost::container::flat_set<Block*> visited;
BlockList post_order_blocks;
if (root.type != AbstractSyntaxNode::Type::Block) {
throw LogicError("First node in abstract syntax list root is not a block");
}
Block* const first_block{root.data.block};
visited.insert(first_block);
block_stack.push_back(first_block);
while (!block_stack.empty()) {
Block* const block = block_stack.back();
const auto visit = [&](Block* branch) {
if (!visited.insert(branch).second) {
return false;
}
// Calling push_back twice is faster than insert on MSVC
block_stack.push_back(block);
block_stack.push_back(branch);
return true;
};
block_stack.pop_back();
if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
post_order_blocks.push_back(block);
}
}
return post_order_blocks;
}
} // namespace Shader::IR

View file

@ -0,0 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h"
namespace Shader::IR {
BlockList PostOrder(const AbstractSyntaxNode& root);
} // namespace Shader::IR

View file

@ -0,0 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <map>
#include <string>
#include <fmt/format.h>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
std::string DumpProgram(const Program& program) {
size_t index{0};
std::map<const IR::Inst*, size_t> inst_to_index;
std::map<const IR::Block*, size_t> block_to_index;
for (const IR::Block* const block : program.blocks) {
block_to_index.emplace(block, index);
++index;
}
std::string ret;
for (const auto& block : program.blocks) {
ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
}
return ret;
}
} // namespace Shader::IR

View file

@ -0,0 +1,28 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <string>
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h"
namespace Shader {
enum class Stage : u32;
}
namespace Shader::IR {
struct Program {
AbstractSyntaxList syntax_list;
BlockList blocks;
BlockList post_order_blocks;
std::vector<Gcn::GcnInst> ins_list;
Stage stage;
};
[[nodiscard]] std::string DumpProgram(const Program& program);
} // namespace Shader::IR

View file

@ -0,0 +1,471 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/bit_field.h"
#include "common/types.h"
#include "shader_recompiler/exception.h"
namespace Shader::IR {
enum class FpRoundMode : u32 {
NearestEven = 0,
PlusInf = 1,
MinInf = 2,
ToZero = 3,
};
enum class FpDenormMode : u32 {
InOutFlush = 0,
InAllowOutFlush = 1,
InFlushOutAllow = 2,
InOutAllow = 3,
};
union Mode {
BitField<0, 4, FpRoundMode> fp_round;
BitField<4, 2, FpDenormMode> fp_denorm_single;
BitField<6, 2, FpDenormMode> fp_denorm_double;
BitField<8, 1, u32> dx10_clamp;
};
union TextureInstInfo {
u32 raw;
BitField<0, 16, u32> descriptor_index;
BitField<19, 1, u32> is_depth;
BitField<20, 1, u32> has_bias;
BitField<21, 1, u32> has_lod_clamp;
BitField<22, 1, u32> relaxed_precision;
BitField<23, 2, u32> gather_component;
BitField<25, 2, u32> num_derivatives;
};
enum class ScalarReg : u32 {
S0,
S1,
S2,
S3,
S4,
S5,
S6,
S7,
S8,
S9,
S10,
S11,
S12,
S13,
S14,
S15,
S16,
S17,
S18,
S19,
S20,
S21,
S22,
S23,
S24,
S25,
S26,
S27,
S28,
S29,
S30,
S31,
S32,
S33,
S34,
S35,
S36,
S37,
S38,
S39,
S40,
S41,
S42,
S43,
S44,
S45,
S46,
S47,
S48,
S49,
S50,
S51,
S52,
S53,
S54,
S55,
S56,
S57,
S58,
S59,
S60,
S61,
S62,
S63,
S64,
S65,
S66,
S67,
S68,
S69,
S70,
S71,
S72,
S73,
S74,
S75,
S76,
S77,
S78,
S79,
S80,
S81,
S82,
S83,
S84,
S85,
S86,
S87,
S88,
S89,
S90,
S91,
S92,
S93,
S94,
S95,
S96,
S97,
S98,
S99,
S100,
S101,
S102,
S103,
Max,
};
static constexpr size_t NumScalarRegs = static_cast<size_t>(ScalarReg::Max);
enum class VectorReg : u32 {
V0,
V1,
V2,
V3,
V4,
V5,
V6,
V7,
V8,
V9,
V10,
V11,
V12,
V13,
V14,
V15,
V16,
V17,
V18,
V19,
V20,
V21,
V22,
V23,
V24,
V25,
V26,
V27,
V28,
V29,
V30,
V31,
V32,
V33,
V34,
V35,
V36,
V37,
V38,
V39,
V40,
V41,
V42,
V43,
V44,
V45,
V46,
V47,
V48,
V49,
V50,
V51,
V52,
V53,
V54,
V55,
V56,
V57,
V58,
V59,
V60,
V61,
V62,
V63,
V64,
V65,
V66,
V67,
V68,
V69,
V70,
V71,
V72,
V73,
V74,
V75,
V76,
V77,
V78,
V79,
V80,
V81,
V82,
V83,
V84,
V85,
V86,
V87,
V88,
V89,
V90,
V91,
V92,
V93,
V94,
V95,
V96,
V97,
V98,
V99,
V100,
V101,
V102,
V103,
V104,
V105,
V106,
V107,
V108,
V109,
V110,
V111,
V112,
V113,
V114,
V115,
V116,
V117,
V118,
V119,
V120,
V121,
V122,
V123,
V124,
V125,
V126,
V127,
V128,
V129,
V130,
V131,
V132,
V133,
V134,
V135,
V136,
V137,
V138,
V139,
V140,
V141,
V142,
V143,
V144,
V145,
V146,
V147,
V148,
V149,
V150,
V151,
V152,
V153,
V154,
V155,
V156,
V157,
V158,
V159,
V160,
V161,
V162,
V163,
V164,
V165,
V166,
V167,
V168,
V169,
V170,
V171,
V172,
V173,
V174,
V175,
V176,
V177,
V178,
V179,
V180,
V181,
V182,
V183,
V184,
V185,
V186,
V187,
V188,
V189,
V190,
V191,
V192,
V193,
V194,
V195,
V196,
V197,
V198,
V199,
V200,
V201,
V202,
V203,
V204,
V205,
V206,
V207,
V208,
V209,
V210,
V211,
V212,
V213,
V214,
V215,
V216,
V217,
V218,
V219,
V220,
V221,
V222,
V223,
V224,
V225,
V226,
V227,
V228,
V229,
V230,
V231,
V232,
V233,
V234,
V235,
V236,
V237,
V238,
V239,
V240,
V241,
V242,
V243,
V244,
V245,
V246,
V247,
V248,
V249,
V250,
V251,
V252,
V253,
V254,
V255,
Max,
};
static constexpr size_t NumVectorRegs = static_cast<size_t>(VectorReg::Max);
template <class T>
concept RegT = std::is_same_v<T, ScalarReg> || std::is_same_v<T, VectorReg>;
template <RegT Reg>
[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
const int result{static_cast<int>(reg) + num};
if (result >= static_cast<int>(Reg::Max)) {
throw LogicError("Overflow on register arithmetic");
}
if (result < 0) {
throw LogicError("Underflow on register arithmetic");
}
return static_cast<Reg>(result);
}
template <RegT Reg>
[[nodiscard]] constexpr Reg operator-(Reg reg, int num) {
return reg + (-num);
}
template <RegT Reg>
constexpr Reg operator++(Reg& reg) {
reg = reg + 1;
return reg;
}
template <RegT Reg>
constexpr Reg operator++(Reg& reg, int) {
const Reg copy{reg};
reg = reg + 1;
return copy;
}
template <RegT Reg>
[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
return static_cast<size_t>(reg);
}
} // namespace Shader::IR
template <>
struct fmt::formatter<Shader::IR::ScalarReg> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(Shader::IR::ScalarReg reg, format_context& ctx) const {
return fmt::format_to(ctx.out(), "SGPR{}", static_cast<u32>(reg));
}
};
template <>
struct fmt::formatter<Shader::IR::VectorReg> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(Shader::IR::VectorReg reg, format_context& ctx) const {
return fmt::format_to(ctx.out(), "VGPR{}", static_cast<u32>(reg));
}
};

View file

@ -0,0 +1,36 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <string>
#include "shader_recompiler/ir/type.h"
namespace Shader::IR {
std::string NameOf(Type type) {
static constexpr std::array names{
"Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32",
"U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3",
"F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4",
};
const size_t bits{static_cast<size_t>(type)};
if (bits == 0) {
return "Void";
}
std::string result;
for (size_t i = 0; i < names.size(); i++) {
if ((bits & (size_t{1} << i)) != 0) {
if (!result.empty()) {
result += '|';
}
result += names[i];
}
}
return result;
}
bool AreTypesCompatible(Type lhs, Type rhs) noexcept {
return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque;
}
} // namespace Shader::IR

View file

@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string>
#include <fmt/format.h>
#include "common/enum.h"
namespace Shader::IR {
enum class Type {
Void = 0,
Opaque = 1 << 0,
ScalarReg = 1 << 1,
VectorReg = 1 << 2,
Attribute = 1 << 3,
SystemValue = 1 << 4,
U1 = 1 << 5,
U8 = 1 << 6,
U16 = 1 << 7,
U32 = 1 << 8,
U64 = 1 << 9,
F16 = 1 << 10,
F32 = 1 << 11,
F64 = 1 << 12,
U32x2 = 1 << 13,
U32x3 = 1 << 14,
U32x4 = 1 << 15,
F16x2 = 1 << 16,
F16x3 = 1 << 17,
F16x4 = 1 << 18,
F32x2 = 1 << 19,
F32x3 = 1 << 20,
F32x4 = 1 << 21,
F64x2 = 1 << 22,
F64x3 = 1 << 23,
F64x4 = 1 << 24,
};
DECLARE_ENUM_FLAG_OPERATORS(Type)
[[nodiscard]] std::string NameOf(Type type);
[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept;
} // namespace Shader::IR
template <>
struct fmt::formatter<Shader::IR::Type> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(Shader::IR::Type type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(type));
}
};

View file

@ -0,0 +1,93 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {}
Value::Value(IR::ScalarReg reg) noexcept : type{Type::ScalarReg}, sreg{reg} {}
Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {}
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
IR::Type Value::Type() const noexcept {
if (IsPhi()) {
// The type of a phi node is stored in its flags
return inst->Flags<IR::Type>();
}
if (IsIdentity()) {
return inst->Arg(0).Type();
}
if (type == Type::Opaque) {
return inst->Type();
}
return type;
}
bool Value::operator==(const Value& other) const {
if (type != other.type) {
return false;
}
switch (type) {
case Type::Void:
return true;
case Type::Opaque:
return inst == other.inst;
case Type::ScalarReg:
return sreg == other.sreg;
case Type::VectorReg:
return vreg == other.vreg;
case Type::Attribute:
return attribute == other.attribute;
case Type::U1:
return imm_u1 == other.imm_u1;
case Type::U8:
return imm_u8 == other.imm_u8;
case Type::U16:
case Type::F16:
return imm_u16 == other.imm_u16;
case Type::U32:
case Type::F32:
return imm_u32 == other.imm_u32;
case Type::U64:
case Type::F64:
return imm_u64 == other.imm_u64;
case Type::U32x2:
case Type::U32x3:
case Type::U32x4:
case Type::F16x2:
case Type::F16x3:
case Type::F16x4:
case Type::F32x2:
case Type::F32x3:
case Type::F32x4:
case Type::F64x2:
case Type::F64x3:
case Type::F64x4:
break;
}
throw LogicError("Invalid type {}", type);
}
bool Value::operator!=(const Value& other) const {
return !operator==(other);
}
} // namespace Shader::IR

View file

@ -0,0 +1,353 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <bit>
#include <cstring>
#include <type_traits>
#include <utility>
#include <boost/container/small_vector.hpp>
#include <boost/intrusive/list.hpp>
#include "common/assert.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
namespace Shader::IR {
class Block;
class Inst;
struct AssociatedInsts;
class Value {
public:
Value() noexcept = default;
explicit Value(IR::Inst* value) noexcept;
explicit Value(IR::ScalarReg reg) noexcept;
explicit Value(IR::VectorReg reg) noexcept;
explicit Value(IR::Attribute value) noexcept;
explicit Value(bool value) noexcept;
explicit Value(u8 value) noexcept;
explicit Value(u16 value) noexcept;
explicit Value(u32 value) noexcept;
explicit Value(f32 value) noexcept;
explicit Value(u64 value) noexcept;
explicit Value(f64 value) noexcept;
[[nodiscard]] bool IsIdentity() const noexcept;
[[nodiscard]] bool IsPhi() const noexcept;
[[nodiscard]] bool IsEmpty() const noexcept;
[[nodiscard]] bool IsImmediate() const noexcept;
[[nodiscard]] IR::Type Type() const noexcept;
[[nodiscard]] IR::Inst* Inst() const;
[[nodiscard]] IR::Inst* InstRecursive() const;
[[nodiscard]] IR::Inst* TryInstRecursive() const;
[[nodiscard]] IR::Value Resolve() const;
[[nodiscard]] IR::ScalarReg ScalarReg() const;
[[nodiscard]] IR::VectorReg VectorReg() const;
[[nodiscard]] IR::Attribute Attribute() const;
[[nodiscard]] bool U1() const;
[[nodiscard]] u8 U8() const;
[[nodiscard]] u16 U16() const;
[[nodiscard]] u32 U32() const;
[[nodiscard]] f32 F32() const;
[[nodiscard]] u64 U64() const;
[[nodiscard]] f64 F64() const;
[[nodiscard]] bool operator==(const Value& other) const;
[[nodiscard]] bool operator!=(const Value& other) const;
private:
IR::Type type{};
union {
IR::Inst* inst{};
IR::ScalarReg sreg;
IR::VectorReg vreg;
IR::Attribute attribute;
bool imm_u1;
u8 imm_u8;
u16 imm_u16;
u32 imm_u32;
f32 imm_f32;
u64 imm_u64;
f64 imm_f64;
};
};
static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero");
static_assert(std::is_trivially_copyable_v<Value>);
template <IR::Type type_>
class TypedValue : public Value {
public:
TypedValue() = default;
template <IR::Type other_type>
requires((other_type & type_) != IR::Type::Void)
explicit(false) TypedValue(const TypedValue<other_type>& value) : Value(value) {}
explicit TypedValue(const Value& value) : Value(value) {
if ((value.Type() & type_) == IR::Type::Void) {
throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
}
}
explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
};
class Inst : public boost::intrusive::list_base_hook<> {
public:
explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
explicit Inst(const Inst& base);
~Inst();
Inst& operator=(const Inst&) = delete;
Inst& operator=(Inst&&) = delete;
Inst(Inst&&) = delete;
/// Get the number of uses this instruction has.
[[nodiscard]] int UseCount() const noexcept {
return use_count;
}
/// Determines whether this instruction has uses or not.
[[nodiscard]] bool HasUses() const noexcept {
return use_count > 0;
}
/// Get the opcode this microinstruction represents.
[[nodiscard]] IR::Opcode GetOpcode() const noexcept {
return op;
}
/// Determines whether or not this instruction may have side effects.
[[nodiscard]] bool MayHaveSideEffects() const noexcept;
/// Determines if all arguments of this instruction are immediates.
[[nodiscard]] bool AreAllArgsImmediates() const;
/// Get the type this instruction returns.
[[nodiscard]] IR::Type Type() const;
/// Get the number of arguments this instruction has.
[[nodiscard]] size_t NumArgs() const {
return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op);
}
/// Get the value of a given argument index.
[[nodiscard]] Value Arg(size_t index) const noexcept {
if (op == IR::Opcode::Phi) {
return phi_args[index].second;
} else {
return args[index];
}
}
/// Set the value of a given argument index.
void SetArg(size_t index, Value value);
/// Get a pointer to the block of a phi argument.
[[nodiscard]] Block* PhiBlock(size_t index) const;
/// Add phi operand to a phi instruction.
void AddPhiOperand(Block* predecessor, const Value& value);
void Invalidate();
void ClearArgs();
void ReplaceUsesWith(Value replacement);
void ReplaceOpcode(IR::Opcode opcode);
template <typename FlagsType>
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
[[nodiscard]] FlagsType Flags() const noexcept {
FlagsType ret;
std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
return ret;
}
template <typename FlagsType>
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
void SetFlags(FlagsType value) noexcept {
std::memcpy(&flags, &value, sizeof(value));
}
/// Intrusively store the host definition of this instruction.
template <typename DefinitionType>
void SetDefinition(DefinitionType def) {
definition = std::bit_cast<u32>(def);
}
/// Return the intrusively stored host definition of this instruction.
template <typename DefinitionType>
[[nodiscard]] DefinitionType Definition() const noexcept {
return std::bit_cast<DefinitionType>(definition);
}
private:
struct NonTriviallyDummy {
NonTriviallyDummy() noexcept {}
};
void Use(const Value& value);
void UndoUse(const Value& value);
IR::Opcode op{};
int use_count{};
u32 flags{};
u32 definition{};
union {
NonTriviallyDummy dummy{};
boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
std::array<Value, 5> args;
};
};
static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
using U1 = TypedValue<Type::U1>;
using U8 = TypedValue<Type::U8>;
using U16 = TypedValue<Type::U16>;
using U32 = TypedValue<Type::U32>;
using U64 = TypedValue<Type::U64>;
using F16 = TypedValue<Type::F16>;
using F32 = TypedValue<Type::F32>;
using F64 = TypedValue<Type::F64>;
using U32F32 = TypedValue<Type::U32 | Type::F32>;
using U32U64 = TypedValue<Type::U32 | Type::U64>;
using F32F64 = TypedValue<Type::F32 | Type::F64>;
using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
inline bool Value::IsIdentity() const noexcept {
return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
}
inline bool Value::IsPhi() const noexcept {
return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
}
inline bool Value::IsEmpty() const noexcept {
return type == Type::Void;
}
inline bool Value::IsImmediate() const noexcept {
IR::Type current_type{type};
const IR::Inst* current_inst{inst};
while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
const Value& arg{current_inst->Arg(0)};
current_type = arg.type;
current_inst = arg.inst;
}
return current_type != Type::Opaque;
}
inline IR::Inst* Value::Inst() const {
DEBUG_ASSERT(type == Type::Opaque);
return inst;
}
inline IR::Inst* Value::InstRecursive() const {
DEBUG_ASSERT(type == Type::Opaque);
if (IsIdentity()) {
return inst->Arg(0).InstRecursive();
}
return inst;
}
inline IR::Inst* Value::TryInstRecursive() const {
if (IsIdentity()) {
return inst->Arg(0).TryInstRecursive();
}
return type == Type::Opaque ? inst : nullptr;
}
inline IR::Value Value::Resolve() const {
if (IsIdentity()) {
return inst->Arg(0).Resolve();
}
return *this;
}
inline IR::ScalarReg Value::ScalarReg() const {
DEBUG_ASSERT(type == Type::ScalarReg);
return sreg;
}
inline IR::VectorReg Value::VectorReg() const {
DEBUG_ASSERT(type == Type::VectorReg);
return vreg;
}
inline IR::Attribute Value::Attribute() const {
DEBUG_ASSERT(type == Type::Attribute);
return attribute;
}
inline bool Value::U1() const {
if (IsIdentity()) {
return inst->Arg(0).U1();
}
DEBUG_ASSERT(type == Type::U1);
return imm_u1;
}
inline u8 Value::U8() const {
if (IsIdentity()) {
return inst->Arg(0).U8();
}
DEBUG_ASSERT(type == Type::U8);
return imm_u8;
}
inline u16 Value::U16() const {
if (IsIdentity()) {
return inst->Arg(0).U16();
}
DEBUG_ASSERT(type == Type::U16);
return imm_u16;
}
inline u32 Value::U32() const {
if (IsIdentity()) {
return inst->Arg(0).U32();
}
DEBUG_ASSERT(type == Type::U32);
return imm_u32;
}
inline f32 Value::F32() const {
if (IsIdentity()) {
return inst->Arg(0).F32();
}
DEBUG_ASSERT(type == Type::F32);
return imm_f32;
}
inline u64 Value::U64() const {
if (IsIdentity()) {
return inst->Arg(0).U64();
}
DEBUG_ASSERT(type == Type::U64);
return imm_u64;
}
inline f64 Value::F64() const {
if (IsIdentity()) {
return inst->Arg(0).F64();
}
DEBUG_ASSERT(type == Type::F64);
return imm_f64;
}
[[nodiscard]] inline bool IsPhi(const Inst& inst) {
return inst.GetOpcode() == Opcode::Phi;
}
} // namespace Shader::IR