mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-23 11:55:00 +00:00
video_core: Implement basic compute shaders and more instructions
This commit is contained in:
parent
10bceb1643
commit
58de7ff55a
58 changed files with 1234 additions and 293 deletions
|
@ -10,6 +10,10 @@ bool IsParam(Attribute attribute) noexcept {
|
|||
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
|
||||
}
|
||||
|
||||
bool IsMrt(Attribute attribute) noexcept {
|
||||
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
|
||||
}
|
||||
|
||||
std::string NameOf(Attribute attribute) {
|
||||
switch (attribute) {
|
||||
case Attribute::RenderTarget0:
|
||||
|
@ -112,6 +116,12 @@ std::string NameOf(Attribute attribute) {
|
|||
return "FragCoord";
|
||||
case Attribute::IsFrontFace:
|
||||
return "IsFrontFace";
|
||||
case Attribute::WorkgroupId:
|
||||
return "WorkgroupId";
|
||||
case Attribute::LocalInvocationId:
|
||||
return "LocalInvocationId";
|
||||
case Attribute::LocalInvocationIndex:
|
||||
return "LocalInvocationIndex";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -81,6 +81,8 @@ constexpr size_t NumParams = 32;
|
|||
|
||||
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
|
||||
|
||||
[[nodiscard]] bool IsMrt(Attribute attribute) noexcept;
|
||||
|
||||
[[nodiscard]] std::string NameOf(Attribute attribute);
|
||||
|
||||
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
|
||||
|
|
|
@ -111,6 +111,10 @@ void IREmitter::Epilogue() {
|
|||
Inst(Opcode::Epilogue);
|
||||
}
|
||||
|
||||
void IREmitter::Discard() {
|
||||
Inst(Opcode::Discard);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetUserData(IR::ScalarReg reg) {
|
||||
return Inst<U32>(Opcode::GetUserData, reg);
|
||||
}
|
||||
|
@ -156,11 +160,17 @@ U1 IREmitter::Condition(IR::Condition cond) {
|
|||
case IR::Condition::True:
|
||||
return Imm1(true);
|
||||
case IR::Condition::Scc0:
|
||||
return LogicalNot(GetScc());
|
||||
case IR::Condition::Scc1:
|
||||
return GetScc();
|
||||
case IR::Condition::Vccz:
|
||||
return LogicalNot(GetVcc());
|
||||
case IR::Condition::Vccnz:
|
||||
return GetVcc();
|
||||
case IR::Condition::Execz:
|
||||
return LogicalNot(GetExec());
|
||||
case IR::Condition::Execnz:
|
||||
return GetExec();
|
||||
default:
|
||||
throw NotImplementedException("");
|
||||
}
|
||||
|
@ -170,14 +180,38 @@ void IREmitter::SetGotoVariable(u32 id, const U1& value) {
|
|||
Inst(Opcode::SetGotoVariable, id, value);
|
||||
}
|
||||
|
||||
U1 IREmitter::GetScc() {
|
||||
return Inst<U1>(Opcode::GetScc);
|
||||
}
|
||||
|
||||
U1 IREmitter::GetExec() {
|
||||
return Inst<U1>(Opcode::GetExec);
|
||||
}
|
||||
|
||||
U1 IREmitter::GetVcc() {
|
||||
return Inst<U1>(Opcode::GetVcc);
|
||||
}
|
||||
|
||||
U32 IREmitter::GetVccLo() {
|
||||
return Inst<U32>(Opcode::GetVccLo);
|
||||
}
|
||||
|
||||
void IREmitter::SetScc(const U1& value) {
|
||||
Inst(Opcode::SetScc, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetExec(const U1& value) {
|
||||
Inst(Opcode::SetExec, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetVcc(const U1& value) {
|
||||
Inst(Opcode::SetVcc, value);
|
||||
}
|
||||
|
||||
void IREmitter::SetVccLo(const U32& value) {
|
||||
Inst(Opcode::SetVccLo, value);
|
||||
}
|
||||
|
||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
|
||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
|
||||
}
|
||||
|
@ -247,6 +281,27 @@ Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& ad
|
|||
}
|
||||
}
|
||||
|
||||
void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
const Value& data, BufferInstInfo info) {
|
||||
switch (num_dwords) {
|
||||
case 1:
|
||||
Inst(data.Type() == Type::F32 ? Opcode::StoreBufferF32 : Opcode::StoreBufferU32,
|
||||
Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 2:
|
||||
Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 3:
|
||||
Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data);
|
||||
break;
|
||||
case 4:
|
||||
Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data);
|
||||
break;
|
||||
default:
|
||||
throw InvalidArgument("Invalid number of dwords {}", num_dwords);
|
||||
}
|
||||
}
|
||||
|
||||
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
|
@ -261,6 +316,18 @@ F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
|||
}
|
||||
}
|
||||
|
||||
F32F64 IREmitter::FPSub(const F32F64& a, const F32F64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
}
|
||||
switch (a.Type()) {
|
||||
case Type::F32:
|
||||
return Inst<F32>(Opcode::FPSub32, a, b);
|
||||
default:
|
||||
ThrowInvalidType(a.Type());
|
||||
}
|
||||
}
|
||||
|
||||
Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
|
||||
if (e1.Type() != e2.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
|
||||
|
@ -612,6 +679,10 @@ F32F64 IREmitter::FPTrunc(const F32F64& value) {
|
|||
}
|
||||
}
|
||||
|
||||
F32 IREmitter::Fract(const F32& value) {
|
||||
return Inst<F32>(Opcode::FPFract, value);
|
||||
}
|
||||
|
||||
U1 IREmitter::FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered) {
|
||||
if (lhs.Type() != rhs.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
|
||||
|
|
|
@ -41,6 +41,7 @@ public:
|
|||
|
||||
void Prologue();
|
||||
void Epilogue();
|
||||
void Discard();
|
||||
|
||||
U32 GetUserData(IR::ScalarReg reg);
|
||||
|
||||
|
@ -54,9 +55,14 @@ public:
|
|||
[[nodiscard]] U1 GetGotoVariable(u32 id);
|
||||
void SetGotoVariable(u32 id, const U1& value);
|
||||
|
||||
[[nodiscard]] U1 GetScc();
|
||||
[[nodiscard]] U1 GetExec();
|
||||
[[nodiscard]] U1 GetVcc();
|
||||
|
||||
[[nodiscard]] U32 GetVccLo();
|
||||
void SetScc(const U1& value);
|
||||
void SetExec(const U1& value);
|
||||
void SetVcc(const U1& value);
|
||||
void SetVccLo(const U32& value);
|
||||
|
||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||
|
||||
|
@ -72,6 +78,8 @@ public:
|
|||
|
||||
[[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
|
||||
BufferInstInfo info);
|
||||
void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
|
||||
BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U1 GetZeroFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetSignFromOp(const Value& op);
|
||||
|
@ -100,6 +108,7 @@ public:
|
|||
[[nodiscard]] Value UnpackHalf2x16(const U32& value);
|
||||
|
||||
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
|
||||
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
|
||||
[[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
|
||||
[[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
|
||||
|
||||
|
@ -121,6 +130,7 @@ public:
|
|||
[[nodiscard]] F32F64 FPFloor(const F32F64& value);
|
||||
[[nodiscard]] F32F64 FPCeil(const F32F64& value);
|
||||
[[nodiscard]] F32F64 FPTrunc(const F32F64& value);
|
||||
[[nodiscard]] F32 Fract(const F32& value);
|
||||
|
||||
[[nodiscard]] U1 FPEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||
[[nodiscard]] U1 FPNotEqual(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||
|
|
|
@ -45,15 +45,13 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::PhiMove:
|
||||
case Opcode::Prologue:
|
||||
case Opcode::Epilogue:
|
||||
// case Opcode::Join:
|
||||
// case Opcode::Barrier:
|
||||
// case Opcode::WorkgroupMemoryBarrier:
|
||||
// case Opcode::DeviceMemoryBarrier:
|
||||
// case Opcode::EmitVertex:
|
||||
// case Opcode::EndPrimitive:
|
||||
case Opcode::Discard:
|
||||
case Opcode::SetAttribute:
|
||||
// case Opcode::SetFragColor:
|
||||
// case Opcode::SetFragDepth:
|
||||
case Opcode::StoreBufferF32:
|
||||
case Opcode::StoreBufferF32x2:
|
||||
case Opcode::StoreBufferF32x3:
|
||||
case Opcode::StoreBufferF32x4:
|
||||
case Opcode::StoreBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -12,10 +12,12 @@ OPCODE(PhiMove, Void, Opaq
|
|||
// Special operations
|
||||
OPCODE(Prologue, Void, )
|
||||
OPCODE(Epilogue, Void, )
|
||||
OPCODE(Discard, Void, )
|
||||
|
||||
// Constant memory operations
|
||||
OPCODE(ReadConst, U32, U64, U32, )
|
||||
OPCODE(ReadConstBuffer, F32, Opaque, U32, )
|
||||
OPCODE(ReadConstBufferU32, U32, Opaque, U32, )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetUserData, U32, ScalarReg, )
|
||||
|
@ -30,10 +32,14 @@ OPCODE(GetAttributeU32, U32, Attr
|
|||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||
|
||||
// Flags
|
||||
//OPCODE(GetScc, U1, Void, )
|
||||
OPCODE(GetVcc, U1, Void, )
|
||||
//OPCODE(SetScc, Void, U1, )
|
||||
OPCODE(SetVcc, Void, U1, )
|
||||
OPCODE(GetScc, U1, Void, )
|
||||
OPCODE(GetExec, U1, Void, )
|
||||
OPCODE(GetVcc, U1, Void, )
|
||||
OPCODE(GetVccLo, U32, Void, )
|
||||
OPCODE(SetScc, Void, U1, )
|
||||
OPCODE(SetExec, Void, U1, )
|
||||
OPCODE(SetVcc, Void, U1, )
|
||||
OPCODE(SetVccLo, Void, U32, )
|
||||
|
||||
// Undefined
|
||||
OPCODE(UndefU1, U1, )
|
||||
|
@ -47,6 +53,12 @@ OPCODE(LoadBufferF32, F32, Opaq
|
|||
OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
|
||||
OPCODE(LoadBufferU32, U32, Opaque, Opaque, )
|
||||
OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, )
|
||||
OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
|
||||
OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
|
||||
OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
|
||||
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
|
||||
|
||||
// Vector utility
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
|
@ -114,6 +126,7 @@ OPCODE(FPAbs32, F32, F32,
|
|||
OPCODE(FPAbs64, F64, F64, )
|
||||
OPCODE(FPAdd32, F32, F32, F32, )
|
||||
OPCODE(FPAdd64, F64, F64, F64, )
|
||||
OPCODE(FPSub32, F32, F32, F32, )
|
||||
OPCODE(FPFma32, F32, F32, F32, F32, )
|
||||
OPCODE(FPFma64, F64, F64, F64, F64, )
|
||||
OPCODE(FPMax32, F32, F32, F32, )
|
||||
|
@ -145,6 +158,7 @@ OPCODE(FPCeil32, F32, F32,
|
|||
OPCODE(FPCeil64, F64, F64, )
|
||||
OPCODE(FPTrunc32, F32, F32, )
|
||||
OPCODE(FPTrunc64, F64, F64, )
|
||||
OPCODE(FPFract, F32, F32, )
|
||||
|
||||
OPCODE(FPOrdEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdEqual64, U1, F64, F64, )
|
||||
|
|
|
@ -88,15 +88,6 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
|||
inst.ReplaceUsesWith(arg_inst->Arg(0));
|
||||
return;
|
||||
}
|
||||
// if constexpr (op == IR::Opcode::BitCastF32U32) {
|
||||
// if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
// // Replace the bitcast with a typed constant buffer read
|
||||
// inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
|
||||
// inst.SetArg(0, arg_inst->Arg(0));
|
||||
// inst.SetArg(1, arg_inst->Arg(1));
|
||||
// return;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
|
||||
|
@ -249,6 +240,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::IAdd32:
|
||||
return FoldAdd<u32>(block, inst);
|
||||
case IR::Opcode::ISub32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; });
|
||||
return;
|
||||
case IR::Opcode::ConvertF32U32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
|
||||
return;
|
||||
case IR::Opcode::IMul32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
||||
return;
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void DeadCodeEliminationPass(IR::BlockList& program) {
|
||||
// We iterate over the instructions in reverse order.
|
||||
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
||||
for (IR::Block* const block : program) {
|
||||
auto it{block->end()};
|
||||
while (it != block->begin()) {
|
||||
--it;
|
||||
if (!it->HasUses() && !it->MayHaveSideEffects()) {
|
||||
it->Invalidate();
|
||||
it = block->Instructions().erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
34
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
Normal file
34
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
Normal file
|
@ -0,0 +1,34 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <vector>
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void IdentityRemovalPass(IR::BlockList& program) {
|
||||
std::vector<IR::Inst*> to_invalidate;
|
||||
for (IR::Block* const block : program) {
|
||||
for (auto inst = block->begin(); inst != block->end();) {
|
||||
const size_t num_args{inst->NumArgs()};
|
||||
for (size_t i = 0; i < num_args; ++i) {
|
||||
IR::Value arg;
|
||||
while ((arg = inst->Arg(i)).IsIdentity()) {
|
||||
inst->SetArg(i, arg.Inst()->Arg(0));
|
||||
}
|
||||
}
|
||||
if (inst->GetOpcode() == IR::Opcode::Identity ||
|
||||
inst->GetOpcode() == IR::Opcode::Void) {
|
||||
to_invalidate.push_back(&*inst);
|
||||
inst = block->Instructions().erase(inst);
|
||||
} else {
|
||||
++inst;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (IR::Inst* const inst : to_invalidate) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
|
@ -2,8 +2,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <optional>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
|
@ -27,26 +25,54 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
|||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
IR::Type BufferLoadType(const IR::Inst& inst) {
|
||||
IR::Type BufferDataType(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::LoadBufferF32:
|
||||
case IR::Opcode::LoadBufferF32x2:
|
||||
case IR::Opcode::LoadBufferF32x3:
|
||||
case IR::Opcode::LoadBufferF32x4:
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
return IR::Type::F32;
|
||||
case IR::Opcode::LoadBufferU32:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
return IR::Type::U32;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
bool IsBufferStore(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::StoreBufferF32:
|
||||
case IR::Opcode::StoreBufferF32x2:
|
||||
case IR::Opcode::StoreBufferF32x3:
|
||||
case IR::Opcode::StoreBufferF32x4:
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsImageInstruction(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageSampleExplicitLod:
|
||||
|
@ -157,10 +183,10 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.sgpr_base = sharp.sgpr_base,
|
||||
.dword_offset = sharp.dword_offset,
|
||||
.stride = u32(buffer.stride),
|
||||
.stride = buffer.GetStride(),
|
||||
.num_records = u32(buffer.num_records),
|
||||
.used_types = BufferLoadType(inst),
|
||||
.is_storage = /*buffer.base_address % 64 != 0*/ true,
|
||||
.used_types = BufferDataType(inst),
|
||||
.is_storage = true || IsBufferStore(inst),
|
||||
});
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
@ -171,17 +197,18 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
|
||||
}
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
|
||||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
|
||||
return;
|
||||
}
|
||||
// Calculate buffer address.
|
||||
const u32 dword_stride = buffer.stride / sizeof(u32);
|
||||
const u32 dword_stride = buffer.GetStrideElements(sizeof(u32));
|
||||
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
|
||||
IR::U32 address = ir.Imm32(dword_offset);
|
||||
if (inst_info.index_enable && inst_info.offset_enable) {
|
||||
UNREACHABLE();
|
||||
} else if (inst_info.index_enable) {
|
||||
const IR::U32 index{inst.Arg(1)};
|
||||
IR::U32 index{inst.Arg(1)};
|
||||
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
|
||||
} else if (inst_info.offset_enable) {
|
||||
const IR::U32 offset{inst.Arg(1)};
|
||||
|
@ -245,6 +272,36 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// When loading data from untyped buffer we don't have if it is float or integer.
|
||||
// Most of the time it is float so that is the default. This pass detects float buffer loads
|
||||
// combined with bitcasts and patches them to be integer loads.
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) {
|
||||
continue;
|
||||
}
|
||||
// Replace the bitcast with a typed buffer read
|
||||
IR::Inst* const arg_inst{inst.Arg(0).TryInstRecursive()};
|
||||
if (!arg_inst) {
|
||||
continue;
|
||||
}
|
||||
const auto replace{[&](IR::Opcode new_opcode) {
|
||||
inst.ReplaceOpcode(new_opcode);
|
||||
inst.SetArg(0, arg_inst->Arg(0));
|
||||
inst.SetArg(1, arg_inst->Arg(1));
|
||||
inst.SetFlags(arg_inst->Flags<u32>());
|
||||
arg_inst->Invalidate();
|
||||
}};
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
replace(IR::Opcode::ReadConstBufferU32);
|
||||
}
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::LoadBufferF32) {
|
||||
replace(IR::Opcode::LoadBufferU32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
|
|
|
@ -17,10 +17,8 @@
|
|||
#include <span>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
|
@ -30,11 +28,10 @@ namespace {
|
|||
struct FlagTag {
|
||||
auto operator<=>(const FlagTag&) const noexcept = default;
|
||||
};
|
||||
struct ZeroFlagTag : FlagTag {};
|
||||
struct SignFlagTag : FlagTag {};
|
||||
struct CarryFlagTag : FlagTag {};
|
||||
struct OverflowFlagTag : FlagTag {};
|
||||
struct SccFlagTag : FlagTag {};
|
||||
struct ExecFlagTag : FlagTag {};
|
||||
struct VccFlagTag : FlagTag {};
|
||||
struct VccLoTag : FlagTag {};
|
||||
|
||||
struct GotoVariable : FlagTag {
|
||||
GotoVariable() = default;
|
||||
|
@ -45,8 +42,8 @@ struct GotoVariable : FlagTag {
|
|||
u32 index;
|
||||
};
|
||||
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, ZeroFlagTag, SignFlagTag, CarryFlagTag,
|
||||
OverflowFlagTag, GotoVariable, VccFlagTag>;
|
||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
||||
VccFlagTag, VccLoTag>;
|
||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||
|
||||
struct DefTable {
|
||||
|
@ -71,32 +68,25 @@ struct DefTable {
|
|||
goto_vars[variable.index].insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
|
||||
return zero_flag[block];
|
||||
const IR::Value& Def(IR::Block* block, SccFlagTag) {
|
||||
return scc_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
|
||||
zero_flag.insert_or_assign(block, value);
|
||||
void SetDef(IR::Block* block, SccFlagTag, const IR::Value& value) {
|
||||
scc_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, SignFlagTag) {
|
||||
return sign_flag[block];
|
||||
const IR::Value& Def(IR::Block* block, ExecFlagTag) {
|
||||
return exec_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
|
||||
sign_flag.insert_or_assign(block, value);
|
||||
void SetDef(IR::Block* block, ExecFlagTag, const IR::Value& value) {
|
||||
exec_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, CarryFlagTag) {
|
||||
return carry_flag[block];
|
||||
const IR::Value& Def(IR::Block* block, VccLoTag) {
|
||||
return vcc_lo_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
|
||||
carry_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
|
||||
return overflow_flag[block];
|
||||
}
|
||||
void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
|
||||
overflow_flag.insert_or_assign(block, value);
|
||||
void SetDef(IR::Block* block, VccLoTag, const IR::Value& value) {
|
||||
vcc_lo_flag.insert_or_assign(block, value);
|
||||
}
|
||||
|
||||
const IR::Value& Def(IR::Block* block, VccFlagTag) {
|
||||
|
@ -107,12 +97,10 @@ struct DefTable {
|
|||
}
|
||||
|
||||
std::unordered_map<u32, ValueMap> goto_vars;
|
||||
ValueMap indirect_branch_var;
|
||||
ValueMap zero_flag;
|
||||
ValueMap sign_flag;
|
||||
ValueMap carry_flag;
|
||||
ValueMap overflow_flag;
|
||||
ValueMap scc_flag;
|
||||
ValueMap exec_flag;
|
||||
ValueMap vcc_flag;
|
||||
ValueMap vcc_lo_flag;
|
||||
};
|
||||
|
||||
IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
|
||||
|
@ -306,18 +294,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::SetGotoVariable:
|
||||
pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
|
||||
break;
|
||||
case IR::Opcode::SetExec:
|
||||
pass.WriteVariable(ExecFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetScc:
|
||||
pass.WriteVariable(SccFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::SetVcc:
|
||||
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
// case IR::Opcode::SetSFlag:
|
||||
// pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
|
||||
// break;
|
||||
// case IR::Opcode::SetCFlag:
|
||||
// pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
|
||||
// break;
|
||||
// case IR::Opcode::SetOFlag:
|
||||
// pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
|
||||
// break;
|
||||
case IR::Opcode::SetVccLo:
|
||||
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
|
||||
break;
|
||||
case IR::Opcode::GetScalarRegister: {
|
||||
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
|
||||
|
@ -331,18 +319,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
|||
case IR::Opcode::GetGotoVariable:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
|
||||
break;
|
||||
case IR::Opcode::GetExec:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(ExecFlagTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetScc:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(SccFlagTag{}, block));
|
||||
break;
|
||||
case IR::Opcode::GetVcc:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
|
||||
break;
|
||||
// case IR::Opcode::GetSFlag:
|
||||
// inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
|
||||
// break;
|
||||
// case IR::Opcode::GetCFlag:
|
||||
// inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
|
||||
// break;
|
||||
// case IR::Opcode::GetOFlag:
|
||||
// inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
|
||||
// break;
|
||||
case IR::Opcode::GetVccLo:
|
||||
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -365,44 +353,4 @@ void SsaRewritePass(IR::BlockList& program) {
|
|||
}
|
||||
}
|
||||
|
||||
void IdentityRemovalPass(IR::BlockList& program) {
|
||||
std::vector<IR::Inst*> to_invalidate;
|
||||
for (IR::Block* const block : program) {
|
||||
for (auto inst = block->begin(); inst != block->end();) {
|
||||
const size_t num_args{inst->NumArgs()};
|
||||
for (size_t i = 0; i < num_args; ++i) {
|
||||
IR::Value arg;
|
||||
while ((arg = inst->Arg(i)).IsIdentity()) {
|
||||
inst->SetArg(i, arg.Inst()->Arg(0));
|
||||
}
|
||||
}
|
||||
if (inst->GetOpcode() == IR::Opcode::Identity ||
|
||||
inst->GetOpcode() == IR::Opcode::Void) {
|
||||
to_invalidate.push_back(&*inst);
|
||||
inst = block->Instructions().erase(inst);
|
||||
} else {
|
||||
++inst;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (IR::Inst* const inst : to_invalidate) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
void DeadCodeEliminationPass(IR::BlockList& program) {
|
||||
// We iterate over the instructions in reverse order.
|
||||
// This is because removing an instruction reduces the number of uses for earlier instructions.
|
||||
for (IR::Block* const block : program) {
|
||||
auto it{block->end()};
|
||||
while (it != block->begin()) {
|
||||
--it;
|
||||
if (!it->HasUses() && !it->MayHaveSideEffects()) {
|
||||
it->Invalidate();
|
||||
it = block->Instructions().erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue