video_core: Implement basic compute shaders and more instructions

This commit is contained in:
raphaelthegreat 2024-05-29 01:28:34 +03:00
parent 10bceb1643
commit 58de7ff55a
58 changed files with 1234 additions and 293 deletions

View file

@ -88,15 +88,6 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
inst.ReplaceUsesWith(arg_inst->Arg(0));
return;
}
// if constexpr (op == IR::Opcode::BitCastF32U32) {
// if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
// // Replace the bitcast with a typed constant buffer read
// inst.ReplaceOpcode(IR::Opcode::ReadConstBufferF32);
// inst.SetArg(0, arg_inst->Arg(0));
// inst.SetArg(1, arg_inst->Arg(1));
// return;
// }
// }
}
std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert,
@ -249,6 +240,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::IAdd32:
return FoldAdd<u32>(block, inst);
case IR::Opcode::ISub32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; });
return;
case IR::Opcode::ConvertF32U32:
FoldWhenAllImmediates(inst, [](u32 a) { return static_cast<float>(a); });
return;
case IR::Opcode::IMul32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
return;

View file

@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
void DeadCodeEliminationPass(IR::BlockList& program) {
// We iterate over the instructions in reverse order.
// This is because removing an instruction reduces the number of uses for earlier instructions.
for (IR::Block* const block : program) {
auto it{block->end()};
while (it != block->begin()) {
--it;
if (!it->HasUses() && !it->MayHaveSideEffects()) {
it->Invalidate();
it = block->Instructions().erase(it);
}
}
}
}
} // namespace Shader::Optimization

View file

@ -0,0 +1,34 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <vector>
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
void IdentityRemovalPass(IR::BlockList& program) {
std::vector<IR::Inst*> to_invalidate;
for (IR::Block* const block : program) {
for (auto inst = block->begin(); inst != block->end();) {
const size_t num_args{inst->NumArgs()};
for (size_t i = 0; i < num_args; ++i) {
IR::Value arg;
while ((arg = inst->Arg(i)).IsIdentity()) {
inst->SetArg(i, arg.Inst()->Arg(0));
}
}
if (inst->GetOpcode() == IR::Opcode::Identity ||
inst->GetOpcode() == IR::Opcode::Void) {
to_invalidate.push_back(&*inst);
inst = block->Instructions().erase(inst);
} else {
++inst;
}
}
}
for (IR::Inst* const inst : to_invalidate) {
inst->Invalidate();
}
}
} // namespace Shader::Optimization

View file

@ -2,8 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <bit>
#include <optional>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
@ -27,26 +25,54 @@ bool IsBufferInstruction(const IR::Inst& inst) {
case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3:
case IR::Opcode::LoadBufferF32x4:
case IR::Opcode::LoadBufferU32:
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::ReadConstBufferU32:
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferU32:
return true;
default:
return false;
}
}
IR::Type BufferLoadType(const IR::Inst& inst) {
IR::Type BufferDataType(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferF32:
case IR::Opcode::LoadBufferF32x2:
case IR::Opcode::LoadBufferF32x3:
case IR::Opcode::LoadBufferF32x4:
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
return IR::Type::F32;
case IR::Opcode::LoadBufferU32:
case IR::Opcode::ReadConstBufferU32:
case IR::Opcode::StoreBufferU32:
return IR::Type::U32;
default:
UNREACHABLE();
}
}
bool IsBufferStore(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::StoreBufferF32:
case IR::Opcode::StoreBufferF32x2:
case IR::Opcode::StoreBufferF32x3:
case IR::Opcode::StoreBufferF32x4:
case IR::Opcode::StoreBufferU32:
return true;
default:
return false;
}
}
bool IsImageInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ImageSampleExplicitLod:
@ -157,10 +183,10 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
const u32 binding = descriptors.Add(BufferResource{
.sgpr_base = sharp.sgpr_base,
.dword_offset = sharp.dword_offset,
.stride = u32(buffer.stride),
.stride = buffer.GetStride(),
.num_records = u32(buffer.num_records),
.used_types = BufferLoadType(inst),
.is_storage = /*buffer.base_address % 64 != 0*/ true,
.used_types = BufferDataType(inst),
.is_storage = true || IsBufferStore(inst),
});
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@ -171,17 +197,18 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32);
}
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
return;
}
// Calculate buffer address.
const u32 dword_stride = buffer.stride / sizeof(u32);
const u32 dword_stride = buffer.GetStrideElements(sizeof(u32));
const u32 dword_offset = inst_info.inst_offset.Value() / sizeof(u32);
IR::U32 address = ir.Imm32(dword_offset);
if (inst_info.index_enable && inst_info.offset_enable) {
UNREACHABLE();
} else if (inst_info.index_enable) {
const IR::U32 index{inst.Arg(1)};
IR::U32 index{inst.Arg(1)};
address = ir.IAdd(ir.IMul(index, ir.Imm32(dword_stride)), address);
} else if (inst_info.offset_enable) {
const IR::U32 offset{inst.Arg(1)};
@ -245,6 +272,36 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
}
void ResourceTrackingPass(IR::Program& program) {
// When loading data from untyped buffer we don't have if it is float or integer.
// Most of the time it is float so that is the default. This pass detects float buffer loads
// combined with bitcasts and patches them to be integer loads.
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) {
continue;
}
// Replace the bitcast with a typed buffer read
IR::Inst* const arg_inst{inst.Arg(0).TryInstRecursive()};
if (!arg_inst) {
continue;
}
const auto replace{[&](IR::Opcode new_opcode) {
inst.ReplaceOpcode(new_opcode);
inst.SetArg(0, arg_inst->Arg(0));
inst.SetArg(1, arg_inst->Arg(1));
inst.SetFlags(arg_inst->Flags<u32>());
arg_inst->Invalidate();
}};
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
replace(IR::Opcode::ReadConstBufferU32);
}
if (arg_inst->GetOpcode() == IR::Opcode::LoadBufferF32) {
replace(IR::Opcode::LoadBufferU32);
}
}
}
// Iterate resource instructions and patch them after finding the sharp.
auto& info = program.info;
Descriptors descriptors{info.buffers, info.images, info.samplers};
for (IR::Block* const block : program.post_order_blocks) {

View file

@ -17,10 +17,8 @@
#include <span>
#include <unordered_map>
#include <variant>
#include <vector>
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h"
@ -30,11 +28,10 @@ namespace {
struct FlagTag {
auto operator<=>(const FlagTag&) const noexcept = default;
};
struct ZeroFlagTag : FlagTag {};
struct SignFlagTag : FlagTag {};
struct CarryFlagTag : FlagTag {};
struct OverflowFlagTag : FlagTag {};
struct SccFlagTag : FlagTag {};
struct ExecFlagTag : FlagTag {};
struct VccFlagTag : FlagTag {};
struct VccLoTag : FlagTag {};
struct GotoVariable : FlagTag {
GotoVariable() = default;
@ -45,8 +42,8 @@ struct GotoVariable : FlagTag {
u32 index;
};
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, ZeroFlagTag, SignFlagTag, CarryFlagTag,
OverflowFlagTag, GotoVariable, VccFlagTag>;
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
VccFlagTag, VccLoTag>;
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
struct DefTable {
@ -71,32 +68,25 @@ struct DefTable {
goto_vars[variable.index].insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, ZeroFlagTag) {
return zero_flag[block];
const IR::Value& Def(IR::Block* block, SccFlagTag) {
return scc_flag[block];
}
void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) {
zero_flag.insert_or_assign(block, value);
void SetDef(IR::Block* block, SccFlagTag, const IR::Value& value) {
scc_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, SignFlagTag) {
return sign_flag[block];
const IR::Value& Def(IR::Block* block, ExecFlagTag) {
return exec_flag[block];
}
void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) {
sign_flag.insert_or_assign(block, value);
void SetDef(IR::Block* block, ExecFlagTag, const IR::Value& value) {
exec_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, CarryFlagTag) {
return carry_flag[block];
const IR::Value& Def(IR::Block* block, VccLoTag) {
return vcc_lo_flag[block];
}
void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) {
carry_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, OverflowFlagTag) {
return overflow_flag[block];
}
void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) {
overflow_flag.insert_or_assign(block, value);
void SetDef(IR::Block* block, VccLoTag, const IR::Value& value) {
vcc_lo_flag.insert_or_assign(block, value);
}
const IR::Value& Def(IR::Block* block, VccFlagTag) {
@ -107,12 +97,10 @@ struct DefTable {
}
std::unordered_map<u32, ValueMap> goto_vars;
ValueMap indirect_branch_var;
ValueMap zero_flag;
ValueMap sign_flag;
ValueMap carry_flag;
ValueMap overflow_flag;
ValueMap scc_flag;
ValueMap exec_flag;
ValueMap vcc_flag;
ValueMap vcc_lo_flag;
};
IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
@ -306,18 +294,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::SetGotoVariable:
pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1));
break;
case IR::Opcode::SetExec:
pass.WriteVariable(ExecFlagTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetScc:
pass.WriteVariable(SccFlagTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetVcc:
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
break;
// case IR::Opcode::SetSFlag:
// pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0));
// break;
// case IR::Opcode::SetCFlag:
// pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0));
// break;
// case IR::Opcode::SetOFlag:
// pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0));
// break;
case IR::Opcode::SetVccLo:
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
break;
case IR::Opcode::GetScalarRegister: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
inst.ReplaceUsesWith(pass.ReadVariable(reg, block));
@ -331,18 +319,18 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::GetGotoVariable:
inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block));
break;
case IR::Opcode::GetExec:
inst.ReplaceUsesWith(pass.ReadVariable(ExecFlagTag{}, block));
break;
case IR::Opcode::GetScc:
inst.ReplaceUsesWith(pass.ReadVariable(SccFlagTag{}, block));
break;
case IR::Opcode::GetVcc:
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
break;
// case IR::Opcode::GetSFlag:
// inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block));
// break;
// case IR::Opcode::GetCFlag:
// inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block));
// break;
// case IR::Opcode::GetOFlag:
// inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block));
// break;
case IR::Opcode::GetVccLo:
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
break;
default:
break;
}
@ -365,44 +353,4 @@ void SsaRewritePass(IR::BlockList& program) {
}
}
void IdentityRemovalPass(IR::BlockList& program) {
std::vector<IR::Inst*> to_invalidate;
for (IR::Block* const block : program) {
for (auto inst = block->begin(); inst != block->end();) {
const size_t num_args{inst->NumArgs()};
for (size_t i = 0; i < num_args; ++i) {
IR::Value arg;
while ((arg = inst->Arg(i)).IsIdentity()) {
inst->SetArg(i, arg.Inst()->Arg(0));
}
}
if (inst->GetOpcode() == IR::Opcode::Identity ||
inst->GetOpcode() == IR::Opcode::Void) {
to_invalidate.push_back(&*inst);
inst = block->Instructions().erase(inst);
} else {
++inst;
}
}
}
for (IR::Inst* const inst : to_invalidate) {
inst->Invalidate();
}
}
void DeadCodeEliminationPass(IR::BlockList& program) {
// We iterate over the instructions in reverse order.
// This is because removing an instruction reduces the number of uses for earlier instructions.
for (IR::Block* const block : program) {
auto it{block->end()};
while (it != block->begin()) {
--it;
if (!it->HasUses() && !it->MayHaveSideEffects()) {
it->Invalidate();
it = block->Instructions().erase(it);
}
}
}
}
} // namespace Shader::Optimization