video_core: Implement guest buffer manager (#373)

* video_core: Introduce buffer cache

* video_core: Use multi level page table for caches

* renderer_vulkan: Remove unused stream buffer

* fix build

* oops forgot optimize off
TheTurtle authored 2024-08-08 15:02:10 +03:00, committed by GitHub
parent 159be2c7f4
commit 381ba8c7a5
55 changed files with 2697 additions and 1039 deletions
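
The second commit-message bullet, "Use multi level page table for caches", refers to how the new caches track guest memory pages. A minimal two-level sketch of the idea follows; it is illustrative only (class name, Entry type, and bit widths are assumptions, not the structure this commit actually adds): a coarse first level holds lazily allocated second-level arrays of per-page entries, so sparse guest address ranges stay cheap to index.

// Illustrative two-level page table for per-page cache metadata (sketch only).
#include <array>
#include <cstdint>
#include <memory>
#include <vector>

template <typename Entry, size_t PageBits = 12, size_t L2Bits = 10>
class TwoLevelPageTable {
public:
    explicit TwoLevelPageTable(size_t address_space_bits = 40)
        : level1(size_t{1} << (address_space_bits - PageBits - L2Bits)) {}

    // Returns the entry for the page containing addr, allocating its L2 block on first touch.
    Entry& operator[](uint64_t addr) {
        const uint64_t page = addr >> PageBits;
        auto& l2 = level1[page >> L2Bits];
        if (!l2) {
            l2 = std::make_unique<std::array<Entry, size_t{1} << L2Bits>>();
        }
        return (*l2)[page & ((uint64_t{1} << L2Bits) - 1)];
    }

private:
    std::vector<std::unique_ptr<std::array<Entry, size_t{1} << L2Bits>>> level1;
};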

View file

@ -21,6 +21,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
case VsOutput::ClipDist7: {
const u32 index = u32(output) - u32(VsOutput::ClipDist0);
const Id clip_num{ctx.ConstU32(index)};
ASSERT_MSG(Sirit::ValidId(ctx.clip_distances), "Clip distance used but not defined");
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
}
case VsOutput::CullDist0:
@ -33,6 +34,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
case VsOutput::CullDist7: {
const u32 index = u32(output) - u32(VsOutput::CullDist0);
const Id cull_num{ctx.ConstU32(index)};
ASSERT_MSG(Sirit::ValidId(ctx.cull_distances), "Cull distance used but not defined");
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
}
default:
@ -125,7 +127,12 @@ Id EmitReadConst(EmitContext& ctx) {
}
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle];
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(handle);
}
const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))};
index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords);
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
}
@ -137,7 +144,7 @@ Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) {
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
return ctx.OpLoad(
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.instance_step_rates,
ctx.push_data_block,
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
}
@ -221,7 +228,11 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
template <u32 N>
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
const auto& buffer = ctx.buffers[handle];
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(handle);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
if constexpr (N == 1) {
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
@ -314,7 +325,7 @@ static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offs
}
static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
const auto& buffer = ctx.buffers[handle];
auto& buffer = ctx.buffers[handle];
const auto format = buffer.buffer.GetDataFmt();
switch (format) {
case AmdGpu::DataFormat::FormatInvalid:
@ -399,6 +410,11 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
template <u32 N>
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(handle);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
if constexpr (N == 1) {
return GetBufferFormatValue(ctx, handle, address, 0);
} else {
@ -428,7 +444,11 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad
template <u32 N>
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
const auto& buffer = ctx.buffers[handle];
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(handle);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
if constexpr (N == 1) {
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};

View file

@ -46,9 +46,9 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
stage{program.info.stage}, binding{binding_} {
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces(program);
DefineBuffers(info);
DefineImagesAndSamplers(info);
DefineInterfaces();
DefineBuffers();
DefineImagesAndSamplers();
DefineSharedMemory();
}
@ -117,9 +117,10 @@ void EmitContext::DefineArithmeticTypes() {
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
}
void EmitContext::DefineInterfaces(const IR::Program& program) {
DefineInputs(program.info);
DefineOutputs(program.info);
void EmitContext::DefineInterfaces() {
DefinePushDataBlock();
DefineInputs();
DefineOutputs();
}
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
@ -164,6 +165,16 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
throw InvalidArgument("Invalid attribute type {}", fmt);
}
Id EmitContext::GetBufferOffset(u32 binding) {
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
switch (default_value) {
case 0:
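
For reference, GetBufferOffset above reads PushData::buf_offsets (declared later in this commit as std::array<u8, 32>) through the two uint[4] members of the AuxData push-constant block, so each 32-bit word packs four 8-bit buffer offsets. A host-side mirror of the same math, assuming the explicit member offsets declared in DefinePushDataBlock and a little-endian host; the function name is made up for illustration:

// Host-side mirror of EmitContext::GetBufferOffset (not part of the commit).
// binding selects one byte of PushData::buf_offsets, viewed as u32 words the way the shader sees them:
//   member  = BufOffsetIndex + (binding >> 4)   -> buf_offsets0 or buf_offsets1
//   element = (binding & 0xf) >> 2              -> u32 within that member
//   bit     = (binding & 0x3) << 3              -> byte position inside the u32
#include <cstdint>
#include <cstring>

uint32_t UnpackBufferOffset(const uint8_t (&buf_offsets)[32], uint32_t binding) {
    uint32_t word;
    std::memcpy(&word, &buf_offsets[(binding >> 2) * 4], sizeof(word));
    const uint32_t bit = (binding & 0x3u) << 3u;
    return (word >> bit) & 0xffu;  // equivalent to OpBitFieldUExtract(word, bit, 8)
}

On a little-endian host this simply returns buf_offsets[binding]; the indirection exists because push constants are only addressable as 32-bit words on the SPIR-V side.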
@ -179,24 +190,13 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
}
}
void EmitContext::DefineInputs(const Info& info) {
void EmitContext::DefineInputs() {
switch (stage) {
case Stage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
// Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(instance_step_rates, "step_rates");
interfaces.push_back(instance_step_rates);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
@ -260,19 +260,20 @@ void EmitContext::DefineInputs(const Info& info) {
}
}
void EmitContext::DefineOutputs(const Info& info) {
void EmitContext::DefineOutputs() {
switch (stage) {
case Stage::Vertex: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value,
f32_zero_value, f32_zero_value, f32_zero_value,
f32_zero_value, f32_zero_value};
const Id type{TypeArray(F32[1], ConstU32(8U))};
const Id initializer{ConstantComposite(type, zero)};
clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output,
initializer);
cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output,
initializer);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
info.stores.Get(IR::Attribute::Position3);
if (has_extra_pos_stores) {
const Id type{TypeArray(F32[1], ConstU32(8U))};
clip_distances =
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
@ -304,7 +305,24 @@ void EmitContext::DefineOutputs(const Info& info) {
}
}
void EmitContext::DefineBuffers(const Info& info) {
void EmitContext::DefinePushDataBlock() {
// Create push constants block for instance step rates and buffer offsets
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4]), "AuxData")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberName(struct_type, 2, "buf_offsets0");
MemberName(struct_type, 3, "buf_offsets1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(push_data_block, "push_data");
interfaces.push_back(push_data_block);
}
void EmitContext::DefineBuffers() {
boost::container::small_vector<Id, 8> type_ids;
for (u32 i = 0; const auto& buffer : info.buffers) {
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
@ -322,8 +340,8 @@ void EmitContext::DefineBuffers(const Info& info) {
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
type_ids.push_back(record_array_type);
}
type_ids.push_back(record_array_type);
const auto storage_class =
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
@ -430,7 +448,7 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
throw InvalidArgument("Invalid texture type {}", desc.type);
}
void EmitContext::DefineImagesAndSamplers(const Info& info) {
void EmitContext::DefineImagesAndSamplers() {
for (const auto& image_desc : info.images) {
const VectorIds* data_types = [&] {
switch (image_desc.nfmt) {

View file

@ -40,6 +40,7 @@ public:
~EmitContext();
Id Def(const IR::Value& value);
Id GetBufferOffset(u32 binding);
[[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
@ -168,7 +169,7 @@ public:
Id output_position{};
Id vertex_index{};
Id instance_id{};
Id instance_step_rates{};
Id push_data_block{};
Id base_vertex{};
Id frag_coord{};
Id front_facing{};
@ -201,14 +202,15 @@ public:
struct BufferDefinition {
Id id;
Id offset;
const VectorIds* data_types;
Id pointer_type;
AmdGpu::Buffer buffer;
};
u32& binding;
boost::container::small_vector<BufferDefinition, 4> buffers;
boost::container::small_vector<TextureDefinition, 4> images;
boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureDefinition, 8> images;
boost::container::small_vector<Id, 4> samplers;
Id sampler_type{};
@ -227,11 +229,12 @@ public:
private:
void DefineArithmeticTypes();
void DefineInterfaces(const IR::Program& program);
void DefineInputs(const Info& info);
void DefineOutputs(const Info& info);
void DefineBuffers(const Info& info);
void DefineImagesAndSamplers(const Info& info);
void DefineInterfaces();
void DefineInputs();
void DefineOutputs();
void DefinePushDataBlock();
void DefineBuffers();
void DefineImagesAndSamplers();
void DefineSharedMemory();
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);

View file

@ -40,7 +40,7 @@ static IR::Condition MakeCondition(Opcode opcode) {
}
}
CFG::CFG(ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
: block_pool{block_pool_}, inst_list{inst_list_} {
index_to_pc.resize(inst_list.size() + 1);
EmitLabels();

View file

@ -8,10 +8,10 @@
#include <boost/container/small_vector.hpp>
#include <boost/intrusive/set.hpp>
#include "common/object_pool.h"
#include "common/types.h"
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/condition.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Gcn {
@ -49,7 +49,7 @@ class CFG {
using Label = u32;
public:
explicit CFG(ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);
explicit CFG(Common::ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);
[[nodiscard]] std::string Dot() const;
@ -59,7 +59,7 @@ private:
void LinkBlocks();
public:
ObjectPool<Block>& block_pool;
Common::ObjectPool<Block>& block_pool;
std::span<const GcnInst> inst_list;
std::vector<u32> index_to_pc;
boost::container::small_vector<Label, 16> labels;

View file

@ -1,10 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
namespace Shader::Gcn {
void Translate();
} // namespace Shader::Gcn

View file

@ -287,7 +287,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
*/
class GotoPass {
public:
explicit GotoPass(CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
explicit GotoPass(CFG& cfg, Common::ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
std::vector gotos{BuildTree(cfg)};
const auto end{gotos.rend()};
for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
@ -563,7 +563,7 @@ private:
return parent_tree.insert(std::next(loop), *new_goto);
}
ObjectPool<Statement>& pool;
Common::ObjectPool<Statement>& pool;
Statement root_stmt{FunctionTag{}};
};
@ -597,8 +597,9 @@ private:
class TranslatePass {
public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
TranslatePass(Common::ObjectPool<IR::Inst>& inst_pool_,
Common::ObjectPool<IR::Block>& block_pool_,
Common::ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Info& info_, const Profile& profile_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
@ -808,9 +809,9 @@ private:
return block_pool.Create(inst_pool);
}
ObjectPool<Statement>& stmt_pool;
ObjectPool<IR::Inst>& inst_pool;
ObjectPool<IR::Block>& block_pool;
Common::ObjectPool<Statement>& stmt_pool;
Common::ObjectPool<IR::Inst>& inst_pool;
Common::ObjectPool<IR::Block>& block_pool;
IR::AbstractSyntaxList& syntax_list;
const Block dummy_flow_block{.is_dummy = true};
std::span<const GcnInst> inst_list;
@ -819,9 +820,10 @@ private:
};
} // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
CFG& cfg, Info& info, const Profile& profile) {
ObjectPool<Statement> stmt_pool{64};
IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg, Info& info,
const Profile& profile) {
Common::ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;

View file

@ -7,7 +7,6 @@
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/value.h"
#include "shader_recompiler/object_pool.h"
namespace Shader {
struct Info;
@ -16,8 +15,8 @@ struct Profile;
namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg,
[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg,
Info& info, const Profile& profile);
} // namespace Shader::Gcn

View file

@ -447,6 +447,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
.is_instance_data = true,
});
instance_buf_handle = s32(info.buffers.size() - 1);
info.uses_step_rates = true;
}
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());

View file

@ -338,6 +338,11 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma
if (is_typed) {
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
ASSERT(info.nfmt == AmdGpu::NumberFormat::Float &&
(info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
info.dmft == AmdGpu::DataFormat::Format32_32 ||
info.dmft == AmdGpu::DataFormat::Format32));
}
const IR::Value handle =

View file

@ -9,7 +9,7 @@
namespace Shader::IR {
Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
Block::Block(Common::ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
Block::~Block() = default;

View file

@ -9,10 +9,10 @@
#include <vector>
#include <boost/intrusive/list.hpp>
#include "common/object_pool.h"
#include "common/types.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/value.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::IR {
@ -25,7 +25,7 @@ public:
using reverse_iterator = InstructionList::reverse_iterator;
using const_reverse_iterator = InstructionList::const_reverse_iterator;
explicit Block(ObjectPool<Inst>& inst_pool_);
explicit Block(Common::ObjectPool<Inst>& inst_pool_);
~Block();
Block(const Block&) = delete;
@ -153,7 +153,7 @@ public:
private:
/// Memory pool for instruction list
ObjectPool<Inst>* inst_pool;
Common::ObjectPool<Inst>* inst_pool;
/// List of instructions in this block
InstructionList instructions;

View file

@ -173,10 +173,9 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
class Descriptors {
public:
explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_,
SamplerResourceList& sampler_resources_)
: buffer_resources{buffer_resources_}, image_resources{image_resources_},
sampler_resources{sampler_resources_} {}
explicit Descriptors(Info& info_)
: info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images},
sampler_resources{info_.samplers} {}
u32 Add(const BufferResource& desc) {
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
@ -188,6 +187,7 @@ public:
ASSERT(buffer.length == desc.length);
buffer.is_storage |= desc.is_storage;
buffer.used_types |= desc.used_types;
buffer.is_written |= desc.is_written;
return index;
}
@ -201,9 +201,16 @@ public:
}
u32 Add(const SamplerResource& desc) {
const u32 index{Add(sampler_resources, desc, [&desc](const auto& existing) {
return desc.sgpr_base == existing.sgpr_base &&
desc.dword_offset == existing.dword_offset;
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
if (desc.sgpr_base == existing.sgpr_base &&
desc.dword_offset == existing.dword_offset) {
return true;
}
// Samplers with different bindings might still be the same.
const auto old_sharp =
info.ReadUd<AmdGpu::Sampler>(existing.sgpr_base, existing.dword_offset);
const auto new_sharp = info.ReadUd<AmdGpu::Sampler>(desc.sgpr_base, desc.dword_offset);
return old_sharp == new_sharp;
})};
return index;
}
@ -219,6 +226,7 @@ private:
return static_cast<u32>(descriptors.size()) - 1;
}
const Info& info;
BufferResourceList& buffer_resources;
ImageResourceList& image_resources;
SamplerResourceList& sampler_resources;
@ -328,16 +336,6 @@ static bool IsLoadBufferFormat(const IR::Inst& inst) {
}
}
static bool IsReadConstBuffer(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ReadConstBuffer:
case IR::Opcode::ReadConstBufferU32:
return true;
default:
return false;
}
}
static u32 BufferLength(const AmdGpu::Buffer& buffer) {
const auto stride = buffer.GetStride();
if (stride < sizeof(f32)) {
@ -401,30 +399,37 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
IR::Inst* handle = inst.Arg(0).InstRecursive();
IR::Inst* producer = handle->Arg(0).InstRecursive();
const auto sharp = TrackSharp(producer);
const bool is_store = IsBufferStore(inst);
buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
binding = descriptors.Add(BufferResource{
.sgpr_base = sharp.sgpr_base,
.dword_offset = sharp.dword_offset,
.length = BufferLength(buffer),
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
.is_storage = is_store || buffer.GetSize() > MaxUboSize,
.is_written = is_store,
});
}
// Update buffer descriptor format.
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
// Replace handle with binding index in buffer resource list.
inst.SetArg(0, ir.Imm32(binding));
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
auto& buffer_desc = info.buffers[binding];
if (inst_info.is_typed) {
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
(inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
inst_info.dmft == AmdGpu::DataFormat::Format32));
buffer_desc.dfmt = inst_info.dmft;
buffer_desc.nfmt = inst_info.nfmt;
} else {
buffer_desc.dfmt = buffer.GetDataFmt();
buffer_desc.nfmt = buffer.GetNumberFmt();
}
if (IsReadConstBuffer(inst)) {
// Replace handle with binding index in buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding));
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
// Address of constant buffer reads can be calculated at IR emission time.
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
return;
}
@ -434,10 +439,14 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}
} else {
const u32 stride = buffer.GetStride();
ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}",
stride);
if (stride < 4) {
LOG_WARNING(Render_Vulkan,
"non-formatting load_buffer_* is not implemented for stride {}", stride);
}
}
// Compute address of the buffer using the stride.
// Todo: What if buffer is rebound with different stride?
IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());
if (inst_info.index_enable) {
const IR::U32 index = inst_info.offset_enable ? IR::U32{ir.CompositeExtract(inst.Arg(1), 0)}
@ -587,39 +596,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
}
void ResourceTrackingPass(IR::Program& program) {
// When loading data from untyped buffer we don't have if it is float or integer.
// Most of the time it is float so that is the default. This pass detects float buffer loads
// combined with bitcasts and patches them to be integer loads.
for (IR::Block* const block : program.post_order_blocks) {
break;
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) {
continue;
}
// Replace the bitcast with a typed buffer read
IR::Inst* const arg_inst{inst.Arg(0).TryInstRecursive()};
if (!arg_inst) {
continue;
}
const auto replace{[&](IR::Opcode new_opcode) {
inst.ReplaceOpcode(new_opcode);
inst.SetArg(0, arg_inst->Arg(0));
inst.SetArg(1, arg_inst->Arg(1));
inst.SetFlags(arg_inst->Flags<u32>());
arg_inst->Invalidate();
}};
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
replace(IR::Opcode::ReadConstBufferU32);
}
if (arg_inst->GetOpcode() == IR::Opcode::LoadBufferF32) {
replace(IR::Opcode::LoadBufferU32);
}
}
}
// Iterate resource instructions and patch them after finding the sharp.
auto& info = program.info;
Descriptors descriptors{info.buffers, info.images, info.samplers};
Descriptors descriptors{info};
for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (IsBufferInstruction(inst)) {

View file

@ -1,107 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
namespace Shader {
template <typename T>
requires std::is_destructible_v<T>
class ObjectPool {
public:
explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
node = &chunks.emplace_back(new_chunk_size);
}
template <typename... Args>
requires std::is_constructible_v<T, Args...>
[[nodiscard]] T* Create(Args&&... args) {
return std::construct_at(Memory(), std::forward<Args>(args)...);
}
void ReleaseContents() {
if (chunks.empty()) {
return;
}
Chunk& root{chunks.front()};
if (root.used_objects == root.num_objects) {
// Root chunk has been filled, squash allocations into it
const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)};
chunks.clear();
chunks.emplace_back(total_objects);
} else {
root.Release();
chunks.resize(1);
}
chunks.shrink_to_fit();
node = &chunks.front();
}
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
};
union Storage {
Storage() noexcept {}
~Storage() noexcept {}
NonTrivialDummy dummy{};
T object;
};
struct Chunk {
explicit Chunk() = default;
explicit Chunk(size_t size)
: num_objects{size}, storage{std::make_unique<Storage[]>(size)} {}
Chunk& operator=(Chunk&& rhs) noexcept {
Release();
used_objects = std::exchange(rhs.used_objects, 0);
num_objects = std::exchange(rhs.num_objects, 0);
storage = std::move(rhs.storage);
return *this;
}
Chunk(Chunk&& rhs) noexcept
: used_objects{std::exchange(rhs.used_objects, 0)},
num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {}
~Chunk() {
Release();
}
void Release() {
std::destroy_n(storage.get(), used_objects);
used_objects = 0;
}
size_t used_objects{};
size_t num_objects{};
std::unique_ptr<Storage[]> storage;
};
[[nodiscard]] T* Memory() {
Chunk* const chunk{FreeChunk()};
return &chunk->storage[chunk->used_objects++].object;
}
[[nodiscard]] Chunk* FreeChunk() {
if (node->used_objects != node->num_objects) {
return node;
}
node = &chunks.emplace_back(new_chunk_size);
return node;
}
Chunk* node{};
std::vector<Chunk> chunks;
size_t new_chunk_size{};
};
} // namespace Shader

View file

@ -27,9 +27,9 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
return blocks;
}
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
std::span<const u32> token, const Info&& info,
const Profile& profile) {
IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
const Info&& info, const Profile& profile) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -45,7 +45,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
}
// Create control flow graph
ObjectPool<Gcn::Block> gcn_block_pool{64};
Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program.
@ -61,7 +61,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::CollectShaderInfoPass(program);
LOG_INFO(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
return program;
}

View file

@ -3,16 +3,16 @@
#pragma once
#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader {
struct Profile;
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool,
[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
Common::ObjectPool<IR::Block>& block_pool,
std::span<const u32> code, const Info&& info,
const Profile& profile);

View file

@ -77,8 +77,11 @@ struct BufferResource {
u32 length;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
bool is_storage{false};
bool is_instance_data{false};
AmdGpu::DataFormat dfmt;
AmdGpu::NumberFormat nfmt;
bool is_storage{};
bool is_instance_data{};
bool is_written{};
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
};
@ -105,6 +108,19 @@ struct SamplerResource {
};
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;
struct PushData {
static constexpr size_t BufOffsetIndex = 2;
u32 step0;
u32 step1;
std::array<u8, 32> buf_offsets;
void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 64 && binding < 32);
buf_offsets[binding] = offset;
}
};
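
A sketch of the intended producer side for this block; everything except PushData, AddOffset, Info::buffers, and GetVsharp is a placeholder, since the renderer and buffer-cache plumbing live in other files of this commit:

// Hypothetical binding loop: record each buffer's sub-allocation remainder so the
// shader-side GetBufferOffset can add it back to load/store addresses.
// BufferCache and ObtainBuffer are stand-ins, not the actual API.
Shader::PushData BuildPushData(const Shader::Info& info, VideoCore::BufferCache& buffer_cache) {
    Shader::PushData push_data{};
    for (u32 binding = 0; const auto& desc : info.buffers) {
        const auto vsharp = desc.GetVsharp(info);
        const auto [buffer, device_offset] = buffer_cache.ObtainBuffer(vsharp);  // placeholder call
        push_data.AddOffset(binding++, device_offset & 0x3fu);  // keep it < 64 to satisfy the ASSERT above
    }
    return push_data;  // uploaded with vkCmdPushConstants alongside the descriptor sets
}

The two step-rate words at the front of the block would presumably be filled from the instance step rate registers at the same point.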
struct Info {
struct VsInput {
enum InstanceIdType : u8 {
@ -182,6 +198,7 @@ struct Info {
bool uses_shared_u8{};
bool uses_shared_u16{};
bool uses_fp16{};
bool uses_step_rates{};
bool translation_failed{}; // indicates that shader has unsupported instructions
template <typename T>