mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-04 16:16:20 +00:00
video_core: Implement guest buffer manager (#373)
* video_core: Introduce buffer cache * video_core: Use multi level page table for caches * renderer_vulkan: Remove unused stream buffer * fix build * oops forgot optimize off
This commit is contained in:
parent
159be2c7f4
commit
381ba8c7a5
55 changed files with 2697 additions and 1039 deletions
|
@ -21,6 +21,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
|||
case VsOutput::ClipDist7: {
|
||||
const u32 index = u32(output) - u32(VsOutput::ClipDist0);
|
||||
const Id clip_num{ctx.ConstU32(index)};
|
||||
ASSERT_MSG(Sirit::ValidId(ctx.clip_distances), "Clip distance used but not defined");
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
|
||||
}
|
||||
case VsOutput::CullDist0:
|
||||
|
@ -33,6 +34,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
|||
case VsOutput::CullDist7: {
|
||||
const u32 index = u32(output) - u32(VsOutput::CullDist0);
|
||||
const Id cull_num{ctx.ConstU32(index)};
|
||||
ASSERT_MSG(Sirit::ValidId(ctx.cull_distances), "Cull distance used but not defined");
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
|
||||
}
|
||||
default:
|
||||
|
@ -125,7 +127,12 @@ Id EmitReadConst(EmitContext& ctx) {
|
|||
}
|
||||
|
||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(handle);
|
||||
}
|
||||
const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))};
|
||||
index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords);
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
|
||||
}
|
||||
|
@ -137,7 +144,7 @@ Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) {
|
|||
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
||||
return ctx.OpLoad(
|
||||
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
|
||||
ctx.instance_step_rates,
|
||||
ctx.push_data_block,
|
||||
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
|
||||
}
|
||||
|
||||
|
@ -221,7 +228,11 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
|||
|
||||
template <u32 N>
|
||||
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(handle);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
if constexpr (N == 1) {
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
|
@ -314,7 +325,7 @@ static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offs
|
|||
}
|
||||
|
||||
static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
const auto format = buffer.buffer.GetDataFmt();
|
||||
switch (format) {
|
||||
case AmdGpu::DataFormat::FormatInvalid:
|
||||
|
@ -399,6 +410,11 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
|
|||
|
||||
template <u32 N>
|
||||
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(handle);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
if constexpr (N == 1) {
|
||||
return GetBufferFormatValue(ctx, handle, address, 0);
|
||||
} else {
|
||||
|
@ -428,7 +444,11 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad
|
|||
|
||||
template <u32 N>
|
||||
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(handle);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
if constexpr (N == 1) {
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
|
|
|
@ -46,9 +46,9 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
|
|||
stage{program.info.stage}, binding{binding_} {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
DefineInterfaces(program);
|
||||
DefineBuffers(info);
|
||||
DefineImagesAndSamplers(info);
|
||||
DefineInterfaces();
|
||||
DefineBuffers();
|
||||
DefineImagesAndSamplers();
|
||||
DefineSharedMemory();
|
||||
}
|
||||
|
||||
|
@ -117,9 +117,10 @@ void EmitContext::DefineArithmeticTypes() {
|
|||
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
|
||||
}
|
||||
|
||||
void EmitContext::DefineInterfaces(const IR::Program& program) {
|
||||
DefineInputs(program.info);
|
||||
DefineOutputs(program.info);
|
||||
void EmitContext::DefineInterfaces() {
|
||||
DefinePushDataBlock();
|
||||
DefineInputs();
|
||||
DefineOutputs();
|
||||
}
|
||||
|
||||
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
||||
|
@ -164,6 +165,16 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
throw InvalidArgument("Invalid attribute type {}", fmt);
|
||||
}
|
||||
|
||||
// Emits SPIR-V that loads the 8-bit offset stored for buffer `binding` in the
// push constant block (push_data_block) and returns the Id of the extracted U32.
// Offsets are packed four per 32-bit component; this function locates the struct
// member, the component inside it and the byte inside that component.
Id EmitContext::GetBufferOffset(u32 binding) {
|
||||
// Struct member index: starts at BufOffsetIndex and advances once per 16 bindings.
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
|
||||
// Component within the selected member; every component covers 4 bindings.
const u32 comp = (binding & 0xf) >> 2;
|
||||
// Bit position of this binding's byte inside the component (0, 8, 16 or 24).
const u32 offset = (binding & 0x3) << 3;
|
||||
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
|
||||
push_data_block, ConstU32(half), ConstU32(comp))};
|
||||
const Id value{OpLoad(U32[1], ptr)};
|
||||
// Extract the 8 bits starting at `offset` as an unsigned value.
return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
|
||||
}
|
||||
|
||||
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
||||
switch (default_value) {
|
||||
case 0:
|
||||
|
@ -179,24 +190,13 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineInputs(const Info& info) {
|
||||
void EmitContext::DefineInputs() {
|
||||
switch (stage) {
|
||||
case Stage::Vertex: {
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||
|
||||
// Create push constants block for instance steps rates
|
||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "sr0");
|
||||
MemberName(struct_type, 1, "sr1");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
|
||||
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||
Name(instance_step_rates, "step_rates");
|
||||
interfaces.push_back(instance_step_rates);
|
||||
|
||||
for (const auto& input : info.vs_inputs) {
|
||||
const Id type{GetAttributeType(*this, input.fmt)};
|
||||
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
|
@ -260,19 +260,20 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineOutputs(const Info& info) {
|
||||
void EmitContext::DefineOutputs() {
|
||||
switch (stage) {
|
||||
case Stage::Vertex: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value,
|
||||
f32_zero_value, f32_zero_value, f32_zero_value,
|
||||
f32_zero_value, f32_zero_value};
|
||||
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
||||
const Id initializer{ConstantComposite(type, zero)};
|
||||
clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output,
|
||||
initializer);
|
||||
cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output,
|
||||
initializer);
|
||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
||||
info.stores.Get(IR::Attribute::Position2) ||
|
||||
info.stores.Get(IR::Attribute::Position3);
|
||||
if (has_extra_pos_stores) {
|
||||
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
||||
clip_distances =
|
||||
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
|
||||
cull_distances =
|
||||
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
|
||||
}
|
||||
for (u32 i = 0; i < IR::NumParams; i++) {
|
||||
const IR::Attribute param{IR::Attribute::Param0 + i};
|
||||
if (!info.stores.GetAny(param)) {
|
||||
|
@ -304,7 +305,24 @@ void EmitContext::DefineOutputs(const Info& info) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineBuffers(const Info& info) {
|
||||
// Declares the push constant block ("AuxData") and stores its variable Id in
// push_data_block. The block has four members: two scalar step rates (sr0, sr1)
// followed by two U32[4] members (buf_offsets0, buf_offsets1).
void EmitContext::DefinePushDataBlock() {
|
||||
// Create push constants block for instance step rates and packed buffer offsets
|
||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4]), "AuxData")};
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "sr0");
|
||||
MemberName(struct_type, 1, "sr1");
|
||||
MemberName(struct_type, 2, "buf_offsets0");
|
||||
MemberName(struct_type, 3, "buf_offsets1");
|
||||
// Explicit byte offsets of each member inside the block.
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
|
||||
// NOTE(review): members 2 and 3 are U32[4] (presumably 4-component vectors) placed at
// byte offsets 8 and 24, which are not multiples of 16 - confirm the block layout
// rules in effect for this pipeline allow that.
MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
|
||||
MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
|
||||
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||
Name(push_data_block, "push_data");
|
||||
// Register the variable in the interface list.
interfaces.push_back(push_data_block);
|
||||
}
|
||||
|
||||
void EmitContext::DefineBuffers() {
|
||||
boost::container::small_vector<Id, 8> type_ids;
|
||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
|
@ -322,8 +340,8 @@ void EmitContext::DefineBuffers(const Info& info) {
|
|||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
type_ids.push_back(record_array_type);
|
||||
}
|
||||
type_ids.push_back(record_array_type);
|
||||
|
||||
const auto storage_class =
|
||||
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
|
@ -430,7 +448,7 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
throw InvalidArgument("Invalid texture type {}", desc.type);
|
||||
}
|
||||
|
||||
void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
||||
void EmitContext::DefineImagesAndSamplers() {
|
||||
for (const auto& image_desc : info.images) {
|
||||
const VectorIds* data_types = [&] {
|
||||
switch (image_desc.nfmt) {
|
||||
|
|
|
@ -40,6 +40,7 @@ public:
|
|||
~EmitContext();
|
||||
|
||||
Id Def(const IR::Value& value);
|
||||
Id GetBufferOffset(u32 binding);
|
||||
|
||||
[[nodiscard]] Id DefineInput(Id type, u32 location) {
|
||||
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
|
||||
|
@ -168,7 +169,7 @@ public:
|
|||
Id output_position{};
|
||||
Id vertex_index{};
|
||||
Id instance_id{};
|
||||
Id instance_step_rates{};
|
||||
Id push_data_block{};
|
||||
Id base_vertex{};
|
||||
Id frag_coord{};
|
||||
Id front_facing{};
|
||||
|
@ -201,14 +202,15 @@ public:
|
|||
|
||||
// Per-buffer state kept by the emit context; indexed by buffer handle via `buffers`.
struct BufferDefinition {
|
||||
// Id used as the base of OpAccessChain when the buffer is read or written.
Id id;
|
||||
// Id of the buffer offset value; the buffer emitters fill it on first use by
// calling GetBufferOffset when it is not yet a valid Id.
Id offset;
|
||||
// Type table used to pick the result type of loads (e.g. data_types->Get(1)).
const VectorIds* data_types;
|
||||
// Pointer type passed to OpAccessChain for element access.
Id pointer_type;
|
||||
// Guest buffer descriptor; queried for its data format by the format loaders.
AmdGpu::Buffer buffer;
|
||||
};
|
||||
|
||||
u32& binding;
|
||||
boost::container::small_vector<BufferDefinition, 4> buffers;
|
||||
boost::container::small_vector<TextureDefinition, 4> images;
|
||||
boost::container::small_vector<BufferDefinition, 16> buffers;
|
||||
boost::container::small_vector<TextureDefinition, 8> images;
|
||||
boost::container::small_vector<Id, 4> samplers;
|
||||
|
||||
Id sampler_type{};
|
||||
|
@ -227,11 +229,12 @@ public:
|
|||
|
||||
private:
|
||||
void DefineArithmeticTypes();
|
||||
void DefineInterfaces(const IR::Program& program);
|
||||
void DefineInputs(const Info& info);
|
||||
void DefineOutputs(const Info& info);
|
||||
void DefineBuffers(const Info& info);
|
||||
void DefineImagesAndSamplers(const Info& info);
|
||||
void DefineInterfaces();
|
||||
void DefineInputs();
|
||||
void DefineOutputs();
|
||||
void DefinePushDataBlock();
|
||||
void DefineBuffers();
|
||||
void DefineImagesAndSamplers();
|
||||
void DefineSharedMemory();
|
||||
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
||||
|
|
|
@ -40,7 +40,7 @@ static IR::Condition MakeCondition(Opcode opcode) {
|
|||
}
|
||||
}
|
||||
|
||||
CFG::CFG(ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
|
||||
CFG::CFG(Common::ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
|
||||
: block_pool{block_pool_}, inst_list{inst_list_} {
|
||||
index_to_pc.resize(inst_list.size() + 1);
|
||||
EmitLabels();
|
||||
|
|
|
@ -8,10 +8,10 @@
|
|||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/intrusive/set.hpp>
|
||||
|
||||
#include "common/object_pool.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/frontend/instruction.h"
|
||||
#include "shader_recompiler/ir/condition.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
|
@ -49,7 +49,7 @@ class CFG {
|
|||
using Label = u32;
|
||||
|
||||
public:
|
||||
explicit CFG(ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);
|
||||
explicit CFG(Common::ObjectPool<Block>& block_pool, std::span<const GcnInst> inst_list);
|
||||
|
||||
[[nodiscard]] std::string Dot() const;
|
||||
|
||||
|
@ -59,7 +59,7 @@ private:
|
|||
void LinkBlocks();
|
||||
|
||||
public:
|
||||
ObjectPool<Block>& block_pool;
|
||||
Common::ObjectPool<Block>& block_pool;
|
||||
std::span<const GcnInst> inst_list;
|
||||
std::vector<u32> index_to_pc;
|
||||
boost::container::small_vector<Label, 16> labels;
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translate();
|
||||
|
||||
} // namespace Shader::Gcn
|
|
@ -287,7 +287,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
|
|||
*/
|
||||
class GotoPass {
|
||||
public:
|
||||
explicit GotoPass(CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
|
||||
explicit GotoPass(CFG& cfg, Common::ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
|
||||
std::vector gotos{BuildTree(cfg)};
|
||||
const auto end{gotos.rend()};
|
||||
for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
|
||||
|
@ -563,7 +563,7 @@ private:
|
|||
return parent_tree.insert(std::next(loop), *new_goto);
|
||||
}
|
||||
|
||||
ObjectPool<Statement>& pool;
|
||||
Common::ObjectPool<Statement>& pool;
|
||||
Statement root_stmt{FunctionTag{}};
|
||||
};
|
||||
|
||||
|
@ -597,8 +597,9 @@ private:
|
|||
|
||||
class TranslatePass {
|
||||
public:
|
||||
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
|
||||
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
|
||||
TranslatePass(Common::ObjectPool<IR::Inst>& inst_pool_,
|
||||
Common::ObjectPool<IR::Block>& block_pool_,
|
||||
Common::ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
|
||||
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
|
||||
Info& info_, const Profile& profile_)
|
||||
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
|
||||
|
@ -808,9 +809,9 @@ private:
|
|||
return block_pool.Create(inst_pool);
|
||||
}
|
||||
|
||||
ObjectPool<Statement>& stmt_pool;
|
||||
ObjectPool<IR::Inst>& inst_pool;
|
||||
ObjectPool<IR::Block>& block_pool;
|
||||
Common::ObjectPool<Statement>& stmt_pool;
|
||||
Common::ObjectPool<IR::Inst>& inst_pool;
|
||||
Common::ObjectPool<IR::Block>& block_pool;
|
||||
IR::AbstractSyntaxList& syntax_list;
|
||||
const Block dummy_flow_block{.is_dummy = true};
|
||||
std::span<const GcnInst> inst_list;
|
||||
|
@ -819,9 +820,10 @@ private:
|
|||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
CFG& cfg, Info& info, const Profile& profile) {
|
||||
ObjectPool<Statement> stmt_pool{64};
|
||||
IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg, Info& info,
|
||||
const Profile& profile) {
|
||||
Common::ObjectPool<Statement> stmt_pool{64};
|
||||
GotoPass goto_pass{cfg, stmt_pool};
|
||||
Statement& root{goto_pass.RootStatement()};
|
||||
IR::AbstractSyntaxList syntax_list;
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
#include "shader_recompiler/ir/abstract_syntax_list.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
|
||||
namespace Shader {
|
||||
struct Info;
|
||||
|
@ -16,8 +15,8 @@ struct Profile;
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
|
||||
ObjectPool<IR::Block>& block_pool, CFG& cfg,
|
||||
[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg,
|
||||
Info& info, const Profile& profile);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -447,6 +447,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
.is_instance_data = true,
|
||||
});
|
||||
instance_buf_handle = s32(info.buffers.size() - 1);
|
||||
info.uses_step_rates = true;
|
||||
}
|
||||
|
||||
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
||||
|
|
|
@ -338,6 +338,11 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_forma
|
|||
if (is_typed) {
|
||||
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
|
||||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
ASSERT(info.nfmt == AmdGpu::NumberFormat::Float &&
|
||||
(info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
|
||||
info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
|
||||
info.dmft == AmdGpu::DataFormat::Format32_32 ||
|
||||
info.dmft == AmdGpu::DataFormat::Format32));
|
||||
}
|
||||
|
||||
const IR::Value handle =
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
namespace Shader::IR {
|
||||
|
||||
Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
|
||||
Block::Block(Common::ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}
|
||||
|
||||
Block::~Block() = default;
|
||||
|
||||
|
|
|
@ -9,10 +9,10 @@
|
|||
#include <vector>
|
||||
#include <boost/intrusive/list.hpp>
|
||||
|
||||
#include "common/object_pool.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
@ -25,7 +25,7 @@ public:
|
|||
using reverse_iterator = InstructionList::reverse_iterator;
|
||||
using const_reverse_iterator = InstructionList::const_reverse_iterator;
|
||||
|
||||
explicit Block(ObjectPool<Inst>& inst_pool_);
|
||||
explicit Block(Common::ObjectPool<Inst>& inst_pool_);
|
||||
~Block();
|
||||
|
||||
Block(const Block&) = delete;
|
||||
|
@ -153,7 +153,7 @@ public:
|
|||
|
||||
private:
|
||||
/// Memory pool for instruction list
|
||||
ObjectPool<Inst>* inst_pool;
|
||||
Common::ObjectPool<Inst>* inst_pool;
|
||||
|
||||
/// List of instructions in this block
|
||||
InstructionList instructions;
|
||||
|
|
|
@ -173,10 +173,9 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
|
|||
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(BufferResourceList& buffer_resources_, ImageResourceList& image_resources_,
|
||||
SamplerResourceList& sampler_resources_)
|
||||
: buffer_resources{buffer_resources_}, image_resources{image_resources_},
|
||||
sampler_resources{sampler_resources_} {}
|
||||
explicit Descriptors(Info& info_)
|
||||
: info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images},
|
||||
sampler_resources{info_.samplers} {}
|
||||
|
||||
u32 Add(const BufferResource& desc) {
|
||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||
|
@ -188,6 +187,7 @@ public:
|
|||
ASSERT(buffer.length == desc.length);
|
||||
buffer.is_storage |= desc.is_storage;
|
||||
buffer.used_types |= desc.used_types;
|
||||
buffer.is_written |= desc.is_written;
|
||||
return index;
|
||||
}
|
||||
|
||||
|
@ -201,9 +201,16 @@ public:
|
|||
}
|
||||
|
||||
u32 Add(const SamplerResource& desc) {
|
||||
const u32 index{Add(sampler_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset;
|
||||
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
|
||||
if (desc.sgpr_base == existing.sgpr_base &&
|
||||
desc.dword_offset == existing.dword_offset) {
|
||||
return true;
|
||||
}
|
||||
// Samplers with different bindings might still be the same.
|
||||
const auto old_sharp =
|
||||
info.ReadUd<AmdGpu::Sampler>(existing.sgpr_base, existing.dword_offset);
|
||||
const auto new_sharp = info.ReadUd<AmdGpu::Sampler>(desc.sgpr_base, desc.dword_offset);
|
||||
return old_sharp == new_sharp;
|
||||
})};
|
||||
return index;
|
||||
}
|
||||
|
@ -219,6 +226,7 @@ private:
|
|||
return static_cast<u32>(descriptors.size()) - 1;
|
||||
}
|
||||
|
||||
const Info& info;
|
||||
BufferResourceList& buffer_resources;
|
||||
ImageResourceList& image_resources;
|
||||
SamplerResourceList& sampler_resources;
|
||||
|
@ -328,16 +336,6 @@ static bool IsLoadBufferFormat(const IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns true when `inst` is one of the two constant buffer read opcodes
// (ReadConstBuffer or ReadConstBufferU32), false for every other opcode.
static bool IsReadConstBuffer(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ReadConstBuffer:
|
||||
case IR::Opcode::ReadConstBufferU32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static u32 BufferLength(const AmdGpu::Buffer& buffer) {
|
||||
const auto stride = buffer.GetStride();
|
||||
if (stride < sizeof(f32)) {
|
||||
|
@ -401,30 +399,37 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer);
|
||||
const bool is_store = IsBufferStore(inst);
|
||||
buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
|
||||
binding = descriptors.Add(BufferResource{
|
||||
.sgpr_base = sharp.sgpr_base,
|
||||
.dword_offset = sharp.dword_offset,
|
||||
.length = BufferLength(buffer),
|
||||
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
|
||||
.is_storage = IsBufferStore(inst) || buffer.GetSize() > MaxUboSize,
|
||||
.is_storage = is_store || buffer.GetSize() > MaxUboSize,
|
||||
.is_written = is_store,
|
||||
});
|
||||
}
|
||||
|
||||
// Update buffer descriptor format.
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
// Replace handle with binding index in buffer resource list.
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
auto& buffer_desc = info.buffers[binding];
|
||||
if (inst_info.is_typed) {
|
||||
ASSERT(inst_info.nfmt == AmdGpu::NumberFormat::Float &&
|
||||
(inst_info.dmft == AmdGpu::DataFormat::Format32_32_32_32 ||
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32_32 ||
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32_32 ||
|
||||
inst_info.dmft == AmdGpu::DataFormat::Format32));
|
||||
buffer_desc.dfmt = inst_info.dmft;
|
||||
buffer_desc.nfmt = inst_info.nfmt;
|
||||
} else {
|
||||
buffer_desc.dfmt = buffer.GetDataFmt();
|
||||
buffer_desc.nfmt = buffer.GetNumberFmt();
|
||||
}
|
||||
|
||||
if (IsReadConstBuffer(inst)) {
|
||||
// Replace handle with binding index in buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
|
||||
// Address of constant buffer reads can be calculated at IR emission time.
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer ||
|
||||
inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -434,10 +439,14 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
}
|
||||
} else {
|
||||
const u32 stride = buffer.GetStride();
|
||||
ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}",
|
||||
stride);
|
||||
if (stride < 4) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"non-formatting load_buffer_* is not implemented for stride {}", stride);
|
||||
}
|
||||
}
|
||||
|
||||
// Compute address of the buffer using the stride.
|
||||
// Todo: What if buffer is rebound with different stride?
|
||||
IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());
|
||||
if (inst_info.index_enable) {
|
||||
const IR::U32 index = inst_info.offset_enable ? IR::U32{ir.CompositeExtract(inst.Arg(1), 0)}
|
||||
|
@ -587,39 +596,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// When loading data from untyped buffer we don't know whether it is float or integer.
|
||||
// Most of the time it is float so that is the default. This pass detects float buffer loads
|
||||
// combined with bitcasts and patches them to be integer loads.
|
||||
for (IR::Block* const block : program.post_order_blocks) {
|
||||
break;
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (inst.GetOpcode() != IR::Opcode::BitCastU32F32) {
|
||||
continue;
|
||||
}
|
||||
// Replace the bitcast with a typed buffer read
|
||||
IR::Inst* const arg_inst{inst.Arg(0).TryInstRecursive()};
|
||||
if (!arg_inst) {
|
||||
continue;
|
||||
}
|
||||
const auto replace{[&](IR::Opcode new_opcode) {
|
||||
inst.ReplaceOpcode(new_opcode);
|
||||
inst.SetArg(0, arg_inst->Arg(0));
|
||||
inst.SetArg(1, arg_inst->Arg(1));
|
||||
inst.SetFlags(arg_inst->Flags<u32>());
|
||||
arg_inst->Invalidate();
|
||||
}};
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
replace(IR::Opcode::ReadConstBufferU32);
|
||||
}
|
||||
if (arg_inst->GetOpcode() == IR::Opcode::LoadBufferF32) {
|
||||
replace(IR::Opcode::LoadBufferU32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
||||
Descriptors descriptors{info};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
|
|
|
@ -1,107 +0,0 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace Shader {
|
||||
|
||||
// Chunked object pool. Objects of type T are constructed in place inside
// pre-allocated chunks of storage slots; when the current chunk is full a new
// chunk of new_chunk_size slots is appended. Pointers returned by Create point
// into heap arrays owned by the chunks.
template <typename T>
|
||||
requires std::is_destructible_v<T>
|
||||
class ObjectPool {
|
||||
public:
|
||||
// Creates the pool with one initial chunk of `chunk_size` slots; the same size
// is used for every chunk added later.
explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
|
||||
node = &chunks.emplace_back(new_chunk_size);
|
||||
}
|
||||
|
||||
// Constructs a T from `args` in the next free slot and returns a pointer to it.
template <typename... Args>
|
||||
requires std::is_constructible_v<T, Args...>
|
||||
[[nodiscard]] T* Create(Args&&... args) {
|
||||
return std::construct_at(Memory(), std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
// Releases everything allocated so far and leaves the pool with a single chunk.
// If the first chunk was completely used, it is replaced by one chunk sized to
// the combined capacity of all previous chunks; otherwise the first chunk is
// released and kept while the others are dropped.
void ReleaseContents() {
|
||||
if (chunks.empty()) {
|
||||
return;
|
||||
}
|
||||
Chunk& root{chunks.front()};
|
||||
if (root.used_objects == root.num_objects) {
|
||||
// Root chunk has been filled, squash allocations into it
|
||||
// Computed before clear() because `root` refers into `chunks`.
const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)};
|
||||
chunks.clear();
|
||||
chunks.emplace_back(total_objects);
|
||||
} else {
|
||||
root.Release();
|
||||
chunks.resize(1);
|
||||
}
|
||||
chunks.shrink_to_fit();
|
||||
// Re-point the cursor: the vector's elements may have been replaced or moved.
node = &chunks.front();
|
||||
}
|
||||
|
||||
private:
|
||||
// Union member with a user-provided constructor; it is the member that Storage
// default-initializes instead of T.
struct NonTrivialDummy {
|
||||
NonTrivialDummy() noexcept {}
|
||||
};
|
||||
|
||||
// One slot of raw storage for a T. Construction initializes only `dummy`; the
// T is constructed later by Create.
union Storage {
|
||||
Storage() noexcept {}
|
||||
// Empty body: destroying a Storage does not destroy `object`.
~Storage() noexcept {}
|
||||
|
||||
NonTrivialDummy dummy{};
|
||||
T object;
|
||||
};
|
||||
|
||||
// A fixed-size array of slots plus the count of slots handed out so far.
struct Chunk {
|
||||
explicit Chunk() = default;
|
||||
explicit Chunk(size_t size)
|
||||
: num_objects{size}, storage{std::make_unique<Storage[]>(size)} {}
|
||||
|
||||
// Move assignment: releases the current contents, then takes over rhs and
// zeroes its counters.
Chunk& operator=(Chunk&& rhs) noexcept {
|
||||
Release();
|
||||
used_objects = std::exchange(rhs.used_objects, 0);
|
||||
num_objects = std::exchange(rhs.num_objects, 0);
|
||||
storage = std::move(rhs.storage);
|
||||
return *this;
|
||||
}
|
||||
|
||||
Chunk(Chunk&& rhs) noexcept
|
||||
: used_objects{std::exchange(rhs.used_objects, 0)},
|
||||
num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {}
|
||||
|
||||
~Chunk() {
|
||||
Release();
|
||||
}
|
||||
|
||||
// Marks every slot as free again.
// NOTE(review): destroy_n runs on Storage elements, and ~Storage() has an empty
// body, so this appears not to invoke T's destructor for the stored objects -
// confirm whether that is intended.
void Release() {
|
||||
std::destroy_n(storage.get(), used_objects);
|
||||
used_objects = 0;
|
||||
}
|
||||
|
||||
// Number of slots already handed out.
size_t used_objects{};
|
||||
// Total number of slots in `storage`.
size_t num_objects{};
|
||||
std::unique_ptr<Storage[]> storage;
|
||||
};
|
||||
|
||||
// Returns the address of the next free slot and marks it as used.
[[nodiscard]] T* Memory() {
|
||||
Chunk* const chunk{FreeChunk()};
|
||||
return &chunk->storage[chunk->used_objects++].object;
|
||||
}
|
||||
|
||||
// Returns the current chunk if it still has a free slot; otherwise appends a
// new chunk of new_chunk_size slots and makes it current.
[[nodiscard]] Chunk* FreeChunk() {
|
||||
if (node->used_objects != node->num_objects) {
|
||||
return node;
|
||||
}
|
||||
node = &chunks.emplace_back(new_chunk_size);
|
||||
return node;
|
||||
}
|
||||
|
||||
// Chunk currently used for allocation.
Chunk* node{};
|
||||
std::vector<Chunk> chunks;
|
||||
// Slot count used for every newly appended chunk.
size_t new_chunk_size{};
|
||||
};
|
||||
|
||||
} // namespace Shader
|
|
@ -27,9 +27,9 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
|
|||
return blocks;
|
||||
}
|
||||
|
||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
std::span<const u32> token, const Info&& info,
|
||||
const Profile& profile) {
|
||||
IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||
Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
|
||||
const Info&& info, const Profile& profile) {
|
||||
// Ensure first instruction is expected.
|
||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
||||
|
@ -45,7 +45,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
}
|
||||
|
||||
// Create control flow graph
|
||||
ObjectPool<Gcn::Block> gcn_block_pool{64};
|
||||
Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
|
||||
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
|
||||
|
||||
// Structurize control flow graph and create program.
|
||||
|
@ -61,7 +61,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||
Shader::Optimization::CollectShaderInfoPass(program);
|
||||
LOG_INFO(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
|
||||
LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
|
||||
|
||||
return program;
|
||||
}
|
||||
|
|
|
@ -3,16 +3,16 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/object_pool.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
struct Profile;
|
||||
|
||||
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
|
||||
ObjectPool<IR::Block>& block_pool,
|
||||
[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||
Common::ObjectPool<IR::Block>& block_pool,
|
||||
std::span<const u32> code, const Info&& info,
|
||||
const Profile& profile);
|
||||
|
||||
|
|
|
@ -77,8 +77,11 @@ struct BufferResource {
|
|||
u32 length;
|
||||
IR::Type used_types;
|
||||
AmdGpu::Buffer inline_cbuf;
|
||||
bool is_storage{false};
|
||||
bool is_instance_data{false};
|
||||
AmdGpu::DataFormat dfmt;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_storage{};
|
||||
bool is_instance_data{};
|
||||
bool is_written{};
|
||||
|
||||
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
|
||||
};
|
||||
|
@ -105,6 +108,19 @@ struct SamplerResource {
|
|||
};
|
||||
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;
|
||||
|
||||
// Host-side data for the shader push constant block: two step values followed
// by one byte-sized offset per buffer binding.
struct PushData {
|
||||
// Index of the first buffer-offset member in the shader-side struct; used by
// EmitContext::GetBufferOffset as the base member index.
static constexpr size_t BufOffsetIndex = 2;
|
||||
|
||||
u32 step0;
|
||||
u32 step1;
|
||||
// One offset byte per binding, 32 bindings in total.
std::array<u8, 32> buf_offsets;
|
||||
|
||||
// Records `offset` for `binding`. Asserts that offset is below 64 and binding
// is below 32; the value is stored in a single byte.
void AddOffset(u32 binding, u32 offset) {
|
||||
ASSERT(offset < 64 && binding < 32);
|
||||
buf_offsets[binding] = offset;
|
||||
}
|
||||
};
|
||||
|
||||
struct Info {
|
||||
struct VsInput {
|
||||
enum InstanceIdType : u8 {
|
||||
|
@ -182,6 +198,7 @@ struct Info {
|
|||
bool uses_shared_u8{};
|
||||
bool uses_shared_u16{};
|
||||
bool uses_fp16{};
|
||||
bool uses_step_rates{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
|
||||
template <typename T>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue