mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-19 09:54:54 +00:00
video_core: Implement guest buffer manager (#373)
* video_core: Introduce buffer cache * video_core: Use multi level page table for caches * renderer_vulkan: Remove unused stream buffer * fix build * oops forgot optimize off
This commit is contained in:
parent
159be2c7f4
commit
381ba8c7a5
55 changed files with 2697 additions and 1039 deletions
|
@ -21,6 +21,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
|||
case VsOutput::ClipDist7: {
|
||||
const u32 index = u32(output) - u32(VsOutput::ClipDist0);
|
||||
const Id clip_num{ctx.ConstU32(index)};
|
||||
ASSERT_MSG(Sirit::ValidId(ctx.clip_distances), "Clip distance used but not defined");
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
|
||||
}
|
||||
case VsOutput::CullDist0:
|
||||
|
@ -33,6 +34,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
|||
case VsOutput::CullDist7: {
|
||||
const u32 index = u32(output) - u32(VsOutput::CullDist0);
|
||||
const Id cull_num{ctx.ConstU32(index)};
|
||||
ASSERT_MSG(Sirit::ValidId(ctx.cull_distances), "Cull distance used but not defined");
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
|
||||
}
|
||||
default:
|
||||
|
@ -125,7 +127,12 @@ Id EmitReadConst(EmitContext& ctx) {
|
|||
}
|
||||
|
||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(handle);
|
||||
}
|
||||
const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))};
|
||||
index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords);
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
|
||||
}
|
||||
|
@ -137,7 +144,7 @@ Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) {
|
|||
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
||||
return ctx.OpLoad(
|
||||
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
|
||||
ctx.instance_step_rates,
|
||||
ctx.push_data_block,
|
||||
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
|
||||
}
|
||||
|
||||
|
@ -221,7 +228,11 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
|||
|
||||
template <u32 N>
|
||||
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(handle);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
if constexpr (N == 1) {
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
|
@ -314,7 +325,7 @@ static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offs
|
|||
}
|
||||
|
||||
static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
const auto format = buffer.buffer.GetDataFmt();
|
||||
switch (format) {
|
||||
case AmdGpu::DataFormat::FormatInvalid:
|
||||
|
@ -399,6 +410,11 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
|
|||
|
||||
template <u32 N>
|
||||
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(handle);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
if constexpr (N == 1) {
|
||||
return GetBufferFormatValue(ctx, handle, address, 0);
|
||||
} else {
|
||||
|
@ -428,7 +444,11 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad
|
|||
|
||||
template <u32 N>
|
||||
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
if (!Sirit::ValidId(buffer.offset)) {
|
||||
buffer.offset = ctx.GetBufferOffset(handle);
|
||||
}
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
if constexpr (N == 1) {
|
||||
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
|
||||
|
|
|
@ -46,9 +46,9 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
|
|||
stage{program.info.stage}, binding{binding_} {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
DefineInterfaces(program);
|
||||
DefineBuffers(info);
|
||||
DefineImagesAndSamplers(info);
|
||||
DefineInterfaces();
|
||||
DefineBuffers();
|
||||
DefineImagesAndSamplers();
|
||||
DefineSharedMemory();
|
||||
}
|
||||
|
||||
|
@ -117,9 +117,10 @@ void EmitContext::DefineArithmeticTypes() {
|
|||
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
|
||||
}
|
||||
|
||||
void EmitContext::DefineInterfaces(const IR::Program& program) {
|
||||
DefineInputs(program.info);
|
||||
DefineOutputs(program.info);
|
||||
void EmitContext::DefineInterfaces() {
|
||||
DefinePushDataBlock();
|
||||
DefineInputs();
|
||||
DefineOutputs();
|
||||
}
|
||||
|
||||
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
||||
|
@ -164,6 +165,16 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
throw InvalidArgument("Invalid attribute type {}", fmt);
|
||||
}
|
||||
|
||||
Id EmitContext::GetBufferOffset(u32 binding) {
|
||||
const u32 half = Shader::PushData::BufOffsetIndex + (binding >> 4);
|
||||
const u32 comp = (binding & 0xf) >> 2;
|
||||
const u32 offset = (binding & 0x3) << 3;
|
||||
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
|
||||
push_data_block, ConstU32(half), ConstU32(comp))};
|
||||
const Id value{OpLoad(U32[1], ptr)};
|
||||
return OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
|
||||
}
|
||||
|
||||
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
||||
switch (default_value) {
|
||||
case 0:
|
||||
|
@ -179,24 +190,13 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineInputs(const Info& info) {
|
||||
void EmitContext::DefineInputs() {
|
||||
switch (stage) {
|
||||
case Stage::Vertex: {
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||
|
||||
// Create push constants block for instance steps rates
|
||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "sr0");
|
||||
MemberName(struct_type, 1, "sr1");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
|
||||
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||
Name(instance_step_rates, "step_rates");
|
||||
interfaces.push_back(instance_step_rates);
|
||||
|
||||
for (const auto& input : info.vs_inputs) {
|
||||
const Id type{GetAttributeType(*this, input.fmt)};
|
||||
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
|
@ -260,19 +260,20 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineOutputs(const Info& info) {
|
||||
void EmitContext::DefineOutputs() {
|
||||
switch (stage) {
|
||||
case Stage::Vertex: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value,
|
||||
f32_zero_value, f32_zero_value, f32_zero_value,
|
||||
f32_zero_value, f32_zero_value};
|
||||
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
||||
const Id initializer{ConstantComposite(type, zero)};
|
||||
clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output,
|
||||
initializer);
|
||||
cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output,
|
||||
initializer);
|
||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
||||
info.stores.Get(IR::Attribute::Position2) ||
|
||||
info.stores.Get(IR::Attribute::Position3);
|
||||
if (has_extra_pos_stores) {
|
||||
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
||||
clip_distances =
|
||||
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
|
||||
cull_distances =
|
||||
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
|
||||
}
|
||||
for (u32 i = 0; i < IR::NumParams; i++) {
|
||||
const IR::Attribute param{IR::Attribute::Param0 + i};
|
||||
if (!info.stores.GetAny(param)) {
|
||||
|
@ -304,7 +305,24 @@ void EmitContext::DefineOutputs(const Info& info) {
|
|||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineBuffers(const Info& info) {
|
||||
void EmitContext::DefinePushDataBlock() {
|
||||
// Create push constants block for instance steps rates
|
||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4]), "AuxData")};
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "sr0");
|
||||
MemberName(struct_type, 1, "sr1");
|
||||
MemberName(struct_type, 2, "buf_offsets0");
|
||||
MemberName(struct_type, 3, "buf_offsets1");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
|
||||
MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
|
||||
MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
|
||||
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||
Name(push_data_block, "push_data");
|
||||
interfaces.push_back(push_data_block);
|
||||
}
|
||||
|
||||
void EmitContext::DefineBuffers() {
|
||||
boost::container::small_vector<Id, 8> type_ids;
|
||||
for (u32 i = 0; const auto& buffer : info.buffers) {
|
||||
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
|
||||
|
@ -322,8 +340,8 @@ void EmitContext::DefineBuffers(const Info& info) {
|
|||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
type_ids.push_back(record_array_type);
|
||||
}
|
||||
type_ids.push_back(record_array_type);
|
||||
|
||||
const auto storage_class =
|
||||
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
|
||||
|
@ -430,7 +448,7 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
throw InvalidArgument("Invalid texture type {}", desc.type);
|
||||
}
|
||||
|
||||
void EmitContext::DefineImagesAndSamplers(const Info& info) {
|
||||
void EmitContext::DefineImagesAndSamplers() {
|
||||
for (const auto& image_desc : info.images) {
|
||||
const VectorIds* data_types = [&] {
|
||||
switch (image_desc.nfmt) {
|
||||
|
|
|
@ -40,6 +40,7 @@ public:
|
|||
~EmitContext();
|
||||
|
||||
Id Def(const IR::Value& value);
|
||||
Id GetBufferOffset(u32 binding);
|
||||
|
||||
[[nodiscard]] Id DefineInput(Id type, u32 location) {
|
||||
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
|
||||
|
@ -168,7 +169,7 @@ public:
|
|||
Id output_position{};
|
||||
Id vertex_index{};
|
||||
Id instance_id{};
|
||||
Id instance_step_rates{};
|
||||
Id push_data_block{};
|
||||
Id base_vertex{};
|
||||
Id frag_coord{};
|
||||
Id front_facing{};
|
||||
|
@ -201,14 +202,15 @@ public:
|
|||
|
||||
struct BufferDefinition {
|
||||
Id id;
|
||||
Id offset;
|
||||
const VectorIds* data_types;
|
||||
Id pointer_type;
|
||||
AmdGpu::Buffer buffer;
|
||||
};
|
||||
|
||||
u32& binding;
|
||||
boost::container::small_vector<BufferDefinition, 4> buffers;
|
||||
boost::container::small_vector<TextureDefinition, 4> images;
|
||||
boost::container::small_vector<BufferDefinition, 16> buffers;
|
||||
boost::container::small_vector<TextureDefinition, 8> images;
|
||||
boost::container::small_vector<Id, 4> samplers;
|
||||
|
||||
Id sampler_type{};
|
||||
|
@ -227,11 +229,12 @@ public:
|
|||
|
||||
private:
|
||||
void DefineArithmeticTypes();
|
||||
void DefineInterfaces(const IR::Program& program);
|
||||
void DefineInputs(const Info& info);
|
||||
void DefineOutputs(const Info& info);
|
||||
void DefineBuffers(const Info& info);
|
||||
void DefineImagesAndSamplers(const Info& info);
|
||||
void DefineInterfaces();
|
||||
void DefineInputs();
|
||||
void DefineOutputs();
|
||||
void DefinePushDataBlock();
|
||||
void DefineBuffers();
|
||||
void DefineImagesAndSamplers();
|
||||
void DefineSharedMemory();
|
||||
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue