video_core: Implement guest buffer manager (#373)

* video_core: Introduce buffer cache

* video_core: Use multi level page table for caches

* renderer_vulkan: Remove unused stream buffer

* fix build

* oops forgot optimize off
This commit is contained in:
TheTurtle 2024-08-08 15:02:10 +03:00 committed by GitHub
parent 159be2c7f4
commit 381ba8c7a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 2697 additions and 1039 deletions

View file

@ -21,6 +21,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
case VsOutput::ClipDist7: {
const u32 index = u32(output) - u32(VsOutput::ClipDist0);
const Id clip_num{ctx.ConstU32(index)};
ASSERT_MSG(Sirit::ValidId(ctx.clip_distances), "Clip distance used but not defined");
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
}
case VsOutput::CullDist0:
@ -33,6 +34,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
case VsOutput::CullDist7: {
const u32 index = u32(output) - u32(VsOutput::CullDist0);
const Id cull_num{ctx.ConstU32(index)};
ASSERT_MSG(Sirit::ValidId(ctx.cull_distances), "Cull distance used but not defined");
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
}
default:
@ -125,7 +127,12 @@ Id EmitReadConst(EmitContext& ctx) {
}
// Emits a load of one element from the buffer registered at ctx.buffers[handle].
// The buffer's offset Id is fetched through ctx.GetBufferOffset the first time it is needed and
// kept in buffer.offset for later calls. That offset is shifted right by 2 (offset_dwords) and
// added to `index` before the access chain is built.
// Returns the Id of an OpLoad whose result type is buffer.data_types->Get(1).
// NOTE(review): both a const and a non-const binding of `buffer` appear below. This listing looks
// like a diff view with its -/+ markers stripped, so the const line is presumably the removed
// one; only the non-const binding permits the assignment to buffer.offset. Confirm against the
// real file.
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle];
auto& buffer = ctx.buffers[handle];
// Resolve the offset only when no valid Id has been stored for this buffer yet.
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(handle);
}
// Shifting right by 2 divides the offset by four so it can be added to the element index.
const Id offset_dwords{ctx.OpShiftRightLogical(ctx.U32[1], buffer.offset, ctx.ConstU32(2U))};
index = ctx.OpIAdd(ctx.U32[1], index, offset_dwords);
// The first access-chain index is ctx.u32_zero_value, the second is the adjusted element index.
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
}
@ -137,7 +144,7 @@ Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) {
// Emits a load of a 32-bit unsigned value through a pointer in push-constant storage.
// rate_idx == 0 selects index ctx.u32_zero_value of the accessed block; any other value selects
// ctx.u32_one_value.
// NOTE(review): two base operands are listed below (ctx.instance_step_rates and
// ctx.push_data_block). This listing looks like a diff view with its -/+ markers stripped, so
// ctx.instance_step_rates is presumably the removed line and ctx.push_data_block its
// replacement. Confirm against the real file.
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
return ctx.OpLoad(
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.instance_step_rates,
ctx.push_data_block,
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
}
@ -221,7 +228,11 @@ Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
template <u32 N>
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
const auto& buffer = ctx.buffers[handle];
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(handle);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
if constexpr (N == 1) {
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
@ -314,7 +325,7 @@ static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offs
}
static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
const auto& buffer = ctx.buffers[handle];
auto& buffer = ctx.buffers[handle];
const auto format = buffer.buffer.GetDataFmt();
switch (format) {
case AmdGpu::DataFormat::FormatInvalid:
@ -399,6 +410,11 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com
template <u32 N>
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(handle);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
if constexpr (N == 1) {
return GetBufferFormatValue(ctx, handle, address, 0);
} else {
@ -428,7 +444,11 @@ Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id ad
template <u32 N>
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
const auto& buffer = ctx.buffers[handle];
auto& buffer = ctx.buffers[handle];
if (!Sirit::ValidId(buffer.offset)) {
buffer.offset = ctx.GetBufferOffset(handle);
}
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
if constexpr (N == 1) {
const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};

View file

@ -46,9 +46,9 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
stage{program.info.stage}, binding{binding_} {
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces(program);
DefineBuffers(info);
DefineImagesAndSamplers(info);
DefineInterfaces();
DefineBuffers();
DefineImagesAndSamplers();
DefineSharedMemory();
}
@ -117,9 +117,10 @@ void EmitContext::DefineArithmeticTypes() {
full_result_u32x2 = Name(TypeStruct(U32[1], U32[1]), "full_result_u32x2");
}
void EmitContext::DefineInterfaces(const IR::Program& program) {
DefineInputs(program.info);
DefineOutputs(program.info);
// Declares the shader's interface variables by calling, in this order, DefinePushDataBlock,
// DefineInputs and DefineOutputs.
void EmitContext::DefineInterfaces() {
DefinePushDataBlock();
DefineInputs();
DefineOutputs();
}
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
@ -164,6 +165,16 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
throw InvalidArgument("Invalid attribute type {}", fmt);
}
// Builds the Id of the 8-bit offset field that belongs to buffer `binding`.
// The field is read from the push data block: `binding / 16` picks the struct member (counted
// from Shader::PushData::BufOffsetIndex), the next two bits of `binding` pick the 32-bit
// component inside that member, and the lowest two bits pick which group of 8 bits is extracted
// from the loaded component.
Id EmitContext::GetBufferOffset(u32 binding) {
    const u32 slot_in_member = binding % 16u;
    const u32 member_index = Shader::PushData::BufOffsetIndex + (binding / 16u);
    const u32 component_index = slot_in_member / 4u;
    const u32 first_bit = (binding % 4u) * 8u;
    const Id component_ptr = OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
                                           push_data_block, ConstU32(member_index),
                                           ConstU32(component_index));
    const Id packed = OpLoad(U32[1], component_ptr);
    return OpBitFieldUExtract(U32[1], packed, ConstU32(first_bit), ConstU32(8U));
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
switch (default_value) {
case 0:
@ -179,24 +190,13 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
}
}
void EmitContext::DefineInputs(const Info& info) {
void EmitContext::DefineInputs() {
switch (stage) {
case Stage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
// Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(instance_step_rates, "step_rates");
interfaces.push_back(instance_step_rates);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
@ -260,19 +260,20 @@ void EmitContext::DefineInputs(const Info& info) {
}
}
void EmitContext::DefineOutputs(const Info& info) {
void EmitContext::DefineOutputs() {
switch (stage) {
case Stage::Vertex: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value,
f32_zero_value, f32_zero_value, f32_zero_value,
f32_zero_value, f32_zero_value};
const Id type{TypeArray(F32[1], ConstU32(8U))};
const Id initializer{ConstantComposite(type, zero)};
clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output,
initializer);
cull_distances = DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output,
initializer);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
info.stores.Get(IR::Attribute::Position3);
if (has_extra_pos_stores) {
const Id type{TypeArray(F32[1], ConstU32(8U))};
clip_distances =
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
@ -304,7 +305,24 @@ void EmitContext::DefineOutputs(const Info& info) {
}
}
void EmitContext::DefineBuffers(const Info& info) {
// Declares the push-constant block used by the shader and records it in `interfaces`.
// The block type is named "AuxData" and has four members: two U32[1] values (sr0, sr1) followed
// by two U32[4] values (buf_offsets0, buf_offsets1). The variable itself is named "push_data" and
// stored in push_data_block.
void EmitContext::DefinePushDataBlock() {
// Create the push constants block holding the instance step rates and the buffer offsets
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4]), "AuxData")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberName(struct_type, 2, "buf_offsets0");
MemberName(struct_type, 3, "buf_offsets1");
// Explicit offsets of the four members inside the block.
// NOTE(review): members 2 and 3 are placed at offsets 8 and 24, which are not multiples of 16.
// Presumably the target relies on a relaxed/scalar block layout for these 4-component members,
// and the host-side structure uses the same packing - confirm both.
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(push_data_block, "push_data");
interfaces.push_back(push_data_block);
}
void EmitContext::DefineBuffers() {
boost::container::small_vector<Id, 8> type_ids;
for (u32 i = 0; const auto& buffer : info.buffers) {
const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
@ -322,8 +340,8 @@ void EmitContext::DefineBuffers(const Info& info) {
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
type_ids.push_back(record_array_type);
}
type_ids.push_back(record_array_type);
const auto storage_class =
buffer.is_storage ? spv::StorageClass::StorageBuffer : spv::StorageClass::Uniform;
@ -430,7 +448,7 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
throw InvalidArgument("Invalid texture type {}", desc.type);
}
void EmitContext::DefineImagesAndSamplers(const Info& info) {
void EmitContext::DefineImagesAndSamplers() {
for (const auto& image_desc : info.images) {
const VectorIds* data_types = [&] {
switch (image_desc.nfmt) {

View file

@ -40,6 +40,7 @@ public:
~EmitContext();
Id Def(const IR::Value& value);
Id GetBufferOffset(u32 binding);
[[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
@ -168,7 +169,7 @@ public:
Id output_position{};
Id vertex_index{};
Id instance_id{};
Id instance_step_rates{};
Id push_data_block{};
Id base_vertex{};
Id frag_coord{};
Id front_facing{};
@ -201,14 +202,15 @@ public:
// State kept by the emit context for one entry of `buffers`.
struct BufferDefinition {
// Base operand handed to OpAccessChain when the buffer is read or written.
Id id;
// Offset Id obtained from GetBufferOffset; the buffer emitters fill it in the first time they
// find it invalid and reuse it afterwards.
Id offset;
// Type table of the buffer; Get(1) supplies the result type of single-element loads.
const VectorIds* data_types;
// Result type handed to OpAccessChain for pointers into the buffer.
Id pointer_type;
// AmdGpu buffer object; its GetDataFmt() is queried by the format-load path.
AmdGpu::Buffer buffer;
};
u32& binding;
boost::container::small_vector<BufferDefinition, 4> buffers;
boost::container::small_vector<TextureDefinition, 4> images;
boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureDefinition, 8> images;
boost::container::small_vector<Id, 4> samplers;
Id sampler_type{};
@ -227,11 +229,12 @@ public:
private:
void DefineArithmeticTypes();
void DefineInterfaces(const IR::Program& program);
void DefineInputs(const Info& info);
void DefineOutputs(const Info& info);
void DefineBuffers(const Info& info);
void DefineImagesAndSamplers(const Info& info);
void DefineInterfaces();
void DefineInputs();
void DefineOutputs();
void DefinePushDataBlock();
void DefineBuffers();
void DefineImagesAndSamplers();
void DefineSharedMemory();
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);