Metadata support (#223)

* texture_cache: more image usage flags

* texture_cache: metadata registration

* renderer_vulkan: initial CMask support

* renderer_vulkan: skip redundant FCE and FMask decompression passes

* renderer_vulkan: redundant VO surface registration removed

* renderer_vulkan: initial HTile support

* renderer_vulkan: added support for MSAA attachments

* renderer_vulkan: skip unnecessary metadata updates
This commit is contained in:
psucien 2024-06-29 15:49:59 +02:00 committed by GitHub
parent 059f54838a
commit 2cbbcbd371
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 336 additions and 47 deletions

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "video_core/amdgpu/pixel_format.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
namespace Vulkan::LiverpoolToVK {
@ -381,6 +382,13 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
case vk::Format::eB8G8R8A8Srgb:
return is_vo_surface ? vk::Format::eR8G8B8A8Unorm : vk::Format::eR8G8B8A8Srgb;
}
} else {
if (is_vo_surface && base_format == vk::Format::eR8G8B8A8Srgb) {
return vk::Format::eR8G8B8A8Unorm;
}
if (is_vo_surface && base_format == vk::Format::eB8G8R8A8Srgb) {
return vk::Format::eB8G8R8A8Unorm;
}
}
return base_format;
}
@ -422,4 +430,69 @@ void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
}
}
static constexpr float U8ToUnorm(u8 v) {
static constexpr auto c = 1.0f / 255.0f;
return float(v * c);
}
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
const auto comp_swap = color_buffer.info.comp_swap.Value();
ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard ||
comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate,
"Unsupported component swap mode {}", static_cast<u32>(comp_swap));
const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate;
const auto& c0 = color_buffer.clear_word0;
const auto& c1 = color_buffer.clear_word1;
const auto num_bits = AmdGpu::NumBits(color_buffer.info.format);
vk::ClearColorValue color{};
switch (color_buffer.info.number_type) {
case AmdGpu::NumberFormat::Snorm:
[[fallthrough]];
case AmdGpu::NumberFormat::SnormNz:
[[fallthrough]];
case AmdGpu::NumberFormat::Unorm:
[[fallthrough]];
case AmdGpu::NumberFormat::Srgb: {
switch (num_bits) {
case 32: {
color.float32 = std::array{
U8ToUnorm((c0 >> (comp_swap_alt ? 16 : 0)) & 0xff),
U8ToUnorm((c0 >> 8) & 0xff),
U8ToUnorm((c0 >> (comp_swap_alt ? 0 : 16)) & 0xff),
U8ToUnorm((c0 >> 24) & 0xff),
};
break;
}
default: {
LOG_ERROR(Render_Vulkan, "Missing clear color conversion for bits {}", num_bits);
break;
}
}
break;
}
default: {
LOG_ERROR(Render_Vulkan, "Missing clear color conversion for type {}",
color_buffer.info.number_type.Value());
break;
}
}
return {.color = color};
}
vk::SampleCountFlagBits NumSamples(u32 num_samples) {
switch (num_samples) {
case 1:
return vk::SampleCountFlagBits::e1;
case 2:
return vk::SampleCountFlagBits::e2;
case 4:
return vk::SampleCountFlagBits::e4;
default:
UNREACHABLE();
}
}
} // namespace Vulkan::LiverpoolToVK

View file

@ -46,6 +46,10 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format,
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format);
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer);
vk::SampleCountFlagBits NumSamples(u32 num_samples);
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
} // namespace Vulkan::LiverpoolToVK

View file

@ -40,7 +40,8 @@ public:
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address) {
auto& image = RegisterVideoOutSurface(attribute, cpu_address);
const auto info = VideoCore::ImageInfo{attribute};
auto& image = texture_cache.FindImage(info, cpu_address);
return PrepareFrameInternal(image);
}

View file

@ -82,7 +82,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
ComputePipeline::~ComputePipeline() = default;
void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures.
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
@ -93,12 +93,11 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
for (const auto& buffer : info.buffers) {
const auto vsharp = info.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
const u32 size = vsharp.GetSize();
const VAddr addr = vsharp.base_address.Value();
texture_cache.OnCpuWrite(addr);
const u32 offset = staging.Copy(addr, size,
const VAddr address = vsharp.base_address.Value();
texture_cache.OnCpuWrite(address);
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()
: instance.UniformMinAlignment());
// const auto [vk_buffer, offset] = memory->GetVulkanBuffer(addr);
buffer_infos.emplace_back(staging.Handle(), offset, size);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
@ -109,6 +108,21 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
// Most of the time when a metadata is updated with a shader it gets cleared. It means we
// can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will
// need its full emulation anyways. For cases of metadata read a warning will be logged.
if (buffer.is_storage) {
if (texture_cache.TouchMeta(address, true)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
return false;
}
} else {
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
}
}
}
for (const auto& image : info.images) {
@ -124,6 +138,10 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
: vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(),
});
if (texture_cache.IsMeta(tsharp.Address())) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)");
}
}
for (const auto& sampler : info.samplers) {
const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
@ -139,11 +157,13 @@ void ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
});
}
if (!set_writes.empty()) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
set_writes);
if (set_writes.empty()) {
return false;
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, set_writes);
return true;
}
} // namespace Vulkan

View file

@ -31,7 +31,7 @@ public:
return *pipeline;
}
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
bool BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
VideoCore::TextureCache& texture_cache) const;
private:

View file

@ -92,7 +92,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.rasterizationSamples = LiverpoolToVK::NumSamples(key.num_samples),
.sampleShadingEnable = false,
};
@ -327,8 +327,9 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) {
const auto vsharp = stage.ReadUd<AmdGpu::Buffer>(buffer.sgpr_base, buffer.dword_offset);
const VAddr address = vsharp.base_address.Value();
const u32 size = vsharp.GetSize();
const u32 offset = staging.Copy(vsharp.base_address.Value(), size,
const u32 offset = staging.Copy(address, size,
buffer.is_storage ? instance.StorageMinAlignment()
: instance.UniformMinAlignment());
buffer_infos.emplace_back(staging.Handle(), offset, size);
@ -341,6 +342,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
}
}
for (const auto& image : stage.images) {
@ -357,6 +362,10 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
: vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(),
});
if (texture_cache.IsMeta(tsharp.Address())) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)");
}
}
for (const auto& sampler : stage.samplers) {
const auto ssharp =

View file

@ -38,6 +38,7 @@ struct GraphicsPipelineKey {
float depth_bias_slope_factor;
float depth_bias_clamp;
u32 depth_bias_enable;
u32 num_samples = 1;
Liverpool::StencilControl stencil;
Liverpool::StencilRefMask stencil_ref_front;
Liverpool::StencilRefMask stencil_ref_back;

View file

@ -205,6 +205,7 @@ bool Instance::CreateDevice() {
.logicOp = features.logicOp,
.samplerAnisotropy = features.samplerAnisotropy,
.fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
.shaderStorageImageMultisample = true,
.shaderClipDistance = features.shaderClipDistance,
},
},

View file

@ -114,12 +114,18 @@ void PipelineCache::RefreshGraphicsKey() {
key.cull_mode = regs.polygon_control.CullingMode();
key.clip_space = regs.clipper_control.clip_space;
key.front_face = regs.polygon_control.front_face;
key.num_samples = regs.aa_config.NumSamples();
const auto& db = regs.depth_buffer;
if (key.depth.depth_enable) {
key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
}
// TODO: Should be a check for `OperationMode::Disable` once we emulate HW state init packet
// sent by system software.
const auto skip_cb_binding = false;
// `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
// attachments. This might be not a case as HW color buffers can be bound in an arbitrary order.
// We need to do some arrays compaction at this stage
@ -129,7 +135,7 @@ void PipelineCache::RefreshGraphicsKey() {
int remapped_cb{};
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];
if (!col_buf) {
if (!col_buf || skip_cb_binding) {
continue;
}
const auto base_format =
@ -160,6 +166,19 @@ void PipelineCache::RefreshGraphicsKey() {
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
const auto& regs = liverpool->regs;
// There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
// actual draw hence can skip pipeline creation.
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
LOG_TRACE(Render_Vulkan, "FCE pass skipped");
return {};
}
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
// TODO: check for a valid MRT1 to promote the draw to the resolve pass.
LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
return {};
}
u32 binding{};
std::array<Shader::IR::Program, MaxShaderStages> programs;
std::array<const Shader::Info*, MaxShaderStages> infos{};

View file

@ -40,12 +40,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed, index_offset);
const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
if (!pipeline) {
return;
}
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
boost::container::static_vector<vk::RenderingAttachmentInfo, Liverpool::NumColorBuffers>
color_attachments{};
vk::RenderingAttachmentInfo depth_attachment{};
u32 num_depth_attachments{};
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
const auto& col_buf = regs.color_buffers[col_buf_id];
if (!col_buf) {
@ -55,17 +57,26 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
const auto& hint = liverpool->last_cb_extent[col_buf_id];
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
color_attachments.push_back({
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral,
.loadOp = vk::AttachmentLoadOp::eLoad,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue =
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
});
texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
}
vk::RenderingAttachmentInfo depth_attachment{};
u32 num_depth_attachments{};
if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) {
const bool is_clear = regs.depth_render_control.depth_clear_enable;
const auto htile_address = regs.depth_htile_data_base.GetAddress();
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
texture_cache.IsMetaCleared(htile_address);
const auto& image_view =
texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);
texture_cache.DepthTarget(regs.depth_buffer, htile_address, liverpool->last_db_extent);
depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eGeneral,
@ -74,6 +85,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
.stencil = regs.stencil_clear}},
};
texture_cache.TouchMeta(htile_address, false);
num_depth_attachments++;
}
@ -112,7 +124,14 @@ void Rasterizer::DispatchDirect() {
const auto cmdbuf = scheduler.CommandBuffer();
const auto& cs_program = liverpool->regs.cs_program;
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
if (!pipeline) {
return;
}
const auto has_resources = pipeline->BindResources(memory, vertex_index_buffer, texture_cache);
if (!has_resources) {
return;
}
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);