Image binding and texture cache interface refactor (1/2) (#1481)

* video_core: texture_cache: interface refactor and better overlap handling

* resource binding moved into vk_rasterizer

* remove leftover `virtual` flag
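In rough terms, descriptor *preparation* (walking the shader stages, resolving buffers and images through the caches, filling the write list) now happens in the rasterizer, while the pipeline object only *submits* the prepared writes. A minimal sketch of that split with stand-in types (not the real emulator classes, just an illustration of the new ownership):

```cpp
#include <vector>

struct WriteDescriptorSet {}; // stand-in for vk::WriteDescriptorSet

struct Pipeline {
    bool is_compute = false;
    // Submission half: push constants, barriers, descriptor-set update/bind.
    void BindResources(const std::vector<WriteDescriptorSet>& set_writes) const {
        // e.g. pushConstants, then pushDescriptorSetKHR or updateDescriptorSets + bindDescriptorSets
    }
};

struct Rasterizer {
    std::vector<WriteDescriptorSet> set_writes; // previously a static member of Pipeline
    Pipeline* pipeline = nullptr;

    // Preparation half: gather per-stage bindings, then hand them to the pipeline.
    bool BindResources() {
        set_writes.clear();
        // for each shader stage: BindBuffers(...), BindTextures(...) append to set_writes
        pipeline->BindResources(set_writes);
        return true;
    }
};
```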
psucien 2024-11-24 17:07:51 +01:00 committed by GitHub
parent 16e1d679dc
commit 3d95ad0e3a
GPG key ID: B5690EEEBB952194
19 changed files with 911 additions and 679 deletions

View file

@@ -20,7 +20,7 @@ enum class Stage : u32 {
     Local,
     Compute,
 };
-constexpr u32 MaxStageTypes = 6;
+constexpr u32 MaxStageTypes = 7;
 [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
     return static_cast<Stage>(index);

View file

@@ -620,10 +620,10 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
 bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
     static constexpr FindFlags find_flags =
         FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
-    ImageInfo info{};
-    info.guest_address = device_addr;
-    info.guest_size_bytes = size;
-    const ImageId image_id = texture_cache.FindImage(info, find_flags);
+    TextureCache::BaseDesc desc{};
+    desc.info.guest_address = device_addr;
+    desc.info.guest_size_bytes = size;
+    const ImageId image_id = texture_cache.FindImage(desc, find_flags);
     if (!image_id) {
         return false;
     }

View file

@@ -15,8 +15,10 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
                                  DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache,
                                  u64 compute_key_, const Shader::Info& info_,
                                  vk::ShaderModule module)
-    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, compute_key{compute_key_},
-      info{&info_} {
+    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
+    auto& info = stages[int(Shader::Stage::Compute)];
+    info = &info_;
     const vk::PipelineShaderStageCreateInfo shader_ci = {
         .stage = vk::ShaderStageFlagBits::eCompute,
         .module = module,
@@ -118,90 +120,4 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
 ComputePipeline::~ComputePipeline() = default;
bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures.
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
BufferBarriers buffer_barriers;
Shader::PushData push_data{};
Shader::Backend::Bindings binding{};
info->PushUd(binding, push_data);
buffer_infos.clear();
buffer_views.clear();
image_infos.clear();
// Most of the time when a metadata is updated with a shader it gets cleared. It means
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
// will need its full emulation anyways. For cases of metadata read a warning will be logged.
const auto IsMetaUpdate = [&](const auto& desc) {
const VAddr address = desc.GetSharp(*info).base_address;
if (desc.is_written) {
if (texture_cache.TouchMeta(address, true)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
return true;
}
} else {
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
}
}
return false;
};
for (const auto& desc : info->buffers) {
if (desc.is_gds_buffer) {
continue;
}
if (IsMetaUpdate(desc)) {
return false;
}
}
for (const auto& desc : info->texture_buffers) {
if (IsMetaUpdate(desc)) {
return false;
}
}
BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, set_writes,
buffer_barriers);
BindTextures(texture_cache, *info, binding, set_writes);
if (set_writes.empty()) {
return false;
}
const auto cmdbuf = scheduler.CommandBuffer();
if (!buffer_barriers.empty()) {
const auto dependencies = vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = u32(buffer_barriers.size()),
.pBufferMemoryBarriers = buffer_barriers.data(),
};
scheduler.EndRendering();
cmdbuf.pipelineBarrier2(dependencies);
}
cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data),
&push_data);
// Bind descriptor set.
if (uses_push_descriptors) {
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
set_writes);
return true;
}
const auto desc_set = desc_heap.Commit(*desc_layout);
for (auto& set_write : set_writes) {
set_write.dstSet = desc_set;
}
instance.GetDevice().updateDescriptorSets(set_writes, {});
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set, {});
return true;
}
} // namespace Vulkan

View file

@@ -24,13 +24,8 @@ public:
                     vk::ShaderModule module);
     ~ComputePipeline();
 
-    bool BindResources(VideoCore::BufferCache& buffer_cache,
-                       VideoCore::TextureCache& texture_cache) const;
-
 private:
     u64 compute_key;
-    const Shader::Info* info;
-    bool uses_push_descriptors{};
 };
 
 } // namespace Vulkan

View file

@@ -16,10 +16,6 @@
 namespace Vulkan {
 
-static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex |
-                                       vk::ShaderStageFlagBits::eGeometry |
-                                       vk::ShaderStageFlagBits::eFragment;
-
 GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
                                    DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
                                    vk::PipelineCache pipeline_cache,
@@ -389,67 +385,4 @@ void GraphicsPipeline::BuildDescSetLayout() {
     desc_layout = std::move(layout);
 }
void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures.
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
BufferBarriers buffer_barriers;
Shader::PushData push_data{};
Shader::Backend::Bindings binding{};
buffer_infos.clear();
buffer_views.clear();
image_infos.clear();
for (const auto* stage : stages) {
if (!stage) {
continue;
}
if (stage->uses_step_rates) {
push_data.step0 = regs.vgt_instance_step_rate_0;
push_data.step1 = regs.vgt_instance_step_rate_1;
}
stage->PushUd(binding, push_data);
BindBuffers(buffer_cache, texture_cache, *stage, binding, push_data, set_writes,
buffer_barriers);
BindTextures(texture_cache, *stage, binding, set_writes);
}
const auto cmdbuf = scheduler.CommandBuffer();
SCOPE_EXIT {
cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle());
};
if (set_writes.empty()) {
return;
}
if (!buffer_barriers.empty()) {
const auto dependencies = vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = u32(buffer_barriers.size()),
.pBufferMemoryBarriers = buffer_barriers.data(),
};
scheduler.EndRendering();
cmdbuf.pipelineBarrier2(dependencies);
}
// Bind descriptor set.
if (uses_push_descriptors) {
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
set_writes);
return;
}
const auto desc_set = desc_heap.Commit(*desc_layout);
for (auto& set_write : set_writes) {
set_write.dstSet = desc_set;
}
instance.GetDevice().updateDescriptorSets(set_writes, {});
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, desc_set, {});
}
} // namespace Vulkan

View file

@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <xxhash.h>
 #include "common/types.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_common.h"
@@ -14,8 +15,8 @@ class TextureCache;
 
 namespace Vulkan {
 
-static constexpr u32 MaxVertexBufferCount = 32;
 static constexpr u32 MaxShaderStages = 5;
+static constexpr u32 MaxVertexBufferCount = 32;
 
 class Instance;
 class Scheduler;
@@ -61,13 +62,6 @@
                               std::span<const vk::ShaderModule> modules);
     ~GraphicsPipeline();
 
-    void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache,
-                       VideoCore::TextureCache& texture_cache) const;
-
-    const Shader::Info& GetStage(Shader::Stage stage) const noexcept {
-        return *stages[u32(stage)];
-    }
-
     bool IsEmbeddedVs() const noexcept {
         static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
         return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
@@ -99,9 +93,7 @@
     void BuildDescSetLayout();
 
 private:
-    std::array<const Shader::Info*, MaxShaderStages> stages{};
     GraphicsPipelineKey key;
-    bool uses_push_descriptors{};
 };
 
 } // namespace Vulkan

View file

@@ -38,8 +38,6 @@ struct Program {
 };
 
 class PipelineCache {
-    static constexpr size_t MaxShaderStages = 5;
-
 public:
     explicit PipelineCache(const Instance& instance, Scheduler& scheduler,
                            AmdGpu::Liverpool* liverpool);

View file

@@ -12,230 +12,47 @@
 namespace Vulkan {
 
-boost::container::static_vector<vk::DescriptorImageInfo, 32> Pipeline::image_infos;
-boost::container::static_vector<vk::BufferView, 8> Pipeline::buffer_views;
-boost::container::static_vector<vk::DescriptorBufferInfo, 32> Pipeline::buffer_infos;
-
 Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
-                   vk::PipelineCache pipeline_cache)
-    : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_} {}
+                   vk::PipelineCache pipeline_cache, bool is_compute_ /*= false*/)
+    : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, is_compute{is_compute_} {}
 
 Pipeline::~Pipeline() = default;
 
-void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache,
-                           VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
-                           Shader::Backend::Bindings& binding, Shader::PushData& push_data,
-                           DescriptorWrites& set_writes, BufferBarriers& buffer_barriers) const {
-    // [removed: buffer, texel-buffer and host-UBO descriptor setup, moved to
-    //  Rasterizer::BindBuffers in vk_rasterizer.cpp below]
-}
-
-void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
-                            Shader::Backend::Bindings& binding,
-                            DescriptorWrites& set_writes) const {
-    // [removed: image and sampler descriptor setup, moved to Rasterizer::BindTextures in
-    //  vk_rasterizer.cpp below]
-}
+void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers& buffer_barriers,
+                             const Shader::PushData& push_data) const {
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const auto bind_point =
+        IsCompute() ? vk::PipelineBindPoint::eCompute : vk::PipelineBindPoint::eGraphics;
+
+    if (!buffer_barriers.empty()) {
+        const auto dependencies = vk::DependencyInfo{
+            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+            .bufferMemoryBarrierCount = u32(buffer_barriers.size()),
+            .pBufferMemoryBarriers = buffer_barriers.data(),
+        };
+        scheduler.EndRendering();
+        cmdbuf.pipelineBarrier2(dependencies);
+    }
+
+    const auto stage_flags = IsCompute() ? vk::ShaderStageFlagBits::eCompute : gp_stage_flags;
+    cmdbuf.pushConstants(*pipeline_layout, stage_flags, 0u, sizeof(push_data), &push_data);
+
+    // Bind descriptor set.
+    if (set_writes.empty()) {
+        return;
+    }
+    if (uses_push_descriptors) {
+        cmdbuf.pushDescriptorSetKHR(bind_point, *pipeline_layout, 0, set_writes);
+        return;
+    }
+    const auto desc_set = desc_heap.Commit(*desc_layout);
+    for (auto& set_write : set_writes) {
+        set_write.dstSet = desc_set;
+    }
+    instance.GetDevice().updateDescriptorSets(set_writes, {});
+    cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {});
+}
 
 } // namespace Vulkan

View file

@@ -6,14 +6,18 @@
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
 #include "video_core/renderer_vulkan/vk_common.h"
+#include "video_core/texture_cache/texture_cache.h"
 
 namespace VideoCore {
 class BufferCache;
-class TextureCache;
 } // namespace VideoCore
 
 namespace Vulkan {
 
+static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex |
+                                       vk::ShaderStageFlagBits::eGeometry |
+                                       vk::ShaderStageFlagBits::eFragment;
+
 class Instance;
 class Scheduler;
 class DescriptorHeap;
@@ -21,7 +25,7 @@ class DescriptorHeap;
 class Pipeline {
 public:
     Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
-             vk::PipelineCache pipeline_cache);
+             vk::PipelineCache pipeline_cache, bool is_compute = false);
     virtual ~Pipeline();
 
     vk::Pipeline Handle() const noexcept {
@@ -32,16 +36,27 @@ public:
         return *pipeline_layout;
     }
 
+    auto GetStages() const {
+        if (is_compute) {
+            return std::span{stages.cend() - 1, stages.cend()};
+        } else {
+            return std::span{stages.cbegin(), stages.cend() - 1};
+        }
+    }
+
+    const Shader::Info& GetStage(Shader::Stage stage) const noexcept {
+        return *stages[u32(stage)];
+    }
+
+    bool IsCompute() const {
+        return is_compute;
+    }
+
     using DescriptorWrites = boost::container::small_vector<vk::WriteDescriptorSet, 16>;
     using BufferBarriers = boost::container::small_vector<vk::BufferMemoryBarrier2, 16>;
 
-    void BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache,
-                     const Shader::Info& stage, Shader::Backend::Bindings& binding,
-                     Shader::PushData& push_data, DescriptorWrites& set_writes,
-                     BufferBarriers& buffer_barriers) const;
-
-    void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
-                      Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const;
+    void BindResources(DescriptorWrites& set_writes, const BufferBarriers& buffer_barriers,
+                       const Shader::PushData& push_data) const;
 
 protected:
     const Instance& instance;
@@ -50,9 +65,9 @@ protected:
     vk::UniquePipeline pipeline;
     vk::UniquePipelineLayout pipeline_layout;
     vk::UniqueDescriptorSetLayout desc_layout;
-    static boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
-    static boost::container::static_vector<vk::BufferView, 8> buffer_views;
-    static boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
+    std::array<const Shader::Info*, Shader::MaxStageTypes> stages{};
+    bool uses_push_descriptors{};
+    const bool is_compute;
 };
 
 } // namespace Vulkan

View file

@@ -55,8 +55,8 @@
     Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
                         VAddr cpu_address, bool is_eop) {
-        const auto info = VideoCore::ImageInfo{attribute, cpu_address};
-        const auto image_id = texture_cache.FindImage(info);
+        auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
+        const auto image_id = texture_cache.FindImage(desc);
         texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler);
         return PrepareFrameInternal(image_id, is_eop);
     }
@@ -68,9 +68,11 @@
     VideoCore::Image& RegisterVideoOutSurface(
         const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) {
         vo_buffers_addr.emplace_back(cpu_address);
-        const auto info = VideoCore::ImageInfo{attribute, cpu_address};
-        const auto image_id = texture_cache.FindImage(info);
-        return texture_cache.GetImage(image_id);
+        auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
+        const auto image_id = texture_cache.FindImage(desc);
+        auto& image = texture_cache.GetImage(image_id);
+        image.usage.vo_surface = 1u;
+        return image;
     }
 
     bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
View file

@@ -75,6 +75,105 @@ bool Rasterizer::FilterDraw() {
    return true;
}
RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
// Prefetch color and depth buffers to let texture cache handle possible overlaps with bound
// textures (e.g. mipgen)
RenderState state;
cb_descs.clear();
db_desc.reset();
const auto& regs = liverpool->regs;
if (regs.color_control.degamma_enable) {
LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction");
}
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
const auto& col_buf = regs.color_buffers[col_buf_id];
if (!col_buf) {
continue;
}
// If the color buffer is still bound but rendering to it is disabled by the target
// mask, we need to prevent the render area from being affected by unbound render target
// extents.
if (!regs.color_target_mask.GetMask(col_buf_id)) {
continue;
}
// Skip stale color buffers if shader doesn't output to them. Otherwise it will perform
// an unnecessary transition and may result in state conflict if the resource is already
// bound for reading.
if ((mrt_mask & (1 << col_buf_id)) == 0) {
continue;
}
const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress());
texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
const auto& hint = liverpool->last_cb_extent[col_buf_id];
auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{},
std::tuple{col_buf, hint});
const auto& image_view = texture_cache.FindRenderTarget(desc);
image_id = bound_images.emplace_back(image_view.image_id);
auto& image = texture_cache.GetImage(image_id);
image.binding.is_target = 1u;
const auto mip = image_view.info.range.base.level;
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
state.color_images[state.num_color_attachments] = image.image;
state.color_attachments[state.num_color_attachments++] = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue =
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
};
}
using ZFormat = AmdGpu::Liverpool::DepthBuffer::ZFormat;
using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat;
if (regs.depth_buffer.Address() != 0 &&
((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) ||
(regs.depth_control.stencil_enable &&
regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) {
const auto htile_address = regs.depth_htile_data_base.GetAddress();
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
texture_cache.IsMetaCleared(htile_address);
const auto& hint = liverpool->last_db_extent;
auto& [image_id, desc] =
db_desc.emplace(std::piecewise_construct, std::tuple{},
std::tuple{regs.depth_buffer, regs.depth_view, regs.depth_control,
htile_address, hint});
const auto& image_view = texture_cache.FindDepthTarget(desc);
image_id = bound_images.emplace_back(image_view.image_id);
auto& image = texture_cache.GetImage(image_id);
image.binding.is_target = 1u;
state.width = std::min<u32>(state.width, image.info.size.width);
state.height = std::min<u32>(state.height, image.info.size.height);
state.depth_image = image.image;
state.depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = vk::ImageLayout::eUndefined,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear,
.stencil = regs.stencil_clear}},
};
texture_cache.TouchMeta(htile_address, false);
state.has_depth =
regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid;
state.has_stencil = regs.depth_buffer.stencil_info.format !=
AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
}
return state;
}
void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
    RENDERER_TRACE;
@@ -82,28 +181,30 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
         return;
     }
 
-    const auto cmdbuf = scheduler.CommandBuffer();
     const auto& regs = liverpool->regs;
     const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
     if (!pipeline) {
         return;
     }
 
-    try {
-        pipeline->BindResources(regs, buffer_cache, texture_cache);
-    } catch (...) {
-        UNREACHABLE();
+    auto state = PrepareRenderState(pipeline->GetMrtMask());
+
+    if (!BindResources(pipeline)) {
+        return;
     }
 
     const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
     buffer_cache.BindVertexBuffers(vs_info);
     const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
 
-    BeginRendering(*pipeline);
+    BeginRendering(*pipeline, state);
     UpdateDynamicState(*pipeline);
 
     const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets();
 
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
+
     if (is_indexed) {
         cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
                            instance_offset);
@@ -113,6 +214,8 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
         cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
                     instance_offset);
     }
+
+    ResetBindings();
 }
void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 stride,
@@ -123,19 +226,19 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
         return;
     }
 
-    const auto& regs = liverpool->regs;
     const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
     if (!pipeline) {
         return;
     }
 
+    auto state = PrepareRenderState(pipeline->GetMrtMask());
+
+    const auto& regs = liverpool->regs;
     ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList,
                "Unsupported primitive type for indirect draw");
 
-    try {
-        pipeline->BindResources(regs, buffer_cache, texture_cache);
-    } catch (...) {
-        UNREACHABLE();
+    if (!BindResources(pipeline)) {
+        return;
     }
 
     const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
@@ -151,13 +254,15 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
         std::tie(count_buffer, count_base) = buffer_cache.ObtainBuffer(count_address, 4, false);
     }
 
-    BeginRendering(*pipeline);
+    BeginRendering(*pipeline, state);
     UpdateDynamicState(*pipeline);
 
     // We can safely ignore both SGPR UD indices and results of fetch shader parsing, as vertex and
     // instance offsets will be automatically applied by Vulkan from indirect args buffer.
     const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
 
     if (is_indexed) {
         ASSERT(sizeof(VkDrawIndexedIndirectCommand) == stride);
@@ -177,6 +282,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
             cmdbuf.drawIndirect(buffer->Handle(), base, max_count, stride);
         }
     }
+
+    ResetBindings();
 }
void Rasterizer::DispatchDirect() {
@@ -189,18 +296,15 @@ void Rasterizer::DispatchDirect() {
         return;
     }
 
-    try {
-        const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache);
-        if (!has_resources) {
-            return;
-        }
-    } catch (...) {
-        UNREACHABLE();
+    if (!BindResources(pipeline)) {
+        return;
     }
 
     scheduler.EndRendering();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
     cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
+
+    ResetBindings();
 }
void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
@@ -213,19 +317,16 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
         return;
     }
 
-    try {
-        const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache);
-        if (!has_resources) {
-            return;
-        }
-    } catch (...) {
-        UNREACHABLE();
+    if (!BindResources(pipeline)) {
+        return;
     }
 
     scheduler.EndRendering();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
 
     const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false);
     cmdbuf.dispatchIndirect(buffer->Handle(), base);
+
+    ResetBindings();
 }
u64 Rasterizer::Flush() {
@@ -239,86 +340,386 @@ void Rasterizer::Finish() {
    scheduler.Finish();
}
[removed: the old void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) body; its color/depth target setup now lives in PrepareRenderState() above and in the new BeginRendering(pipeline, state) overload below]

bool Rasterizer::BindResources(const Pipeline* pipeline) {
    buffer_infos.clear();
    buffer_views.clear();
    image_infos.clear();

    const auto& regs = liverpool->regs;

    if (pipeline->IsCompute()) {
        const auto& info = pipeline->GetStage(Shader::Stage::Compute);

        // Most of the time when a metadata is updated with a shader it gets cleared. It means
        // we can skip the whole dispatch and update the tracked state instead. Also, it is not
        // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
        // will need its full emulation anyways. For cases of metadata read a warning will be
        // logged.
        const auto IsMetaUpdate = [&](const auto& desc) {
            const VAddr address = desc.GetSharp(info).base_address;
            if (desc.is_written) {
                if (texture_cache.TouchMeta(address, true)) {
                    LOG_TRACE(Render_Vulkan, "Metadata update skipped");
                    return true;
                }
            } else {
                if (texture_cache.IsMeta(address)) {
                    LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
                }
            }
            return false;
        };

        for (const auto& desc : info.buffers) {
            if (desc.is_gds_buffer) {
                continue;
            }
            if (IsMetaUpdate(desc)) {
                return false;
            }
        }
        for (const auto& desc : info.texture_buffers) {
            if (IsMetaUpdate(desc)) {
                return false;
            }
        }
    }

    set_writes.clear();
    buffer_barriers.clear();

    // Bind resource buffers and textures.
    Shader::PushData push_data{};
    Shader::Backend::Bindings binding{};

    for (const auto* stage : pipeline->GetStages()) {
        if (!stage) {
            continue;
        }
        if (stage->uses_step_rates) {
            push_data.step0 = regs.vgt_instance_step_rate_0;
            push_data.step1 = regs.vgt_instance_step_rate_1;
        }
        stage->PushUd(binding, push_data);

        BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers);
        BindTextures(*stage, binding, set_writes);
    }

    pipeline->BindResources(set_writes, buffer_barriers, push_data);

    return true;
}
void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding,
Shader::PushData& push_data, Pipeline::DescriptorWrites& set_writes,
Pipeline::BufferBarriers& buffer_barriers) {
buffer_bindings.clear();
for (const auto& desc : stage.buffers) {
const auto vsharp = desc.GetSharp(stage);
if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
buffer_bindings.emplace_back(buffer_id, vsharp);
} else {
buffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp);
}
}
texbuffer_bindings.clear();
for (const auto& desc : stage.texture_buffers) {
const auto vsharp = desc.GetSharp(stage);
if (vsharp.base_address != 0 && vsharp.GetSize() > 0 &&
vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
texbuffer_bindings.emplace_back(buffer_id, vsharp);
} else {
texbuffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp);
}
}
// Bind the flattened user data buffer as a UBO so it's accessible to the shader
if (stage.has_readconst) {
const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
buffer_infos.emplace_back(vk_buffer->Handle(), offset,
stage.flattened_ud_buf.size() * sizeof(u32));
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
++binding.buffer;
}
// Second pass to re-bind buffers that were updated after binding
for (u32 i = 0; i < buffer_bindings.size(); i++) {
const auto& [buffer_id, vsharp] = buffer_bindings[i];
const auto& desc = stage.buffers[i];
const bool is_storage = desc.IsStorage(vsharp);
if (!buffer_id) {
if (desc.is_gds_buffer) {
const auto* gds_buf = buffer_cache.GetGdsBuffer();
buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
} else if (instance.IsNullDescriptorSupported()) {
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
} else {
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
}
} else {
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
vsharp.base_address, vsharp.GetSize(), desc.is_written, false, buffer_id);
const u32 alignment =
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
ASSERT(adjust % 4 == 0);
push_data.AddOffset(binding.buffer, adjust);
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned,
vsharp.GetSize() + adjust);
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
++binding.buffer;
}
const auto null_buffer_view =
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
for (u32 i = 0; i < texbuffer_bindings.size(); i++) {
const auto& [buffer_id, vsharp] = texbuffer_bindings[i];
const auto& desc = stage.texture_buffers[i];
vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
if (buffer_id) {
const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
"Texel buffer stride must match format stride");
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
ASSERT(adjust % fmt_stride == 0);
push_data.AddOffset(binding.buffer, adjust / fmt_stride);
buffer_view =
vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written,
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
if (auto barrier =
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
: vk::AccessFlagBits2::eShaderRead,
vk::PipelineStageFlagBits2::eComputeShader)) {
buffer_barriers.emplace_back(*barrier);
}
if (desc.is_written) {
texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
}
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.pTexelBufferView = &buffer_view,
});
++binding.buffer;
}
}
void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding,
Pipeline::DescriptorWrites& set_writes) {
image_bindings.clear();
for (const auto& image_desc : stage.images) {
const auto tsharp = image_desc.GetSharp(stage);
if (texture_cache.IsMeta(tsharp.Address())) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a shader (texture)");
}
if (tsharp.GetDataFmt() == AmdGpu::DataFormat::FormatInvalid) {
image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{});
continue;
}
auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{},
std::tuple{tsharp, image_desc});
image_id = texture_cache.FindImage(desc);
auto& image = texture_cache.GetImage(image_id);
if (image.binding.is_bound) {
// The image is already bound. In case if it is about to be used as storage we need
// to force general layout on it.
image.binding.force_general |= image_desc.is_storage;
}
if (image.binding.is_target) {
// The image is already bound as target. Since we read and output to it need to force
// general layout too.
image.binding.force_general = 1u;
}
image.binding.is_bound = 1u;
}
// Second pass to re-bind images that were updated after binding
for (auto& [image_id, desc] : image_bindings) {
bool is_storage = desc.type == VideoCore::TextureCache::BindingType::Storage;
if (!image_id) {
if (instance.IsNullDescriptorSupported()) {
image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
} else {
auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID);
image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view,
vk::ImageLayout::eGeneral);
}
} else {
if (auto& old_image = texture_cache.GetImage(image_id);
old_image.binding.needs_rebind) {
old_image.binding.Reset(); // clean up previous image binding state
image_id = texture_cache.FindImage(desc);
}
bound_images.emplace_back(image_id);
auto& image = texture_cache.GetImage(image_id);
auto& image_view = texture_cache.FindTexture(image_id, desc.view_info);
if (image.binding.force_general || image.binding.is_target) {
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eShaderRead |
(image.info.IsDepthStencil()
? vk::AccessFlagBits2::eDepthStencilAttachmentWrite
: vk::AccessFlagBits2::eColorAttachmentWrite),
{});
} else {
if (is_storage) {
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eShaderRead |
vk::AccessFlagBits2::eShaderWrite,
desc.view_info.range);
} else {
const auto new_layout = image.info.IsDepthStencil()
? vk::ImageLayout::eDepthStencilReadOnlyOptimal
: vk::ImageLayout::eShaderReadOnlyOptimal;
image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead,
desc.view_info.range);
}
}
image.usage.storage |= is_storage;
image.usage.texture |= !is_storage;
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
image.last_state.layout);
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType =
is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(),
});
}
for (const auto& sampler : stage.samplers) {
auto ssharp = sampler.GetSharp(stage);
if (sampler.disable_aniso) {
const auto& tsharp = stage.images[sampler.associated_image].GetSharp(stage);
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One);
}
}
const auto vk_sampler = texture_cache.GetSampler(ssharp);
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampler,
.pImageInfo = &image_infos.back(),
});
}
}
void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& state) {
int cb_index = 0;
for (auto& [image_id, desc] : cb_descs) {
if (auto& old_img = texture_cache.GetImage(image_id); old_img.binding.needs_rebind) {
auto& view = texture_cache.FindRenderTarget(desc);
ASSERT(view.image_id != image_id);
image_id = bound_images.emplace_back(view.image_id);
auto& image = texture_cache.GetImage(view.image_id);
state.color_attachments[cb_index].imageView = *view.image_view;
state.color_attachments[cb_index].imageLayout = image.last_state.layout;
state.color_images[cb_index] = image.image;
const auto mip = view.info.range.base.level;
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
ASSERT(old_img.info.size.width == state.width);
ASSERT(old_img.info.size.height == state.height);
}
auto& image = texture_cache.GetImage(image_id);
if (image.binding.force_general) {
image.Transit(
vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eColorAttachmentWrite | vk::AccessFlagBits2::eShaderRead, {});
} else {
image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
vk::AccessFlagBits2::eColorAttachmentWrite |
vk::AccessFlagBits2::eColorAttachmentRead,
desc.view_info.range);
}
image.usage.render_target = 1u;
state.color_attachments[cb_index].imageLayout = image.last_state.layout;
++cb_index;
}
if (db_desc) {
const auto& image_id = std::get<0>(*db_desc);
const auto& desc = std::get<1>(*db_desc);
auto& image = texture_cache.GetImage(image_id);
ASSERT(image.binding.needs_rebind == 0);
const bool has_stencil = image.usage.stencil;
if (has_stencil) {
image.aspect_mask |= vk::ImageAspectFlagBits::eStencil;
}
if (image.binding.force_general) {
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
vk::AccessFlagBits2::eShaderRead,
{});
} else {
const auto new_layout = desc.view_info.is_storage
? has_stencil
? vk::ImageLayout::eDepthStencilAttachmentOptimal
: vk::ImageLayout::eDepthAttachmentOptimal
: has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
: vk::ImageLayout::eDepthReadOnlyOptimal;
image.Transit(new_layout,
vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
vk::AccessFlagBits2::eDepthStencilAttachmentRead,
desc.view_info.range);
}
state.depth_attachment.imageLayout = image.last_state.layout;
image.usage.depth_target = true;
image.usage.stencil = has_stencil;
}
    scheduler.BeginRendering(state);
}
@@ -328,10 +729,12 @@ void Rasterizer::Resolve() {
     // Read from MRT0, average all samples, and write to MRT1, which is one-sample
     const auto& mrt0_hint = liverpool->last_cb_extent[0];
     const auto& mrt1_hint = liverpool->last_cb_extent[1];
-    VideoCore::ImageInfo mrt0_info{liverpool->regs.color_buffers[0], mrt0_hint};
-    VideoCore::ImageInfo mrt1_info{liverpool->regs.color_buffers[1], mrt1_hint};
-    auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_info));
-    auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_info));
+    VideoCore::TextureCache::RenderTargetDesc mrt0_desc{liverpool->regs.color_buffers[0],
+                                                        mrt0_hint};
+    VideoCore::TextureCache::RenderTargetDesc mrt1_desc{liverpool->regs.color_buffers[1],
+                                                        mrt1_hint};
+    auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_desc));
+    auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_desc));
 
     VideoCore::SubresourceRange mrt0_range;
     mrt0_range.base.layer = liverpool->regs.color_buffers[0].view.slice_start;

View file

@@ -19,6 +19,7 @@ class MemoryManager;
 namespace Vulkan {
 
 class Scheduler;
+class RenderState;
 class GraphicsPipeline;
 
 class Rasterizer {
@@ -54,7 +55,8 @@
     void Finish();
 
 private:
-    void BeginRendering(const GraphicsPipeline& pipeline);
+    RenderState PrepareRenderState(u32 mrt_mask);
+    void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state);
     void Resolve();
 
     void UpdateDynamicState(const GraphicsPipeline& pipeline);
@@ -63,6 +65,21 @@
     bool FilterDraw();
void BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding,
Shader::PushData& push_data, Pipeline::DescriptorWrites& set_writes,
Pipeline::BufferBarriers& buffer_barriers);
void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding,
Pipeline::DescriptorWrites& set_writes);
bool BindResources(const Pipeline* pipeline);
void ResetBindings() {
for (auto& image_id : bound_images) {
texture_cache.GetImage(image_id).binding.Reset();
}
bound_images.clear();
}
private:
    const Instance& instance;
    Scheduler& scheduler;
@@ -72,6 +89,25 @@ private:
    AmdGpu::Liverpool* liverpool;
    Core::MemoryManager* memory;
    PipelineCache pipeline_cache;
boost::container::static_vector<
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8>
cb_descs;
std::optional<std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc>> db_desc;
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
boost::container::static_vector<vk::BufferView, 8> buffer_views;
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
boost::container::static_vector<VideoCore::ImageId, 64> bound_images;
Pipeline::DescriptorWrites set_writes;
Pipeline::BufferBarriers buffer_barriers;
using BufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
boost::container::static_vector<BufferBindingInfo, 32> buffer_bindings;
using TexBufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
boost::container::static_vector<TexBufferBindingInfo, 32> texbuffer_bindings;
using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>;
boost::container::static_vector<ImageBindingInfo, 32> image_bindings;
};

} // namespace Vulkan

View file

@@ -61,6 +61,15 @@ bool ImageInfo::IsDepthStencil() const {
    }
}
bool ImageInfo::HasStencil() const {
if (pixel_format == vk::Format::eD32SfloatS8Uint ||
pixel_format == vk::Format::eD24UnormS8Uint ||
pixel_format == vk::Format::eD16UnormS8Uint) {
return true;
}
return false;
}
static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
    vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
                                vk::ImageUsageFlagBits::eTransferDst |
@ -143,14 +152,17 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
// the texture cache should re-create the resource with the usage requested // the texture cache should re-create the resource with the usage requested
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
vk::ImageCreateFlagBits::eExtendedUsage}; vk::ImageCreateFlagBits::eExtendedUsage};
if (info.props.is_cube || (info.type == vk::ImageType::e2D && info.resources.layers >= 6)) { const bool can_be_cube = (info.type == vk::ImageType::e2D) &&
(info.resources.layers % 6 == 0) &&
(info.size.width == info.size.height);
if (info.props.is_cube || can_be_cube) {
flags |= vk::ImageCreateFlagBits::eCubeCompatible; flags |= vk::ImageCreateFlagBits::eCubeCompatible;
} else if (info.props.is_volume) { } else if (info.props.is_volume) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
} }
usage = ImageUsageFlags(info); usage_flags = ImageUsageFlags(info);
format_features = FormatFeatureFlags(usage); format_features = FormatFeatureFlags(usage_flags);
switch (info.pixel_format) { switch (info.pixel_format) {
case vk::Format::eD16Unorm: case vk::Format::eD16Unorm:
@ -170,7 +182,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
constexpr auto tiling = vk::ImageTiling::eOptimal; constexpr auto tiling = vk::ImageTiling::eOptimal;
const auto supported_format = instance->GetSupportedFormat(info.pixel_format, format_features); const auto supported_format = instance->GetSupportedFormat(info.pixel_format, format_features);
const auto properties = instance->GetPhysicalDevice().getImageFormatProperties( const auto properties = instance->GetPhysicalDevice().getImageFormatProperties(
supported_format, info.type, tiling, usage, flags); supported_format, info.type, tiling, usage_flags, flags);
const auto supported_samples = properties.result == vk::Result::eSuccess const auto supported_samples = properties.result == vk::Result::eSuccess
? properties.value.sampleCounts ? properties.value.sampleCounts
: vk::SampleCountFlagBits::e1; : vk::SampleCountFlagBits::e1;
@ -188,7 +200,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
.arrayLayers = static_cast<u32>(info.resources.layers), .arrayLayers = static_cast<u32>(info.resources.layers),
.samples = LiverpoolToVK::NumSamples(info.num_samples, supported_samples), .samples = LiverpoolToVK::NumSamples(info.num_samples, supported_samples),
.tiling = tiling, .tiling = tiling,
.usage = usage, .usage = usage_flags,
.initialLayout = vk::ImageLayout::eUndefined, .initialLayout = vk::ImageLayout::eUndefined,
}; };

View file

@ -30,8 +30,6 @@ enum ImageFlagBits : u32 {
Registered = 1 << 6, ///< True when the image is registered Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked Picked = 1 << 7, ///< Temporary flag to mark the image as picked
MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
Bound = 1 << 9, ///< True when the image is bound to a descriptor set
NeedsRebind = 1 << 10, ///< True when the image needs to be rebound
}; };
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
@ -113,7 +111,15 @@ struct Image {
std::vector<ImageViewId> image_view_ids; std::vector<ImageViewId> image_view_ids;
// Resource state tracking // Resource state tracking
vk::ImageUsageFlags usage; struct {
u32 texture : 1;
u32 storage : 1;
u32 render_target : 1;
u32 depth_target : 1;
u32 stencil : 1;
u32 vo_surface : 1;
} usage{};
vk::ImageUsageFlags usage_flags;
vk::FormatFeatureFlags2 format_features; vk::FormatFeatureFlags2 format_features;
struct State { struct State {
vk::Flags<vk::PipelineStageFlagBits2> pl_stage = vk::PipelineStageFlagBits2::eAllCommands; vk::Flags<vk::PipelineStageFlagBits2> pl_stage = vk::PipelineStageFlagBits2::eAllCommands;
@ -124,6 +130,22 @@ struct Image {
std::vector<State> subresource_states{}; std::vector<State> subresource_states{};
boost::container::small_vector<u64, 14> mip_hashes{}; boost::container::small_vector<u64, 14> mip_hashes{};
u64 tick_accessed_last{0}; u64 tick_accessed_last{0};
struct {
union {
struct {
u32 is_bound : 1; // the image is bound to a descriptor set
u32 is_target : 1; // the image is bound as color/depth target
u32 needs_rebind : 1; // the image needs to be rebound
u32 force_general : 1; // the image needs to be used in general layout
};
u32 raw{};
};
void Reset() {
raw = 0u;
}
} binding{};
}; };
} // namespace VideoCore } // namespace VideoCore
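
Note: the removed Bound/NeedsRebind image flags are replaced by the per-image binding bitfield above. The standalone sketch below mirrors that structure (using the same anonymous-struct-in-union extension the codebase already relies on) and shows that a single Reset() clears every bit. The field semantics in the comments are taken from this diff; the surrounding test code is illustrative only.

#include <cassert>
#include <cstdint>

struct Binding {
    union {
        struct {
            uint32_t is_bound : 1;      // written into a descriptor set
            uint32_t is_target : 1;     // bound as a color/depth target
            uint32_t needs_rebind : 1;  // backing image was re-created (e.g. expanded)
            uint32_t force_general : 1; // must be kept in the general layout
        };
        uint32_t raw;
    };
    void Reset() {
        raw = 0u;
    }
};

int main() {
    Binding binding{};
    binding.is_target = 1u;
    binding.needs_rebind = 1u; // set by ExpandImage()/ResolveOverlap() in this change
    assert(binding.raw != 0u);
    binding.Reset();
    assert(binding.raw == 0u);
}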

View file

@ -245,7 +245,6 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
size.width = attrib.width; size.width = attrib.width;
size.height = attrib.height; size.height = attrib.height;
pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127); pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127);
usage.vo_buffer = true;
num_bits = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float ? 32 : 64; num_bits = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float ? 32 : 64;
ASSERT(num_bits == 32); ASSERT(num_bits == 32);
@ -277,7 +276,6 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
resources.layers = buffer.NumSlices(); resources.layers = buffer.NumSlices();
meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0; meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0;
meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0; meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0;
usage.render_target = true;
guest_address = buffer.Address(); guest_address = buffer.Address();
const auto color_slice_sz = buffer.GetColorSliceSize(); const auto color_slice_sz = buffer.GetColorSliceSize();
@ -299,9 +297,6 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
pitch = buffer.Pitch(); pitch = buffer.Pitch();
resources.layers = num_slices; resources.layers = num_slices;
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
usage.depth_target = true;
usage.stencil =
buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
guest_address = buffer.Address(); guest_address = buffer.Address();
const auto depth_slice_sz = buffer.GetDepthSliceSize(); const auto depth_slice_sz = buffer.GetDepthSliceSize();
@ -330,7 +325,6 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
resources.layers = image.NumLayers(desc.is_array); resources.layers = image.NumLayers(desc.is_array);
num_samples = image.NumSamples(); num_samples = image.NumSamples();
num_bits = NumBits(image.GetDataFmt()); num_bits = NumBits(image.GetDataFmt());
usage.texture = true;
guest_address = image.Address(); guest_address = image.Address();
@ -392,7 +386,6 @@ void ImageInfo::UpdateSize() {
} }
} }
mip_info.size *= mip_d; mip_info.size *= mip_d;
mip_info.offset = guest_size_bytes; mip_info.offset = guest_size_bytes;
mips_layout.emplace_back(mip_info); mips_layout.emplace_back(mip_info);
guest_size_bytes += mip_info.size; guest_size_bytes += mip_info.size;
@ -400,79 +393,87 @@ void ImageInfo::UpdateSize() {
guest_size_bytes *= resources.layers; guest_size_bytes *= resources.layers;
} }
bool ImageInfo::IsMipOf(const ImageInfo& info) const { int ImageInfo::IsMipOf(const ImageInfo& info) const {
if (!IsCompatible(info)) { if (!IsCompatible(info)) {
return false; return -1;
}
if (!IsTilingCompatible(info.tiling_idx, tiling_idx)) {
return -1;
} }
// Currently we expect only one level to be copied. // Currently we expect only one level to be copied.
if (resources.levels != 1) { if (resources.levels != 1) {
return false; return -1;
} }
const int mip = info.resources.levels - resources.levels; if (info.mips_layout.empty()) {
if (mip < 1) { UNREACHABLE();
return false;
} }
// Find mip
auto mip = -1;
for (auto m = 0; m < info.mips_layout.size(); ++m) {
if (guest_address == (info.guest_address + info.mips_layout[m].offset)) {
mip = m;
break;
}
}
if (mip < 0) {
return -1;
}
ASSERT(mip != 0);
const auto mip_w = std::max(info.size.width >> mip, 1u); const auto mip_w = std::max(info.size.width >> mip, 1u);
const auto mip_h = std::max(info.size.height >> mip, 1u); const auto mip_h = std::max(info.size.height >> mip, 1u);
if ((size.width != mip_w) || (size.height != mip_h)) { if ((size.width != mip_w) || (size.height != mip_h)) {
return false; return -1;
} }
const auto mip_d = std::max(info.size.depth >> mip, 1u); const auto mip_d = std::max(info.size.depth >> mip, 1u);
if (info.type == vk::ImageType::e3D && type == vk::ImageType::e2D) { if (info.type == vk::ImageType::e3D && type == vk::ImageType::e2D) {
// In case of a 2D array to 3D copy, make sure we have the proper number of layers. // In case of a 2D array to 3D copy, make sure we have the proper number of layers.
if (resources.layers != mip_d) { if (resources.layers != mip_d) {
return false; return -1;
} }
} else { } else {
if (type != info.type) { if (type != info.type) {
return false; return -1;
} }
} }
// Check if the mip has correct size. return mip;
if (info.mips_layout.size() <= mip || info.mips_layout[mip].size != guest_size_bytes) {
return false;
}
// Ensure that address matches too.
if ((info.guest_address + info.mips_layout[mip].offset) != guest_address) {
return false;
}
return true;
} }
bool ImageInfo::IsSliceOf(const ImageInfo& info) const { int ImageInfo::IsSliceOf(const ImageInfo& info) const {
if (!IsCompatible(info)) { if (!IsCompatible(info)) {
return false; return -1;
} }
// Array slices should be of the same type. // Array slices should be of the same type.
if (type != info.type) { if (type != info.type) {
return false; return -1;
} }
// 2D dimensions of both images should be the same. // 2D dimensions of both images should be the same.
if ((size.width != info.size.width) || (size.height != info.size.height)) { if ((size.width != info.size.width) || (size.height != info.size.height)) {
return false; return -1;
} }
// Check for size alignment. // Check for size alignment.
const auto slice_size = info.guest_size_bytes / info.resources.layers; const auto slice_size = info.guest_size_bytes / info.resources.layers;
if (guest_size_bytes % slice_size != 0) { if (guest_size_bytes % slice_size != 0) {
return false; return -1;
} }
// Ensure that address is aligned too. // Ensure that address is aligned too.
if (((info.guest_address - guest_address) % guest_size_bytes) != 0) { const auto addr_diff = guest_address - info.guest_address;
return false; if ((addr_diff % guest_size_bytes) != 0) {
return -1;
} }
return true; return addr_diff / guest_size_bytes;
} }
} // namespace VideoCore } // namespace VideoCore
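
Note: IsMipOf() no longer derives the mip index from level counts; it scans the parent's mips_layout for an offset whose start address matches the candidate image, and only then validates dimensions and type. A standalone illustration of that address lookup follows; FindMipByAddress and the numbers in main() are hypothetical, not part of ImageInfo.

#include <cstdint>
#include <vector>

// Return the mip whose start address equals the candidate address, or -1 if none.
// The dimension/type checks the real IsMipOf() performs afterwards are omitted.
int FindMipByAddress(uint64_t parent_address, const std::vector<uint64_t>& mip_offsets,
                     uint64_t candidate_address) {
    for (int m = 0; m < static_cast<int>(mip_offsets.size()); ++m) {
        if (candidate_address == parent_address + mip_offsets[m]) {
            return m;
        }
    }
    return -1;
}

int main() {
    // Hypothetical layout: three mips at byte offsets 0x0, 0x100000 and 0x140000.
    const std::vector<uint64_t> offsets{0x0, 0x100000, 0x140000};
    const int mip = FindMipByAddress(0x10000000ull, offsets, 0x10140000ull);
    return mip == 2 ? 0 : 1; // the candidate resolves to mip 2 of the parent
}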

View file

@ -28,14 +28,28 @@ struct ImageInfo {
bool IsBlockCoded() const; bool IsBlockCoded() const;
bool IsPacked() const; bool IsPacked() const;
bool IsDepthStencil() const; bool IsDepthStencil() const;
bool HasStencil() const;
bool IsMipOf(const ImageInfo& info) const; int IsMipOf(const ImageInfo& info) const;
bool IsSliceOf(const ImageInfo& info) const; int IsSliceOf(const ImageInfo& info) const;
/// Verifies if images are compatible for subresource merging. /// Verifies if images are compatible for subresource merging.
bool IsCompatible(const ImageInfo& info) const { bool IsCompatible(const ImageInfo& info) const {
return (pixel_format == info.pixel_format && tiling_idx == info.tiling_idx && return (pixel_format == info.pixel_format && num_samples == info.num_samples &&
num_samples == info.num_samples && num_bits == info.num_bits); num_bits == info.num_bits);
}
bool IsTilingCompatible(u32 lhs, u32 rhs) const {
if (lhs == rhs) {
return true;
}
if (lhs == 0x0e && rhs == 0x0d) {
return true;
}
if (lhs == 0x0d && rhs == 0x0e) {
return true;
}
return false;
} }
void UpdateSize(); void UpdateSize();
@ -46,15 +60,6 @@ struct ImageInfo {
VAddr htile_addr; VAddr htile_addr;
} meta_info{}; } meta_info{};
struct {
u32 texture : 1;
u32 storage : 1;
u32 render_target : 1;
u32 depth_target : 1;
u32 stencil : 1;
u32 vo_buffer : 1;
} usage{}; // Usage data tracked during image lifetime
struct { struct {
u32 is_cube : 1; u32 is_cube : 1;
u32 is_volume : 1; u32 is_volume : 1;
@ -81,6 +86,9 @@ struct ImageInfo {
VAddr guest_address{0}; VAddr guest_address{0};
u32 guest_size_bytes{0}; u32 guest_size_bytes{0};
u32 tiling_idx{0}; // TODO: merge with existing! u32 tiling_idx{0}; // TODO: merge with existing!
VAddr stencil_addr{0};
u32 stencil_size{0};
}; };
} // namespace VideoCore } // namespace VideoCore
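
Note: IsCompatible() above no longer requires identical tiling; the new IsTilingCompatible() additionally treats the two indices 0x0d and 0x0e as interchangeable (which hardware tiling modes those correspond to is not spelled out in this diff). A compact standalone restatement:

#include <cstdint>

// Exact-match tiling, with the single 0x0d <-> 0x0e exception carved out above.
bool IsTilingCompatible(uint32_t lhs, uint32_t rhs) {
    if (lhs == rhs) {
        return true;
    }
    return (lhs == 0x0d && rhs == 0x0e) || (lhs == 0x0e && rhs == 0x0d);
}

int main() {
    return (IsTilingCompatible(0x0d, 0x0e) && !IsTilingCompatible(0x02, 0x0d)) ? 0 : 1;
}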

View file

@ -149,7 +149,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer,
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
ImageId image_id_) ImageId image_id_)
: image_id{image_id_}, info{info_} { : image_id{image_id_}, info{info_} {
vk::ImageViewUsageCreateInfo usage_ci{.usage = image.usage}; vk::ImageViewUsageCreateInfo usage_ci{.usage = image.usage_flags};
if (!info.is_storage) { if (!info.is_storage) {
usage_ci.usage &= ~vk::ImageUsageFlagBits::eStorage; usage_ci.usage &= ~vk::ImageUsageFlagBits::eStorage;
} }

View file

@ -77,84 +77,149 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
} }
} }
ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, ImageId cache_image_id) { ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, BindingType binding,
const auto& cache_info = slot_images[cache_image_id].info; ImageId cache_image_id) {
const auto& cache_image = slot_images[cache_image_id];
const bool was_bound_as_texture = if (!cache_image.info.IsDepthStencil() && !requested_info.IsDepthStencil()) {
!cache_info.usage.depth_target && (cache_info.usage.texture || cache_info.usage.storage); return {};
if (requested_info.usage.depth_target && was_bound_as_texture) { }
auto new_image_id = slot_images.insert(instance, scheduler, requested_info);
const bool stencil_match = requested_info.HasStencil() == cache_image.info.HasStencil();
const bool bpp_match = requested_info.num_bits == cache_image.info.num_bits;
// If an image in the cache has fewer slices, we need to expand it
bool recreate = cache_image.info.resources < requested_info.resources;
switch (binding) {
case BindingType::Texture:
// The guest requires a depth-sampled texture, but the cache can offer only Rxf. We need to
// recreate the image.
recreate |= requested_info.IsDepthStencil() && !cache_image.info.IsDepthStencil();
break;
case BindingType::Storage:
// If the guest is going to use a previously created depth image as storage, the image needs
// to be recreated. (TODO: a case of linear RGBA8 aliasing is probably legitimate)
recreate |= cache_image.info.IsDepthStencil();
break;
case BindingType::RenderTarget:
// A render target can only have an Rxf format. If the cache contains only Dx[S8], we need to
// re-create the image.
ASSERT(!requested_info.IsDepthStencil());
recreate |= cache_image.info.IsDepthStencil();
break;
case BindingType::DepthTarget:
// The guest has requested a previously allocated texture to be bound as a depth target.
// In this case we need to convert the Rx float format to Dx[S8] as requested.
recreate |= !cache_image.info.IsDepthStencil();
// The guest is trying to bind a depth target and the cache already has one. Make sure the
// aspects and bpp match.
recreate |= cache_image.info.IsDepthStencil() && !(stencil_match && bpp_match);
break;
default:
break;
}
if (recreate) {
auto new_info{requested_info};
new_info.resources = std::max(requested_info.resources, cache_image.info.resources);
new_info.UpdateSize();
const auto new_image_id = slot_images.insert(instance, scheduler, new_info);
RegisterImage(new_image_id); RegisterImage(new_image_id);
// Inherit image usage
auto& new_image = GetImage(new_image_id);
new_image.usage = cache_image.usage;
// TODO: perform a depth copy here // TODO: perform a depth copy here
FreeImage(cache_image_id); FreeImage(cache_image_id);
return new_image_id; return new_image_id;
} }
const bool should_bind_as_texture = // Will be handled by view
!requested_info.usage.depth_target && return cache_image_id;
(requested_info.usage.texture || requested_info.usage.storage);
if (cache_info.usage.depth_target && should_bind_as_texture) {
if (cache_info.resources == requested_info.resources) {
return cache_image_id;
} else {
// UNREACHABLE();
}
}
return {};
} }
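
Note: the branches above boil down to a per-binding-type recreate decision. The condensed, compilable restatement below uses stub inputs (SurfaceProps and NeedsRecreate are illustrative names); the real code additionally asserts that a render-target request is never depth/stencil, grows the new image's resources to the maximum of both, and inherits usage before freeing the old image.

enum class BindingType { Texture, Storage, RenderTarget, DepthTarget, VideoOut };

struct SurfaceProps {
    bool is_depth_stencil;
    bool has_stencil;
    int num_bits;
};

bool NeedsRecreate(BindingType binding, const SurfaceProps& requested,
                   const SurfaceProps& cached, bool cached_has_fewer_slices) {
    bool recreate = cached_has_fewer_slices;
    switch (binding) {
    case BindingType::Texture:
        // The guest samples depth, but the cache only holds an Rxf image.
        recreate |= requested.is_depth_stencil && !cached.is_depth_stencil;
        break;
    case BindingType::Storage:
    case BindingType::RenderTarget:
        // Storage and color targets cannot reuse a Dx[S8] image directly.
        recreate |= cached.is_depth_stencil;
        break;
    case BindingType::DepthTarget:
        // Either convert Rxf to Dx[S8], or require matching aspects and bpp.
        recreate |= !cached.is_depth_stencil;
        recreate |= cached.is_depth_stencil &&
                    !(requested.has_stencil == cached.has_stencil &&
                      requested.num_bits == cached.num_bits);
        break;
    default:
        break;
    }
    return recreate;
}

int main() {
    const SurfaceProps depth{true, true, 32};
    const SurfaceProps color{false, false, 32};
    // Binding a cached color image as a depth target forces re-creation.
    return NeedsRecreate(BindingType::DepthTarget, depth, color, false) ? 0 : 1;
}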
ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_image_id, std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& image_info,
ImageId merged_image_id) { BindingType binding,
ImageId cache_image_id,
ImageId merged_image_id) {
auto& tex_cache_image = slot_images[cache_image_id]; auto& tex_cache_image = slot_images[cache_image_id];
// We can assume it is safe to delete the image if it hasn't been accessed for a certain number of frames.
const bool safe_to_delete =
scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
if (image_info.size != tex_cache_image.info.size) { if (image_info.size != tex_cache_image.info.size) {
// Very likely this kind of overlap is caused by allocation from a pool. We can assume // Very likely this kind of overlap is caused by allocation from a pool.
// it is safe to delete the image if it wasn't accessed in some amount of frames. if (safe_to_delete) {
if (scheduler.CurrentTick() - tex_cache_image.tick_accessed_last >
NumFramesBeforeRemoval) {
FreeImage(cache_image_id); FreeImage(cache_image_id);
} }
return merged_image_id; return {merged_image_id, -1, -1};
} }
if (auto depth_image_id = ResolveDepthOverlap(image_info, cache_image_id)) { if (const auto depth_image_id = ResolveDepthOverlap(image_info, binding, cache_image_id)) {
return depth_image_id; return {depth_image_id, -1, -1};
} }
if (image_info.pixel_format != tex_cache_image.info.pixel_format || if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
image_info.guest_size_bytes <= tex_cache_image.info.guest_size_bytes) { image_info.guest_size_bytes <= tex_cache_image.info.guest_size_bytes) {
auto result_id = merged_image_id ? merged_image_id : cache_image_id; auto result_id = merged_image_id ? merged_image_id : cache_image_id;
const auto& result_image = slot_images[result_id]; const auto& result_image = slot_images[result_id];
return IsVulkanFormatCompatible(image_info.pixel_format, result_image.info.pixel_format) return {
? result_id IsVulkanFormatCompatible(image_info.pixel_format, result_image.info.pixel_format)
: ImageId{}; ? result_id
: ImageId{},
-1, -1};
} }
ImageId new_image_id{}; ImageId new_image_id{};
if (image_info.type == tex_cache_image.info.type) { if (image_info.type == tex_cache_image.info.type) {
ASSERT(image_info.resources > tex_cache_image.info.resources);
new_image_id = ExpandImage(image_info, cache_image_id); new_image_id = ExpandImage(image_info, cache_image_id);
} else { } else {
UNREACHABLE(); UNREACHABLE();
} }
return new_image_id; return {new_image_id, -1, -1};
} }
// Right overlap, the image requested is a possible subresource of the image from cache. // Right overlap, the image requested is a possible subresource of the image from cache.
if (image_info.guest_address > tex_cache_image.info.guest_address) { if (image_info.guest_address > tex_cache_image.info.guest_address) {
// Should be handled by view. No additional actions needed. if (auto mip = image_info.IsMipOf(tex_cache_image.info); mip >= 0) {
return {cache_image_id, mip, -1};
}
if (auto slice = image_info.IsSliceOf(tex_cache_image.info); slice >= 0) {
return {cache_image_id, -1, slice};
}
// TODO: slice and mip
if (safe_to_delete) {
FreeImage(cache_image_id);
}
return {{}, -1, -1};
} else { } else {
// Left overlap, the image from cache is a possible subresource of the image requested // Left overlap, the image from cache is a possible subresource of the image requested
if (!merged_image_id) { if (!merged_image_id) {
// We need to have a larger, already allocated image to copy this one into // We need to have a larger, already allocated image to copy this one into
return {}; return {{}, -1, -1};
} }
if (tex_cache_image.info.IsMipOf(image_info)) { if (auto mip = tex_cache_image.info.IsMipOf(image_info); mip >= 0) {
if (tex_cache_image.binding.is_target) {
// We have a larger image created and a separate one representing a subresource of it,
// bound as a render target. In this case we need to rebind the render target.
tex_cache_image.binding.needs_rebind = 1u;
GetImage(merged_image_id).binding.is_target = 1u;
FreeImage(cache_image_id);
return {merged_image_id, -1, -1};
}
tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal,
vk::AccessFlagBits2::eTransferRead, {}); vk::AccessFlagBits2::eTransferRead, {});
@ -162,13 +227,13 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_
ASSERT(num_mips_to_copy == 1); ASSERT(num_mips_to_copy == 1);
auto& merged_image = slot_images[merged_image_id]; auto& merged_image = slot_images[merged_image_id];
merged_image.CopyMip(tex_cache_image, image_info.resources.levels - 1); merged_image.CopyMip(tex_cache_image, mip);
FreeImage(cache_image_id); FreeImage(cache_image_id);
} }
} }
return merged_image_id; return {merged_image_id, -1, -1};
} }
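
Note: ResolveOverlap() now returns a view adjustment alongside the image id, which FindImage() unpacks with structured bindings and applies to desc.view_info. A minimal illustration of that plumbing; ImageId, ViewRange and ResolveOverlapStub are stub types, and the slice index is shown only being carried along, matching the hunks above where it is not yet applied.

#include <tuple>

struct ImageId {
    int index = -1;
    explicit operator bool() const { return index >= 0; }
};

struct ViewRange {
    int base_level = 0;
};

// Stub result for a right overlap resolved as "mip 2 of the cached image".
std::tuple<ImageId, int, int> ResolveOverlapStub() {
    return {ImageId{7}, /*view_mip=*/2, /*view_slice=*/-1};
}

int main() {
    ViewRange range{};
    auto [image_id, view_mip, view_slice] = ResolveOverlapStub();
    if (view_mip > 0) {
        range.base_level = view_mip; // FindImage() stores this into desc.view_info.range.base.level
    }
    (void)view_slice;
    return (image_id && range.base_level == 2) ? 0 : 1;
}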
ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
@ -181,8 +246,8 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
new_image.CopyImage(src_image); new_image.CopyImage(src_image);
if (True(src_image.flags & ImageFlagBits::Bound)) { if (src_image.binding.is_bound || src_image.binding.is_target) {
src_image.flags |= ImageFlagBits::NeedsRebind; src_image.binding.needs_rebind = 1u;
} }
FreeImage(image_id); FreeImage(image_id);
@ -192,9 +257,11 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
return new_image_id; return new_image_id;
} }
ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
const auto& info = desc.info;
if (info.guest_address == 0) [[unlikely]] { if (info.guest_address == 0) [[unlikely]] {
return NULL_IMAGE_VIEW_ID; return NULL_IMAGE_ID;
} }
std::scoped_lock lock{mutex}; std::scoped_lock lock{mutex};
@ -231,10 +298,16 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
} }
// Try to resolve overlaps (if any) // Try to resolve overlaps (if any)
int view_mip{-1};
int view_slice{-1};
if (!image_id) { if (!image_id) {
for (const auto& cache_id : image_ids) { for (const auto& cache_id : image_ids) {
view_mip = -1;
view_slice = -1;
const auto& merged_info = image_id ? slot_images[image_id].info : info; const auto& merged_info = image_id ? slot_images[image_id].info : info;
image_id = ResolveOverlap(merged_info, cache_id, image_id); std::tie(image_id, view_mip, view_slice) =
ResolveOverlap(merged_info, desc.type, cache_id, image_id);
} }
} }
@ -254,6 +327,10 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
RegisterImage(image_id); RegisterImage(image_id);
} }
if (view_mip > 0) {
desc.view_info.range.base.level = view_mip;
}
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
image.tick_accessed_last = scheduler.CurrentTick(); image.tick_accessed_last = scheduler.CurrentTick();
@ -275,100 +352,58 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) { ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
UpdateImage(image_id); UpdateImage(image_id);
auto& usage = image.info.usage;
if (view_info.is_storage) {
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
view_info.range);
usage.storage = true;
} else {
const auto new_layout = image.info.IsDepthStencil()
? vk::ImageLayout::eDepthStencilReadOnlyOptimal
: vk::ImageLayout::eShaderReadOnlyOptimal;
image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead, view_info.range);
usage.texture = true;
}
return RegisterImageView(image_id, view_info); return RegisterImageView(image_id, view_info);
} }
ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
const ImageViewInfo& view_info) { const ImageId image_id = FindImage(desc);
const ImageId image_id = FindImage(image_info);
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified; image.flags |= ImageFlagBits::GpuModified;
image.usage.render_target = 1u;
UpdateImage(image_id); UpdateImage(image_id);
image.Transit(vk::ImageLayout::eColorAttachmentOptimal,
vk::AccessFlagBits2::eColorAttachmentWrite |
vk::AccessFlagBits2::eColorAttachmentRead,
view_info.range);
// Register metadata for this color buffer // Register metadata for this color buffer
if (!(image.flags & ImageFlagBits::MetaRegistered)) { if (!(image.flags & ImageFlagBits::MetaRegistered)) {
if (image_info.meta_info.cmask_addr) { if (desc.info.meta_info.cmask_addr) {
surface_metas.emplace( surface_metas.emplace(
image_info.meta_info.cmask_addr, desc.info.meta_info.cmask_addr,
MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true});
image.info.meta_info.cmask_addr = image_info.meta_info.cmask_addr; image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr;
image.flags |= ImageFlagBits::MetaRegistered; image.flags |= ImageFlagBits::MetaRegistered;
} }
if (image_info.meta_info.fmask_addr) { if (desc.info.meta_info.fmask_addr) {
surface_metas.emplace( surface_metas.emplace(
image_info.meta_info.fmask_addr, desc.info.meta_info.fmask_addr,
MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true});
image.info.meta_info.fmask_addr = image_info.meta_info.fmask_addr; image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr;
image.flags |= ImageFlagBits::MetaRegistered; image.flags |= ImageFlagBits::MetaRegistered;
} }
} }
// Update tracked image usage return RegisterImageView(image_id, desc.view_info);
image.info.usage.render_target = true;
return RegisterImageView(image_id, view_info);
} }
ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
const ImageViewInfo& view_info) { const ImageId image_id = FindImage(desc);
const ImageId image_id = FindImage(image_info);
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified; image.flags |= ImageFlagBits::GpuModified;
image.flags &= ~ImageFlagBits::Dirty; image.flags &= ~ImageFlagBits::Dirty;
image.aspect_mask = vk::ImageAspectFlagBits::eDepth; image.usage.depth_target = 1u;
image.usage.stencil = image.info.HasStencil();
const bool has_stencil = image_info.usage.stencil;
if (has_stencil) {
image.aspect_mask |= vk::ImageAspectFlagBits::eStencil;
}
const auto new_layout = view_info.is_storage
? has_stencil ? vk::ImageLayout::eDepthStencilAttachmentOptimal
: vk::ImageLayout::eDepthAttachmentOptimal
: has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
: vk::ImageLayout::eDepthReadOnlyOptimal;
image.Transit(new_layout,
vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
vk::AccessFlagBits2::eDepthStencilAttachmentRead,
view_info.range);
// Register metadata for this depth buffer // Register metadata for this depth buffer
if (!(image.flags & ImageFlagBits::MetaRegistered)) { if (!(image.flags & ImageFlagBits::MetaRegistered)) {
if (image_info.meta_info.htile_addr) { if (desc.info.meta_info.htile_addr) {
surface_metas.emplace( surface_metas.emplace(
image_info.meta_info.htile_addr, desc.info.meta_info.htile_addr,
MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true}); MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true});
image.info.meta_info.htile_addr = image_info.meta_info.htile_addr; image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
image.flags |= ImageFlagBits::MetaRegistered; image.flags |= ImageFlagBits::MetaRegistered;
} }
} }
// Update tracked image usage return RegisterImageView(image_id, desc.view_info);
image.info.usage.depth_target = true;
image.info.usage.stencil = has_stencil;
return RegisterImageView(image_id, view_info);
} }
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
@ -472,7 +507,7 @@ void TextureCache::RegisterImage(ImageId image_id) {
void TextureCache::UnregisterImage(ImageId image_id) { void TextureCache::UnregisterImage(ImageId image_id) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
"Trying to unregister an already registered image"); "Trying to unregister an already unregistered image");
image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page); const auto page_it = page_table.find(page);

View file

@ -43,8 +43,55 @@ class TextureCache {
using PageTable = MultiLevelPageTable<Traits>; using PageTable = MultiLevelPageTable<Traits>;
public: public:
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, enum class BindingType : u32 {
BufferCache& buffer_cache, PageManager& tracker); Texture,
Storage,
RenderTarget,
DepthTarget,
VideoOut,
};
struct BaseDesc {
ImageInfo info;
ImageViewInfo view_info;
BindingType type{BindingType::Texture};
BaseDesc() = default;
BaseDesc(BindingType type_, ImageInfo info_, ImageViewInfo view_info_) noexcept
: info{std::move(info_)}, view_info{std::move(view_info_)}, type{type_} {}
};
struct TextureDesc : public BaseDesc {
TextureDesc() = default;
TextureDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc)
: BaseDesc{desc.is_storage ? BindingType::Storage : BindingType::Texture,
ImageInfo{image, desc}, ImageViewInfo{image, desc}} {}
};
struct RenderTargetDesc : public BaseDesc {
RenderTargetDesc(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {})
: BaseDesc{BindingType::RenderTarget, ImageInfo{buffer, hint}, ImageViewInfo{buffer}} {}
};
struct DepthTargetDesc : public BaseDesc {
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
const AmdGpu::Liverpool::DepthView& view,
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
const AmdGpu::Liverpool::CbDbExtent& hint = {})
: BaseDesc{BindingType::DepthTarget,
ImageInfo{buffer, view.NumSlices(), htile_address, hint},
ImageViewInfo{buffer, view, ctl}} {}
};
struct VideoOutDesc : public BaseDesc {
VideoOutDesc(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address)
: BaseDesc{BindingType::VideoOut, ImageInfo{group, cpu_address}, ImageViewInfo{}} {}
};
public:
TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
BufferCache& buffer_cache, PageManager& tracker);
~TextureCache(); ~TextureCache();
/// Invalidates any image in the logical page range. /// Invalidates any image in the logical page range.
@ -57,18 +104,16 @@ public:
void UnmapMemory(VAddr cpu_addr, size_t size); void UnmapMemory(VAddr cpu_addr, size_t size);
/// Retrieves the image handle of the image with the provided attributes. /// Retrieves the image handle of the image with the provided attributes.
[[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {}); [[nodiscard]] ImageId FindImage(BaseDesc& desc, FindFlags flags = {});
/// Retrieves an image view with the properties of the specified image id. /// Retrieves an image view with the properties of the specified image id.
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info); [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info);
/// Retrieves the render target with specified properties /// Retrieves the render target with specified properties
[[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info, [[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc);
const ImageViewInfo& view_info);
/// Retrieves the depth target with specified properties /// Retrieves the depth target with specified properties
[[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info, [[nodiscard]] ImageView& FindDepthTarget(BaseDesc& desc);
const ImageViewInfo& view_info);
/// Updates image contents if it was modified by CPU. /// Updates image contents if it was modified by CPU.
void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) { void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
@ -77,11 +122,13 @@ public:
RefreshImage(image, custom_scheduler); RefreshImage(image, custom_scheduler);
} }
[[nodiscard]] ImageId ResolveOverlap(const ImageInfo& info, ImageId cache_img_id, [[nodiscard]] std::tuple<ImageId, int, int> ResolveOverlap(const ImageInfo& info,
ImageId merged_image_id); BindingType binding,
ImageId cache_img_id,
ImageId merged_image_id);
/// Resolves depth overlap and either re-creates the image or returns existing one /// Resolves depth overlap and either re-creates the image or returns existing one
[[nodiscard]] ImageId ResolveDepthOverlap(const ImageInfo& requested_info, [[nodiscard]] ImageId ResolveDepthOverlap(const ImageInfo& requested_info, BindingType binding,
ImageId cache_img_id); ImageId cache_img_id);
[[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id); [[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id);