video_core: Improve handling of image buffer aliases (#757)

* texture_cache: Use invalidate threshold

* It's possible for shaders to bind huge buffers and only write to the lower portion of them. This is a problem if the upper parts of a buffer overlap with render targets. If the image is very far away from the buffer base, it's unlikely the shader will want to write to it, so skip invalidation for it.

* video_core: Allow using texture cache to validate texture buffers

* texture_cache: Use buffer cache in all cases for data source

* Allows correct handling of compute-written micro-tiled textures

* texture_cache: Fix depth pitch

* kernel: Remove missed code

* clang format

* video_core: Adjust depth format

* buffer_cache: Do not cache buffer views

* thread_management: Do not call createMutex on unlock

* temp: Revert this when pr is done

* buffer_cache: Don't skip CPU uploads with image sync

* Sometimes an image does not fully overlap with a region

* fix build

* video_core: Improve invalidate heuristic

* small fixes

* video_core: Hopefully fix some vertex explosions
This commit is contained in:
TheTurtle 2024-09-05 17:25:45 +03:00 committed by GitHub
parent 4e0dc91040
commit b08baaeb13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 248 additions and 191 deletions

View file

@ -167,9 +167,6 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
}
}
if (desc.is_written) {
texture_cache.InvalidateMemory(address, size);
}
const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
@ -184,13 +181,15 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
}
buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
if (auto barrier =
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
: vk::AccessFlagBits2::eShaderRead,
vk::PipelineStageFlagBits2::eComputeShader)) {
buffer_barriers.emplace_back(*barrier);
}
if (desc.is_written) {
texture_cache.InvalidateMemory(address, size);
}
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
@ -206,7 +205,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
for (const auto& image_desc : info->images) {
const auto tsharp = image_desc.GetSharp(*info);
if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
VideoCore::ImageInfo image_info{tsharp};
VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth};
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
const auto& image = texture_cache.GetImage(image_view.image_id);
@ -252,10 +251,12 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
const auto cmdbuf = scheduler.CommandBuffer();
if (!buffer_barriers.empty()) {
auto dependencies = vk::DependencyInfo{
const auto dependencies = vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = u32(buffer_barriers.size()),
.pBufferMemoryBarriers = buffer_barriers.data(),
};
scheduler.EndRendering();
cmdbuf.pipelineBarrier2(dependencies);
}

View file

@ -405,15 +405,15 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
});
}
for (const auto& tex_buffer : stage->texture_buffers) {
const auto vsharp = tex_buffer.GetSharp(*stage);
for (const auto& desc : stage->texture_buffers) {
const auto vsharp = desc.GetSharp(*stage);
vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
const u32 size = vsharp.GetSize();
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) {
const VAddr address = vsharp.base_address;
const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written, true);
buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
"Texel buffer stride must match format stride");
@ -423,22 +423,25 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
ASSERT(adjust % fmt_stride == 0);
push_data.AddOffset(binding, adjust / fmt_stride);
}
buffer_view = vk_buffer->View(offset_aligned, size + adjust, tex_buffer.is_written,
buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
const auto dst_access = tex_buffer.is_written ? vk::AccessFlagBits2::eShaderWrite
: vk::AccessFlagBits2::eShaderRead;
const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite
: vk::AccessFlagBits2::eShaderRead;
if (auto barrier = vk_buffer->GetBarrier(
dst_access, vk::PipelineStageFlagBits2::eVertexShader)) {
buffer_barriers.emplace_back(*barrier);
}
if (desc.is_written) {
texture_cache.InvalidateMemory(address, size);
}
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.pTexelBufferView = &buffer_view,
});
}
@ -497,10 +500,12 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
const auto cmdbuf = scheduler.CommandBuffer();
if (!buffer_barriers.empty()) {
auto dependencies = vk::DependencyInfo{
const auto dependencies = vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = u32(buffer_barriers.size()),
.pBufferMemoryBarriers = buffer_barriers.data(),
};
scheduler.EndRendering();
cmdbuf.pipelineBarrier2(dependencies);
}

View file

@ -297,6 +297,7 @@ bool Instance::CreateDevice() {
.shaderFloat16 = vk12_features.shaderFloat16,
.scalarBlockLayout = vk12_features.scalarBlockLayout,
.uniformBufferStandardLayout = vk12_features.uniformBufferStandardLayout,
.separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts,
.hostQueryReset = vk12_features.hostQueryReset,
.timelineSemaphore = vk12_features.timelineSemaphore,
},

View file

@ -42,6 +42,8 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(
switch (static_cast<u32>(callback_data->messageIdNumber)) {
case 0x609a13b: // Vertex attribute at location not consumed by shader
case 0xc81ad50e:
case 0xb7c39078:
case 0x32868fde: // vkCreateBufferView(): pCreateInfo->range does not equal VK_WHOLE_SIZE
case 0x92d66fc1: // `pMultisampleState is NULL` for depth only passes (confirmed VL error)
return VK_FALSE;
default:

View file

@ -17,7 +17,7 @@ namespace Vulkan {
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
buffer_cache{instance, scheduler, liverpool_, page_manager},
buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
if (!Config::nullGpu()) {