From 1675cffc09b117c707055d4dec6e34fc2fd7918c Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Sun, 23 Mar 2025 01:17:33 +0200 Subject: [PATCH] liverpool: fix dmadata packet handling --- src/video_core/amdgpu/liverpool.cpp | 26 +++--- src/video_core/buffer_cache/buffer_cache.cpp | 84 +++++++++++++++++++ src/video_core/buffer_cache/buffer_cache.h | 5 +- .../renderer_vulkan/vk_rasterizer.cpp | 4 + .../renderer_vulkan/vk_rasterizer.h | 1 + 5 files changed, 106 insertions(+), 14 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 9fd865cd7..23bec75f5 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -605,21 +605,21 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, - dma_data->SrcAddress(), - dma_data->NumBytes(), true); + rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true, false); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - // LOG_WARNING(Render_Vulkan, "GDS memory read"); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, + dma_data->NumBytes(), false, true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->InlineData(dma_data->DstAddress(), - dma_data->SrcAddress(), - dma_data->NumBytes(), false); + rasterizer->CopyBuffer(dma_data->DstAddress(), + dma_data->SrcAddress(), dma_data->NumBytes(), + false, false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); @@ -769,20 +769,20 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, u32 vqid) { rasterizer->InlineData(dma_data->dst_addr_lo, &dma_data->data, sizeof(u32), true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Gds) { - rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress(), - dma_data->NumBytes(), true); + rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress(), + dma_data->NumBytes(), true, false); } else if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), &dma_data->data, sizeof(u32), false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - // LOG_WARNING(Render_Vulkan, "GDS memory read"); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->src_addr_lo, + dma_data->NumBytes(), false, true); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { - rasterizer->InlineData(dma_data->DstAddress(), - dma_data->SrcAddress(), dma_data->NumBytes(), - false); + rasterizer->CopyBuffer(dma_data->DstAddress(), dma_data->SrcAddress(), + dma_data->NumBytes(), false, false); } else { UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}", u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value())); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index cf6e5bdb2..981aa18ca 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -273,6 +273,90 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo }); } +void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { + if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) { + if (!src_gds && !IsRegionRegistered(src, num_bytes)) { + // Both buffers were not transferred to GPU yet. Can safely copy in host memory. + memcpy(std::bit_cast(dst), std::bit_cast(src), num_bytes); + return; + } + // Without a readback there's nothing we can do with this + // Fallback to creating dst buffer on GPU to at least have this data there + } + auto& src_buffer = [&] -> const Buffer& { + if (src_gds) { + return gds_buffer; + } + const auto [buffer, offset] = ObtainBuffer(src, num_bytes, false); + return *buffer; + }(); + auto& dst_buffer = [&] -> const Buffer& { + if (dst_gds) { + return gds_buffer; + } + const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, true); + return *buffer; + }(); + vk::BufferCopy region{ + .srcOffset = src_buffer.Offset(src), + .dstOffset = dst_buffer.Offset(dst), + .size = num_bytes, + }; + const vk::BufferMemoryBarrier2 buf_barriers_before[2] = { + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = dst_buffer.Handle(), + .offset = dst_buffer.Offset(dst), + .size = num_bytes, + }, + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eTransferRead, + .buffer = src_buffer.Handle(), + .offset = src_buffer.Offset(src), + .size = num_bytes, + }, + }; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 2, + .pBufferMemoryBarriers = buf_barriers_before, + }); + cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region); + const vk::BufferMemoryBarrier2 buf_barriers_after[2] = { + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = dst_buffer.Handle(), + .offset = dst_buffer.Offset(dst), + .size = num_bytes, + }, + { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eTransferRead, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite, + .buffer = src_buffer.Handle(), + .offset = src_buffer.Offset(src), + .size = num_bytes, + }, + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 2, + .pBufferMemoryBarriers = buf_barriers_after, + }); +} + std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, bool is_texel_buffer, BufferId buffer_id) { // For small uniform buffers that have not been modified by gpu diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6e7d1a18c..1aa72f173 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -90,9 +90,12 @@ public: /// Bind host index buffer for the current draw. void BindIndexBuffer(u32 index_offset); - /// Writes a value to GPU buffer. + /// Writes a value from CPU to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + /// Performs buffer to buffer data copy on the GPU. + void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); + /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, bool is_texel_buffer = false, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 568103b47..d4e2731ae 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -908,6 +908,10 @@ void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, boo buffer_cache.InlineData(address, value, num_bytes, is_gds); } +void Rasterizer::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) { + buffer_cache.CopyBuffer(dst, src, num_bytes, dst_gds, src_gds); +} + u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { auto* gds_buf = buffer_cache.GetGdsBuffer(); u32 value; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4c0652c4f..e6de35f5f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -54,6 +54,7 @@ public: bool from_guest = false); void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); + void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size); bool ReadMemory(VAddr addr, u64 size);