From 31ac54258a2cea6d6f63c05a576baf4d4142ffa8 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 5 Jul 2025 17:52:22 +0200 Subject: [PATCH 1/7] Tracker locking --- src/video_core/buffer_cache/buffer_cache.cpp | 4 ++ src/video_core/buffer_cache/buffer_cache.h | 5 +-- src/video_core/buffer_cache/memory_tracker.h | 40 ++++++++++++++----- src/video_core/page_manager.cpp | 1 + .../renderer_vulkan/vk_rasterizer.h | 1 - 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index d55e05d1e..9096fa606 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -996,6 +996,10 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { }); } +void BufferCache::SynchronizeBuffersForDma() { + +} + void BufferCache::MemoryBarrier() { // Vulkan doesn't know which buffer we access in a shader if we use // BufferDeviceAddress. We need a full memory barrier. diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 900a27aee..14fe957e0 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -5,7 +5,6 @@ #include #include -#include "common/div_ceil.h" #include "common/slot_vector.h" #include "common/types.h" #include "video_core/buffer_cache/buffer.h" @@ -156,8 +155,8 @@ public: /// Synchronizes all buffers in the specified range. void SynchronizeBuffersInRange(VAddr device_addr, u64 size); - /// Synchronizes all buffers neede for DMA. - void SynchronizeDmaBuffers(); + /// Synchronizes all buffers for DMA. + void SynchronizeBuffersForDma(); /// Record memory barrier. Used for buffers when accessed via BDA. void MemoryBarrier(); diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index ca87c7df0..5ac129967 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include "common/debug.h" @@ -24,8 +25,9 @@ public: ~MemoryTracker() = default; /// Returns true if a region has been modified from the CPU + template bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { - return IteratePages( + return IteratePages( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; return manager->template IsRegionModified(offset, size); @@ -33,8 +35,9 @@ public: } /// Returns true if a region has been modified from the GPU + template bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { - return IteratePages( + return IteratePages( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; return manager->template IsRegionModified(offset, size); @@ -42,8 +45,9 @@ public: } /// Mark region as CPU modified, notifying the device_tracker about this change + template void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, + IteratePages(dirty_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; manager->template ChangeRegionState( @@ -52,8 +56,9 @@ public: } /// Unmark region as modified from the host GPU + template void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, + IteratePages(dirty_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; manager->template ChangeRegionState( @@ -62,8 +67,9 @@ public: } /// Removes all protection from a page and ensures GPU data has been flushed if requested + template void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept { - IteratePages( + IteratePages( cpu_addr, size, [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) { const bool should_flush = [&] { @@ -86,8 +92,9 @@ public: } /// Call 'func' for each CPU modified range and unmark those pages as CPU modified + template void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) { - IteratePages(query_cpu_range, query_size, + IteratePages(query_cpu_range, query_size, [&func, is_written](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; manager->template ForEachModifiedRange( @@ -100,9 +107,9 @@ public: } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified - template + template void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { - IteratePages(query_cpu_range, query_size, + IteratePages(query_cpu_range, query_size, [&func](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; manager->template ForEachModifiedRange( @@ -110,6 +117,17 @@ public: }); } + /// Lck the memory tracker. + void Lock() { + global_lock.lock(); + } + + /// Unlock the memory tracker. + void Unlock() { + global_lock.unlock(); + } + + private: /** * @brief IteratePages Iterates L2 word manager page table. @@ -118,9 +136,12 @@ private: * @param func Callback for each word manager. * @return */ - template + template bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { RENDERER_TRACE; + if constexpr (locking) { + std::shared_lock lock{global_lock}; + } using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; std::size_t remaining_size{size}; @@ -177,6 +198,7 @@ private: std::deque> manager_pool; std::vector free_managers; std::array top_tier{}; + std::shared_mutex global_lock; }; } // namespace VideoCore diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 63297bfdc..daa1218cc 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -4,6 +4,7 @@ #include #include "common/assert.h" #include "common/debug.h" +#include "common/div_ceil.h" #include "common/range_lock.h" #include "common/signal_context.h" #include "core/memory.h" diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4a978746c..1c307651a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -3,7 +3,6 @@ #pragma once -#include #include "common/recursive_lock.h" #include "common/shared_first_mutex.h" #include "video_core/buffer_cache/buffer_cache.h" From 47c43df5449fe65015b48c4d0b06419781164b45 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 5 Jul 2025 18:07:22 +0200 Subject: [PATCH 2/7] Use RangeSet --- CMakeLists.txt | 2 +- src/video_core/buffer_cache/buffer_cache.cpp | 3 +-- src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/{buffer_cache => }/range_set.h | 12 ++++++------ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 16 +++++++--------- src/video_core/renderer_vulkan/vk_rasterizer.h | 10 ++++------ 6 files changed, 20 insertions(+), 25 deletions(-) rename src/video_core/{buffer_cache => }/range_set.h (96%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 38532760d..dd8df7ba7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -921,7 +921,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/buffer_cache/buffer_cache.cpp src/video_core/buffer_cache/buffer_cache.h src/video_core/buffer_cache/memory_tracker.h - src/video_core/buffer_cache/range_set.h src/video_core/buffer_cache/region_definitions.h src/video_core/buffer_cache/region_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -980,6 +979,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/page_manager.cpp src/video_core/page_manager.h src/video_core/multi_level_page_table.h + src/video_core/range_set.h src/video_core/renderdoc.cpp src/video_core/renderdoc.h ) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 9096fa606..1aadf34a8 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -154,9 +154,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si memory_tracker->ForEachDownloadRange( device_addr, size, [&](u64 device_addr_out, u64 range_size) { const VAddr buffer_addr = buffer.CpuAddr(); - const auto add_download = [&](VAddr start, VAddr end) { + const auto add_download = [&](VAddr start, u64 new_size) { const u64 new_offset = start - buffer_addr; - const u64 new_size = end - start; copies.push_back(vk::BufferCopy{ .srcOffset = new_offset, .dstOffset = total_size_bytes, diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 14fe957e0..3ce5d2436 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -8,8 +8,8 @@ #include "common/slot_vector.h" #include "common/types.h" #include "video_core/buffer_cache/buffer.h" -#include "video_core/buffer_cache/range_set.h" #include "video_core/multi_level_page_table.h" +#include "video_core/range_set.h" namespace AmdGpu { struct Liverpool; diff --git a/src/video_core/buffer_cache/range_set.h b/src/video_core/range_set.h similarity index 96% rename from src/video_core/buffer_cache/range_set.h rename to src/video_core/range_set.h index 5c8e78c7c..711c1cb04 100644 --- a/src/video_core/buffer_cache/range_set.h +++ b/src/video_core/range_set.h @@ -66,7 +66,7 @@ struct RangeSet { for (const auto& set : m_ranges_set) { const VAddr inter_addr_end = set.upper(); const VAddr inter_addr = set.lower(); - func(inter_addr, inter_addr_end); + func(inter_addr, inter_addr_end - inter_addr); } } @@ -92,7 +92,7 @@ struct RangeSet { if (inter_addr < start_address) { inter_addr = start_address; } - func(inter_addr, inter_addr_end); + func(inter_addr, inter_addr_end - inter_addr); } } @@ -170,7 +170,7 @@ public: for (const auto& [interval, value] : m_ranges_map) { const VAddr inter_addr_end = interval.upper(); const VAddr inter_addr = interval.lower(); - func(inter_addr, inter_addr_end, value); + func(inter_addr, inter_addr_end - inter_addr, value); } } @@ -196,7 +196,7 @@ public: if (inter_addr < start_address) { inter_addr = start_address; } - func(inter_addr, inter_addr_end, it->second); + func(inter_addr, inter_addr_end - inter_addr, it->second); } } @@ -274,7 +274,7 @@ public: for (const auto& [interval, value] : m_ranges_map) { const VAddr inter_addr_end = interval.upper(); const VAddr inter_addr = interval.lower(); - func(inter_addr, inter_addr_end, value); + func(inter_addr, inter_addr_end - inter_addr, value); } } @@ -300,7 +300,7 @@ public: if (inter_addr < start_address) { inter_addr = start_address; } - func(inter_addr, inter_addr_end, it->second); + func(inter_addr, inter_addr_end - inter_addr, it->second); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e4e026485..da15b7c5e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -475,10 +475,10 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { // We only use fault buffer for DMA right now. { Common::RecursiveSharedLock lock{mapped_ranges_mutex}; - for (auto& range : mapped_ranges) { - buffer_cache.SynchronizeBuffersInRange(range.lower(), - range.upper() - range.lower()); - } + mapped_ranges.ForEach( + [&](const VAddr addr, u64 size) { + buffer_cache.SynchronizeBuffersInRange(addr, size); + }); } buffer_cache.MemoryBarrier(); } @@ -979,16 +979,14 @@ bool Rasterizer::IsMapped(VAddr addr, u64 size) { // There is no memory, so not mapped. return false; } - const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size); - Common::RecursiveSharedLock lock{mapped_ranges_mutex}; - return boost::icl::contains(mapped_ranges, range); + return mapped_ranges.Contains(addr, size); } void Rasterizer::MapMemory(VAddr addr, u64 size) { { std::scoped_lock lock{mapped_ranges_mutex}; - mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size); + mapped_ranges.Add(addr, size); } page_manager.OnGpuMap(addr, size); } @@ -999,7 +997,7 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) { page_manager.OnGpuUnmap(addr, size); { std::scoped_lock lock{mapped_ranges_mutex}; - mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size); + mapped_ranges.Subtract(addr, size); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 1c307651a..35e8284ae 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -9,6 +9,7 @@ #include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/texture_cache/texture_cache.h" +#include "video_core/range_set.h" namespace AmdGpu { struct Liverpool; @@ -75,11 +76,8 @@ public: template void ForEachMappedRangeInRange(VAddr addr, u64 size, Func&& func) { - const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size); - Common::RecursiveSharedLock lock{mapped_ranges_mutex}; - for (const auto& mapped_range : (mapped_ranges & range)) { - func(mapped_range); - } + Common::RecursiveSharedLock lk(mapped_ranges_mutex); + mapped_ranges.ForEachInRange(addr, size, std::forward(func)); } private: @@ -121,7 +119,7 @@ private: VideoCore::TextureCache texture_cache; AmdGpu::Liverpool* liverpool; Core::MemoryManager* memory; - boost::icl::interval_set mapped_ranges; + VideoCore::RangeSet mapped_ranges; Common::SharedFirstMutex mapped_ranges_mutex; PipelineCache pipeline_cache; From 54907409c76d3383a17cbfd3dc207b109ffb1a25 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 6 Jul 2025 02:34:51 +0200 Subject: [PATCH 3/7] Initial implementation (untested) --- src/video_core/buffer_cache/buffer_cache.cpp | 57 ++++++++++- src/video_core/buffer_cache/buffer_cache.h | 8 +- src/video_core/buffer_cache/memory_tracker.h | 97 ++++++++++++------- src/video_core/buffer_cache/region_manager.h | 18 ++++ .../renderer_vulkan/vk_rasterizer.cpp | 8 +- .../renderer_vulkan/vk_rasterizer.h | 6 +- 6 files changed, 145 insertions(+), 49 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 1aadf34a8..dfc3f2050 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -29,9 +29,9 @@ static constexpr size_t DeviceBufferSize = 128_MB; static constexpr size_t MaxPageFaults = 1024; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, - AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, - PageManager& tracker) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, + Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_, + TextureCache& texture_cache_, PageManager& tracker) + : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, @@ -817,7 +817,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer) { boost::container::small_vector copies; VAddr buffer_start = buffer.CpuAddr(); - memory_tracker->ForEachUploadRange( + memory_tracker->ForEachUploadRange( device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) { const u64 offset = staging_buffer.Copy(device_addr_out, range_size); copies.push_back(vk::BufferCopy{ @@ -996,7 +996,54 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { } void BufferCache::SynchronizeBuffersForDma() { - + RENDERER_TRACE; + boost::container::small_vector buffers; + boost::container::small_vector barriers; + boost::container::small_vector copies; + const auto& mapped_ranges = rasterizer.GetMappedRanges(); + memory_tracker->Lock(); + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + mapped_ranges.ForEach([&](VAddr device_addr, u64 size) { + ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) { + if (memory_tracker->IsRegionCpuModified(device_addr, size)) { + barriers.push_back(vk::BufferMemoryBarrier2{ + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = + vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite | + vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), + }); + buffers.push_back(&buffer); + } + }); + }); + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = static_cast(barriers.size()), + .pBufferMemoryBarriers = barriers.data(), + }); + for (auto* buffer : buffers) { + memory_tracker->ForEachUploadRange( + buffer->CpuAddr(), buffer->SizeBytes(), false, + [&](u64 device_addr_out, u64 range_size) { + const u64 offset = staging_buffer.Copy(device_addr_out, range_size); + copies.push_back(vk::BufferCopy{ + .srcOffset = offset, + .dstOffset = device_addr_out - buffer->CpuAddr(), + .size = range_size, + }); + }); + cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies); + copies.clear(); + } + memory_tracker->UnmarkAllRegionsAsCpuModified(); + MemoryBarrier(); + memory_tracker->Unlock(); } void BufferCache::MemoryBarrier() { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3ce5d2436..fd3470ae7 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -21,7 +21,8 @@ class MemoryManager; namespace Vulkan { class GraphicsPipeline; -} +class Rasterizer; +} // namespace Vulkan namespace VideoCore { @@ -70,8 +71,8 @@ public: public: explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, - PageManager& tracker); + Vulkan::Rasterizer& rasterizer, AmdGpu::Liverpool* liverpool, + TextureCache& texture_cache, PageManager& tracker); ~BufferCache(); /// Returns a pointer to GDS device local buffer. @@ -203,6 +204,7 @@ private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; + Vulkan::Rasterizer& rasterizer; AmdGpu::Liverpool* liverpool; Core::MemoryManager* memory; TextureCache& texture_cache; diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index 5ac129967..6a93ee31b 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -27,7 +27,7 @@ public: /// Returns true if a region has been modified from the CPU template bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { - return IteratePages( + return IterateRegions( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; return manager->template IsRegionModified(offset, size); @@ -37,7 +37,7 @@ public: /// Returns true if a region has been modified from the GPU template bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { - return IteratePages( + return IterateRegions( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; return manager->template IsRegionModified(offset, size); @@ -47,29 +47,38 @@ public: /// Mark region as CPU modified, notifying the device_tracker about this change template void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages(dirty_cpu_addr, query_size, - [](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); + IterateRegions(dirty_cpu_addr, query_size, + [](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + + /// Unmark all regions as CPU modified, notifying the device_tracker about this change + template + void UnmarkAllRegionsAsCpuModified() noexcept { + ForEachRegion([](RegionManager* manager) { + std::scoped_lock lk{manager->lock}; + manager->template ChangeAllRegionState(); + }); } /// Unmark region as modified from the host GPU template void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); + IterateRegions(dirty_cpu_addr, query_size, + [](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); } /// Removes all protection from a page and ensures GPU data has been flushed if requested template void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept { - IteratePages( + IterateRegions( cpu_addr, size, [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) { const bool should_flush = [&] { @@ -92,29 +101,30 @@ public: } /// Call 'func' for each CPU modified range and unmark those pages as CPU modified - template + template void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) { - IteratePages(query_cpu_range, query_size, - [&func, is_written](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - if (is_written) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - } - }); + IterateRegions( + query_cpu_range, query_size, + [&func, is_written](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); + if (is_written && clear) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + } + }); } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified template void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { - IteratePages(query_cpu_range, query_size, - [&func](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - }); + IterateRegions(query_cpu_range, query_size, + [&func](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); + }); } /// Lck the memory tracker. @@ -127,7 +137,6 @@ public: global_lock.unlock(); } - private: /** * @brief IteratePages Iterates L2 word manager page table. @@ -137,7 +146,7 @@ private: * @return */ template - bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { + bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) { RENDERER_TRACE; if constexpr (locking) { std::shared_lock lock{global_lock}; @@ -177,6 +186,26 @@ private: return false; } + /** + * @brief Iterate throw all regions in the memory tracker. + * @param func Callback for each region manager. + * @return + */ + template + void ForEachRegion(Func&& func) { + RENDERER_TRACE; + if constexpr (locking) { + std::shared_lock lock{global_lock}; + } + for (auto& pool : manager_pool) { + for (auto& manager : pool) { + if (manager.GetCpuAddr() != 0) { + func(&manager); + } + } + } + } + void CreateRegion(std::size_t page_index) { const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS; if (free_managers.empty()) { diff --git a/src/video_core/buffer_cache/region_manager.h b/src/video_core/buffer_cache/region_manager.h index 608b16fb3..b21f4e406 100644 --- a/src/video_core/buffer_cache/region_manager.h +++ b/src/video_core/buffer_cache/region_manager.h @@ -136,6 +136,24 @@ public: } } + /** + * Chagnes state of all pages in the region + */ + template + void ChangeAllRegionState() noexcept { + RENDERER_TRACE; + if constexpr (enable) { + GetRegionBits().Fill(); + } else { + GetRegionBits().Clear(); + } + if constexpr (type == Type::CPU) { + UpdateProtection(); + } else if (Config::readbacks()) { + UpdateProtection(); + } + } + /** * Returns true when a region has been modified * diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index da15b7c5e..d95363af7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) { Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, page_manager{this}, - buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager}, + buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager}, texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} { if (!Config::nullGpu()) { @@ -475,12 +475,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { // We only use fault buffer for DMA right now. { Common::RecursiveSharedLock lock{mapped_ranges_mutex}; - mapped_ranges.ForEach( - [&](const VAddr addr, u64 size) { - buffer_cache.SynchronizeBuffersInRange(addr, size); - }); + buffer_cache.SynchronizeBuffersForDma(); } - buffer_cache.MemoryBarrier(); } fault_process_pending |= uses_dma; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 35e8284ae..a00df1093 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -7,9 +7,9 @@ #include "common/shared_first_mutex.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" +#include "video_core/range_set.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/texture_cache/texture_cache.h" -#include "video_core/range_set.h" namespace AmdGpu { struct Liverpool; @@ -43,6 +43,10 @@ public: return texture_cache; } + [[nodiscard]] const VideoCore::RangeSet& GetMappedRanges() const noexcept { + return mapped_ranges; + } + void Draw(bool is_indexed, u32 index_offset = 0); void DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size, u32 max_count, VAddr count_address); From 5a242586ad3663e1566b7a90d8839da30e07b6b4 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 7 Jul 2025 01:07:38 +0200 Subject: [PATCH 4/7] Deferring mode --- src/video_core/buffer_cache/buffer_cache.cpp | 7 +-- src/video_core/buffer_cache/memory_tracker.h | 44 +++++++++--------- .../buffer_cache/region_definitions.h | 8 +++- src/video_core/buffer_cache/region_manager.h | 45 ++++++++++--------- 4 files changed, 56 insertions(+), 48 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index dfc3f2050..9545e828b 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -817,7 +817,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer) { boost::container::small_vector copies; VAddr buffer_start = buffer.CpuAddr(); - memory_tracker->ForEachUploadRange( + memory_tracker->ForEachUploadRange( device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) { const u64 offset = staging_buffer.Copy(device_addr_out, range_size); copies.push_back(vk::BufferCopy{ @@ -997,6 +997,7 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { void BufferCache::SynchronizeBuffersForDma() { RENDERER_TRACE; + LOG_WARNING(Render_Vulkan, "SYNC RANGES FOR DMA"); boost::container::small_vector buffers; boost::container::small_vector barriers; boost::container::small_vector copies; @@ -1028,7 +1029,7 @@ void BufferCache::SynchronizeBuffersForDma() { .pBufferMemoryBarriers = barriers.data(), }); for (auto* buffer : buffers) { - memory_tracker->ForEachUploadRange( + memory_tracker->ForEachUploadRange( buffer->CpuAddr(), buffer->SizeBytes(), false, [&](u64 device_addr_out, u64 range_size) { const u64 offset = staging_buffer.Copy(device_addr_out, range_size); @@ -1041,8 +1042,8 @@ void BufferCache::SynchronizeBuffersForDma() { cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies); copies.clear(); } - memory_tracker->UnmarkAllRegionsAsCpuModified(); MemoryBarrier(); + memory_tracker->PerformDeferredProtections(); memory_tracker->Unlock(); } diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index 6a93ee31b..b78b841fb 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -45,38 +45,29 @@ public: } /// Mark region as CPU modified, notifying the device_tracker about this change - template + template void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { IterateRegions(dirty_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; - manager->template ChangeRegionState( + manager->template ChangeRegionState( manager->GetCpuAddr() + offset, size); }); } - /// Unmark all regions as CPU modified, notifying the device_tracker about this change - template - void UnmarkAllRegionsAsCpuModified() noexcept { - ForEachRegion([](RegionManager* manager) { - std::scoped_lock lk{manager->lock}; - manager->template ChangeAllRegionState(); - }); - } - /// Unmark region as modified from the host GPU - template + template void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { IterateRegions(dirty_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; - manager->template ChangeRegionState( + manager->template ChangeRegionState( manager->GetCpuAddr() + offset, size); }); } /// Removes all protection from a page and ensures GPU data has been flushed if requested - template + template void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept { IterateRegions( cpu_addr, size, @@ -90,7 +81,7 @@ public: if (try_flush && manager->template IsRegionModified(offset, size)) { return true; } - manager->template ChangeRegionState( + manager->template ChangeRegionState( manager->GetCpuAddr() + offset, size); return false; }(); @@ -101,32 +92,43 @@ public: } /// Call 'func' for each CPU modified range and unmark those pages as CPU modified - template + template void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) { IterateRegions( query_cpu_range, query_size, [&func, is_written](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; - manager->template ForEachModifiedRange( + manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); - if (is_written && clear) { - manager->template ChangeRegionState( + if (is_written) { + manager->template ChangeRegionState( manager->GetCpuAddr() + offset, size); } }); } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified - template + template void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { IterateRegions(query_cpu_range, query_size, [&func](RegionManager* manager, u64 offset, size_t size) { std::scoped_lock lk{manager->lock}; - manager->template ForEachModifiedRange( + manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); }); } + /// Notifies deferred protection changes to the tracker. + template + void PerformDeferredProtections() { + ForEachRegion([&](RegionManager* manager) { + std::scoped_lock lk{manager->lock}; + manager->template PerformDeferredProtections(); + }); + } + + /// Notifies all deferred protection changes to the tracker. + /// Lck the memory tracker. void Lock() { global_lock.lock(); diff --git a/src/video_core/buffer_cache/region_definitions.h b/src/video_core/buffer_cache/region_definitions.h index 76e7ee263..af25226f5 100644 --- a/src/video_core/buffer_cache/region_definitions.h +++ b/src/video_core/buffer_cache/region_definitions.h @@ -4,6 +4,7 @@ #pragma once #include "common/bit_array.h" +#include "common/enum.h" #include "common/types.h" namespace VideoCore { @@ -17,9 +18,12 @@ constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL; constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE; enum class Type { - CPU, - GPU, + None = 0, + CPU = 1 << 0, + GPU = 1 << 1, }; +DECLARE_ENUM_FLAG_OPERATORS(Type) + using RegionBits = Common::BitArray; diff --git a/src/video_core/buffer_cache/region_manager.h b/src/video_core/buffer_cache/region_manager.h index b21f4e406..8eff058ff 100644 --- a/src/video_core/buffer_cache/region_manager.h +++ b/src/video_core/buffer_cache/region_manager.h @@ -70,13 +70,27 @@ public: } } + template + void PerformDeferredProtections() { + bool was_deferred = True(deferred_protection & type); + if (!was_deferred) { + return; + } + deferred_protection &= ~type; + if constexpr (type == Type::CPU) { + UpdateProtection(); + } else if constexpr (type == Type::GPU) { + UpdateProtection(); + } + } + /** * Change the state of a range of pages * * @param dirty_addr Base address to mark or unmark as modified * @param size Size in bytes to mark or unmark as modified */ - template + template void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { RENDERER_TRACE; const size_t offset = dirty_addr - cpu_addr; @@ -93,7 +107,9 @@ public: } else { bits.UnsetRange(start_page, end_page); } - if constexpr (type == Type::CPU) { + if constexpr (defer_protect) { + deferred_protection |= type; + } else if constexpr (type == Type::CPU) { UpdateProtection(); } else if (Config::readbacks()) { UpdateProtection(); @@ -108,7 +124,7 @@ public: * @param size Size in bytes of the CPU range to loop over * @param func Function to call for each turned off region */ - template + template void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) { RENDERER_TRACE; const size_t offset = query_cpu_range - cpu_addr; @@ -124,7 +140,9 @@ public: if constexpr (clear) { bits.UnsetRange(start_page, end_page); - if constexpr (type == Type::CPU) { + if constexpr (defer_protect) { + deferred_protection |= type; + } else if constexpr (type == Type::CPU) { UpdateProtection(); } else if (Config::readbacks()) { UpdateProtection(); @@ -136,24 +154,6 @@ public: } } - /** - * Chagnes state of all pages in the region - */ - template - void ChangeAllRegionState() noexcept { - RENDERER_TRACE; - if constexpr (enable) { - GetRegionBits().Fill(); - } else { - GetRegionBits().Clear(); - } - if constexpr (type == Type::CPU) { - UpdateProtection(); - } else if (Config::readbacks()) { - UpdateProtection(); - } - } - /** * Returns true when a region has been modified * @@ -204,6 +204,7 @@ private: PageManager* tracker; VAddr cpu_addr = 0; + Type deferred_protection = Type::None; RegionBits cpu; RegionBits gpu; RegionBits writeable; From 8e54385f103f9524a3502e26df526d6676428e69 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 7 Jul 2025 14:18:13 +0200 Subject: [PATCH 5/7] correct default temple args --- src/video_core/buffer_cache/memory_tracker.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index b78b841fb..3d4a18411 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -56,7 +56,7 @@ public: } /// Unmark region as modified from the host GPU - template + template void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { IterateRegions(dirty_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { From 5d2598c50557a9c1aa8ecd76704714bf1fe2ad13 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 7 Jul 2025 21:37:43 +0200 Subject: [PATCH 6/7] Fix shared locking --- src/video_core/buffer_cache/memory_tracker.h | 90 +++++++++++--------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index 3d4a18411..e742a3261 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -150,42 +150,47 @@ private: template bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) { RENDERER_TRACE; + const auto run = [&]() { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + std::size_t remaining_size{size}; + std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS}; + u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK}; + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)}; + auto* manager{top_tier[page_index]}; + if (manager) { + if constexpr (BOOL_BREAK) { + if (func(manager, page_offset, copy_amount)) { + return true; + } + } else { + func(manager, page_offset, copy_amount); + } + } else if constexpr (create_region_on_fail) { + CreateRegion(page_index); + manager = top_tier[page_index]; + if constexpr (BOOL_BREAK) { + if (func(manager, page_offset, copy_amount)) { + return true; + } + } else { + func(manager, page_offset, copy_amount); + } + } + page_index++; + page_offset = 0; + remaining_size -= copy_amount; + } + return false; + }; if constexpr (locking) { std::shared_lock lock{global_lock}; + return run(); + } else { + return run(); } - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - std::size_t remaining_size{size}; - std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS}; - u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK}; - while (remaining_size > 0) { - const std::size_t copy_amount{ - std::min(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)}; - auto* manager{top_tier[page_index]}; - if (manager) { - if constexpr (BOOL_BREAK) { - if (func(manager, page_offset, copy_amount)) { - return true; - } - } else { - func(manager, page_offset, copy_amount); - } - } else if constexpr (create_region_on_fail) { - CreateRegion(page_index); - manager = top_tier[page_index]; - if constexpr (BOOL_BREAK) { - if (func(manager, page_offset, copy_amount)) { - return true; - } - } else { - func(manager, page_offset, copy_amount); - } - } - page_index++; - page_offset = 0; - remaining_size -= copy_amount; - } - return false; } /** @@ -196,15 +201,20 @@ private: template void ForEachRegion(Func&& func) { RENDERER_TRACE; - if constexpr (locking) { - std::shared_lock lock{global_lock}; - } - for (auto& pool : manager_pool) { - for (auto& manager : pool) { - if (manager.GetCpuAddr() != 0) { - func(&manager); + const auto run = [&]() { + for (auto& pool : manager_pool) { + for (auto& manager : pool) { + if (manager.GetCpuAddr() != 0) { + func(&manager); + } } } + }; + if constexpr (locking) { + std::shared_lock lock{global_lock}; + run(); + } else { + run(); } } From 17815ad439edfd9c08ec262c2c846b13ed068a89 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 7 Jul 2025 21:59:03 +0200 Subject: [PATCH 7/7] Single pass on mapped ranges (no barrier batching) --- src/video_core/buffer_cache/buffer_cache.cpp | 69 ++++++++++---------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 9545e828b..ae2324448 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -997,53 +997,52 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { void BufferCache::SynchronizeBuffersForDma() { RENDERER_TRACE; - LOG_WARNING(Render_Vulkan, "SYNC RANGES FOR DMA"); boost::container::small_vector buffers; - boost::container::small_vector barriers; boost::container::small_vector copies; const auto& mapped_ranges = rasterizer.GetMappedRanges(); + bool barrier_recorded = false; memory_tracker->Lock(); scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); mapped_ranges.ForEach([&](VAddr device_addr, u64 size) { ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) { - if (memory_tracker->IsRegionCpuModified(device_addr, size)) { - barriers.push_back(vk::BufferMemoryBarrier2{ - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = - vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite | - vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = buffer.Handle(), - .offset = 0, - .size = buffer.SizeBytes(), + memory_tracker->ForEachUploadRange( + buffer.CpuAddr(), buffer.SizeBytes(), false, + [&](u64 device_addr_out, u64 range_size) { + if (!barrier_recorded) { + barrier_recorded = true; + const vk::BufferMemoryBarrier2 barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | + vk::AccessFlagBits2::eMemoryWrite | + vk::AccessFlagBits2::eTransferRead | + vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = buffer.Handle(), + .offset = 0, + .size = buffer.SizeBytes(), + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &barrier, + }); + } + const u64 offset = staging_buffer.Copy(device_addr_out, range_size); + copies.push_back(vk::BufferCopy{ + .srcOffset = offset, + .dstOffset = device_addr_out - buffer.CpuAddr(), + .size = range_size, + }); }); - buffers.push_back(&buffer); - } + cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.Handle(), copies); + copies.clear(); + barrier_recorded = false; }); }); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = static_cast(barriers.size()), - .pBufferMemoryBarriers = barriers.data(), - }); - for (auto* buffer : buffers) { - memory_tracker->ForEachUploadRange( - buffer->CpuAddr(), buffer->SizeBytes(), false, - [&](u64 device_addr_out, u64 range_size) { - const u64 offset = staging_buffer.Copy(device_addr_out, range_size); - copies.push_back(vk::BufferCopy{ - .srcOffset = offset, - .dstOffset = device_addr_out - buffer->CpuAddr(), - .size = range_size, - }); - }); - cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies); - copies.clear(); - } - MemoryBarrier(); memory_tracker->PerformDeferredProtections(); + MemoryBarrier(); memory_tracker->Unlock(); }