diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 4a88c7ed4..d55e05d1e 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/alignment.h" -#include "common/config.h" #include "common/debug.h" #include "common/scope_exit.h" #include "common/types.h" @@ -136,20 +136,19 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { if (!IsRegionRegistered(device_addr, size)) { return; } - if (Config::readbacks() && memory_tracker->IsRegionGpuModified(device_addr, size)) { - ReadMemory(device_addr, size); - } - memory_tracker->MarkRegionAsCpuModified(device_addr, size); + memory_tracker->InvalidateRegion( + device_addr, size, Config::readbacks(), + [this, device_addr, size] { ReadMemory(device_addr, size, true); }); } -void BufferCache::ReadMemory(VAddr device_addr, u64 size) { - liverpool->SendCommand([this, device_addr, size] { +void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { + liverpool->SendCommand([this, device_addr, size, is_write] { Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)]; - DownloadBufferMemory(buffer, device_addr, size); + DownloadBufferMemory(buffer, device_addr, size, is_write); }); } -void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) { +void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { boost::container::small_vector copies; u64 total_size_bytes = 0; memory_tracker->ForEachDownloadRange( @@ -192,6 +191,9 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si copy.size); } memory_tracker->UnmarkRegionAsGpuModified(device_addr, size); + if (is_write) { + memory_tracker->MarkRegionAsCpuModified(device_addr, size); + } } void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { @@ -359,7 +361,7 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, // Avoid using ObtainBuffer here as that might give us the stream buffer. const BufferId buffer_id = FindBuffer(src, num_bytes); auto& buffer = slot_buffers[buffer_id]; - SynchronizeBuffer(buffer, src, num_bytes, false); + SynchronizeBuffer(buffer, src, num_bytes, false, false); return buffer; }(); auto& dst_buffer = [&] -> const Buffer& { @@ -441,9 +443,8 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b buffer_id = FindBuffer(device_addr, size); } Buffer& buffer = slot_buffers[buffer_id]; - SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer); + SynchronizeBuffer(buffer, device_addr, size, is_written, is_texel_buffer); if (is_written) { - memory_tracker->MarkRegionAsGpuModified(device_addr, size); gpu_modified_ranges.Add(device_addr, size); } return {&buffer, buffer.Offset(device_addr)}; @@ -454,7 +455,7 @@ std::pair BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 si const BufferId buffer_id = page_table[gpu_addr >> CACHING_PAGEBITS].buffer_id; if (buffer_id) { if (Buffer& buffer = slot_buffers[buffer_id]; buffer.IsInBounds(gpu_addr, size)) { - SynchronizeBuffer(buffer, gpu_addr, size, false); + SynchronizeBuffer(buffer, gpu_addr, size, false, false); return {&buffer, buffer.Offset(gpu_addr)}; } } @@ -813,56 +814,27 @@ void BufferCache::ChangeRegister(BufferId buffer_id) { } } -void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, +void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written, bool is_texel_buffer) { boost::container::small_vector copies; - u64 total_size_bytes = 0; VAddr buffer_start = buffer.CpuAddr(); - memory_tracker->ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { - copies.push_back(vk::BufferCopy{ - .srcOffset = total_size_bytes, - .dstOffset = device_addr_out - buffer_start, - .size = range_size, + memory_tracker->ForEachUploadRange( + device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) { + const u64 offset = staging_buffer.Copy(device_addr_out, range_size); + copies.push_back(vk::BufferCopy{ + .srcOffset = offset, + .dstOffset = device_addr_out - buffer_start, + .size = range_size, + }); }); - total_size_bytes += range_size; - }); SCOPE_EXIT { if (is_texel_buffer) { SynchronizeBufferFromImage(buffer, device_addr, size); } }; - if (total_size_bytes == 0) { + if (copies.empty()) { return; } - vk::Buffer src_buffer = staging_buffer.Handle(); - if (total_size_bytes < StagingBufferSize) { - const auto [staging, offset] = staging_buffer.Map(total_size_bytes); - for (auto& copy : copies) { - u8* const src_pointer = staging + copy.srcOffset; - const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; - std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); - // Apply the staging offset - copy.srcOffset += offset; - } - staging_buffer.Commit(); - } else { - // For large one time transfers use a temporary host buffer. - // RenderDoc can lag quite a bit if the stream buffer is too large. - Buffer temp_buffer{instance, - scheduler, - MemoryUsage::Upload, - 0, - vk::BufferUsageFlagBits::eTransferSrc, - total_size_bytes}; - src_buffer = temp_buffer.Handle(); - u8* const staging = temp_buffer.mapped_data.data(); - for (auto& copy : copies) { - u8* const src_pointer = staging + copy.srcOffset; - const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; - std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); - } - scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {}); - } scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); const vk::BufferMemoryBarrier2 pre_barrier = { @@ -889,7 +861,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &pre_barrier, }); - cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); + cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.buffer, copies); cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, @@ -1020,7 +992,7 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { VAddr start = std::max(buffer.CpuAddr(), device_addr); VAddr end = std::min(buffer.CpuAddr() + buffer.SizeBytes(), device_addr_end); u32 size = static_cast(end - start); - SynchronizeBuffer(buffer, start, size, false); + SynchronizeBuffer(buffer, start, size, false, false); }); } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5acb6ebd3..900a27aee 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -113,7 +113,7 @@ public: void InvalidateMemory(VAddr device_addr, u64 size); /// Waits on pending downloads in the logical page range. - void ReadMemory(VAddr device_addr, u64 size); + void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); /// Binds host vertex buffers for the current draw. void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline); @@ -176,7 +176,7 @@ private: return !buffer_id || slot_buffers[buffer_id].is_deleted; } - void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size); + void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); @@ -191,7 +191,8 @@ private: template void ChangeRegister(BufferId buffer_id); - void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer); + void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written, + bool is_texel_buffer); bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index acc53b8f9..ca87c7df0 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -27,6 +27,7 @@ public: bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; return manager->template IsRegionModified(offset, size); }); } @@ -35,6 +36,7 @@ public: bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; return manager->template IsRegionModified(offset, size); }); } @@ -43,34 +45,57 @@ public: void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { IteratePages(dirty_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; manager->template ChangeRegionState( manager->GetCpuAddr() + offset, size); }); } - /// Mark region as modified from the host GPU - void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { - IteratePages(dirty_cpu_addr, query_size, - [](RegionManager* manager, u64 offset, size_t size) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); - }); - } - + /// Unmark region as modified from the host GPU void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { IteratePages(dirty_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; manager->template ChangeRegionState( manager->GetCpuAddr() + offset, size); }); } + /// Removes all protection from a page and ensures GPU data has been flushed if requested + void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept { + IteratePages( + cpu_addr, size, + [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) { + const bool should_flush = [&] { + // Perform both the GPU modification check and CPU state change with the lock + // in case we are racing with GPU thread trying to mark the page as GPU + // modified. If we need to flush the flush function is going to perform CPU + // state change. + std::scoped_lock lk{manager->lock}; + if (try_flush && manager->template IsRegionModified(offset, size)) { + return true; + } + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + return false; + }(); + if (should_flush) { + on_flush(); + } + }); + } + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified - void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { + void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) { IteratePages(query_cpu_range, query_size, - [&func](RegionManager* manager, u64 offset, size_t size) { + [&func, is_written](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); + if (is_written) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + } }); } @@ -79,6 +104,7 @@ public: void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { IteratePages(query_cpu_range, query_size, [&func](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); }); diff --git a/src/video_core/buffer_cache/region_manager.h b/src/video_core/buffer_cache/region_manager.h index 894809cd5..608b16fb3 100644 --- a/src/video_core/buffer_cache/region_manager.h +++ b/src/video_core/buffer_cache/region_manager.h @@ -3,9 +3,9 @@ #pragma once -#include #include "common/config.h" #include "common/div_ceil.h" +#include "common/logging/log.h" #ifdef __linux__ #include "common/adaptive_mutex.h" @@ -19,6 +19,12 @@ namespace VideoCore { +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +using LockType = Common::AdaptiveMutex; +#else +using LockType = Common::SpinLock; +#endif + /** * Allows tracking CPU and GPU modification of pages in a contigious 16MB virtual address region. * Information is stored in bitsets for spacial locality and fast update of single pages. @@ -80,7 +86,6 @@ public: if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return; } - std::scoped_lock lk{lock}; RegionBits& bits = GetRegionBits(); if constexpr (enable) { @@ -113,15 +118,10 @@ public: if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return; } - std::scoped_lock lk{lock}; RegionBits& bits = GetRegionBits(); RegionBits mask(bits, start_page, end_page); - for (const auto& [start, end] : mask) { - func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE); - } - if constexpr (clear) { bits.UnsetRange(start_page, end_page); if constexpr (type == Type::CPU) { @@ -130,6 +130,10 @@ public: UpdateProtection(); } } + + for (const auto& [start, end] : mask) { + func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE); + } } /** @@ -147,13 +151,14 @@ public: if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return false; } - std::scoped_lock lk{lock}; const RegionBits& bits = GetRegionBits(); RegionBits test(bits, start_page, end_page); return test.Any(); } + LockType lock; + private: /** * Notify tracker about changes in the CPU tracking state of a word in the buffer @@ -179,11 +184,6 @@ private: tracker->UpdatePageWatchersForRegion(cpu_addr, mask); } -#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP - Common::AdaptiveMutex lock; -#else - Common::SpinLock lock; -#endif PageManager* tracker; VAddr cpu_addr = 0; RegionBits cpu; diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 40ac9b5b4..63297bfdc 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -201,16 +201,17 @@ struct PageManager::Impl { RENDERER_TRACE; auto* memory = Core::Memory::Instance(); auto& impl = memory->GetAddressSpace(); - // ASSERT(perms != Core::MemoryPermission::Write); + ASSERT_MSG(perms != Core::MemoryPermission::Write, + "Attempted to protect region as write-only which is not a valid permission"); impl.Protect(address, size, perms); } static bool GuestFaultSignalHandler(void* context, void* fault_address) { const auto addr = reinterpret_cast(fault_address); if (Common::IsWriteError(context)) { - return rasterizer->InvalidateMemory(addr, 1); + return rasterizer->InvalidateMemory(addr, 8); } else { - return rasterizer->ReadMemory(addr, 1); + return rasterizer->ReadMemory(addr, 8); } return false; }