diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp
index 15bf0d81e..e85a6eb18 100644
--- a/src/video_core/buffer_cache/buffer.cpp
+++ b/src/video_core/buffer_cache/buffer.cpp
@@ -137,12 +137,15 @@ StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler&
                               size_bytes);
 }
 
-std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment) {
+std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment, bool allow_wait) {
     if (!is_coherent && usage == MemoryUsage::Stream) {
         size = Common::AlignUp(size, instance->NonCoherentAtomSize());
     }
 
-    ASSERT(size <= this->size_bytes);
+    if (size > this->size_bytes) {
+        return {nullptr, 0};
+    }
+
     mapped_size = size;
 
     if (alignment > 0) {
@@ -162,8 +165,11 @@ std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment) {
     }
 
     const u64 mapped_upper_bound = offset + size;
-    WaitPendingOperations(mapped_upper_bound);
-    return std::make_pair(mapped_data.data() + offset, offset);
+    if (!WaitPendingOperations(mapped_upper_bound, allow_wait)) {
+        return {nullptr, 0};
+    }
+
+    return {mapped_data.data() + offset, offset};
 }
 
 void StreamBuffer::Commit() {
@@ -177,6 +183,12 @@ void StreamBuffer::Commit() {
     }
     offset += mapped_size;
+    if (current_watch_cursor != 0 &&
+        current_watches[current_watch_cursor].tick == scheduler->CurrentTick()) {
+        current_watches[current_watch_cursor].upper_bound = offset;
+        return;
+    }
+
     if (current_watch_cursor + 1 >= current_watches.size()) {
         // Ensure that there are enough watches.
         ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
     }
@@ -191,16 +203,20 @@ void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_
     watches.resize(watches.size() + grow_size);
 }
 
-void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
+bool StreamBuffer::WaitPendingOperations(u64 requested_upper_bound, bool allow_wait) {
     if (!invalidation_mark) {
-        return;
+        return true;
     }
     while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
        auto& watch = previous_watches[wait_cursor];
-        wait_bound = watch.upper_bound;
+        if (!scheduler->IsFree(watch.tick) && !allow_wait) {
+            return false;
+        }
         scheduler->Wait(watch.tick);
+        wait_bound = watch.upper_bound;
         ++wait_cursor;
     }
+    return true;
 }
 
 } // namespace VideoCore
diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h
index 530968787..a7a0ce84f 100644
--- a/src/video_core/buffer_cache/buffer.h
+++ b/src/video_core/buffer_cache/buffer.h
@@ -168,7 +168,7 @@ public:
                  MemoryUsage usage, u64 size_bytes_);
 
     /// Reserves a region of memory from the stream buffer.
-    std::pair<u8*, u64> Map(u64 size, u64 alignment = 0);
+    std::pair<u8*, u64> Map(u64 size, u64 alignment = 0, bool allow_wait = true);
 
     /// Ensures that reserved bytes of memory are available to the GPU.
     void Commit();
@@ -181,10 +181,6 @@ public:
         return offset;
     }
 
-    u64 GetFreeSize() const {
-        return size_bytes - offset - mapped_size;
-    }
-
 private:
     struct Watch {
         u64 tick{};
@@ -195,7 +191,7 @@ private:
     void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
 
     /// Waits pending watches until requested upper bound.
-    void WaitPendingOperations(u64 requested_upper_bound);
+    bool WaitPendingOperations(u64 requested_upper_bound, bool allow_wait);
 
 private:
     u64 offset{};
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index d55e05d1e..6ff18051a 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -137,8 +137,7 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
         return;
     }
     memory_tracker->InvalidateRegion(
-        device_addr, size, Config::readbacks(),
-        [this, device_addr, size] { ReadMemory(device_addr, size, true); });
+        device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); });
 }
 
 void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) {
@@ -817,24 +816,50 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
 void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written,
                                     bool is_texel_buffer) {
     boost::container::small_vector<vk::BufferCopy, 4> copies;
+    size_t total_size_bytes = 0;
     VAddr buffer_start = buffer.CpuAddr();
     memory_tracker->ForEachUploadRange(
-        device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) {
-            const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
-            copies.push_back(vk::BufferCopy{
-                .srcOffset = offset,
-                .dstOffset = device_addr_out - buffer_start,
-                .size = range_size,
-            });
-        });
-    SCOPE_EXIT {
-        if (is_texel_buffer) {
-            SynchronizeBufferFromImage(buffer, device_addr, size);
-        }
-    };
+        device_addr, size, is_written,
+        [&](u64 device_addr_out, u64 range_size) {
+            copies.emplace_back(total_size_bytes, device_addr_out - buffer_start, range_size);
+            total_size_bytes += range_size;
+        },
+        [&] { UploadCopies(buffer, copies, total_size_bytes); });
+    if (is_texel_buffer) {
+        SynchronizeBufferFromImage(buffer, device_addr, size);
+    }
+}
+
+void BufferCache::UploadCopies(Buffer& buffer, std::span<vk::BufferCopy> copies,
+                               size_t total_size_bytes) {
     if (copies.empty()) {
         return;
     }
+    vk::Buffer src_buffer = staging_buffer.Handle();
+    const auto [staging, offset] = staging_buffer.Map(total_size_bytes);
+    if (staging) {
+        for (auto& copy : copies) {
+            u8* const src_pointer = staging + copy.srcOffset;
+            const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset;
+            std::memcpy(src_pointer, std::bit_cast<const u8*>(device_addr), copy.size);
+            // Apply the staging offset
+            copy.srcOffset += offset;
+        }
+        staging_buffer.Commit();
+    } else {
+        // For large one time transfers use a temporary host buffer.
+        auto temp_buffer =
+            std::make_unique<Buffer>(instance, scheduler, MemoryUsage::Upload, 0,
+                                     vk::BufferUsageFlagBits::eTransferSrc, total_size_bytes);
+        src_buffer = temp_buffer->Handle();
+        u8* const staging = temp_buffer->mapped_data.data();
+        for (const auto& copy : copies) {
+            u8* const src_pointer = staging + copy.srcOffset;
+            const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset;
+            std::memcpy(src_pointer, std::bit_cast<const u8*>(device_addr), copy.size);
+        }
+        scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable { buffer.reset(); });
+    }
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
     const vk::BufferMemoryBarrier2 pre_barrier = {
@@ -861,7 +886,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
         .bufferMemoryBarrierCount = 1,
         .pBufferMemoryBarriers = &pre_barrier,
     });
-    cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.buffer, copies);
+    cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
         .bufferMemoryBarrierCount = 1,
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 354d01431..fe66c2568 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -194,6 +194,8 @@ private:
     void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written,
                            bool is_texel_buffer);
 
+    void UploadCopies(Buffer& buffer, std::span<vk::BufferCopy> copies, size_t total_size_bytes);
+
     bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
 
     void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h
index ca87c7df0..ec0878c3b 100644
--- a/src/video_core/buffer_cache/memory_tracker.h
+++ b/src/video_core/buffer_cache/memory_tracker.h
@@ -62,17 +62,17 @@ public:
     }
 
     /// Removes all protection from a page and ensures GPU data has been flushed if requested
-    void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
+    void InvalidateRegion(VAddr cpu_addr, u64 size, auto&& on_flush) noexcept {
        IteratePages(
-            cpu_addr, size,
-            [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
+            cpu_addr, size, [&on_flush](RegionManager* manager, u64 offset, size_t size) {
                const bool should_flush = [&] {
                    // Perform both the GPU modification check and CPU state change with the lock
                    // in case we are racing with GPU thread trying to mark the page as GPU
                    // modified. If we need to flush the flush function is going to perform CPU
                    // state change.
                    std::scoped_lock lk{manager->lock};
-                    if (try_flush && manager->template IsRegionModified<Type::GPU>(offset, size)) {
+                    if (Config::readbacks() &&
+                        manager->template IsRegionModified<Type::GPU>(offset, size)) {
                        return true;
                    }
                    manager->template ChangeRegionState<Type::CPU, true>(
                        manager->GetCpuAddr() + offset, size);
@@ -86,17 +86,27 @@ public:
     }
 
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
-    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
+    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func,
+                            auto&& on_upload) {
        IteratePages(query_cpu_range, query_size,
                     [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
-                         std::scoped_lock lk{manager->lock};
+                         manager->lock.lock();
                         manager->template ForEachModifiedRange<Type::CPU, true>(
                             manager->GetCpuAddr() + offset, size, func);
-                         if (is_written) {
-                             manager->template ChangeRegionState<Type::GPU, true>(
-                                 manager->GetCpuAddr() + offset, size);
+                         if (!is_written) {
+                             manager->lock.unlock();
                         }
                     });
+        on_upload();
+        if (!is_written) {
+            return;
+        }
+        IteratePages(query_cpu_range, query_size,
+                     [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
+                         manager->template ChangeRegionState<Type::GPU, true>(
+                             manager->GetCpuAddr() + offset, size);
+                         manager->lock.unlock();
+                     });
     }
 
     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 36fd9c055..bd6fb549a 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -347,7 +347,11 @@ public:
     }
 
     /// Returns true when a tick has been triggered by the GPU.
-    [[nodiscard]] bool IsFree(u64 tick) const noexcept {
+    [[nodiscard]] bool IsFree(u64 tick) noexcept {
+        if (master_semaphore.IsFree(tick)) {
+            return true;
+        }
+        master_semaphore.Refresh();
         return master_semaphore.IsFree(tick);
     }
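
Note (not part of the patch): the core pattern above is that StreamBuffer::Map now fails fast with {nullptr, 0} instead of asserting or stalling, and BufferCache::UploadCopies falls back to a dedicated one-shot host buffer when that happens. The standalone C++ sketch below models that try-map-then-fall-back flow with plain standard-library types under stated assumptions; RingUploader, Fence, TryMap, and Upload are invented names for illustration, not this codebase's API, and the watch bookkeeping is heavily simplified.

// Standalone sketch of the non-blocking map with fallback. The real code tracks
// per-region watches and wraps the cursor more carefully; this only shows the shape.
#include <cstdint>
#include <cstring>
#include <memory>
#include <optional>
#include <utility>
#include <vector>

struct Fence {
    uint64_t tick; // tick at which the GPU is done with the region
};

class RingUploader {
public:
    explicit RingUploader(size_t capacity) : storage(capacity) {}

    // Non-blocking map: returns {pointer, offset}, or nullopt when the request does not
    // fit or would have to wait on in-flight GPU work (the analogue of
    // Map(size, alignment, /*allow_wait=*/false) returning {nullptr, 0}).
    std::optional<std::pair<uint8_t*, size_t>> TryMap(size_t size, uint64_t completed_tick) {
        if (size > storage.size()) {
            return std::nullopt; // larger than the whole ring
        }
        if (offset + size > storage.size()) {
            offset = 0; // wrap around (real code re-validates pending watches here)
        }
        for (const Fence& fence : pending) {
            if (fence.tick > completed_tick) {
                return std::nullopt; // would stall on work the GPU has not finished
            }
        }
        pending.clear();
        return std::make_pair(storage.data() + offset, offset);
    }

    void Commit(size_t size, uint64_t submit_tick) {
        offset += size;
        pending.push_back(Fence{submit_tick});
    }

private:
    std::vector<uint8_t> storage;
    std::vector<Fence> pending;
    size_t offset = 0;
};

// Caller side, analogous in spirit to BufferCache::UploadCopies: try the shared ring
// first and only allocate a one-shot buffer when the ring cannot serve the request.
void Upload(RingUploader& ring, const uint8_t* src, size_t size, uint64_t completed_tick,
            uint64_t submit_tick) {
    if (auto mapped = ring.TryMap(size, completed_tick)) {
        std::memcpy(mapped->first, src, size);
        ring.Commit(size, submit_tick);
        // ...record a GPU copy that reads the ring at mapped->second...
        return;
    }
    // Large or contended upload: copy into a dedicated buffer whose destruction would be
    // deferred until the GPU has consumed it (DeferOperation in the patch above).
    auto temp = std::make_unique<std::vector<uint8_t>>(src, src + size);
    // ...record a GPU copy that reads *temp, then defer freeing it to submit_tick...
}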