diff --git a/CMakeLists.txt b/CMakeLists.txt index 185205221..be6048ac4 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -817,7 +817,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/buffer_cache/buffer.h src/video_core/buffer_cache/buffer_cache.cpp src/video_core/buffer_cache/buffer_cache.h - src/video_core/buffer_cache/memory_tracker_base.h + src/video_core/buffer_cache/memory_tracker.h src/video_core/buffer_cache/range_set.h src/video_core/buffer_cache/word_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp diff --git a/src/core/address_space.h b/src/core/address_space.h index 7ccc2cd1e..685cb16de 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -11,6 +11,7 @@ namespace Core { enum class MemoryPermission : u32 { + None = 0, Read = 1 << 0, Write = 1 << 1, ReadWrite = Read | Write, diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index cdf736a89..1e3e37c4c 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -5,8 +5,10 @@ #include "common/alignment.h" #include "common/scope_exit.h" #include "common/types.h" +#include "core/memory.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/buffer_cache/memory_tracker.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -20,14 +22,13 @@ static constexpr size_t UboStreamBufferSize = 128_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, - PageManager& tracker_) + PageManager& tracker) : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, - texture_cache{texture_cache_}, tracker{tracker_}, + texture_cache{texture_cache_}, staging_buffer{instance, scheduler, 
MemoryUsage::Upload, StagingBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, - gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize}, - memory_tracker{&tracker} { - Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); + gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize} { + memory_tracker = std::make_unique(tracker); // Ensure the first slot is used for the null buffer const auto null_id = @@ -35,22 +36,29 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s ASSERT(null_id.index == 0); const vk::Buffer& null_buffer = slot_buffers[null_id].buffer; Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer"); + Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); } BufferCache::~BufferCache() = default; void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { - const bool is_tracked = IsRegionRegistered(device_addr, size); - if (is_tracked) { - // Mark the page as CPU modified to stop tracking writes. 
- memory_tracker.MarkRegionAsCpuModified(device_addr, size); + if (!IsRegionRegistered(device_addr, size)) { + return; } + if (memory_tracker->IsRegionGpuModified(device_addr, size)) { + memory_tracker->UnmarkRegionAsGpuModified(device_addr, size); + } + memory_tracker->MarkRegionAsCpuModified(device_addr, size); +} + +void BufferCache::ReadMemory(VAddr device_addr, u64 size) { + memory_tracker->UnmarkRegionAsGpuModified(device_addr, size); } void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) { boost::container::small_vector copies; u64 total_size_bytes = 0; - memory_tracker.ForEachDownloadRange( + memory_tracker->ForEachDownloadRange( device_addr, size, [&](u64 device_addr_out, u64 range_size) { const VAddr buffer_addr = buffer.CpuAddr(); const auto add_download = [&](VAddr start, VAddr end) { @@ -61,7 +69,10 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si .dstOffset = total_size_bytes, .size = new_size, }); - total_size_bytes += new_size; + // Align up to avoid cache conflicts + constexpr u64 align = 64ULL; + constexpr u64 mask = ~(align - 1ULL); + total_size_bytes += (new_size + align - 1) & mask; }; gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download); gpu_modified_ranges.Subtract(device_addr_out, range_size); @@ -79,10 +90,12 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer.buffer, staging_buffer.Handle(), copies); scheduler.Finish(); + auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; const u64 dst_offset = copy.dstOffset - offset; - std::memcpy(std::bit_cast(copy_device_addr), staging + dst_offset, copy.size); + memory->TryWriteBacking(std::bit_cast(copy_device_addr), staging + dst_offset, + copy.size); } } @@ -202,9 +215,11 @@ void BufferCache::BindIndexBuffer(u32 
index_offset) { void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned"); - if (!is_gds && !IsRegionRegistered(address, num_bytes)) { + if (!is_gds) { memcpy(std::bit_cast(address), value, num_bytes); - return; + if (!IsRegionRegistered(address, num_bytes)) { + return; + } } scheduler.EndRendering(); const Buffer* buffer = [&] { @@ -250,20 +265,25 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b bool is_texel_buffer, BufferId buffer_id) { // For small uniform buffers that have not been modified by gpu // use device local stream buffer to reduce renderpass breaks. - static constexpr u64 StreamThreshold = CACHING_PAGESIZE; - const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size); - if (!is_written && size <= StreamThreshold && !is_gpu_dirty) { + static constexpr u64 StreamThreshold = CACHING_PAGESIZE * 2; + if (!is_written && size <= StreamThreshold && !IsRegionGpuModified(device_addr, size)) { const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); return {&stream_buffer, offset}; } + // Lookup buffer if needed, or if provided id was deleted by join operation. if (!buffer_id || slot_buffers[buffer_id].is_deleted) { buffer_id = FindBuffer(device_addr, size); } + Buffer& buffer = slot_buffers[buffer_id]; SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer); - if (is_written) { - memory_tracker.MarkRegionAsGpuModified(device_addr, size); + + // Mark region as GPU modified to get additional tracking needed for readbacks. + // Sometimes huge buffers may be bound, so set a threshold here as well. 
+ static constexpr u64 GpuMarkThreshold = 512_MB; + if (is_written && size <= GpuMarkThreshold) { + memory_tracker->MarkRegionAsGpuModified(device_addr, size); gpu_modified_ranges.Add(device_addr, size); } return {&buffer, buffer.Offset(device_addr)}; @@ -271,8 +291,7 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) { // Check if any buffer contains the full requested range. - const u64 page = gpu_addr >> CACHING_PAGEBITS; - const BufferId buffer_id = page_table[page]; + const BufferId buffer_id = page_table[gpu_addr >> CACHING_PAGEBITS]; if (buffer_id) { Buffer& buffer = slot_buffers[buffer_id]; if (buffer.IsInBounds(gpu_addr, size)) { @@ -283,7 +302,7 @@ std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, // If no buffer contains the full requested range but some buffer within was GPU-modified, // fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications. // This is only done if the request prefers to use GPU memory, otherwise we can skip it. - if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) { + if (prefer_gpu && memory_tracker->IsRegionGpuModified(gpu_addr, size)) { return ObtainBuffer(gpu_addr, size, false, false); } // In all other cases, just do a CPU copy to the staging buffer. 
@@ -313,11 +332,16 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) { } bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) { - return memory_tracker.IsRegionCpuModified(addr, size); + return memory_tracker->IsRegionCpuModified(addr, size); } bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { - return memory_tracker.IsRegionGpuModified(addr, size); + if (!memory_tracker->IsRegionGpuModified(addr, size)) { + return false; + } + bool modified = false; + gpu_modified_ranges.ForEachInRange(addr, size, [&](VAddr, size_t) { modified = true; }); + return modified; } BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { @@ -518,7 +542,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, boost::container::small_vector copies; u64 total_size_bytes = 0; VAddr buffer_start = buffer.CpuAddr(); - memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { + memory_tracker->ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { copies.push_back(vk::BufferCopy{ .srcOffset = total_size_bytes, .dstOffset = device_addr_out - buffer_start, diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 71a6bed2a..6e7d1a18c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -9,7 +9,6 @@ #include "common/slot_vector.h" #include "common/types.h" #include "video_core/buffer_cache/buffer.h" -#include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/buffer_cache/range_set.h" #include "video_core/multi_level_page_table.h" @@ -17,13 +16,6 @@ namespace AmdGpu { struct Liverpool; } -namespace Shader { -namespace Gcn { -struct FetchShaderData; -} -struct Info; -} // namespace Shader - namespace Vulkan { class GraphicsPipeline; } @@ -35,6 +27,8 @@ using BufferId = Common::SlotId; static constexpr BufferId NULL_BUFFER_ID{0}; class 
TextureCache; +class MemoryTracker; +class PageManager; class BufferCache { public: @@ -57,6 +51,12 @@ public: bool has_stream_leap = false; }; + using IntervalSet = + boost::icl::interval_set; + using IntervalType = typename IntervalSet::interval_type; + public: explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, @@ -81,6 +81,9 @@ public: /// Invalidates any buffer in the logical page range. void InvalidateMemory(VAddr device_addr, u64 size); + /// Waits on pending downloads in the logical page range. + void ReadMemory(VAddr device_addr, u64 size); + /// Binds host vertex buffers for the current draw. void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline); @@ -153,14 +156,13 @@ private: Vulkan::Scheduler& scheduler; AmdGpu::Liverpool* liverpool; TextureCache& texture_cache; - PageManager& tracker; + std::unique_ptr memory_tracker; StreamBuffer staging_buffer; StreamBuffer stream_buffer; Buffer gds_buffer; std::shared_mutex mutex; Common::SlotVector slot_buffers; RangeSet gpu_modified_ranges; - MemoryTracker memory_tracker; PageTable page_table; }; diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker.h similarity index 86% rename from src/video_core/buffer_cache/memory_tracker_base.h rename to src/video_core/buffer_cache/memory_tracker.h index d9166b11c..efbbd0a7f 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -7,6 +7,7 @@ #include #include #include +#include "common/logging/log.h" #include "common/types.h" #include "video_core/buffer_cache/word_manager.h" @@ -19,11 +20,11 @@ public: static constexpr size_t MANAGER_POOL_SIZE = 32; public: - explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {} + explicit MemoryTracker(PageManager& tracker_) : tracker{&tracker_} {} ~MemoryTracker() = default; /// Returns true if a region has 
been modified from the CPU - [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { return manager->template IsRegionModified(offset, size); @@ -31,7 +32,7 @@ public: } /// Returns true if a region has been modified from the GPU - [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { + bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { return IteratePages( query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) { return manager->template IsRegionModified(offset, size); @@ -56,9 +57,16 @@ public: }); } + void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { + IteratePages(dirty_cpu_addr, query_size, + [](RegionManager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); + } + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified - template - void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { IteratePages(query_cpu_range, query_size, [&func](RegionManager* manager, u64 offset, size_t size) { manager->template ForEachModifiedRange( @@ -67,17 +75,12 @@ public: } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified - template - void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { + template + void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { IteratePages(query_cpu_range, query_size, [&func](RegionManager* manager, u64 offset, size_t size) { - if constexpr (clear) { - manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - } else { - 
manager->template ForEachModifiedRange( - manager->GetCpuAddr() + offset, size, func); - } + manager->template ForEachModifiedRange( + manager->GetCpuAddr() + offset, size, func); }); } diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index 5ad724f96..92b6029c5 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -10,8 +10,9 @@ #ifdef __linux__ #include "common/adaptive_mutex.h" -#endif +#else #include "common/spin_lock.h" +#endif #include "common/types.h" #include "video_core/page_manager.h" @@ -27,9 +28,8 @@ constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD; enum class Type { - CPU, - GPU, - Untracked, + CPU, ///< Set if CPU page data is more up-to-date than GPU data. + GPU, ///< Set if GPU page data is more up-to-date than CPU data. }; using WordsArray = std::array; @@ -44,7 +44,8 @@ public: : tracker{tracker_}, cpu_addr{cpu_addr_} { cpu.fill(~u64{0}); gpu.fill(0); - untracked.fill(~u64{0}); + write.fill(~u64{0}); + read.fill(~u64{0}); } explicit RegionManager() = default; @@ -56,7 +57,7 @@ public: return cpu_addr; } - static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { + static constexpr u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { constexpr size_t number_bits = sizeof(u64) * 8; const size_t limit_page_end = number_bits - std::min(page_end, number_bits); u64 bits = (word >> page_start) << page_start; @@ -64,7 +65,7 @@ public: return bits; } - static std::pair GetWordPage(VAddr address) { + static constexpr std::pair GetWordPage(VAddr address) { const size_t converted_address = static_cast(address); const size_t word_number = converted_address / BYTES_PER_WORD; const size_t amount_pages = converted_address % BYTES_PER_WORD; @@ -104,13 +105,12 @@ public: } } - template - void IteratePages(u64 mask, Func&& func) const { + void IteratePages(u64 
mask, auto&& func) const { size_t offset = 0; while (mask != 0) { const size_t empty_bits = std::countr_zero(mask); offset += empty_bits; - mask = mask >> empty_bits; + mask >>= empty_bits; const size_t continuous_bits = std::countr_one(mask); func(offset, continuous_bits); @@ -121,27 +121,37 @@ public: /** * Change the state of a range of pages - * + * - If the CPU data is modified, stop tracking writes to allow guest to write to the page. + * - If the CPU data is not modified, track writes to get notified when a write does occur. + * - If the GPU data is modified, track both reads and writes to wait for any pending GPU + * downloads before any CPU access. + * - If the GPU data is not modified, track writes, to get notified when a write does occur. * @param dirty_addr Base address to mark or unmark as modified * @param size Size in bytes to mark or unmark as modified */ - template - void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { + template + void ChangeRegionState(u64 dirty_addr, u64 size) noexcept { std::scoped_lock lk{lock}; - std::span state_words = Span(); IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { if constexpr (type == Type::CPU) { - UpdateProtection(index, untracked[index], mask); - } - if constexpr (enable) { - state_words[index] |= mask; - if constexpr (type == Type::CPU) { - untracked[index] |= mask; + UpdateProtection(index, write[index], mask); + if constexpr (is_dirty) { + cpu[index] |= mask; + write[index] |= mask; + } else { + cpu[index] &= ~mask; + write[index] &= ~mask; } } else { - state_words[index] &= ~mask; - if constexpr (type == Type::CPU) { - untracked[index] &= ~mask; + UpdateProtection(index, write[index], mask); + UpdateProtection(index, read[index], mask); + write[index] &= ~mask; + if constexpr (is_dirty) { + gpu[index] |= mask; + read[index] &= ~mask; + } else { + gpu[index] &= ~mask; + read[index] |= mask; } } }); @@ -150,16 +160,15 @@ public: /** * Loop over each page in 
the given range, turn off those bits and notify the tracker if * needed. Call the given function on each turned off range. - * + * - If looping over CPU pages the clear flag will re-enable write protection. + * - If looping over GPU pages the clear flag will disable read protection. * @param query_cpu_range Base CPU address to loop over * @param size Size in bytes of the CPU range to loop over * @param func Function to call for each turned off region */ - template - void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + template + void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) { std::scoped_lock lk{lock}; - static_assert(type != Type::Untracked); - std::span state_words = Span(); const size_t offset = query_cpu_range - cpu_addr; bool pending = false; @@ -170,18 +179,18 @@ public: (pending_pointer - pending_offset) * BYTES_PER_PAGE); }; IterateWords(offset, size, [&](size_t index, u64 mask) { - if constexpr (type == Type::GPU) { - mask &= ~untracked[index]; - } const u64 word = state_words[index] & mask; if constexpr (clear) { if constexpr (type == Type::CPU) { - UpdateProtection(index, untracked[index], mask); + UpdateProtection(index, write[index], mask); + write[index] &= ~mask; + } else { + UpdateProtection(index, read[index], mask); + UpdateProtection(index, write[index], mask); + read[index] |= mask; + write[index] |= mask; } state_words[index] &= ~mask; - if constexpr (type == Type::CPU) { - untracked[index] &= ~mask; - } } const size_t base_offset = index * PAGES_PER_WORD; IteratePages(word, [&](size_t pages_offset, size_t pages_size) { @@ -215,14 +224,9 @@ public: */ template [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { - static_assert(type != Type::Untracked); - const std::span state_words = Span(); bool result = false; IterateWords(offset, size, [&](size_t index, u64 mask) { - if constexpr (type == Type::GPU) { - mask &= ~untracked[index]; - } const u64 word = state_words[index] & 
mask; if (word != 0) { result = true; @@ -238,18 +242,19 @@ private: * Notify tracker about changes in the CPU tracking state of a word in the buffer * * @param word_index Index to the word to notify to the tracker - * @param current_bits Current state of the word - * @param new_bits New state of the word + * @param access_bits Bits of pages of word_index that are unprotected. + * @param mask Which bits from access_bits are relevant. * * @tparam add_to_tracker True when the tracker should start tracking the new pages */ - template - void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const { - u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; - VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; + template + void UpdateProtection(u64 word_index, u64 access_bits, u64 mask) const { + constexpr s32 delta = add_to_tracker ? 1 : -1; + const u64 changed_bits = (add_to_tracker ? access_bits : ~access_bits) & mask; + const VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; IteratePages(changed_bits, [&](size_t offset, size_t size) { - tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE, - add_to_tracker ? 
1 : -1); + tracker->UpdatePageWatchers(addr + offset * BYTES_PER_PAGE, + size * BYTES_PER_PAGE); }); } @@ -259,8 +264,6 @@ private: return cpu; } else if constexpr (type == Type::GPU) { return gpu; - } else if constexpr (type == Type::Untracked) { - return untracked; } } @@ -270,8 +273,6 @@ private: return cpu; } else if constexpr (type == Type::GPU) { return gpu; - } else if constexpr (type == Type::Untracked) { - return untracked; } } @@ -284,7 +285,8 @@ private: VAddr cpu_addr = 0; WordsArray cpu; WordsArray gpu; - WordsArray untracked; + WordsArray write; + WordsArray read; }; } // namespace VideoCore diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 47ed9e543..35fc90143 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -3,7 +3,6 @@ #include #include -#include "common/alignment.h" #include "common/assert.h" #include "common/error.h" #include "common/signal_context.h" @@ -14,6 +13,7 @@ #ifndef _WIN64 #include +#include "common/adaptive_mutex.h" #ifdef ENABLE_USERFAULTFD #include #include @@ -22,16 +22,19 @@ #endif #else #include +#include "common/spin_lock.h" #endif namespace VideoCore { -constexpr size_t PAGESIZE = 4_KB; -constexpr size_t PAGEBITS = 12; +struct PageManager::Impl { + static constexpr size_t ADDRESS_BITS = 40; + static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (ADDRESS_BITS - PAGE_BITS); + inline static Vulkan::Rasterizer* rasterizer; #ifdef ENABLE_USERFAULTFD -struct PageManager::Impl { - Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { + Impl(Vulkan::Rasterizer* rasterizer_) { + rasterizer = rasterizer_; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); ASSERT_MSG(uffd != -1, "{}", Common::GetLastErrorMsg()); @@ -118,12 +121,9 @@ struct PageManager::Impl { } } - Vulkan::Rasterizer* rasterizer; std::jthread ufd_thread; int uffd; -}; #else -struct PageManager::Impl { Impl(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; @@ 
-141,39 +141,117 @@ struct PageManager::Impl { // No-op } - void Protect(VAddr address, size_t size, bool allow_write) { + void Protect(VAddr address, size_t size, Core::MemoryPermission perms) { auto* memory = Core::Memory::Instance(); auto& impl = memory->GetAddressSpace(); - impl.Protect(address, size, - allow_write ? Core::MemoryPermission::ReadWrite - : Core::MemoryPermission::Read); + impl.Protect(address, size, perms); } static bool GuestFaultSignalHandler(void* context, void* fault_address) { const auto addr = reinterpret_cast(fault_address); if (Common::IsWriteError(context)) { return rasterizer->InvalidateMemory(addr, 1); + } else { + return rasterizer->ReadMemory(addr, 1); } return false; } - - inline static Vulkan::Rasterizer* rasterizer; -}; #endif + template + void UpdatePageWatchers(VAddr addr, u64 size) { + std::scoped_lock lk{lock}; + std::atomic_thread_fence(std::memory_order_acquire); + + size_t page = addr >> PAGE_BITS; + auto perms = cached_pages[page].Perms(); + u64 range_begin = 0; + u64 range_bytes = 0; + + const auto release_pending = [&] { + if (range_bytes > 0) { + Protect(range_begin << PAGE_BITS, range_bytes, perms); + range_bytes = 0; + } + }; + // Iterate requested pages. + const size_t page_end = Common::DivCeil(addr + size, PAGE_SIZE); + for (; page != page_end; ++page) { + PageState& state = cached_pages[page]; + + // Apply the change to the page state. + const auto new_count = state.AddDelta(); + + // If the protection changed flush pending (un)protect action. + if (auto new_perms = state.Perms(); new_perms != perms) [[unlikely]] { + release_pending(); + perms = new_perms; + } + + // If the page must be (un)protected add it to pending range. 
+ if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) { + if (range_bytes == 0) { + range_begin = page; + } + range_bytes += PAGE_SIZE; + } else { + release_pending(); + } + } + release_pending(); + } + + struct PageState { + u8 num_write_watchers : 7; + // At the moment only buffer cache can request read watchers. + // And buffers cannot overlap, thus only 1 can exist per page. + u8 num_read_watchers : 1; + + Core::MemoryPermission WritePerm() const noexcept { + return num_write_watchers == 0 ? Core::MemoryPermission::Write + : Core::MemoryPermission::None; + } + + Core::MemoryPermission ReadPerm() const noexcept { + return num_read_watchers == 0 ? Core::MemoryPermission::Read + : Core::MemoryPermission::None; + } + + Core::MemoryPermission Perms() const noexcept { + return ReadPerm() | WritePerm(); + } + + template + u8 AddDelta() { + if constexpr (is_read) { + if constexpr (delta == 1) { + return ++num_read_watchers; + } else { + return --num_read_watchers; + } + } else { + if constexpr (delta == 1) { + return ++num_write_watchers; + } else { + return --num_write_watchers; + } + } + } + }; + + std::array cached_pages{}; +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP + Common::AdaptiveMutex lock; +#else + Common::SpinLock lock; +#endif +}; + PageManager::PageManager(Vulkan::Rasterizer* rasterizer_) - : impl{std::make_unique(rasterizer_)}, rasterizer{rasterizer_} {} + : impl{std::make_unique(rasterizer_)} {} PageManager::~PageManager() = default; -VAddr PageManager::GetPageAddr(VAddr addr) { - return Common::AlignDown(addr, PAGESIZE); -} - -VAddr PageManager::GetNextPageAddr(VAddr addr) { - return Common::AlignUp(addr + 1, PAGESIZE); -} - void PageManager::OnGpuMap(VAddr address, size_t size) { impl->OnMap(address, size); } @@ -182,41 +260,14 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) { impl->OnUnmap(address, size); } -void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { - static constexpr u64 PageShift = 12; 
- - std::scoped_lock lk{lock}; - const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1; - const u64 page_start = addr >> PageShift; - const u64 page_end = page_start + num_pages; - - const auto pages_interval = - decltype(cached_pages)::interval_type::right_open(page_start, page_end); - if (delta > 0) { - cached_pages.add({pages_interval, delta}); - } - - const auto& range = cached_pages.equal_range(pages_interval); - for (const auto& [range, count] : boost::make_iterator_range(range)) { - const auto interval = range & pages_interval; - const VAddr interval_start_addr = boost::icl::first(interval) << PageShift; - const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift; - const u32 interval_size = interval_end_addr - interval_start_addr; - ASSERT_MSG(rasterizer->IsMapped(interval_start_addr, interval_size), - "Attempted to track non-GPU memory at address {:#x}, size {:#x}.", - interval_start_addr, interval_size); - if (delta > 0 && count == delta) { - impl->Protect(interval_start_addr, interval_size, false); - } else if (delta < 0 && count == -delta) { - impl->Protect(interval_start_addr, interval_size, true); - } else { - ASSERT(count >= 0); - } - } - - if (delta < 0) { - cached_pages.add({pages_interval, delta}); - } +template +void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const { + impl->UpdatePageWatchers(addr, size); } +template void PageManager::UpdatePageWatchers<1, true>(VAddr addr, u64 size) const; +template void PageManager::UpdatePageWatchers<1, false>(VAddr addr, u64 size) const; +template void PageManager::UpdatePageWatchers<-1, true>(VAddr addr, u64 size) const; +template void PageManager::UpdatePageWatchers<-1, false>(VAddr addr, u64 size) const; + } // namespace VideoCore diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index f6bae9641..09938633e 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -4,11 +4,7 @@ #pragma once #include 
-#include -#ifdef __linux__ -#include "common/adaptive_mutex.h" -#endif -#include "common/spin_lock.h" +#include "common/alignment.h" #include "common/types.h" namespace Vulkan { @@ -18,6 +14,9 @@ class Rasterizer; namespace VideoCore { class PageManager { + static constexpr size_t PAGE_BITS = 12; + static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS; + public: explicit PageManager(Vulkan::Rasterizer* rasterizer); ~PageManager(); @@ -28,22 +27,23 @@ public: /// Unregister a range of gpu memory that was unmapped. void OnGpuUnmap(VAddr address, size_t size); - /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta); + /// Updates read/write watches in the pages touching the specified region + template + void UpdatePageWatchers(VAddr addr, u64 size) const; - static VAddr GetPageAddr(VAddr addr); - static VAddr GetNextPageAddr(VAddr addr); + /// Returns page aligned address. + static constexpr VAddr GetPageAddr(VAddr addr) { + return Common::AlignDown(addr, PAGE_SIZE); + } + + /// Returns address of the next page. + static constexpr VAddr GetNextPageAddr(VAddr addr) { + return Common::AlignUp(addr + 1, PAGE_SIZE); + } private: struct Impl; std::unique_ptr impl; - Vulkan::Rasterizer* rasterizer; - boost::icl::interval_map cached_pages; -#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP - Common::AdaptiveMutex lock; -#else - Common::SpinLock lock; -#endif }; } // namespace VideoCore diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 87d07a967..568103b47 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -925,6 +925,15 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) { return true; } +bool Rasterizer::ReadMemory(VAddr addr, u64 size) { + if (!IsMapped(addr, size)) { + // Not GPU mapped memory, can skip invalidation logic entirely. 
+ return false; + } + buffer_cache.ReadMemory(addr, size); + return true; +} + bool Rasterizer::IsMapped(VAddr addr, u64 size) { if (size == 0) { // There is no memory, so not mapped. diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2fac8c8da..4c0652c4f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -56,6 +56,7 @@ public: void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); u32 ReadDataFromGds(u32 gsd_offset); bool InvalidateMemory(VAddr addr, u64 size); + bool ReadMemory(VAddr addr, u64 size); bool IsMapped(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size); void UnmapMemory(VAddr addr, u64 size); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index d41ee57cc..a3594cb79 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -662,7 +662,7 @@ void TextureCache::TrackImage(ImageId image_id) { // Re-track the whole image image.track_addr = image_begin; image.track_addr_end = image_end; - tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size, 1); + tracker.UpdatePageWatchers<1>(image_begin, image.info.guest_size); } else { if (image_begin < image.track_addr) { TrackImageHead(image_id); @@ -685,7 +685,7 @@ void TextureCache::TrackImageHead(ImageId image_id) { ASSERT(image.track_addr != 0 && image_begin < image.track_addr); const auto size = image.track_addr - image_begin; image.track_addr = image_begin; - tracker.UpdatePagesCachedCount(image_begin, size, 1); + tracker.UpdatePageWatchers<1>(image_begin, size); } void TextureCache::TrackImageTail(ImageId image_id) { @@ -701,7 +701,7 @@ void TextureCache::TrackImageTail(ImageId image_id) { const auto addr = image.track_addr_end; const auto size = image_end - image.track_addr_end; image.track_addr_end = image_end; - 
tracker.UpdatePagesCachedCount(addr, size, 1); + tracker.UpdatePageWatchers<1>(addr, size); } void TextureCache::UntrackImage(ImageId image_id) { @@ -714,7 +714,7 @@ void TextureCache::UntrackImage(ImageId image_id) { image.track_addr = 0; image.track_addr_end = 0; if (size != 0) { - tracker.UpdatePagesCachedCount(addr, size, -1); + tracker.UpdatePageWatchers<-1>(addr, size); } } @@ -733,7 +733,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePagesCachedCount(image_begin, size, -1); + tracker.UpdatePageWatchers<-1>(image_begin, size); } void TextureCache::UntrackImageTail(ImageId image_id) { @@ -752,7 +752,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePagesCachedCount(addr, size, -1); + tracker.UpdatePageWatchers<-1>(addr, size); } void TextureCache::DeleteImage(ImageId image_id) {