diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 0088ea4fa..3ac6a3598 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -54,18 +54,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
 BufferCache::~BufferCache() = default;
 
 void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
-    std::scoped_lock lk{mutex};
     const bool is_tracked = IsRegionRegistered(device_addr, size);
-    if (!is_tracked) {
-        return;
-    }
-    // Mark the page as CPU modified to stop tracking writes.
-    SCOPE_EXIT {
+    if (is_tracked) {
+        // Mark the page as CPU modified to stop tracking writes.
         memory_tracker.MarkRegionAsCpuModified(device_addr, size);
-    };
-    if (!memory_tracker.IsRegionGpuModified(device_addr, size)) {
-        // Page has not been modified by the GPU, nothing to do.
-        return;
     }
 }
 
@@ -346,6 +338,7 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
             ++page;
             continue;
         }
+        std::shared_lock lk{mutex};
         Buffer& buffer = slot_buffers[buffer_id];
         const VAddr buf_start_addr = buffer.CpuAddr();
         const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
@@ -496,8 +489,11 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     wanted_size = static_cast<u32>(device_addr_end - device_addr);
     const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
-    const BufferId new_buffer_id = slot_buffers.insert(
-        instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
+    const BufferId new_buffer_id = [&] {
+        std::scoped_lock lk{mutex};
+        return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
+                                   AllFlags, size);
+    }();
     auto& new_buffer = slot_buffers[new_buffer_id];
     const size_t size_bytes = new_buffer.SizeBytes();
     const auto cmdbuf = scheduler.CommandBuffer();
@@ -537,10 +533,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
 
 void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
                                     bool is_texel_buffer) {
-    std::scoped_lock lk{mutex};
     boost::container::small_vector copies;
     u64 total_size_bytes = 0;
-    u64 largest_copy = 0;
     VAddr buffer_start = buffer.CpuAddr();
     memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
         copies.push_back(vk::BufferCopy{
@@ -549,7 +543,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
             .size = range_size,
         });
         total_size_bytes += range_size;
-        largest_copy = std::max(largest_copy, range_size);
     });
     SCOPE_EXIT {
         if (is_texel_buffer) {
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 0c70fa10b..c367795f1 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -3,7 +3,7 @@
 
 #pragma once
 
-#include <mutex>
+#include <shared_mutex>
 #include
 #include
 #include
@@ -157,7 +157,7 @@ private:
     StreamBuffer staging_buffer;
     StreamBuffer stream_buffer;
     Buffer gds_buffer;
-    std::mutex mutex;
+    std::shared_mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
     RangeSet gpu_modified_ranges;
     vk::BufferView null_buffer_view;
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h
index ae61b55f2..d9166b11c 100644
--- a/src/video_core/buffer_cache/memory_tracker_base.h
+++ b/src/video_core/buffer_cache/memory_tracker_base.h
@@ -15,13 +15,8 @@ namespace VideoCore {
 class MemoryTracker {
 public:
     static constexpr size_t MAX_CPU_PAGE_BITS = 40;
-    static constexpr size_t HIGHER_PAGE_BITS = 22;
-    static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
-    static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
     static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
     static constexpr size_t MANAGER_POOL_SIZE = 32;
-    static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
-    using Manager = WordManager<WORDS_STACK_NEEDED>;
 
 public:
     explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
@@ -30,7 +25,7 @@ public:
     /// Returns true if a region has been modified from the CPU
     [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
         return IteratePages(
-            query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
+            query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 return manager->template IsRegionModified<Type::CPU>(offset, size);
             });
     }
@@ -38,52 +33,34 @@ public:
     /// Returns true if a region has been modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
         return IteratePages(
-            query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
+            query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 return manager->template IsRegionModified<Type::GPU>(offset, size);
             });
     }
 
     /// Mark region as CPU modified, notifying the device_tracker about this change
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
-        IteratePages(dirty_cpu_addr, query_size,
-                     [](Manager* manager, u64 offset, size_t size) {
-                         manager->template ChangeRegionState<Type::CPU, true>(
-                             manager->GetCpuAddr() + offset, size);
-                     });
-    }
-
-    /// Unmark region as CPU modified, notifying the device_tracker about this change
-    void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
-        IteratePages(dirty_cpu_addr, query_size,
-                     [](Manager* manager, u64 offset, size_t size) {
-                         manager->template ChangeRegionState<Type::CPU, false>(
-                             manager->GetCpuAddr() + offset, size);
-                     });
+        IteratePages(dirty_cpu_addr, query_size,
+                     [](RegionManager* manager, u64 offset, size_t size) {
+                         manager->template ChangeRegionState<Type::CPU, true>(
+                             manager->GetCpuAddr() + offset, size);
+                     });
     }
 
     /// Mark region as modified from the host GPU
     void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
-        IteratePages(dirty_cpu_addr, query_size,
-                     [](Manager* manager, u64 offset, size_t size) {
-                         manager->template ChangeRegionState<Type::GPU, true>(
-                             manager->GetCpuAddr() + offset, size);
-                     });
-    }
-
-    /// Unmark region as modified from the host GPU
-    void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
-        IteratePages(dirty_cpu_addr, query_size,
-                     [](Manager* manager, u64 offset, size_t size) {
-                         manager->template ChangeRegionState<Type::GPU, false>(
-                             manager->GetCpuAddr() + offset, size);
-                     });
+        IteratePages(dirty_cpu_addr, query_size,
+                     [](RegionManager* manager, u64 offset, size_t size) {
+                         manager->template ChangeRegionState<Type::GPU, true>(
+                             manager->GetCpuAddr() + offset, size);
+                     });
     }
 
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
     template <typename Func>
     void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
         IteratePages(query_cpu_range, query_size,
-                     [&func](Manager* manager, u64 offset, size_t size) {
+                     [&func](RegionManager* manager, u64 offset, size_t size) {
                          manager->template ForEachModifiedRange<Type::CPU, true>(
                              manager->GetCpuAddr() + offset, size, func);
                      });
@@ -93,7 +70,7 @@ public:
     template <bool clear, typename Func>
     void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
         IteratePages(query_cpu_range, query_size,
-                     [&func](Manager* manager, u64 offset, size_t size) {
+                     [&func](RegionManager* manager, u64 offset, size_t size) {
                          if constexpr (clear) {
                              manager->template ForEachModifiedRange<Type::GPU, true>(
                                  manager->GetCpuAddr() + offset, size, func);
@@ -114,7 +91,7 @@ private:
      */
     template <bool create_region_on_fail, typename Func>
     bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
-        using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
+        using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
         static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
         std::size_t remaining_size{size};
         std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
@@ -155,7 +132,7 @@ private:
         manager_pool.emplace_back();
         auto& last_pool = manager_pool.back();
         for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
-            std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE);
+            std::construct_at(&last_pool[i], tracker, 0);
             free_managers.push_back(&last_pool[i]);
         }
     }
@@ -167,9 +144,9 @@ private:
     }
 
     PageManager* tracker;
-    std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
-    std::vector<Manager*> free_managers;
-    std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
+    std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
+    std::vector<RegionManager*> free_managers;
+    std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
 };
 
 } // namespace VideoCore
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
index ae85d1eb1..7ad33d7a6 100644
--- a/src/video_core/buffer_cache/word_manager.h
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -3,10 +3,12 @@
 
 #pragma once
 
-#include
+#include
+#include
 #include
 #include
-#include "common/div_ceil.h"
+
+#include "common/spin_lock.h"
 #include "common/types.h"
 #include "video_core/page_manager.h"
 
@@ -16,135 +18,32 @@ constexpr u64 PAGES_PER_WORD = 64;
 constexpr u64 BYTES_PER_PAGE = 4_KB;
 constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
 
+constexpr u64 HIGHER_PAGE_BITS = 22;
+constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
+constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
+constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
+
 enum class Type {
     CPU,
     GPU,
     Untracked,
 };
 
-/// Vector tracking modified pages tightly packed with small vector optimization
-template <size_t stack_words>
-struct WordsArray {
-    /// Returns the pointer to the words state
-    [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
-        return is_short ? stack.data() : heap;
-    }
+using WordsArray = std::array<u64, NUM_REGION_WORDS>;
 
-    /// Returns the pointer to the words state
-    [[nodiscard]] u64* Pointer(bool is_short) noexcept {
-        return is_short ? stack.data() : heap;
-    }
-
-    std::array<u64, stack_words> stack{}; ///< Small buffers storage
-    u64* heap; ///< Not-small buffers pointer to the storage
-};
-
-template <size_t stack_words>
-struct Words {
-    explicit Words() = default;
-    explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
-        num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
-        if (IsShort()) {
-            cpu.stack.fill(~u64{0});
-            gpu.stack.fill(0);
-            untracked.stack.fill(~u64{0});
-        } else {
-            // Share allocation between CPU and GPU pages and set their default values
-            u64* const alloc = new u64[num_words * 3];
-            cpu.heap = alloc;
-            gpu.heap = alloc + num_words;
-            untracked.heap = alloc + num_words * 2;
-            std::fill_n(cpu.heap, num_words, ~u64{0});
-            std::fill_n(gpu.heap, num_words, 0);
-            std::fill_n(untracked.heap, num_words, ~u64{0});
-        }
-        // Clean up tailing bits
-        const u64 last_word_size = size_bytes % BYTES_PER_WORD;
-        const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
-        const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
-        const u64 last_word = (~u64{0} << shift) >> shift;
-        cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
-        untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
-    }
-
-    ~Words() {
-        Release();
-    }
-
-    Words& operator=(Words&& rhs) noexcept {
-        Release();
-        size_bytes = rhs.size_bytes;
-        num_words = rhs.num_words;
-        cpu = rhs.cpu;
-        gpu = rhs.gpu;
-        untracked = rhs.untracked;
-        rhs.cpu.heap = nullptr;
-        return *this;
-    }
-
-    Words(Words&& rhs) noexcept
-        : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
-          untracked{rhs.untracked} {
-        rhs.cpu.heap = nullptr;
-    }
-
-    Words& operator=(const Words&) = delete;
-    Words(const Words&) = delete;
-
-    /// Returns true when the buffer fits in the small vector optimization
-    [[nodiscard]] bool IsShort() const noexcept {
-        return num_words <= stack_words;
-    }
-
-    /// Returns the number of words of the buffer
-    [[nodiscard]] size_t NumWords() const noexcept {
-        return num_words;
-    }
-
-    /// Release buffer resources
-    void Release() {
-        if (!IsShort()) {
-            // CPU written words is the base for the heap allocation
-            delete[] cpu.heap;
-        }
-    }
-
-    template <Type type>
-    std::span<u64> Span() noexcept {
-        if constexpr (type == Type::CPU) {
-            return std::span(cpu.Pointer(IsShort()), num_words);
-        } else if constexpr (type == Type::GPU) {
-            return std::span(gpu.Pointer(IsShort()), num_words);
-        } else if constexpr (type == Type::Untracked) {
-            return std::span(untracked.Pointer(IsShort()), num_words);
-        }
-    }
-
-    template <Type type>
-    std::span<const u64> Span() const noexcept {
-        if constexpr (type == Type::CPU) {
-            return std::span(cpu.Pointer(IsShort()), num_words);
-        } else if constexpr (type == Type::GPU) {
-            return std::span(gpu.Pointer(IsShort()), num_words);
-        } else if constexpr (type == Type::Untracked) {
-            return std::span(untracked.Pointer(IsShort()), num_words);
-        }
-    }
-
-    u64 size_bytes = 0;
-    size_t num_words = 0;
-    WordsArray<stack_words> cpu;
-    WordsArray<stack_words> gpu;
-    WordsArray<stack_words> untracked;
-};
-
-template <size_t stack_words>
-class WordManager {
+/**
+ * Allows tracking CPU and GPU modification of pages in a contiguous 4MB virtual address region.
+ * Information is stored in bitsets for spatial locality and fast update of single pages.
+ */
+class RegionManager {
 public:
-    explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes)
-        : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {}
-
-    explicit WordManager() = default;
+    explicit RegionManager(PageManager* tracker_, VAddr cpu_addr_)
+        : tracker{tracker_}, cpu_addr{cpu_addr_} {
+        cpu.fill(~u64{0});
+        gpu.fill(0);
+        untracked.fill(~u64{0});
+    }
+    explicit RegionManager() = default;
 
     void SetCpuAddress(VAddr new_cpu_addr) {
         cpu_addr = new_cpu_addr;
@@ -175,12 +74,12 @@ public:
         static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
         const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
         const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
-        if (start >= SizeBytes() || end <= start) {
+        if (start >= HIGHER_PAGE_SIZE || end <= start) {
             return;
         }
         auto [start_word, start_page] = GetWordPage(start);
         auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
-        const size_t num_words = NumWords();
+        constexpr size_t num_words = NUM_REGION_WORDS;
         start_word = std::min(start_word, num_words);
         end_word = std::min(end_word, num_words);
         const size_t diff = end_word - start_word;
@@ -225,21 +124,21 @@ public:
      */
     template <Type type, bool enable>
     void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
-        std::span<u64> state_words = words.template Span<type>();
-        [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
+        std::scoped_lock lk{lock};
+        std::span<u64> state_words = Span<type>();
         IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
             if constexpr (type == Type::CPU) {
-                NotifyPageTracker<!enable>(index, untracked_words[index], mask);
+                UpdateProtection<!enable>(index, untracked[index], mask);
             }
             if constexpr (enable) {
                 state_words[index] |= mask;
                 if constexpr (type == Type::CPU) {
-                    untracked_words[index] |= mask;
+                    untracked[index] |= mask;
                 }
             } else {
                 state_words[index] &= ~mask;
                 if constexpr (type == Type::CPU) {
-                    untracked_words[index] &= ~mask;
+                    untracked[index] &= ~mask;
                 }
             }
         });
@@ -255,10 +154,10 @@ public:
      */
     template <Type type, bool clear, typename Func>
     void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+        std::scoped_lock lk{lock};
         static_assert(type != Type::Untracked);
 
-        std::span<u64> state_words = words.template Span<type>();
-        [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
+        std::span<u64> state_words = Span<type>();
         const size_t offset = query_cpu_range - cpu_addr;
         bool pending = false;
         size_t pending_offset{};
@@ -269,16 +168,16 @@ public:
         };
         IterateWords(offset, size, [&](size_t index, u64 mask) {
             if constexpr (type == Type::GPU) {
-                mask &= ~untracked_words[index];
+                mask &= ~untracked[index];
             }
             const u64 word = state_words[index] & mask;
             if constexpr (clear) {
                 if constexpr (type == Type::CPU) {
-                    NotifyPageTracker<true>(index, untracked_words[index], mask);
+                    UpdateProtection<true>(index, untracked[index], mask);
                 }
                 state_words[index] &= ~mask;
                 if constexpr (type == Type::CPU) {
-                    untracked_words[index] &= ~mask;
+                    untracked[index] &= ~mask;
                 }
             }
             const size_t base_offset = index * PAGES_PER_WORD;
@@ -315,13 +214,11 @@ public:
     [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
         static_assert(type != Type::Untracked);
 
-        const std::span<const u64> state_words = words.template Span<type>();
-        [[maybe_unused]] const std::span<const u64> untracked_words =
-            words.template Span<Type::Untracked>();
+        const std::span<const u64> state_words = Span<type>();
         bool result = false;
         IterateWords(offset, size, [&](size_t index, u64 mask) {
             if constexpr (type == Type::GPU) {
-                mask &= ~untracked_words[index];
+                mask &= ~untracked[index];
             }
             const u64 word = state_words[index] & mask;
             if (word != 0) {
@@ -333,44 +230,7 @@ public:
         return result;
     }
 
-    /// Returns the number of words of the manager
-    [[nodiscard]] size_t NumWords() const noexcept {
-        return words.NumWords();
-    }
-
-    /// Returns the size in bytes of the manager
-    [[nodiscard]] u64 SizeBytes() const noexcept {
-        return words.size_bytes;
-    }
-
-    /// Returns true when the buffer fits in the small vector optimization
-    [[nodiscard]] bool IsShort() const noexcept {
-        return words.IsShort();
-    }
-
 private:
-    template <Type type>
-    u64* Array() noexcept {
-        if constexpr (type == Type::CPU) {
-            return words.cpu.Pointer(IsShort());
-        } else if constexpr (type == Type::GPU) {
-            return words.gpu.Pointer(IsShort());
-        } else if constexpr (type == Type::Untracked) {
-            return words.untracked.Pointer(IsShort());
-        }
-    }
-
-    template <Type type>
-    const u64* Array() const noexcept {
-        if constexpr (type == Type::CPU) {
-            return words.cpu.Pointer(IsShort());
-        } else if constexpr (type == Type::GPU) {
-            return words.gpu.Pointer(IsShort());
-        } else if constexpr (type == Type::Untracked) {
-            return words.untracked.Pointer(IsShort());
-        }
-    }
-
     /**
      * Notify tracker about changes in the CPU tracking state of a word in the buffer
      *
@@ -381,7 +241,7 @@ private:
      * @tparam add_to_tracker True when the tracker should start tracking the new pages
      */
     template <bool add_to_tracker>
-    void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const {
+    void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
        u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
         VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
         IteratePages(changed_bits, [&](size_t offset, size_t size) {
@@ -390,9 +250,34 @@
         });
     }
 
+    template <Type type>
+    std::span<u64> Span() noexcept {
+        if constexpr (type == Type::CPU) {
+            return cpu;
+        } else if constexpr (type == Type::GPU) {
+            return gpu;
+        } else if constexpr (type == Type::Untracked) {
+            return untracked;
+        }
+    }
+
+    template <Type type>
+    std::span<const u64> Span() const noexcept {
+        if constexpr (type == Type::CPU) {
+            return cpu;
+        } else if constexpr (type == Type::GPU) {
+            return gpu;
+        } else if constexpr (type == Type::Untracked) {
+            return untracked;
+        }
+    }
+
+    Common::SpinLock lock;
     PageManager* tracker;
     VAddr cpu_addr = 0;
-    Words<stack_words> words;
+    WordsArray cpu;
+    WordsArray gpu;
+    WordsArray untracked;
 };
 
 } // namespace VideoCore
diff --git a/src/video_core/multi_level_page_table.h b/src/video_core/multi_level_page_table.h
index 527476f3b..7f3205e1a 100644
--- a/src/video_core/multi_level_page_table.h
+++ b/src/video_core/multi_level_page_table.h
@@ -39,6 +39,15 @@ public:
         return &(*first_level_map[l1_page])[l2_page];
     }
 
+    [[nodiscard]] const Entry* find(size_t page) const {
+        const size_t l1_page = page >> SecondLevelBits;
+        const size_t l2_page = page & (NumEntriesPerL1Page - 1);
+        if (!first_level_map[l1_page]) {
+            return nullptr;
+        }
+        return &(*first_level_map[l1_page])[l2_page];
+    }
+
     [[nodiscard]] const Entry& operator[](size_t page) const {
         const size_t l1_page = page >> SecondLevelBits;
         const size_t l2_page = page & (NumEntriesPerL1Page - 1);
diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp
index 556555c25..47ed9e543 100644
--- a/src/video_core/page_manager.cpp
+++ b/src/video_core/page_manager.cpp
@@ -185,7 +185,7 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
 
 void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
     static constexpr u64 PageShift = 12;
-    std::scoped_lock lk{mutex};
+    std::scoped_lock lk{lock};
     const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
     const u64 page_start = addr >> PageShift;
     const u64 page_end = page_start + num_pages;
diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h
index 29a946a8f..f44307f92 100644
--- a/src/video_core/page_manager.h
+++ b/src/video_core/page_manager.h
@@ -4,8 +4,8 @@
 #pragma once
 
 #include
-#include <mutex>
 #include
+#include "common/spin_lock.h"
 #include "common/types.h"
 
 namespace Vulkan {
@@ -35,8 +35,8 @@ private:
     struct Impl;
     std::unique_ptr<Impl> impl;
     Vulkan::Rasterizer* rasterizer;
-    std::mutex mutex;
     boost::icl::interval_map cached_pages;
+    Common::SpinLock lock;
 };
 
 } // namespace VideoCore