diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d50e3bf4..ab846fa9d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -683,6 +683,7 @@ set(COMMON src/common/logging/backend.cpp src/common/path_util.h src/common/object_pool.h src/common/polyfill_thread.h + src/common/range_lock.h src/common/rdtsc.cpp src/common/rdtsc.h src/common/recursive_lock.cpp diff --git a/src/common/adaptive_mutex.h b/src/common/adaptive_mutex.h index f174f5996..2ab385bdb 100644 --- a/src/common/adaptive_mutex.h +++ b/src/common/adaptive_mutex.h @@ -18,6 +18,9 @@ public: void unlock() { pthread_mutex_unlock(&mutex); } + [[nodiscard]] bool try_lock() { + return pthread_mutex_trylock(&mutex) == 0; + } private: pthread_mutex_t mutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP; diff --git a/src/common/range_lock.h b/src/common/range_lock.h new file mode 100644 index 000000000..efe6eb549 --- /dev/null +++ b/src/common/range_lock.h @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +namespace Common { + +// From boost thread locking + +template +struct RangeLockGuard { + Iterator begin; + Iterator end; + + RangeLockGuard(Iterator begin_, Iterator end_) : begin(begin_), end(end_) { + LockRange(begin, end); + } + + void release() { + begin = end; + } + + ~RangeLockGuard() { + for (; begin != end; ++begin) { + begin->unlock(); + } + } +}; + +template +Iterator TryLockRange(Iterator begin, Iterator end) { + using LockType = typename std::iterator_traits::value_type; + + if (begin == end) { + return end; + } + + std::unique_lock guard(*begin, std::try_to_lock); + if (!guard.owns_lock()) { + return begin; + } + + Iterator failed = TryLockRange(++begin, end); + if (failed == end) { + guard.release(); + } + + return failed; +} + +template +void LockRange(Iterator begin, Iterator end) { + using LockType = typename std::iterator_traits::value_type; + + if (begin == end) { + return; + } + 
+ bool start_with_begin = true; + Iterator second = begin; + ++second; + Iterator next = second; + + while (true) { + std::unique_lock begin_lock(*begin, std::defer_lock); + if (start_with_begin) { + begin_lock.lock(); + + const Iterator failed_lock = TryLockRange(next, end); + if (failed_lock == end) { + begin_lock.release(); + return; + } + + start_with_begin = false; + next = failed_lock; + } else { + RangeLockGuard guard(next, end); + + if (begin_lock.try_lock()) { + const Iterator failed_lock = TryLockRange(second, next); + if (failed_lock == next) { + begin_lock.release(); + guard.release(); + return; + } + + start_with_begin = false; + next = failed_lock; + } else { + start_with_begin = true; + next = second; + } + } + } +} + +} // namespace Common \ No newline at end of file diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index 37fafa2d6..3dbffdabd 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -16,7 +16,7 @@ namespace VideoCore { class MemoryTracker { public: static constexpr size_t MAX_CPU_PAGE_BITS = 40; - static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); + static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - TRACKER_HIGHER_PAGE_BITS); static constexpr size_t MANAGER_POOL_SIZE = 32; public: @@ -90,11 +90,11 @@ private: using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; std::size_t remaining_size{size}; - std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; - u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; + std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS}; + u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK}; while (remaining_size > 0) { const std::size_t copy_amount{ - std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; + std::min(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)}; auto* 
manager{top_tier[page_index]}; if (manager) { if constexpr (BOOL_BREAK) { @@ -123,7 +123,7 @@ private: } void CreateRegion(std::size_t page_index) { - const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; + const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS; if (free_managers.empty()) { manager_pool.emplace_back(); auto& last_pool = manager_pool.back(); diff --git a/src/video_core/buffer_cache/region_definitions.h b/src/video_core/buffer_cache/region_definitions.h index 80c6afdc6..f035704d9 100644 --- a/src/video_core/buffer_cache/region_definitions.h +++ b/src/video_core/buffer_cache/region_definitions.h @@ -9,13 +9,13 @@ namespace VideoCore { -constexpr u64 PAGES_PER_WORD = 64; -constexpr u64 BYTES_PER_PAGE = 4_KB; +constexpr u64 TRACKER_PAGE_BITS = 12; // 4K pages +constexpr u64 TRACKER_BYTES_PER_PAGE = 1ULL << TRACKER_PAGE_BITS; -constexpr u64 HIGHER_PAGE_BITS = 22; -constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; -constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; -constexpr u64 NUM_REGION_PAGES = HIGHER_PAGE_SIZE / BYTES_PER_PAGE; +constexpr u64 TRACKER_HIGHER_PAGE_BITS = 24; // each region is 16MB +constexpr u64 TRACKER_HIGHER_PAGE_SIZE = 1ULL << TRACKER_HIGHER_PAGE_BITS; +constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL; +constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE; enum class Type { CPU, @@ -23,6 +23,6 @@ enum class Type { Writeable, }; -using RegionBits = Common::BitArray; +using RegionBits = Common::BitArray; } // namespace VideoCore \ No newline at end of file diff --git a/src/video_core/buffer_cache/region_manager.h b/src/video_core/buffer_cache/region_manager.h index 07ffee36b..e8ec21129 100644 --- a/src/video_core/buffer_cache/region_manager.h +++ b/src/video_core/buffer_cache/region_manager.h @@ -83,9 +83,10 @@ public: void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { RENDERER_TRACE; const size_t offset = dirty_addr 
- cpu_addr; - const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE; - const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE); - if (start_page >= NUM_REGION_PAGES || end_page <= start_page) { + const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; + const size_t end_page = + Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE); + if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return; } std::scoped_lock lk{lock}; @@ -114,9 +115,10 @@ public: void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) { RENDERER_TRACE; const size_t offset = query_cpu_range - cpu_addr; - const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE; - const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE); - if (start_page >= NUM_REGION_PAGES || end_page <= start_page) { + const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; + const size_t end_page = + Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE); + if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return; } std::scoped_lock lk{lock}; @@ -131,7 +133,7 @@ public: } for (const auto& [start, end] : mask) { - func(cpu_addr + start * BYTES_PER_PAGE, (end - start) * BYTES_PER_PAGE); + func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE); } if constexpr (clear) { @@ -151,9 +153,10 @@ public: template [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { RENDERER_TRACE; - const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE; - const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE); - if (start_page >= NUM_REGION_PAGES || end_page <= start_page) { + const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE; + const size_t end_page = + Common::DivCeil(SanitizeAddress(offset + size), 
TRACKER_BYTES_PER_PAGE); + if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) { return false; } // std::scoped_lock lk{lock}; // Is this needed? diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 145779070..15dbf909c 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -4,6 +4,7 @@ #include #include "common/assert.h" #include "common/debug.h" +#include "common/range_lock.h" #include "common/signal_context.h" #include "core/memory.h" #include "core/signals.h" @@ -59,6 +60,7 @@ struct PageManager::Impl { static constexpr size_t ADDRESS_BITS = 40; static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS); + static constexpr size_t NUM_ADDRESS_LOCKS = NUM_ADDRESS_PAGES / PAGES_PER_LOCK; inline static Vulkan::Rasterizer* rasterizer; #ifdef ENABLE_USERFAULTFD Impl(Vulkan::Rasterizer* rasterizer_) { @@ -191,9 +193,17 @@ struct PageManager::Impl { RENDERER_TRACE; size_t page = addr >> PAGE_BITS; + const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); + + // Acquire locks for the range of pages + const auto lock_start = locks.begin() + (page / PAGES_PER_LOCK); + const auto lock_end = locks.begin() + Common::DivCeil(page_end, PAGES_PER_LOCK); + Common::RangeLockGuard lk(lock_start, lock_end); + auto perms = cached_pages[page].Perm(); u64 range_begin = 0; u64 range_bytes = 0; + u64 potential_range_bytes = 0; const auto release_pending = [&] { if (range_bytes > 0) { @@ -201,13 +211,11 @@ struct PageManager::Impl { // Perform pending (un)protect action Protect(range_begin << PAGE_BITS, range_bytes, perms); range_bytes = 0; + potential_range_bytes = 0; } }; - std::scoped_lock lk(lock); - // Iterate requested pages - const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); const u64 aligned_addr = page << PAGE_BITS; const u64 aligned_end = page_end << PAGE_BITS; ASSERT_MSG(rasterizer->IsMapped(aligned_addr, aligned_end - aligned_addr), @@ -225,14 +233,19 @@ struct 
PageManager::Impl { release_pending(); perms = new_perms; } else if (range_bytes != 0) { - // If the protection did not change, extend the current range - range_bytes += PAGE_SIZE; + // If the protection did not change, extend the potential range + potential_range_bytes += PAGE_SIZE; } // Only start a new range if the page must be (un)protected - if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) { - range_begin = page; - range_bytes = PAGE_SIZE; + if ((new_count == 0 && !track) || (new_count == 1 && track)) { + if (range_bytes == 0) { + // Start a new potential range + range_begin = page; + potential_range_bytes = PAGE_SIZE; + } + // Extend current range up to potential range + range_bytes = potential_range_bytes; } } @@ -256,9 +269,12 @@ struct PageManager::Impl { } size_t base_page = (base_addr >> PAGE_BITS); + ASSERT(base_page % PAGES_PER_LOCK == 0); + std::scoped_lock lk(locks[base_page / PAGES_PER_LOCK]); auto perms = cached_pages[base_page + start_range.first].Perm(); u64 range_begin = 0; u64 range_bytes = 0; + u64 potential_range_bytes = 0; const auto release_pending = [&] { if (range_bytes > 0) { @@ -266,11 +282,10 @@ struct PageManager::Impl { // Perform pending (un)protect action Protect((range_begin << PAGE_BITS), range_bytes, perms); range_bytes = 0; + potential_range_bytes = 0; } }; - std::scoped_lock lk(lock); - // Iterate pages for (size_t page = start_range.first; page < end_range.second; ++page) { PageState& state = cached_pages[base_page + page]; @@ -284,8 +299,8 @@ struct PageManager::Impl { release_pending(); perms = new_perms; } else if (range_bytes != 0) { - // If the protection did not change, extend the current range - range_bytes += PAGE_SIZE; + // If the protection did not change, extend the potential range + potential_range_bytes += PAGE_SIZE; } // If the page is not being updated, skip it @@ -293,10 +308,15 @@ struct PageManager::Impl { continue; } - // Only start a new range if the page must be 
(un)protected - if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) { - range_begin = base_page + page; - range_bytes = PAGE_SIZE; + // If the page must be (un)protected + if ((new_count == 0 && !track) || (new_count == 1 && track)) { + if (range_bytes == 0) { + // Start a new potential range + range_begin = base_page + page; + potential_range_bytes = PAGE_SIZE; + } + // Extend current range up to potential range + range_bytes = potential_range_bytes; } } @@ -306,10 +326,11 @@ struct PageManager::Impl { std::array cached_pages{}; #ifdef __linux__ - Common::AdaptiveMutex lock; + using LockType = Common::AdaptiveMutex; #else - Common::SpinLock lock; + using LockType = Common::SpinLock; #endif + std::array locks{}; }; PageManager::PageManager(Vulkan::Rasterizer* rasterizer_) diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 157b34984..561087ead 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -15,8 +15,13 @@ class Rasterizer; namespace VideoCore { class PageManager { - static constexpr size_t PAGE_BITS = 12; - static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS; + // Use the same page size as the tracker. + static constexpr size_t PAGE_BITS = TRACKER_PAGE_BITS; + static constexpr size_t PAGE_SIZE = TRACKER_BYTES_PER_PAGE; + + // Keep the lock granularity the same as region granularity. (since each region has + // its own lock) + static constexpr size_t PAGES_PER_LOCK = NUM_PAGES_PER_REGION; public: explicit PageManager(Vulkan::Rasterizer* rasterizer);