video_core: Page manager and memory tracker improvements (#3155)

* I don't know what to put here

* clang-format

* clang-format 2.0

* Deadlock free locking

* Port boost range lock implementation

* clang-format
This commit is contained in:
Lander Gallastegi 2025-06-26 18:38:53 +02:00 committed by GitHub
parent a49b13fe66
commit 9f37ede336
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 176 additions and 42 deletions

View file

@ -683,6 +683,7 @@ set(COMMON src/common/logging/backend.cpp
src/common/path_util.h
src/common/object_pool.h
src/common/polyfill_thread.h
src/common/range_lock.h
src/common/rdtsc.cpp
src/common/rdtsc.h
src/common/recursive_lock.cpp

View file

@ -18,6 +18,9 @@ public:
// Releases the wrapped pthread mutex. The pthread return code is discarded.
void unlock() {
    static_cast<void>(pthread_mutex_unlock(&mutex));
}
// Attempts to acquire the wrapped pthread mutex via pthread_mutex_trylock.
// Returns true when the call reports success (status 0), false otherwise.
[[nodiscard]] bool try_lock() {
    const int status = pthread_mutex_trylock(&mutex);
    return status == 0;
}
private:
pthread_mutex_t mutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;

101
src/common/range_lock.h Normal file
View file

@ -0,0 +1,101 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <iterator>
#include <mutex>
namespace Common {

// Helpers for acquiring every lock in an iterator range, ported from Boost.Thread's range
// locking.
//
// Requirements shared by everything below:
//  - Iterator is multi-pass: the same range is traversed more than once.
//  - std::iterator_traits<Iterator>::value_type provides lock(), try_lock() and unlock().

// Declared ahead of RangeLockGuard so that the guard's constructor resolves the call through
// ordinary lookup. Without this the call is only found by argument-dependent lookup, which works
// solely when the lock type is associated with namespace Common.
template <typename Iterator>
void LockRange(Iterator begin, Iterator end);

/// Scope guard that locks every element of [begin, end) on construction (through LockRange) and
/// unlocks the elements still tracked on destruction.
template <typename Iterator>
struct RangeLockGuard {
    Iterator begin;
    Iterator end;

    /// Blocks until all locks in [begin_, end_) are held by the caller.
    RangeLockGuard(Iterator begin_, Iterator end_) : begin(begin_), end(end_) {
        Common::LockRange(begin, end);
    }

    // A copy would unlock every element a second time when it is destroyed.
    RangeLockGuard(const RangeLockGuard&) = delete;
    RangeLockGuard& operator=(const RangeLockGuard&) = delete;

    /// Stops tracking the range: the destructor will no longer unlock anything. The locks
    /// themselves stay held and must be unlocked by the caller.
    void release() {
        begin = end;
    }

    /// Unlocks the tracked elements in forward order.
    ~RangeLockGuard() {
        for (; begin != end; ++begin) {
            begin->unlock();
        }
    }
};

/// Tries to acquire every lock in [begin, end) without blocking.
///
/// @return `end` when all locks were acquired (they are left held). Otherwise the iterator to the
///         first lock whose try_lock() failed; in that case every lock acquired by this call has
///         been unlocked again before returning.
///
/// The implementation recurses once per element, so the recursion depth equals the range length.
template <typename Iterator>
[[nodiscard]] Iterator TryLockRange(Iterator begin, Iterator end) {
    using LockType = typename std::iterator_traits<Iterator>::value_type;
    if (begin == end) {
        return end;
    }
    // Owns *begin only until the rest of the range is known to be acquired.
    std::unique_lock<LockType> guard(*begin, std::try_to_lock);
    if (!guard.owns_lock()) {
        return begin;
    }
    Iterator failed = Common::TryLockRange(++begin, end);
    if (failed == end) {
        // Whole tail acquired: keep this lock held past the guard's lifetime.
        guard.release();
    }
    // On failure `guard` unlocks this element while the recursion unwinds.
    return failed;
}

/// Blocks until every lock in [begin, end) is held by the caller.
///
/// Only one lock is ever waited on in a blocking fashion while no other lock of the range is
/// held at the outermost level; the remaining ones are taken with try_lock(). When a try_lock()
/// fails, everything acquired in that attempt is unlocked and the next attempt blocks on the
/// sub-range starting at the lock that failed. An empty range returns immediately.
template <typename Iterator>
void LockRange(Iterator begin, Iterator end) {
    using LockType = typename std::iterator_traits<Iterator>::value_type;
    if (begin == end) {
        return;
    }
    // true  -> block on *begin, then try-lock [next, end)
    // false -> block on [next, end), then try-lock *begin followed by [second, next)
    bool start_with_begin = true;
    Iterator second = begin;
    ++second;
    Iterator next = second;
    while (true) {
        // Re-created every iteration: if the attempt fails, its destructor unlocks *begin.
        std::unique_lock<LockType> begin_lock(*begin, std::defer_lock);
        if (start_with_begin) {
            begin_lock.lock();
            const Iterator failed_lock = Common::TryLockRange(next, end);
            if (failed_lock == end) {
                // Everything is held; hand ownership to the caller.
                begin_lock.release();
                return;
            }
            // Next attempt blocks on the range starting at the lock that refused.
            start_with_begin = false;
            next = failed_lock;
        } else {
            // Blocking acquisition of [next, end); unlocked by `guard` if this attempt fails.
            RangeLockGuard<Iterator> guard(next, end);
            if (begin_lock.try_lock()) {
                const Iterator failed_lock = Common::TryLockRange(second, next);
                if (failed_lock == next) {
                    // *begin, [second, next) and [next, end) are all held.
                    begin_lock.release();
                    guard.release();
                    return;
                }
                start_with_begin = false;
                next = failed_lock;
            } else {
                // *begin is busy: go back to blocking on it first.
                start_with_begin = true;
                next = second;
            }
        }
    }
}

} // namespace Common

View file

@ -16,7 +16,7 @@ namespace VideoCore {
class MemoryTracker {
public:
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - TRACKER_HIGHER_PAGE_BITS);
static constexpr size_t MANAGER_POOL_SIZE = 32;
public:
@ -90,11 +90,11 @@ private:
using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
std::size_t remaining_size{size};
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS};
u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
std::min<std::size_t>(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)};
auto* manager{top_tier[page_index]};
if (manager) {
if constexpr (BOOL_BREAK) {
@ -123,7 +123,7 @@ private:
}
void CreateRegion(std::size_t page_index) {
const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS;
if (free_managers.empty()) {
manager_pool.emplace_back();
auto& last_pool = manager_pool.back();

View file

@ -9,13 +9,13 @@
namespace VideoCore {
constexpr u64 PAGES_PER_WORD = 64;
constexpr u64 BYTES_PER_PAGE = 4_KB;
constexpr u64 TRACKER_PAGE_BITS = 12; // 4K pages
constexpr u64 TRACKER_BYTES_PER_PAGE = 1ULL << TRACKER_PAGE_BITS;
constexpr u64 HIGHER_PAGE_BITS = 22;
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
constexpr u64 NUM_REGION_PAGES = HIGHER_PAGE_SIZE / BYTES_PER_PAGE;
constexpr u64 TRACKER_HIGHER_PAGE_BITS = 24; // each region is 16MB
constexpr u64 TRACKER_HIGHER_PAGE_SIZE = 1ULL << TRACKER_HIGHER_PAGE_BITS;
constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL;
constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE;
enum class Type {
CPU,
@ -23,6 +23,6 @@ enum class Type {
Writeable,
};
using RegionBits = Common::BitArray<NUM_REGION_PAGES>;
using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>;
} // namespace VideoCore

View file

@ -83,9 +83,10 @@ public:
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
RENDERER_TRACE;
const size_t offset = dirty_addr - cpu_addr;
const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE;
const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE);
if (start_page >= NUM_REGION_PAGES || end_page <= start_page) {
const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE;
const size_t end_page =
Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE);
if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
return;
}
std::scoped_lock lk{lock};
@ -114,9 +115,10 @@ public:
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) {
RENDERER_TRACE;
const size_t offset = query_cpu_range - cpu_addr;
const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE;
const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE);
if (start_page >= NUM_REGION_PAGES || end_page <= start_page) {
const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE;
const size_t end_page =
Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE);
if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
return;
}
std::scoped_lock lk{lock};
@ -131,7 +133,7 @@ public:
}
for (const auto& [start, end] : mask) {
func(cpu_addr + start * BYTES_PER_PAGE, (end - start) * BYTES_PER_PAGE);
func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE);
}
if constexpr (clear) {
@ -151,9 +153,10 @@ public:
template <Type type>
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
RENDERER_TRACE;
const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE;
const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE);
if (start_page >= NUM_REGION_PAGES || end_page <= start_page) {
const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE;
const size_t end_page =
Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE);
if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
return false;
}
// std::scoped_lock lk{lock}; // Is this needed?

View file

@ -4,6 +4,7 @@
#include <boost/container/small_vector.hpp>
#include "common/assert.h"
#include "common/debug.h"
#include "common/range_lock.h"
#include "common/signal_context.h"
#include "core/memory.h"
#include "core/signals.h"
@ -59,6 +60,7 @@ struct PageManager::Impl {
static constexpr size_t ADDRESS_BITS = 40;
static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS);
static constexpr size_t NUM_ADDRESS_LOCKS = NUM_ADDRESS_PAGES / PAGES_PER_LOCK;
inline static Vulkan::Rasterizer* rasterizer;
#ifdef ENABLE_USERFAULTFD
Impl(Vulkan::Rasterizer* rasterizer_) {
@ -191,9 +193,17 @@ struct PageManager::Impl {
RENDERER_TRACE;
size_t page = addr >> PAGE_BITS;
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
// Acquire locks for the range of pages
const auto lock_start = locks.begin() + (page / PAGES_PER_LOCK);
const auto lock_end = locks.begin() + Common::DivCeil(page_end, PAGES_PER_LOCK);
Common::RangeLockGuard lk(lock_start, lock_end);
auto perms = cached_pages[page].Perm();
u64 range_begin = 0;
u64 range_bytes = 0;
u64 potential_range_bytes = 0;
const auto release_pending = [&] {
if (range_bytes > 0) {
@ -201,13 +211,11 @@ struct PageManager::Impl {
// Perform pending (un)protect action
Protect(range_begin << PAGE_BITS, range_bytes, perms);
range_bytes = 0;
potential_range_bytes = 0;
}
};
std::scoped_lock lk(lock);
// Iterate requested pages
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
const u64 aligned_addr = page << PAGE_BITS;
const u64 aligned_end = page_end << PAGE_BITS;
ASSERT_MSG(rasterizer->IsMapped(aligned_addr, aligned_end - aligned_addr),
@ -225,14 +233,19 @@ struct PageManager::Impl {
release_pending();
perms = new_perms;
} else if (range_bytes != 0) {
// If the protection did not change, extend the current range
range_bytes += PAGE_SIZE;
// If the protection did not change, extend the potential range
potential_range_bytes += PAGE_SIZE;
}
// Only start a new range if the page must be (un)protected
if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) {
if ((new_count == 0 && !track) || (new_count == 1 && track)) {
if (range_bytes == 0) {
// Start a new potential range
range_begin = page;
range_bytes = PAGE_SIZE;
potential_range_bytes = PAGE_SIZE;
}
// Extend current range up to potential range
range_bytes = potential_range_bytes;
}
}
@ -256,9 +269,12 @@ struct PageManager::Impl {
}
size_t base_page = (base_addr >> PAGE_BITS);
ASSERT(base_page % PAGES_PER_LOCK == 0);
std::scoped_lock lk(locks[base_page / PAGES_PER_LOCK]);
auto perms = cached_pages[base_page + start_range.first].Perm();
u64 range_begin = 0;
u64 range_bytes = 0;
u64 potential_range_bytes = 0;
const auto release_pending = [&] {
if (range_bytes > 0) {
@ -266,11 +282,10 @@ struct PageManager::Impl {
// Perform pending (un)protect action
Protect((range_begin << PAGE_BITS), range_bytes, perms);
range_bytes = 0;
potential_range_bytes = 0;
}
};
std::scoped_lock lk(lock);
// Iterate pages
for (size_t page = start_range.first; page < end_range.second; ++page) {
PageState& state = cached_pages[base_page + page];
@ -284,8 +299,8 @@ struct PageManager::Impl {
release_pending();
perms = new_perms;
} else if (range_bytes != 0) {
// If the protection did not change, extend the current range
range_bytes += PAGE_SIZE;
// If the protection did not change, extend the potential range
potential_range_bytes += PAGE_SIZE;
}
// If the page is not being updated, skip it
@ -293,10 +308,15 @@ struct PageManager::Impl {
continue;
}
// Only start a new range if the page must be (un)protected
if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) {
// If the page must be (un)protected
if ((new_count == 0 && !track) || (new_count == 1 && track)) {
if (range_bytes == 0) {
// Start a new potential range
range_begin = base_page + page;
range_bytes = PAGE_SIZE;
potential_range_bytes = PAGE_SIZE;
}
// Extend current range up to potential range
range_bytes = potential_range_bytes;
}
}
@ -306,10 +326,11 @@ struct PageManager::Impl {
std::array<PageState, NUM_ADDRESS_PAGES> cached_pages{};
#ifdef __linux__
Common::AdaptiveMutex lock;
using LockType = Common::AdaptiveMutex;
#else
Common::SpinLock lock;
using LockType = Common::SpinLock;
#endif
std::array<LockType, NUM_ADDRESS_LOCKS> locks{};
};
PageManager::PageManager(Vulkan::Rasterizer* rasterizer_)

View file

@ -15,8 +15,13 @@ class Rasterizer;
namespace VideoCore {
class PageManager {
static constexpr size_t PAGE_BITS = 12;
static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS;
// Use the same page size as the tracker.
static constexpr size_t PAGE_BITS = TRACKER_PAGE_BITS;
static constexpr size_t PAGE_SIZE = TRACKER_BYTES_PER_PAGE;
// Keep the lock granularity the same as the region granularity (since each region has
// its own lock).
static constexpr size_t PAGES_PER_LOCK = NUM_PAGES_PER_REGION;
public:
explicit PageManager(Vulkan::Rasterizer* rasterizer);