mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-12 12:45:56 +00:00
video_core: Page manager and memory tracker improvenets (#3155)
* I don't know what to put here * clang-format * clang-format 2.0 * Deadlock free locking * Por boost range lock implementation * clang-format
This commit is contained in:
parent
a49b13fe66
commit
9f37ede336
8 changed files with 176 additions and 42 deletions
|
@ -683,6 +683,7 @@ set(COMMON src/common/logging/backend.cpp
|
|||
src/common/path_util.h
|
||||
src/common/object_pool.h
|
||||
src/common/polyfill_thread.h
|
||||
src/common/range_lock.h
|
||||
src/common/rdtsc.cpp
|
||||
src/common/rdtsc.h
|
||||
src/common/recursive_lock.cpp
|
||||
|
|
|
@ -18,6 +18,9 @@ public:
|
|||
void unlock() {
|
||||
pthread_mutex_unlock(&mutex);
|
||||
}
|
||||
[[nodiscard]] bool try_lock() {
|
||||
return pthread_mutex_trylock(&mutex) == 0;
|
||||
}
|
||||
|
||||
private:
|
||||
pthread_mutex_t mutex = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;
|
||||
|
|
101
src/common/range_lock.h
Normal file
101
src/common/range_lock.h
Normal file
|
@ -0,0 +1,101 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iterator>
|
||||
#include <mutex>
|
||||
|
||||
namespace Common {
|
||||
|
||||
// From boost thread locking
|
||||
|
||||
template <typename Iterator>
|
||||
struct RangeLockGuard {
|
||||
Iterator begin;
|
||||
Iterator end;
|
||||
|
||||
RangeLockGuard(Iterator begin_, Iterator end_) : begin(begin_), end(end_) {
|
||||
LockRange(begin, end);
|
||||
}
|
||||
|
||||
void release() {
|
||||
begin = end;
|
||||
}
|
||||
|
||||
~RangeLockGuard() {
|
||||
for (; begin != end; ++begin) {
|
||||
begin->unlock();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Iterator>
|
||||
Iterator TryLockRange(Iterator begin, Iterator end) {
|
||||
using LockType = typename std::iterator_traits<Iterator>::value_type;
|
||||
|
||||
if (begin == end) {
|
||||
return end;
|
||||
}
|
||||
|
||||
std::unique_lock<LockType> guard(*begin, std::try_to_lock);
|
||||
if (!guard.owns_lock()) {
|
||||
return begin;
|
||||
}
|
||||
|
||||
Iterator failed = TryLockRange(++begin, end);
|
||||
if (failed == end) {
|
||||
guard.release();
|
||||
}
|
||||
|
||||
return failed;
|
||||
}
|
||||
|
||||
template <typename Iterator>
|
||||
void LockRange(Iterator begin, Iterator end) {
|
||||
using LockType = typename std::iterator_traits<Iterator>::value_type;
|
||||
|
||||
if (begin == end) {
|
||||
return;
|
||||
}
|
||||
|
||||
bool start_with_begin = true;
|
||||
Iterator second = begin;
|
||||
++second;
|
||||
Iterator next = second;
|
||||
|
||||
while (true) {
|
||||
std::unique_lock<LockType> begin_lock(*begin, std::defer_lock);
|
||||
if (start_with_begin) {
|
||||
begin_lock.lock();
|
||||
|
||||
const Iterator failed_lock = TryLockRange(next, end);
|
||||
if (failed_lock == end) {
|
||||
begin_lock.release();
|
||||
return;
|
||||
}
|
||||
|
||||
start_with_begin = false;
|
||||
next = failed_lock;
|
||||
} else {
|
||||
RangeLockGuard<Iterator> guard(next, end);
|
||||
|
||||
if (begin_lock.try_lock()) {
|
||||
const Iterator failed_lock = TryLockRange(second, next);
|
||||
if (failed_lock == next) {
|
||||
begin_lock.release();
|
||||
guard.release();
|
||||
return;
|
||||
}
|
||||
|
||||
start_with_begin = false;
|
||||
next = failed_lock;
|
||||
} else {
|
||||
start_with_begin = true;
|
||||
next = second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common
|
|
@ -16,7 +16,7 @@ namespace VideoCore {
|
|||
class MemoryTracker {
|
||||
public:
|
||||
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
|
||||
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
|
||||
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - TRACKER_HIGHER_PAGE_BITS);
|
||||
static constexpr size_t MANAGER_POOL_SIZE = 32;
|
||||
|
||||
public:
|
||||
|
@ -90,11 +90,11 @@ private:
|
|||
using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
|
||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||
std::size_t remaining_size{size};
|
||||
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
|
||||
u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
|
||||
std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS};
|
||||
u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK};
|
||||
while (remaining_size > 0) {
|
||||
const std::size_t copy_amount{
|
||||
std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
|
||||
std::min<std::size_t>(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)};
|
||||
auto* manager{top_tier[page_index]};
|
||||
if (manager) {
|
||||
if constexpr (BOOL_BREAK) {
|
||||
|
@ -123,7 +123,7 @@ private:
|
|||
}
|
||||
|
||||
void CreateRegion(std::size_t page_index) {
|
||||
const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
|
||||
const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS;
|
||||
if (free_managers.empty()) {
|
||||
manager_pool.emplace_back();
|
||||
auto& last_pool = manager_pool.back();
|
||||
|
|
|
@ -9,13 +9,13 @@
|
|||
|
||||
namespace VideoCore {
|
||||
|
||||
constexpr u64 PAGES_PER_WORD = 64;
|
||||
constexpr u64 BYTES_PER_PAGE = 4_KB;
|
||||
constexpr u64 TRACKER_PAGE_BITS = 12; // 4K pages
|
||||
constexpr u64 TRACKER_BYTES_PER_PAGE = 1ULL << TRACKER_PAGE_BITS;
|
||||
|
||||
constexpr u64 HIGHER_PAGE_BITS = 22;
|
||||
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
|
||||
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
|
||||
constexpr u64 NUM_REGION_PAGES = HIGHER_PAGE_SIZE / BYTES_PER_PAGE;
|
||||
constexpr u64 TRACKER_HIGHER_PAGE_BITS = 24; // each region is 16MB
|
||||
constexpr u64 TRACKER_HIGHER_PAGE_SIZE = 1ULL << TRACKER_HIGHER_PAGE_BITS;
|
||||
constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL;
|
||||
constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE;
|
||||
|
||||
enum class Type {
|
||||
CPU,
|
||||
|
@ -23,6 +23,6 @@ enum class Type {
|
|||
Writeable,
|
||||
};
|
||||
|
||||
using RegionBits = Common::BitArray<NUM_REGION_PAGES>;
|
||||
using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>;
|
||||
|
||||
} // namespace VideoCore
|
|
@ -83,9 +83,10 @@ public:
|
|||
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
|
||||
RENDERER_TRACE;
|
||||
const size_t offset = dirty_addr - cpu_addr;
|
||||
const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE;
|
||||
const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE);
|
||||
if (start_page >= NUM_REGION_PAGES || end_page <= start_page) {
|
||||
const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE;
|
||||
const size_t end_page =
|
||||
Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE);
|
||||
if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lk{lock};
|
||||
|
@ -114,9 +115,10 @@ public:
|
|||
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) {
|
||||
RENDERER_TRACE;
|
||||
const size_t offset = query_cpu_range - cpu_addr;
|
||||
const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE;
|
||||
const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE);
|
||||
if (start_page >= NUM_REGION_PAGES || end_page <= start_page) {
|
||||
const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE;
|
||||
const size_t end_page =
|
||||
Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE);
|
||||
if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lk{lock};
|
||||
|
@ -131,7 +133,7 @@ public:
|
|||
}
|
||||
|
||||
for (const auto& [start, end] : mask) {
|
||||
func(cpu_addr + start * BYTES_PER_PAGE, (end - start) * BYTES_PER_PAGE);
|
||||
func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE);
|
||||
}
|
||||
|
||||
if constexpr (clear) {
|
||||
|
@ -151,9 +153,10 @@ public:
|
|||
template <Type type>
|
||||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||
RENDERER_TRACE;
|
||||
const size_t start_page = SanitizeAddress(offset) / BYTES_PER_PAGE;
|
||||
const size_t end_page = Common::DivCeil(SanitizeAddress(offset + size), BYTES_PER_PAGE);
|
||||
if (start_page >= NUM_REGION_PAGES || end_page <= start_page) {
|
||||
const size_t start_page = SanitizeAddress(offset) / TRACKER_BYTES_PER_PAGE;
|
||||
const size_t end_page =
|
||||
Common::DivCeil(SanitizeAddress(offset + size), TRACKER_BYTES_PER_PAGE);
|
||||
if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
|
||||
return false;
|
||||
}
|
||||
// std::scoped_lock lk{lock}; // Is this needed?
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <boost/container/small_vector.hpp>
|
||||
#include "common/assert.h"
|
||||
#include "common/debug.h"
|
||||
#include "common/range_lock.h"
|
||||
#include "common/signal_context.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/signals.h"
|
||||
|
@ -59,6 +60,7 @@ struct PageManager::Impl {
|
|||
|
||||
static constexpr size_t ADDRESS_BITS = 40;
|
||||
static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS);
|
||||
static constexpr size_t NUM_ADDRESS_LOCKS = NUM_ADDRESS_PAGES / PAGES_PER_LOCK;
|
||||
inline static Vulkan::Rasterizer* rasterizer;
|
||||
#ifdef ENABLE_USERFAULTFD
|
||||
Impl(Vulkan::Rasterizer* rasterizer_) {
|
||||
|
@ -191,9 +193,17 @@ struct PageManager::Impl {
|
|||
RENDERER_TRACE;
|
||||
|
||||
size_t page = addr >> PAGE_BITS;
|
||||
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
|
||||
|
||||
// Acquire locks for the range of pages
|
||||
const auto lock_start = locks.begin() + (page / PAGES_PER_LOCK);
|
||||
const auto lock_end = locks.begin() + Common::DivCeil(page_end, PAGES_PER_LOCK);
|
||||
Common::RangeLockGuard lk(lock_start, lock_end);
|
||||
|
||||
auto perms = cached_pages[page].Perm();
|
||||
u64 range_begin = 0;
|
||||
u64 range_bytes = 0;
|
||||
u64 potential_range_bytes = 0;
|
||||
|
||||
const auto release_pending = [&] {
|
||||
if (range_bytes > 0) {
|
||||
|
@ -201,13 +211,11 @@ struct PageManager::Impl {
|
|||
// Perform pending (un)protect action
|
||||
Protect(range_begin << PAGE_BITS, range_bytes, perms);
|
||||
range_bytes = 0;
|
||||
potential_range_bytes = 0;
|
||||
}
|
||||
};
|
||||
|
||||
std::scoped_lock lk(lock);
|
||||
|
||||
// Iterate requested pages
|
||||
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
|
||||
const u64 aligned_addr = page << PAGE_BITS;
|
||||
const u64 aligned_end = page_end << PAGE_BITS;
|
||||
ASSERT_MSG(rasterizer->IsMapped(aligned_addr, aligned_end - aligned_addr),
|
||||
|
@ -225,14 +233,19 @@ struct PageManager::Impl {
|
|||
release_pending();
|
||||
perms = new_perms;
|
||||
} else if (range_bytes != 0) {
|
||||
// If the protection did not change, extend the current range
|
||||
range_bytes += PAGE_SIZE;
|
||||
// If the protection did not change, extend the potential range
|
||||
potential_range_bytes += PAGE_SIZE;
|
||||
}
|
||||
|
||||
// Only start a new range if the page must be (un)protected
|
||||
if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) {
|
||||
if ((new_count == 0 && !track) || (new_count == 1 && track)) {
|
||||
if (range_bytes == 0) {
|
||||
// Start a new potential range
|
||||
range_begin = page;
|
||||
range_bytes = PAGE_SIZE;
|
||||
potential_range_bytes = PAGE_SIZE;
|
||||
}
|
||||
// Extend current range up to potential range
|
||||
range_bytes = potential_range_bytes;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -256,9 +269,12 @@ struct PageManager::Impl {
|
|||
}
|
||||
|
||||
size_t base_page = (base_addr >> PAGE_BITS);
|
||||
ASSERT(base_page % PAGES_PER_LOCK == 0);
|
||||
std::scoped_lock lk(locks[base_page / PAGES_PER_LOCK]);
|
||||
auto perms = cached_pages[base_page + start_range.first].Perm();
|
||||
u64 range_begin = 0;
|
||||
u64 range_bytes = 0;
|
||||
u64 potential_range_bytes = 0;
|
||||
|
||||
const auto release_pending = [&] {
|
||||
if (range_bytes > 0) {
|
||||
|
@ -266,11 +282,10 @@ struct PageManager::Impl {
|
|||
// Perform pending (un)protect action
|
||||
Protect((range_begin << PAGE_BITS), range_bytes, perms);
|
||||
range_bytes = 0;
|
||||
potential_range_bytes = 0;
|
||||
}
|
||||
};
|
||||
|
||||
std::scoped_lock lk(lock);
|
||||
|
||||
// Iterate pages
|
||||
for (size_t page = start_range.first; page < end_range.second; ++page) {
|
||||
PageState& state = cached_pages[base_page + page];
|
||||
|
@ -284,8 +299,8 @@ struct PageManager::Impl {
|
|||
release_pending();
|
||||
perms = new_perms;
|
||||
} else if (range_bytes != 0) {
|
||||
// If the protection did not change, extend the current range
|
||||
range_bytes += PAGE_SIZE;
|
||||
// If the protection did not change, extend the potential range
|
||||
potential_range_bytes += PAGE_SIZE;
|
||||
}
|
||||
|
||||
// If the page is not being updated, skip it
|
||||
|
@ -293,10 +308,15 @@ struct PageManager::Impl {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Only start a new range if the page must be (un)protected
|
||||
if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) {
|
||||
// If the page must be (un)protected
|
||||
if ((new_count == 0 && !track) || (new_count == 1 && track)) {
|
||||
if (range_bytes == 0) {
|
||||
// Start a new potential range
|
||||
range_begin = base_page + page;
|
||||
range_bytes = PAGE_SIZE;
|
||||
potential_range_bytes = PAGE_SIZE;
|
||||
}
|
||||
// Extend current rango up to potential range
|
||||
range_bytes = potential_range_bytes;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -306,10 +326,11 @@ struct PageManager::Impl {
|
|||
|
||||
std::array<PageState, NUM_ADDRESS_PAGES> cached_pages{};
|
||||
#ifdef __linux__
|
||||
Common::AdaptiveMutex lock;
|
||||
using LockType = Common::AdaptiveMutex;
|
||||
#else
|
||||
Common::SpinLock lock;
|
||||
using LockType = Common::SpinLock;
|
||||
#endif
|
||||
std::array<LockType, NUM_ADDRESS_LOCKS> locks{};
|
||||
};
|
||||
|
||||
PageManager::PageManager(Vulkan::Rasterizer* rasterizer_)
|
||||
|
|
|
@ -15,8 +15,13 @@ class Rasterizer;
|
|||
namespace VideoCore {
|
||||
|
||||
class PageManager {
|
||||
static constexpr size_t PAGE_BITS = 12;
|
||||
static constexpr size_t PAGE_SIZE = 1ULL << PAGE_BITS;
|
||||
// Use the same page size as the tracker.
|
||||
static constexpr size_t PAGE_BITS = TRACKER_PAGE_BITS;
|
||||
static constexpr size_t PAGE_SIZE = TRACKER_BYTES_PER_PAGE;
|
||||
|
||||
// Keep the lock granularity the same as region granularity. (since each regions has
|
||||
// itself a lock)
|
||||
static constexpr size_t PAGES_PER_LOCK = NUM_PAGES_PER_REGION;
|
||||
|
||||
public:
|
||||
explicit PageManager(Vulkan::Rasterizer* rasterizer);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue