mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-28 22:33:17 +00:00
buffer_cache: Improve buffer cache locking contention (#1973)
* Improve buffer cache locking contention
* buffer_cache: Revert some changes
* clang fmt 1
* clang fmt 2
* clang fmt 3
* buffer_cache: Fix build
This commit is contained in:
parent
6862c9aad7
commit
c25447097e
7 changed files with 104 additions and 240 deletions
|
@ -54,18 +54,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||||
BufferCache::~BufferCache() = default;
|
BufferCache::~BufferCache() = default;
|
||||||
|
|
||||||
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
|
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
|
||||||
std::scoped_lock lk{mutex};
|
|
||||||
const bool is_tracked = IsRegionRegistered(device_addr, size);
|
const bool is_tracked = IsRegionRegistered(device_addr, size);
|
||||||
if (!is_tracked) {
|
if (is_tracked) {
|
||||||
return;
|
// Mark the page as CPU modified to stop tracking writes.
|
||||||
}
|
|
||||||
// Mark the page as CPU modified to stop tracking writes.
|
|
||||||
SCOPE_EXIT {
|
|
||||||
memory_tracker.MarkRegionAsCpuModified(device_addr, size);
|
memory_tracker.MarkRegionAsCpuModified(device_addr, size);
|
||||||
};
|
|
||||||
if (!memory_tracker.IsRegionGpuModified(device_addr, size)) {
|
|
||||||
// Page has not been modified by the GPU, nothing to do.
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -346,6 +338,7 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
|
||||||
++page;
|
++page;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
std::shared_lock lk{mutex};
|
||||||
Buffer& buffer = slot_buffers[buffer_id];
|
Buffer& buffer = slot_buffers[buffer_id];
|
||||||
const VAddr buf_start_addr = buffer.CpuAddr();
|
const VAddr buf_start_addr = buffer.CpuAddr();
|
||||||
const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
|
const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
|
||||||
|
@ -496,8 +489,11 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
|
||||||
wanted_size = static_cast<u32>(device_addr_end - device_addr);
|
wanted_size = static_cast<u32>(device_addr_end - device_addr);
|
||||||
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
|
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
|
||||||
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
||||||
const BufferId new_buffer_id = slot_buffers.insert(
|
const BufferId new_buffer_id = [&] {
|
||||||
instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
|
std::scoped_lock lk{mutex};
|
||||||
|
return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
|
||||||
|
AllFlags, size);
|
||||||
|
}();
|
||||||
auto& new_buffer = slot_buffers[new_buffer_id];
|
auto& new_buffer = slot_buffers[new_buffer_id];
|
||||||
const size_t size_bytes = new_buffer.SizeBytes();
|
const size_t size_bytes = new_buffer.SizeBytes();
|
||||||
const auto cmdbuf = scheduler.CommandBuffer();
|
const auto cmdbuf = scheduler.CommandBuffer();
|
||||||
|
@ -537,10 +533,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
|
||||||
|
|
||||||
void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||||
bool is_texel_buffer) {
|
bool is_texel_buffer) {
|
||||||
std::scoped_lock lk{mutex};
|
|
||||||
boost::container::small_vector<vk::BufferCopy, 4> copies;
|
boost::container::small_vector<vk::BufferCopy, 4> copies;
|
||||||
u64 total_size_bytes = 0;
|
u64 total_size_bytes = 0;
|
||||||
u64 largest_copy = 0;
|
|
||||||
VAddr buffer_start = buffer.CpuAddr();
|
VAddr buffer_start = buffer.CpuAddr();
|
||||||
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
|
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
|
||||||
copies.push_back(vk::BufferCopy{
|
copies.push_back(vk::BufferCopy{
|
||||||
|
@ -549,7 +543,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||||
.size = range_size,
|
.size = range_size,
|
||||||
});
|
});
|
||||||
total_size_bytes += range_size;
|
total_size_bytes += range_size;
|
||||||
largest_copy = std::max(largest_copy, range_size);
|
|
||||||
});
|
});
|
||||||
SCOPE_EXIT {
|
SCOPE_EXIT {
|
||||||
if (is_texel_buffer) {
|
if (is_texel_buffer) {
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <mutex>
|
#include <shared_mutex>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <boost/icl/interval_map.hpp>
|
#include <boost/icl/interval_map.hpp>
|
||||||
#include <tsl/robin_map.h>
|
#include <tsl/robin_map.h>
|
||||||
|
@ -157,7 +157,7 @@ private:
|
||||||
StreamBuffer staging_buffer;
|
StreamBuffer staging_buffer;
|
||||||
StreamBuffer stream_buffer;
|
StreamBuffer stream_buffer;
|
||||||
Buffer gds_buffer;
|
Buffer gds_buffer;
|
||||||
std::mutex mutex;
|
std::shared_mutex mutex;
|
||||||
Common::SlotVector<Buffer> slot_buffers;
|
Common::SlotVector<Buffer> slot_buffers;
|
||||||
RangeSet gpu_modified_ranges;
|
RangeSet gpu_modified_ranges;
|
||||||
vk::BufferView null_buffer_view;
|
vk::BufferView null_buffer_view;
|
||||||
|
|
|
@ -15,13 +15,8 @@ namespace VideoCore {
|
||||||
class MemoryTracker {
|
class MemoryTracker {
|
||||||
public:
|
public:
|
||||||
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
|
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
|
||||||
static constexpr size_t HIGHER_PAGE_BITS = 22;
|
|
||||||
static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
|
|
||||||
static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
|
|
||||||
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
|
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
|
||||||
static constexpr size_t MANAGER_POOL_SIZE = 32;
|
static constexpr size_t MANAGER_POOL_SIZE = 32;
|
||||||
static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
|
|
||||||
using Manager = WordManager<WORDS_STACK_NEEDED>;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
|
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
|
||||||
|
@ -30,7 +25,7 @@ public:
|
||||||
/// Returns true if a region has been modified from the CPU
|
/// Returns true if a region has been modified from the CPU
|
||||||
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
|
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
|
||||||
return IteratePages<true>(
|
return IteratePages<true>(
|
||||||
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
|
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
|
||||||
return manager->template IsRegionModified<Type::CPU>(offset, size);
|
return manager->template IsRegionModified<Type::CPU>(offset, size);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -38,52 +33,34 @@ public:
|
||||||
/// Returns true if a region has been modified from the GPU
|
/// Returns true if a region has been modified from the GPU
|
||||||
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
|
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
|
||||||
return IteratePages<false>(
|
return IteratePages<false>(
|
||||||
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
|
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
|
||||||
return manager->template IsRegionModified<Type::GPU>(offset, size);
|
return manager->template IsRegionModified<Type::GPU>(offset, size);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mark region as CPU modified, notifying the device_tracker about this change
|
/// Mark region as CPU modified, notifying the device_tracker about this change
|
||||||
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
|
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
|
||||||
IteratePages<true>(dirty_cpu_addr, query_size,
|
IteratePages<false>(dirty_cpu_addr, query_size,
|
||||||
[](Manager* manager, u64 offset, size_t size) {
|
[](RegionManager* manager, u64 offset, size_t size) {
|
||||||
manager->template ChangeRegionState<Type::CPU, true>(
|
manager->template ChangeRegionState<Type::CPU, true>(
|
||||||
manager->GetCpuAddr() + offset, size);
|
manager->GetCpuAddr() + offset, size);
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
/// Unmark region as CPU modified, notifying the device_tracker about this change
|
|
||||||
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
|
|
||||||
IteratePages<true>(dirty_cpu_addr, query_size,
|
|
||||||
[](Manager* manager, u64 offset, size_t size) {
|
|
||||||
manager->template ChangeRegionState<Type::CPU, false>(
|
|
||||||
manager->GetCpuAddr() + offset, size);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mark region as modified from the host GPU
|
/// Mark region as modified from the host GPU
|
||||||
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
|
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
|
||||||
IteratePages<true>(dirty_cpu_addr, query_size,
|
IteratePages<false>(dirty_cpu_addr, query_size,
|
||||||
[](Manager* manager, u64 offset, size_t size) {
|
[](RegionManager* manager, u64 offset, size_t size) {
|
||||||
manager->template ChangeRegionState<Type::GPU, true>(
|
manager->template ChangeRegionState<Type::GPU, true>(
|
||||||
manager->GetCpuAddr() + offset, size);
|
manager->GetCpuAddr() + offset, size);
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
/// Unmark region as modified from the host GPU
|
|
||||||
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
|
|
||||||
IteratePages<true>(dirty_cpu_addr, query_size,
|
|
||||||
[](Manager* manager, u64 offset, size_t size) {
|
|
||||||
manager->template ChangeRegionState<Type::GPU, false>(
|
|
||||||
manager->GetCpuAddr() + offset, size);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
|
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
|
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
|
||||||
IteratePages<true>(query_cpu_range, query_size,
|
IteratePages<true>(query_cpu_range, query_size,
|
||||||
[&func](Manager* manager, u64 offset, size_t size) {
|
[&func](RegionManager* manager, u64 offset, size_t size) {
|
||||||
manager->template ForEachModifiedRange<Type::CPU, true>(
|
manager->template ForEachModifiedRange<Type::CPU, true>(
|
||||||
manager->GetCpuAddr() + offset, size, func);
|
manager->GetCpuAddr() + offset, size, func);
|
||||||
});
|
});
|
||||||
|
@ -93,7 +70,7 @@ public:
|
||||||
template <bool clear, typename Func>
|
template <bool clear, typename Func>
|
||||||
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
|
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
|
||||||
IteratePages<false>(query_cpu_range, query_size,
|
IteratePages<false>(query_cpu_range, query_size,
|
||||||
[&func](Manager* manager, u64 offset, size_t size) {
|
[&func](RegionManager* manager, u64 offset, size_t size) {
|
||||||
if constexpr (clear) {
|
if constexpr (clear) {
|
||||||
manager->template ForEachModifiedRange<Type::GPU, true>(
|
manager->template ForEachModifiedRange<Type::GPU, true>(
|
||||||
manager->GetCpuAddr() + offset, size, func);
|
manager->GetCpuAddr() + offset, size, func);
|
||||||
|
@ -114,7 +91,7 @@ private:
|
||||||
*/
|
*/
|
||||||
template <bool create_region_on_fail, typename Func>
|
template <bool create_region_on_fail, typename Func>
|
||||||
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
|
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
|
||||||
using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
|
using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
|
||||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||||
std::size_t remaining_size{size};
|
std::size_t remaining_size{size};
|
||||||
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
|
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
|
||||||
|
@ -155,7 +132,7 @@ private:
|
||||||
manager_pool.emplace_back();
|
manager_pool.emplace_back();
|
||||||
auto& last_pool = manager_pool.back();
|
auto& last_pool = manager_pool.back();
|
||||||
for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
|
for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
|
||||||
std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE);
|
std::construct_at(&last_pool[i], tracker, 0);
|
||||||
free_managers.push_back(&last_pool[i]);
|
free_managers.push_back(&last_pool[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -167,9 +144,9 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
PageManager* tracker;
|
PageManager* tracker;
|
||||||
std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
|
std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
|
||||||
std::vector<Manager*> free_managers;
|
std::vector<RegionManager*> free_managers;
|
||||||
std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
|
std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
|
@ -3,10 +3,12 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <algorithm>
|
#include <array>
|
||||||
|
#include <mutex>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include "common/div_ceil.h"
|
|
||||||
|
#include "common/spin_lock.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "video_core/page_manager.h"
|
#include "video_core/page_manager.h"
|
||||||
|
|
||||||
|
@ -16,135 +18,32 @@ constexpr u64 PAGES_PER_WORD = 64;
|
||||||
constexpr u64 BYTES_PER_PAGE = 4_KB;
|
constexpr u64 BYTES_PER_PAGE = 4_KB;
|
||||||
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
|
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
|
||||||
|
|
||||||
|
constexpr u64 HIGHER_PAGE_BITS = 22;
|
||||||
|
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
|
||||||
|
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
|
||||||
|
constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
|
||||||
|
|
||||||
enum class Type {
|
enum class Type {
|
||||||
CPU,
|
CPU,
|
||||||
GPU,
|
GPU,
|
||||||
Untracked,
|
Untracked,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Vector tracking modified pages tightly packed with small vector optimization
|
using WordsArray = std::array<u64, NUM_REGION_WORDS>;
|
||||||
template <size_t stack_words = 1>
|
|
||||||
struct WordsArray {
|
|
||||||
/// Returns the pointer to the words state
|
|
||||||
[[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
|
|
||||||
return is_short ? stack.data() : heap;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the pointer to the words state
|
/**
|
||||||
[[nodiscard]] u64* Pointer(bool is_short) noexcept {
|
* Allows tracking CPU and GPU modification of pages in a contiguous 4MB virtual address region.
|
||||||
return is_short ? stack.data() : heap;
|
* Information is stored in bitsets for spatial locality and fast update of single pages.
|
||||||
}
|
*/
|
||||||
|
class RegionManager {
|
||||||
std::array<u64, stack_words> stack{}; ///< Small buffers storage
|
|
||||||
u64* heap; ///< Not-small buffers pointer to the storage
|
|
||||||
};
|
|
||||||
|
|
||||||
template <size_t stack_words = 1>
|
|
||||||
struct Words {
|
|
||||||
explicit Words() = default;
|
|
||||||
explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
|
|
||||||
num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
|
|
||||||
if (IsShort()) {
|
|
||||||
cpu.stack.fill(~u64{0});
|
|
||||||
gpu.stack.fill(0);
|
|
||||||
untracked.stack.fill(~u64{0});
|
|
||||||
} else {
|
|
||||||
// Share allocation between CPU and GPU pages and set their default values
|
|
||||||
u64* const alloc = new u64[num_words * 3];
|
|
||||||
cpu.heap = alloc;
|
|
||||||
gpu.heap = alloc + num_words;
|
|
||||||
untracked.heap = alloc + num_words * 2;
|
|
||||||
std::fill_n(cpu.heap, num_words, ~u64{0});
|
|
||||||
std::fill_n(gpu.heap, num_words, 0);
|
|
||||||
std::fill_n(untracked.heap, num_words, ~u64{0});
|
|
||||||
}
|
|
||||||
// Clean up trailing bits
|
|
||||||
const u64 last_word_size = size_bytes % BYTES_PER_WORD;
|
|
||||||
const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
|
|
||||||
const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
|
|
||||||
const u64 last_word = (~u64{0} << shift) >> shift;
|
|
||||||
cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
|
|
||||||
untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
|
|
||||||
}
|
|
||||||
|
|
||||||
~Words() {
|
|
||||||
Release();
|
|
||||||
}
|
|
||||||
|
|
||||||
Words& operator=(Words&& rhs) noexcept {
|
|
||||||
Release();
|
|
||||||
size_bytes = rhs.size_bytes;
|
|
||||||
num_words = rhs.num_words;
|
|
||||||
cpu = rhs.cpu;
|
|
||||||
gpu = rhs.gpu;
|
|
||||||
untracked = rhs.untracked;
|
|
||||||
rhs.cpu.heap = nullptr;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
Words(Words&& rhs) noexcept
|
|
||||||
: size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
|
|
||||||
untracked{rhs.untracked} {
|
|
||||||
rhs.cpu.heap = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
Words& operator=(const Words&) = delete;
|
|
||||||
Words(const Words&) = delete;
|
|
||||||
|
|
||||||
/// Returns true when the buffer fits in the small vector optimization
|
|
||||||
[[nodiscard]] bool IsShort() const noexcept {
|
|
||||||
return num_words <= stack_words;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the number of words of the buffer
|
|
||||||
[[nodiscard]] size_t NumWords() const noexcept {
|
|
||||||
return num_words;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Release buffer resources
|
|
||||||
void Release() {
|
|
||||||
if (!IsShort()) {
|
|
||||||
// CPU written words is the base for the heap allocation
|
|
||||||
delete[] cpu.heap;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <Type type>
|
|
||||||
std::span<u64> Span() noexcept {
|
|
||||||
if constexpr (type == Type::CPU) {
|
|
||||||
return std::span<u64>(cpu.Pointer(IsShort()), num_words);
|
|
||||||
} else if constexpr (type == Type::GPU) {
|
|
||||||
return std::span<u64>(gpu.Pointer(IsShort()), num_words);
|
|
||||||
} else if constexpr (type == Type::Untracked) {
|
|
||||||
return std::span<u64>(untracked.Pointer(IsShort()), num_words);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <Type type>
|
|
||||||
std::span<const u64> Span() const noexcept {
|
|
||||||
if constexpr (type == Type::CPU) {
|
|
||||||
return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
|
|
||||||
} else if constexpr (type == Type::GPU) {
|
|
||||||
return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
|
|
||||||
} else if constexpr (type == Type::Untracked) {
|
|
||||||
return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 size_bytes = 0;
|
|
||||||
size_t num_words = 0;
|
|
||||||
WordsArray<stack_words> cpu;
|
|
||||||
WordsArray<stack_words> gpu;
|
|
||||||
WordsArray<stack_words> untracked;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <size_t stack_words = 1>
|
|
||||||
class WordManager {
|
|
||||||
public:
|
public:
|
||||||
explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes)
|
explicit RegionManager(PageManager* tracker_, VAddr cpu_addr_)
|
||||||
: tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {}
|
: tracker{tracker_}, cpu_addr{cpu_addr_} {
|
||||||
|
cpu.fill(~u64{0});
|
||||||
explicit WordManager() = default;
|
gpu.fill(0);
|
||||||
|
untracked.fill(~u64{0});
|
||||||
|
}
|
||||||
|
explicit RegionManager() = default;
|
||||||
|
|
||||||
void SetCpuAddress(VAddr new_cpu_addr) {
|
void SetCpuAddress(VAddr new_cpu_addr) {
|
||||||
cpu_addr = new_cpu_addr;
|
cpu_addr = new_cpu_addr;
|
||||||
|
@ -175,12 +74,12 @@ public:
|
||||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||||
const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
|
const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
|
||||||
const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
|
const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
|
||||||
if (start >= SizeBytes() || end <= start) {
|
if (start >= HIGHER_PAGE_SIZE || end <= start) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto [start_word, start_page] = GetWordPage(start);
|
auto [start_word, start_page] = GetWordPage(start);
|
||||||
auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
|
auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
|
||||||
const size_t num_words = NumWords();
|
constexpr size_t num_words = NUM_REGION_WORDS;
|
||||||
start_word = std::min(start_word, num_words);
|
start_word = std::min(start_word, num_words);
|
||||||
end_word = std::min(end_word, num_words);
|
end_word = std::min(end_word, num_words);
|
||||||
const size_t diff = end_word - start_word;
|
const size_t diff = end_word - start_word;
|
||||||
|
@ -225,21 +124,21 @@ public:
|
||||||
*/
|
*/
|
||||||
template <Type type, bool enable>
|
template <Type type, bool enable>
|
||||||
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
|
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
|
||||||
std::span<u64> state_words = words.template Span<type>();
|
std::scoped_lock lk{lock};
|
||||||
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
|
std::span<u64> state_words = Span<type>();
|
||||||
IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
|
IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
NotifyPageTracker<!enable>(index, untracked_words[index], mask);
|
UpdateProtection<!enable>(index, untracked[index], mask);
|
||||||
}
|
}
|
||||||
if constexpr (enable) {
|
if constexpr (enable) {
|
||||||
state_words[index] |= mask;
|
state_words[index] |= mask;
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
untracked_words[index] |= mask;
|
untracked[index] |= mask;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
state_words[index] &= ~mask;
|
state_words[index] &= ~mask;
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
untracked_words[index] &= ~mask;
|
untracked[index] &= ~mask;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -255,10 +154,10 @@ public:
|
||||||
*/
|
*/
|
||||||
template <Type type, bool clear, typename Func>
|
template <Type type, bool clear, typename Func>
|
||||||
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
|
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
|
||||||
|
std::scoped_lock lk{lock};
|
||||||
static_assert(type != Type::Untracked);
|
static_assert(type != Type::Untracked);
|
||||||
|
|
||||||
std::span<u64> state_words = words.template Span<type>();
|
std::span<u64> state_words = Span<type>();
|
||||||
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
|
|
||||||
const size_t offset = query_cpu_range - cpu_addr;
|
const size_t offset = query_cpu_range - cpu_addr;
|
||||||
bool pending = false;
|
bool pending = false;
|
||||||
size_t pending_offset{};
|
size_t pending_offset{};
|
||||||
|
@ -269,16 +168,16 @@ public:
|
||||||
};
|
};
|
||||||
IterateWords(offset, size, [&](size_t index, u64 mask) {
|
IterateWords(offset, size, [&](size_t index, u64 mask) {
|
||||||
if constexpr (type == Type::GPU) {
|
if constexpr (type == Type::GPU) {
|
||||||
mask &= ~untracked_words[index];
|
mask &= ~untracked[index];
|
||||||
}
|
}
|
||||||
const u64 word = state_words[index] & mask;
|
const u64 word = state_words[index] & mask;
|
||||||
if constexpr (clear) {
|
if constexpr (clear) {
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
NotifyPageTracker<true>(index, untracked_words[index], mask);
|
UpdateProtection<true>(index, untracked[index], mask);
|
||||||
}
|
}
|
||||||
state_words[index] &= ~mask;
|
state_words[index] &= ~mask;
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
untracked_words[index] &= ~mask;
|
untracked[index] &= ~mask;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const size_t base_offset = index * PAGES_PER_WORD;
|
const size_t base_offset = index * PAGES_PER_WORD;
|
||||||
|
@ -315,13 +214,11 @@ public:
|
||||||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||||
static_assert(type != Type::Untracked);
|
static_assert(type != Type::Untracked);
|
||||||
|
|
||||||
const std::span<const u64> state_words = words.template Span<type>();
|
const std::span<const u64> state_words = Span<type>();
|
||||||
[[maybe_unused]] const std::span<const u64> untracked_words =
|
|
||||||
words.template Span<Type::Untracked>();
|
|
||||||
bool result = false;
|
bool result = false;
|
||||||
IterateWords(offset, size, [&](size_t index, u64 mask) {
|
IterateWords(offset, size, [&](size_t index, u64 mask) {
|
||||||
if constexpr (type == Type::GPU) {
|
if constexpr (type == Type::GPU) {
|
||||||
mask &= ~untracked_words[index];
|
mask &= ~untracked[index];
|
||||||
}
|
}
|
||||||
const u64 word = state_words[index] & mask;
|
const u64 word = state_words[index] & mask;
|
||||||
if (word != 0) {
|
if (word != 0) {
|
||||||
|
@ -333,44 +230,7 @@ public:
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the number of words of the manager
|
|
||||||
[[nodiscard]] size_t NumWords() const noexcept {
|
|
||||||
return words.NumWords();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the size in bytes of the manager
|
|
||||||
[[nodiscard]] u64 SizeBytes() const noexcept {
|
|
||||||
return words.size_bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true when the buffer fits in the small vector optimization
|
|
||||||
[[nodiscard]] bool IsShort() const noexcept {
|
|
||||||
return words.IsShort();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <Type type>
|
|
||||||
u64* Array() noexcept {
|
|
||||||
if constexpr (type == Type::CPU) {
|
|
||||||
return words.cpu.Pointer(IsShort());
|
|
||||||
} else if constexpr (type == Type::GPU) {
|
|
||||||
return words.gpu.Pointer(IsShort());
|
|
||||||
} else if constexpr (type == Type::Untracked) {
|
|
||||||
return words.untracked.Pointer(IsShort());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <Type type>
|
|
||||||
const u64* Array() const noexcept {
|
|
||||||
if constexpr (type == Type::CPU) {
|
|
||||||
return words.cpu.Pointer(IsShort());
|
|
||||||
} else if constexpr (type == Type::GPU) {
|
|
||||||
return words.gpu.Pointer(IsShort());
|
|
||||||
} else if constexpr (type == Type::Untracked) {
|
|
||||||
return words.untracked.Pointer(IsShort());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Notify tracker about changes in the CPU tracking state of a word in the buffer
|
* Notify tracker about changes in the CPU tracking state of a word in the buffer
|
||||||
*
|
*
|
||||||
|
@ -381,7 +241,7 @@ private:
|
||||||
* @tparam add_to_tracker True when the tracker should start tracking the new pages
|
* @tparam add_to_tracker True when the tracker should start tracking the new pages
|
||||||
*/
|
*/
|
||||||
template <bool add_to_tracker>
|
template <bool add_to_tracker>
|
||||||
void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const {
|
void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
|
||||||
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
|
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
|
||||||
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
|
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
|
||||||
IteratePages(changed_bits, [&](size_t offset, size_t size) {
|
IteratePages(changed_bits, [&](size_t offset, size_t size) {
|
||||||
|
@ -390,9 +250,34 @@ private:
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <Type type>
|
||||||
|
std::span<u64> Span() noexcept {
|
||||||
|
if constexpr (type == Type::CPU) {
|
||||||
|
return cpu;
|
||||||
|
} else if constexpr (type == Type::GPU) {
|
||||||
|
return gpu;
|
||||||
|
} else if constexpr (type == Type::Untracked) {
|
||||||
|
return untracked;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <Type type>
|
||||||
|
std::span<const u64> Span() const noexcept {
|
||||||
|
if constexpr (type == Type::CPU) {
|
||||||
|
return cpu;
|
||||||
|
} else if constexpr (type == Type::GPU) {
|
||||||
|
return gpu;
|
||||||
|
} else if constexpr (type == Type::Untracked) {
|
||||||
|
return untracked;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Common::SpinLock lock;
|
||||||
PageManager* tracker;
|
PageManager* tracker;
|
||||||
VAddr cpu_addr = 0;
|
VAddr cpu_addr = 0;
|
||||||
Words<stack_words> words;
|
WordsArray cpu;
|
||||||
|
WordsArray gpu;
|
||||||
|
WordsArray untracked;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
|
@ -39,6 +39,15 @@ public:
|
||||||
return &(*first_level_map[l1_page])[l2_page];
|
return &(*first_level_map[l1_page])[l2_page];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] const Entry* find(size_t page) const {
|
||||||
|
const size_t l1_page = page >> SecondLevelBits;
|
||||||
|
const size_t l2_page = page & (NumEntriesPerL1Page - 1);
|
||||||
|
if (!first_level_map[l1_page]) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
return &(*first_level_map[l1_page])[l2_page];
|
||||||
|
}
|
||||||
|
|
||||||
[[nodiscard]] const Entry& operator[](size_t page) const {
|
[[nodiscard]] const Entry& operator[](size_t page) const {
|
||||||
const size_t l1_page = page >> SecondLevelBits;
|
const size_t l1_page = page >> SecondLevelBits;
|
||||||
const size_t l2_page = page & (NumEntriesPerL1Page - 1);
|
const size_t l2_page = page & (NumEntriesPerL1Page - 1);
|
||||||
|
|
|
@ -185,7 +185,7 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
|
||||||
void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
||||||
static constexpr u64 PageShift = 12;
|
static constexpr u64 PageShift = 12;
|
||||||
|
|
||||||
std::scoped_lock lk{mutex};
|
std::scoped_lock lk{lock};
|
||||||
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
|
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
|
||||||
const u64 page_start = addr >> PageShift;
|
const u64 page_start = addr >> PageShift;
|
||||||
const u64 page_end = page_start + num_pages;
|
const u64 page_end = page_start + num_pages;
|
||||||
|
|
|
@ -4,8 +4,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
|
||||||
#include <boost/icl/interval_map.hpp>
|
#include <boost/icl/interval_map.hpp>
|
||||||
|
#include "common/spin_lock.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
@ -35,8 +35,8 @@ private:
|
||||||
struct Impl;
|
struct Impl;
|
||||||
std::unique_ptr<Impl> impl;
|
std::unique_ptr<Impl> impl;
|
||||||
Vulkan::Rasterizer* rasterizer;
|
Vulkan::Rasterizer* rasterizer;
|
||||||
std::mutex mutex;
|
|
||||||
boost::icl::interval_map<VAddr, s32> cached_pages;
|
boost::icl::interval_map<VAddr, s32> cached_pages;
|
||||||
|
Common::SpinLock lock;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue