Merge pull request #10996 from Kelebek1/readblock_optimisation
Use spans over guest memory where possible instead of copying data
This commit is contained in:
commit
ce7c418e0c
23 changed files with 477 additions and 235 deletions
|
@ -70,7 +70,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
|
|||
-> std::optional<std::chrono::nanoseconds> { return std::nullopt; };
|
||||
ev_lost = CreateEvent("_lost_event", empty_timed_callback);
|
||||
if (is_multicore) {
|
||||
timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
|
||||
timer_thread = std::make_unique<std::jthread>(ThreadEntry, std::ref(*this));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -255,7 +255,6 @@ void CoreTiming::ThreadLoop() {
|
|||
#ifdef _WIN32
|
||||
while (!paused && !event.IsSet() && wait_time > 0) {
|
||||
wait_time = *next_time - GetGlobalTimeNs().count();
|
||||
|
||||
if (wait_time >= timer_resolution_ns) {
|
||||
Common::Windows::SleepForOneTick();
|
||||
} else {
|
||||
|
|
|
@ -163,7 +163,7 @@ private:
|
|||
Common::Event pause_event{};
|
||||
std::mutex basic_lock;
|
||||
std::mutex advance_lock;
|
||||
std::unique_ptr<std::thread> timer_thread;
|
||||
std::unique_ptr<std::jthread> timer_thread;
|
||||
std::atomic<bool> paused{};
|
||||
std::atomic<bool> paused_set{};
|
||||
std::atomic<bool> wait_set{};
|
||||
|
|
|
@ -329,8 +329,22 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons
|
|||
}
|
||||
|
||||
std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
|
||||
static thread_local std::array<Common::ScratchBuffer<u8>, 2> read_buffer_a;
|
||||
static thread_local std::array<Common::ScratchBuffer<u8>, 2> read_buffer_x;
|
||||
static thread_local std::array read_buffer_a{
|
||||
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
|
||||
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
|
||||
};
|
||||
static thread_local std::array read_buffer_data_a{
|
||||
Common::ScratchBuffer<u8>(),
|
||||
Common::ScratchBuffer<u8>(),
|
||||
};
|
||||
static thread_local std::array read_buffer_x{
|
||||
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
|
||||
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
|
||||
};
|
||||
static thread_local std::array read_buffer_data_x{
|
||||
Common::ScratchBuffer<u8>(),
|
||||
Common::ScratchBuffer<u8>(),
|
||||
};
|
||||
|
||||
const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
|
||||
BufferDescriptorA()[buffer_index].Size()};
|
||||
|
@ -339,19 +353,17 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons
|
|||
BufferDescriptorA().size() > buffer_index, { return {}; },
|
||||
"BufferDescriptorA invalid buffer_index {}", buffer_index);
|
||||
auto& read_buffer = read_buffer_a[buffer_index];
|
||||
read_buffer.resize_destructive(BufferDescriptorA()[buffer_index].Size());
|
||||
memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), read_buffer.data(),
|
||||
read_buffer.size());
|
||||
return read_buffer;
|
||||
return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(),
|
||||
BufferDescriptorA()[buffer_index].Size(),
|
||||
&read_buffer_data_a[buffer_index]);
|
||||
} else {
|
||||
ASSERT_OR_EXECUTE_MSG(
|
||||
BufferDescriptorX().size() > buffer_index, { return {}; },
|
||||
"BufferDescriptorX invalid buffer_index {}", buffer_index);
|
||||
auto& read_buffer = read_buffer_x[buffer_index];
|
||||
read_buffer.resize_destructive(BufferDescriptorX()[buffer_index].Size());
|
||||
memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), read_buffer.data(),
|
||||
read_buffer.size());
|
||||
return read_buffer;
|
||||
return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(),
|
||||
BufferDescriptorX()[buffer_index].Size(),
|
||||
&read_buffer_data_x[buffer_index]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -266,6 +266,22 @@ struct Memory::Impl {
|
|||
ReadBlockImpl<true>(*system.ApplicationProcess(), src_addr, dest_buffer, size);
|
||||
}
|
||||
|
||||
const u8* GetSpan(const VAddr src_addr, const std::size_t size) const {
|
||||
if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] ==
|
||||
current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) {
|
||||
return GetPointerSilent(src_addr);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
u8* GetSpan(const VAddr src_addr, const std::size_t size) {
|
||||
if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] ==
|
||||
current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) {
|
||||
return GetPointerSilent(src_addr);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <bool UNSAFE>
|
||||
void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr,
|
||||
const void* src_buffer, const std::size_t size) {
|
||||
|
@ -559,7 +575,7 @@ struct Memory::Impl {
|
|||
}
|
||||
}
|
||||
|
||||
const Common::ProcessAddress end = base + size;
|
||||
const auto end = base + size;
|
||||
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
|
||||
base + page_table.pointers.size());
|
||||
|
||||
|
@ -570,14 +586,18 @@ struct Memory::Impl {
|
|||
while (base != end) {
|
||||
page_table.pointers[base].Store(nullptr, type);
|
||||
page_table.backing_addr[base] = 0;
|
||||
|
||||
page_table.blocks[base] = 0;
|
||||
base += 1;
|
||||
}
|
||||
} else {
|
||||
auto orig_base = base;
|
||||
while (base != end) {
|
||||
page_table.pointers[base].Store(
|
||||
system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS), type);
|
||||
page_table.backing_addr[base] = GetInteger(target) - (base << YUZU_PAGEBITS);
|
||||
auto host_ptr =
|
||||
system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS);
|
||||
auto backing = GetInteger(target) - (base << YUZU_PAGEBITS);
|
||||
page_table.pointers[base].Store(host_ptr, type);
|
||||
page_table.backing_addr[base] = backing;
|
||||
page_table.blocks[base] = orig_base << YUZU_PAGEBITS;
|
||||
|
||||
ASSERT_MSG(page_table.pointers[base].Pointer(),
|
||||
"memory mapping base yield a nullptr within the table");
|
||||
|
@ -747,6 +767,14 @@ struct Memory::Impl {
|
|||
VAddr last_address;
|
||||
};
|
||||
|
||||
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
|
||||
system.GPU().InvalidateRegion(GetInteger(dest_addr), size);
|
||||
}
|
||||
|
||||
void FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
|
||||
system.GPU().FlushRegion(GetInteger(dest_addr), size);
|
||||
}
|
||||
|
||||
Core::System& system;
|
||||
Common::PageTable* current_page_table = nullptr;
|
||||
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
|
||||
|
@ -881,6 +909,14 @@ void Memory::ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_b
|
|||
impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
|
||||
}
|
||||
|
||||
const u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) const {
|
||||
return impl->GetSpan(src_addr, size);
|
||||
}
|
||||
|
||||
u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) {
|
||||
return impl->GetSpan(src_addr, size);
|
||||
}
|
||||
|
||||
void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer,
|
||||
const std::size_t size) {
|
||||
impl->WriteBlock(dest_addr, src_buffer, size);
|
||||
|
@ -924,4 +960,12 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
|
|||
impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
|
||||
}
|
||||
|
||||
void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
|
||||
impl->InvalidateRegion(dest_addr, size);
|
||||
}
|
||||
|
||||
void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
|
||||
impl->FlushRegion(dest_addr, size);
|
||||
}
|
||||
|
||||
} // namespace Core::Memory
|
||||
|
|
|
@ -5,8 +5,12 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/scratch_buffer.h"
|
||||
#include "common/typed_address.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
|
@ -24,6 +28,10 @@ class PhysicalMemory;
|
|||
class KProcess;
|
||||
} // namespace Kernel
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Core::Memory {
|
||||
|
||||
/**
|
||||
|
@ -343,6 +351,9 @@ public:
|
|||
*/
|
||||
void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size);
|
||||
|
||||
const u8* GetSpan(const VAddr src_addr, const std::size_t size) const;
|
||||
u8* GetSpan(const VAddr src_addr, const std::size_t size);
|
||||
|
||||
/**
|
||||
* Writes a range of bytes into the current process' address space at the specified
|
||||
* virtual address.
|
||||
|
@ -461,6 +472,8 @@ public:
|
|||
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
|
||||
|
||||
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
|
||||
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
|
||||
void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
|
@ -469,4 +482,203 @@ private:
|
|||
std::unique_ptr<Impl> impl;
|
||||
};
|
||||
|
||||
enum GuestMemoryFlags : u32 {
|
||||
Read = 1 << 0,
|
||||
Write = 1 << 1,
|
||||
Safe = 1 << 2,
|
||||
Cached = 1 << 3,
|
||||
|
||||
SafeRead = Read | Safe,
|
||||
SafeWrite = Write | Safe,
|
||||
SafeReadWrite = SafeRead | SafeWrite,
|
||||
SafeReadCachedWrite = SafeReadWrite | Cached,
|
||||
|
||||
UnsafeRead = Read,
|
||||
UnsafeWrite = Write,
|
||||
UnsafeReadWrite = UnsafeRead | UnsafeWrite,
|
||||
UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
|
||||
};
|
||||
|
||||
namespace {
|
||||
template <typename M, typename T, GuestMemoryFlags FLAGS>
|
||||
class GuestMemory {
|
||||
using iterator = T*;
|
||||
using const_iterator = const T*;
|
||||
using value_type = T;
|
||||
using element_type = T;
|
||||
using iterator_category = std::contiguous_iterator_tag;
|
||||
|
||||
public:
|
||||
GuestMemory() = delete;
|
||||
explicit GuestMemory(M& memory_, u64 addr_, std::size_t size_,
|
||||
Common::ScratchBuffer<T>* backup = nullptr)
|
||||
: memory{memory_}, addr{addr_}, size{size_} {
|
||||
static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Read) {
|
||||
Read(addr, size, backup);
|
||||
}
|
||||
}
|
||||
|
||||
~GuestMemory() = default;
|
||||
|
||||
T* data() noexcept {
|
||||
return data_span.data();
|
||||
}
|
||||
|
||||
const T* data() const noexcept {
|
||||
return data_span.data();
|
||||
}
|
||||
|
||||
[[nodiscard]] T* begin() noexcept {
|
||||
return data();
|
||||
}
|
||||
|
||||
[[nodiscard]] const T* begin() const noexcept {
|
||||
return data();
|
||||
}
|
||||
|
||||
[[nodiscard]] T* end() noexcept {
|
||||
return data() + size;
|
||||
}
|
||||
|
||||
[[nodiscard]] const T* end() const noexcept {
|
||||
return data() + size;
|
||||
}
|
||||
|
||||
T& operator[](size_t index) noexcept {
|
||||
return data_span[index];
|
||||
}
|
||||
|
||||
const T& operator[](size_t index) const noexcept {
|
||||
return data_span[index];
|
||||
}
|
||||
|
||||
void SetAddressAndSize(u64 addr_, std::size_t size_) noexcept {
|
||||
addr = addr_;
|
||||
size = size_;
|
||||
addr_changed = true;
|
||||
}
|
||||
|
||||
std::span<T> Read(u64 addr_, std::size_t size_,
|
||||
Common::ScratchBuffer<T>* backup = nullptr) noexcept {
|
||||
addr = addr_;
|
||||
size = size_;
|
||||
if (size == 0) {
|
||||
is_data_copy = true;
|
||||
return {};
|
||||
}
|
||||
|
||||
if (TrySetSpan()) {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
memory.FlushRegion(addr, size * sizeof(T));
|
||||
}
|
||||
} else {
|
||||
if (backup) {
|
||||
backup->resize_destructive(size);
|
||||
data_span = *backup;
|
||||
} else {
|
||||
data_copy.resize(size);
|
||||
data_span = std::span(data_copy);
|
||||
}
|
||||
is_data_copy = true;
|
||||
span_valid = true;
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
memory.ReadBlock(addr, data_span.data(), size * sizeof(T));
|
||||
} else {
|
||||
memory.ReadBlockUnsafe(addr, data_span.data(), size * sizeof(T));
|
||||
}
|
||||
}
|
||||
return data_span;
|
||||
}
|
||||
|
||||
void Write(std::span<T> write_data) noexcept {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
|
||||
memory.WriteBlockCached(addr, write_data.data(), size * sizeof(T));
|
||||
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
memory.WriteBlock(addr, write_data.data(), size * sizeof(T));
|
||||
} else {
|
||||
memory.WriteBlockUnsafe(addr, write_data.data(), size * sizeof(T));
|
||||
}
|
||||
}
|
||||
|
||||
bool TrySetSpan() noexcept {
|
||||
if (u8* ptr = memory.GetSpan(addr, size * sizeof(T)); ptr) {
|
||||
data_span = {reinterpret_cast<T*>(ptr), size};
|
||||
span_valid = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool IsDataCopy() const noexcept {
|
||||
return is_data_copy;
|
||||
}
|
||||
|
||||
bool AddressChanged() const noexcept {
|
||||
return addr_changed;
|
||||
}
|
||||
|
||||
M& memory;
|
||||
u64 addr;
|
||||
size_t size;
|
||||
std::span<T> data_span{};
|
||||
std::vector<T> data_copy;
|
||||
bool span_valid{false};
|
||||
bool is_data_copy{false};
|
||||
bool addr_changed{false};
|
||||
};
|
||||
|
||||
template <typename M, typename T, GuestMemoryFlags FLAGS>
|
||||
class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
|
||||
public:
|
||||
GuestMemoryScoped() = delete;
|
||||
explicit GuestMemoryScoped(M& memory_, u64 addr_, std::size_t size_,
|
||||
Common::ScratchBuffer<T>* backup = nullptr)
|
||||
: GuestMemory<M, T, FLAGS>(memory_, addr_, size_, backup) {
|
||||
if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
|
||||
if (!this->TrySetSpan()) {
|
||||
if (backup) {
|
||||
this->data_span = *backup;
|
||||
this->span_valid = true;
|
||||
this->is_data_copy = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~GuestMemoryScoped() {
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Write) {
|
||||
if (this->size == 0) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->AddressChanged() || this->IsDataCopy()) {
|
||||
ASSERT(this->span_valid);
|
||||
if constexpr (FLAGS & GuestMemoryFlags::Cached) {
|
||||
this->memory.WriteBlockCached(this->addr, this->data_span.data(),
|
||||
this->size * sizeof(T));
|
||||
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
this->memory.WriteBlock(this->addr, this->data_span.data(),
|
||||
this->size * sizeof(T));
|
||||
} else {
|
||||
this->memory.WriteBlockUnsafe(this->addr, this->data_span.data(),
|
||||
this->size * sizeof(T));
|
||||
}
|
||||
} else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
|
||||
this->memory.InvalidateRegion(this->addr, this->size * sizeof(T));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
template <typename T, GuestMemoryFlags FLAGS>
|
||||
using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>;
|
||||
template <typename T, GuestMemoryFlags FLAGS>
|
||||
using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>;
|
||||
template <typename T, GuestMemoryFlags FLAGS>
|
||||
using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>;
|
||||
template <typename T, GuestMemoryFlags FLAGS>
|
||||
using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
|
||||
} // namespace Core::Memory
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue