Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU
This commit is contained in:
parent
47d0d292d5
commit
da440da9f5
19 changed files with 153 additions and 38 deletions
|
@ -55,7 +55,6 @@
|
|||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
|
||||
MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
|
||||
MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
|
||||
MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
|
||||
|
@ -132,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
|
|||
struct System::Impl {
|
||||
explicit Impl(System& system)
|
||||
: kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
|
||||
cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {}
|
||||
cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system},
|
||||
gpu_dirty_memory_write_manager{} {
|
||||
memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
|
||||
}
|
||||
|
||||
void Initialize(System& system) {
|
||||
device_memory = std::make_unique<Core::DeviceMemory>();
|
||||
|
@ -236,6 +238,8 @@ struct System::Impl {
|
|||
// Setting changes may require a full system reinitialization (e.g., disabling multicore).
|
||||
ReinitializeIfNecessary(system);
|
||||
|
||||
memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
|
||||
|
||||
kernel.Initialize();
|
||||
cpu_manager.Initialize();
|
||||
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
|
@ -59,8 +62,7 @@ public:
|
|||
mask = mask >> empty_bits;
|
||||
|
||||
const size_t continuous_bits = std::countr_one(mask);
|
||||
callback((transform.address << Memory::YUZU_PAGEBITS) + offset,
|
||||
continuous_bits << align_bits);
|
||||
callback((transform.address << page_bits) + offset, continuous_bits << align_bits);
|
||||
mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
|
||||
offset += continuous_bits << align_bits;
|
||||
}
|
||||
|
@ -74,6 +76,10 @@ private:
|
|||
u64 mask;
|
||||
};
|
||||
|
||||
constexpr static size_t page_bits = Memory::YUZU_PAGEBITS;
|
||||
constexpr static size_t page_size = 1ULL << page_bits;
|
||||
constexpr static size_t page_mask = page_size - 1;
|
||||
|
||||
constexpr static size_t align_bits = 6U;
|
||||
constexpr static size_t align_size = 1U << align_bits;
|
||||
constexpr static size_t align_mask = align_size - 1;
|
||||
|
@ -94,11 +100,11 @@ private:
|
|||
}
|
||||
|
||||
TransformAddress BuildTransform(VAddr address, size_t size) {
|
||||
const size_t minor_address = address & Memory::YUZU_PAGEMASK;
|
||||
const size_t minor_address = address & page_mask;
|
||||
const size_t minor_bit = minor_address >> align_bits;
|
||||
const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
|
||||
TransformAddress result{};
|
||||
result.address = address >> Memory::YUZU_PAGEBITS;
|
||||
result.address = address >> page_bits;
|
||||
result.mask = CreateMask<u64>(top_bit, minor_bit);
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <span>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/atomic_ops.h"
|
||||
|
@ -679,7 +680,7 @@ struct Memory::Impl {
|
|||
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
|
||||
GetInteger(vaddr), static_cast<u64>(data));
|
||||
},
|
||||
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
|
||||
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
|
||||
if (ptr) {
|
||||
std::memcpy(ptr, &data, sizeof(T));
|
||||
}
|
||||
|
@ -693,7 +694,7 @@ struct Memory::Impl {
|
|||
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
|
||||
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
|
||||
},
|
||||
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
|
||||
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
|
||||
if (ptr) {
|
||||
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
|
||||
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
||||
|
@ -708,7 +709,7 @@ struct Memory::Impl {
|
|||
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
|
||||
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
|
||||
},
|
||||
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); });
|
||||
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });
|
||||
if (ptr) {
|
||||
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
|
||||
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
|
||||
|
@ -718,7 +719,7 @@ struct Memory::Impl {
|
|||
|
||||
void HandleRasterizerDownload(VAddr address, size_t size) {
|
||||
const size_t core = system.GetCurrentHostThreadID();
|
||||
auto& current_area = rasterizer_areas[core];
|
||||
auto& current_area = rasterizer_read_areas[core];
|
||||
const VAddr end_address = address + size;
|
||||
if (current_area.start_address <= address && end_address <= current_area.end_address)
|
||||
[[likely]] {
|
||||
|
@ -727,9 +728,31 @@ struct Memory::Impl {
|
|||
current_area = system.GPU().OnCPURead(address, size);
|
||||
}
|
||||
|
||||
Common::PageTable* current_page_table = nullptr;
|
||||
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{};
|
||||
// Records a CPU write of `size` bytes at `address` in the dirty manager that
// belongs to the calling host thread, but only when the GPU reports interest
// in the written page or the page matches the one cached from the previous
// accepted write on this thread.
void HandleRasterizerWrite(VAddr address, size_t size) {
|
||||
// Both the per-thread cache slot and the dirty manager are indexed by host thread id.
const size_t core = system.GetCurrentHostThreadID();
|
||||
auto& current_area = rasterizer_write_areas[core];
|
||||
// Page index of the first written byte; `size` does not take part in the cache check.
// NOTE(review): a write that crosses a page boundary is therefore judged by its
// first page only - confirm that this is intended.
VAddr subaddress = address >> YUZU_PAGEBITS;
|
||||
// Fast path: same page as the last write that was accepted on this thread.
// NOTE(review): rasterizer_write_areas is value-initialized, so last_address starts
// at 0 and page 0 compares equal before any GPU query was made - confirm harmless.
bool do_collection = current_area.last_address == subaddress;
|
||||
if (!do_collection) [[unlikely]] {
|
||||
// Cache miss: ask the GPU whether this write has to be tracked.
do_collection = system.GPU().OnCPUWrite(address, size);
|
||||
if (!do_collection) {
|
||||
// Nothing is collected and the cached page is left unchanged.
return;
|
||||
}
|
||||
// Remember the page so that following writes to it skip the GPU query.
current_area.last_address = subaddress;
|
||||
}
|
||||
gpu_dirty_managers[core].Collect(address, size);
|
||||
}
|
||||
|
||||
// Per-host-thread cache used by HandleRasterizerWrite to skip repeated GPU queries.
struct GPUDirtyState {
|
||||
// Page index (address >> YUZU_PAGEBITS) of the last write that was accepted for collection.
VAddr last_address;
|
||||
};
|
||||
|
||||
Core::System& system;
|
||||
Common::PageTable* current_page_table = nullptr;
|
||||
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
|
||||
rasterizer_read_areas{};
|
||||
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
|
||||
std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
|
||||
};
|
||||
|
||||
Memory::Memory(Core::System& system_) : system{system_} {
|
||||
|
@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)
|
|||
impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
|
||||
}
|
||||
|
||||
// Stores the span of dirty managers that the write handlers index by host thread id.
// std::span is a non-owning view, so only the view is copied here.
// NOTE(review): the caller presumably has to keep the managers alive for as long as
// this Memory instance uses them - confirm against the owner of the storage.
void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) {
|
||||
impl->gpu_dirty_managers = managers;
|
||||
}
|
||||
|
||||
Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
|
||||
return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include "common/typed_address.h"
|
||||
#include "core/hle/result.h"
|
||||
|
@ -15,7 +16,8 @@ struct PageTable;
|
|||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
class GPUDirtyMemoryManager;
|
||||
} // namespace Core
|
||||
|
||||
namespace Kernel {
|
||||
class PhysicalMemory;
|
||||
|
@ -458,6 +460,8 @@ public:
|
|||
*/
|
||||
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
|
||||
|
||||
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue