Merge pull request #10942 from FernandoS27/android-is-a-pain-in-the-a--
Memory Tracking: Add mechanism to register small writes when gpu page is contested by GPU
This commit is contained in:
commit
eaa62aee98
20 changed files with 329 additions and 41 deletions
|
@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
|
||||
memory_tracker.CachedCpuWrite(cpu_addr, size);
|
||||
const bool is_dirty = IsRegionRegistered(cpu_addr, size);
|
||||
if (!is_dirty) {
|
||||
return;
|
||||
}
|
||||
VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
|
||||
VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
|
||||
if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
|
||||
WriteMemory(cpu_addr, size);
|
||||
return;
|
||||
}
|
||||
|
||||
tmp_buffer.resize_destructive(size);
|
||||
cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
|
||||
|
||||
InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) {
|
||||
const bool is_dirty = IsRegionRegistered(cpu_addr, size);
|
||||
if (!is_dirty) {
|
||||
return false;
|
||||
}
|
||||
if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
|
||||
return true;
|
||||
}
|
||||
WriteMemory(cpu_addr, size);
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
|
|||
return false;
|
||||
}
|
||||
|
||||
InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
|
||||
std::span<const u8> inlined_buffer) {
|
||||
const IntervalType subtract_interval{dest_address, dest_address + copy_size};
|
||||
ClearDownload(subtract_interval);
|
||||
common_ranges.subtract(subtract_interval);
|
||||
|
@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
|
|||
} else {
|
||||
buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
|
|
@ -245,6 +245,8 @@ public:
|
|||
|
||||
void CachedWriteMemory(VAddr cpu_addr, u64 size);
|
||||
|
||||
bool OnCPUWrite(VAddr cpu_addr, u64 size);
|
||||
|
||||
void DownloadMemory(VAddr cpu_addr, u64 size);
|
||||
|
||||
std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
|
||||
|
@ -543,6 +545,9 @@ private:
|
|||
|
||||
void ClearDownload(IntervalType subtract_interval);
|
||||
|
||||
void InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
|
||||
std::span<const u8> inlined_buffer);
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
|
||||
|
|
|
@ -69,7 +69,6 @@ public:
|
|||
}
|
||||
|
||||
void SignalFence(std::function<void()>&& func) {
|
||||
rasterizer.InvalidateGPUCache();
|
||||
bool delay_fence = Settings::IsGPULevelHigh();
|
||||
if constexpr (!can_async_check) {
|
||||
TryReleasePendingFences<false>();
|
||||
|
@ -96,6 +95,7 @@ public:
|
|||
guard.unlock();
|
||||
cv.notify_all();
|
||||
}
|
||||
rasterizer.InvalidateGPUCache();
|
||||
}
|
||||
|
||||
void SignalSyncPoint(u32 value) {
|
||||
|
|
|
@ -95,7 +95,9 @@ struct GPU::Impl {
|
|||
|
||||
/// Synchronizes CPU writes with Host GPU memory.
|
||||
void InvalidateGPUCache() {
|
||||
rasterizer->InvalidateGPUCache();
|
||||
std::function<void(VAddr, size_t)> callback_writes(
|
||||
[this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
|
||||
system.GatherGPUDirtyMemory(callback_writes);
|
||||
}
|
||||
|
||||
/// Signal the ending of command list.
|
||||
|
@ -299,6 +301,10 @@ struct GPU::Impl {
|
|||
gpu_thread.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
/// Forwards a CPU write notification for [addr, addr + size) to the rasterizer and returns
/// the rasterizer's result unchanged.
bool OnCPUWrite(VAddr addr, u64 size) {
|
||||
return rasterizer->OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
gpu_thread.FlushAndInvalidateRegion(addr, size);
|
||||
|
@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) {
|
|||
impl->InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
// Public entry point: delegates to the implementation object and returns its result.
bool GPU::OnCPUWrite(VAddr addr, u64 size) {
|
||||
return impl->OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
impl->FlushAndInvalidateRegion(addr, size);
|
||||
}
|
||||
|
|
|
@ -250,6 +250,10 @@ public:
|
|||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
void InvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
|
||||
/// sensitive (currently modified by the GPU), false otherwise
|
||||
bool OnCPUWrite(VAddr addr, u64 size);
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size);
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
|
|||
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
|
||||
rasterizer->FlushRegion(flush->addr, flush->size);
|
||||
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
|
||||
rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);
|
||||
} else {
|
||||
ASSERT(false);
|
||||
}
|
||||
|
@ -102,12 +102,12 @@ void ThreadManager::TickGPU() {
|
|||
}
|
||||
|
||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||
rasterizer->OnCPUWrite(addr, size);
|
||||
rasterizer->OnCacheInvalidation(addr, size);
|
||||
}
|
||||
|
||||
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
// Skip flush in async mode, as FlushAndInvalidateRegion is not used for anything too important
|
||||
rasterizer->OnCPUWrite(addr, size);
|
||||
rasterizer->OnCacheInvalidation(addr, size);
|
||||
}
|
||||
|
||||
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
|
||||
|
|
|
@ -109,7 +109,9 @@ public:
|
|||
}
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region are desynchronized with the guest
|
||||
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
|
||||
virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0;
|
||||
|
||||
virtual bool OnCPUWrite(VAddr addr, u64 size) = 0;
|
||||
|
||||
/// Sync memory between guest and host.
|
||||
virtual void InvalidateGPUCache() = 0;
|
||||
|
|
|
@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp
|
|||
return false;
|
||||
}
|
||||
void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
|
||||
void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {}
|
||||
// Null rasterizer: ignores the write and always returns false.
bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {
|
||||
return false;
|
||||
}
|
||||
void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {}
|
||||
VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {
|
||||
VideoCore::RasterizerDownloadArea new_area{
|
||||
.start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
|
||||
|
|
|
@ -53,7 +53,8 @@ public:
|
|||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void OnCacheInvalidation(VAddr addr, u64 size) override;
|
||||
bool OnCPUWrite(VAddr addr, u64 size) override;
|
||||
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
|
|
|
@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
||||
/// Notifies the OpenGL rasterizer caches of a CPU write to [addr, addr + size).
///
/// @return true when the buffer cache's OnCPUWrite returns true; in that case the texture and
///         shader caches are left untouched. Returns false for a null address or zero size,
///         and after the texture and shader caches have been notified.
bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    const bool has_range = addr != 0 && size != 0;
    if (!has_range) {
        return false;
    }

    // The buffer cache is queried under its own mutex, which is released before any other
    // cache is touched.
    const bool contested = [this, addr, size] {
        std::scoped_lock buffer_guard{buffer_cache.mutex};
        return buffer_cache.OnCPUWrite(addr, size);
    }();
    if (contested) {
        return true;
    }

    {
        std::scoped_lock texture_guard{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }

    shader_cache.InvalidateRegion(addr, size);
    return false;
}
|
||||
|
||||
void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
|
@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
|||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.CachedWriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateGPUCache() {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
shader_cache.SyncGuestHost();
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.FlushCachedWrites();
|
||||
}
|
||||
gpu.InvalidateGPUCache();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||
|
@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
|||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
shader_cache.OnCacheInvalidation(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
|
||||
|
|
|
@ -98,7 +98,8 @@ public:
|
|||
VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void OnCacheInvalidation(VAddr addr, u64 size) override;
|
||||
bool OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
||||
|
|
|
@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
||||
/// Notifies the Vulkan rasterizer caches of a CPU write to [addr, addr + size).
///
/// @return true when the buffer cache's OnCPUWrite returns true; in that case the texture and
///         pipeline caches are left untouched. Returns false for a null address or zero size,
///         and after the texture and pipeline caches have been notified.
bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
    const bool has_range = addr != 0 && size != 0;
    if (!has_range) {
        return false;
    }

    // The buffer cache is queried under its own mutex, which is released before any other
    // cache is touched.
    const bool contested = [this, addr, size] {
        std::scoped_lock buffer_guard{buffer_cache.mutex};
        return buffer_cache.OnCPUWrite(addr, size);
    }();
    if (contested) {
        return true;
    }

    {
        std::scoped_lock texture_guard{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }

    pipeline_cache.InvalidateRegion(addr, size);
    return false;
}
|
||||
|
||||
void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
pipeline_cache.OnCPUWrite(addr, size);
|
||||
|
||||
{
|
||||
std::scoped_lock lock{texture_cache.mutex};
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
|
@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
|||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.CachedWriteMemory(addr, size);
|
||||
}
|
||||
pipeline_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::InvalidateGPUCache() {
|
||||
pipeline_cache.SyncGuestHost();
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.FlushCachedWrites();
|
||||
}
|
||||
gpu.InvalidateGPUCache();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
|
||||
|
@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
|
|||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.WriteMemory(addr, size);
|
||||
}
|
||||
pipeline_cache.OnCPUWrite(addr, size);
|
||||
pipeline_cache.OnCacheInvalidation(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
|
||||
|
|
|
@ -96,7 +96,8 @@ public:
|
|||
void InvalidateRegion(VAddr addr, u64 size,
|
||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void OnCacheInvalidation(VAddr addr, u64 size) override;
|
||||
bool OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
||||
|
|
|
@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
|
|||
RemovePendingShaders();
|
||||
}
|
||||
|
||||
void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
|
||||
// Calls InvalidatePagesInRegion for [addr, addr + size) while holding invalidation_mutex.
void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ public:
|
|||
/// @brief Unmarks a memory region as cached and marks it for removal
|
||||
/// @param addr Start address of the CPU write operation
|
||||
/// @param size Number of bytes of the CPU write operation
|
||||
void OnCPUWrite(VAddr addr, size_t size);
|
||||
void OnCacheInvalidation(VAddr addr, size_t size);
|
||||
|
||||
/// @brief Flushes delayed removal operations
|
||||
void SyncGuestHost();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue