diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp
index 5e94199e1..f02ddafdc 100644
--- a/src/core/libraries/kernel/memory.cpp
+++ b/src/core/libraries/kernel/memory.cpp
@@ -660,6 +660,9 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
                 "PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id,
                 address, size);
 
+    auto* memory = Core::Memory::Instance(); // Memory manager that will track this aperture.
+    memory->SetPrtArea(id, address, size); // NOTE(review): the log text above still says the aperture is not used; it may now be stale.
+
     PrtApertures[id] = {address, size};
     return ORBIS_OK;
 }
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 54cae910b..e738f85a1 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -95,6 +95,46 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
     return clamped_size;
 }
 
+void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) { // Stores [address, address + size) in PRT slot id and reports the range to the rasterizer as mapped.
+    PrtArea& area = prt_areas[id]; // NOTE(review): id is used as an index without a range check here; presumably validated by the caller - confirm.
+    if (area.mapped) { // Slot already holds an area: unmap its previous range before replacing it.
+        rasterizer->UnmapMemory(area.start, area.end - area.start);
+    }
+
+    area.start = address;
+    area.end = address + size; // Exclusive end; Overlaps() tests test_address < end.
+    area.mapped = true;
+
+    // Pretend the entire PRT area is mapped to avoid GPU tracking errors.
+    rasterizer->MapMemory(address, size); // The caches will use CopySparseMemory to fetch data which avoids unmapped areas.
+}
+
+void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) { // Copies size bytes from virtual_addr to dest; when the range overlaps a PRT area, free VMAs are zero-filled instead of read.
+    const bool is_sparse = std::ranges::any_of( // True when the range intersects any entry of prt_areas.
+        prt_areas, [&](const PrtArea& area) { return area.Overlaps(virtual_addr, size); });
+    if (!is_sparse) { // Fast path: no PRT area involved, copy the whole range directly.
+        std::memcpy(dest, std::bit_cast(virtual_addr), size); // NOTE(review): bit_cast has no target type in this copy of the diff; its template argument looks stripped - confirm against the real patch.
+        return;
+    }
+
+    auto vma = FindVMA(virtual_addr); // Used below as a pair: vma->first is the base address, vma->second the VMA.
+    ASSERT_MSG(vma->second.Contains(virtual_addr, 0), // Only this first VMA is validated; later ones are not re-checked.
+               "Attempted to access invalid GPU address {:#x}", virtual_addr);
+    while (size) { // Walk consecutive VMAs until every requested byte has been produced.
+        u64 copy_size = std::min(vma->second.size - (virtual_addr - vma->first), size); // Bytes left in this VMA from virtual_addr, capped at the remaining size.
+        if (vma->second.IsFree()) {
+            std::memset(dest, 0, copy_size); // Free VMA: produce zeros instead of reading from the address.
+        } else {
+            std::memcpy(dest, std::bit_cast(virtual_addr), copy_size); // NOTE(review): same missing bit_cast target type as in the fast path.
+        }
+        size -= copy_size;
+        virtual_addr += copy_size;
+        dest += copy_size;
+        ++vma; // NOTE(review): advances without an end-of-map check; assumes the VMAs cover the whole requested range - confirm.
+    }
+}
+
 bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
     const VAddr virtual_addr = std::bit_cast(address);
     const auto& vma = FindVMA(virtual_addr)->second;
diff --git a/src/core/memory.h b/src/core/memory.h
index b3ebe3c27..68f9c26c4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -172,6 +172,10 @@ public:
 
     u64 ClampRangeSize(VAddr virtual_addr, u64 size);
 
+    void SetPrtArea(u32 id, VAddr address, u64 size); // Registers PRT slot id as covering size bytes starting at address.
+
+    void CopySparseMemory(VAddr source, u8* dest, u64 size); // Copies size bytes from source to dest; the definition names the first parameter virtual_addr.
+
     bool TryWriteBacking(void* address, const void* data, u32 num_bytes);
 
     void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2);
@@ -275,6 +279,18 @@ private:
     size_t pool_budget{};
     Vulkan::Rasterizer* rasterizer{};
 
+    struct PrtArea { // One PRT slot: the half-open range [start, end) and whether it has been set.
+        VAddr start;
+        VAddr end; // Exclusive; SetPrtArea stores address + size here.
+        bool mapped; // Set to true by SetPrtArea.
+
+        bool Overlaps(VAddr test_address, u64 test_size) const { // True when [test_address, test_address + test_size) intersects [start, end).
+            const VAddr overlap_end = test_address + test_size;
+            return start < overlap_end && test_address < end;
+        }
+    };
+    std::array prt_areas{}; // Value-initialized, so each slot starts with mapped == false. NOTE(review): the std::array template arguments look stripped in this copy of the diff - confirm element type and count.
+
     friend class ::Core::Devtools::Widget::MemoryMapViewer;
 };
 
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index f53c111e9..e470f8e77 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -6,6 +6,7 @@
 #include "common/debug.h"
 #include "common/scope_exit.h"
 #include "common/types.h"
+#include "core/memory.h" // Declares MemoryManager::CopySparseMemory, which ObtainViewBuffer now calls.
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/host_shaders/fault_buffer_process_comp.h"
@@ -28,7 +29,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
                          Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
                          TextureCache& texture_cache_, PageManager& tracker_)
     : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
-      texture_cache{texture_cache_}, tracker{tracker_},
+      memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_}, // memory sits between liverpool and texture_cache, matching the member order in buffer_cache.h.
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
       download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize),
@@ -365,7 +366,9 @@ std::pair BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
         return ObtainBuffer(gpu_addr, size, false, false);
     }
     // In all other cases, just do a CPU copy to the staging buffer.
-    const u32 offset = staging_buffer.Copy(gpu_addr, size, 16);
+    const auto [data, offset] = staging_buffer.Map(size, 16); // Destination pointer and offset for size bytes; 16 is presumably the alignment - confirm.
+    memory->CopySparseMemory(gpu_addr, data, size); // Goes through the memory manager so free VMAs in PRT ranges are zero-filled instead of read.
+    staging_buffer.Commit(); // Pairs with the Map() call above.
     return {&staging_buffer, offset};
 }
 
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2d6551a7f..c2faf12c8 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -17,6 +17,10 @@ namespace AmdGpu {
 struct Liverpool;
 }
 
+namespace Core { // Forward declaration only: this header just stores a MemoryManager pointer.
+class MemoryManager;
+}
+
 namespace Shader {
 namespace Gcn {
 struct FetchShaderData;
@@ -183,6 +187,7 @@ private:
     Vulkan::Scheduler& scheduler;
     Vulkan::Rasterizer& rasterizer;
     AmdGpu::Liverpool* liverpool;
+    Core::MemoryManager* memory; // Initialized from Core::Memory::Instance() in the constructor.
     TextureCache& texture_cache;
     PageManager& tracker;
     StreamBuffer staging_buffer;