From 6cdc52cdde23a7bf35c6f316217155c04e99418f Mon Sep 17 00:00:00 2001
From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com>
Date: Thu, 29 May 2025 10:56:03 -0500
Subject: [PATCH 1/4] Core: More Memory Cleanup & Fixes (#2997)

* Only perform GPU memory mapping when the GPU can access it
This better aligns with hardware observations, and should also speed up unmaps and decommits, since they no longer need to be compared against the GPU max address.

* Reserve fixes
ReserveVirtualRange seems to follow the 0x200000000 base address like MemoryPoolReserve does. Both also need checks in their Fixed flags path to ensure we're mapping in-bounds. If we're not mapping into our address space, we'll end up reserving and returning the wrong address, which could lead to weird memory issues in games. I'll need to test on real hardware to verify whether such changes are appropriate.

* Better sceKernelMmap
Handles errors where we would previously throw exceptions. Also moves the file logic to MapFile, so that all the possible errors are in one place, and fixes some function parameters to align with our current standards.

* Major refactor
MapDirectMemory, MapFlexibleMemory, ReserveVirtualRange, and MemoryPoolReserve all internally use mmap to perform their mappings. Naturally, this means that all of these functions have similar behaviors and a lot of duplicate code. This adds the necessary conditional behavior to MapMemory so MemoryPoolReserve and ReserveVirtualRange can use it, without disrupting the behavior of MapDirectMemory or MapFlexibleMemory calls.

* Accurate phys_addr for non-direct mappings

* Properly handle GPU access rights
Since my first commit restricts GPU mappings to memory areas with GPU access permissions, we also need to update the GPU mappings appropriately during Protect calls.

* Update memory.cpp

* Update memory.h

* Update memory.cpp

* Update memory.cpp

* Update memory.cpp

* Revert "Update memory.cpp"
This reverts commit 2c55d014c0efbdfadee4121b01e1dcf5af60e63d.

* Coalesce dmem map
Aligns with hardware observations. Hopefully this shouldn't break anything, since nothing should change hardware-wise when release dmem calls and unmap calls are performed. Either that, or Windows breaks because Windows; will need to test.

* Implement posix_mprotect
Unity calls this. Also fixes the names of sceKernelMprotect and sceKernelMtypeprotect, though that's more of a style change and can be reverted if requested.

* Fix sceKernelSetVirtualRangeName
Partially addresses a "regression" introduced when I fixed up some asserts. As noted in the code, this implementation is still slightly inaccurate, as handling this properly could cause regressions on Windows.

* Unconditional assert in MapFile

* Remove protect warning
This is expected behavior, so it shouldn't need any logging.

* Respect alignment
Forgot to properly do this when updating ReserveVirtualRange and MemoryPoolReserve.

* Fix Mprotect on free memory
On real hardware, this just does nothing. If something did get protected, there's no way to query that information, so it seems safe to behave like munmap and return size here.

* Minor tidy-up
No functional difference, but looks better.
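To illustrate the unified reserve path described in the "Major refactor" bullet, here is a minimal caller-side sketch built from the calls added in this patch; the concrete length and flags values are illustrative assumptions, not part of the change:

    void* out_addr = nullptr;
    auto* memory = Core::Memory::Instance();
    const u64 len = 2_MB;
    const auto map_flags = static_cast<Core::MemoryMapFlags>(0);

    // sceKernelReserveVirtualRange now routes through MapMemory: no host mapping is
    // performed, a VMA tagged Reserved is carved, and with no fixed address the
    // search starts at 0x200000000 (default alignment 16 KB).
    memory->MapMemory(&out_addr, 0, len, Core::MemoryProt::NoAccess, map_flags,
                      Core::VMAType::Reserved, "anon", false, -1, 0);

    // sceKernelMemoryPoolReserve follows the same path, differing only in the
    // PoolReserved VMA type and its 2 MB default alignment.
    memory->MapMemory(&out_addr, 0, len, Core::MemoryProt::NoAccess, map_flags,
                      Core::VMAType::PoolReserved, "anon", false, -1, 2_MB);

Neither reserve call touches the host address space; only Direct, Flexible, and Code mappings in the diff below actually reach impl.Map.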
--- src/core/libraries/kernel/memory.cpp | 106 +++++++----- src/core/libraries/kernel/memory.h | 18 +- src/core/memory.cpp | 236 +++++++++++++-------------- src/core/memory.h | 14 +- 4 files changed, 193 insertions(+), 181 deletions(-) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index cb41a664a..ce694dc1e 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -8,7 +8,6 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/singleton.h" -#include "core/file_sys/fs.h" #include "core/libraries/kernel/kernel.h" #include "core/libraries/kernel/memory.h" #include "core/libraries/kernel/orbis_error.h" @@ -152,7 +151,8 @@ s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u const VAddr in_addr = reinterpret_cast(*addr); const auto map_flags = static_cast(flags); - s32 result = memory->Reserve(addr, in_addr, len, map_flags, alignment); + s32 result = memory->MapMemory(addr, in_addr, len, Core::MemoryProt::NoAccess, map_flags, + Core::VMAType::Reserved, "anon", false, -1, alignment); if (result == 0) { LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); } @@ -263,13 +263,22 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** return memory->QueryProtection(std::bit_cast(addr), start, end, prot); } -int PS4_SYSV_ABI sceKernelMProtect(const void* addr, size_t size, int prot) { +s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot) { Core::MemoryManager* memory_manager = Core::Memory::Instance(); Core::MemoryProt protection_flags = static_cast(prot); return memory_manager->Protect(std::bit_cast(addr), size, protection_flags); } -int PS4_SYSV_ABI sceKernelMTypeProtect(const void* addr, size_t size, int mtype, int prot) { +s32 PS4_SYSV_ABI posix_mprotect(const void* addr, u64 size, s32 prot) { + s32 result = sceKernelMprotect(addr, size, prot); + if (result < 0) { + ErrSceToPosix(result); + return -1; + } + return result; +} + +s32 PS4_SYSV_ABI sceKernelMtypeprotect(const void* addr, u64 size, s32 mtype, s32 prot) { Core::MemoryManager* memory_manager = Core::Memory::Instance(); Core::MemoryProt protection_flags = static_cast(prot); return memory_manager->Protect(std::bit_cast(addr), size, protection_flags); @@ -344,7 +353,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn break; } case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_PROTECT: { - result = sceKernelMProtect(entries[i].start, entries[i].length, entries[i].protection); + result = sceKernelMprotect(entries[i].start, entries[i].length, entries[i].protection); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, result = {}", i, entries[i].operation, entries[i].length, result); break; @@ -359,7 +368,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn break; } case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_TYPE_PROTECT: { - result = sceKernelMTypeProtect(entries[i].start, entries[i].length, entries[i].type, + result = sceKernelMtypeprotect(entries[i].start, entries[i].length, entries[i].type, entries[i].protection); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, result = {}", i, entries[i].operation, entries[i].length, result); @@ -380,7 +389,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn return result; } -s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name) { +s32 PS4_SYSV_ABI 
sceKernelSetVirtualRangeName(const void* addr, u64 len, const char* name) { if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return ORBIS_KERNEL_ERROR_EFAULT; @@ -396,8 +405,8 @@ s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, cons return ORBIS_OK; } -s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_t len, - size_t alignment, u64* physAddrOut) { +s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, u64 len, u64 alignment, + u64* physAddrOut) { if (searchStart < 0 || searchEnd <= searchStart) { LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -439,10 +448,10 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_ return ORBIS_OK; } -s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t alignment, int flags, - void** addrOut) { - LOG_INFO(Kernel_Vmm, "addrIn = {}, len = {:#x}, alignment = {:#x}, flags = {:#x}", - fmt::ptr(addrIn), len, alignment, flags); +s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addr_in, u64 len, u64 alignment, s32 flags, + void** addr_out) { + LOG_INFO(Kernel_Vmm, "addr_in = {}, len = {:#x}, alignment = {:#x}, flags = {:#x}", + fmt::ptr(addr_in), len, alignment, flags); if (len == 0 || !Common::Is2MBAligned(len)) { LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 2MB aligned!"); @@ -456,14 +465,16 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t ali } auto* memory = Core::Memory::Instance(); - const VAddr in_addr = reinterpret_cast(addrIn); + const VAddr in_addr = reinterpret_cast(addr_in); const auto map_flags = static_cast(flags); - memory->PoolReserve(addrOut, in_addr, len, map_flags, alignment); + u64 map_alignment = alignment == 0 ? 
2_MB : alignment; - return ORBIS_OK; + return memory->MapMemory(addr_out, std::bit_cast(addr_in), len, + Core::MemoryProt::NoAccess, map_flags, Core::VMAType::PoolReserved, + "anon", false, -1, map_alignment); } -s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int prot, int flags) { +s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, u64 len, s32 type, s32 prot, s32 flags) { if (addr == nullptr) { LOG_ERROR(Kernel_Vmm, "Address is invalid!"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -482,7 +493,7 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int return memory->PoolCommit(in_addr, len, mem_prot); } -s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags) { +s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, u64 len, s32 flags) { if (addr == nullptr) { LOG_ERROR(Kernel_Vmm, "Address is invalid!"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -523,12 +534,12 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolBatch(const OrbisKernelMemoryPoolBatchEntry* break; } case OrbisKernelMemoryPoolOpcode::Protect: { - result = sceKernelMProtect(entry.protect_params.addr, entry.protect_params.len, + result = sceKernelMprotect(entry.protect_params.addr, entry.protect_params.len, entry.protect_params.prot); break; } case OrbisKernelMemoryPoolOpcode::TypeProtect: { - result = sceKernelMTypeProtect( + result = sceKernelMtypeprotect( entry.type_protect_params.addr, entry.type_protect_params.len, entry.type_protect_params.type, entry.type_protect_params.prot); break; @@ -553,30 +564,48 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolBatch(const OrbisKernelMemoryPoolBatchEntry* return result; } -int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, size_t offset, - void** res) { - LOG_INFO(Kernel_Vmm, "called addr = {}, len = {}, prot = {}, flags = {}, fd = {}, offset = {}", - fmt::ptr(addr), len, prot, flags, fd, offset); - auto* h = Common::Singleton::Instance(); +void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, s32 prot, s32 flags, s32 fd, s64 phys_addr) { + LOG_INFO(Kernel_Vmm, + "called addr = {}, len = {}, prot = {}, flags = {}, fd = {}, phys_addr = {}", + fmt::ptr(addr), len, prot, flags, fd, phys_addr); + + void* addr_out; auto* memory = Core::Memory::Instance(); const auto mem_prot = static_cast(prot); const auto mem_flags = static_cast(flags); + + s32 result = ORBIS_OK; if (fd == -1) { - return memory->MapMemory(res, std::bit_cast(addr), len, mem_prot, mem_flags, - Core::VMAType::Flexible); + result = memory->MapMemory(&addr_out, std::bit_cast(addr), len, mem_prot, mem_flags, + Core::VMAType::Flexible); } else { - const uintptr_t handle = h->GetFile(fd)->f.GetFileMapping(); - return memory->MapFile(res, std::bit_cast(addr), len, mem_prot, mem_flags, handle, - offset); + result = memory->MapFile(&addr_out, std::bit_cast(addr), len, mem_prot, mem_flags, + fd, phys_addr); } + + if (result != ORBIS_OK) { + // If the memory mappings fail, mmap sets errno to the appropriate error code, + // then returns (void*)-1; + ErrSceToPosix(result); + return reinterpret_cast(-1); + } + + return addr_out; } -void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) { - void* ptr; - LOG_INFO(Kernel_Vmm, "posix mmap redirect to sceKernelMmap"); - int result = sceKernelMmap(addr, len, prot, flags, fd, offset, &ptr); - ASSERT(result == 0); - return ptr; +s32 PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, s32 prot, s32 flags, s32 fd, s64 phys_addr, + void** res) { + void* addr_out = 
posix_mmap(addr, len, prot, flags, fd, phys_addr); + + if (addr_out == reinterpret_cast(-1)) { + // posix_mmap failed, calculate and return the appropriate kernel error code using errno. + LOG_ERROR(Kernel_Fs, "error = {}", *__Error()); + return ErrnoToSceKernelError(*__Error()); + } + + // Set the outputted address + *res = addr_out; + return ORBIS_OK; } s32 PS4_SYSV_ABI sceKernelConfiguredFlexibleMemorySize(u64* sizeOut) { @@ -678,8 +707,9 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("n1-v6FgU7MQ", "libkernel", 1, "libkernel", 1, 1, sceKernelConfiguredFlexibleMemorySize); - LIB_FUNCTION("9bfdLIyuwCY", "libkernel", 1, "libkernel", 1, 1, sceKernelMTypeProtect); - LIB_FUNCTION("vSMAm3cxYTY", "libkernel", 1, "libkernel", 1, 1, sceKernelMProtect); + LIB_FUNCTION("vSMAm3cxYTY", "libkernel", 1, "libkernel", 1, 1, sceKernelMprotect); + LIB_FUNCTION("YQOfxL4QfeU", "libScePosix", 1, "libkernel", 1, 1, posix_mprotect); + LIB_FUNCTION("9bfdLIyuwCY", "libkernel", 1, "libkernel", 1, 1, sceKernelMtypeprotect); // Memory pool LIB_FUNCTION("qCSfqDILlns", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolExpand); diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 92e158a00..6cefe0d07 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -147,9 +147,9 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int flags); int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot); -int PS4_SYSV_ABI sceKernelMProtect(const void* addr, size_t size, int prot); +s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot); -int PS4_SYSV_ABI sceKernelMTypeProtect(const void* addr, size_t size, int mtype, int prot); +s32 PS4_SYSV_ABI sceKernelMtypeprotect(const void* addr, u64 size, s32 mtype, s32 prot); int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info, size_t infoSize); @@ -165,14 +165,14 @@ s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEnt s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEntries, int* numEntriesOut, int flags); -s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name); +s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, u64 len, const char* name); -s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_t len, - size_t alignment, u64* physAddrOut); -s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t alignment, int flags, - void** addrOut); -s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int prot, int flags); -s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags); +s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, u64 len, u64 alignment, + u64* physAddrOut); +s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addr_in, u64 len, u64 alignment, s32 flags, + void** addr_out); +s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, u64 len, s32 type, s32 prot, s32 flags); +s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, u64 len, s32 flags); s32 PS4_SYSV_ABI sceKernelMemoryPoolBatch(const OrbisKernelMemoryPoolBatchEntry* entries, s32 count, s32* num_processed, s32 flags); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index ca6a0d6cd..ab59219b2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -5,6 +5,7 @@ 
#include "common/assert.h" #include "common/config.h" #include "common/debug.h" +#include "core/file_sys/fs.h" #include "core/libraries/kernel/memory.h" #include "core/libraries/kernel/orbis_error.h" #include "core/libraries/kernel/process.h" @@ -181,6 +182,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, auto& area = CarveDmemArea(mapping_start, size)->second; area.memory_type = memory_type; area.is_free = false; + MergeAdjacent(dmem_map, dmem_area); return mapping_start; } @@ -214,90 +216,6 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) { MergeAdjacent(dmem_map, dmem_area); } -int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, - MemoryMapFlags flags, u64 alignment) { - std::scoped_lock lk{mutex}; - alignment = alignment > 0 ? alignment : 2_MB; - VAddr min_address = Common::AlignUp(impl.SystemManagedVirtualBase(), alignment); - VAddr mapped_addr = Common::AlignUp(virtual_addr, alignment); - - // Fixed mapping means the virtual address must exactly match the provided one. - if (True(flags & MemoryMapFlags::Fixed)) { - // Make sure we're mapping to a valid address - mapped_addr = mapped_addr > min_address ? mapped_addr : min_address; - auto vma = FindVMA(mapped_addr)->second; - size_t remaining_size = vma.base + vma.size - mapped_addr; - // If the VMA is mapped or there's not enough space, unmap the region first. - if (vma.IsMapped() || remaining_size < size) { - UnmapMemoryImpl(mapped_addr, size); - vma = FindVMA(mapped_addr)->second; - } - } - - if (False(flags & MemoryMapFlags::Fixed)) { - // When MemoryMapFlags::Fixed is not specified, and mapped_addr is 0, - // search from address 0x200000000 instead. - mapped_addr = mapped_addr == 0 ? 0x200000000 : mapped_addr; - mapped_addr = SearchFree(mapped_addr, size, alignment); - if (mapped_addr == -1) { - // No suitable memory areas to map to - return ORBIS_KERNEL_ERROR_ENOMEM; - } - } - - // Add virtual memory area - const auto new_vma_handle = CarveVMA(mapped_addr, size); - auto& new_vma = new_vma_handle->second; - new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); - new_vma.prot = MemoryProt::NoAccess; - new_vma.name = "anon"; - new_vma.type = VMAType::PoolReserved; - - *out_addr = std::bit_cast(mapped_addr); - return ORBIS_OK; -} - -int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags, - u64 alignment) { - std::scoped_lock lk{mutex}; - - virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; - alignment = alignment > 0 ? alignment : 16_KB; - VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr; - - // Fixed mapping means the virtual address must exactly match the provided one. - if (True(flags & MemoryMapFlags::Fixed)) { - auto vma = FindVMA(mapped_addr)->second; - size_t remaining_size = vma.base + vma.size - mapped_addr; - // If the VMA is mapped or there's not enough space, unmap the region first. - if (vma.IsMapped() || remaining_size < size) { - UnmapMemoryImpl(mapped_addr, size); - vma = FindVMA(mapped_addr)->second; - } - } - - // Find the first free area starting with provided virtual address. 
- if (False(flags & MemoryMapFlags::Fixed)) { - mapped_addr = SearchFree(mapped_addr, size, alignment); - if (mapped_addr == -1) { - // No suitable memory areas to map to - return ORBIS_KERNEL_ERROR_ENOMEM; - } - } - - // Add virtual memory area - const auto new_vma_handle = CarveVMA(mapped_addr, size); - auto& new_vma = new_vma_handle->second; - new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); - new_vma.prot = MemoryProt::NoAccess; - new_vma.name = "anon"; - new_vma.type = VMAType::Reserved; - MergeAdjacent(vma_map, new_vma_handle); - - *out_addr = std::bit_cast(mapped_addr); - return ORBIS_OK; -} - int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) { std::scoped_lock lk{mutex}; @@ -344,14 +262,17 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false); TRACK_ALLOC(out_addr, size, "VMEM"); - if (IsValidGpuMapping(mapped_addr, size)) { + if (prot >= MemoryProt::GpuRead) { + // PS4s only map to GPU memory when the protection includes GPU access. + // If the address to map to is too high, PS4s throw a page fault and crash. + ASSERT_MSG(IsValidGpuMapping(mapped_addr, size), "Invalid address for GPU mapping"); rasterizer->MapMemory(mapped_addr, size); } return ORBIS_OK; } -int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, +s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot, MemoryMapFlags flags, VMAType type, std::string_view name, bool is_exec, PAddr phys_addr, u64 alignment) { std::scoped_lock lk{mutex}; @@ -366,17 +287,18 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; // Fixed mapping means the virtual address must exactly match the provided one. - if (True(flags & MemoryMapFlags::Fixed)) { + // On a PS4, the Fixed flag is ignored if address 0 is provided. + if (True(flags & MemoryMapFlags::Fixed) && virtual_addr != 0) { auto vma = FindVMA(mapped_addr)->second; - size_t remaining_size = vma.base + vma.size - mapped_addr; // There's a possible edge case where we're mapping to a partially reserved range. // To account for this, unmap any reserved areas within this mapping range first. auto unmap_addr = mapped_addr; auto unmap_size = size; + // If flag NoOverwrite is provided, don't overwrite mapped VMAs. // When it isn't provided, VMAs can be overwritten regardless of if they're mapped. while ((False(flags & MemoryMapFlags::NoOverwrite) || !vma.IsMapped()) && - unmap_addr < mapped_addr + size && remaining_size < size) { + unmap_addr < mapped_addr + size) { auto unmapped = UnmapBytesFromEntry(unmap_addr, vma, unmap_size); unmap_addr += unmapped; unmap_size -= unmapped; @@ -384,51 +306,69 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M } vma = FindVMA(mapped_addr)->second; - remaining_size = vma.base + vma.size - mapped_addr; + auto remaining_size = vma.base + vma.size - mapped_addr; if (vma.IsMapped() || remaining_size < size) { LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at address {:#x}", size, mapped_addr); return ORBIS_KERNEL_ERROR_ENOMEM; } - } - - // Find the first free area starting with provided virtual address. - if (False(flags & MemoryMapFlags::Fixed)) { - // Provided address needs to be aligned before we can map. 
+ } else { + // When MemoryMapFlags::Fixed is not specified, and mapped_addr is 0, + // search from address 0x200000000 instead. alignment = alignment > 0 ? alignment : 16_KB; - mapped_addr = SearchFree(Common::AlignUp(mapped_addr, alignment), size, alignment); + mapped_addr = virtual_addr == 0 ? 0x200000000 : mapped_addr; + mapped_addr = SearchFree(mapped_addr, size, alignment); if (mapped_addr == -1) { // No suitable memory areas to map to return ORBIS_KERNEL_ERROR_ENOMEM; } } - // Perform the mapping. - *out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec); - TRACK_ALLOC(*out_addr, size, "VMEM"); + // Create a memory area representing this mapping. + const auto new_vma_handle = CarveVMA(mapped_addr, size); + auto& new_vma = new_vma_handle->second; - auto& new_vma = CarveVMA(mapped_addr, size)->second; - new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); - new_vma.prot = prot; - new_vma.name = name; - new_vma.type = type; - new_vma.is_exec = is_exec; - - if (type == VMAType::Direct) { - new_vma.phys_base = phys_addr; - } + // If type is Flexible, we need to track how much flexible memory is used here. if (type == VMAType::Flexible) { flexible_usage += size; } - if (IsValidGpuMapping(mapped_addr, size)) { + new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); + new_vma.prot = prot; + new_vma.name = name; + new_vma.type = type; + new_vma.phys_base = phys_addr == -1 ? 0 : phys_addr; + new_vma.is_exec = is_exec; + + if (type == VMAType::Reserved) { + // Technically this should be done for direct and flexible mappings too, + // But some Windows-specific limitations make that hard to accomplish. + MergeAdjacent(vma_map, new_vma_handle); + } + + if (prot >= MemoryProt::GpuRead) { + // PS4s only map to GPU memory when the protection includes GPU access. + // If the address to map to is too high, PS4s throw a page fault and crash. + ASSERT_MSG(IsValidGpuMapping(mapped_addr, size), "Invalid address for GPU mapping"); rasterizer->MapMemory(mapped_addr, size); } + if (type == VMAType::Reserved || type == VMAType::PoolReserved) { + // For Reserved/PoolReserved mappings, we don't perform any address space allocations. + // Just set out_addr to mapped_addr instead. + *out_addr = std::bit_cast(mapped_addr); + } else { + // Type is either Direct, Flexible, or Code, these need to be mapped in our address space. + *out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec); + } + + TRACK_ALLOC(*out_addr, size, "VMEM"); return ORBIS_OK; } -int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, - MemoryMapFlags flags, uintptr_t fd, size_t offset) { +s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot, + MemoryMapFlags flags, s32 fd, s64 phys_addr) { + auto* h = Common::Singleton::Instance(); + VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; const size_t size_aligned = Common::AlignUp(size, 16_KB); @@ -449,8 +389,19 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } - // Map the file. 
- impl.MapFile(mapped_addr, size_aligned, offset, std::bit_cast(prot), fd); + // Get the file to map + auto file = h->GetFile(fd); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + const auto handle = file->f.GetFileMapping(); + + impl.MapFile(mapped_addr, size_aligned, phys_addr, std::bit_cast(prot), handle); + + if (prot >= MemoryProt::GpuRead) { + ASSERT_MSG(false, "Files cannot be mapped to GPU memory"); + } // Add virtual memory area auto& new_vma = CarveVMA(mapped_addr, size_aligned)->second; @@ -478,6 +429,7 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) { const bool is_exec = vma_base.is_exec; const auto start_in_vma = virtual_addr - vma_base_addr; const auto type = vma_base.type; + const auto prot = vma_base.prot; if (type != VMAType::PoolReserved && type != VMAType::Pooled) { LOG_ERROR(Kernel_Vmm, "Attempting to decommit non-pooled memory!"); @@ -489,7 +441,8 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) { pool_budget += size; } - if (IsValidGpuMapping(virtual_addr, size)) { + if (prot >= MemoryProt::GpuRead) { + // If this mapping has GPU access, unmap from GPU. rasterizer->UnmapMemory(virtual_addr, size); } @@ -528,6 +481,7 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma const auto adjusted_size = vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size; const bool has_backing = type == VMAType::Direct || type == VMAType::File; + const auto prot = vma_base.prot; if (type == VMAType::Free) { return adjusted_size; @@ -536,8 +490,9 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma flexible_usage -= adjusted_size; } - if (IsValidGpuMapping(virtual_addr, adjusted_size)) { - rasterizer->UnmapMemory(virtual_addr, adjusted_size); + if (prot >= MemoryProt::GpuRead) { + // If this mapping has GPU access, unmap from GPU. + rasterizer->UnmapMemory(virtual_addr, size); } // Mark region as free and attempt to coalesce it with neighbours. @@ -605,8 +560,8 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea vma_base, size_t s vma_base.size - start_in_vma < size ? vma_base.size - start_in_vma : size; if (vma_base.type == VMAType::Free) { - LOG_ERROR(Kernel_Vmm, "Cannot change protection on free memory region"); - return ORBIS_KERNEL_ERROR_EINVAL; + // On PS4, protecting freed memory does nothing. 
+ return adjusted_size; } // Validate protection flags @@ -621,6 +576,18 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea vma_base, size_t s return ORBIS_KERNEL_ERROR_EINVAL; } + if (vma_base.prot < MemoryProt::GpuRead && prot >= MemoryProt::GpuRead) { + // New protection will give the GPU access to this VMA, perform a rasterizer map + ASSERT_MSG(IsValidGpuMapping(addr, size), "Invalid address for GPU mapping"); + rasterizer->MapMemory(addr, size); + } + + if (vma_base.prot >= MemoryProt::GpuRead && prot < MemoryProt::GpuRead) { + // New protection will remove the GPU's access to this VMA, perform a rasterizer unmap + ASSERT_MSG(IsValidGpuMapping(addr, size), "Invalid address for GPU unmap"); + rasterizer->UnmapMemory(addr, size); + } + // Change protection vma_base.prot = prot; @@ -798,12 +765,31 @@ s32 MemoryManager::SetDirectMemoryType(s64 phys_addr, s32 memory_type) { return ORBIS_OK; } -void MemoryManager::NameVirtualRange(VAddr virtual_addr, size_t size, std::string_view name) { - auto it = FindVMA(virtual_addr); +void MemoryManager::NameVirtualRange(VAddr virtual_addr, u64 size, std::string_view name) { + // Sizes are aligned up to the nearest 16_KB + auto aligned_size = Common::AlignUp(size, 16_KB); + // Addresses are aligned down to the nearest 16_KB + auto aligned_addr = Common::AlignDown(virtual_addr, 16_KB); - ASSERT_MSG(it->second.Contains(virtual_addr, size), - "Range provided is not fully contained in vma"); - it->second.name = name; + auto it = FindVMA(aligned_addr); + s64 remaining_size = aligned_size; + auto current_addr = aligned_addr; + while (remaining_size > 0) { + // Nothing needs to be done to free VMAs + if (!it->second.IsFree()) { + if (remaining_size < it->second.size) { + // We should split VMAs here, but this could cause trouble for Windows. + // Instead log a warning and name the whole VMA. + // it = CarveVMA(current_addr, remaining_size); + LOG_WARNING(Kernel_Vmm, "Trying to partially name a range"); + } + auto& vma = it->second; + vma.name = name; + } + remaining_size -= it->second.size; + current_addr += it->second.size; + it = FindVMA(current_addr); + } } void MemoryManager::InvalidateMemory(const VAddr addr, const u64 size) const { @@ -824,6 +810,8 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) ASSERT_MSG(virtual_addr <= max_search_address, "Input address {:#x} is out of bounds", virtual_addr); + // Align up the virtual_addr first. + virtual_addr = Common::AlignUp(virtual_addr, alignment); auto it = FindVMA(virtual_addr); // If the VMA is free and contains the requested mapping we are done. 
diff --git a/src/core/memory.h b/src/core/memory.h index 883b48854..b3ebe3c27 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -183,20 +183,14 @@ public: void Free(PAddr phys_addr, size_t size); - int PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags, - u64 alignment = 0); - - int Reserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags, - u64 alignment = 0); - int PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot); - int MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, + s32 MapMemory(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot, MemoryMapFlags flags, VMAType type, std::string_view name = "anon", bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0); - int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, - MemoryMapFlags flags, uintptr_t fd, size_t offset); + s32 MapFile(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot, + MemoryMapFlags flags, s32 fd, s64 phys_addr); s32 PoolDecommit(VAddr virtual_addr, size_t size); @@ -221,7 +215,7 @@ public: s32 SetDirectMemoryType(s64 phys_addr, s32 memory_type); - void NameVirtualRange(VAddr virtual_addr, size_t size, std::string_view name); + void NameVirtualRange(VAddr virtual_addr, u64 size, std::string_view name); void InvalidateMemory(VAddr addr, u64 size) const; From 8fffdc39186f1748460c637fab251271fbc257d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Thu, 29 May 2025 21:20:16 +0200 Subject: [PATCH 2/4] Handle V_CVT_F64_U32 (#3008) --- src/shader_recompiler/frontend/translate/translate.h | 1 + src/shader_recompiler/frontend/translate/vector_alu.cpp | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 68d5e8dc8..7b4b03f27 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -183,6 +183,7 @@ public: void V_READFIRSTLANE_B32(const GcnInst& inst); void V_CVT_I32_F64(const GcnInst& inst); void V_CVT_F64_I32(const GcnInst& inst); + void V_CVT_F64_U32(const GcnInst& inst); void V_CVT_F32_I32(const GcnInst& inst); void V_CVT_F32_U32(const GcnInst& inst); void V_CVT_U32_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 6171cca07..fb3f52c7f 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -110,6 +110,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CVT_I32_F64(inst); case Opcode::V_CVT_F64_I32: return V_CVT_F64_I32(inst); + case Opcode::V_CVT_F64_U32: + return V_CVT_F64_U32(inst); case Opcode::V_CVT_F32_I32: return V_CVT_F32_I32(inst); case Opcode::V_CVT_F32_U32: @@ -684,6 +686,11 @@ void Translator::V_CVT_F64_I32(const GcnInst& inst) { SetDst64(inst.dst[0], ir.ConvertSToF(64, 32, src0)); } +void Translator::V_CVT_F64_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + SetDst64(inst.dst[0], ir.ConvertUToF(64, 32, src0)); +} + void Translator::V_CVT_F32_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; SetDst(inst.dst[0], ir.ConvertSToF(32, 32, src0)); From 2091bc56513fd072ae8477b51c9a20be7817e618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Fri, 30 May 2025 01:56:24 +0200 Subject: [PATCH 3/4] 
Handle R128 bit in MIMG instructions (#3010) --- src/shader_recompiler/frontend/decode.cpp | 1 - .../frontend/translate/vector_memory.cpp | 5 +++++ src/shader_recompiler/info.h | 9 ++++++++- .../ir/passes/resource_tracking_pass.cpp | 1 + src/shader_recompiler/ir/reg.h | 1 + 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp index 20b78e869..37e8a0973 100644 --- a/src/shader_recompiler/frontend/decode.cpp +++ b/src/shader_recompiler/frontend/decode.cpp @@ -1032,7 +1032,6 @@ void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) { m_instruction.control.mimg = *reinterpret_cast(&hexInstruction); m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode); - ASSERT(m_instruction.control.mimg.r128 == 0); } void GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) { diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 5639bc56a..8c1946390 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -377,6 +377,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { IR::TextureInstInfo info{}; info.has_lod.Assign(has_mip); info.is_array.Assign(mimg.da); + info.is_r128.Assign(mimg.r128); const IR::Value texel = ir.ImageRead(handle, body, {}, {}, info); for (u32 i = 0; i < 4; i++) { @@ -426,6 +427,7 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { IR::TextureInstInfo info{}; info.is_array.Assign(mimg.da); + info.is_r128.Assign(mimg.r128); const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips), info); @@ -451,6 +453,7 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { IR::TextureInstInfo info{}; info.is_array.Assign(mimg.da); + info.is_r128.Assign(mimg.r128); const IR::Value value = ir.GetVectorReg(val_reg); const IR::Value handle = ir.GetScalarReg(tsharp_reg); @@ -509,6 +512,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal info.has_lod.Assign(flags.any(MimgModifier::Lod)); info.is_array.Assign(mimg.da); info.is_unnormalized.Assign(mimg.unrm); + info.is_r128.Assign(mimg.r128); if (gather) { info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1); @@ -617,6 +621,7 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) { IR::TextureInstInfo info{}; info.is_array.Assign(mimg.da); + info.is_r128.Assign(mimg.r128); const IR::Value handle = ir.GetScalarReg(tsharp_reg); const IR::Value body = ir.CompositeConstruct( diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index d349d7827..24e0741c1 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -84,6 +84,7 @@ struct ImageResource { bool is_atomic{}; bool is_array{}; bool is_written{}; + bool is_r128{}; [[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; }; @@ -293,7 +294,13 @@ constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexce } constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { - const auto image = info.ReadUdSharp(sharp_idx); + AmdGpu::Image image{0}; + if (!is_r128) { + image = info.ReadUdSharp(sharp_idx); + } else { + AmdGpu::Buffer buf = info.ReadUdSharp(sharp_idx); + memcpy(&image, &buf, sizeof(buf)); + } if (!image.Valid()) { // Fall back to null image if unbound. 
return AmdGpu::Image::Null(); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index cc0bf83d3..18c77e600 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -411,6 +411,7 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& .is_atomic = IsImageAtomicInstruction(inst), .is_array = bool(inst_info.is_array), .is_written = is_written, + .is_r128 = bool(inst_info.is_r128), }); IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 622190cf0..82aa436a7 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -44,6 +44,7 @@ union TextureInstInfo { BitField<9, 1, u32> is_array; BitField<10, 1, u32> is_unnormalized; BitField<11, 1, u32> is_gather; + BitField<12, 1, u32> is_r128; }; union BufferInstInfo { From 790b54bf2953fd03b933c2e5d5247db41b76444a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Fri, 30 May 2025 03:51:36 +0200 Subject: [PATCH 4/4] Misc opcodes fixes (#3009) --- src/shader_recompiler/frontend/translate/translate.cpp | 2 +- src/shader_recompiler/frontend/translate/vector_memory.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index e49f95d9a..5675adf3c 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -380,7 +380,7 @@ T Translator::GetSrc64(const InstOperand& operand) { break; case OperandField::VccLo: if constexpr (is_float) { - UNREACHABLE(); + value = ir.PackDouble2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi())); } else { value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi())); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 8c1946390..5c972c607 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -152,6 +152,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { // Image gather operations case Opcode::IMAGE_GATHER4: + case Opcode::IMAGE_GATHER4_L: case Opcode::IMAGE_GATHER4_LZ: case Opcode::IMAGE_GATHER4_C: case Opcode::IMAGE_GATHER4_O: