Core: Pooled Memory Fixes (#2895)

* Update sceKernelMemoryPoolExpand

Hardware tests show that this function is basically the same as sceKernelAllocateDirectMemory, with some minor differences.
Update the memory searching code to match my updated AllocateDirectMemory code, with appropriate error conditions.
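
As a rough illustration of the resulting behavior (the wrapper's signature is truncated in the diff, so the trailing parameters and the exact out-pointer type are assumptions, and the values below are arbitrary):

    s64 phys_addr = 0;
    // len == 0 or not 64KB-aligned                -> ORBIS_KERNEL_ERROR_EINVAL
    // alignment != 0 and not 64KB-aligned         -> ORBIS_KERNEL_ERROR_EINVAL
    // physAddrOut == nullptr                      -> ORBIS_KERNEL_ERROR_EINVAL
    // searchEnd <= searchStart, or range < len    -> ORBIS_KERNEL_ERROR_ENOMEM
    // no free dmem area fits in the range         -> ORBIS_KERNEL_ERROR_ENOMEM
    s32 ret = sceKernelMemoryPoolExpand(0, 0x180000000, 0x200000, 0, &phys_addr);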

* Update MemoryPoolReserve

The only difference between real hardware and our code is the behavior when addr = 0.
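
For illustration, a minimal sketch against the internal MemoryManager::PoolReserve shown in the diff (the size/flag values and the NoFlags enumerator are assumptions, not taken from this change):

    auto* memory = Core::Memory::Instance();
    void* out_addr = nullptr;
    // With virtual_addr == 0 and no Fixed flag, the search now starts at
    // 0x200000000 instead of treating the null address as an error.
    s32 ret = memory->PoolReserve(&out_addr, /*virtual_addr=*/0, /*size=*/0x200000,
                                  Core::MemoryMapFlags::NoFlags, /*alignment=*/0);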

* Don't coalesce PoolReserved areas.

Real hardware doesn't coalesce them.

* Update PoolCommit

There are plenty of edge-case behaviors to handle here:
addresses are treated as fixed, EINVAL is returned for bad mappings, the name should be preserved from the PoolReserve call, committed areas should coalesce, and reserved areas get their phys_base updated.
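
As a hedged sketch of the fixed-address and EINVAL behavior only (the coalescing and phys_base parts are walked back later in this PR; the enum names and sizes here are assumptions taken from the surrounding code):

    auto* memory = Core::Memory::Instance();
    void* base = nullptr;
    // Assumes PoolExpand has already added enough direct memory to the pool.
    memory->PoolReserve(&base, 0, 0x200000, Core::MemoryMapFlags::NoFlags, 0);
    const VAddr addr = reinterpret_cast<VAddr>(base);
    // Commit inside the reservation: the address is used as-is (fixed).
    s32 ok = memory->PoolCommit(addr, 0x10000, Core::MemoryProt::CpuReadWrite);
    // Committing an address that was never PoolReserved returns EINVAL.
    s32 bad = memory->PoolCommit(addr + 0x200000, 0x10000, Core::MemoryProt::CpuReadWrite);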

* Formatting

* Adjust fixed PoolReserve path

Hardware tests suggest this will overwrite all VMAs in the range. Run UnmapMemoryImpl on the full area, then reserve. Same logic applies to normal reservations too.

Also adjusts the logic of the non-fixed path to align more closely with hardware observations.
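
A condensed sketch of the fixed-path logic this describes (simplified from the memory.cpp hunk further down, not the verbatim code):

    // Inside PoolReserve/Reserve when MemoryMapFlags::Fixed is set:
    auto vma = FindVMA(mapped_addr)->second;
    const size_t remaining_size = vma.base + vma.size - mapped_addr;
    // Anything already mapped in the requested range, or a VMA too small to hold
    // the reservation, is unmapped across the full length before reserving.
    if (vma.IsMapped() || remaining_size < size) {
        UnmapMemoryImpl(mapped_addr, size);
    }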

* Remove phys_base modifications

This can be handled later. Doing the logic properly would likely take work in MergeAdjacent, and would probably need to be applied to normal dmem mappings too.

* Use VMAHandle.Contains()

Why do extra math when we have a function specifically for this?

* Update memory.cpp

* Remove unnecessary code

Since I've removed those two asserts, these two lines of code effectively do nothing.

* Clang

* Fix names

* Fix PoolDecommit

Should fix the address space regressions in UE titles on Windows.

* Fix error log

Should make the cause of this clearer?

* Clang

* Oops

* Remove coalesce on PoolCommit

Windows makes this more difficult.

* Track pool budgets

If you try to commit more pooled memory than is allocated, PoolCommit returns ENOMEM.
This also fixes the error conditions for PoolDecommit, which should return EINVAL when given an address that isn't part of the pool.

Note: It seems like the pool budget can't hit zero? I used a <= comparison based on hardware tests; otherwise we're able to make more mappings than real hardware can.
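
A worked example of the budget rule (sizes are made up; addr, non_pool_addr, and prot are placeholders, with addr assumed to sit inside a sufficiently large PoolReserved area):

    // PoolExpand added 16 MB of dmem to the pool, so pool_budget == 16 MB.
    memory->PoolCommit(addr, 16 * 1024 * 1024, prot); // ENOMEM: the check is pool_budget <= size
    memory->PoolCommit(addr, 14 * 1024 * 1024, prot); // OK: budget drops to 2 MB
    memory->PoolDecommit(addr, 14 * 1024 * 1024);     // budget returns to 16 MB
    memory->PoolDecommit(non_pool_addr, 0x10000);     // EINVAL: not pool-backed memory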

Stephen Miller, 2025-05-11 04:59:14 -05:00, committed via GitHub
parent 6ece91c763
commit afcf3a12a3
3 changed files with 111 additions and 65 deletions


@@ -383,13 +383,12 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_
         LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
         return ORBIS_KERNEL_ERROR_EINVAL;
     }
-    const bool is_in_range = searchEnd - searchStart >= len;
-    if (len <= 0 || !Common::Is64KBAligned(len) || !is_in_range) {
-        LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
+    if (len <= 0 || !Common::Is64KBAligned(len)) {
+        LOG_ERROR(Kernel_Vmm, "Provided length {:#x} is invalid!", len);
         return ORBIS_KERNEL_ERROR_EINVAL;
     }
     if (alignment != 0 && !Common::Is64KBAligned(alignment)) {
-        LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!");
+        LOG_ERROR(Kernel_Vmm, "Alignment {:#x} is invalid!", alignment);
         return ORBIS_KERNEL_ERROR_EINVAL;
     }
     if (physAddrOut == nullptr) {
@@ -397,8 +396,21 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_
         return ORBIS_KERNEL_ERROR_EINVAL;
     }

+    const bool is_in_range = searchEnd - searchStart >= len;
+    if (searchEnd <= searchStart || searchEnd < len || !is_in_range) {
+        LOG_ERROR(Kernel_Vmm,
+                  "Provided address range is too small!"
+                  " searchStart = {:#x}, searchEnd = {:#x}, length = {:#x}",
+                  searchStart, searchEnd, len);
+        return ORBIS_KERNEL_ERROR_ENOMEM;
+    }
+
     auto* memory = Core::Memory::Instance();
     PAddr phys_addr = memory->PoolExpand(searchStart, searchEnd, len, alignment);
+    if (phys_addr == -1) {
+        return ORBIS_KERNEL_ERROR_ENOMEM;
+    }
+
     *physAddrOut = static_cast<s64>(phys_addr);
     LOG_INFO(Kernel_Vmm,
@@ -413,10 +425,6 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t ali
     LOG_INFO(Kernel_Vmm, "addrIn = {}, len = {:#x}, alignment = {:#x}, flags = {:#x}",
             fmt::ptr(addrIn), len, alignment, flags);

-    if (addrIn == nullptr) {
-        LOG_ERROR(Kernel_Vmm, "Address is invalid!");
-        return ORBIS_KERNEL_ERROR_EINVAL;
-    }
     if (len == 0 || !Common::Is2MBAligned(len)) {
         LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 2MB aligned!");
         return ORBIS_KERNEL_ERROR_EINVAL;
@@ -469,9 +477,8 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags)
     const VAddr pool_addr = reinterpret_cast<VAddr>(addr);
     auto* memory = Core::Memory::Instance();

-    memory->PoolDecommit(pool_addr, len);
-
-    return ORBIS_OK;
+    return memory->PoolDecommit(pool_addr, len);
 }

 int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, size_t offset,


@@ -109,31 +109,42 @@ bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_byt
 PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, size_t size, u64 alignment) {
     std::scoped_lock lk{mutex};
+    alignment = alignment > 0 ? alignment : 64_KB;
     auto dmem_area = FindDmemArea(search_start);
+    auto mapping_start = search_start > dmem_area->second.base
+                             ? Common::AlignUp(search_start, alignment)
+                             : Common::AlignUp(dmem_area->second.base, alignment);
+    auto mapping_end = mapping_start + size;

-    const auto is_suitable = [&] {
-        const auto aligned_base = alignment > 0 ? Common::AlignUp(dmem_area->second.base, alignment)
-                                                : dmem_area->second.base;
-        const auto alignment_size = aligned_base - dmem_area->second.base;
-        const auto remaining_size =
-            dmem_area->second.size >= alignment_size ? dmem_area->second.size - alignment_size : 0;
-        return dmem_area->second.is_free && remaining_size >= size;
-    };
-    while (!is_suitable() && dmem_area->second.GetEnd() <= search_end) {
+    // Find the first free, large enough dmem area in the range.
+    while (!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) {
+        // The current dmem_area isn't suitable, move to the next one.
         dmem_area++;
+        if (dmem_area == dmem_map.end()) {
+            break;
+        }
+
+        // Update local variables based on the new dmem_area
+        mapping_start = Common::AlignUp(dmem_area->second.base, alignment);
+        mapping_end = mapping_start + size;
     }
-    ASSERT_MSG(is_suitable(), "Unable to find free direct memory area: size = {:#x}", size);

-    // Align free position
-    PAddr free_addr = dmem_area->second.base;
-    free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr;
+    if (dmem_area == dmem_map.end()) {
+        // There are no suitable mappings in this range
+        LOG_ERROR(Kernel_Vmm, "Unable to find free direct memory area: size = {:#x}", size);
+        return -1;
+    }

     // Add the allocated region to the list and commit its pages.
-    auto& area = CarveDmemArea(free_addr, size)->second;
+    auto& area = CarveDmemArea(mapping_start, size)->second;
     area.is_free = false;
     area.is_pooled = true;
-    return free_addr;
+
+    // Track how much dmem was allocated for pools.
+    pool_budget += size;
+    return mapping_start;
 }

 PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
@@ -206,27 +217,27 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) {
 int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size,
                                MemoryMapFlags flags, u64 alignment) {
     std::scoped_lock lk{mutex};
-    virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
     alignment = alignment > 0 ? alignment : 2_MB;
-    VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr;
+    VAddr min_address = Common::AlignUp(impl.SystemManagedVirtualBase(), alignment);
+    VAddr mapped_addr = Common::AlignUp(virtual_addr, alignment);

     // Fixed mapping means the virtual address must exactly match the provided one.
     if (True(flags & MemoryMapFlags::Fixed)) {
-        auto& vma = FindVMA(mapped_addr)->second;
-        // If the VMA is mapped, unmap the region first.
-        if (vma.IsMapped()) {
+        // Make sure we're mapping to a valid address
+        mapped_addr = mapped_addr > min_address ? mapped_addr : min_address;
+        auto vma = FindVMA(mapped_addr)->second;
+        size_t remaining_size = vma.base + vma.size - mapped_addr;
+
+        // If the VMA is mapped or there's not enough space, unmap the region first.
+        if (vma.IsMapped() || remaining_size < size) {
             UnmapMemoryImpl(mapped_addr, size);
-            vma = FindVMA(mapped_addr)->second;
         }
-        const size_t remaining_size = vma.base + vma.size - mapped_addr;
-        ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size,
-                   "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}",
-                   vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
     }

     // Find the first free area starting with provided virtual address.
     if (False(flags & MemoryMapFlags::Fixed)) {
+        // When MemoryMapFlags::Fixed is not specified, and mapped_addr is 0,
+        // search from address 0x200000000 instead.
+        mapped_addr = mapped_addr == 0 ? 0x200000000 : mapped_addr;
         mapped_addr = SearchFree(mapped_addr, size, alignment);
         if (mapped_addr == -1) {
             // No suitable memory areas to map to
@@ -241,7 +252,6 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size,
     new_vma.prot = MemoryProt::NoAccess;
     new_vma.name = "anon";
     new_vma.type = VMAType::PoolReserved;
-    MergeAdjacent(vma_map, new_vma_handle);

     *out_addr = std::bit_cast<void*>(mapped_addr);
     return ORBIS_OK;
@@ -258,15 +268,12 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
     // Fixed mapping means the virtual address must exactly match the provided one.
     if (True(flags & MemoryMapFlags::Fixed)) {
         auto vma = FindVMA(mapped_addr)->second;
-        // If the VMA is mapped, unmap the region first.
-        if (vma.IsMapped()) {
+        size_t remaining_size = vma.base + vma.size - mapped_addr;
+
+        // If the VMA is mapped or there's not enough space, unmap the region first.
+        if (vma.IsMapped() || remaining_size < size) {
             UnmapMemoryImpl(mapped_addr, size);
-            vma = FindVMA(mapped_addr)->second;
         }
-        const size_t remaining_size = vma.base + vma.size - mapped_addr;
-        ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size,
-                   "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}",
-                   vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
     }

     // Find the first free area starting with provided virtual address.
@@ -296,30 +303,47 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot)
     const u64 alignment = 64_KB;

-    // When virtual addr is zero, force it to virtual_base. The guest cannot pass Fixed
-    // flag so we will take the branch that searches for free (or reserved) mappings.
-    virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
+    // Input addresses to PoolCommit are treated as fixed.
     VAddr mapped_addr = Common::AlignUp(virtual_addr, alignment);

-    // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen.
-    const auto& vma = FindVMA(mapped_addr)->second;
-    const size_t remaining_size = vma.base + vma.size - mapped_addr;
-    ASSERT_MSG(!vma.IsMapped() && remaining_size >= size,
-               "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}",
-               vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
+    auto& vma = FindVMA(mapped_addr)->second;
+    if (vma.type != VMAType::PoolReserved) {
+        // If we're attempting to commit non-pooled memory, return EINVAL
+        LOG_ERROR(Kernel_Vmm, "Attempting to commit non-pooled memory at {:#x}", mapped_addr);
+        return ORBIS_KERNEL_ERROR_EINVAL;
+    }

-    // Perform the mapping.
-    void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false);
-    TRACK_ALLOC(out_addr, size, "VMEM");
+    if (!vma.Contains(mapped_addr, size)) {
+        // If there's not enough space to commit, return EINVAL
+        LOG_ERROR(Kernel_Vmm,
+                  "Pooled region {:#x} to {:#x} is not large enough to commit from {:#x} to {:#x}",
+                  vma.base, vma.base + vma.size, mapped_addr, mapped_addr + size);
+        return ORBIS_KERNEL_ERROR_EINVAL;
+    }

-    auto& new_vma = CarveVMA(mapped_addr, size)->second;
+    if (pool_budget <= size) {
+        // If there isn't enough pooled memory to perform the mapping, return ENOMEM
+        LOG_ERROR(Kernel_Vmm, "Not enough pooled memory to perform mapping");
+        return ORBIS_KERNEL_ERROR_ENOMEM;
+    } else {
+        // Track how much pooled memory this commit will take
+        pool_budget -= size;
+    }
+
+    // Carve out the new VMA representing this mapping
+    const auto new_vma_handle = CarveVMA(mapped_addr, size);
+    auto& new_vma = new_vma_handle->second;
     new_vma.disallow_merge = false;
     new_vma.prot = prot;
-    new_vma.name = "";
+    new_vma.name = "anon";
     new_vma.type = Core::VMAType::Pooled;
     new_vma.is_exec = false;
     new_vma.phys_base = 0;

+    // Perform the mapping
+    void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false);
+    TRACK_ALLOC(out_addr, size, "VMEM");
+
     if (IsValidGpuMapping(mapped_addr, size)) {
         rasterizer->MapMemory(mapped_addr, size);
     }
@@ -438,7 +462,7 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem
     return ORBIS_OK;
 }

-void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) {
+s32 MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) {
     std::scoped_lock lk{mutex};

     const auto it = FindVMA(virtual_addr);
@@ -453,6 +477,16 @@ void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) {
     const auto start_in_vma = virtual_addr - vma_base_addr;
     const auto type = vma_base.type;

+    if (type != VMAType::PoolReserved && type != VMAType::Pooled) {
+        LOG_ERROR(Kernel_Vmm, "Attempting to decommit non-pooled memory!");
+        return ORBIS_KERNEL_ERROR_EINVAL;
+    }
+
+    if (type == VMAType::Pooled) {
+        // Track how much pooled memory is decommitted
+        pool_budget += size;
+    }
+
     if (IsValidGpuMapping(virtual_addr, size)) {
         rasterizer->UnmapMemory(virtual_addr, size);
     }
@@ -464,13 +498,17 @@ void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) {
     vma.prot = MemoryProt::NoAccess;
     vma.phys_base = 0;
     vma.disallow_merge = false;
-    vma.name = "";
+    vma.name = "anon";
     MergeAdjacent(vma_map, new_it);

-    // Unmap the memory region.
-    impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec,
-               false, false);
-    TRACK_FREE(virtual_addr, "VMEM");
+    if (type != VMAType::PoolReserved) {
+        // Unmap the memory region.
+        impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base,
+                   is_exec, false, false);
+        TRACK_FREE(virtual_addr, "VMEM");
+    }
+
+    return ORBIS_OK;
 }

 s32 MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {


@@ -198,7 +198,7 @@ public:
     int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
                 MemoryMapFlags flags, uintptr_t fd, size_t offset);

-    void PoolDecommit(VAddr virtual_addr, size_t size);
+    s32 PoolDecommit(VAddr virtual_addr, size_t size);

     s32 UnmapMemory(VAddr virtual_addr, size_t size);
@@ -274,6 +274,7 @@ private:
     size_t total_direct_size{};
     size_t total_flexible_size{};
     size_t flexible_usage{};
+    size_t pool_budget{};
     Vulkan::Rasterizer* rasterizer{};

     friend class ::Core::Devtools::Widget::MemoryMapViewer;