Merge branch 'main' into http-part2

georgemoralis 2025-05-30 09:56:20 +03:00 committed by GitHub
commit 10f598f6d2
12 changed files with 218 additions and 184 deletions


@@ -8,7 +8,6 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/singleton.h"
#include "core/file_sys/fs.h"
#include "core/libraries/kernel/kernel.h"
#include "core/libraries/kernel/memory.h"
#include "core/libraries/kernel/orbis_error.h"
@@ -152,7 +151,8 @@ s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u
const VAddr in_addr = reinterpret_cast<VAddr>(*addr);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
s32 result = memory->Reserve(addr, in_addr, len, map_flags, alignment);
s32 result = memory->MapMemory(addr, in_addr, len, Core::MemoryProt::NoAccess, map_flags,
Core::VMAType::Reserved, "anon", false, -1, alignment);
if (result == 0) {
LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr));
}
@@ -263,13 +263,22 @@ int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void**
return memory->QueryProtection(std::bit_cast<VAddr>(addr), start, end, prot);
}
int PS4_SYSV_ABI sceKernelMProtect(const void* addr, size_t size, int prot) {
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot) {
Core::MemoryManager* memory_manager = Core::Memory::Instance();
Core::MemoryProt protection_flags = static_cast<Core::MemoryProt>(prot);
return memory_manager->Protect(std::bit_cast<VAddr>(addr), size, protection_flags);
}
int PS4_SYSV_ABI sceKernelMTypeProtect(const void* addr, size_t size, int mtype, int prot) {
s32 PS4_SYSV_ABI posix_mprotect(const void* addr, u64 size, s32 prot) {
s32 result = sceKernelMprotect(addr, size, prot);
if (result < 0) {
ErrSceToPosix(result);
return -1;
}
return result;
}
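Every posix_* wrapper in this file follows the same shape: call the sce implementation, translate a failure through ErrSceToPosix, and return -1. A minimal usage sketch; the address and the PROT constant are illustrative assumptions, not values from this change:

    // Hypothetical call site: dropping write access on a 16 KiB range.
    void* addr = reinterpret_cast<void*>(0x20000000); // illustrative address
    if (posix_mprotect(addr, 16_KB, 0x1 /* assumed CpuRead */) == -1) {
        // ErrSceToPosix stored the translated code in the emulated errno,
        // so *__Error() reports why the sce call failed.
        LOG_ERROR(Kernel_Vmm, "mprotect failed, errno = {}", *__Error());
    }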
s32 PS4_SYSV_ABI sceKernelMtypeprotect(const void* addr, u64 size, s32 mtype, s32 prot) {
Core::MemoryManager* memory_manager = Core::Memory::Instance();
Core::MemoryProt protection_flags = static_cast<Core::MemoryProt>(prot);
return memory_manager->Protect(std::bit_cast<VAddr>(addr), size, protection_flags);
@@ -344,7 +353,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn
break;
}
case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_PROTECT: {
result = sceKernelMProtect(entries[i].start, entries[i].length, entries[i].protection);
result = sceKernelMprotect(entries[i].start, entries[i].length, entries[i].protection);
LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, result = {}", i,
entries[i].operation, entries[i].length, result);
break;
@@ -359,7 +368,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn
break;
}
case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_TYPE_PROTECT: {
result = sceKernelMTypeProtect(entries[i].start, entries[i].length, entries[i].type,
result = sceKernelMtypeprotect(entries[i].start, entries[i].length, entries[i].type,
entries[i].protection);
LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, result = {}", i,
entries[i].operation, entries[i].length, result);
@@ -380,7 +389,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn
return result;
}
s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name) {
s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, u64 len, const char* name) {
if (name == nullptr) {
LOG_ERROR(Kernel_Vmm, "name is invalid!");
return ORBIS_KERNEL_ERROR_EFAULT;
@@ -396,8 +405,8 @@ s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, cons
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_t len,
size_t alignment, u64* physAddrOut) {
s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, u64 len, u64 alignment,
u64* physAddrOut) {
if (searchStart < 0 || searchEnd <= searchStart) {
LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
return ORBIS_KERNEL_ERROR_EINVAL;
@@ -439,10 +448,10 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t alignment, int flags,
void** addrOut) {
LOG_INFO(Kernel_Vmm, "addrIn = {}, len = {:#x}, alignment = {:#x}, flags = {:#x}",
fmt::ptr(addrIn), len, alignment, flags);
s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addr_in, u64 len, u64 alignment, s32 flags,
void** addr_out) {
LOG_INFO(Kernel_Vmm, "addr_in = {}, len = {:#x}, alignment = {:#x}, flags = {:#x}",
fmt::ptr(addr_in), len, alignment, flags);
if (len == 0 || !Common::Is2MBAligned(len)) {
LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 2MB aligned!");
@@ -456,14 +465,16 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t ali
}
auto* memory = Core::Memory::Instance();
const VAddr in_addr = reinterpret_cast<VAddr>(addrIn);
const VAddr in_addr = reinterpret_cast<VAddr>(addr_in);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
memory->PoolReserve(addrOut, in_addr, len, map_flags, alignment);
u64 map_alignment = alignment == 0 ? 2_MB : alignment;
return ORBIS_OK;
return memory->MapMemory(addr_out, std::bit_cast<VAddr>(addr_in), len,
Core::MemoryProt::NoAccess, map_flags, Core::VMAType::PoolReserved,
"anon", false, -1, map_alignment);
}
s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int prot, int flags) {
s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, u64 len, s32 type, s32 prot, s32 flags) {
if (addr == nullptr) {
LOG_ERROR(Kernel_Vmm, "Address is invalid!");
return ORBIS_KERNEL_ERROR_EINVAL;
@@ -482,7 +493,7 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int
return memory->PoolCommit(in_addr, len, mem_prot);
}
s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags) {
s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, u64 len, s32 flags) {
if (addr == nullptr) {
LOG_ERROR(Kernel_Vmm, "Address is invalid!");
return ORBIS_KERNEL_ERROR_EINVAL;
@@ -523,12 +534,12 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolBatch(const OrbisKernelMemoryPoolBatchEntry*
break;
}
case OrbisKernelMemoryPoolOpcode::Protect: {
result = sceKernelMProtect(entry.protect_params.addr, entry.protect_params.len,
result = sceKernelMprotect(entry.protect_params.addr, entry.protect_params.len,
entry.protect_params.prot);
break;
}
case OrbisKernelMemoryPoolOpcode::TypeProtect: {
result = sceKernelMTypeProtect(
result = sceKernelMtypeprotect(
entry.type_protect_params.addr, entry.type_protect_params.len,
entry.type_protect_params.type, entry.type_protect_params.prot);
break;
@@ -553,30 +564,48 @@ s32 PS4_SYSV_ABI sceKernelMemoryPoolBatch(const OrbisKernelMemoryPoolBatchEntry*
return result;
}
int PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, int prot, int flags, int fd, size_t offset,
void** res) {
LOG_INFO(Kernel_Vmm, "called addr = {}, len = {}, prot = {}, flags = {}, fd = {}, offset = {}",
fmt::ptr(addr), len, prot, flags, fd, offset);
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, s32 prot, s32 flags, s32 fd, s64 phys_addr) {
LOG_INFO(Kernel_Vmm,
"called addr = {}, len = {}, prot = {}, flags = {}, fd = {}, phys_addr = {}",
fmt::ptr(addr), len, prot, flags, fd, phys_addr);
void* addr_out;
auto* memory = Core::Memory::Instance();
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto mem_flags = static_cast<Core::MemoryMapFlags>(flags);
s32 result = ORBIS_OK;
if (fd == -1) {
return memory->MapMemory(res, std::bit_cast<VAddr>(addr), len, mem_prot, mem_flags,
result = memory->MapMemory(&addr_out, std::bit_cast<VAddr>(addr), len, mem_prot, mem_flags,
Core::VMAType::Flexible);
} else {
const uintptr_t handle = h->GetFile(fd)->f.GetFileMapping();
return memory->MapFile(res, std::bit_cast<VAddr>(addr), len, mem_prot, mem_flags, handle,
offset);
}
result = memory->MapFile(&addr_out, std::bit_cast<VAddr>(addr), len, mem_prot, mem_flags,
fd, phys_addr);
}
void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, int prot, int flags, int fd, u64 offset) {
void* ptr;
LOG_INFO(Kernel_Vmm, "posix mmap redirect to sceKernelMmap");
int result = sceKernelMmap(addr, len, prot, flags, fd, offset, &ptr);
ASSERT(result == 0);
return ptr;
if (result != ORBIS_OK) {
// If the memory mapping fails, mmap sets errno to the appropriate error code,
// then returns (void*)-1.
ErrSceToPosix(result);
return reinterpret_cast<void*>(-1);
}
return addr_out;
}
s32 PS4_SYSV_ABI sceKernelMmap(void* addr, u64 len, s32 prot, s32 flags, s32 fd, s64 phys_addr,
void** res) {
void* addr_out = posix_mmap(addr, len, prot, flags, fd, phys_addr);
if (addr_out == reinterpret_cast<void*>(-1)) {
// posix_mmap failed, calculate and return the appropriate kernel error code using errno.
LOG_ERROR(Kernel_Fs, "error = {}", *__Error());
return ErrnoToSceKernelError(*__Error());
}
// Set the output address
*res = addr_out;
return ORBIS_OK;
}
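posix_mmap now owns the mapping logic and sceKernelMmap is a thin adapter over it, so both entry points surface the same failure through different conventions. A sketch of that round-trip, assuming ErrSceToPosix and ErrnoToSceKernelError invert each other and that 0x2 is a CPU read/write protection:

    void* out = nullptr;
    // Orbis style: returns 0 or ORBIS_KERNEL_ERROR_*, address via out-param.
    const s32 rc = sceKernelMmap(nullptr, 16_KB, 0x2, 0, -1, 0, &out);
    // POSIX style: returns (void*)-1 and sets the emulated errno.
    void* p = posix_mmap(nullptr, 16_KB, 0x2, 0, -1, 0);
    if (p == reinterpret_cast<void*>(-1)) {
        // By construction this is the same code rc would have carried.
        const s32 sce_err = ErrnoToSceKernelError(*__Error());
    }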
s32 PS4_SYSV_ABI sceKernelConfiguredFlexibleMemorySize(u64* sizeOut) {
@@ -678,8 +707,9 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("n1-v6FgU7MQ", "libkernel", 1, "libkernel", 1, 1,
sceKernelConfiguredFlexibleMemorySize);
LIB_FUNCTION("9bfdLIyuwCY", "libkernel", 1, "libkernel", 1, 1, sceKernelMTypeProtect);
LIB_FUNCTION("vSMAm3cxYTY", "libkernel", 1, "libkernel", 1, 1, sceKernelMProtect);
LIB_FUNCTION("vSMAm3cxYTY", "libkernel", 1, "libkernel", 1, 1, sceKernelMprotect);
LIB_FUNCTION("YQOfxL4QfeU", "libScePosix", 1, "libkernel", 1, 1, posix_mprotect);
LIB_FUNCTION("9bfdLIyuwCY", "libkernel", 1, "libkernel", 1, 1, sceKernelMtypeprotect);
// Memory pool
LIB_FUNCTION("qCSfqDILlns", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolExpand);


@@ -147,9 +147,9 @@ s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len,
int flags);
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
int PS4_SYSV_ABI sceKernelMProtect(const void* addr, size_t size, int prot);
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);
int PS4_SYSV_ABI sceKernelMTypeProtect(const void* addr, size_t size, int mtype, int prot);
s32 PS4_SYSV_ABI sceKernelMtypeprotect(const void* addr, u64 size, s32 mtype, s32 prot);
int PS4_SYSV_ABI sceKernelDirectMemoryQuery(u64 offset, int flags, OrbisQueryInfo* query_info,
size_t infoSize);
@@ -165,14 +165,14 @@ s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEnt
s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEntries,
int* numEntriesOut, int flags);
s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name);
s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, u64 len, const char* name);
s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_t len,
size_t alignment, u64* physAddrOut);
s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t alignment, int flags,
void** addrOut);
s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int prot, int flags);
s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags);
s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, u64 len, u64 alignment,
u64* physAddrOut);
s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addr_in, u64 len, u64 alignment, s32 flags,
void** addr_out);
s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, u64 len, s32 type, s32 prot, s32 flags);
s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, u64 len, s32 flags);
s32 PS4_SYSV_ABI sceKernelMemoryPoolBatch(const OrbisKernelMemoryPoolBatchEntry* entries, s32 count,
s32* num_processed, s32 flags);


@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
#include "core/file_sys/fs.h"
#include "core/libraries/kernel/memory.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/process.h"
@@ -181,6 +182,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
auto& area = CarveDmemArea(mapping_start, size)->second;
area.memory_type = memory_type;
area.is_free = false;
MergeAdjacent(dmem_map, dmem_area);
return mapping_start;
}
@@ -214,90 +216,6 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) {
MergeAdjacent(dmem_map, dmem_area);
}
int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size,
MemoryMapFlags flags, u64 alignment) {
std::scoped_lock lk{mutex};
alignment = alignment > 0 ? alignment : 2_MB;
VAddr min_address = Common::AlignUp(impl.SystemManagedVirtualBase(), alignment);
VAddr mapped_addr = Common::AlignUp(virtual_addr, alignment);
// Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) {
// Make sure we're mapping to a valid address
mapped_addr = mapped_addr > min_address ? mapped_addr : min_address;
auto vma = FindVMA(mapped_addr)->second;
size_t remaining_size = vma.base + vma.size - mapped_addr;
// If the VMA is mapped or there's not enough space, unmap the region first.
if (vma.IsMapped() || remaining_size < size) {
UnmapMemoryImpl(mapped_addr, size);
vma = FindVMA(mapped_addr)->second;
}
}
if (False(flags & MemoryMapFlags::Fixed)) {
// When MemoryMapFlags::Fixed is not specified, and mapped_addr is 0,
// search from address 0x200000000 instead.
mapped_addr = mapped_addr == 0 ? 0x200000000 : mapped_addr;
mapped_addr = SearchFree(mapped_addr, size, alignment);
if (mapped_addr == -1) {
// No suitable memory areas to map to
return ORBIS_KERNEL_ERROR_ENOMEM;
}
}
// Add virtual memory area
const auto new_vma_handle = CarveVMA(mapped_addr, size);
auto& new_vma = new_vma_handle->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = MemoryProt::NoAccess;
new_vma.name = "anon";
new_vma.type = VMAType::PoolReserved;
*out_addr = std::bit_cast<void*>(mapped_addr);
return ORBIS_OK;
}
int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags,
u64 alignment) {
std::scoped_lock lk{mutex};
virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
alignment = alignment > 0 ? alignment : 16_KB;
VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr;
// Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) {
auto vma = FindVMA(mapped_addr)->second;
size_t remaining_size = vma.base + vma.size - mapped_addr;
// If the VMA is mapped or there's not enough space, unmap the region first.
if (vma.IsMapped() || remaining_size < size) {
UnmapMemoryImpl(mapped_addr, size);
vma = FindVMA(mapped_addr)->second;
}
}
// Find the first free area starting with provided virtual address.
if (False(flags & MemoryMapFlags::Fixed)) {
mapped_addr = SearchFree(mapped_addr, size, alignment);
if (mapped_addr == -1) {
// No suitable memory areas to map to
return ORBIS_KERNEL_ERROR_ENOMEM;
}
}
// Add virtual memory area
const auto new_vma_handle = CarveVMA(mapped_addr, size);
auto& new_vma = new_vma_handle->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = MemoryProt::NoAccess;
new_vma.name = "anon";
new_vma.type = VMAType::Reserved;
MergeAdjacent(vma_map, new_vma_handle);
*out_addr = std::bit_cast<void*>(mapped_addr);
return ORBIS_OK;
}
int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) {
std::scoped_lock lk{mutex};
@@ -344,14 +262,17 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot)
void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false);
TRACK_ALLOC(out_addr, size, "VMEM");
if (IsValidGpuMapping(mapped_addr, size)) {
if (prot >= MemoryProt::GpuRead) {
// PS4s only map to GPU memory when the protection includes GPU access.
// If the address to map to is too high, PS4s throw a page fault and crash.
ASSERT_MSG(IsValidGpuMapping(mapped_addr, size), "Invalid address for GPU mapping");
rasterizer->MapMemory(mapped_addr, size);
}
return ORBIS_OK;
}
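The prot >= MemoryProt::GpuRead test used here and in the unmap paths below relies on the protection enum sorting every GPU-visible value above the CPU-only ones. A sketch of the assumed layout (the exact values are an assumption, not quoted from core/memory.h):

    // Any value with a GPU bit set compares >= GpuRead, so one comparison
    // decides whether a rasterizer map/unmap is needed.
    enum class MemoryProt : u32 {
        NoAccess = 0,
        CpuRead = 1,
        CpuReadWrite = 2,
        GpuRead = 0x10,
        GpuWrite = 0x20,
        GpuReadWrite = 0x30,
    };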
int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot,
MemoryMapFlags flags, VMAType type, std::string_view name,
bool is_exec, PAddr phys_addr, u64 alignment) {
std::scoped_lock lk{mutex};
@@ -366,17 +287,18 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
// Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) {
// On a PS4, the Fixed flag is ignored if address 0 is provided.
if (True(flags & MemoryMapFlags::Fixed) && virtual_addr != 0) {
auto vma = FindVMA(mapped_addr)->second;
size_t remaining_size = vma.base + vma.size - mapped_addr;
// There's a possible edge case where we're mapping to a partially reserved range.
// To account for this, unmap any reserved areas within this mapping range first.
auto unmap_addr = mapped_addr;
auto unmap_size = size;
// If flag NoOverwrite is provided, don't overwrite mapped VMAs.
// When it isn't provided, VMAs can be overwritten regardless of if they're mapped.
while ((False(flags & MemoryMapFlags::NoOverwrite) || !vma.IsMapped()) &&
unmap_addr < mapped_addr + size && remaining_size < size) {
unmap_addr < mapped_addr + size) {
auto unmapped = UnmapBytesFromEntry(unmap_addr, vma, unmap_size);
unmap_addr += unmapped;
unmap_size -= unmapped;
@@ -384,51 +306,69 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
}
vma = FindVMA(mapped_addr)->second;
remaining_size = vma.base + vma.size - mapped_addr;
auto remaining_size = vma.base + vma.size - mapped_addr;
if (vma.IsMapped() || remaining_size < size) {
LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at address {:#x}", size, mapped_addr);
return ORBIS_KERNEL_ERROR_ENOMEM;
}
}
// Find the first free area starting with provided virtual address.
if (False(flags & MemoryMapFlags::Fixed)) {
// Provided address needs to be aligned before we can map.
} else {
// When MemoryMapFlags::Fixed is not specified, and mapped_addr is 0,
// search from address 0x200000000 instead.
alignment = alignment > 0 ? alignment : 16_KB;
mapped_addr = SearchFree(Common::AlignUp(mapped_addr, alignment), size, alignment);
mapped_addr = virtual_addr == 0 ? 0x200000000 : mapped_addr;
mapped_addr = SearchFree(mapped_addr, size, alignment);
if (mapped_addr == -1) {
// No suitable memory areas to map to
return ORBIS_KERNEL_ERROR_ENOMEM;
}
}
// Perform the mapping.
*out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec);
TRACK_ALLOC(*out_addr, size, "VMEM");
// Create a memory area representing this mapping.
const auto new_vma_handle = CarveVMA(mapped_addr, size);
auto& new_vma = new_vma_handle->second;
auto& new_vma = CarveVMA(mapped_addr, size)->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = prot;
new_vma.name = name;
new_vma.type = type;
new_vma.is_exec = is_exec;
if (type == VMAType::Direct) {
new_vma.phys_base = phys_addr;
}
// If type is Flexible, we need to track how much flexible memory is used here.
if (type == VMAType::Flexible) {
flexible_usage += size;
}
if (IsValidGpuMapping(mapped_addr, size)) {
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = prot;
new_vma.name = name;
new_vma.type = type;
new_vma.phys_base = phys_addr == -1 ? 0 : phys_addr;
new_vma.is_exec = is_exec;
if (type == VMAType::Reserved) {
// Technically this should be done for direct and flexible mappings too,
// but some Windows-specific limitations make that hard to accomplish.
MergeAdjacent(vma_map, new_vma_handle);
}
if (prot >= MemoryProt::GpuRead) {
// PS4s only map to GPU memory when the protection includes GPU access.
// If the address to map to is too high, PS4s throw a page fault and crash.
ASSERT_MSG(IsValidGpuMapping(mapped_addr, size), "Invalid address for GPU mapping");
rasterizer->MapMemory(mapped_addr, size);
}
if (type == VMAType::Reserved || type == VMAType::PoolReserved) {
// For Reserved/PoolReserved mappings, we don't perform any address space allocations.
// Just set out_addr to mapped_addr instead.
*out_addr = std::bit_cast<void*>(mapped_addr);
} else {
// Type is either Direct, Flexible, or Code, these need to be mapped in our address space.
*out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec);
}
TRACK_ALLOC(*out_addr, size, "VMEM");
return ORBIS_OK;
}
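With Reserve and PoolReserve folded in, the VMAType argument alone decides whether MapMemory allocates host address space. A usage sketch, assuming the defaults declared in memory.h and that NoFlags is the empty flag value:

    auto* memory = Core::Memory::Instance();
    void* out = nullptr;
    // Reserved: pure bookkeeping; *out receives the carved virtual address.
    memory->MapMemory(&out, 0, 16_KB, Core::MemoryProt::NoAccess,
                      Core::MemoryMapFlags::NoFlags, Core::VMAType::Reserved);
    // Flexible: same path, but impl.Map() backs the range with host memory
    // and the size is charged against flexible_usage.
    memory->MapMemory(&out, 0, 16_KB, Core::MemoryProt::CpuReadWrite,
                      Core::MemoryMapFlags::NoFlags, Core::VMAType::Flexible);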
int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
MemoryMapFlags flags, uintptr_t fd, size_t offset) {
s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot,
MemoryMapFlags flags, s32 fd, s64 phys_addr) {
auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
const size_t size_aligned = Common::AlignUp(size, 16_KB);
@@ -449,8 +389,19 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem
vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
}
// Map the file.
impl.MapFile(mapped_addr, size_aligned, offset, std::bit_cast<u32>(prot), fd);
// Get the file to map
auto file = h->GetFile(fd);
if (file == nullptr) {
return ORBIS_KERNEL_ERROR_EBADF;
}
const auto handle = file->f.GetFileMapping();
impl.MapFile(mapped_addr, size_aligned, phys_addr, std::bit_cast<u32>(prot), handle);
if (prot >= MemoryProt::GpuRead) {
ASSERT_MSG(false, "Files cannot be mapped to GPU memory");
}
// Add virtual memory area
auto& new_vma = CarveVMA(mapped_addr, size_aligned)->second;
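Because MapFile now takes the guest fd and resolves it through the HandleTable itself, a stale descriptor fails with EBADF before anything is mapped. A hedged usage sketch (fd 999 is an arbitrary unopened descriptor):

    void* out = nullptr;
    auto* memory = Core::Memory::Instance();
    // GetFile() returns nullptr for an fd that was never opened, so MapFile
    // bails out before reaching impl.MapFile().
    const s32 rc = memory->MapFile(&out, 0, 32_KB, Core::MemoryProt::CpuRead,
                                   Core::MemoryMapFlags::NoFlags, 999, 0);
    ASSERT(rc == ORBIS_KERNEL_ERROR_EBADF);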
@@ -478,6 +429,7 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) {
const bool is_exec = vma_base.is_exec;
const auto start_in_vma = virtual_addr - vma_base_addr;
const auto type = vma_base.type;
const auto prot = vma_base.prot;
if (type != VMAType::PoolReserved && type != VMAType::Pooled) {
LOG_ERROR(Kernel_Vmm, "Attempting to decommit non-pooled memory!");
@@ -489,7 +441,8 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) {
pool_budget += size;
}
if (IsValidGpuMapping(virtual_addr, size)) {
if (prot >= MemoryProt::GpuRead) {
// If this mapping has GPU access, unmap from GPU.
rasterizer->UnmapMemory(virtual_addr, size);
}
@@ -528,6 +481,7 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
const auto adjusted_size =
vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size;
const bool has_backing = type == VMAType::Direct || type == VMAType::File;
const auto prot = vma_base.prot;
if (type == VMAType::Free) {
return adjusted_size;
@@ -536,8 +490,9 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
flexible_usage -= adjusted_size;
}
if (IsValidGpuMapping(virtual_addr, adjusted_size)) {
rasterizer->UnmapMemory(virtual_addr, adjusted_size);
if (prot >= MemoryProt::GpuRead) {
// If this mapping has GPU access, unmap from GPU.
rasterizer->UnmapMemory(virtual_addr, adjusted_size);
}
// Mark region as free and attempt to coalesce it with neighbours.
@@ -605,8 +560,8 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea vma_base, size_t s
vma_base.size - start_in_vma < size ? vma_base.size - start_in_vma : size;
if (vma_base.type == VMAType::Free) {
LOG_ERROR(Kernel_Vmm, "Cannot change protection on free memory region");
return ORBIS_KERNEL_ERROR_EINVAL;
// On PS4, protecting freed memory does nothing.
return adjusted_size;
}
// Validate protection flags
@@ -621,6 +576,18 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea vma_base, size_t s
return ORBIS_KERNEL_ERROR_EINVAL;
}
if (vma_base.prot < MemoryProt::GpuRead && prot >= MemoryProt::GpuRead) {
// New protection will give the GPU access to this VMA, perform a rasterizer map
ASSERT_MSG(IsValidGpuMapping(addr, size), "Invalid address for GPU mapping");
rasterizer->MapMemory(addr, size);
}
if (vma_base.prot >= MemoryProt::GpuRead && prot < MemoryProt::GpuRead) {
// New protection will remove the GPU's access to this VMA, perform a rasterizer unmap
ASSERT_MSG(IsValidGpuMapping(addr, size), "Invalid address for GPU unmap");
rasterizer->UnmapMemory(addr, size);
}
// Change protection
vma_base.prot = prot;
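Only transitions across the GPU-visibility boundary touch the rasterizer; protection changes that stay on one side of it are plain bookkeeping. The two checks above, condensed into a sketch:

    const bool was_gpu = vma_base.prot >= MemoryProt::GpuRead;
    const bool is_gpu = prot >= MemoryProt::GpuRead;
    if (!was_gpu && is_gpu) {
        rasterizer->MapMemory(addr, size);   // VMA gains GPU access
    } else if (was_gpu && !is_gpu) {
        rasterizer->UnmapMemory(addr, size); // VMA loses GPU access
    }
    // e.g. CpuRead -> CpuReadWrite and GpuRead -> GpuReadWrite do neither.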
@@ -798,12 +765,31 @@ s32 MemoryManager::SetDirectMemoryType(s64 phys_addr, s32 memory_type) {
return ORBIS_OK;
}
void MemoryManager::NameVirtualRange(VAddr virtual_addr, size_t size, std::string_view name) {
auto it = FindVMA(virtual_addr);
void MemoryManager::NameVirtualRange(VAddr virtual_addr, u64 size, std::string_view name) {
// Sizes are aligned up to the nearest 16_KB
auto aligned_size = Common::AlignUp(size, 16_KB);
// Addresses are aligned down to the nearest 16_KB
auto aligned_addr = Common::AlignDown(virtual_addr, 16_KB);
ASSERT_MSG(it->second.Contains(virtual_addr, size),
"Range provided is not fully contained in vma");
it->second.name = name;
auto it = FindVMA(aligned_addr);
s64 remaining_size = aligned_size;
auto current_addr = aligned_addr;
while (remaining_size > 0) {
// Nothing needs to be done to free VMAs
if (!it->second.IsFree()) {
if (remaining_size < it->second.size) {
// We should split VMAs here, but this could cause trouble for Windows.
// Instead log a warning and name the whole VMA.
// it = CarveVMA(current_addr, remaining_size);
LOG_WARNING(Kernel_Vmm, "Trying to partially name a range");
}
auto& vma = it->second;
vma.name = name;
}
remaining_size -= it->second.size;
current_addr += it->second.size;
it = FindVMA(current_addr);
}
}
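A quick numeric example of the 16 KB snapping (addresses illustrative):

    // Naming 0x12345 bytes at 0x70005000, with 16_KB == 0x4000:
    const VAddr aligned_addr = Common::AlignDown(0x70005000, 16_KB); // 0x70004000
    const u64 aligned_size = Common::AlignUp(0x12345, 16_KB);        // 0x14000
    // The loop then renames every non-free VMA overlapping
    // [0x70004000, 0x70018000), warning when one is only partially covered.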
void MemoryManager::InvalidateMemory(const VAddr addr, const u64 size) const {
@@ -824,6 +810,8 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment)
ASSERT_MSG(virtual_addr <= max_search_address, "Input address {:#x} is out of bounds",
virtual_addr);
// Align up the virtual_addr first.
virtual_addr = Common::AlignUp(virtual_addr, alignment);
auto it = FindVMA(virtual_addr);
// If the VMA is free and contains the requested mapping we are done.


@@ -183,20 +183,14 @@ public:
void Free(PAddr phys_addr, size_t size);
int PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags,
u64 alignment = 0);
int Reserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags,
u64 alignment = 0);
int PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot);
int MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
s32 MapMemory(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot,
MemoryMapFlags flags, VMAType type, std::string_view name = "anon",
bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0);
int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
MemoryMapFlags flags, uintptr_t fd, size_t offset);
s32 MapFile(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot,
MemoryMapFlags flags, s32 fd, s64 phys_addr);
s32 PoolDecommit(VAddr virtual_addr, size_t size);
@@ -221,7 +215,7 @@ public:
s32 SetDirectMemoryType(s64 phys_addr, s32 memory_type);
void NameVirtualRange(VAddr virtual_addr, size_t size, std::string_view name);
void NameVirtualRange(VAddr virtual_addr, u64 size, std::string_view name);
void InvalidateMemory(VAddr addr, u64 size) const;


@@ -1032,7 +1032,6 @@ void GcnDecodeContext::decodeInstructionMIMG(uint64_t hexInstruction) {
m_instruction.control.mimg = *reinterpret_cast<InstControlMIMG*>(&hexInstruction);
m_instruction.control.mimg.mod = getMimgModifier(m_instruction.opcode);
ASSERT(m_instruction.control.mimg.r128 == 0);
}
void GcnDecodeContext::decodeInstructionDS(uint64_t hexInstruction) {


@@ -380,7 +380,7 @@ T Translator::GetSrc64(const InstOperand& operand) {
break;
case OperandField::VccLo:
if constexpr (is_float) {
UNREACHABLE();
value = ir.PackDouble2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi()));
} else {
value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi()));
}
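PackDouble2x32 composes the two 32-bit halves without converting them, so reading VCC as an f64 source is lossless. A host-side illustration of the assumed bit-level behavior:

    u32 vcc_lo = 0x00000000, vcc_hi = 0x3FF00000; // example halves
    u64 bits = (static_cast<u64>(vcc_hi) << 32) | vcc_lo;
    f64 value;
    std::memcpy(&value, &bits, sizeof(value));    // reinterprets the bits as 1.0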


@@ -183,6 +183,7 @@ public:
void V_READFIRSTLANE_B32(const GcnInst& inst);
void V_CVT_I32_F64(const GcnInst& inst);
void V_CVT_F64_I32(const GcnInst& inst);
void V_CVT_F64_U32(const GcnInst& inst);
void V_CVT_F32_I32(const GcnInst& inst);
void V_CVT_F32_U32(const GcnInst& inst);
void V_CVT_U32_F32(const GcnInst& inst);


@@ -110,6 +110,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_CVT_I32_F64(inst);
case Opcode::V_CVT_F64_I32:
return V_CVT_F64_I32(inst);
case Opcode::V_CVT_F64_U32:
return V_CVT_F64_U32(inst);
case Opcode::V_CVT_F32_I32:
return V_CVT_F32_I32(inst);
case Opcode::V_CVT_F32_U32:
@@ -684,6 +686,11 @@ void Translator::V_CVT_F64_I32(const GcnInst& inst) {
SetDst64(inst.dst[0], ir.ConvertSToF(64, 32, src0));
}
void Translator::V_CVT_F64_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
SetDst64(inst.dst[0], ir.ConvertUToF(64, 32, src0));
}
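Per lane, the new opcode is an exact unsigned-to-double conversion; every u32 fits in a double's 53-bit mantissa. Scalar reference behavior in plain C++ (not the IR):

    f64 v_cvt_f64_u32_ref(u32 src) {
        return static_cast<f64>(src); // exact for all u32 inputs
    }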
void Translator::V_CVT_F32_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
SetDst(inst.dst[0], ir.ConvertSToF(32, 32, src0));


@@ -152,6 +152,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
// Image gather operations
case Opcode::IMAGE_GATHER4:
case Opcode::IMAGE_GATHER4_L:
case Opcode::IMAGE_GATHER4_LZ:
case Opcode::IMAGE_GATHER4_C:
case Opcode::IMAGE_GATHER4_O:
@@ -377,6 +378,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
IR::TextureInstInfo info{};
info.has_lod.Assign(has_mip);
info.is_array.Assign(mimg.da);
info.is_r128.Assign(mimg.r128);
const IR::Value texel = ir.ImageRead(handle, body, {}, {}, info);
for (u32 i = 0; i < 4; i++) {
@@ -426,6 +428,7 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
info.is_r128.Assign(mimg.r128);
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips), info);
@@ -451,6 +454,7 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
info.is_r128.Assign(mimg.r128);
const IR::Value value = ir.GetVectorReg(val_reg);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
@@ -509,6 +513,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
info.has_lod.Assign(flags.any(MimgModifier::Lod));
info.is_array.Assign(mimg.da);
info.is_unnormalized.Assign(mimg.unrm);
info.is_r128.Assign(mimg.r128);
if (gather) {
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
@@ -617,6 +622,7 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
info.is_r128.Assign(mimg.r128);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body = ir.CompositeConstruct(


@@ -84,6 +84,7 @@ struct ImageResource {
bool is_atomic{};
bool is_array{};
bool is_written{};
bool is_r128{};
[[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
};
@@ -293,7 +294,13 @@ constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexce
}
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
const auto image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
AmdGpu::Image image{0};
if (!is_r128) {
image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
} else {
AmdGpu::Buffer buf = info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
memcpy(&image, &buf, sizeof(buf));
}
if (!image.Valid()) {
// Fall back to null image if unbound.
return AmdGpu::Image::Null();
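The memcpy is deliberate: an r128 resource is a 128-bit V#, while Image is a 256-bit T#, so the copy fills only the low half and the rest stays zero from Image{0}. The size relationship this relies on, as a sketch (sizes assumed, not quoted from the headers):

    static_assert(sizeof(AmdGpu::Buffer) == 16, "assumed 128-bit V#");
    static_assert(sizeof(AmdGpu::Image) == 32, "assumed 256-bit T#");
    // memcpy(&image, &buf, sizeof(buf)) thus overwrites image's low half only.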


@@ -411,6 +411,7 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
.is_atomic = IsImageAtomicInstruction(inst),
.is_array = bool(inst_info.is_array),
.is_written = is_written,
.is_r128 = bool(inst_info.is_r128),
});
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};


@@ -44,6 +44,7 @@ union TextureInstInfo {
BitField<9, 1, u32> is_array;
BitField<10, 1, u32> is_unnormalized;
BitField<11, 1, u32> is_gather;
BitField<12, 1, u32> is_r128;
};
union BufferInstInfo {