Initial support for compiling on ARM64. (#788)

This commit is contained in:
squidbus 2024-09-09 03:23:16 -07:00 committed by GitHub
parent adfb3af95f
commit 411449cd51
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 166 additions and 25 deletions

View file

@ -3,11 +3,13 @@
#include <boost/icl/separate_interval_set.hpp>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "core/address_space.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/memory.h"
#include "libraries/error_codes.h"
#ifdef _WIN32
#include <windows.h>
@ -15,9 +17,8 @@
#include <fcntl.h>
#include <sys/mman.h>
#endif
#include "libraries/error_codes.h"
#ifdef __APPLE__
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Reserve space for the system address space using a zerofill section.
asm(".zerofill GUEST_SYSTEM,GUEST_SYSTEM,__guest_system,0xFBFC00000");
#endif
@ -308,12 +309,12 @@ struct AddressSpace::Impl {
constexpr int protection_flags = PROT_READ | PROT_WRITE;
constexpr int base_map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
#ifdef __APPLE__
// On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF
// and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. We can allocate the system
// managed region, as well as system reserved if reduced in size slightly, but we cannot map
// the user region where we want, so we must let the OS put it wherever possible and hope
// the game won't rely on its location.
#if defined(__APPLE__) && defined(ARCH_X86_64)
// On ARM64 Macs under Rosetta 2, we run into limitations due to the commpage from
// 0xFC0000000 - 0xFFFFFFFFF and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF.
// We can allocate the system managed region, as well as system reserved if reduced in size
// slightly, but we cannot map the user region where we want, so we must let the OS put it
// wherever possible and hope the game won't rely on its location.
system_managed_base = reinterpret_cast<u8*>(
mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), system_managed_size, protection_flags,
base_map_flags | MAP_FIXED, -1, 0));
@ -325,12 +326,22 @@ struct AddressSpace::Impl {
protection_flags, base_map_flags, -1, 0));
#else
const auto virtual_size = system_managed_size + system_reserved_size + user_size;
#if defined(ARCH_X86_64)
const auto virtual_base =
reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), virtual_size,
protection_flags, base_map_flags | MAP_FIXED, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = reinterpret_cast<u8*>(SYSTEM_RESERVED_MIN);
user_base = reinterpret_cast<u8*>(USER_MIN);
#else
// Map the memory wherever the OS can place it; instruction translation can handle offsetting
// addresses to the mapped base.
const auto virtual_base = reinterpret_cast<u8*>(
mmap(nullptr, virtual_size, protection_flags, base_map_flags, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = virtual_base + SYSTEM_RESERVED_MIN - SYSTEM_MANAGED_MIN;
user_base = virtual_base + USER_MIN - SYSTEM_MANAGED_MIN;
#endif
#endif
if (system_managed_base == MAP_FAILED || system_reserved_base == MAP_FAILED ||
user_base == MAP_FAILED) {
@ -430,9 +441,11 @@ struct AddressSpace::Impl {
if (write) {
flags |= PROT_WRITE;
}
#ifdef ARCH_X86_64
if (execute) {
flags |= PROT_EXEC;
}
#endif
int ret = mprotect(reinterpret_cast<void*>(virtual_addr), size, flags);
ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
}
@ -463,8 +476,14 @@ AddressSpace::~AddressSpace() = default;
// Forwards a mapping request to the platform implementation, choosing the page protection
// from is_exec. The alignment parameter is accepted but not used by this function.
void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr phys_addr,
                        bool is_exec) {
#ifdef ARCH_X86_64
    // Native x86_64: honor the caller's request for executable memory.
    const auto prot = is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
#else
    // On non-native architectures, we can simplify things by ignoring the execute flag for the
    // canonical copy of the memory and rely on the JIT to map translated code as executable.
    constexpr auto prot = PAGE_READWRITE;
#endif
    return impl->Map(virtual_addr, phys_addr, size, prot);
}
void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot,

View file

@ -4,6 +4,7 @@
#pragma once
#include <memory>
#include "common/arch.h"
#include "common/enum.h"
#include "common/types.h"
@ -23,7 +24,7 @@ constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL;
#ifdef __APPLE__
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Can only comfortably reserve the first 0x7C0000000 of system reserved space.
constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL;
#else

View file

@ -6,6 +6,7 @@
#include <thread>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/logging/log.h"
@ -989,7 +990,9 @@ static void cleanup_thread(void* arg) {
static void* run_thread(void* arg) {
auto* thread = static_cast<ScePthread>(arg);
Common::SetCurrentThreadName(thread->name.c_str());
#ifdef ARCH_X86_64
Core::InitializeThreadPatchStack();
#endif
auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->InitTlsForThread(false);
void* ret = nullptr;

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/config.h"
#include "common/logging/log.h"
@ -27,6 +28,7 @@ static PS4_SYSV_ABI void ProgramExitFunc() {
}
static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
#ifdef ARCH_X86_64
// reinterpret_cast<entry_func_t>(addr)(params, exit_func); // can't be used, stack has to have
// a specific layout
asm volatile("andq $-16, %%rsp\n" // Align to 16 bytes
@ -46,6 +48,9 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
:
: "r"(addr), "r"(params), "r"(exit_func)
: "rax", "rsi", "rdi");
#else
UNIMPLEMENTED_MSG("Missing RunMainEntry() implementation for target CPU architecture.");
#endif
}
// Constructs the linker, initializing the `memory` member from Memory::Instance().
Linker::Linker() : memory{Memory::Instance()} {}
@ -85,7 +90,9 @@ void Linker::Execute() {
// Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread");
#ifdef ARCH_X86_64
InitializeThreadPatchStack();
#endif
Libraries::Kernel::pthreadInitSelfMainThread();
InitTlsForThread(true);

View file

@ -3,6 +3,7 @@
#include <xbyak/xbyak.h>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/logging/log.h"
#ifdef ENABLE_QT_GUI
@ -134,9 +135,11 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
LOG_INFO(Core_Linker, "segment_mode ..........: {}", segment_mode);
add_segment(elf_pheader[i]);
#ifdef ARCH_X86_64
if (elf_pheader[i].p_flags & PF_EXEC) {
PatchInstructions(segment_addr, segment_file_size, c);
}
#endif
break;
}
case PT_DYNAMIC:

View file

@ -2,23 +2,28 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex>
#include "common/arch.h"
#include "common/assert.h"
#include "common/types.h"
#include "core/tls.h"
#ifdef _WIN32
#include <windows.h>
#elif defined(__APPLE__)
#elif defined(__APPLE__) && defined(ARCH_X86_64)
#include <architecture/i386/table.h>
#include <boost/icl/interval_set.hpp>
#include <i386/user_ldt.h>
#include <sys/mman.h>
#elif !defined(ARCH_X86_64)
#include <pthread.h>
#endif
namespace Core {
#ifdef _WIN32
// Windows
static DWORD slot = 0;
static std::once_flag slot_alloc_flag;
@ -40,7 +45,9 @@ Tcb* GetTcbBase() {
return reinterpret_cast<Tcb*>(TlsGetValue(GetTcbKey()));
}
#elif defined(__APPLE__)
#elif defined(__APPLE__) && defined(ARCH_X86_64)
// Apple x86_64
// Reserve space in the 32-bit address range for allocating TCB pages.
asm(".zerofill TCB_SPACE,TCB_SPACE,__guest_system,0x3FC000");
@ -132,7 +139,9 @@ Tcb* GetTcbBase() {
return tcb;
}
#else
#elif defined(ARCH_X86_64)
// Other POSIX x86_64
void SetTcbBase(void* image_address) {
asm volatile("wrgsbase %0" ::"r"(image_address) : "memory");
@ -144,6 +153,32 @@ Tcb* GetTcbBase() {
return tcb;
}
#else
// POSIX non-x86_64
// Stores the TCB base pointer in a pthread thread-specific slot; instruction translation can
// then point code to it.
// Thread-specific storage key used for the TCB pointer, and the flag guarding its one-time
// creation. Both are value-initialized.
static pthread_key_t slot{};
static std::once_flag slot_alloc_flag{};
// Creates the pthread key backing the TCB slot. No destructor callback is registered
// (nullptr is passed), so pthreads performs no cleanup of the stored value on thread exit.
static void AllocTcbKey() {
    // Make the call outside the assertion expression so that key creation never depends on
    // how ASSERT evaluates its argument.
    const int result = pthread_key_create(&slot, nullptr);
    ASSERT(result == 0);
}
// Returns the pthread key for the TCB slot, creating it on first use.
pthread_key_t GetTcbKey() {
    // call_once ensures the key is allocated a single time, even when several threads arrive
    // here concurrently.
    std::call_once(slot_alloc_flag, [] { AllocTcbKey(); });
    return slot;
}
// Records image_address as the calling thread's TCB base in the thread-specific slot.
void SetTcbBase(void* image_address) {
    // Make the call outside the assertion expression so that the store never depends on how
    // ASSERT evaluates its argument.
    const int result = pthread_setspecific(GetTcbKey(), image_address);
    ASSERT(result == 0);
}
// Returns the TCB base previously stored for the calling thread via SetTcbBase; the value is
// whatever pthread_getspecific yields for the TCB key.
Tcb* GetTcbBase() {
    void* const stored = pthread_getspecific(GetTcbKey());
    return static_cast<Tcb*>(stored);
}
#endif
} // namespace Core