kernel: Rewrite pthread emulation (#1440)

* libkernel: Cleanup some function places

* kernel: Refactor thread functions

* kernel: It builds

* kernel: Fix a bunch of bugs, kernel thread heap

* kernel: File cleanup pt1

* File cleanup pt2

* File cleanup pt3

* File cleanup pt4

* kernel: Add missing funcs

* kernel: Add basic exceptions for linux

* gnmdriver: Add workload functions

* kernel: Fix new pthreads code on macOS. (#1441)

* kernel: Downgrade edeadlk to log

* gnmdriver: Add sceGnmSubmitCommandBuffersForWorkload

* exception: Add context register population for macOS. (#1444)

* kernel: Pthread rewrite touchups for Windows

* kernel: Multiplatform thread implementation

* mutex: Remove spamming log

* pthread_spec: Make assert into a log

* pthread_spec: Zero initialize array

* Attempt to fix non-Windows builds

* hotfix: change incorrect NID for scePthreadAttrSetaffinity

* scePthreadAttrSetaffinity implementation

* Attempt to fix Linux

* windows: Address a bunch of address space problems

* address_space: Fix unmap of region surrounded by placeholders

* libs: Reduce logging

* pthread: Implement condvar with waitable atomics and sleepqueue

* sleepq: Separate and make faster

* time: Remove delay execution

* Causes high CPU usage in Touhou Luna Nights

* kernel: Cleanup files again

* pthread: Add missing include

* semaphore: Use binary_semaphore instead of condvar

* Seems more reliable

* libraries/sysmodule: log module on `sceSysmoduleIsLoaded`

* libraries/kernel: implement `scePthreadSetPrio`

---------

Co-authored-by: squidbus <175574877+squidbus@users.noreply.github.com>
Co-authored-by: Daniel R. <47796739+polybiusproxy@users.noreply.github.com>
This commit is contained in:
TheTurtle 2024-11-21 22:59:38 +02:00 committed by GitHub
parent 6904764aab
commit c4506da0ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
104 changed files with 5554 additions and 3979 deletions

View file

@ -5,6 +5,8 @@
#include "common/arch.h"
#include "common/assert.h"
#include "common/types.h"
#include "core/cpu_patches.h"
#include "core/libraries/kernel/threads/pthread.h"
#include "core/tls.h"
#ifdef _WIN32
@ -52,8 +54,13 @@ Tcb* GetTcbBase() {
// Reserve space in the 32-bit address range for allocating TCB pages.
asm(".zerofill TCB_SPACE,TCB_SPACE,__guest_system,0x3FC000");
static constexpr u64 ldt_region_base = 0x4000;
static constexpr u64 ldt_region_size = 0x3FC000;
// Per-thread bookkeeping record placed at the start of a thread's block inside
// the reserved LDT region. SetTcbBase writes it through a reinterpret_cast of the
// block address, so the field order and sizes define the in-memory layout.
struct LdtPage {
// TCB pointer supplied to SetTcbBase for the owning thread.
void* tcb;
// LDT index allocated to the owning thread; FreeLdtPage adds it back to free_ldts.
u16 index;
};
// Address of the block that belongs to the first usable LDT index (ldt_index_base).
static constexpr uintptr_t ldt_region_base = 0x4000;
// Size value for the LDT region; same value as the .zerofill reservation for TCB_SPACE.
static constexpr size_t ldt_region_size = 0x3FC000;
// Bytes of the region assigned to each LDT index.
static constexpr u16 ldt_block_size = 0x1000;
// First LDT index that is handed out to threads.
static constexpr u16 ldt_index_base = 8;
// Number of LDT indices made available to the allocator.
// NOTE(review): this subtracts the base address from the region size before dividing by
// the block size — confirm that mixing an address with a size here is intentional.
static constexpr u16 ldt_index_total = (ldt_region_size - ldt_region_base) / ldt_block_size;
@ -61,11 +68,13 @@ static constexpr u16 ldt_index_total = (ldt_region_size - ldt_region_base) / ldt
// Set of LDT indices that are currently unallocated.
static boost::icl::interval_set<u16> free_ldts{};
// Held while free_ldts is modified.
static std::mutex free_ldts_lock;
// Ensures InitLdtRegion runs only once (used with std::call_once in SetTcbBase).
static std::once_flag ldt_region_init_flag;
// pthread key whose per-thread value is the thread's LdtPage pointer; created in
// InitLdtRegion with FreeLdtPage as its destructor.
static pthread_key_t ldt_page_slot = 0;
static u16 GetLdtIndex() {
sel_t selector;
asm volatile("mov %%fs, %0" : "=r"(selector));
return selector.index;
static void FreeLdtPage(void* raw) {
const auto* ldt_page = static_cast<LdtPage*>(raw);
std::unique_lock lock{free_ldts_lock};
free_ldts += ldt_page->index;
}
static void InitLdtRegion() {
@ -76,11 +85,20 @@ static void InitLdtRegion() {
free_ldts +=
boost::icl::interval<u16>::right_open(ldt_index_base, ldt_index_base + ldt_index_total);
ASSERT_MSG(pthread_key_create(&ldt_page_slot, FreeLdtPage) == 0,
"Failed to create thread LDT page key: {}", errno);
}
static void** SetupThreadLdt() {
void SetTcbBase(void* image_address) {
std::call_once(ldt_region_init_flag, InitLdtRegion);
auto* ldt_page = static_cast<LdtPage*>(pthread_getspecific(ldt_page_slot));
if (ldt_page != nullptr) {
// Update TCB pointer in existing page.
ldt_page->tcb = image_address;
return;
}
// Allocate a new LDT index for the current thread.
u16 ldt_index;
{
@ -89,10 +107,12 @@ static void** SetupThreadLdt() {
ldt_index = first(*free_ldts.begin());
free_ldts -= ldt_index;
}
const u64 addr = ldt_region_base + (ldt_index - ldt_index_base) * ldt_block_size;
const uintptr_t addr = ldt_region_base + (ldt_index - ldt_index_base) * ldt_block_size;
// Create an LDT entry for the TCB.
const ldt_entry ldt{.data{
ldt_entry ldt{};
ldt.data = {
.base00 = static_cast<u16>(addr),
.base16 = static_cast<u8>(addr >> 16),
.base24 = static_cast<u8>(addr >> 24),
@ -103,34 +123,27 @@ static void** SetupThreadLdt() {
.present = 1, // Segment present
.stksz = DESC_DATA_32B,
.granular = DESC_GRAN_BYTE,
}};
};
int ret = i386_set_ldt(ldt_index, &ldt, 1);
ASSERT_MSG(ret == ldt_index,
"Failed to set LDT for TLS area: expected {}, but syscall returned {}", ldt_index,
ret);
"Failed to set LDT {} at {:#x} for TLS area: syscall returned {}, errno {}",
ldt_index, addr, ret, errno);
// Set the FS segment to the created LDT.
const sel_t sel{
const sel_t new_selector{
.rpl = USER_PRIV,
.ti = SEL_LDT,
.index = ldt_index,
};
asm volatile("mov %0, %%fs" ::"r"(sel));
asm volatile("mov %0, %%fs" ::"r"(new_selector));
return reinterpret_cast<void**>(addr);
}
// Store the TCB base pointer and index in the created LDT area.
ldt_page = reinterpret_cast<LdtPage*>(addr);
ldt_page->tcb = image_address;
ldt_page->index = ldt_index;
static void FreeThreadLdt() {
std::unique_lock lock{free_ldts_lock};
free_ldts += GetLdtIndex();
}
void SetTcbBase(void* image_address) {
if (image_address != nullptr) {
*SetupThreadLdt() = image_address;
} else {
FreeThreadLdt();
}
ASSERT_MSG(pthread_setspecific(ldt_page_slot, ldt_page) == 0,
"Failed to store thread LDT page pointer: {}", errno);
}
Tcb* GetTcbBase() {
@ -181,4 +194,15 @@ Tcb* GetTcbBase() {
#endif
thread_local std::once_flag init_tls_flag;
// Performs the calling thread's first-use setup exactly once, gated by the
// thread_local init_tls_flag: on x86-64 builds it calls InitializeThreadPatchStack(),
// then installs the current guest thread's TCB through SetTcbBase().
void EnsureThreadInitialized() {
    const auto first_use_setup = [] {
#ifdef ARCH_X86_64
        InitializeThreadPatchStack();
#endif
        SetTcbBase(Libraries::Kernel::g_curthread->tcb);
    };
    std::call_once(init_tls_flag, first_use_setup);
}
} // namespace Core