From f97c0deea966e200dc5d675dfaec90f3148a3865 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Tue, 13 May 2025 21:29:47 +0300 Subject: [PATCH 01/16] ngs2: removed possible nullptr value from logging (#2924) --- src/core/libraries/ngs2/ngs2.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/libraries/ngs2/ngs2.cpp b/src/core/libraries/ngs2/ngs2.cpp index 743be5fd6..9bb73536c 100644 --- a/src/core/libraries/ngs2/ngs2.cpp +++ b/src/core/libraries/ngs2/ngs2.cpp @@ -380,8 +380,7 @@ s32 PS4_SYSV_ABI sceNgs2GeomApply(const OrbisNgs2GeomListenerWork* listener, s32 PS4_SYSV_ABI sceNgs2PanInit(OrbisNgs2PanWork* work, const float* aSpeakerAngle, float unitAngle, u32 numSpeakers) { - LOG_ERROR(Lib_Ngs2, "aSpeakerAngle = {}, unitAngle = {}, numSpeakers = {}", *aSpeakerAngle, - unitAngle, numSpeakers); + LOG_ERROR(Lib_Ngs2, "unitAngle = {}, numSpeakers = {}", unitAngle, numSpeakers); return ORBIS_OK; } From 7334fb620bb760e998d10a3dce8d2c726440d906 Mon Sep 17 00:00:00 2001 From: Fire Cube Date: Tue, 13 May 2025 21:16:53 +0200 Subject: [PATCH 02/16] sceKernelAddTimerEvent implementation (#2906) * implementation * add backend (WIP) * now should be good - fix implementation based on homebrew tests - demote log to debug - make squidbus happy (hopefully) * fix moved m_name * fix clang * replace existing event when its same id and filter * run timercallback after addEvent and remove useless code * move KernelSignalRequest to the end * clang (i hate you) --- src/core/libraries/kernel/equeue.cpp | 88 ++++++++++++++++++++++++++++ src/core/libraries/kernel/equeue.h | 2 + 2 files changed, 90 insertions(+) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index a4916208a..7e579614f 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -152,6 +152,16 @@ int EqueueInternal::WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros) { return count; } +bool EqueueInternal::EventExists(u64 id, s16 filter) { + std::scoped_lock lock{m_mutex}; + + const auto& it = std::ranges::find_if(m_events, [id, filter](auto& ev) { + return ev.event.ident == id && ev.event.filter == filter; + }); + + return it != m_events.cend(); +} + extern boost::asio::io_context io_context; extern void KernelSignalRequest(); @@ -300,6 +310,82 @@ int PS4_SYSV_ABI sceKernelDeleteHRTimerEvent(SceKernelEqueue eq, int id) { } } +static void TimerCallback(const boost::system::error_code& error, SceKernelEqueue eq, + SceKernelEvent kevent, SceKernelUseconds interval_ms) { + if (error) { + LOG_ERROR(Kernel_Event, "Timer callback error: {}", error.message()); + return; + } + + if (eq->EventExists(kevent.ident, kevent.filter)) { + eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::Timer, kevent.udata); + + if (!(kevent.flags & SceKernelEvent::Flags::OneShot)) { + auto timer = std::make_shared( + io_context, std::chrono::milliseconds(interval_ms)); + + timer->async_wait( + [eq, kevent, interval_ms, timer](const boost::system::error_code& ec) { + TimerCallback(ec, eq, kevent, interval_ms); + }); + } + } +} + +int PS4_SYSV_ABI sceKernelAddTimerEvent(SceKernelEqueue eq, int id, SceKernelUseconds usec, + void* udata) { + if (eq == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + const u64 interval_ms = static_cast(usec & 0xFFFFFFFF) / 1000; + + EqueueEvent event{}; + event.event.ident = static_cast(id); + event.event.filter = SceKernelEvent::Filter::Timer; + event.event.flags = SceKernelEvent::Flags::Add; + event.event.fflags = 0; + event.event.data = interval_ms; + event.event.udata = udata; + event.time_added = std::chrono::steady_clock::now(); + + if (eq->EventExists(event.event.ident, event.event.filter)) { + eq->RemoveEvent(id, SceKernelEvent::Filter::Timer); + LOG_DEBUG(Kernel_Event, + "Timer event already exists, removing it: queue name={}, queue id={}", + eq->GetName(), event.event.ident); + } + + LOG_DEBUG(Kernel_Event, + "Added timing event: queue name={}, queue id={}, ms-intevall={}, pointer={:x}", + eq->GetName(), event.event.ident, interval_ms, reinterpret_cast(udata)); + + auto timer = std::make_shared( + io_context, std::chrono::milliseconds(interval_ms)); + + if (!eq->AddEvent(event)) { + return ORBIS_KERNEL_ERROR_ENOMEM; + } + + timer->async_wait( + [eq, event_data = event.event, interval_ms, timer](const boost::system::error_code& ec) { + TimerCallback(ec, eq, event_data, interval_ms); + }); + + KernelSignalRequest(); + + return ORBIS_OK; +} + +int PS4_SYSV_ABI sceKernelDeleteTimerEvent(SceKernelEqueue eq, int id) { + if (eq == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + return eq->RemoveEvent(id, SceKernelEvent::Filter::Timer) ? ORBIS_OK + : ORBIS_KERNEL_ERROR_ENOENT; +} + int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) { if (eq == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; @@ -380,6 +466,8 @@ void RegisterEventQueue(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("WDszmSbWuDk", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEventEdge); LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent); LIB_FUNCTION("J+LF6LwObXU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteHRTimerEvent); + LIB_FUNCTION("57ZK+ODEXWY", "libkernel", 1, "libkernel", 1, 1, sceKernelAddTimerEvent); + LIB_FUNCTION("YWQFUyXIVdU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteTimerEvent); LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent); LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent); LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId); diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index 2bd7ef510..636496604 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -152,6 +152,8 @@ public: int WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros); + bool EventExists(u64 id, s16 filter); + private: std::string m_name; std::mutex m_mutex; From 484fbcc3209a2b3248f4f23a2354f5d5ad53eb65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Tue, 13 May 2025 22:19:56 +0200 Subject: [PATCH 03/16] Handle -1 as V_CMP_NE_U64 argument (#2919) --- .../frontend/translate/vector_alu.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 3ce86c131..6171cca07 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -989,13 +989,22 @@ void Translator::V_CMP_NE_U64(const GcnInst& inst) { } }; const IR::U1 src0{get_src(inst.src[0])}; - ASSERT(inst.src[1].field == OperandField::ConstZero); // src0 != 0 + auto op = [&inst, this](auto x) { + switch (inst.src[1].field) { + case OperandField::ConstZero: + return x; + case OperandField::SignedConstIntNeg: + return ir.LogicalNot(x); + default: + UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field)); + } + }; switch (inst.dst[1].field) { case OperandField::VccLo: - ir.SetVcc(src0); + ir.SetVcc(op(src0)); break; case OperandField::ScalarGPR: - ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), src0); + ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0)); break; default: UNREACHABLE(); From 1832ec2ac2f9fe1a507a34fb2734cd6b6f3490a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Tue, 13 May 2025 22:54:22 +0200 Subject: [PATCH 04/16] Implement sceKernelIsStack (#2917) --- src/core/libraries/kernel/memory.cpp | 8 ++++++++ src/core/libraries/kernel/memory.h | 1 + src/core/memory.cpp | 29 ++++++++++++++++++++++++++++ src/core/memory.h | 2 ++ 4 files changed, 40 insertions(+) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index dd0e07302..7af67d6d3 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -290,6 +290,13 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut directMemoryEndOut); } +int PS4_SYSV_ABI sceKernelIsStack(void* addr, void** start, void** end) { + LOG_DEBUG(Kernel_Vmm, "called, addr = {:#x}, start = {:#x}, end = {:#x}", fmt::ptr(addr), + fmt::ptr(start), fmt::ptr(end)); + auto* memory = Core::Memory::Instance(); + return memory->IsStack(std::bit_cast(addr), start, end); +} + s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries, int* numEntriesOut) { return sceKernelBatchMap2(entries, numEntries, numEntriesOut, @@ -636,6 +643,7 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("7oxv3PPCumo", "libkernel", 1, "libkernel", 1, 1, sceKernelReserveVirtualRange); LIB_FUNCTION("BC+OG5m9+bw", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemoryType); LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize); + LIB_FUNCTION("yDBwVAolDgg", "libkernel", 1, "libkernel", 1, 1, sceKernelIsStack); LIB_FUNCTION("NcaWUxfMNIQ", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedDirectMemory); LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory); LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection); diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 3e2bf8de5..92e158a00 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -158,6 +158,7 @@ void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]); int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut, void** directMemoryStartOut, void** directMemoryEndOut); +int PS4_SYSV_ABI sceKernelIsStack(void* addr, void** start, void** end); s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries, int* numEntriesOut); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 6438670d3..ec03d6c5e 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -949,4 +949,33 @@ int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, return ORBIS_OK; } +int MemoryManager::IsStack(VAddr addr, void** start, void** end) { + auto vma_handle = FindVMA(addr); + if (vma_handle == vma_map.end()) { + return ORBIS_KERNEL_ERROR_EINVAL; + } + + const VirtualMemoryArea& vma = vma_handle->second; + if (!vma.Contains(addr, 0) || vma.IsFree()) { + return ORBIS_KERNEL_ERROR_EACCES; + } + + auto stack_start = 0ul; + auto stack_end = 0ul; + if (vma.type == VMAType::Stack) { + stack_start = vma.base; + stack_end = vma.base + vma.size; + } + + if (start != nullptr) { + *start = reinterpret_cast(stack_start); + } + + if (end != nullptr) { + *end = reinterpret_cast(stack_end); + } + + return ORBIS_OK; +} + } // namespace Core diff --git a/src/core/memory.h b/src/core/memory.h index 4920aa397..4c143ff6f 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -223,6 +223,8 @@ public: void InvalidateMemory(VAddr addr, u64 size) const; + int IsStack(VAddr addr, void** start, void** end); + private: VMAHandle FindVMA(VAddr target) { return std::prev(vma_map.upper_bound(target)); From e5b675d607502bbe8d78204794a95c22ef2c50af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Tue, 13 May 2025 22:56:20 +0200 Subject: [PATCH 05/16] Handle IT_WAIT_REG_MEM with Register argument (#2927) --- src/core/libraries/gnmdriver/gnmdriver.cpp | 2 +- src/video_core/amdgpu/liverpool.cpp | 6 ++--- src/video_core/amdgpu/pm4_cmds.h | 26 +++++++++++++++------- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index f2f40e0e3..9cf340050 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -179,7 +179,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add auto* wait_reg_mem = reinterpret_cast(cmdbuf); wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5}; wait_reg_mem->raw = (is_mem << 4u) | (cmp_func & 7u); - wait_reg_mem->poll_addr_lo = u32(addr & addr_mask); + wait_reg_mem->poll_addr_lo_raw = u32(addr & addr_mask); wait_reg_mem->poll_addr_hi = u32(addr >> 32u); wait_reg_mem->ref = ref; wait_reg_mem->mask = mask; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 598288085..0fbfa8b9b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -696,10 +696,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanAddress(); if (vo_port->IsVoLabel(wait_addr) && num_submits == mapped_queues[GfxQueueId].submits.size()) { - vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); }); + vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); }); break; } - while (!wait_reg_mem->Test()) { + while (!wait_reg_mem->Test(regs.reg_array)) { YIELD_GFX(); } break; @@ -934,7 +934,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq case PM4ItOpcode::WaitRegMem: { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); - while (!wait_reg_mem->Test()) { + while (!wait_reg_mem->Test(regs.reg_array)) { YIELD_ASC(vqid); } break; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index cd175f6c9..066fa4b62 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -474,7 +474,12 @@ struct PM4CmdWaitRegMem { BitField<8, 1, Engine> engine; u32 raw; }; - u32 poll_addr_lo; + union { + BitField<0, 16, u32> reg; + BitField<2, 30, u32> poll_addr_lo; + BitField<0, 2, u32> swap; + u32 poll_addr_lo_raw; + }; u32 poll_addr_hi; u32 ref; u32 mask; @@ -485,28 +490,33 @@ struct PM4CmdWaitRegMem { return std::bit_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); } - bool Test() const { + u32 Reg() const { + return reg.Value(); + } + + bool Test(const std::array& regs) const { + u32 value = mem_space.Value() == MemSpace::Memory ? *Address() : regs[Reg()]; switch (function.Value()) { case Function::Always: { return true; } case Function::LessThan: { - return (*Address() & mask) < ref; + return (value & mask) < ref; } case Function::LessThanEqual: { - return (*Address() & mask) <= ref; + return (value & mask) <= ref; } case Function::Equal: { - return (*Address() & mask) == ref; + return (value & mask) == ref; } case Function::NotEqual: { - return (*Address() & mask) != ref; + return (value & mask) != ref; } case Function::GreaterThanEqual: { - return (*Address() & mask) >= ref; + return (value & mask) >= ref; } case Function::GreaterThan: { - return (*Address() & mask) > ref; + return (value & mask) > ref; } case Function::Reserved: [[fallthrough]]; From 0d127a82dda5dc655592bab5a662d5820cf97b78 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 13 May 2025 14:05:29 -0700 Subject: [PATCH 06/16] equeue: Clean up timers implementation. (#2925) --- src/core/libraries/kernel/equeue.cpp | 128 +++++++++++++++------------ src/core/libraries/kernel/equeue.h | 9 +- 2 files changed, 78 insertions(+), 59 deletions(-) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 7e579614f..8b3ae90cb 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -12,12 +12,25 @@ namespace Libraries::Kernel { +extern boost::asio::io_context io_context; +extern void KernelSignalRequest(); + +static constexpr auto HrTimerSpinlockThresholdUs = 1200u; + // Events are uniquely identified by id and filter. bool EqueueInternal::AddEvent(EqueueEvent& event) { std::scoped_lock lock{m_mutex}; event.time_added = std::chrono::steady_clock::now(); + if (event.event.filter == SceKernelEvent::Filter::Timer || + event.event.filter == SceKernelEvent::Filter::HrTimer) { + // HrTimer events are offset by the threshold of time at the end that we spinlock for + // greater accuracy. + const auto offset = + event.event.filter == SceKernelEvent::Filter::HrTimer ? HrTimerSpinlockThresholdUs : 0u; + event.timer_interval = std::chrono::microseconds(event.event.data - offset); + } const auto& it = std::ranges::find(m_events, event); if (it != m_events.cend()) { @@ -29,6 +42,47 @@ bool EqueueInternal::AddEvent(EqueueEvent& event) { return true; } +bool EqueueInternal::ScheduleEvent(u64 id, s16 filter, + void (*callback)(SceKernelEqueue, const SceKernelEvent&)) { + std::scoped_lock lock{m_mutex}; + + const auto& it = std::ranges::find_if(m_events, [id, filter](auto& ev) { + return ev.event.ident == id && ev.event.filter == filter; + }); + if (it == m_events.cend()) { + return false; + } + + const auto& event = *it; + ASSERT(event.event.filter == SceKernelEvent::Filter::Timer || + event.event.filter == SceKernelEvent::Filter::HrTimer); + + if (!it->timer) { + it->timer = std::make_unique(io_context, event.timer_interval); + } else { + // If the timer already exists we are scheduling a reoccurrence after the next period. + // Set the expiration time to the previous occurrence plus the period. + it->timer->expires_at(it->timer->expires_at() + event.timer_interval); + } + + it->timer->async_wait( + [this, event_data = event.event, callback](const boost::system::error_code& ec) { + if (ec) { + if (ec.value() != boost::system::errc::operation_canceled) { + LOG_ERROR(Kernel_Event, "Timer callback error: {}", ec.message()); + } else { + // Timer was cancelled (removed) before it triggered + LOG_DEBUG(Kernel_Event, "Timer cancelled"); + } + return; + } + callback(this, event_data); + }); + KernelSignalRequest(); + + return true; +} + bool EqueueInternal::RemoveEvent(u64 id, s16 filter) { bool has_found = false; std::scoped_lock lock{m_mutex}; @@ -162,20 +216,6 @@ bool EqueueInternal::EventExists(u64 id, s16 filter) { return it != m_events.cend(); } -extern boost::asio::io_context io_context; -extern void KernelSignalRequest(); - -static constexpr auto HrTimerSpinlockThresholdUs = 1200u; - -static void SmallTimerCallback(const boost::system::error_code& error, SceKernelEqueue eq, - SceKernelEvent kevent) { - static EqueueEvent event; - event.event = kevent; - event.event.data = HrTimerSpinlockThresholdUs; - eq->AddSmallTimer(event); - eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata); -} - int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) { if (eq == nullptr) { LOG_ERROR(Kernel_Event, "Event queue is null!"); @@ -253,6 +293,14 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int return ORBIS_OK; } +static void HrTimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) { + static EqueueEvent event; + event.event = kevent; + event.event.data = HrTimerSpinlockThresholdUs; + eq->AddSmallTimer(event); + eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata); +} + s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* ts, void* udata) { if (eq == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; @@ -283,17 +331,10 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* return eq->AddSmallTimer(event) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOMEM; } - event.timer = std::make_unique( - io_context, std::chrono::microseconds(total_us - HrTimerSpinlockThresholdUs)); - - event.timer->async_wait(std::bind(SmallTimerCallback, std::placeholders::_1, eq, event.event)); - - if (!eq->AddEvent(event)) { + if (!eq->AddEvent(event) || + !eq->ScheduleEvent(id, SceKernelEvent::Filter::HrTimer, HrTimerCallback)) { return ORBIS_KERNEL_ERROR_ENOMEM; } - - KernelSignalRequest(); - return ORBIS_OK; } @@ -310,24 +351,13 @@ int PS4_SYSV_ABI sceKernelDeleteHRTimerEvent(SceKernelEqueue eq, int id) { } } -static void TimerCallback(const boost::system::error_code& error, SceKernelEqueue eq, - SceKernelEvent kevent, SceKernelUseconds interval_ms) { - if (error) { - LOG_ERROR(Kernel_Event, "Timer callback error: {}", error.message()); - return; - } - +static void TimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) { if (eq->EventExists(kevent.ident, kevent.filter)) { eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::Timer, kevent.udata); if (!(kevent.flags & SceKernelEvent::Flags::OneShot)) { - auto timer = std::make_shared( - io_context, std::chrono::milliseconds(interval_ms)); - - timer->async_wait( - [eq, kevent, interval_ms, timer](const boost::system::error_code& ec) { - TimerCallback(ec, eq, kevent, interval_ms); - }); + // Reschedule the event for its next period. + eq->ScheduleEvent(kevent.ident, kevent.filter, TimerCallback); } } } @@ -338,16 +368,13 @@ int PS4_SYSV_ABI sceKernelAddTimerEvent(SceKernelEqueue eq, int id, SceKernelUse return ORBIS_KERNEL_ERROR_EBADF; } - const u64 interval_ms = static_cast(usec & 0xFFFFFFFF) / 1000; - EqueueEvent event{}; event.event.ident = static_cast(id); event.event.filter = SceKernelEvent::Filter::Timer; event.event.flags = SceKernelEvent::Flags::Add; event.event.fflags = 0; - event.event.data = interval_ms; + event.event.data = usec; event.event.udata = udata; - event.time_added = std::chrono::steady_clock::now(); if (eq->EventExists(event.event.ident, event.event.filter)) { eq->RemoveEvent(id, SceKernelEvent::Filter::Timer); @@ -356,24 +383,13 @@ int PS4_SYSV_ABI sceKernelAddTimerEvent(SceKernelEqueue eq, int id, SceKernelUse eq->GetName(), event.event.ident); } - LOG_DEBUG(Kernel_Event, - "Added timing event: queue name={}, queue id={}, ms-intevall={}, pointer={:x}", - eq->GetName(), event.event.ident, interval_ms, reinterpret_cast(udata)); + LOG_DEBUG(Kernel_Event, "Added timing event: queue name={}, queue id={}, usec={}, pointer={:x}", + eq->GetName(), event.event.ident, usec, reinterpret_cast(udata)); - auto timer = std::make_shared( - io_context, std::chrono::milliseconds(interval_ms)); - - if (!eq->AddEvent(event)) { + if (!eq->AddEvent(event) || + !eq->ScheduleEvent(id, SceKernelEvent::Filter::Timer, TimerCallback)) { return ORBIS_KERNEL_ERROR_ENOMEM; } - - timer->async_wait( - [eq, event_data = event.event, interval_ms, timer](const boost::system::error_code& ec) { - TimerCallback(ec, eq, event_data, interval_ms); - }); - - KernelSignalRequest(); - return ORBIS_OK; } diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index 636496604..a0367c66a 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -21,6 +21,9 @@ namespace Libraries::Kernel { class EqueueInternal; struct EqueueEvent; +using SceKernelUseconds = u32; +using SceKernelEqueue = EqueueInternal*; + struct SceKernelEvent { enum Filter : s16 { None = 0, @@ -77,6 +80,7 @@ struct EqueueEvent { SceKernelEvent event; void* data = nullptr; std::chrono::steady_clock::time_point time_added; + std::chrono::microseconds timer_interval; std::unique_ptr timer; void ResetTriggerState() { @@ -133,6 +137,8 @@ public: } bool AddEvent(EqueueEvent& event); + bool ScheduleEvent(u64 id, s16 filter, + void (*callback)(SceKernelEqueue, const SceKernelEvent&)); bool RemoveEvent(u64 id, s16 filter); int WaitForEvents(SceKernelEvent* ev, int num, u32 micros); bool TriggerEvent(u64 ident, s16 filter, void* trigger_data); @@ -162,9 +168,6 @@ private: std::condition_variable m_cond; }; -using SceKernelUseconds = u32; -using SceKernelEqueue = EqueueInternal*; - u64 PS4_SYSV_ABI sceKernelGetEventData(const SceKernelEvent* ev); void RegisterEventQueue(Core::Loader::SymbolsResolver* sym); From 073f93172985480961ba09fece0048d94d763d6c Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 13 May 2025 14:14:11 -0700 Subject: [PATCH 07/16] fix: PM4CmdWaitRegMem memory address --- src/video_core/amdgpu/pm4_cmds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 066fa4b62..6dc7d97a6 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -487,7 +487,7 @@ struct PM4CmdWaitRegMem { template T Address() const { - return std::bit_cast((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo); + return std::bit_cast((uintptr_t(poll_addr_hi) << 32) | (poll_addr_lo << 2)); } u32 Reg() const { From 3ab69e24db5f976d7d675fea395b550e1ccdfc1d Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 13 May 2025 14:22:44 -0700 Subject: [PATCH 08/16] fix: Compiling with newer Boost --- src/core/libraries/kernel/equeue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 8b3ae90cb..ec3b50cd5 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -62,7 +62,7 @@ bool EqueueInternal::ScheduleEvent(u64 id, s16 filter, } else { // If the timer already exists we are scheduling a reoccurrence after the next period. // Set the expiration time to the previous occurrence plus the period. - it->timer->expires_at(it->timer->expires_at() + event.timer_interval); + it->timer->expires_at(it->timer->expiry() + event.timer_interval); } it->timer->async_wait( From 6abda17532fbf1e44e4ced877f79db33845a347c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Tue, 13 May 2025 23:31:14 +0200 Subject: [PATCH 09/16] Avoid post-increment of SGPR in S_*_LOAD_DWORD (#2928) --- src/shader_recompiler/frontend/translate/scalar_memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp index 89426e080..376cc304e 100644 --- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp @@ -53,7 +53,7 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1)); IR::ScalarReg dst_reg{inst.dst[0].code}; for (u32 i = 0; i < num_dwords; i++) { - ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i))); + ir.SetScalarReg(dst_reg + i, ir.ReadConst(base, ir.Imm32(dword_offset + i))); } } @@ -75,7 +75,7 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) { IR::ScalarReg dst_reg{inst.dst[0].code}; for (u32 i = 0; i < num_dwords; i++) { const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i)); - ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(vsharp, index)); + ir.SetScalarReg(dst_reg + i, ir.ReadConstBuffer(vsharp, index)); } } From 647b1d3ee476ce15fbf542e8b0f57e6debb12d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Tue, 13 May 2025 23:34:22 +0200 Subject: [PATCH 10/16] Handle VgtStreamoutFlush event (#2929) --- src/video_core/amdgpu/liverpool.cpp | 21 ++++- src/video_core/amdgpu/liverpool.h | 13 +++- src/video_core/amdgpu/pm4_cmds.h | 114 ++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 0fbfa8b9b..686e8e84f 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -584,7 +584,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + const auto* event = reinterpret_cast(header); + LOG_DEBUG(Render_Vulkan, + "Encountered EventWrite: event_type = {}, event_index = {}", + magic_enum::enum_name(event->event_type.Value()), + magic_enum::enum_name(event->event_index.Value())); + if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) { + // TODO: handle proper synchronization, for now signal that update is done + // immediately + regs.cp_strmout_cntl.offset_update_done = 1; + } break; } case PM4ItOpcode::EventWriteEos: { @@ -732,6 +741,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + LOG_WARNING(Render_Vulkan, + "Unimplemented IT_STRMOUT_BUFFER_UPDATE, update_memory = {}, " + "source_select = {}, buffer_select = {}", + strmout->update_memory.Value(), + magic_enum::enum_name(strmout->source_select.Value()), + strmout->buffer_select.Value()); + break; + } default: UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", static_cast(opcode), count); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c4bebd05f..a62141099 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1175,6 +1175,14 @@ struct Liverpool { BitField<22, 2, u32> onchip; }; + union StreamOutControl { + u32 raw; + struct { + u32 offset_update_done : 1; + u32 : 31; + }; + }; + union StreamOutConfig { u32 raw; struct { @@ -1378,7 +1386,9 @@ struct Liverpool { AaConfig aa_config; INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1); ColorBuffer color_buffers[NumColorBuffers]; - INSERT_PADDING_WORDS(0xC242 - 0xA390); + INSERT_PADDING_WORDS(0xC03F - 0xA390); + StreamOutControl cp_strmout_cntl; + INSERT_PADDING_WORDS(0xC242 - 0xC040); PrimitiveType primitive_type; INSERT_PADDING_WORDS(0xC24C - 0xC243); u32 num_indices; @@ -1668,6 +1678,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A); static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381); +static_assert(GFX6_3D_REG_INDEX(cp_strmout_cntl) == 0xC03F); static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242); static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D); static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 6dc7d97a6..58ecda93e 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -246,6 +246,46 @@ struct PM4CmdNop { }; }; +enum class SourceSelect : u32 { + BufferOffset = 0, + VgtStrmoutBufferFilledSize = 1, + SrcAddress = 2, + None = 3, +}; + +struct PM4CmdStrmoutBufferUpdate { + PM4Type3Header header; + union { + BitField<0, 1, u32> update_memory; + BitField<1, 2, SourceSelect> source_select; + BitField<8, 2, u32> buffer_select; + u32 control; + }; + union { + BitField<2, 30, u32> dst_address_lo; + BitField<0, 2, u32> swap_dst; + }; + u32 dst_address_hi; + union { + u32 buffer_offset; + BitField<2, 30, u32> src_address_lo; + BitField<0, 2, u32> swap_src; + }; + u32 src_address_hi; + + template + T DstAddress() const { + ASSERT(update_memory.Value() == 1); + return reinterpret_cast(dst_address_lo.Value() | u64(dst_address_hi & 0xFFFF) << 32); + } + + template + T SrcAddress() const { + ASSERT(source_select.Value() == SourceSelect::SrcAddress); + return reinterpret_cast(src_address_lo.Value() | u64(src_address_hi & 0xFFFF) << 32); + } +}; + struct PM4CmdDrawIndexOffset2 { PM4Type3Header header; u32 max_size; ///< Maximum number of indices @@ -303,6 +343,80 @@ static u64 GetGpuClock64() { return static_cast(ticks); } +// VGT_EVENT_INITIATOR.EVENT_TYPE +enum class EventType : u32 { + SampleStreamoutStats1 = 1, + SampleStreamoutStats2 = 2, + SampleStreamoutStats3 = 3, + CacheFlushTs = 4, + ContextDone = 5, + CacheFlush = 6, + CsPartialFlush = 7, + VgtStreamoutSync = 8, + VgtStreamoutReset = 10, + EndOfPipeIncrDe = 11, + EndOfPipeIbEnd = 12, + RstPixCnt = 13, + VsPartialFlush = 15, + PsPartialFlush = 16, + FlushHsOutput = 17, + FlushLsOutput = 18, + CacheFlushAndInvTsEvent = 20, + ZpassDone = 21, + CacheFlushAndInvEvent = 22, + PerfcounterStart = 23, + PerfcounterStop = 24, + PipelineStatStart = 25, + PipelineStatStop = 26, + PerfcounterSample = 27, + FlushEsOutput = 28, + FlushGsOutput = 29, + SamplePipelineStat = 30, + SoVgtStreamoutFlush = 31, + SampleStreamoutStats = 32, + ResetVtxCnt = 33, + VgtFlush = 36, + ScSendDbVpz = 39, + BottomOfPipeTs = 40, + DbCacheFlushAndInv = 42, + FlushAndInvDbDataTs = 43, + FlushAndInvDbMeta = 44, + FlushAndInvCbDataTs = 45, + FlushAndInvCbMeta = 46, + CsDone = 47, + PsDone = 48, + FlushAndInvCbPixelData = 49, + ThreadTraceStart = 51, + ThreadTraceStop = 52, + ThreadTraceFlush = 54, + ThreadTraceFinish = 55, + PixelPipeStatControl = 56, + PixelPipeStatDump = 57, + PixelPipeStatReset = 58, +}; + +enum class EventIndex : u32 { + Other = 0, + ZpassDone = 1, + SamplePipelineStat = 2, + SampleStreamoutStatSx = 3, + CsVsPsPartialFlush = 4, + EopReserved = 5, + EosReserved = 6, + CacheFlush = 7, +}; + +struct PM4CmdEventWrite { + PM4Type3Header header; + union { + u32 event_control; + BitField<0, 6, EventType> event_type; ///< Event type written to VGT_EVENT_INITIATOR + BitField<8, 4, EventIndex> event_index; ///< Event index + BitField<20, 1, u32> inv_l2; ///< Send WBINVL2 op to the TC L2 cache when EVENT_INDEX = 0111 + }; + u32 address[]; +}; + struct PM4CmdEventWriteEop { PM4Type3Header header; union { From 1639640902f5e4aa5cc088a6becb36c3c8c57864 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 13 May 2025 14:46:59 -0700 Subject: [PATCH 11/16] event_flag: Lower error logs to debug. --- src/core/libraries/kernel/threads/event_flag.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/kernel/threads/event_flag.cpp b/src/core/libraries/kernel/threads/event_flag.cpp index 24ddcb927..91b17bd49 100644 --- a/src/core/libraries/kernel/threads/event_flag.cpp +++ b/src/core/libraries/kernel/threads/event_flag.cpp @@ -315,7 +315,7 @@ int PS4_SYSV_ABI sceKernelPollEventFlag(OrbisKernelEventFlag ef, u64 bitPattern, auto result = ef->Poll(bitPattern, wait, clear, pResultPat); if (result != ORBIS_OK && result != ORBIS_KERNEL_ERROR_EBUSY) { - LOG_ERROR(Kernel_Event, "returned {}", result); + LOG_DEBUG(Kernel_Event, "returned {:#x}", result); } return result; @@ -361,7 +361,7 @@ int PS4_SYSV_ABI sceKernelWaitEventFlag(OrbisKernelEventFlag ef, u64 bitPattern, u32 result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout); if (result != ORBIS_OK && result != ORBIS_KERNEL_ERROR_ETIMEDOUT) { - LOG_ERROR(Kernel_Event, "returned {:#x}", result); + LOG_DEBUG(Kernel_Event, "returned {:#x}", result); } return result; From 99eaba7c963797b4282af4a4b78ec8d8ea358d80 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 13 May 2025 16:08:25 -0700 Subject: [PATCH 12/16] liverpool: Lower SetQueueReg to warning log. (#2930) --- src/video_core/amdgpu/liverpool.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 686e8e84f..d1cd98634 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -885,8 +885,9 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq } case PM4ItOpcode::SetQueueReg: { const auto* set_data = reinterpret_cast(header); - UNREACHABLE_MSG("Encountered compute SetQueueReg: vqid = {}, reg_offset = {:#x}", - set_data->vqid.Value(), set_data->reg_offset.Value()); + LOG_WARNING(Render, "Encountered compute SetQueueReg: vqid = {}, reg_offset = {:#x}", + set_data->vqid.Value(), set_data->reg_offset.Value()); + break; } case PM4ItOpcode::DispatchDirect: { const auto* dispatch_direct = reinterpret_cast(header); From 26c965cf4aee39903067015c2759f38f7f4444b0 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 13 May 2025 17:31:23 -0700 Subject: [PATCH 13/16] equeue: Fix timer cancel error code check. --- src/core/libraries/kernel/equeue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index ec3b50cd5..958019cd3 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -68,7 +68,7 @@ bool EqueueInternal::ScheduleEvent(u64 id, s16 filter, it->timer->async_wait( [this, event_data = event.event, callback](const boost::system::error_code& ec) { if (ec) { - if (ec.value() != boost::system::errc::operation_canceled) { + if (ec != boost::system::errc::operation_canceled) { LOG_ERROR(Kernel_Event, "Timer callback error: {}", ec.message()); } else { // Timer was cancelled (removed) before it triggered From 6d38100a4118f161fdbf50cc0a1012d863d475d0 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Wed, 14 May 2025 09:09:23 -0500 Subject: [PATCH 14/16] Avoid logging nulls in sceKernelIsStack (#2933) Games would crash if providing nullptr to start or end. Also don't need the :# part when logging pointers, as they're automatically formatted. --- src/core/libraries/kernel/memory.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 7af67d6d3..bddfcfc84 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -291,8 +291,7 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut } int PS4_SYSV_ABI sceKernelIsStack(void* addr, void** start, void** end) { - LOG_DEBUG(Kernel_Vmm, "called, addr = {:#x}, start = {:#x}, end = {:#x}", fmt::ptr(addr), - fmt::ptr(start), fmt::ptr(end)); + LOG_DEBUG(Kernel_Vmm, "called, addr = {}", fmt::ptr(addr)); auto* memory = Core::Memory::Instance(); return memory->IsStack(std::bit_cast(addr), start, end); } From 98faff425ea325c693ce6e6d00f65d32e15d6665 Mon Sep 17 00:00:00 2001 From: MajorP93 Date: Wed, 14 May 2025 19:37:16 +0200 Subject: [PATCH 15/16] build: Target x86-64-v3 CPU architecture (#2934) --- CMakeLists.txt | 6 +++--- documents/Quickstart/Quickstart.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6780db417..8daa98dea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,9 +54,9 @@ else() endif() if (ARCHITECTURE STREQUAL "x86_64") - # Target the same CPU architecture as the PS4, to maintain the same level of compatibility. - # Exclude SSE4a as it is only available on AMD CPUs. - add_compile_options(-march=btver2 -mtune=generic -mno-sse4a) + # Target x86-64-v3 CPU architecture as this is a good balance between supporting performance critical + # instructions like AVX2 and maintaining support for older CPUs. + add_compile_options(-march=x86-64-v3) endif() if (APPLE AND ARCHITECTURE STREQUAL "x86_64" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64") diff --git a/documents/Quickstart/Quickstart.md b/documents/Quickstart/Quickstart.md index 62df95e71..e2145ebbd 100644 --- a/documents/Quickstart/Quickstart.md +++ b/documents/Quickstart/Quickstart.md @@ -21,9 +21,9 @@ SPDX-License-Identifier: GPL-2.0-or-later - A processor with at least 4 cores and 6 threads - Above 2.5 GHz frequency -- A CPU supporting the following instruction sets: MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, F16C, CLMUL, AES, BMI1, MOVBE, XSAVE, ABM +- A CPU supporting the x86-64-v3 baseline. - **Intel**: Haswell generation or newer - - **AMD**: Jaguar generation or newer + - **AMD**: Excavator generation or newer - **Apple**: Rosetta 2 on macOS 15.4 or newer ### GPU @@ -55,4 +55,4 @@ To configure the emulator, you can go through the interface and go to "settings" You can also configure the emulator by editing the `config.toml` file located in the `user` folder created after the application is started (Mostly useful if you are using the SDL version). Some settings may be related to more technical development and debugging.\ -For more information on this, see [**Debugging**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/Debugging/Debugging.md#configuration). \ No newline at end of file +For more information on this, see [**Debugging**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/Debugging/Debugging.md#configuration). From aeb453698821f5d346de9602d9d7e8114f94a89a Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Thu, 15 May 2025 13:59:34 -0700 Subject: [PATCH 16/16] externals: Remove winpthreads. (#2932) --- .gitmodules | 4 - CMakeLists.txt | 9 +- externals/CMakeLists.txt | 6 - externals/winpthreads | 1 - src/common/thread.cpp | 39 +- src/common/thread.h | 3 + src/core/libraries/kernel/kernel.cpp | 3 + src/core/libraries/kernel/time.cpp | 560 +++++++++++++++------------ src/core/libraries/kernel/time.h | 10 +- 9 files changed, 362 insertions(+), 273 deletions(-) delete mode 160000 externals/winpthreads diff --git a/.gitmodules b/.gitmodules index 065a4570f..25b5d307b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -30,10 +30,6 @@ path = externals/xbyak url = https://github.com/herumi/xbyak.git shallow = true -[submodule "externals/winpthreads"] - path = externals/winpthreads - url = https://github.com/shadps4-emu/winpthreads.git - shallow = true [submodule "externals/magic_enum"] path = externals/magic_enum url = https://github.com/Neargye/magic_enum.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8daa98dea..6c5dde7bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,13 +239,6 @@ if (APPLE) endif() list(POP_BACK CMAKE_MODULE_PATH) -# Note: Windows always has these functions through winpthreads -include(CheckSymbolExists) -check_symbol_exists(pthread_mutex_timedlock "pthread.h" HAVE_PTHREAD_MUTEX_TIMEDLOCK) -if(HAVE_PTHREAD_MUTEX_TIMEDLOCK OR WIN32) - add_compile_options(-DHAVE_PTHREAD_MUTEX_TIMEDLOCK) -endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") # libc++ requires -fexperimental-library to enable std::jthread and std::stop_token support. include(CheckCXXSymbolExists) @@ -1156,7 +1149,7 @@ if (ENABLE_QT_GUI) endif() if (WIN32) - target_link_libraries(shadps4 PRIVATE mincore winpthreads) + target_link_libraries(shadps4 PRIVATE mincore) if (MSVC) # MSVC likes putting opinions on what people can use, disable: diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index b92e13795..89b0fbfdd 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -137,12 +137,6 @@ if (NOT TARGET Zydis::Zydis) add_subdirectory(zydis) endif() -# Winpthreads -if (WIN32) - add_subdirectory(winpthreads) - target_include_directories(winpthreads INTERFACE winpthreads/include) -endif() - # sirit add_subdirectory(sirit) if (WIN32) diff --git a/externals/winpthreads b/externals/winpthreads deleted file mode 160000 index f35b0948d..000000000 --- a/externals/winpthreads +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f35b0948d36a736e6a2d052ae295a3ffde09703f diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 9ef1e86d8..982041ebb 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include @@ -104,14 +105,24 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { SetThreadPriority(handle, windows_priority); } -static void AccurateSleep(std::chrono::nanoseconds duration) { +bool AccurateSleep(const std::chrono::nanoseconds duration, std::chrono::nanoseconds* remaining, + const bool interruptible) { + const auto begin_sleep = std::chrono::high_resolution_clock::now(); + LARGE_INTEGER interval{ .QuadPart = -1 * (duration.count() / 100u), }; HANDLE timer = ::CreateWaitableTimer(NULL, TRUE, NULL); SetWaitableTimer(timer, &interval, 0, NULL, NULL, 0); - WaitForSingleObject(timer, INFINITE); + const auto ret = WaitForSingleObjectEx(timer, INFINITE, interruptible); ::CloseHandle(timer); + + if (remaining) { + const auto end_sleep = std::chrono::high_resolution_clock::now(); + const auto sleep_time = end_sleep - begin_sleep; + *remaining = duration > sleep_time ? duration - sleep_time : std::chrono::nanoseconds(0); + } + return ret == WAIT_OBJECT_0; } #else @@ -134,8 +145,24 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { pthread_setschedparam(this_thread, scheduling_type, ¶ms); } -static void AccurateSleep(std::chrono::nanoseconds duration) { - std::this_thread::sleep_for(duration); +bool AccurateSleep(const std::chrono::nanoseconds duration, std::chrono::nanoseconds* remaining, + const bool interruptible) { + timespec request = { + .tv_sec = duration.count() / 1'000'000'000, + .tv_nsec = duration.count() % 1'000'000'000, + }; + timespec remain; + int ret; + while ((ret = nanosleep(&request, &remain)) < 0 && errno == EINTR) { + if (interruptible) { + break; + } + request = remain; + } + if (remaining) { + *remaining = std::chrono::nanoseconds(remain.tv_sec * 1'000'000'000 + remain.tv_nsec); + } + return ret == 0 || errno != EINTR; } #endif @@ -196,9 +223,9 @@ AccurateTimer::AccurateTimer(std::chrono::nanoseconds target_interval) : target_interval(target_interval) {} void AccurateTimer::Start() { - auto begin_sleep = std::chrono::high_resolution_clock::now(); + const auto begin_sleep = std::chrono::high_resolution_clock::now(); if (total_wait.count() > 0) { - AccurateSleep(total_wait); + AccurateSleep(total_wait, nullptr, false); } start_time = std::chrono::high_resolution_clock::now(); total_wait -= std::chrono::duration_cast(start_time - begin_sleep); diff --git a/src/common/thread.h b/src/common/thread.h index 92cc0c59d..5bd83d35c 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -25,6 +25,9 @@ void SetCurrentThreadName(const char* name); void SetThreadName(void* thread, const char* name); +bool AccurateSleep(std::chrono::nanoseconds duration, std::chrono::nanoseconds* remaining, + bool interruptible); + class AccurateTimer { std::chrono::nanoseconds target_interval{}; std::chrono::nanoseconds total_wait{}; diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp index c7eafe799..180850217 100644 --- a/src/core/libraries/kernel/kernel.cpp +++ b/src/core/libraries/kernel/kernel.cpp @@ -108,6 +108,9 @@ void SetPosixErrno(int e) { case EACCES: g_posix_errno = POSIX_EACCES; break; + case EFAULT: + g_posix_errno = POSIX_EFAULT; + break; case EINVAL: g_posix_errno = POSIX_EINVAL; break; diff --git a/src/core/libraries/kernel/time.cpp b/src/core/libraries/kernel/time.cpp index b7e4c1756..2fe74d0a3 100644 --- a/src/core/libraries/kernel/time.cpp +++ b/src/core/libraries/kernel/time.cpp @@ -5,24 +5,23 @@ #include "common/assert.h" #include "common/native_clock.h" +#include "common/thread.h" #include "core/libraries/kernel/kernel.h" #include "core/libraries/kernel/orbis_error.h" +#include "core/libraries/kernel/posix_error.h" #include "core/libraries/kernel/time.h" #include "core/libraries/libs.h" #ifdef _WIN64 -#include #include - #include "common/ntapi.h" - #else #if __APPLE__ #include #endif +#include #include #include -#include #include #endif @@ -52,88 +51,116 @@ u64 PS4_SYSV_ABI sceKernelReadTsc() { return clock->GetUptime(); } -int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) { -#ifdef _WIN64 - const auto start_time = std::chrono::high_resolution_clock::now(); - auto total_wait_time = std::chrono::microseconds(microseconds); +static s32 posix_nanosleep_impl(const OrbisKernelTimespec* rqtp, OrbisKernelTimespec* rmtp, + const bool interruptible) { + if (!rqtp || rqtp->tv_sec < 0 || rqtp->tv_nsec < 0 || rqtp->tv_nsec >= 1'000'000'000) { + SetPosixErrno(EINVAL); + return -1; + } + const auto duration = std::chrono::nanoseconds(rqtp->tv_sec * 1'000'000'000 + rqtp->tv_nsec); + std::chrono::nanoseconds remain; + const auto uninterrupted = Common::AccurateSleep(duration, &remain, interruptible); + if (rmtp) { + rmtp->tv_sec = remain.count() / 1'000'000'000; + rmtp->tv_nsec = remain.count() % 1'000'000'000; + } + if (!uninterrupted) { + SetPosixErrno(EINTR); + return -1; + } + return 0; +} - while (total_wait_time.count() > 0) { - auto wait_time = std::chrono::ceil(total_wait_time).count(); - u64 res = SleepEx(static_cast(wait_time), true); - if (res == WAIT_IO_COMPLETION) { - auto elapsedTime = std::chrono::high_resolution_clock::now() - start_time; - auto elapsedMicroseconds = - std::chrono::duration_cast(elapsedTime).count(); - total_wait_time = std::chrono::microseconds(microseconds - elapsedMicroseconds); - } else { - break; - } +s32 PS4_SYSV_ABI posix_nanosleep(const OrbisKernelTimespec* rqtp, OrbisKernelTimespec* rmtp) { + return posix_nanosleep_impl(rqtp, rmtp, true); +} + +s32 PS4_SYSV_ABI sceKernelNanosleep(const OrbisKernelTimespec* rqtp, OrbisKernelTimespec* rmtp) { + if (const auto ret = posix_nanosleep_impl(rqtp, rmtp, false); ret < 0) { + return ErrnoToSceKernelError(*__Error()); + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI posix_usleep(u32 microseconds) { + const OrbisKernelTimespec ts = { + .tv_sec = microseconds / 1'000'000, + .tv_nsec = (microseconds % 1'000'000) * 1'000, + }; + return posix_nanosleep(&ts, nullptr); +} + +s32 PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) { + const OrbisKernelTimespec ts = { + .tv_sec = microseconds / 1'000'000, + .tv_nsec = (microseconds % 1'000'000) * 1'000, + }; + return sceKernelNanosleep(&ts, nullptr); +} + +u32 PS4_SYSV_ABI posix_sleep(u32 seconds) { + const OrbisKernelTimespec ts = { + .tv_sec = seconds, + .tv_nsec = 0, + }; + OrbisKernelTimespec rm; + if (const auto ret = posix_nanosleep(&ts, &rm); ret < 0) { + return *__Error() == POSIX_EINTR ? rm.tv_sec + (rm.tv_nsec == 0 ? 0 : 1) : seconds; + } + return 0; +} + +s32 PS4_SYSV_ABI sceKernelSleep(u32 seconds) { + return sceKernelUsleep(seconds * 1'000'000); +} + +s32 PS4_SYSV_ABI posix_clock_gettime(u32 clock_id, OrbisKernelTimespec* ts) { + if (ts == nullptr) { + SetPosixErrno(EFAULT); + return -1; } - return 0; -#else - timespec start; - timespec remain; - start.tv_sec = microseconds / 1000000; - start.tv_nsec = (microseconds % 1000000) * 1000; - timespec* requested = &start; - int ret = 0; - do { - ret = nanosleep(requested, &remain); - requested = &remain; - } while (ret != 0); - return ret; -#endif -} + if (clock_id == ORBIS_CLOCK_PROCTIME) { + const auto us = sceKernelGetProcessTime(); + ts->tv_sec = static_cast(us / 1'000'000); + ts->tv_nsec = static_cast((us % 1'000'000) * 1000); + return 0; + } + if (clock_id == ORBIS_CLOCK_EXT_NETWORK || clock_id == ORBIS_CLOCK_EXT_DEBUG_NETWORK || + clock_id == ORBIS_CLOCK_EXT_AD_NETWORK || clock_id == ORBIS_CLOCK_EXT_RAW_NETWORK) { + LOG_ERROR(Lib_Kernel, "Unsupported clock type {}, using CLOCK_MONOTONIC", clock_id); + clock_id = ORBIS_CLOCK_MONOTONIC; + } -int PS4_SYSV_ABI posix_usleep(u32 microseconds) { - return sceKernelUsleep(microseconds); -} - -u32 PS4_SYSV_ABI sceKernelSleep(u32 seconds) { - std::this_thread::sleep_for(std::chrono::seconds(seconds)); - return 0; -} - -#ifdef _WIN64 -#ifndef CLOCK_REALTIME -#define CLOCK_REALTIME 0 -#endif -#ifndef CLOCK_MONOTONIC -#define CLOCK_MONOTONIC 1 -#endif -#ifndef CLOCK_PROCESS_CPUTIME_ID -#define CLOCK_PROCESS_CPUTIME_ID 2 -#endif -#ifndef CLOCK_THREAD_CPUTIME_ID -#define CLOCK_THREAD_CPUTIME_ID 3 -#endif -#ifndef CLOCK_REALTIME_COARSE -#define CLOCK_REALTIME_COARSE 5 -#endif -#ifndef CLOCK_MONOTONIC_COARSE -#define CLOCK_MONOTONIC_COARSE 6 -#endif - -#define DELTA_EPOCH_IN_100NS 116444736000000000ULL - -static u64 FileTimeTo100Ns(FILETIME& ft) { - return *reinterpret_cast(&ft); -} - -static s32 clock_gettime(u32 clock_id, struct timespec* ts) { +#ifdef _WIN32 + static const auto FileTimeTo100Ns = [](FILETIME& ft) { return *reinterpret_cast(&ft); }; switch (clock_id) { - case CLOCK_REALTIME: - case CLOCK_REALTIME_COARSE: { + case ORBIS_CLOCK_REALTIME: + case ORBIS_CLOCK_REALTIME_PRECISE: { FILETIME ft; - GetSystemTimeAsFileTime(&ft); - const u64 ns = FileTimeTo100Ns(ft) - DELTA_EPOCH_IN_100NS; + GetSystemTimePreciseAsFileTime(&ft); + static constexpr u64 DeltaEpochIn100ns = 116444736000000000ULL; + const u64 ns = FileTimeTo100Ns(ft) - DeltaEpochIn100ns; ts->tv_sec = ns / 10'000'000; ts->tv_nsec = (ns % 10'000'000) * 100; return 0; } - case CLOCK_MONOTONIC: - case CLOCK_MONOTONIC_COARSE: { + case ORBIS_CLOCK_SECOND: + case ORBIS_CLOCK_REALTIME_FAST: { + FILETIME ft; + GetSystemTimeAsFileTime(&ft); + static constexpr u64 DeltaEpochIn100ns = 116444736000000000ULL; + const u64 ns = FileTimeTo100Ns(ft) - DeltaEpochIn100ns; + ts->tv_sec = ns / 10'000'000; + ts->tv_nsec = (ns % 10'000'000) * 100; + return 0; + } + case ORBIS_CLOCK_UPTIME: + case ORBIS_CLOCK_UPTIME_PRECISE: + case ORBIS_CLOCK_MONOTONIC: + case ORBIS_CLOCK_MONOTONIC_PRECISE: + case ORBIS_CLOCK_UPTIME_FAST: + case ORBIS_CLOCK_MONOTONIC_FAST: { static LARGE_INTEGER pf = [] { LARGE_INTEGER res{}; QueryPerformanceFrequency(&pf); @@ -141,43 +168,53 @@ static s32 clock_gettime(u32 clock_id, struct timespec* ts) { }(); LARGE_INTEGER pc{}; - QueryPerformanceCounter(&pc); + if (!QueryPerformanceCounter(&pc)) { + SetPosixErrno(EFAULT); + return -1; + } ts->tv_sec = pc.QuadPart / pf.QuadPart; ts->tv_nsec = ((pc.QuadPart % pf.QuadPart) * 1000'000'000) / pf.QuadPart; return 0; } - case CLOCK_PROCESS_CPUTIME_ID: { + case ORBIS_CLOCK_THREAD_CPUTIME_ID: { FILETIME ct, et, kt, ut; - if (!GetProcessTimes(GetCurrentProcess(), &ct, &et, &kt, &ut)) { - return EFAULT; + if (!GetThreadTimes(GetCurrentThread(), &ct, &et, &kt, &ut)) { + SetPosixErrno(EFAULT); + return -1; } const u64 ns = FileTimeTo100Ns(ut) + FileTimeTo100Ns(kt); ts->tv_sec = ns / 10'000'000; ts->tv_nsec = (ns % 10'000'000) * 100; return 0; } - case CLOCK_THREAD_CPUTIME_ID: { + case ORBIS_CLOCK_VIRTUAL: { FILETIME ct, et, kt, ut; - if (!GetThreadTimes(GetCurrentThread(), &ct, &et, &kt, &ut)) { - return EFAULT; + if (!GetProcessTimes(GetCurrentProcess(), &ct, &et, &kt, &ut)) { + SetPosixErrno(EFAULT); + return -1; } - const u64 ns = FileTimeTo100Ns(ut) + FileTimeTo100Ns(kt); + const u64 ns = FileTimeTo100Ns(ut); + ts->tv_sec = ns / 10'000'000; + ts->tv_nsec = (ns % 10'000'000) * 100; + return 0; + } + case ORBIS_CLOCK_PROF: { + FILETIME ct, et, kt, ut; + if (!GetProcessTimes(GetCurrentProcess(), &ct, &et, &kt, &ut)) { + SetPosixErrno(EFAULT); + return -1; + } + const u64 ns = FileTimeTo100Ns(kt); ts->tv_sec = ns / 10'000'000; ts->tv_nsec = (ns % 10'000'000) * 100; return 0; } default: - return EINVAL; + SetPosixErrno(EFAULT); + return -1; } -} -#endif - -int PS4_SYSV_ABI orbis_clock_gettime(s32 clock_id, struct OrbisKernelTimespec* ts) { - if (ts == nullptr) { - return ORBIS_KERNEL_ERROR_EFAULT; - } - - clockid_t pclock_id = CLOCK_MONOTONIC; +#else + clockid_t pclock_id; switch (clock_id) { case ORBIS_CLOCK_REALTIME: case ORBIS_CLOCK_REALTIME_PRECISE: @@ -185,7 +222,7 @@ int PS4_SYSV_ABI orbis_clock_gettime(s32 clock_id, struct OrbisKernelTimespec* t break; case ORBIS_CLOCK_SECOND: case ORBIS_CLOCK_REALTIME_FAST: -#ifndef __APPLE__ +#ifdef CLOCK_REALTIME_COARSE pclock_id = CLOCK_REALTIME_COARSE; #else pclock_id = CLOCK_REALTIME; @@ -199,7 +236,7 @@ int PS4_SYSV_ABI orbis_clock_gettime(s32 clock_id, struct OrbisKernelTimespec* t break; case ORBIS_CLOCK_UPTIME_FAST: case ORBIS_CLOCK_MONOTONIC_FAST: -#ifndef __APPLE__ +#ifdef CLOCK_MONOTONIC_COARSE pclock_id = CLOCK_MONOTONIC_COARSE; #else pclock_id = CLOCK_MONOTONIC; @@ -208,196 +245,226 @@ int PS4_SYSV_ABI orbis_clock_gettime(s32 clock_id, struct OrbisKernelTimespec* t case ORBIS_CLOCK_THREAD_CPUTIME_ID: pclock_id = CLOCK_THREAD_CPUTIME_ID; break; - case ORBIS_CLOCK_PROCTIME: { - const auto us = sceKernelGetProcessTime(); - ts->tv_sec = us / 1'000'000; - ts->tv_nsec = (us % 1'000'000) * 1000; - return 0; - } case ORBIS_CLOCK_VIRTUAL: { -#ifdef _WIN64 - FILETIME ct, et, kt, ut; - if (!GetProcessTimes(GetCurrentProcess(), &ct, &et, &kt, &ut)) { - return EFAULT; - } - const u64 ns = FileTimeTo100Ns(ut); - ts->tv_sec = ns / 10'000'000; - ts->tv_nsec = (ns % 10'000'000) * 100; -#else - struct rusage ru; + rusage ru; const auto res = getrusage(RUSAGE_SELF, &ru); if (res < 0) { - return res; + SetPosixErrno(EFAULT); + return -1; } ts->tv_sec = ru.ru_utime.tv_sec; ts->tv_nsec = ru.ru_utime.tv_usec * 1000; -#endif return 0; } case ORBIS_CLOCK_PROF: { -#ifdef _WIN64 - FILETIME ct, et, kt, ut; - if (!GetProcessTimes(GetCurrentProcess(), &ct, &et, &kt, &ut)) { - return EFAULT; - } - const u64 ns = FileTimeTo100Ns(kt); - ts->tv_sec = ns / 10'000'000; - ts->tv_nsec = (ns % 10'000'000) * 100; -#else - struct rusage ru; + rusage ru; const auto res = getrusage(RUSAGE_SELF, &ru); if (res < 0) { - return res; + SetPosixErrno(EFAULT); + return -1; } ts->tv_sec = ru.ru_stime.tv_sec; ts->tv_nsec = ru.ru_stime.tv_usec * 1000; -#endif return 0; } - case ORBIS_CLOCK_EXT_NETWORK: - case ORBIS_CLOCK_EXT_DEBUG_NETWORK: - case ORBIS_CLOCK_EXT_AD_NETWORK: - case ORBIS_CLOCK_EXT_RAW_NETWORK: - pclock_id = CLOCK_MONOTONIC; - LOG_ERROR(Lib_Kernel, "unsupported = {} using CLOCK_MONOTONIC", clock_id); - break; default: - return EINVAL; + SetPosixErrno(EFAULT); + return -1; } timespec t{}; - int result = clock_gettime(pclock_id, &t); + const auto result = clock_gettime(pclock_id, &t); ts->tv_sec = t.tv_sec; ts->tv_nsec = t.tv_nsec; - return result; -} - -int PS4_SYSV_ABI sceKernelClockGettime(s32 clock_id, OrbisKernelTimespec* tp) { - const auto res = orbis_clock_gettime(clock_id, tp); - if (res < 0) { - return ErrnoToSceKernelError(res); + if (result < 0) { + SetPosixErrno(errno); + return -1; } - return ORBIS_OK; -} - -int PS4_SYSV_ABI posix_nanosleep(const OrbisKernelTimespec* rqtp, OrbisKernelTimespec* rmtp) { - const auto* request = reinterpret_cast(rqtp); - auto* remain = reinterpret_cast(rmtp); - return nanosleep(request, remain); -} - -int PS4_SYSV_ABI sceKernelNanosleep(const OrbisKernelTimespec* rqtp, OrbisKernelTimespec* rmtp) { - if (!rqtp || !rmtp) { - return ORBIS_KERNEL_ERROR_EFAULT; - } - - if (rqtp->tv_sec < 0 || rqtp->tv_nsec < 0) { - return ORBIS_KERNEL_ERROR_EINVAL; - } - - return posix_nanosleep(rqtp, rmtp); -} - -int PS4_SYSV_ABI sceKernelGettimeofday(OrbisKernelTimeval* tp) { - if (!tp) { - return ORBIS_KERNEL_ERROR_EFAULT; - } - -#ifdef _WIN64 - FILETIME filetime; - GetSystemTimePreciseAsFileTime(&filetime); - - constexpr u64 UNIX_TIME_START = 0x295E9648864000; - constexpr u64 TICKS_PER_SECOND = 1000000; - - u64 ticks = filetime.dwHighDateTime; - ticks <<= 32; - ticks |= filetime.dwLowDateTime; - ticks /= 10; - ticks -= UNIX_TIME_START; - - tp->tv_sec = ticks / TICKS_PER_SECOND; - tp->tv_usec = ticks % TICKS_PER_SECOND; -#else - timeval tv; - gettimeofday(&tv, nullptr); - tp->tv_sec = tv.tv_sec; - tp->tv_usec = tv.tv_usec; + return 0; #endif +} + +s32 PS4_SYSV_ABI sceKernelClockGettime(const u32 clock_id, OrbisKernelTimespec* ts) { + if (const auto ret = posix_clock_gettime(clock_id, ts); ret < 0) { + return ErrnoToSceKernelError(*__Error()); + } return ORBIS_OK; } -int PS4_SYSV_ABI gettimeofday(OrbisKernelTimeval* tp, OrbisKernelTimezone* tz) { - // FreeBSD docs mention that the kernel generally does not track these values - // and they are usually returned as zero. - if (tz) { - tz->tz_minuteswest = 0; - tz->tz_dsttime = 0; - } - return sceKernelGettimeofday(tp); -} - -s32 PS4_SYSV_ABI sceKernelGettimezone(OrbisKernelTimezone* tz) { -#ifdef _WIN64 - ASSERT(tz); - static int tzflag = 0; - if (!tzflag) { - _tzset(); - tzflag++; - } - tz->tz_minuteswest = _timezone / 60; - tz->tz_dsttime = _daylight; -#else - struct timezone tzz; - struct timeval tv; - gettimeofday(&tv, &tzz); - tz->tz_dsttime = tzz.tz_dsttime; - tz->tz_minuteswest = tzz.tz_minuteswest; -#endif - return ORBIS_OK; -} - -int PS4_SYSV_ABI posix_clock_getres(u32 clock_id, OrbisKernelTimespec* res) { +s32 PS4_SYSV_ABI posix_clock_getres(u32 clock_id, OrbisKernelTimespec* res) { if (res == nullptr) { - return ORBIS_KERNEL_ERROR_EFAULT; + SetPosixErrno(EFAULT); + return -1; } - clockid_t pclock_id = CLOCK_REALTIME; + + if (clock_id == ORBIS_CLOCK_EXT_NETWORK || clock_id == ORBIS_CLOCK_EXT_DEBUG_NETWORK || + clock_id == ORBIS_CLOCK_EXT_AD_NETWORK || clock_id == ORBIS_CLOCK_EXT_RAW_NETWORK) { + LOG_ERROR(Lib_Kernel, "Unsupported clock type {}, using CLOCK_MONOTONIC", clock_id); + clock_id = ORBIS_CLOCK_MONOTONIC; + } + +#ifdef _WIN32 + switch (clock_id) { + case ORBIS_CLOCK_SECOND: + case ORBIS_CLOCK_REALTIME_FAST: { + DWORD timeAdjustment; + DWORD timeIncrement; + BOOL isTimeAdjustmentDisabled; + if (!GetSystemTimeAdjustment(&timeAdjustment, &timeIncrement, &isTimeAdjustmentDisabled)) { + SetPosixErrno(EFAULT); + return -1; + } + res->tv_sec = 0; + res->tv_nsec = timeIncrement * 100; + return 0; + } + case ORBIS_CLOCK_REALTIME: + case ORBIS_CLOCK_REALTIME_PRECISE: + case ORBIS_CLOCK_UPTIME: + case ORBIS_CLOCK_UPTIME_PRECISE: + case ORBIS_CLOCK_MONOTONIC: + case ORBIS_CLOCK_MONOTONIC_PRECISE: + case ORBIS_CLOCK_UPTIME_FAST: + case ORBIS_CLOCK_MONOTONIC_FAST: { + LARGE_INTEGER pf; + if (!QueryPerformanceFrequency(&pf)) { + SetPosixErrno(EFAULT); + return -1; + } + res->tv_sec = 0; + res->tv_nsec = + std::max(static_cast((1000000000 + (pf.QuadPart >> 1)) / pf.QuadPart), 1); + return 0; + } + default: + UNREACHABLE(); + } +#else + clockid_t pclock_id; switch (clock_id) { case ORBIS_CLOCK_REALTIME: case ORBIS_CLOCK_REALTIME_PRECISE: - case ORBIS_CLOCK_REALTIME_FAST: pclock_id = CLOCK_REALTIME; break; case ORBIS_CLOCK_SECOND: + case ORBIS_CLOCK_REALTIME_FAST: +#ifdef CLOCK_REALTIME_COARSE + pclock_id = CLOCK_REALTIME_COARSE; +#else + pclock_id = CLOCK_REALTIME; +#endif + break; + case ORBIS_CLOCK_UPTIME: + case ORBIS_CLOCK_UPTIME_PRECISE: case ORBIS_CLOCK_MONOTONIC: case ORBIS_CLOCK_MONOTONIC_PRECISE: - case ORBIS_CLOCK_MONOTONIC_FAST: pclock_id = CLOCK_MONOTONIC; break; + case ORBIS_CLOCK_UPTIME_FAST: + case ORBIS_CLOCK_MONOTONIC_FAST: +#ifdef CLOCK_MONOTONIC_COARSE + pclock_id = CLOCK_MONOTONIC_COARSE; +#else + pclock_id = CLOCK_MONOTONIC; +#endif + break; default: UNREACHABLE(); } timespec t{}; - int result = clock_getres(pclock_id, &t); + const auto result = clock_getres(pclock_id, &t); res->tv_sec = t.tv_sec; res->tv_nsec = t.tv_nsec; - if (result == 0) { - return ORBIS_OK; + if (result < 0) { + SetPosixErrno(errno); + return -1; } - return ORBIS_KERNEL_ERROR_EINVAL; + return 0; +#endif } -int PS4_SYSV_ABI sceKernelConvertLocaltimeToUtc(time_t param_1, int64_t param_2, time_t* seconds, - OrbisKernelTimezone* timezone, int* dst_seconds) { +s32 PS4_SYSV_ABI sceKernelClockGetres(const u32 clock_id, OrbisKernelTimespec* res) { + if (const auto ret = posix_clock_getres(clock_id, res); ret < 0) { + return ErrnoToSceKernelError(*__Error()); + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI posix_gettimeofday(OrbisKernelTimeval* tp, OrbisKernelTimezone* tz) { +#ifdef _WIN64 + if (tp) { + FILETIME filetime; + GetSystemTimePreciseAsFileTime(&filetime); + + constexpr u64 UNIX_TIME_START = 0x295E9648864000; + constexpr u64 TICKS_PER_SECOND = 1000000; + + u64 ticks = filetime.dwHighDateTime; + ticks <<= 32; + ticks |= filetime.dwLowDateTime; + ticks /= 10; + ticks -= UNIX_TIME_START; + + tp->tv_sec = ticks / TICKS_PER_SECOND; + tp->tv_usec = ticks % TICKS_PER_SECOND; + } + if (tz) { + static int tzflag = 0; + if (!tzflag) { + _tzset(); + tzflag++; + } + tz->tz_minuteswest = _timezone / 60; + tz->tz_dsttime = _daylight; + } + return 0; +#else + struct timezone tzz; + timeval tv; + const auto ret = gettimeofday(&tv, &tzz); + if (tp) { + tp->tv_sec = tv.tv_sec; + tp->tv_usec = tv.tv_usec; + } + if (tz) { + tz->tz_dsttime = tzz.tz_dsttime; + tz->tz_minuteswest = tzz.tz_minuteswest; + } + if (ret < 0) { + SetPosixErrno(errno); + return -1; + } + return 0; +#endif +} + +s32 PS4_SYSV_ABI sceKernelGettimeofday(OrbisKernelTimeval* tp) { + if (const auto ret = posix_gettimeofday(tp, nullptr); ret < 0) { + return ErrnoToSceKernelError(*__Error()); + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceKernelGettimezone(OrbisKernelTimezone* tz) { + if (const auto ret = posix_gettimeofday(nullptr, tz); ret < 0) { + return ErrnoToSceKernelError(*__Error()); + } + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceKernelConvertLocaltimeToUtc(time_t param_1, int64_t param_2, time_t* seconds, + OrbisKernelTimezone* timezone, s32* dst_seconds) { LOG_INFO(Kernel, "called"); if (timezone) { sceKernelGettimezone(timezone); param_1 -= (timezone->tz_minuteswest + timezone->tz_dsttime) * 60; - if (seconds) + if (seconds) { *seconds = param_1; - if (dst_seconds) + } + if (dst_seconds) { *dst_seconds = timezone->tz_dsttime * 60; + } } else { return ORBIS_KERNEL_ERROR_EINVAL; } @@ -415,7 +482,7 @@ Common::NativeClock* GetClock() { } // namespace Dev -int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, +s32 PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, struct OrbisTimesec* st, u64* dst_sec) { LOG_TRACE(Kernel, "Called"); #ifdef __APPLE__ @@ -444,28 +511,35 @@ int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, void RegisterTime(Core::Loader::SymbolsResolver* sym) { clock = std::make_unique(); initial_ptc = clock->GetUptime(); + + // POSIX + LIB_FUNCTION("yS8U2TGCe1A", "libkernel", 1, "libkernel", 1, 1, posix_nanosleep); + LIB_FUNCTION("yS8U2TGCe1A", "libScePosix", 1, "libkernel", 1, 1, posix_nanosleep); + LIB_FUNCTION("QcteRwbsnV0", "libkernel", 1, "libkernel", 1, 1, posix_usleep); + LIB_FUNCTION("QcteRwbsnV0", "libScePosix", 1, "libkernel", 1, 1, posix_usleep); + LIB_FUNCTION("0wu33hunNdE", "libkernel", 1, "libkernel", 1, 1, posix_sleep); + LIB_FUNCTION("0wu33hunNdE", "libScePosix", 1, "libkernel", 1, 1, posix_sleep); + LIB_FUNCTION("lLMT9vJAck0", "libkernel", 1, "libkernel", 1, 1, posix_clock_gettime); + LIB_FUNCTION("lLMT9vJAck0", "libScePosix", 1, "libkernel", 1, 1, posix_clock_gettime); + LIB_FUNCTION("smIj7eqzZE8", "libkernel", 1, "libkernel", 1, 1, posix_clock_getres); + LIB_FUNCTION("smIj7eqzZE8", "libScePosix", 1, "libkernel", 1, 1, posix_clock_getres); + LIB_FUNCTION("n88vx3C5nW8", "libkernel", 1, "libkernel", 1, 1, posix_gettimeofday); + LIB_FUNCTION("n88vx3C5nW8", "libScePosix", 1, "libkernel", 1, 1, posix_gettimeofday); + + // Orbis LIB_FUNCTION("4J2sUJmuHZQ", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTime); LIB_FUNCTION("fgxnMeTNUtY", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounter); LIB_FUNCTION("BNowx2l588E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetProcessTimeCounterFrequency); LIB_FUNCTION("-2IRUCO--PM", "libkernel", 1, "libkernel", 1, 1, sceKernelReadTsc); LIB_FUNCTION("1j3S3n-tTW4", "libkernel", 1, "libkernel", 1, 1, sceKernelGetTscFrequency); - LIB_FUNCTION("ejekcaNQNq0", "libkernel", 1, "libkernel", 1, 1, sceKernelGettimeofday); - LIB_FUNCTION("n88vx3C5nW8", "libkernel", 1, "libkernel", 1, 1, gettimeofday); - LIB_FUNCTION("n88vx3C5nW8", "libScePosix", 1, "libkernel", 1, 1, gettimeofday); LIB_FUNCTION("QvsZxomvUHs", "libkernel", 1, "libkernel", 1, 1, sceKernelNanosleep); LIB_FUNCTION("1jfXLRVzisc", "libkernel", 1, "libkernel", 1, 1, sceKernelUsleep); - LIB_FUNCTION("QcteRwbsnV0", "libkernel", 1, "libkernel", 1, 1, posix_usleep); - LIB_FUNCTION("QcteRwbsnV0", "libScePosix", 1, "libkernel", 1, 1, posix_usleep); LIB_FUNCTION("-ZR+hG7aDHw", "libkernel", 1, "libkernel", 1, 1, sceKernelSleep); - LIB_FUNCTION("0wu33hunNdE", "libScePosix", 1, "libkernel", 1, 1, sceKernelSleep); - LIB_FUNCTION("yS8U2TGCe1A", "libkernel", 1, "libkernel", 1, 1, posix_nanosleep); - LIB_FUNCTION("yS8U2TGCe1A", "libScePosix", 1, "libkernel", 1, 1, posix_nanosleep); LIB_FUNCTION("QBi7HCK03hw", "libkernel", 1, "libkernel", 1, 1, sceKernelClockGettime); + LIB_FUNCTION("wRYVA5Zolso", "libkernel", 1, "libkernel", 1, 1, sceKernelClockGetres); + LIB_FUNCTION("ejekcaNQNq0", "libkernel", 1, "libkernel", 1, 1, sceKernelGettimeofday); LIB_FUNCTION("kOcnerypnQA", "libkernel", 1, "libkernel", 1, 1, sceKernelGettimezone); - LIB_FUNCTION("lLMT9vJAck0", "libkernel", 1, "libkernel", 1, 1, orbis_clock_gettime); - LIB_FUNCTION("lLMT9vJAck0", "libScePosix", 1, "libkernel", 1, 1, orbis_clock_gettime); - LIB_FUNCTION("smIj7eqzZE8", "libScePosix", 1, "libkernel", 1, 1, posix_clock_getres); LIB_FUNCTION("0NTHN1NKONI", "libkernel", 1, "libkernel", 1, 1, sceKernelConvertLocaltimeToUtc); LIB_FUNCTION("-o5uEDpN+oY", "libkernel", 1, "libkernel", 1, 1, sceKernelConvertUtcToLocaltime); } diff --git a/src/core/libraries/kernel/time.h b/src/core/libraries/kernel/time.h index 407b6f9ed..c80de7bc4 100644 --- a/src/core/libraries/kernel/time.h +++ b/src/core/libraries/kernel/time.h @@ -75,14 +75,14 @@ u64 PS4_SYSV_ABI sceKernelGetProcessTime(); u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter(); u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency(); u64 PS4_SYSV_ABI sceKernelReadTsc(); -int PS4_SYSV_ABI sceKernelClockGettime(s32 clock_id, OrbisKernelTimespec* tp); +s32 PS4_SYSV_ABI sceKernelClockGettime(u32 clock_id, OrbisKernelTimespec* tp); s32 PS4_SYSV_ABI sceKernelGettimezone(OrbisKernelTimezone* tz); -int PS4_SYSV_ABI sceKernelConvertLocaltimeToUtc(time_t param_1, int64_t param_2, time_t* seconds, - OrbisKernelTimezone* timezone, int* dst_seconds); +s32 PS4_SYSV_ABI sceKernelConvertLocaltimeToUtc(time_t param_1, int64_t param_2, time_t* seconds, + OrbisKernelTimezone* timezone, s32* dst_seconds); -int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, OrbisTimesec* st, +s32 PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, OrbisTimesec* st, u64* dst_sec); -int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds); +s32 PS4_SYSV_ABI sceKernelUsleep(u32 microseconds); void RegisterTime(Core::Loader::SymbolsResolver* sym);