Switch remaining CRLF terminated files to LF

This commit is contained in:
Daniel R. 2024-12-24 13:56:31 +01:00
parent 2c0f986c52
commit c284cf72e1
No known key found for this signature in database
GPG key ID: B8ADC8F57BA18DBA
28 changed files with 4856 additions and 4856 deletions

View file

@ -1,25 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <cstddef> #include <cstddef>
#include <type_traits> #include <type_traits>
namespace Common { namespace Common {
/// Ceiled integer division: for a non-negative `number`, returns `number / divisor` rounded up.
///
/// The arithmetic is carried out in the common type of N and D, so a dividend that is wider than
/// the divisor type is not truncated before the division.
///
/// @param number  Dividend.
/// @param divisor Divisor. Must be non-zero, and `number + divisor - 1` must be representable in
///                the common type of N and D.
/// @return The rounded-up quotient converted back to N.
template <typename N, typename D>
    requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeil(N number, D divisor) {
    // Casting `number` to D (as was done before) drops the high bits whenever D is narrower
    // than N, e.g. DivCeil<u64, u32>(1ull << 32, 2u) evaluated to 0.
    using Wide = std::common_type_t<N, D>;
    return static_cast<N>((static_cast<Wide>(number) + static_cast<Wide>(divisor) - 1) /
                          static_cast<Wide>(divisor));
}
/// Ceiled integer division by a power of two given as its base-2 logarithm: for a non-negative
/// `value`, returns `value / (1 << alignment_log2)` rounded up.
///
/// The arithmetic is carried out in the common type of N and D, so a value that is wider than
/// the shift-count type is not truncated before the shift.
///
/// @param value          Dividend.
/// @param alignment_log2 Base-2 logarithm of the divisor. Must be smaller than the bit width of
///                       the type the computation is performed in.
/// @return The rounded-up quotient converted back to N.
template <typename N, typename D>
    requires std::is_integral_v<N> && std::is_unsigned_v<D>
[[nodiscard]] constexpr N DivCeilLog2(N value, D alignment_log2) {
    // Casting `value` to D (as was done before) drops the high bits whenever D is narrower
    // than N; it also limited the `1 << alignment_log2` term to the width of D.
    using Wide = std::common_type_t<N, D>;
    return static_cast<N>(
        (static_cast<Wide>(value) + (static_cast<Wide>(1) << alignment_log2) - 1) >>
        alignment_log2);
}
} // namespace Common } // namespace Common

View file

@ -1,30 +1,30 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#ifdef _WIN32 #ifdef _WIN32
#include "ntapi.h" #include "ntapi.h"
// Global pointers to ntdll.dll exports. They start out null and are filled in by
// Common::NtApi::Initialize().
NtClose_t NtClose{nullptr};
NtSetInformationFile_t NtSetInformationFile{nullptr};
NtCreateThread_t NtCreateThread{nullptr};
NtTerminateThread_t NtTerminateThread{nullptr};
NtQueueApcThreadEx_t NtQueueApcThreadEx{nullptr};
namespace Common::NtApi { namespace Common::NtApi {
void Initialize() { void Initialize() {
HMODULE nt_handle = GetModuleHandleA("ntdll.dll"); HMODULE nt_handle = GetModuleHandleA("ntdll.dll");
// http://stackoverflow.com/a/31411628/4725495 // http://stackoverflow.com/a/31411628/4725495
NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose"); NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose");
NtSetInformationFile = NtSetInformationFile =
(NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile"); (NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile");
NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread"); NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread");
NtTerminateThread = (NtTerminateThread_t)GetProcAddress(nt_handle, "NtTerminateThread"); NtTerminateThread = (NtTerminateThread_t)GetProcAddress(nt_handle, "NtTerminateThread");
NtQueueApcThreadEx = (NtQueueApcThreadEx_t)GetProcAddress(nt_handle, "NtQueueApcThreadEx"); NtQueueApcThreadEx = (NtQueueApcThreadEx_t)GetProcAddress(nt_handle, "NtQueueApcThreadEx");
} }
} // namespace Common::NtApi } // namespace Common::NtApi
#endif #endif

File diff suppressed because it is too large Load diff

View file

@ -1,53 +1,53 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/spin_lock.h" #include "common/spin_lock.h"
#if _MSC_VER #if _MSC_VER
#include <intrin.h> #include <intrin.h>
#if _M_AMD64 #if _M_AMD64
#define __x86_64__ 1 #define __x86_64__ 1
#endif #endif
#if _M_ARM64 #if _M_ARM64
#define __aarch64__ 1 #define __aarch64__ 1
#endif #endif
#else #else
#if __x86_64__ #if __x86_64__
#include <xmmintrin.h> #include <xmmintrin.h>
#endif #endif
#endif #endif
namespace { namespace {
void ThreadPause() { void ThreadPause() {
#if __x86_64__ #if __x86_64__
_mm_pause(); _mm_pause();
#elif __aarch64__ && _MSC_VER #elif __aarch64__ && _MSC_VER
__yield(); __yield();
#elif __aarch64__ #elif __aarch64__
asm("yield"); asm("yield");
#endif #endif
} }
} // Anonymous namespace } // Anonymous namespace
namespace Common { namespace Common {
void SpinLock::lock() { void SpinLock::lock() {
while (lck.test_and_set(std::memory_order_acquire)) { while (lck.test_and_set(std::memory_order_acquire)) {
ThreadPause(); ThreadPause();
} }
} }
void SpinLock::unlock() { void SpinLock::unlock() {
lck.clear(std::memory_order_release); lck.clear(std::memory_order_release);
} }
bool SpinLock::try_lock() { bool SpinLock::try_lock() {
if (lck.test_and_set(std::memory_order_acquire)) { if (lck.test_and_set(std::memory_order_acquire)) {
return false; return false;
} }
return true; return true;
} }
} // namespace Common } // namespace Common

View file

@ -1,33 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <atomic> #include <atomic>
namespace Common { namespace Common {
/**
 * SpinLock class
 * A mutex-like lock that makes a waiting thread spin instead of calling into the
 * supervisor. Should only guard short sequences of code.
 *
 * The type can be neither copied nor moved.
 */
class SpinLock {
public:
    SpinLock() = default;

    // Copying is disabled.
    SpinLock(const SpinLock&) = delete;
    SpinLock& operator=(const SpinLock&) = delete;

    // Moving is disabled.
    SpinLock(SpinLock&&) = delete;
    SpinLock& operator=(SpinLock&&) = delete;

    /// Acquires the lock, spinning until it becomes available.
    void lock();

    /// Releases the lock.
    void unlock();

    /// Attempts to acquire the lock without waiting; returns true on success.
    [[nodiscard]] bool try_lock();

private:
    /// Set while the lock is held.
    std::atomic_flag lck = ATOMIC_FLAG_INIT;
};
} // namespace Common } // namespace Common

View file

@ -1,61 +1,61 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <memory>
#include <type_traits>
#include <utility>
namespace Common { namespace Common {
/// General purpose function wrapper similar to std::function.
/// Unlike std::function, the captured values don't have to be copyable.
/// This class can be moved but not copied.
///
/// @tparam ResultType Type returned by the wrapped callable.
/// @tparam Args       Argument types; the call operators take each one as `Args&&`.
template <typename ResultType, typename... Args>
class UniqueFunction {
    /// Type-erased interface through which the stored callable is invoked and destroyed.
    class CallableBase {
    public:
        virtual ~CallableBase() = default;
        virtual ResultType operator()(Args&&...) = 0;
    };

    /// Concrete holder that owns one callable of type Functor by value.
    template <typename Functor>
    class Callable final : public CallableBase {
    public:
        /// Copy- or move-constructs the stored callable from `functor_`.
        template <typename F>
        explicit Callable(F&& functor_) : functor{std::forward<F>(functor_)} {}
        ~Callable() override = default;

        ResultType operator()(Args&&... args) override {
            return functor(std::forward<Args>(args)...);
        }

    private:
        Functor functor;
    };

public:
    /// Creates an empty wrapper; `operator bool` returns false for it.
    UniqueFunction() = default;

    /// Wraps `functor`.
    ///
    /// The stored type is the decayed type of the argument, so rvalues are moved in and lvalues
    /// are copied. Previously an lvalue argument instantiated Callable with a reference type and
    /// failed to compile. The constraint keeps this overload from being selected when the
    /// argument is itself a UniqueFunction, so move construction is never hijacked.
    template <typename Functor,
              typename = std::enable_if_t<!std::is_same_v<std::decay_t<Functor>, UniqueFunction>>>
    UniqueFunction(Functor&& functor)
        : callable{std::make_unique<Callable<std::decay_t<Functor>>>(
              std::forward<Functor>(functor))} {}

    UniqueFunction& operator=(UniqueFunction&& rhs) noexcept = default;
    UniqueFunction(UniqueFunction&& rhs) noexcept = default;

    UniqueFunction& operator=(const UniqueFunction&) = delete;
    UniqueFunction(const UniqueFunction&) = delete;

    /// Invokes the stored callable. The wrapper must not be empty.
    ResultType operator()(Args&&... args) const {
        return (*callable)(std::forward<Args>(args)...);
    }

    /// Returns true if a callable is stored.
    explicit operator bool() const noexcept {
        return static_cast<bool>(callable);
    }

private:
    std::unique_ptr<CallableBase> callable;
};
} // namespace Common } // namespace Common

View file

@ -1,484 +1,484 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "fiber.h" #include "fiber.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/libraries/fiber/fiber_error.h" #include "core/libraries/fiber/fiber_error.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/tls.h" #include "core/tls.h"
namespace Libraries::Fiber { namespace Libraries::Fiber {
// Magic values stored in OrbisFiber::magic_start / magic_end; the API functions reject fibers
// whose fields do not match.
static constexpr u32 kFiberSignature0 = 0xdef1649c;
static constexpr u32 kFiberSignature1 = 0xb37592a0;
// Magic value expected in OrbisFiberOptParam::magic.
static constexpr u32 kFiberOptSignature = 0xbb40e64d;
// Written to the first 8 bytes of a fiber's context area and verified by the overflow check.
static constexpr u64 kFiberStackSignature = 0x7149f2ca7149f2ca;
// Fill pattern written over the rest of the context area when context size checking is on.
static constexpr u64 kFiberStackSizeCheck = 0xdeadbeefdeadbeef;

// Non-zero while context size checking is enabled (toggled by the Start/Stop functions).
static std::atomic<u32> context_size_check{0};
/// Returns the `tcb_fiber` pointer of the TCB obtained from Core::GetTcbBase().
/// Callers in this file treat a null result as "no fiber context is active".
OrbisFiberContext* GetFiberContext() {
    Core::Tcb* tcb = Core::GetTcbBase();
    return tcb->tcb_fiber;
}
// Low-level context switch primitives, each bound to an explicit assembler symbol name.
// NOTE(review): the definitions of SetJmp/LongJmp/SwitchEntry are not in this file section —
// presumably provided in assembly elsewhere; confirm.
extern "C" s32 PS4_SYSV_ABI _sceFiberSetJmp(OrbisFiberContext* ctx) asm("_sceFiberSetJmp");
extern "C" s32 PS4_SYSV_ABI _sceFiberLongJmp(OrbisFiberContext* ctx) asm("_sceFiberLongJmp");
extern "C" void PS4_SYSV_ABI _sceFiberSwitchEntry(OrbisFiberData* data,
                                                  bool set_fpu) asm("_sceFiberSwitchEntry");
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) asm("_sceFiberForceQuit");

/// Records `ret` as the return value of the active fiber context and long-jumps to that context.
/// sceFiberRun treats a non-zero return value as "fiber entry function returned".
extern "C" void PS4_SYSV_ABI _sceFiberForceQuit(u64 ret) {
    OrbisFiberContext* const active_ctx = GetFiberContext();
    active_ctx->return_val = ret;
    _sceFiberLongJmp(active_ctx);
}
/// Verifies that the signature word at the start of the current fiber's context area is intact.
/// Fibers without a context area (null addr_context) are not checked.
void PS4_SYSV_ABI _sceFiberCheckStackOverflow(OrbisFiberContext* ctx) {
    const u64* const canary = reinterpret_cast<u64*>(ctx->current_fiber->addr_context);
    if (canary == nullptr) {
        return;
    }
    if (*canary == kFiberStackSignature) {
        return;
    }
    // The word written by sceFiberInitialize was overwritten.
    UNREACHABLE_MSG("Stack overflow detected in fiber.");
}
/// Transfers control to `fiber`; does not return to the caller.
///
/// If the fiber has a saved context it is resumed via _sceFiberLongJmp, otherwise its entry point
/// is started on the top of its own context area via _sceFiberSwitchEntry.
///
/// @param fiber         Fiber to run.
/// @param arg_on_run_to Argument handed to the fiber.
/// @param ctx           Active fiber context.
void PS4_SYSV_ABI _sceFiberSwitchToFiber(OrbisFiber* fiber, u64 arg_on_run_to,
                                         OrbisFiberContext* ctx) {
    // A saved context exists: resume the fiber from there.
    if (OrbisFiberContext* const saved_ctx = fiber->context; saved_ctx != nullptr) {
        ctx->arg_on_run_to = arg_on_run_to;
        _sceFiberLongJmp(saved_ctx);
        __builtin_trap();
    }

    // No saved context: describe a fresh start of the entry function.
    OrbisFiberData data{};
    data.state = nullptr;
    if (OrbisFiber* const previous = ctx->prev_fiber; previous != nullptr) {
        ctx->prev_fiber = nullptr;
        data.state = reinterpret_cast<u32*>(&previous->state);
    }
    data.entry = fiber->entry;
    data.arg_on_initialize = fiber->arg_on_initialize;
    data.arg_on_run_to = arg_on_run_to;

    // The stack address handed to the entry code is the end of the context area.
    const u64 context_base = reinterpret_cast<u64>(fiber->addr_context);
    data.stack_addr = reinterpret_cast<void*>(context_base + fiber->size_context);

    const bool set_fpu = static_cast<bool>(fiber->flags & FiberFlags::SetFpuRegs);
    if (set_fpu) {
        data.fpucw = 0x037f;
        data.mxcsr = 0x9fc0;
    }
    _sceFiberSwitchEntry(&data, set_fpu);
    __builtin_trap();
}
/// Switches from `cur_fiber` to `fiber`; does not return to the caller.
///
/// A target without its own context area is started on the stack pointer recorded in `ctx`
/// (aligned down to 16 bytes); any other target is handed to _sceFiberSwitchToFiber.
///
/// @param cur_fiber     Fiber that is being left.
/// @param fiber         Fiber to run next.
/// @param arg_on_run_to Argument handed to the target fiber.
/// @param ctx           Active fiber context.
void PS4_SYSV_ABI _sceFiberSwitch(OrbisFiber* cur_fiber, OrbisFiber* fiber, u64 arg_on_run_to,
                                  OrbisFiberContext* ctx) {
    ctx->prev_fiber = cur_fiber;
    ctx->current_fiber = fiber;

    const bool has_own_context = fiber->addr_context != nullptr;
    if (!has_own_context) {
        ctx->prev_fiber = nullptr;

        OrbisFiberData data{};
        data.entry = fiber->entry;
        data.arg_on_initialize = fiber->arg_on_initialize;
        data.arg_on_run_to = arg_on_run_to;
        data.stack_addr = reinterpret_cast<void*>(ctx->rsp & ~15);
        data.state = reinterpret_cast<u32*>(&cur_fiber->state);

        const bool set_fpu = static_cast<bool>(fiber->flags & FiberFlags::SetFpuRegs);
        if (set_fpu) {
            data.fpucw = 0x037f;
            data.mxcsr = 0x9fc0;
        }
        _sceFiberSwitchEntry(&data, set_fpu);
        __builtin_trap();
    }

    _sceFiberSwitchToFiber(fiber, arg_on_run_to, ctx);
    __builtin_trap();
}
/// Stores `arg_on_return` in `ctx` and long-jumps to that context; does not return.
///
/// @param fiber         Not used by this function.
/// @param arg_on_return Value made available to the code that resumes from `ctx`.
/// @param ctx           Context to jump to.
void PS4_SYSV_ABI _sceFiberTerminate(OrbisFiber* fiber, u64 arg_on_return, OrbisFiberContext* ctx) {
    ctx->arg_on_return = arg_on_return;

    _sceFiberLongJmp(ctx);

    // Only reached if the long jump came back, which is treated as fatal.
    __builtin_trap();
}
/// Validates the arguments and fills in the fiber control block `fiber`.
///
/// @param fiber             Control block to initialize; must be non-null and 8-byte aligned.
/// @param name              Fiber name; must be non-null. At most ORBIS_FIBER_MAX_NAME_LENGTH
///                          characters are copied.
/// @param entry             Fiber entry function; must be non-null.
/// @param arg_on_initialize Value stored for the entry function.
/// @param addr_context      Context (stack) area, 16-byte aligned, or null together with a zero
///                          `size_context`.
/// @param size_context      Size of the context area: a multiple of 16 and, when non-zero, at
///                          least ORBIS_FIBER_CONTEXT_MINIMUM_SIZE.
/// @param opt_param         Optional parameters; when non-null must be 8-byte aligned and carry
///                          the option signature.
/// @param build_ver         Build version; values >= 0x3500000 enable FiberFlags::SetFpuRegs.
/// @return ORBIS_OK on success, otherwise ORBIS_FIBER_ERROR_NULL / _ALIGNMENT / _RANGE /
///         _INVALID depending on which check failed first.
s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry,
                                    u64 arg_on_initialize, void* addr_context, u64 size_context,
                                    const OrbisFiberOptParam* opt_param, u32 build_ver) {
    if (!fiber || !name || !entry) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((u64)fiber & 7 || (u64)addr_context & 15) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }
    if (opt_param && (u64)opt_param & 7) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }
    if (size_context && size_context < ORBIS_FIBER_CONTEXT_MINIMUM_SIZE) {
        return ORBIS_FIBER_ERROR_RANGE;
    }
    if (size_context & 15) {
        return ORBIS_FIBER_ERROR_INVALID;
    }
    // Address and size must be given together or not at all.
    if (!addr_context && size_context) {
        return ORBIS_FIBER_ERROR_INVALID;
    }
    if (addr_context && !size_context) {
        return ORBIS_FIBER_ERROR_INVALID;
    }
    if (opt_param && opt_param->magic != kFiberOptSignature) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    u32 flags = FiberFlags::None;
    if (build_ver >= 0x3500000) {
        flags |= FiberFlags::SetFpuRegs;
    }
    if (context_size_check) {
        flags |= FiberFlags::ContextSizeCheck;
    }

    strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH);
    // strncpy() does not write a terminator when `name` has ORBIS_FIBER_MAX_NAME_LENGTH or more
    // characters, so terminate the buffer explicitly.
    // NOTE(review): assumes OrbisFiber::name is a char array member (so sizeof yields its
    // capacity) — confirm against fiber.h.
    fiber->name[sizeof(fiber->name) - 1] = '\0';

    fiber->entry = entry;
    fiber->arg_on_initialize = arg_on_initialize;
    fiber->addr_context = addr_context;
    fiber->size_context = size_context;
    fiber->context = nullptr;
    fiber->flags = flags;

    /*
        A low stack area is problematic, as we can easily
        cause a stack overflow with our HLE.
    */
    if (size_context && size_context <= 4096) {
        LOG_WARNING(Lib_Fiber, "Fiber initialized with small stack area.");
    }

    fiber->magic_start = kFiberSignature0;
    fiber->magic_end = kFiberSignature1;

    if (addr_context != nullptr) {
        fiber->context_start = addr_context;
        fiber->context_end =
            reinterpret_cast<void*>(reinterpret_cast<u64>(addr_context) + size_context);

        /* Apply signature to start of stack */
        *(u64*)addr_context = kFiberStackSignature;

        if (flags & FiberFlags::ContextSizeCheck) {
            // Fill everything after the signature word with the size-check pattern so that
            // sceFiberGetInfo can later measure how much of the area stayed untouched.
            u64* stack_start = reinterpret_cast<u64*>(fiber->context_start);
            u64* stack_end = reinterpret_cast<u64*>(fiber->context_end);

            u64* stack_ptr = stack_start + 1;
            while (stack_ptr < stack_end) {
                *stack_ptr++ = kFiberStackSizeCheck;
            }
        }
    }

    fiber->state = FiberState::Idle;
    return ORBIS_OK;
}
/// Stamps `opt_param` with the option-parameter signature.
///
/// @param opt_param Structure to initialize; must be non-null and 8-byte aligned.
/// @return ORBIS_OK, ORBIS_FIBER_ERROR_NULL for a null pointer, or
///         ORBIS_FIBER_ERROR_ALIGNMENT for a misaligned one.
s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param) {
    if (opt_param == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }

    const u64 address = reinterpret_cast<u64>(opt_param);
    if ((address & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    opt_param->magic = kFiberOptSignature;
    return ORBIS_OK;
}
/// Moves an idle fiber into the Terminated state.
///
/// @param fiber Fiber to finalize; must be non-null, 8-byte aligned and carry both signatures.
/// @return ORBIS_OK on success; ORBIS_FIBER_ERROR_NULL, _ALIGNMENT or _INVALID for a bad
///         `fiber`; ORBIS_FIBER_ERROR_STATE if the fiber was not Idle.
s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber) {
    if (fiber == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((reinterpret_cast<u64>(fiber) & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }

    const bool signatures_ok =
        fiber->magic_start == kFiberSignature0 && fiber->magic_end == kFiberSignature1;
    if (!signatures_ok) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    // Atomically Idle -> Terminated; any other current state is an error.
    FiberState idle = FiberState::Idle;
    const bool transitioned = fiber->state.compare_exchange_strong(idle, FiberState::Terminated);
    if (!transitioned) {
        return ORBIS_FIBER_ERROR_STATE;
    }
    return ORBIS_OK;
}
/// Runs `fiber` from a thread that is not already running a fiber.
///
/// A fiber context is set up on this stack frame and published through the TCB, then control is
/// handed to the fiber. Execution continues after the _sceFiberSetJmp call once something
/// long-jumps back to that context.
///
/// @param fiber         Fiber to run; must be non-null, 8-byte aligned, carry both signatures
///                      and be Idle.
/// @param arg_on_run_to Argument handed to the fiber.
/// @param arg_on_return Optional output that receives the context's `arg_on_return` value.
/// @return ORBIS_OK on success; ORBIS_FIBER_ERROR_NULL, _ALIGNMENT, _INVALID, _PERMISSION (a
///         fiber context is already active) or _STATE (fiber not Idle).
s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return) {
    if (fiber == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((reinterpret_cast<u64>(fiber) & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }
    if (!(fiber->magic_start == kFiberSignature0 && fiber->magic_end == kFiberSignature1)) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    Core::Tcb* tcb = Core::GetTcbBase();
    if (tcb->tcb_fiber != nullptr) {
        return ORBIS_FIBER_ERROR_PERMISSION;
    }

    FiberState idle = FiberState::Idle;
    if (!fiber->state.compare_exchange_strong(idle, FiberState::Run)) {
        return ORBIS_FIBER_ERROR_STATE;
    }

    OrbisFiberContext ctx{};
    ctx.current_fiber = fiber;
    ctx.prev_fiber = nullptr;
    ctx.return_val = 0;

    tcb->tcb_fiber = &ctx;

    const s32 resumed = _sceFiberSetJmp(&ctx);
    if (resumed == 0) {
        // Direct return from the save call: hand control to the fiber.
        if (fiber->addr_context != nullptr) {
            _sceFiberSwitchToFiber(fiber, arg_on_run_to, &ctx);
            __builtin_trap();
        }

        // The fiber has no context area of its own; start it on the saved stack pointer.
        OrbisFiberData data{};
        data.entry = fiber->entry;
        data.arg_on_initialize = fiber->arg_on_initialize;
        data.arg_on_run_to = arg_on_run_to;
        data.stack_addr = reinterpret_cast<void*>(ctx.rsp & ~15);
        data.state = nullptr;

        const bool set_fpu = static_cast<bool>(fiber->flags & FiberFlags::SetFpuRegs);
        if (set_fpu) {
            data.fpucw = 0x037f;
            data.mxcsr = 0x9fc0;
        }
        _sceFiberSwitchEntry(&data, set_fpu);
    }

    // Mark whichever fiber ran last as idle again.
    OrbisFiber* const last_fiber = ctx.current_fiber;
    ctx.current_fiber = nullptr;
    last_fiber->state = FiberState::Idle;

    if (ctx.return_val != 0) {
        /* Fiber entry returned! This should never happen. */
        UNREACHABLE_MSG("Fiber entry function returned.");
    }

    if (arg_on_return != nullptr) {
        *arg_on_return = ctx.arg_on_return;
    }

    tcb->tcb_fiber = nullptr;
    return ORBIS_OK;
}
/// Switches from the currently running fiber to `fiber`.
///
/// When the current fiber owns a context area, its state is saved first so that it can be
/// resumed later; the function then returns ORBIS_OK at that later point.
///
/// @param fiber         Fiber to switch to; must be non-null, 8-byte aligned, carry both
///                      signatures and be Idle.
/// @param arg_on_run_to Argument handed to the target fiber.
/// @param arg_on_run    Optional output that receives the argument passed when this fiber is
///                      resumed.
/// @return ORBIS_OK after being resumed; ORBIS_FIBER_ERROR_NULL, _ALIGNMENT, _INVALID,
///         _PERMISSION (no fiber context is active) or _STATE (target not Idle).
s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run) {
    if (fiber == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((reinterpret_cast<u64>(fiber) & 7) != 0) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }
    if (!(fiber->magic_start == kFiberSignature0 && fiber->magic_end == kFiberSignature1)) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    OrbisFiberContext* g_ctx = GetFiberContext();
    if (g_ctx == nullptr) {
        return ORBIS_FIBER_ERROR_PERMISSION;
    }

    FiberState idle = FiberState::Idle;
    if (!fiber->state.compare_exchange_strong(idle, FiberState::Run)) {
        return ORBIS_FIBER_ERROR_STATE;
    }

    OrbisFiber* const cur_fiber = g_ctx->current_fiber;
    if (cur_fiber->addr_context == nullptr) {
        // The current fiber has no context area, so there is nothing to save.
        _sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx);
        __builtin_trap();
    }

    OrbisFiberContext ctx{};
    const s32 resumed = _sceFiberSetJmp(&ctx);
    if (resumed == 0) {
        // Context saved: remember it on the current fiber and leave.
        cur_fiber->context = &ctx;
        _sceFiberCheckStackOverflow(g_ctx);
        _sceFiberSwitch(cur_fiber, fiber, arg_on_run_to, g_ctx);
        __builtin_trap();
    }

    // Resumed: re-read the active context and release the fiber that switched to us.
    g_ctx = GetFiberContext();
    if (OrbisFiber* const previous = g_ctx->prev_fiber; previous != nullptr) {
        previous->state = FiberState::Idle;
        g_ctx->prev_fiber = nullptr;
    }

    if (arg_on_run != nullptr) {
        *arg_on_run = g_ctx->arg_on_run_to;
    }
    return ORBIS_OK;
}
/// Reports the fiber recorded as current in the active fiber context.
///
/// @param fiber Output pointer; must be non-null.
/// @return ORBIS_OK on success, ORBIS_FIBER_ERROR_NULL for a null output pointer, or
///         ORBIS_FIBER_ERROR_PERMISSION when no fiber context is active.
s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber) {
    if (fiber == nullptr) {
        return ORBIS_FIBER_ERROR_NULL;
    }

    const OrbisFiberContext* const active_ctx = GetFiberContext();
    if (active_ctx == nullptr) {
        return ORBIS_FIBER_ERROR_PERMISSION;
    }

    *fiber = active_ctx->current_fiber;
    return ORBIS_OK;
}
/// Leaves the current fiber by long-jumping to the active fiber context.
///
/// When the current fiber owns a context area, its state is saved first so that it can be
/// resumed later; the function then returns ORBIS_OK at that later point.
///
/// @param arg_on_return Value stored in the active context before jumping to it.
/// @param arg_on_run    Optional output that receives the argument passed when this fiber is
///                      resumed.
/// @return ORBIS_OK after being resumed, or ORBIS_FIBER_ERROR_PERMISSION when no fiber context
///         is active.
s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run) {
    OrbisFiberContext* g_ctx = GetFiberContext();
    if (g_ctx == nullptr) {
        return ORBIS_FIBER_ERROR_PERMISSION;
    }

    OrbisFiber* const cur_fiber = g_ctx->current_fiber;
    if (cur_fiber->addr_context != nullptr) {
        OrbisFiberContext ctx{};
        const s32 resumed = _sceFiberSetJmp(&ctx);
        if (resumed == 0) {
            // Context saved: remember it on the fiber before leaving.
            cur_fiber->context = &ctx;
            _sceFiberCheckStackOverflow(g_ctx);
        } else {
            // Resumed: re-read the active context and release the fiber that switched to us.
            g_ctx = GetFiberContext();
            if (OrbisFiber* const previous = g_ctx->prev_fiber; previous != nullptr) {
                previous->state = FiberState::Idle;
                g_ctx->prev_fiber = nullptr;
            }
            if (arg_on_run != nullptr) {
                *arg_on_run = g_ctx->arg_on_run_to;
            }
            return ORBIS_OK;
        }
    }

    _sceFiberTerminate(cur_fiber, arg_on_return, g_ctx);
    __builtin_trap();
}
/// Copies the properties of `fiber` into `fiber_info`.
///
/// `size_context_margin` is set to -1 unless the fiber was created with
/// FiberFlags::ContextSizeCheck and owns a context area. In that case it is the number of bytes,
/// counted from just after the signature word, that still hold the size-check fill pattern
/// (0 if the signature word itself has been overwritten).
///
/// @param fiber      Fiber to query; must be non-null, 8-byte aligned and carry both signatures.
/// @param fiber_info Output; must be non-null, 8-byte aligned, with `size` preset to
///                   sizeof(OrbisFiberInfo).
/// @return ORBIS_OK on success, otherwise ORBIS_FIBER_ERROR_NULL, _ALIGNMENT or _INVALID.
s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info) {
    if (!fiber || !fiber_info) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((u64)fiber & 7 || (u64)fiber_info & 7) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }
    if (fiber_info->size != sizeof(OrbisFiberInfo)) {
        return ORBIS_FIBER_ERROR_INVALID;
    }
    if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    fiber_info->entry = fiber->entry;
    fiber_info->arg_on_initialize = fiber->arg_on_initialize;
    fiber_info->addr_context = fiber->addr_context;
    fiber_info->size_context = fiber->size_context;

    strncpy(fiber_info->name, fiber->name, ORBIS_FIBER_MAX_NAME_LENGTH);
    // strncpy() does not write a terminator when the source fills the whole limit, so terminate
    // the buffer explicitly.
    // NOTE(review): assumes OrbisFiberInfo::name is a char array member (so sizeof yields its
    // capacity) — confirm against fiber.h.
    fiber_info->name[sizeof(fiber_info->name) - 1] = '\0';

    fiber_info->size_context_margin = -1;
    if ((fiber->flags & FiberFlags::ContextSizeCheck) && fiber->addr_context != nullptr) {
        u64 stack_margin = 0;
        u64* stack_start = reinterpret_cast<u64*>(fiber->context_start);
        u64* stack_end = reinterpret_cast<u64*>(fiber->context_end);

        if (*stack_start == kFiberStackSignature) {
            // Walk upwards from just after the signature while the fill pattern is intact.
            // The scan must stop at the first word that differs: the previous loop only
            // advanced on a match and therefore never terminated once it met a used word.
            u64* stack_ptr = stack_start + 1;
            while (stack_ptr < stack_end && *stack_ptr == kFiberStackSizeCheck) {
                ++stack_ptr;
            }

            stack_margin =
                reinterpret_cast<u64>(stack_ptr) - reinterpret_cast<u64>(stack_start + 1);
        }

        fiber_info->size_context_margin = stack_margin;
    }

    return ORBIS_OK;
}
/// Enables context size checking for fibers initialized from now on.
///
/// @param flags Must be 0.
/// @return ORBIS_OK on success, ORBIS_FIBER_ERROR_INVALID for non-zero `flags`, or
///         ORBIS_FIBER_ERROR_STATE if checking was already enabled.
s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags) {
    if (flags != 0) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    // Atomically 0 -> 1; fails when the switch is not currently 0.
    u32 disabled = 0;
    const bool switched_on = context_size_check.compare_exchange_strong(disabled, 1u);
    if (switched_on) {
        return ORBIS_OK;
    }
    return ORBIS_FIBER_ERROR_STATE;
}
/// Disables context size checking for fibers initialized from now on.
///
/// @return ORBIS_OK on success, or ORBIS_FIBER_ERROR_STATE if checking was not enabled.
s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck() {
    // Atomically 1 -> 0; fails when the switch is not currently 1.
    u32 enabled = 1;
    const bool switched_off = context_size_check.compare_exchange_strong(enabled, 0u);
    if (switched_off) {
        return ORBIS_OK;
    }
    return ORBIS_FIBER_ERROR_STATE;
}
/// Replaces the name stored in `fiber`.
///
/// @param fiber Fiber to rename; must be non-null, 8-byte aligned and carry both signatures.
/// @param name  New name; must be non-null. At most ORBIS_FIBER_MAX_NAME_LENGTH characters are
///              copied.
/// @return ORBIS_OK on success, otherwise ORBIS_FIBER_ERROR_NULL, _ALIGNMENT or _INVALID.
s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name) {
    if (!fiber || !name) {
        return ORBIS_FIBER_ERROR_NULL;
    }
    if ((u64)fiber & 7) {
        return ORBIS_FIBER_ERROR_ALIGNMENT;
    }
    if (fiber->magic_start != kFiberSignature0 || fiber->magic_end != kFiberSignature1) {
        return ORBIS_FIBER_ERROR_INVALID;
    }

    strncpy(fiber->name, name, ORBIS_FIBER_MAX_NAME_LENGTH);
    // strncpy() does not write a terminator when `name` has ORBIS_FIBER_MAX_NAME_LENGTH or more
    // characters, so terminate the buffer explicitly.
    // NOTE(review): assumes OrbisFiber::name is a char array member (so sizeof yields its
    // capacity) — confirm against fiber.h.
    fiber->name[sizeof(fiber->name) - 1] = '\0';
    return ORBIS_OK;
}
/// Registers every libSceFiber export implemented in this file with the symbol
/// resolver. Each LIB_FUNCTION line binds one NID string to its handler.
void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym) {
    // Lifetime management. Two NIDs resolve to the same initialize handler.
    LIB_FUNCTION("hVYD7Ou2pCQ", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize);
    LIB_FUNCTION("7+OJIpko9RY", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberInitialize);
    LIB_FUNCTION("asjUJJ+aa8s", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberOptParamInitialize);
    LIB_FUNCTION("JeNX5F-NzQU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberFinalize);

    // Scheduling.
    LIB_FUNCTION("a0LLrZWac0M", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRun);
    LIB_FUNCTION("PFT2S-tJ7Uk", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberSwitch);
    LIB_FUNCTION("p+zLIOg27zU", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetSelf);
    LIB_FUNCTION("B0ZX2hx9DMw", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberReturnToThread);

    // Introspection and diagnostics.
    LIB_FUNCTION("uq2Y5BFz0PE", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberGetInfo);
    LIB_FUNCTION("Lcqty+QNWFc", "libSceFiber", 1, "libSceFiber", 1, 1,
                 sceFiberStartContextSizeCheck);
    LIB_FUNCTION("Kj4nXMpnM8Y", "libSceFiber", 1, "libSceFiber", 1, 1,
                 sceFiberStopContextSizeCheck);
    LIB_FUNCTION("JzyT91ucGDc", "libSceFiber", 1, "libSceFiber", 1, 1, sceFiberRename);
}
} // namespace Libraries::Fiber } // namespace Libraries::Fiber

View file

@ -1,118 +1,118 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include "common/assert.h" #include "common/assert.h"
#include "common/types.h" #include "common/types.h"
#include <atomic> #include <atomic>
namespace Core::Loader { namespace Core::Loader {
class SymbolsResolver; class SymbolsResolver;
} }
namespace Libraries::Fiber { namespace Libraries::Fiber {
#define ORBIS_FIBER_MAX_NAME_LENGTH (31) #define ORBIS_FIBER_MAX_NAME_LENGTH (31)
#define ORBIS_FIBER_CONTEXT_MINIMUM_SIZE (512) #define ORBIS_FIBER_CONTEXT_MINIMUM_SIZE (512)
typedef void PS4_SYSV_ABI (*OrbisFiberEntry)(u64 arg_on_initialize, u64 arg_on_run); typedef void PS4_SYSV_ABI (*OrbisFiberEntry)(u64 arg_on_initialize, u64 arg_on_run);
/// Execution state stored in OrbisFiber::state.
enum FiberState : u32 {
    Run = 1u,
    Idle = 2u, // the value _sceFiberSwitchEntry writes for the previous fiber
    Terminated = 3u,
};
/// Bit values of the per-fiber option flags.
// NOTE(review): presumably combined into OrbisFiber::flags - confirm in fiber.cpp.
enum FiberFlags : u32 {
    None = 0x0,
    NoUlobjmgr = 0x1,
    ContextSizeCheck = 0x10,
    SetFpuRegs = 0x100,
};
struct OrbisFiber; struct OrbisFiber;
struct OrbisFiberContext { struct OrbisFiberContext {
struct { struct {
u64 rax, rcx, rdx, rbx, rsp, rbp, r8, r9, r10, r11, r12, r13, r14, r15; u64 rax, rcx, rdx, rbx, rsp, rbp, r8, r9, r10, r11, r12, r13, r14, r15;
u16 fpucw; u16 fpucw;
u32 mxcsr; u32 mxcsr;
}; };
OrbisFiber* current_fiber; OrbisFiber* current_fiber;
OrbisFiber* prev_fiber; OrbisFiber* prev_fiber;
u64 arg_on_run_to; u64 arg_on_run_to;
u64 arg_on_return; u64 arg_on_return;
u64 return_val; u64 return_val;
}; };
struct OrbisFiberData { struct OrbisFiberData {
OrbisFiberEntry entry; OrbisFiberEntry entry;
u64 arg_on_initialize; u64 arg_on_initialize;
u64 arg_on_run_to; u64 arg_on_run_to;
void* stack_addr; void* stack_addr;
u32* state; u32* state;
u16 fpucw; u16 fpucw;
s8 pad[2]; s8 pad[2];
u32 mxcsr; u32 mxcsr;
}; };
struct OrbisFiber { struct OrbisFiber {
u32 magic_start; u32 magic_start;
std::atomic<FiberState> state; std::atomic<FiberState> state;
OrbisFiberEntry entry; OrbisFiberEntry entry;
u64 arg_on_initialize; u64 arg_on_initialize;
void* addr_context; void* addr_context;
u64 size_context; u64 size_context;
char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1]; char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1];
OrbisFiberContext* context; OrbisFiberContext* context;
u32 flags; u32 flags;
void* context_start; void* context_start;
void* context_end; void* context_end;
u32 magic_end; u32 magic_end;
}; };
static_assert(sizeof(OrbisFiber) <= 256); static_assert(sizeof(OrbisFiber) <= 256);
struct OrbisFiberInfo { struct OrbisFiberInfo {
u64 size; u64 size;
OrbisFiberEntry entry; OrbisFiberEntry entry;
u64 arg_on_initialize; u64 arg_on_initialize;
void* addr_context; void* addr_context;
u64 size_context; u64 size_context;
char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1]; char name[ORBIS_FIBER_MAX_NAME_LENGTH + 1];
u64 size_context_margin; u64 size_context_margin;
u8 pad[48]; u8 pad[48];
}; };
static_assert(sizeof(OrbisFiberInfo) == 128); static_assert(sizeof(OrbisFiberInfo) == 128);
struct OrbisFiberOptParam { struct OrbisFiberOptParam {
u32 magic; u32 magic;
}; };
static_assert(sizeof(OrbisFiberOptParam) <= 128); static_assert(sizeof(OrbisFiberOptParam) <= 128);
// Fiber creation and destruction.
s32 PS4_SYSV_ABI sceFiberInitialize(OrbisFiber* fiber, const char* name, OrbisFiberEntry entry,
                                    u64 arg_on_initialize, void* addr_context, u64 size_context,
                                    const OrbisFiberOptParam* opt_param, u32 build_version);
s32 PS4_SYSV_ABI sceFiberOptParamInitialize(OrbisFiberOptParam* opt_param);
s32 PS4_SYSV_ABI sceFiberFinalize(OrbisFiber* fiber);

// Scheduling.
s32 PS4_SYSV_ABI sceFiberRun(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_return);
s32 PS4_SYSV_ABI sceFiberSwitch(OrbisFiber* fiber, u64 arg_on_run_to, u64* arg_on_run);
s32 PS4_SYSV_ABI sceFiberGetSelf(OrbisFiber** fiber);
s32 PS4_SYSV_ABI sceFiberReturnToThread(u64 arg_on_return, u64* arg_on_run);

// Introspection and diagnostics.
s32 PS4_SYSV_ABI sceFiberGetInfo(OrbisFiber* fiber, OrbisFiberInfo* fiber_info);
s32 PS4_SYSV_ABI sceFiberStartContextSizeCheck(u32 flags);
s32 PS4_SYSV_ABI sceFiberStopContextSizeCheck();
s32 PS4_SYSV_ABI sceFiberRename(OrbisFiber* fiber, const char* name);

/// Registers the functions above with the symbol resolver.
void RegisterlibSceFiber(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::Fiber } // namespace Libraries::Fiber

View file

@ -1,121 +1,121 @@
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project # SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later # SPDX-License-Identifier: GPL-2.0-or-later
# _sceFiberSetJmp: saves the current register state into the buffer at %rdi
# and returns 0 in %eax. General-purpose registers occupy 8-byte slots from
# offset 0x00 to 0x68; the x87 control word and MXCSR follow at 0x70 and 0x72.
.global _sceFiberSetJmp
_sceFiberSetJmp:
    movq %rax, 0x0(%rdi)
    # The return address at the top of the stack is stored in the 0x10 slot;
    # the incoming %rdx value is overwritten and not preserved.
    movq (%rsp), %rdx
    movq %rdx, 0x10(%rdi)
    movq %rcx, 0x08(%rdi)
    movq %rbx, 0x18(%rdi)
    movq %rsp, 0x20(%rdi)
    movq %rbp, 0x28(%rdi)
    movq %r8, 0x30(%rdi)
    movq %r9, 0x38(%rdi)
    movq %r10, 0x40(%rdi)
    movq %r11, 0x48(%rdi)
    movq %r12, 0x50(%rdi)
    movq %r13, 0x58(%rdi)
    movq %r14, 0x60(%rdi)
    movq %r15, 0x68(%rdi)
    # Save the x87 control word (2 bytes), then MXCSR (4 bytes) right after it.
    fnstcw 0x70(%rdi)
    stmxcsr 0x72(%rdi)
    # Direct call returns 0.
    xor %eax, %eax
    ret
# _sceFiberLongJmp: restores the register state from the buffer at %rdi (the
# layout written by _sceFiberSetJmp) and resumes at the saved return address
# with 1 in %eax.
.global _sceFiberLongJmp
_sceFiberLongJmp:
    # MXCSR = (MXCSR & 0x3f) ^ (ctx->mxcsr & ~0x3f)
    # The 4 bytes just below %rsp are used as scratch space for the merge.
    stmxcsr -0x4(%rsp)
    movl 0x72(%rdi), %eax
    andl $0xffffffc0, %eax
    movl -0x4(%rsp), %ecx
    andl $0x3f, %ecx
    xorl %eax, %ecx
    movl %ecx, -0x4(%rsp)
    ldmxcsr -0x4(%rsp)
    # Reload the general-purpose registers; %rdx receives the saved return
    # address and %rsp switches to the saved stack.
    movq 0x00(%rdi), %rax
    movq 0x08(%rdi), %rcx
    movq 0x10(%rdi), %rdx
    movq 0x18(%rdi), %rbx
    movq 0x20(%rdi), %rsp
    movq 0x28(%rdi), %rbp
    movq 0x30(%rdi), %r8
    movq 0x38(%rdi), %r9
    movq 0x40(%rdi), %r10
    movq 0x48(%rdi), %r11
    movq 0x50(%rdi), %r12
    movq 0x58(%rdi), %r13
    movq 0x60(%rdi), %r14
    movq 0x68(%rdi), %r15
    fldcw 0x70(%rdi)
    # Make the jump and return 1
    # The return address is written to the top of the restored stack so that
    # `ret` transfers control there.
    movq %rdx, 0x00(%rsp)
    movl $0x1, %eax
    ret
# _sceFiberSwitchEntry: starts a fiber on its own stack.
#   %rdi - pointer to the parameter block (`data` in the comments below)
#   %esi - non-zero to load MXCSR and the x87 control word from the block
# Every register other than the two entry arguments and the call target is
# cleared before the entry function is called.
.global _sceFiberSwitchEntry
_sceFiberSwitchEntry:
    mov %rdi, %r11
    # Set stack address to provided stack
    movq 0x18(%r11), %rsp
    xorl %ebp, %ebp
    movq 0x20(%r11), %r10 # data->state
    # Set previous fiber state to Idle
    # Skipped when data->state is null.
    test %r10, %r10
    jz .clear_regs
    movl $2, (%r10)
.clear_regs:
    test %esi, %esi
    jz .skip_fpu_regs
    ldmxcsr 0x2c(%r11)
    fldcw 0x28(%r11)
.skip_fpu_regs:
    movq 0x08(%r11), %rdi # data->arg_on_initialize
    movq 0x10(%r11), %rsi # data->arg_on_run_to
    movq 0x00(%r11), %r11 # data->entry
    # Zero the remaining general-purpose registers.
    xorl %eax, %eax
    xorl %ebx, %ebx
    xorl %ecx, %ecx
    xorl %edx, %edx
    xorq %r8, %r8
    xorq %r9, %r9
    xorq %r10, %r10
    xorq %r12, %r12
    xorq %r13, %r13
    xorq %r14, %r14
    xorq %r15, %r15
    # Zero the MMX registers, leave MMX state, then zero the vector registers.
    pxor %mm0, %mm0
    pxor %mm1, %mm1
    pxor %mm2, %mm2
    pxor %mm3, %mm3
    pxor %mm4, %mm4
    pxor %mm5, %mm5
    pxor %mm6, %mm6
    pxor %mm7, %mm7
    emms
    vzeroall
    # Call the fiber's entry function: entry(arg_on_initialize, arg_on_run_to)
    call *%r11
    # Fiber returned, not good
    movl $1, %edi
    call _sceFiberForceQuit
    ret

View file

@ -1,183 +1,183 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
#include "core/libraries/rtc/rtc.h" #include "core/libraries/rtc/rtc.h"
/// Input type selector for the IME.
enum class OrbisImeType : u32 {
    Default = 0,
    BasicLatin = 1,
    Url = 2,
    Mail = 3,
    Number = 4,
};
/// Horizontal alignment selector.
enum class OrbisImeHorizontalAlignment : u32 {
    Left = 0,
    Center = 1,
    Right = 2,
};

/// Vertical alignment selector.
enum class OrbisImeVerticalAlignment : u32 {
    Top = 0,
    Center = 1,
    Bottom = 2,
};
/// Label selector for the enter key.
enum class OrbisImeEnterLabel : u32 {
    Default = 0,
    Send = 1,
    Search = 2,
    Go = 3,
};

/// Input method selector; only the default value is defined.
enum class OrbisImeInputMethod : u32 {
    Default = 0,
};
/// Identifier stored in OrbisImeEvent::id. The numbering has gaps (3, 13, 15-17)
/// and a separate range starting at 256 for keyboard events.
enum class OrbisImeEventId : u32 {
    Open = 0,
    UpdateText = 1,
    UpdateCaret = 2,
    PressClose = 4,
    PressEnter = 5,
    Abort = 6,
    CandidateListStart = 7,
    CandidateListEnd = 8,
    CandidateWord = 9,
    CandidateIndex = 10,
    CandidateDone = 11,
    CandidateCancel = 12,
    ChangeDevice = 14,
    ChangeInputMethodState = 18,

    KeyboardOpen = 256,
    KeyboardKeycodeDown = 257,
    // Misspelled legacy name kept so existing users keep compiling; prefer
    // KeyboardKeycodeDown in new code.
    KeyboardKeycodeDoen = KeyboardKeycodeDown,
    KeyboardKeycodeUp = 258,
    KeyboardKeycodeRepeat = 259,
    KeyboardConnection = 260,
    KeyboardDisconnection = 261,
    KeyboardAbort = 262,
};
/// Keyboard layout identifier, carried in OrbisImeKeycode::type.
enum class OrbisImeKeyboardType : u32 {
    NONE = 0,

    // European and American layouts.
    DANISH = 1,
    GERMAN = 2,
    GERMAN_SW = 3,
    ENGLISH_US = 4,
    ENGLISH_GB = 5,
    SPANISH = 6,
    SPANISH_LA = 7,
    FINNISH = 8,
    FRENCH = 9,
    FRENCH_BR = 10,
    FRENCH_CA = 11,
    FRENCH_SW = 12,
    ITALIAN = 13,
    DUTCH = 14,
    NORWEGIAN = 15,
    POLISH = 16,
    PORTUGUESE_BR = 17,
    PORTUGUESE_PT = 18,
    RUSSIAN = 19,
    SWEDISH = 20,
    TURKISH = 21,

    // East Asian layouts.
    JAPANESE_ROMAN = 22,
    JAPANESE_KANA = 23,
    KOREAN = 24,
    SM_CHINESE = 25,
    TR_CHINESE_ZY = 26,
    TR_CHINESE_PY_HK = 27,
    TR_CHINESE_PY_TW = 28,
    TR_CHINESE_CG = 29,

    // Remaining layouts.
    ARABIC_AR = 30,
    THAI = 31,
    CZECH = 32,
    GREEK = 33,
    INDONESIAN = 34,
    VIETNAMESE = 35,
    ROMANIAN = 36,
    HUNGARIAN = 37,
};
/// Input device identifier, carried in OrbisImeEventParam::device_type.
enum class OrbisImeDeviceType : u32 {
    None = 0,
    Controller = 1,
    ExtKeyboard = 2,
    RemoteOsk = 3,
};
struct OrbisImeRect { struct OrbisImeRect {
f32 x; f32 x;
f32 y; f32 y;
u32 width; u32 width;
u32 height; u32 height;
}; };
struct OrbisImeTextAreaProperty { struct OrbisImeTextAreaProperty {
u32 mode; // OrbisImeTextAreaMode u32 mode; // OrbisImeTextAreaMode
u32 index; u32 index;
s32 length; s32 length;
}; };
struct OrbisImeEditText { struct OrbisImeEditText {
char16_t* str; char16_t* str;
u32 caret_index; u32 caret_index;
u32 area_num; u32 area_num;
OrbisImeTextAreaProperty text_area[4]; OrbisImeTextAreaProperty text_area[4];
}; };
struct OrbisImeKeycode { struct OrbisImeKeycode {
u16 keycode; u16 keycode;
char16_t character; char16_t character;
u32 status; u32 status;
OrbisImeKeyboardType type; OrbisImeKeyboardType type;
s32 user_id; s32 user_id;
u32 resource_id; u32 resource_id;
Libraries::Rtc::OrbisRtcTick timestamp; Libraries::Rtc::OrbisRtcTick timestamp;
}; };
struct OrbisImeKeyboardResourceIdArray { struct OrbisImeKeyboardResourceIdArray {
s32 userId; s32 userId;
u32 resourceId[5]; u32 resourceId[5];
}; };
/// Caret movement, carried in OrbisImeEventParam::caret_move.
enum class OrbisImeCaretMovementDirection : u32 {
    Still = 0,
    Left = 1,
    Right = 2,
    Up = 3,
    Down = 4,
    Home = 5,
    End = 6,
    PageUp = 7,
    PageDown = 8,
    Top = 9,
    Bottom = 10,
};
union OrbisImeEventParam { union OrbisImeEventParam {
OrbisImeRect rect; OrbisImeRect rect;
OrbisImeEditText text; OrbisImeEditText text;
OrbisImeCaretMovementDirection caret_move; OrbisImeCaretMovementDirection caret_move;
OrbisImeKeycode keycode; OrbisImeKeycode keycode;
OrbisImeKeyboardResourceIdArray resource_id_array; OrbisImeKeyboardResourceIdArray resource_id_array;
char16_t* candidate_word; char16_t* candidate_word;
s32 candidate_index; s32 candidate_index;
OrbisImeDeviceType device_type; OrbisImeDeviceType device_type;
u32 input_method_state; u32 input_method_state;
s8 reserved[64]; s8 reserved[64];
}; };
struct OrbisImeEvent { struct OrbisImeEvent {
OrbisImeEventId id; OrbisImeEventId id;
OrbisImeEventParam param; OrbisImeEventParam param;
}; };
/// Text filter callback: receives the source text and its length, and writes the
/// filtered text and its length through the output parameters.
using OrbisImeTextFilter = PS4_SYSV_ABI int (*)(char16_t* outText, u32* outTextLength,
                                                const char16_t* srcText, u32 srcTextLength);

/// Event callback: receives a user argument and the event being delivered.
using OrbisImeEventHandler = PS4_SYSV_ABI void (*)(void* arg, const OrbisImeEvent* e);

View file

@ -1,253 +1,253 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "ime_ui.h" #include "ime_ui.h"
#include "imgui/imgui_std.h" #include "imgui/imgui_std.h"
namespace Libraries::Ime { namespace Libraries::Ime {
using namespace ImGui; using namespace ImGui;
// Width and height shared by the "Done" and "Close" buttons.
static constexpr ImVec2 BUTTON_SIZE(100.0f, 30.0f);
/// Builds the IME state from the caller's parameters.
///
/// Remembers the work and input text buffers and seeds `current_text` with the
/// UTF-8 form of whatever the input buffer already contains. A null `param`
/// leaves the state default-initialized.
ImeState::ImeState(const OrbisImeParam* param) {
    if (!param) {
        return;
    }

    work_buffer = param->work;
    text_buffer = param->inputTextBuffer;
    if (!text_buffer) {
        // Measuring a null string is undefined behaviour; with no input buffer
        // there is no initial text to convert.
        return;
    }

    const std::size_t text_len = std::char_traits<char16_t>::length(text_buffer);
    if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(),
                            ORBIS_IME_MAX_TEXT_LENGTH * 4)) {
        LOG_ERROR(Lib_ImeDialog, "Failed to convert text to utf8 encoding");
    }
}
/// Move constructor: takes over the buffers, current text and pending events of
/// `other`, then clears the source's text buffer pointer. The queue mutex is not
/// transferred; each instance keeps its own.
ImeState::ImeState(ImeState&& other) noexcept
    : work_buffer(other.work_buffer),
      text_buffer(other.text_buffer),
      current_text(std::move(other.current_text)),
      event_queue(std::move(other.event_queue)) {
    other.text_buffer = nullptr;
}
/// Move assignment: same transfer as the move constructor; self-assignment is a
/// no-op.
ImeState& ImeState::operator=(ImeState&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    work_buffer = other.work_buffer;
    text_buffer = other.text_buffer;
    current_text = std::move(other.current_text);
    event_queue = std::move(other.event_queue);

    other.text_buffer = nullptr;
    return *this;
}
/// Appends a copy of `*event` to the event queue while holding the queue mutex.
void ImeState::SendEvent(OrbisImeEvent* event) {
    std::scoped_lock guard{queue_mutex};
    event_queue.push(*event);
}
void ImeState::SendEnterEvent() { void ImeState::SendEnterEvent() {
OrbisImeEvent enterEvent{}; OrbisImeEvent enterEvent{};
enterEvent.id = OrbisImeEventId::PressEnter; enterEvent.id = OrbisImeEventId::PressEnter;
SendEvent(&enterEvent); SendEvent(&enterEvent);
} }
void ImeState::SendCloseEvent() { void ImeState::SendCloseEvent() {
OrbisImeEvent closeEvent{}; OrbisImeEvent closeEvent{};
closeEvent.id = OrbisImeEventId::PressClose; closeEvent.id = OrbisImeEventId::PressClose;
closeEvent.param.text.str = reinterpret_cast<char16_t*>(work_buffer); closeEvent.param.text.str = reinterpret_cast<char16_t*>(work_buffer);
SendEvent(&closeEvent); SendEvent(&closeEvent);
} }
/// Not implemented; the arguments are currently ignored.
void ImeState::SetText(const char16_t* text, u32 length) {}

/// Not implemented; the argument is currently ignored.
void ImeState::SetCaret(u32 position) {}
bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len,
char* utf8_text, std::size_t utf8_text_len) { char* utf8_text, std::size_t utf8_text_len) {
std::fill(utf8_text, utf8_text + utf8_text_len, '\0'); std::fill(utf8_text, utf8_text + utf8_text_len, '\0');
const ImWchar* orbis_text_ptr = reinterpret_cast<const ImWchar*>(orbis_text); const ImWchar* orbis_text_ptr = reinterpret_cast<const ImWchar*>(orbis_text);
ImTextStrToUtf8(utf8_text, utf8_text_len, orbis_text_ptr, orbis_text_ptr + orbis_text_len); ImTextStrToUtf8(utf8_text, utf8_text_len, orbis_text_ptr, orbis_text_ptr + orbis_text_len);
return true; return true;
} }
bool ImeState::ConvertUTF8ToOrbis(const char* utf8_text, std::size_t utf8_text_len, bool ImeState::ConvertUTF8ToOrbis(const char* utf8_text, std::size_t utf8_text_len,
char16_t* orbis_text, std::size_t orbis_text_len) { char16_t* orbis_text, std::size_t orbis_text_len) {
std::fill(orbis_text, orbis_text + orbis_text_len, u'\0'); std::fill(orbis_text, orbis_text + orbis_text_len, u'\0');
ImTextStrFromUtf8(reinterpret_cast<ImWchar*>(orbis_text), orbis_text_len, utf8_text, nullptr); ImTextStrFromUtf8(reinterpret_cast<ImWchar*>(orbis_text), orbis_text_len, utf8_text, nullptr);
return true; return true;
} }
/// Stores the state and parameters; the layer is registered for drawing only
/// when parameters were supplied.
ImeUi::ImeUi(ImeState* state, const OrbisImeParam* param) : state(state), ime_param(param) {
    if (!param) {
        return;
    }
    AddLayer(this);
}
/// Unregisters the layer while holding the draw mutex.
ImeUi::~ImeUi() {
    std::scoped_lock guard(draw_mutex);
    Free();
}
/// Move assignment: unregisters this layer, takes over the state and parameters
/// of `other`, clears them in `other`, and registers this layer again.
ImeUi& ImeUi::operator=(ImeUi&& other) {
    // Self-assignment would hand the same mutex to scoped_lock twice, which is
    // undefined behaviour, and would also wipe our own state.
    if (this == &other) {
        return *this;
    }

    std::scoped_lock lock(draw_mutex, other.draw_mutex);
    Free();
    state = other.state;
    ime_param = other.ime_param;
    first_render = other.first_render;
    other.state = nullptr;
    other.ime_param = nullptr;
    AddLayer(this);
    return *this;
}
void ImeUi::Draw() { void ImeUi::Draw() {
std::unique_lock lock{draw_mutex}; std::unique_lock lock{draw_mutex};
if (!state) { if (!state) {
return; return;
} }
const auto& ctx = *GetCurrentContext(); const auto& ctx = *GetCurrentContext();
const auto& io = ctx.IO; const auto& io = ctx.IO;
// TODO: Figure out how to properly translate the positions - // TODO: Figure out how to properly translate the positions -
// for example, if a game wants to center the IME panel, // for example, if a game wants to center the IME panel,
// we have to translate the panel position in a way that it // we have to translate the panel position in a way that it
// still becomes centered, as the game normally calculates // still becomes centered, as the game normally calculates
// the position assuming a it's running on a 1920x1080 screen, // the position assuming a it's running on a 1920x1080 screen,
// whereas we are running on a 1280x720 window size (by default). // whereas we are running on a 1280x720 window size (by default).
// //
// e.g. Panel position calculation from a game: // e.g. Panel position calculation from a game:
// param.posx = (1920 / 2) - (panelWidth / 2); // param.posx = (1920 / 2) - (panelWidth / 2);
// param.posy = (1080 / 2) - (panelHeight / 2); // param.posy = (1080 / 2) - (panelHeight / 2);
const auto size = GetIO().DisplaySize; const auto size = GetIO().DisplaySize;
f32 pos_x = (ime_param->posx / 1920.0f * (float)size.x); f32 pos_x = (ime_param->posx / 1920.0f * (float)size.x);
f32 pos_y = (ime_param->posy / 1080.0f * (float)size.y); f32 pos_y = (ime_param->posy / 1080.0f * (float)size.y);
ImVec2 window_pos = {pos_x, pos_y}; ImVec2 window_pos = {pos_x, pos_y};
ImVec2 window_size = {500.0f, 100.0f}; ImVec2 window_size = {500.0f, 100.0f};
// SetNextWindowPos(window_pos); // SetNextWindowPos(window_pos);
SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, io.DisplaySize.y * 0.5f), SetNextWindowPos(ImVec2(io.DisplaySize.x * 0.5f, io.DisplaySize.y * 0.5f),
ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f)); ImGuiCond_FirstUseEver, ImVec2(0.5f, 0.5f));
SetNextWindowSize(window_size); SetNextWindowSize(window_size);
SetNextWindowCollapsed(false); SetNextWindowCollapsed(false);
if (first_render || !io.NavActive) { if (first_render || !io.NavActive) {
SetNextWindowFocus(); SetNextWindowFocus();
} }
if (Begin("IME##Ime", nullptr, if (Begin("IME##Ime", nullptr,
ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize |
ImGuiWindowFlags_NoSavedSettings)) { ImGuiWindowFlags_NoSavedSettings)) {
DrawPrettyBackground(); DrawPrettyBackground();
DrawInputText(); DrawInputText();
SetCursorPosY(GetCursorPosY() + 10.0f); SetCursorPosY(GetCursorPosY() + 10.0f);
const char* button_text; const char* button_text;
button_text = "Done##ImeDone"; button_text = "Done##ImeDone";
float button_spacing = 10.0f; float button_spacing = 10.0f;
float total_button_width = BUTTON_SIZE.x * 2 + button_spacing; float total_button_width = BUTTON_SIZE.x * 2 + button_spacing;
float button_start_pos = (window_size.x - total_button_width) / 2.0f; float button_start_pos = (window_size.x - total_button_width) / 2.0f;
SetCursorPosX(button_start_pos); SetCursorPosX(button_start_pos);
if (Button(button_text, BUTTON_SIZE) || (IsKeyPressed(ImGuiKey_Enter))) { if (Button(button_text, BUTTON_SIZE) || (IsKeyPressed(ImGuiKey_Enter))) {
state->SendEnterEvent(); state->SendEnterEvent();
} }
SameLine(0.0f, button_spacing); SameLine(0.0f, button_spacing);
if (Button("Close##ImeClose", BUTTON_SIZE)) { if (Button("Close##ImeClose", BUTTON_SIZE)) {
state->SendCloseEvent(); state->SendCloseEvent();
} }
} }
End(); End();
first_render = false; first_render = false;
} }
void ImeUi::DrawInputText() { void ImeUi::DrawInputText() {
ImVec2 input_size = {GetWindowWidth() - 40.0f, 0.0f}; ImVec2 input_size = {GetWindowWidth() - 40.0f, 0.0f};
SetCursorPosX(20.0f); SetCursorPosX(20.0f);
if (first_render) { if (first_render) {
SetKeyboardFocusHere(); SetKeyboardFocusHere();
} }
if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength, if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength,
input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) { input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) {
} }
} }
/// ImGui input callback, invoked with ImGuiInputTextFlags_CallbackAlways.
///
/// When the text differs from the previously seen text, it is converted into both
/// the work buffer and the input text buffer and an UpdateText event is queued.
/// When the caret position differs from the previously seen one, an UpdateCaret
/// event is queued. Always returns 0.
int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
    ImeUi* ui = static_cast<ImeUi*>(data->UserData);
    ASSERT(ui);

    // Text seen on the previous invocation (shared by all instances).
    static std::string lastText;
    std::string currentText(data->Buf, data->BufTextLen);
    if (currentText != lastText) {
        OrbisImeEditText eventParam{};
        eventParam.str = reinterpret_cast<char16_t*>(ui->ime_param->work);
        eventParam.caret_index = data->CursorPos;
        eventParam.area_num = 1;

        eventParam.text_area[0].mode = 1; // Edit mode
        eventParam.text_area[0].index = data->CursorPos;
        eventParam.text_area[0].length = data->BufTextLen;

        // Mirror the text into the work buffer referenced by the event.
        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str,
                                           ui->ime_param->maxTextLength)) {
            LOG_ERROR(Lib_ImeDialog, "Failed to convert UTF-8 text to Orbis encoding");
            return 0;
        }

        // Mirror the text into the caller's input text buffer as well.
        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen,
                                           ui->ime_param->inputTextBuffer,
                                           ui->ime_param->maxTextLength)) {
            LOG_ERROR(Lib_ImeDialog, "Failed to convert UTF-8 text to Orbis encoding");
            return 0;
        }

        OrbisImeEvent event{};
        event.id = OrbisImeEventId::UpdateText;
        event.param.text = eventParam;

        lastText = currentText;
        ui->state->SendEvent(&event);
    }

    // Caret position seen on the previous invocation; -1 means "not seen yet".
    static int lastCaretPos = -1;
    if (lastCaretPos == -1) {
        lastCaretPos = data->CursorPos;
    } else if (data->CursorPos != lastCaretPos) {
        // The positions differ here, so the caret moved either left or right.
        const OrbisImeCaretMovementDirection caretDirection =
            data->CursorPos < lastCaretPos ? OrbisImeCaretMovementDirection::Left
                                           : OrbisImeCaretMovementDirection::Right;

        OrbisImeEvent event{};
        event.id = OrbisImeEventId::UpdateCaret;
        event.param.caret_move = caretDirection;

        lastCaretPos = data->CursorPos;
        ui->state->SendEvent(&event);
    }

    return 0;
}
void ImeUi::Free() { void ImeUi::Free() {
RemoveLayer(this); RemoveLayer(this);
} }
}; // namespace Libraries::Ime }; // namespace Libraries::Ime

View file

@ -1,76 +1,76 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <mutex> #include <mutex>
#include <imgui.h> #include <imgui.h>
#include <queue> #include <queue>
#include "imgui/imgui_layer.h" #include "imgui/imgui_layer.h"
#include "common/cstring.h" #include "common/cstring.h"
#include "common/types.h" #include "common/types.h"
#include "ime.h" #include "ime.h"
namespace Libraries::Ime { namespace Libraries::Ime {
class ImeHandler; class ImeHandler;
class ImeUi; class ImeUi;
class ImeState { class ImeState {
friend class ImeHandler; friend class ImeHandler;
friend class ImeUi; friend class ImeUi;
void* work_buffer{}; void* work_buffer{};
char16_t* text_buffer{}; char16_t* text_buffer{};
// A character can hold up to 4 bytes in UTF-8 // A character can hold up to 4 bytes in UTF-8
Common::CString<ORBIS_IME_MAX_TEXT_LENGTH * 4> current_text; Common::CString<ORBIS_IME_MAX_TEXT_LENGTH * 4> current_text;
std::queue<OrbisImeEvent> event_queue; std::queue<OrbisImeEvent> event_queue;
std::mutex queue_mutex; std::mutex queue_mutex;
public: public:
ImeState(const OrbisImeParam* param = nullptr); ImeState(const OrbisImeParam* param = nullptr);
ImeState(ImeState&& other) noexcept; ImeState(ImeState&& other) noexcept;
ImeState& operator=(ImeState&& other) noexcept; ImeState& operator=(ImeState&& other) noexcept;
void SendEvent(OrbisImeEvent* event); void SendEvent(OrbisImeEvent* event);
void SendEnterEvent(); void SendEnterEvent();
void SendCloseEvent(); void SendCloseEvent();
void SetText(const char16_t* text, u32 length); void SetText(const char16_t* text, u32 length);
void SetCaret(u32 position); void SetCaret(u32 position);
private: private:
bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text, bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text,
std::size_t native_text_len); std::size_t native_text_len);
bool ConvertUTF8ToOrbis(const char* native_text, std::size_t utf8_text_len, bool ConvertUTF8ToOrbis(const char* native_text, std::size_t utf8_text_len,
char16_t* orbis_text, std::size_t orbis_text_len); char16_t* orbis_text, std::size_t orbis_text_len);
}; };
class ImeUi : public ImGui::Layer { class ImeUi : public ImGui::Layer {
ImeState* state{}; ImeState* state{};
const OrbisImeParam* ime_param{}; const OrbisImeParam* ime_param{};
bool first_render = true; bool first_render = true;
std::mutex draw_mutex; std::mutex draw_mutex;
public: public:
explicit ImeUi(ImeState* state = nullptr, const OrbisImeParam* param = nullptr); explicit ImeUi(ImeState* state = nullptr, const OrbisImeParam* param = nullptr);
~ImeUi() override; ~ImeUi() override;
ImeUi(const ImeUi& other) = delete; ImeUi(const ImeUi& other) = delete;
ImeUi& operator=(ImeUi&& other); ImeUi& operator=(ImeUi&& other);
void Draw() override; void Draw() override;
private: private:
void Free(); void Free();
void DrawInputText(); void DrawInputText();
static int InputTextCallback(ImGuiInputTextCallbackData* data); static int InputTextCallback(ImGuiInputTextCallbackData* data);
}; };
}; // namespace Libraries::Ime }; // namespace Libraries::Ime

View file

@ -1,52 +1,52 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "mutex.h" #include "mutex.h"
#include "common/assert.h" #include "common/assert.h"
namespace Libraries::Kernel { namespace Libraries::Kernel {
/// Creates the underlying mutex. Only Windows needs explicit work here; on other
/// platforms the member is default-constructed.
TimedMutex::TimedMutex() {
#ifdef _WIN64
    // Default security attributes, not initially owned, unnamed.
    mtx = CreateMutex(nullptr, false, nullptr);
    ASSERT(mtx);
#endif
}
/// Closes the Windows mutex handle; nothing to do explicitly on other platforms.
TimedMutex::~TimedMutex() {
#ifdef _WIN64
    CloseHandle(mtx);
#endif
}
void TimedMutex::lock() { void TimedMutex::lock() {
#ifdef _WIN64 #ifdef _WIN64
for (;;) { for (;;) {
u64 res = WaitForSingleObjectEx(mtx, INFINITE, true); u64 res = WaitForSingleObjectEx(mtx, INFINITE, true);
if (res == WAIT_OBJECT_0) { if (res == WAIT_OBJECT_0) {
return; return;
} }
} }
#else #else
mtx.lock(); mtx.lock();
#endif #endif
} }
bool TimedMutex::try_lock() { bool TimedMutex::try_lock() {
#ifdef _WIN64 #ifdef _WIN64
return WaitForSingleObjectEx(mtx, 0, true) == WAIT_OBJECT_0; return WaitForSingleObjectEx(mtx, 0, true) == WAIT_OBJECT_0;
#else #else
return mtx.try_lock(); return mtx.try_lock();
#endif #endif
} }
void TimedMutex::unlock() { void TimedMutex::unlock() {
#ifdef _WIN64 #ifdef _WIN64
ReleaseMutex(mtx); ReleaseMutex(mtx);
#else #else
mtx.unlock(); mtx.unlock();
#endif #endif
} }
} // namespace Libraries::Kernel } // namespace Libraries::Kernel

View file

@ -1,80 +1,80 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <chrono> #include <chrono>
#include "common/types.h" #include "common/types.h"
#ifdef _WIN64 #ifdef _WIN64
#include <windows.h> #include <windows.h>
#else #else
#include <mutex> #include <mutex>
#endif #endif
namespace Libraries::Kernel { namespace Libraries::Kernel {
/// Timed mutex with the std::timed_mutex member interface.
///
/// On Windows it wraps a Win32 mutex HANDLE and waits alertably (APCs can run while
/// blocked); elsewhere it forwards to std::timed_mutex.
class TimedMutex {
public:
    TimedMutex();
    ~TimedMutex();

    // The object owns a raw handle on Windows, so a copy would close it twice.
    // std::timed_mutex is non-copyable anyway; make that explicit on every platform.
    TimedMutex(const TimedMutex&) = delete;
    TimedMutex& operator=(const TimedMutex&) = delete;

    void lock();
    bool try_lock();
    void unlock();

    /// Tries to lock for at most `rel_time`.
    /// @return true when the mutex was acquired.
    template <class Rep, class Period>
    bool try_lock_for(const std::chrono::duration<Rep, Period>& rel_time) {
#ifdef _WIN64
        constexpr auto zero = std::chrono::duration<Rep, Period>::zero();
        const auto now = std::chrono::steady_clock::now();

        std::chrono::steady_clock::time_point abs_time = now;
        if (rel_time > zero) {
            // Saturate at the largest time point instead of overflowing. The parentheses
            // keep the Windows `max` macro from expanding.
            constexpr auto max = (std::chrono::steady_clock::time_point::max)();
            if (abs_time < max - rel_time) {
                abs_time += rel_time;
            } else {
                abs_time = max;
            }
        }

        return try_lock_until(abs_time);
#else
        return mtx.try_lock_for(rel_time);
#endif
    }

    /// Tries to lock until `abs_time` is reached on `Clock`.
    /// @return true when the mutex was acquired, false once the deadline has passed or
    ///         the wait failed.
    template <class Clock, class Duration>
    bool try_lock_until(const std::chrono::time_point<Clock, Duration>& abs_time) {
#ifdef _WIN64
        // The wait timeout is a DWORD and INFINITE (0xFFFFFFFF) is reserved, so a single
        // wait is capped just below it; longer deadlines are served by several waits.
        constexpr std::chrono::milliseconds max_wait{INFINITE - 1};

        for (;;) {
            const auto now = Clock::now();
            if (abs_time <= now) {
                return false;
            }

            auto rel_ms = std::chrono::ceil<std::chrono::milliseconds>(abs_time - now);
            if (rel_ms > max_wait) {
                rel_ms = max_wait;
            }

            const DWORD res = WaitForSingleObjectEx(mtx, static_cast<DWORD>(rel_ms.count()), true);
            // WAIT_ABANDONED grants ownership exactly like WAIT_OBJECT_0.
            if (res == WAIT_OBJECT_0 || res == WAIT_ABANDONED) {
                return true;
            }
            if (res == WAIT_FAILED) {
                // Retrying a failed wait would only busy-spin until the deadline.
                return false;
            }
            // WAIT_TIMEOUT (possibly of a capped wait) or WAIT_IO_COMPLETION: go around
            // again; the deadline check at the top decides whether we are done.
        }
#else
        return mtx.try_lock_until(abs_time);
#endif
    }

private:
#ifdef _WIN64
    HANDLE mtx;
#else
    std::timed_mutex mtx;
#endif
};
} // namespace Libraries::Kernel } // namespace Libraries::Kernel

View file

@ -1,167 +1,167 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <atomic> #include <atomic>
#include <chrono> #include <chrono>
#include "common/assert.h" #include "common/assert.h"
#include "common/types.h" #include "common/types.h"
#ifdef _WIN64 #ifdef _WIN64
#include <windows.h> #include <windows.h>
#elif defined(__APPLE__) #elif defined(__APPLE__)
#include <dispatch/dispatch.h> #include <dispatch/dispatch.h>
#else #else
#include <semaphore> #include <semaphore>
#endif #endif
namespace Libraries::Kernel { namespace Libraries::Kernel {
template <s64 max> template <s64 max>
class Semaphore { class Semaphore {
public: public:
Semaphore(s32 initialCount) Semaphore(s32 initialCount)
#if !defined(_WIN64) && !defined(__APPLE__) #if !defined(_WIN64) && !defined(__APPLE__)
: sem{initialCount} : sem{initialCount}
#endif #endif
{ {
#ifdef _WIN64 #ifdef _WIN64
sem = CreateSemaphore(nullptr, initialCount, max, nullptr); sem = CreateSemaphore(nullptr, initialCount, max, nullptr);
ASSERT(sem); ASSERT(sem);
#elif defined(__APPLE__) #elif defined(__APPLE__)
sem = dispatch_semaphore_create(initialCount); sem = dispatch_semaphore_create(initialCount);
ASSERT(sem); ASSERT(sem);
#endif #endif
} }
~Semaphore() { ~Semaphore() {
#ifdef _WIN64 #ifdef _WIN64
CloseHandle(sem); CloseHandle(sem);
#elif defined(__APPLE__) #elif defined(__APPLE__)
dispatch_release(sem); dispatch_release(sem);
#endif #endif
} }
void release() { void release() {
#ifdef _WIN64 #ifdef _WIN64
ReleaseSemaphore(sem, 1, nullptr); ReleaseSemaphore(sem, 1, nullptr);
#elif defined(__APPLE__) #elif defined(__APPLE__)
dispatch_semaphore_signal(sem); dispatch_semaphore_signal(sem);
#else #else
sem.release(); sem.release();
#endif #endif
} }
void acquire() { void acquire() {
#ifdef _WIN64 #ifdef _WIN64
for (;;) { for (;;) {
u64 res = WaitForSingleObjectEx(sem, INFINITE, true); u64 res = WaitForSingleObjectEx(sem, INFINITE, true);
if (res == WAIT_OBJECT_0) { if (res == WAIT_OBJECT_0) {
return; return;
} }
} }
#elif defined(__APPLE__) #elif defined(__APPLE__)
for (;;) { for (;;) {
const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
if (res == 0) { if (res == 0) {
return; return;
} }
} }
#else #else
sem.acquire(); sem.acquire();
#endif #endif
} }
bool try_acquire() { bool try_acquire() {
#ifdef _WIN64 #ifdef _WIN64
return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0; return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0;
#elif defined(__APPLE__) #elif defined(__APPLE__)
return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0; return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0;
#else #else
return sem.try_acquire(); return sem.try_acquire();
#endif #endif
} }
template <class Rep, class Period> template <class Rep, class Period>
bool try_acquire_for(const std::chrono::duration<Rep, Period>& rel_time) { bool try_acquire_for(const std::chrono::duration<Rep, Period>& rel_time) {
#ifdef _WIN64 #ifdef _WIN64
const auto start_time = std::chrono::high_resolution_clock::now(); const auto start_time = std::chrono::high_resolution_clock::now();
auto rel_time_ms = std::chrono::ceil<std::chrono::milliseconds>(rel_time); auto rel_time_ms = std::chrono::ceil<std::chrono::milliseconds>(rel_time);
while (rel_time_ms.count() > 0) { while (rel_time_ms.count() > 0) {
u64 timeout_ms = static_cast<u64>(rel_time_ms.count()); u64 timeout_ms = static_cast<u64>(rel_time_ms.count());
u64 res = WaitForSingleObjectEx(sem, timeout_ms, true); u64 res = WaitForSingleObjectEx(sem, timeout_ms, true);
if (res == WAIT_OBJECT_0) { if (res == WAIT_OBJECT_0) {
return true; return true;
} else if (res == WAIT_IO_COMPLETION) { } else if (res == WAIT_IO_COMPLETION) {
auto elapsed_time = std::chrono::high_resolution_clock::now() - start_time; auto elapsed_time = std::chrono::high_resolution_clock::now() - start_time;
rel_time_ms -= std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_time); rel_time_ms -= std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_time);
} else { } else {
return false; return false;
} }
} }
return false; return false;
#elif defined(__APPLE__) #elif defined(__APPLE__)
const auto rel_time_ns = std::chrono::ceil<std::chrono::nanoseconds>(rel_time).count(); const auto rel_time_ns = std::chrono::ceil<std::chrono::nanoseconds>(rel_time).count();
const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns); const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns);
return dispatch_semaphore_wait(sem, timeout) == 0; return dispatch_semaphore_wait(sem, timeout) == 0;
#else #else
return sem.try_acquire_for(rel_time); return sem.try_acquire_for(rel_time);
#endif #endif
} }
template <class Clock, class Duration> template <class Clock, class Duration>
bool try_acquire_until(const std::chrono::time_point<Clock, Duration>& abs_time) { bool try_acquire_until(const std::chrono::time_point<Clock, Duration>& abs_time) {
#ifdef _WIN64 #ifdef _WIN64
const auto start_time = Clock::now(); const auto start_time = Clock::now();
if (start_time >= abs_time) { if (start_time >= abs_time) {
return false; return false;
} }
auto rel_time = std::chrono::ceil<std::chrono::milliseconds>(abs_time - start_time); auto rel_time = std::chrono::ceil<std::chrono::milliseconds>(abs_time - start_time);
while (rel_time.count() > 0) { while (rel_time.count() > 0) {
u64 timeout_ms = static_cast<u64>(rel_time.count()); u64 timeout_ms = static_cast<u64>(rel_time.count());
u64 res = WaitForSingleObjectEx(sem, timeout_ms, true); u64 res = WaitForSingleObjectEx(sem, timeout_ms, true);
if (res == WAIT_OBJECT_0) { if (res == WAIT_OBJECT_0) {
return true; return true;
} else if (res == WAIT_IO_COMPLETION) { } else if (res == WAIT_IO_COMPLETION) {
auto elapsed_time = Clock::now() - start_time; auto elapsed_time = Clock::now() - start_time;
rel_time -= std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_time); rel_time -= std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_time);
} else { } else {
return false; return false;
} }
} }
return false; return false;
#elif defined(__APPLE__) #elif defined(__APPLE__)
auto abs_s = std::chrono::time_point_cast<std::chrono::seconds>(abs_time); auto abs_s = std::chrono::time_point_cast<std::chrono::seconds>(abs_time);
auto abs_ns = std::chrono::time_point_cast<std::chrono::nanoseconds>(abs_time) - auto abs_ns = std::chrono::time_point_cast<std::chrono::nanoseconds>(abs_time) -
std::chrono::time_point_cast<std::chrono::nanoseconds>(abs_s); std::chrono::time_point_cast<std::chrono::nanoseconds>(abs_s);
const timespec abs_timespec = { const timespec abs_timespec = {
.tv_sec = abs_s.time_since_epoch().count(), .tv_sec = abs_s.time_since_epoch().count(),
.tv_nsec = abs_ns.count(), .tv_nsec = abs_ns.count(),
}; };
const auto timeout = dispatch_walltime(&abs_timespec, 0); const auto timeout = dispatch_walltime(&abs_timespec, 0);
return dispatch_semaphore_wait(sem, timeout) == 0; return dispatch_semaphore_wait(sem, timeout) == 0;
#else #else
return sem.try_acquire_until(abs_time); return sem.try_acquire_until(abs_time);
#endif #endif
} }
private: private:
#ifdef _WIN64 #ifdef _WIN64
HANDLE sem; HANDLE sem;
#elif defined(__APPLE__) #elif defined(__APPLE__)
dispatch_semaphore_t sem; dispatch_semaphore_t sem;
#else #else
std::counting_semaphore<max> sem; std::counting_semaphore<max> sem;
#endif #endif
}; };
using BinarySemaphore = Semaphore<1>; using BinarySemaphore = Semaphore<1>;
using CountingSemaphore = Semaphore<0x7FFFFFFF /*ORBIS_KERNEL_SEM_VALUE_MAX*/>; using CountingSemaphore = Semaphore<0x7FFFFFFF /*ORBIS_KERNEL_SEM_VALUE_MAX*/>;
} // namespace Libraries::Kernel } // namespace Libraries::Kernel

View file

@ -1,199 +1,199 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/libraries/videodec/videodec2.h" #include "core/libraries/videodec/videodec2.h"
#include "core/libraries/videodec/videodec2_impl.h" #include "core/libraries/videodec/videodec2_impl.h"
#include "core/libraries/videodec/videodec_error.h" #include "core/libraries/videodec/videodec_error.h"
namespace Libraries::Vdec2 { namespace Libraries::Vdec2 {
/// Fake minimum memory size for querying
static constexpr u64 kMinimumMemorySize = 32_MB;
/// Reports the compute memory requirement: no preallocated pointer and a size of
/// kMinimumMemorySize.
///
/// @return ORBIS_OK, ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER for a null argument, or
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when thisSize does not match.
s32 PS4_SYSV_ABI
sceVideodec2QueryComputeMemoryInfo(OrbisVideodec2ComputeMemoryInfo* computeMemInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (computeMemInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }

    const bool size_matches = computeMemInfo->thisSize == sizeof(OrbisVideodec2ComputeMemoryInfo);
    if (!size_matches) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    computeMemInfo->cpuGpuMemorySize = kMinimumMemorySize;
    computeMemInfo->cpuGpuMemory = nullptr;
    return ORBIS_OK;
}
/// Stub: logs the call and reports success. None of the arguments are read and
/// `computeQueue` is not written.
s32 PS4_SYSV_ABI
sceVideodec2AllocateComputeQueue(const OrbisVideodec2ComputeConfigInfo* computeCfgInfo,
                                 const OrbisVideodec2ComputeMemoryInfo* computeMemInfo,
                                 OrbisVideodec2ComputeQueue* computeQueue) {
    LOG_INFO(Lib_Vdec2, "called");
    return ORBIS_OK;
}
/// Stub: logs the call and reports success; `computeQueue` is not touched.
s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue) {
    LOG_INFO(Lib_Vdec2, "called");
    return ORBIS_OK;
}
/// Fills `decoderMemInfo` with the decoder memory requirements: every size is
/// kMinimumMemorySize, every pointer is null and the frame buffer alignment is 0x100.
///
/// @return ORBIS_OK, ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER for a null argument, or
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when either thisSize does not match.
s32 PS4_SYSV_ABI
sceVideodec2QueryDecoderMemoryInfo(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
                                   OrbisVideodec2DecoderMemoryInfo* decoderMemInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoderCfgInfo == nullptr || decoderMemInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }
    if (decoderCfgInfo->thisSize != sizeof(OrbisVideodec2DecoderConfigInfo)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }
    if (decoderMemInfo->thisSize != sizeof(OrbisVideodec2DecoderMemoryInfo)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    // Sizes
    decoderMemInfo->cpuMemorySize = kMinimumMemorySize;
    decoderMemInfo->gpuMemorySize = kMinimumMemorySize;
    decoderMemInfo->cpuGpuMemorySize = kMinimumMemorySize;
    decoderMemInfo->maxFrameBufferSize = kMinimumMemorySize;
    decoderMemInfo->frameBufferAlignment = 0x100;

    // Pointers
    decoderMemInfo->cpuMemory = nullptr;
    decoderMemInfo->gpuMemory = nullptr;
    decoderMemInfo->cpuGpuMemory = nullptr;

    return ORBIS_OK;
}
/// Allocates a VdecDecoder from the given configuration and stores the handle in
/// `*decoder`. The instance is freed by sceVideodec2DeleteDecoder.
///
/// @return ORBIS_OK, ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER for a null argument, or
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when either thisSize does not match.
s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
                                           const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo,
                                           OrbisVideodec2Decoder* decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoderCfgInfo == nullptr || decoderMemInfo == nullptr || decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }
    if (decoderCfgInfo->thisSize != sizeof(OrbisVideodec2DecoderConfigInfo)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }
    if (decoderMemInfo->thisSize != sizeof(OrbisVideodec2DecoderMemoryInfo)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    // The raw pointer is the opaque handle returned to the caller.
    *decoder = new VdecDecoder(*decoderCfgInfo, *decoderMemInfo);
    return ORBIS_OK;
}
/// Destroys a decoder instance.
///
/// @return ORBIS_OK, or ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE for a null handle.
s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
    }

    delete decoder;
    return ORBIS_OK;
}
/// Validates the arguments and forwards the request to VdecDecoder::Decode.
///
/// Only inputData->thisSize and frameBuffer->thisSize are checked here;
/// outputInfo->thisSize is not.
///
/// @return The result of Decode, or ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE /
///         ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER / ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when
///         validation fails.
s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder,
                                    const OrbisVideodec2InputData* inputData,
                                    OrbisVideodec2FrameBuffer* frameBuffer,
                                    OrbisVideodec2OutputInfo* outputInfo) {
    LOG_TRACE(Lib_Vdec2, "called");

    if (decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
    }
    if (inputData == nullptr || frameBuffer == nullptr || outputInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }
    if (inputData->thisSize != sizeof(OrbisVideodec2InputData)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }
    if (frameBuffer->thisSize != sizeof(OrbisVideodec2FrameBuffer)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    return decoder->Decode(*inputData, *frameBuffer, *outputInfo);
}
/// Validates the arguments and forwards the request to VdecDecoder::Flush.
///
/// @return The result of Flush, or ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE /
///         ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER / ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE when
///         validation fails.
s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder,
                                   OrbisVideodec2FrameBuffer* frameBuffer,
                                   OrbisVideodec2OutputInfo* outputInfo) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
    }
    if (frameBuffer == nullptr || outputInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }
    if (frameBuffer->thisSize != sizeof(OrbisVideodec2FrameBuffer)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }
    if (outputInfo->thisSize != sizeof(OrbisVideodec2OutputInfo)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    return decoder->Flush(*frameBuffer, *outputInfo);
}
/// Forwards to VdecDecoder::Reset.
///
/// @return The result of Reset, or ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE for a null
///         handle.
s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder) {
    LOG_INFO(Lib_Vdec2, "called");

    if (decoder == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_DECODER_INSTANCE;
    }
    return decoder->Reset();
}
/// Copies the most recent entry of gPictureInfos into `p1stPictureInfoOut`, which is
/// interpreted as an OrbisVideodec2AvcPictureInfo.
///
/// Nothing is written when `outputInfo` reports no pictures or no picture info has been
/// recorded. `p2ndPictureInfoOut` is never written; more than one picture is not
/// supported and hits UNREACHABLE().
///
/// @return ORBIS_OK, ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER when `outputInfo` is null, or
///         ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE on a thisSize mismatch.
s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo,
                                            void* p1stPictureInfoOut, void* p2ndPictureInfoOut) {
    LOG_TRACE(Lib_Vdec2, "called");

    if (outputInfo == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ARGUMENT_POINTER;
    }
    if (outputInfo->thisSize != sizeof(OrbisVideodec2OutputInfo)) {
        return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
    }

    const bool nothing_to_report = outputInfo->pictureCount == 0 || gPictureInfos.empty();
    if (nothing_to_report) {
        return ORBIS_OK;
    }

    if (p1stPictureInfoOut != nullptr) {
        auto* const first_info = static_cast<OrbisVideodec2AvcPictureInfo*>(p1stPictureInfoOut);
        // The caller pre-fills thisSize; it is validated before the struct is overwritten.
        if (first_info->thisSize != sizeof(OrbisVideodec2AvcPictureInfo)) {
            return ORBIS_VIDEODEC2_ERROR_STRUCT_SIZE;
        }
        *first_info = gPictureInfos.back();
    }

    if (outputInfo->pictureCount > 1) {
        UNREACHABLE();
    }

    return ORBIS_OK;
}
/// Registers every libSceVideodec2 export with the symbol resolver, one LIB_FUNCTION
/// entry (NID string -> handler) per function.
void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym) {
    // Compute queue
    LIB_FUNCTION("RnDibcGCPKw", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
                 sceVideodec2QueryComputeMemoryInfo);
    LIB_FUNCTION("eD+X2SmxUt4", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
                 sceVideodec2AllocateComputeQueue);
    LIB_FUNCTION("UvtA3FAiF4Y", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
                 sceVideodec2ReleaseComputeQueue);

    // Decoder lifetime
    LIB_FUNCTION("qqMCwlULR+E", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
                 sceVideodec2QueryDecoderMemoryInfo);
    LIB_FUNCTION("CNNRoRYd8XI", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
                 sceVideodec2CreateDecoder);
    LIB_FUNCTION("jwImxXRGSKA", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
                 sceVideodec2DeleteDecoder);

    // Decoding
    LIB_FUNCTION("852F5+q6+iM", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Decode);
    LIB_FUNCTION("l1hXwscLuCY", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Flush);
    LIB_FUNCTION("wJXikG6QFN8", "libSceVideodec2", 1, "libSceVideodec2", 1, 1, sceVideodec2Reset);
    LIB_FUNCTION("NtXRa3dRzU0", "libSceVideodec2", 1, "libSceVideodec2", 1, 1,
                 sceVideodec2GetPictureInfo);
}
} // namespace Libraries::Vdec2 } // namespace Libraries::Vdec2

View file

@ -1,139 +1,139 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
#include "videodec2_avc.h" #include "videodec2_avc.h"
namespace Core::Loader { namespace Core::Loader {
class SymbolsResolver; class SymbolsResolver;
} }
namespace Libraries::Vdec2 { namespace Libraries::Vdec2 {
class VdecDecoder; class VdecDecoder;
using OrbisVideodec2Decoder = VdecDecoder*; using OrbisVideodec2Decoder = VdecDecoder*;
using OrbisVideodec2ComputeQueue = void*; using OrbisVideodec2ComputeQueue = void*;
struct OrbisVideodec2DecoderConfigInfo { struct OrbisVideodec2DecoderConfigInfo {
u64 thisSize; u64 thisSize;
u32 resourceType; u32 resourceType;
u32 codecType; u32 codecType;
u32 profile; u32 profile;
u32 maxLevel; u32 maxLevel;
s32 maxFrameWidth; s32 maxFrameWidth;
s32 maxFrameHeight; s32 maxFrameHeight;
s32 maxDpbFrameCount; s32 maxDpbFrameCount;
u32 decodePipelineDepth; u32 decodePipelineDepth;
OrbisVideodec2ComputeQueue computeQueue; OrbisVideodec2ComputeQueue computeQueue;
u64 cpuAffinityMask; u64 cpuAffinityMask;
s32 cpuThreadPriority; s32 cpuThreadPriority;
bool optimizeProgressiveVideo; bool optimizeProgressiveVideo;
bool checkMemoryType; bool checkMemoryType;
u8 reserved0; u8 reserved0;
u8 reserved1; u8 reserved1;
void* extraConfigInfo; void* extraConfigInfo;
}; };
static_assert(sizeof(OrbisVideodec2DecoderConfigInfo) == 0x48); static_assert(sizeof(OrbisVideodec2DecoderConfigInfo) == 0x48);
struct OrbisVideodec2DecoderMemoryInfo { struct OrbisVideodec2DecoderMemoryInfo {
u64 thisSize; u64 thisSize;
u64 cpuMemorySize; u64 cpuMemorySize;
void* cpuMemory; void* cpuMemory;
u64 gpuMemorySize; u64 gpuMemorySize;
void* gpuMemory; void* gpuMemory;
u64 cpuGpuMemorySize; u64 cpuGpuMemorySize;
void* cpuGpuMemory; void* cpuGpuMemory;
u64 maxFrameBufferSize; u64 maxFrameBufferSize;
u32 frameBufferAlignment; u32 frameBufferAlignment;
u32 reserved0; u32 reserved0;
}; };
static_assert(sizeof(OrbisVideodec2DecoderMemoryInfo) == 0x48); static_assert(sizeof(OrbisVideodec2DecoderMemoryInfo) == 0x48);
struct OrbisVideodec2InputData { struct OrbisVideodec2InputData {
u64 thisSize; u64 thisSize;
void* auData; void* auData;
u64 auSize; u64 auSize;
u64 ptsData; u64 ptsData;
u64 dtsData; u64 dtsData;
u64 attachedData; u64 attachedData;
}; };
static_assert(sizeof(OrbisVideodec2InputData) == 0x30); static_assert(sizeof(OrbisVideodec2InputData) == 0x30);
struct OrbisVideodec2OutputInfo { struct OrbisVideodec2OutputInfo {
u64 thisSize; u64 thisSize;
bool isValid; bool isValid;
bool isErrorFrame; bool isErrorFrame;
u8 pictureCount; u8 pictureCount;
u32 codecType; u32 codecType;
u32 frameWidth; u32 frameWidth;
u32 framePitch; u32 framePitch;
u32 frameHeight; u32 frameHeight;
void* frameBuffer; void* frameBuffer;
u64 frameBufferSize; u64 frameBufferSize;
}; };
static_assert(sizeof(OrbisVideodec2OutputInfo) == 0x30); static_assert(sizeof(OrbisVideodec2OutputInfo) == 0x30);
struct OrbisVideodec2FrameBuffer { struct OrbisVideodec2FrameBuffer {
u64 thisSize; u64 thisSize;
void* frameBuffer; void* frameBuffer;
u64 frameBufferSize; u64 frameBufferSize;
bool isAccepted; bool isAccepted;
}; };
static_assert(sizeof(OrbisVideodec2FrameBuffer) == 0x20); static_assert(sizeof(OrbisVideodec2FrameBuffer) == 0x20);
struct OrbisVideodec2ComputeMemoryInfo { struct OrbisVideodec2ComputeMemoryInfo {
u64 thisSize; u64 thisSize;
u64 cpuGpuMemorySize; u64 cpuGpuMemorySize;
void* cpuGpuMemory; void* cpuGpuMemory;
}; };
static_assert(sizeof(OrbisVideodec2ComputeMemoryInfo) == 0x18); static_assert(sizeof(OrbisVideodec2ComputeMemoryInfo) == 0x18);
struct OrbisVideodec2ComputeConfigInfo { struct OrbisVideodec2ComputeConfigInfo {
u64 thisSize; u64 thisSize;
u16 computePipeId; u16 computePipeId;
u16 computeQueueId; u16 computeQueueId;
bool checkMemoryType; bool checkMemoryType;
u8 reserved0; u8 reserved0;
u16 reserved1; u16 reserved1;
}; };
static_assert(sizeof(OrbisVideodec2ComputeConfigInfo) == 0x10); static_assert(sizeof(OrbisVideodec2ComputeConfigInfo) == 0x10);
// Entry points of the Videodec2 library. They are only declared here; every function uses
// the PS4_SYSV_ABI calling convention and returns an s32 status code.

// Compute queue management.
s32 PS4_SYSV_ABI sceVideodec2QueryComputeMemoryInfo(
    OrbisVideodec2ComputeMemoryInfo* computeMemInfo);
s32 PS4_SYSV_ABI sceVideodec2AllocateComputeQueue(
    const OrbisVideodec2ComputeConfigInfo* computeCfgInfo,
    const OrbisVideodec2ComputeMemoryInfo* computeMemInfo,
    OrbisVideodec2ComputeQueue* computeQueue);
s32 PS4_SYSV_ABI sceVideodec2ReleaseComputeQueue(OrbisVideodec2ComputeQueue computeQueue);

// Decoder lifetime.
s32 PS4_SYSV_ABI sceVideodec2QueryDecoderMemoryInfo(
    const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
    OrbisVideodec2DecoderMemoryInfo* decoderMemInfo);
s32 PS4_SYSV_ABI sceVideodec2CreateDecoder(
    const OrbisVideodec2DecoderConfigInfo* decoderCfgInfo,
    const OrbisVideodec2DecoderMemoryInfo* decoderMemInfo,
    OrbisVideodec2Decoder* decoder);
s32 PS4_SYSV_ABI sceVideodec2DeleteDecoder(OrbisVideodec2Decoder decoder);

// Decoding.
s32 PS4_SYSV_ABI sceVideodec2Decode(OrbisVideodec2Decoder decoder,
                                    const OrbisVideodec2InputData* inputData,
                                    OrbisVideodec2FrameBuffer* frameBuffer,
                                    OrbisVideodec2OutputInfo* outputInfo);
s32 PS4_SYSV_ABI sceVideodec2Flush(OrbisVideodec2Decoder decoder,
                                   OrbisVideodec2FrameBuffer* frameBuffer,
                                   OrbisVideodec2OutputInfo* outputInfo);
s32 PS4_SYSV_ABI sceVideodec2Reset(OrbisVideodec2Decoder decoder);
s32 PS4_SYSV_ABI sceVideodec2GetPictureInfo(const OrbisVideodec2OutputInfo* outputInfo,
                                            void* p1stPictureInfo, void* p2ndPictureInfo);

// Presumably registers the functions above with the symbol resolver — defined elsewhere.
void RegisterlibSceVdec2(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::Vdec2 } // namespace Libraries::Vdec2

View file

@ -1,60 +1,60 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
namespace Libraries::Vdec2 { namespace Libraries::Vdec2 {
struct OrbisVideodec2AvcPictureInfo { struct OrbisVideodec2AvcPictureInfo {
u64 thisSize; u64 thisSize;
bool isValid; bool isValid;
u64 ptsData; u64 ptsData;
u64 dtsData; u64 dtsData;
u64 attachedData; u64 attachedData;
u8 idrPictureflag; u8 idrPictureflag;
u8 profile_idc; u8 profile_idc;
u8 level_idc; u8 level_idc;
u32 pic_width_in_mbs_minus1; u32 pic_width_in_mbs_minus1;
u32 pic_height_in_map_units_minus1; u32 pic_height_in_map_units_minus1;
u8 frame_mbs_only_flag; u8 frame_mbs_only_flag;
u8 frame_cropping_flag; u8 frame_cropping_flag;
u32 frameCropLeftOffset; u32 frameCropLeftOffset;
u32 frameCropRightOffset; u32 frameCropRightOffset;
u32 frameCropTopOffset; u32 frameCropTopOffset;
u32 frameCropBottomOffset; u32 frameCropBottomOffset;
u8 aspect_ratio_info_present_flag; u8 aspect_ratio_info_present_flag;
u8 aspect_ratio_idc; u8 aspect_ratio_idc;
u16 sar_width; u16 sar_width;
u16 sar_height; u16 sar_height;
u8 video_signal_type_present_flag; u8 video_signal_type_present_flag;
u8 video_format; u8 video_format;
u8 video_full_range_flag; u8 video_full_range_flag;
u8 colour_description_present_flag; u8 colour_description_present_flag;
u8 colour_primaries; u8 colour_primaries;
u8 transfer_characteristics; u8 transfer_characteristics;
u8 matrix_coefficients; u8 matrix_coefficients;
u8 timing_info_present_flag; u8 timing_info_present_flag;
u32 num_units_in_tick; u32 num_units_in_tick;
u32 time_scale; u32 time_scale;
u8 fixed_frame_rate_flag; u8 fixed_frame_rate_flag;
u8 bitstream_restriction_flag; u8 bitstream_restriction_flag;
u8 max_dec_frame_buffering; u8 max_dec_frame_buffering;
u8 pic_struct_present_flag; u8 pic_struct_present_flag;
u8 pic_struct; u8 pic_struct;
u8 field_pic_flag; u8 field_pic_flag;
u8 bottom_field_flag; u8 bottom_field_flag;
}; };
} // namespace Libraries::Vdec2 } // namespace Libraries::Vdec2

View file

@ -1,229 +1,229 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "videodec2_impl.h" #include "videodec2_impl.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/libraries/videodec/videodec_error.h" #include "core/libraries/videodec/videodec_error.h"
#include "common/support/avdec.h" #include "common/support/avdec.h"
namespace Libraries::Vdec2 { namespace Libraries::Vdec2 {
// Picture metadata recorded by VdecDecoder::Decode, one entry per decoded frame.
// This is a single namespace-scope list: it is cleared by VdecDecoder::Reset and by the
// VdecDecoder destructor.
std::vector<OrbisVideodec2AvcPictureInfo> gPictureInfos{};
// Copies an NV12 frame into dst: width*height bytes from plane 0, immediately followed by
// half as many bytes from plane 1.
// NOTE(review): each plane is copied as one contiguous run and src.linesize is ignored, so
// this presumably relies on rows having no padding (linesize == width) — confirm.
static inline void CopyNV12Data(u8* dst, const AVFrame& src) {
    const auto luma_bytes = src.width * src.height;
    const auto chroma_bytes = luma_bytes / 2;
    std::memcpy(dst, src.data[0], luma_bytes);
    std::memcpy(dst + luma_bytes, src.data[1], chroma_bytes);
}
// Creates an H.264 decoding context sized from configInfo.
// Only codecType == 1 (AVC) is supported; memoryInfo is currently unused.
VdecDecoder::VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo,
                         const OrbisVideodec2DecoderMemoryInfo& memoryInfo) {
    ASSERT(configInfo.codecType == 1); /* AVC */

    const AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    ASSERT(codec);

    mCodecContext = avcodec_alloc_context3(codec);
    ASSERT(mCodecContext);

    mCodecContext->width = configInfo.maxFrameWidth;
    mCodecContext->height = configInfo.maxFrameHeight;

    // Do not drop the result silently: with an unopened context every later decode call
    // fails, and the log line is the only hint as to why.
    const int ret = avcodec_open2(mCodecContext, codec, nullptr);
    if (ret < 0) {
        LOG_ERROR(Lib_Vdec2, "Failed to open H264 decoder: {}", ret);
    }
}
// Clears the recorded picture metadata and releases the FFmpeg contexts.
VdecDecoder::~VdecDecoder() {
    gPictureInfos.clear();
    sws_freeContext(mSwsContext);
    avcodec_free_context(&mCodecContext);
}
// Feeds one access unit to the decoder and copies every frame that becomes available into
// frameBuffer as NV12, describing the last one in outputInfo.
//
// Returns ORBIS_OK on success (even if no frame was produced),
// ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_POINTER / _SIZE for a null or empty access unit, and
// ORBIS_VIDEODEC2_ERROR_API_FAIL on allocation or decoder errors.
s32 VdecDecoder::Decode(const OrbisVideodec2InputData& inputData,
                        OrbisVideodec2FrameBuffer& frameBuffer,
                        OrbisVideodec2OutputInfo& outputInfo) {
    // Report "no picture" until a frame has actually been produced below.
    outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo);
    outputInfo.pictureCount = 0;
    outputInfo.isErrorFrame = true;
    outputInfo.isValid = false;
    frameBuffer.isAccepted = false;

    if (inputData.auData == nullptr) {
        return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_POINTER;
    }
    if (inputData.auSize == 0) {
        return ORBIS_VIDEODEC2_ERROR_ACCESS_UNIT_SIZE;
    }

    // The packet points directly at the caller's access unit; no copy is made here.
    AVPacket* au_packet = av_packet_alloc();
    if (au_packet == nullptr) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate packet");
        return ORBIS_VIDEODEC2_ERROR_API_FAIL;
    }
    au_packet->data = static_cast<u8*>(inputData.auData);
    au_packet->size = inputData.auSize;
    au_packet->pts = inputData.ptsData;
    au_packet->dts = inputData.dtsData;

    int status = avcodec_send_packet(mCodecContext, au_packet);
    if (status < 0) {
        LOG_ERROR(Lib_Vdec2, "Error sending packet to decoder: {}", status);
        av_packet_free(&au_packet);
        return ORBIS_VIDEODEC2_ERROR_API_FAIL;
    }

    AVFrame* decoded = av_frame_alloc();
    if (!decoded) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate frame");
        av_packet_free(&au_packet);
        return ORBIS_VIDEODEC2_ERROR_API_FAIL;
    }

    // Drain the decoder; each frame overwrites the same destination buffer.
    for (;;) {
        status = avcodec_receive_frame(mCodecContext, decoded);
        if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) {
            break;
        }
        if (status < 0) {
            LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", status);
            av_packet_free(&au_packet);
            av_frame_free(&decoded);
            return ORBIS_VIDEODEC2_ERROR_API_FAIL;
        }

        // The output is always NV12; convert when the decoder produced something else.
        if (decoded->format != AV_PIX_FMT_NV12) {
            AVFrame* converted = ConvertNV12Frame(*decoded);
            ASSERT(converted);
            av_frame_free(&decoded);
            decoded = converted;
        }

        CopyNV12Data(static_cast<u8*>(frameBuffer.frameBuffer), *decoded);
        frameBuffer.isAccepted = true;

        outputInfo.codecType = 1; // FIXME: Hardcoded to AVC
        outputInfo.frameWidth = decoded->width;
        outputInfo.frameHeight = decoded->height;
        outputInfo.framePitch = decoded->linesize[0];
        outputInfo.frameBufferSize = frameBuffer.frameBufferSize;
        outputInfo.frameBuffer = frameBuffer.frameBuffer;
        outputInfo.isValid = true;
        outputInfo.isErrorFrame = false;
        outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video

        // isValid was set just above, so picture metadata is recorded for every frame.
        OrbisVideodec2AvcPictureInfo avc_info = {};
        avc_info.thisSize = sizeof(OrbisVideodec2AvcPictureInfo);
        avc_info.isValid = true;
        avc_info.ptsData = inputData.ptsData;
        avc_info.dtsData = inputData.dtsData;
        avc_info.attachedData = inputData.attachedData;
        avc_info.frameCropLeftOffset = decoded->crop_left;
        avc_info.frameCropRightOffset = decoded->crop_right;
        avc_info.frameCropTopOffset = decoded->crop_top;
        avc_info.frameCropBottomOffset = decoded->crop_bottom;
        gPictureInfos.push_back(avc_info);
    }

    av_frame_free(&decoded);
    av_packet_free(&au_packet);
    return ORBIS_OK;
}
// Retrieves the frames the decoder currently has ready, without submitting new input, and
// copies each into frameBuffer as NV12, describing the last one in outputInfo.
//
// Returns ORBIS_OK on success (even if no frame was produced) and
// ORBIS_VIDEODEC2_ERROR_API_FAIL on allocation or decoder errors.
s32 VdecDecoder::Flush(OrbisVideodec2FrameBuffer& frameBuffer,
                       OrbisVideodec2OutputInfo& outputInfo) {
    // Report "no picture" until a frame has actually been produced below.
    outputInfo.thisSize = sizeof(OrbisVideodec2OutputInfo);
    outputInfo.pictureCount = 0;
    outputInfo.isErrorFrame = true;
    outputInfo.isValid = false;
    frameBuffer.isAccepted = false;

    AVFrame* decoded = av_frame_alloc();
    if (decoded == nullptr) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate frame");
        return ORBIS_VIDEODEC2_ERROR_API_FAIL;
    }

    for (;;) {
        const int status = avcodec_receive_frame(mCodecContext, decoded);
        if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) {
            break;
        }
        if (status < 0) {
            LOG_ERROR(Lib_Vdec2, "Error receiving frame from decoder: {}", status);
            av_frame_free(&decoded);
            return ORBIS_VIDEODEC2_ERROR_API_FAIL;
        }

        // The output is always NV12; convert when the decoder produced something else.
        if (decoded->format != AV_PIX_FMT_NV12) {
            AVFrame* converted = ConvertNV12Frame(*decoded);
            ASSERT(converted);
            av_frame_free(&decoded);
            decoded = converted;
        }

        CopyNV12Data(static_cast<u8*>(frameBuffer.frameBuffer), *decoded);
        frameBuffer.isAccepted = true;

        outputInfo.codecType = 1; // FIXME: Hardcoded to AVC
        outputInfo.frameWidth = decoded->width;
        outputInfo.frameHeight = decoded->height;
        outputInfo.framePitch = decoded->linesize[0];
        outputInfo.frameBufferSize = frameBuffer.frameBufferSize;
        outputInfo.frameBuffer = frameBuffer.frameBuffer;
        outputInfo.isValid = true;
        outputInfo.isErrorFrame = false;
        outputInfo.pictureCount = 1; // TODO: 2 pictures for interlaced video
        // FIXME: Should we add picture info here too?
    }

    av_frame_free(&decoded);
    return ORBIS_OK;
}
// Discards the recorded picture metadata and the decoder's buffered state.
// Always returns ORBIS_OK.
s32 VdecDecoder::Reset() {
    gPictureInfos.clear();
    avcodec_flush_buffers(mCodecContext);
    return ORBIS_OK;
}
// Converts `frame` to a newly allocated NV12 frame of the same dimensions, carrying over
// timestamps, aspect ratio and cropping.
//
// Returns the new frame, which the caller must release with av_frame_free, or nullptr if
// an allocation or the conversion fails. Nothing is leaked on failure.
AVFrame* VdecDecoder::ConvertNV12Frame(AVFrame& frame) {
    AVFrame* nv12_frame = av_frame_alloc();
    if (nv12_frame == nullptr) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate NV12 frame");
        return nullptr;
    }

    nv12_frame->pts = frame.pts;
    nv12_frame->pkt_dts = frame.pkt_dts < 0 ? 0 : frame.pkt_dts;
    nv12_frame->format = AV_PIX_FMT_NV12;
    nv12_frame->width = frame.width;
    nv12_frame->height = frame.height;
    nv12_frame->sample_aspect_ratio = frame.sample_aspect_ratio;
    nv12_frame->crop_top = frame.crop_top;
    nv12_frame->crop_bottom = frame.crop_bottom;
    nv12_frame->crop_left = frame.crop_left;
    nv12_frame->crop_right = frame.crop_right;

    const int alloc_ret = av_frame_get_buffer(nv12_frame, 0);
    if (alloc_ret < 0) {
        LOG_ERROR(Lib_Vdec2, "Failed to allocate NV12 frame buffer: {}", alloc_ret);
        av_frame_free(&nv12_frame);
        return nullptr;
    }

    // sws_getCachedContext reuses the existing context while the source size/format stay
    // the same and replaces it when they change, so a mid-stream resolution or pixel format
    // change no longer runs through a context built for the first frame.
    mSwsContext = sws_getCachedContext(mSwsContext, frame.width, frame.height,
                                       AVPixelFormat(frame.format), nv12_frame->width,
                                       nv12_frame->height, AV_PIX_FMT_NV12, SWS_FAST_BILINEAR,
                                       nullptr, nullptr, nullptr);
    if (mSwsContext == nullptr) {
        LOG_ERROR(Lib_Vdec2, "Failed to create NV12 conversion context");
        av_frame_free(&nv12_frame);
        return nullptr;
    }

    const auto res = sws_scale(mSwsContext, frame.data, frame.linesize, 0, frame.height,
                               nv12_frame->data, nv12_frame->linesize);
    if (res < 0) {
        LOG_ERROR(Lib_Vdec2, "Could not convert to NV12: {}", av_err2str(res));
        av_frame_free(&nv12_frame);
        return nullptr;
    }

    return nv12_frame;
}
} // namespace Libraries::Vdec2 } // namespace Libraries::Vdec2

View file

@ -1,39 +1,39 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <vector> #include <vector>
#include "videodec2.h" #include "videodec2.h"
extern "C" { extern "C" {
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h> #include <libavutil/imgutils.h>
#include <libswscale/swscale.h> #include <libswscale/swscale.h>
} }
namespace Libraries::Vdec2 { namespace Libraries::Vdec2 {
// Namespace-scope list of picture metadata; declared here, defined in the implementation
// file of this header.
extern std::vector<OrbisVideodec2AvcPictureInfo> gPictureInfos;
class VdecDecoder { class VdecDecoder {
public: public:
VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo, VdecDecoder(const OrbisVideodec2DecoderConfigInfo& configInfo,
const OrbisVideodec2DecoderMemoryInfo& memoryInfo); const OrbisVideodec2DecoderMemoryInfo& memoryInfo);
~VdecDecoder(); ~VdecDecoder();
s32 Decode(const OrbisVideodec2InputData& inputData, OrbisVideodec2FrameBuffer& frameBuffer, s32 Decode(const OrbisVideodec2InputData& inputData, OrbisVideodec2FrameBuffer& frameBuffer,
OrbisVideodec2OutputInfo& outputInfo); OrbisVideodec2OutputInfo& outputInfo);
s32 Flush(OrbisVideodec2FrameBuffer& frameBuffer, OrbisVideodec2OutputInfo& outputInfo); s32 Flush(OrbisVideodec2FrameBuffer& frameBuffer, OrbisVideodec2OutputInfo& outputInfo);
s32 Reset(); s32 Reset();
private: private:
AVFrame* ConvertNV12Frame(AVFrame& frame); AVFrame* ConvertNV12Frame(AVFrame& frame);
private: private:
AVCodecContext* mCodecContext = nullptr; AVCodecContext* mCodecContext = nullptr;
SwsContext* mSwsContext = nullptr; SwsContext* mSwsContext = nullptr;
}; };
} // namespace Libraries::Vdec2 } // namespace Libraries::Vdec2

View file

@ -1,151 +1,151 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/alignment.h" #include "common/alignment.h"
#include "core/libraries/kernel/threads/pthread.h" #include "core/libraries/kernel/threads/pthread.h"
#include "thread.h" #include "thread.h"
#ifdef _WIN64 #ifdef _WIN64
#include <windows.h> #include <windows.h>
#include "common/ntapi.h" #include "common/ntapi.h"
#else #else
#include <csignal> #include <csignal>
#include <pthread.h> #include <pthread.h>
#endif #endif
namespace Core { namespace Core {
#ifdef _WIN64 #ifdef _WIN64
// Constants used by InitializeContext to build the initial CONTEXT of a new thread.
// Segment selector values; RPL_MASK is OR'ed into each selector before it is stored.
#define KGDT64_R3_DATA (0x28)
#define KGDT64_R3_CODE (0x30)
#define KGDT64_R3_CMTEB (0x50)
#define RPL_MASK (0x03)
// Initial floating point control word and MXCSR mask.
#define INITIAL_FPUCW (0x037f)
#define INITIAL_MXCSR_MASK (0xffbf)
// OR'ed into the initial EFlags value.
#define EFLAGS_INTERRUPT_MASK (0x200)
// Describes the stack given in `attr` to the new thread: the allocation base is the start
// of the stack and the stack base is its end (start + size). StackLimit is left null.
void InitializeTeb(INITIAL_TEB* teb, const ::Libraries::Kernel::PthreadAttr* attr) {
    const u64 stack_start = reinterpret_cast<u64>(attr->stackaddr_attr);
    teb->StackAllocationBase = attr->stackaddr_attr;
    teb->StackLimit = nullptr;
    teb->StackBase = reinterpret_cast<void*>(stack_start + attr->stacksize_attr);
}
void InitializeContext(CONTEXT* ctx, ThreadFunc func, void* arg, void InitializeContext(CONTEXT* ctx, ThreadFunc func, void* arg,
const ::Libraries::Kernel::PthreadAttr* attr) { const ::Libraries::Kernel::PthreadAttr* attr) {
/* Note: The stack has to be reversed */ /* Note: The stack has to be reversed */
ctx->Rsp = (u64)attr->stackaddr_attr + attr->stacksize_attr; ctx->Rsp = (u64)attr->stackaddr_attr + attr->stacksize_attr;
ctx->Rbp = (u64)attr->stackaddr_attr + attr->stacksize_attr; ctx->Rbp = (u64)attr->stackaddr_attr + attr->stacksize_attr;
ctx->Rcx = (u64)arg; ctx->Rcx = (u64)arg;
ctx->Rip = (u64)func; ctx->Rip = (u64)func;
ctx->SegGs = KGDT64_R3_DATA | RPL_MASK; ctx->SegGs = KGDT64_R3_DATA | RPL_MASK;
ctx->SegEs = KGDT64_R3_DATA | RPL_MASK; ctx->SegEs = KGDT64_R3_DATA | RPL_MASK;
ctx->SegDs = KGDT64_R3_DATA | RPL_MASK; ctx->SegDs = KGDT64_R3_DATA | RPL_MASK;
ctx->SegCs = KGDT64_R3_CODE | RPL_MASK; ctx->SegCs = KGDT64_R3_CODE | RPL_MASK;
ctx->SegSs = KGDT64_R3_DATA | RPL_MASK; ctx->SegSs = KGDT64_R3_DATA | RPL_MASK;
ctx->SegFs = KGDT64_R3_CMTEB | RPL_MASK; ctx->SegFs = KGDT64_R3_CMTEB | RPL_MASK;
ctx->EFlags = 0x3000 | EFLAGS_INTERRUPT_MASK; ctx->EFlags = 0x3000 | EFLAGS_INTERRUPT_MASK;
ctx->MxCsr = INITIAL_MXCSR; ctx->MxCsr = INITIAL_MXCSR;
ctx->FltSave.ControlWord = INITIAL_FPUCW; ctx->FltSave.ControlWord = INITIAL_FPUCW;
ctx->FltSave.MxCsr = INITIAL_MXCSR; ctx->FltSave.MxCsr = INITIAL_MXCSR;
ctx->FltSave.MxCsr_Mask = INITIAL_MXCSR_MASK; ctx->FltSave.MxCsr_Mask = INITIAL_MXCSR_MASK;
ctx->ContextFlags = ctx->ContextFlags =
CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT; CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT;
} }
#endif #endif
// Starts without a native handle; the handle is filled in by Create().
NativeThread::NativeThread() : native_handle{} {}

// Nothing to release here.
NativeThread::~NativeThread() = default;
// Starts a native thread that runs func(arg) on the stack described by `attr` and stores
// its handle in native_handle.
// Returns the result of pthread_create (POSIX) or NtCreateThread (Windows).
int NativeThread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) {
#ifndef _WIN64
    pthread_t* pthr = reinterpret_cast<pthread_t*>(&native_handle);
    pthread_attr_t pattr;
    pthread_attr_init(&pattr);
    pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr);
    const int ret = pthread_create(pthr, &pattr, (PthreadFunc)func, arg);
    // The attribute object is only needed for the create call; destroy it so whatever
    // pthread_attr_init set up is released instead of leaking on every thread creation.
    pthread_attr_destroy(&pattr);
    return ret;
#else
    CLIENT_ID clientId{};
    INITIAL_TEB teb{};
    CONTEXT ctx{};

    clientId.UniqueProcess = GetCurrentProcess();
    clientId.UniqueThread = GetCurrentThread();

    InitializeTeb(&teb, attr);
    InitializeContext(&ctx, func, arg, attr);

    return NtCreateThread(&native_handle, THREAD_ALL_ACCESS, nullptr, GetCurrentProcess(),
                          &clientId, &ctx, &teb, false);
#endif
}
void NativeThread::Exit() { void NativeThread::Exit() {
if (!native_handle) { if (!native_handle) {
return; return;
} }
tid = 0; tid = 0;
#ifdef _WIN64 #ifdef _WIN64
NtClose(native_handle); NtClose(native_handle);
native_handle = nullptr; native_handle = nullptr;
/* The Windows kernel will free the stack /* The Windows kernel will free the stack
given at thread creation via INITIAL_TEB given at thread creation via INITIAL_TEB
(StackAllocationBase) upon thread termination. (StackAllocationBase) upon thread termination.
In earlier Windows versions (NT4 to Windows Server 2003), In earlier Windows versions (NT4 to Windows Server 2003),
you could get around this via disabling FreeStackOnTermination you could get around this via disabling FreeStackOnTermination
on the TEB. This has been removed since then. on the TEB. This has been removed since then.
To avoid this, we must forcefully set the TEB To avoid this, we must forcefully set the TEB
deallocation stack pointer to NULL so ZwFreeVirtualMemory fails deallocation stack pointer to NULL so ZwFreeVirtualMemory fails
in the kernel and our stack is not freed. in the kernel and our stack is not freed.
*/ */
auto* teb = reinterpret_cast<TEB*>(NtCurrentTeb()); auto* teb = reinterpret_cast<TEB*>(NtCurrentTeb());
teb->DeallocationStack = nullptr; teb->DeallocationStack = nullptr;
NtTerminateThread(nullptr, 0); NtTerminateThread(nullptr, 0);
#else #else
// Disable and free the signal stack. // Disable and free the signal stack.
constexpr stack_t sig_stack = { constexpr stack_t sig_stack = {
.ss_flags = SS_DISABLE, .ss_flags = SS_DISABLE,
}; };
sigaltstack(&sig_stack, nullptr); sigaltstack(&sig_stack, nullptr);
if (sig_stack_ptr) { if (sig_stack_ptr) {
free(sig_stack_ptr); free(sig_stack_ptr);
sig_stack_ptr = nullptr; sig_stack_ptr = nullptr;
} }
pthread_exit(nullptr); pthread_exit(nullptr);
#endif #endif
} }
void NativeThread::Initialize() { void NativeThread::Initialize() {
#if _WIN64 #if _WIN64
tid = GetCurrentThreadId(); tid = GetCurrentThreadId();
#else #else
tid = (u64)pthread_self(); tid = (u64)pthread_self();
// Set up an alternate signal handler stack to avoid overflowing small thread stacks. // Set up an alternate signal handler stack to avoid overflowing small thread stacks.
const size_t page_size = getpagesize(); const size_t page_size = getpagesize();
const size_t sig_stack_size = Common::AlignUp(std::max<size_t>(64_KB, MINSIGSTKSZ), page_size); const size_t sig_stack_size = Common::AlignUp(std::max<size_t>(64_KB, MINSIGSTKSZ), page_size);
ASSERT_MSG(posix_memalign(&sig_stack_ptr, page_size, sig_stack_size) == 0, ASSERT_MSG(posix_memalign(&sig_stack_ptr, page_size, sig_stack_size) == 0,
"Failed to allocate signal stack: {}", errno); "Failed to allocate signal stack: {}", errno);
stack_t sig_stack; stack_t sig_stack;
sig_stack.ss_sp = sig_stack_ptr; sig_stack.ss_sp = sig_stack_ptr;
sig_stack.ss_size = sig_stack_size; sig_stack.ss_size = sig_stack_size;
sig_stack.ss_flags = 0; sig_stack.ss_flags = 0;
ASSERT_MSG(sigaltstack(&sig_stack, nullptr) == 0, "Failed to set signal stack: {}", errno); ASSERT_MSG(sigaltstack(&sig_stack, nullptr) == 0, "Failed to set signal stack: {}", errno);
#endif #endif
} }
} // namespace Core } // namespace Core

View file

@ -1,45 +1,45 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
namespace Libraries::Kernel { namespace Libraries::Kernel {
struct PthreadAttr; struct PthreadAttr;
} // namespace Libraries::Kernel } // namespace Libraries::Kernel
namespace Core { namespace Core {
// Thread entry point as accepted by NativeThread::Create: takes one pointer, returns nothing.
using ThreadFunc = void (*)(void*);
// Same shape, but returning a pointer.
using PthreadFunc = void* (*)(void*);
class NativeThread { class NativeThread {
public: public:
NativeThread(); NativeThread();
~NativeThread(); ~NativeThread();
int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr); int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr);
void Exit(); void Exit();
void Initialize(); void Initialize();
uintptr_t GetHandle() { uintptr_t GetHandle() {
return reinterpret_cast<uintptr_t>(native_handle); return reinterpret_cast<uintptr_t>(native_handle);
} }
u64 GetTid() { u64 GetTid() {
return tid; return tid;
} }
private: private:
#ifdef _WIN64 #ifdef _WIN64
void* native_handle; void* native_handle;
#else #else
uintptr_t native_handle; uintptr_t native_handle;
void* sig_stack_ptr; void* sig_stack_ptr;
#endif #endif
u64 tid; u64 tid;
}; };
} // namespace Core } // namespace Core

View file

@ -1,74 +1,74 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <optional> #include <optional>
#include <type_traits> #include <type_traits>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <queue> #include <queue>
#include "shader_recompiler/ir/value.h" #include "shader_recompiler/ir/value.h"
namespace Shader::IR { namespace Shader::IR {
// Use typename Instruction so the function can be used to return either const or mutable // Use typename Instruction so the function can be used to return either const or mutable
// Insts depending on the context. // Insts depending on the context.
template <typename Instruction, typename Pred> template <typename Instruction, typename Pred>
auto BreadthFirstSearch(Instruction* inst, auto BreadthFirstSearch(Instruction* inst,
Pred&& pred) -> std::invoke_result_t<Pred, Instruction*> { Pred&& pred) -> std::invoke_result_t<Pred, Instruction*> {
// Most often case the instruction is the desired already. // Most often case the instruction is the desired already.
if (std::optional result = pred(inst)) { if (std::optional result = pred(inst)) {
return result; return result;
} }
// Breadth-first search visiting the right most arguments first // Breadth-first search visiting the right most arguments first
boost::container::small_vector<Instruction*, 2> visited; boost::container::small_vector<Instruction*, 2> visited;
std::queue<Instruction*> queue; std::queue<Instruction*> queue;
queue.push(inst); queue.push(inst);
while (!queue.empty()) { while (!queue.empty()) {
// Pop one instruction from the queue // Pop one instruction from the queue
Instruction* inst{queue.front()}; Instruction* inst{queue.front()};
queue.pop(); queue.pop();
if (std::optional result = pred(inst)) { if (std::optional result = pred(inst)) {
// This is the instruction we were looking for // This is the instruction we were looking for
return result; return result;
} }
// Visit the right most arguments first // Visit the right most arguments first
for (size_t arg = inst->NumArgs(); arg--;) { for (size_t arg = inst->NumArgs(); arg--;) {
Value arg_value{inst->Arg(arg)}; Value arg_value{inst->Arg(arg)};
if (arg_value.IsImmediate()) { if (arg_value.IsImmediate()) {
continue; continue;
} }
// Queue instruction if it hasn't been visited // Queue instruction if it hasn't been visited
Instruction* arg_inst{arg_value.InstRecursive()}; Instruction* arg_inst{arg_value.InstRecursive()};
if (std::ranges::find(visited, arg_inst) == visited.end()) { if (std::ranges::find(visited, arg_inst) == visited.end()) {
visited.push_back(arg_inst); visited.push_back(arg_inst);
queue.push(arg_inst); queue.push(arg_inst);
} }
} }
} }
// SSA tree has been traversed and the result hasn't been found // SSA tree has been traversed and the result hasn't been found
return std::nullopt; return std::nullopt;
} }
template <typename Pred> template <typename Pred>
auto BreadthFirstSearch(const Value& value, auto BreadthFirstSearch(const Value& value,
Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> { Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> {
if (value.IsImmediate()) { if (value.IsImmediate()) {
// Nothing to do with immediates // Nothing to do with immediates
return std::nullopt; return std::nullopt;
} }
return BreadthFirstSearch(value.InstRecursive(), pred); return BreadthFirstSearch(value.InstRecursive(), pred);
} }
template <typename Pred> template <typename Pred>
auto BreadthFirstSearch(Value value, Pred&& pred) -> std::invoke_result_t<Pred, Inst*> { auto BreadthFirstSearch(Value value, Pred&& pred) -> std::invoke_result_t<Pred, Inst*> {
if (value.IsImmediate()) { if (value.IsImmediate()) {
// Nothing to do with immediates // Nothing to do with immediates
return std::nullopt; return std::nullopt;
} }
return BreadthFirstSearch(value.InstRecursive(), pred); return BreadthFirstSearch(value.InstRecursive(), pred);
} }
} // namespace Shader::IR } // namespace Shader::IR

View file

@ -1,203 +1,203 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <cstddef> #include <cstddef>
#include <optional> #include <optional>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan { namespace Vulkan {
class Instance; class Instance;
class Scheduler; class Scheduler;
} // namespace Vulkan } // namespace Vulkan
VK_DEFINE_HANDLE(VmaAllocation) VK_DEFINE_HANDLE(VmaAllocation)
VK_DEFINE_HANDLE(VmaAllocator) VK_DEFINE_HANDLE(VmaAllocator)
struct VmaAllocationInfo; struct VmaAllocationInfo;
namespace VideoCore { namespace VideoCore {
/// Hints and requirements for the backing memory type of a commit
enum class MemoryUsage {
    DeviceLocal, ///< Requests device local buffer.
    Upload,      ///< Requires a host visible memory type optimized for CPU to GPU uploads
    Download,    ///< Requires a host visible memory type optimized for GPU to CPU readbacks
    Stream,      ///< Requests device local host visible buffer, falling back to host memory.
};
/// Buffer usage bits for read-style access: transfer source, uniform texel, uniform, index,
/// vertex and indirect buffers.
inline constexpr vk::BufferUsageFlags ReadFlags =
    vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformTexelBuffer |
    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer;

/// ReadFlags plus the usage bits for transfer destination, storage texel and storage buffers.
inline constexpr vk::BufferUsageFlags AllFlags = ReadFlags | vk::BufferUsageFlagBits::eTransferDst |
                                                 vk::BufferUsageFlagBits::eStorageTexelBuffer |
                                                 vk::BufferUsageFlagBits::eStorageBuffer;
struct UniqueBuffer { struct UniqueBuffer {
explicit UniqueBuffer(vk::Device device, VmaAllocator allocator); explicit UniqueBuffer(vk::Device device, VmaAllocator allocator);
~UniqueBuffer(); ~UniqueBuffer();
UniqueBuffer(const UniqueBuffer&) = delete; UniqueBuffer(const UniqueBuffer&) = delete;
UniqueBuffer& operator=(const UniqueBuffer&) = delete; UniqueBuffer& operator=(const UniqueBuffer&) = delete;
UniqueBuffer(UniqueBuffer&& other) UniqueBuffer(UniqueBuffer&& other)
: allocator{std::exchange(other.allocator, VK_NULL_HANDLE)}, : allocator{std::exchange(other.allocator, VK_NULL_HANDLE)},
allocation{std::exchange(other.allocation, VK_NULL_HANDLE)}, allocation{std::exchange(other.allocation, VK_NULL_HANDLE)},
buffer{std::exchange(other.buffer, VK_NULL_HANDLE)} {} buffer{std::exchange(other.buffer, VK_NULL_HANDLE)} {}
UniqueBuffer& operator=(UniqueBuffer&& other) { UniqueBuffer& operator=(UniqueBuffer&& other) {
buffer = std::exchange(other.buffer, VK_NULL_HANDLE); buffer = std::exchange(other.buffer, VK_NULL_HANDLE);
allocator = std::exchange(other.allocator, VK_NULL_HANDLE); allocator = std::exchange(other.allocator, VK_NULL_HANDLE);
allocation = std::exchange(other.allocation, VK_NULL_HANDLE); allocation = std::exchange(other.allocation, VK_NULL_HANDLE);
return *this; return *this;
} }
void Create(const vk::BufferCreateInfo& image_ci, MemoryUsage usage, void Create(const vk::BufferCreateInfo& image_ci, MemoryUsage usage,
VmaAllocationInfo* out_alloc_info); VmaAllocationInfo* out_alloc_info);
operator vk::Buffer() const { operator vk::Buffer() const {
return buffer; return buffer;
} }
vk::Device device; vk::Device device;
VmaAllocator allocator; VmaAllocator allocator;
VmaAllocation allocation; VmaAllocation allocation;
vk::Buffer buffer{}; vk::Buffer buffer{};
}; };
class Buffer { class Buffer {
public: public:
explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, explicit Buffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags, MemoryUsage usage, VAddr cpu_addr_, vk::BufferUsageFlags flags,
u64 size_bytes_); u64 size_bytes_);
Buffer& operator=(const Buffer&) = delete; Buffer& operator=(const Buffer&) = delete;
Buffer(const Buffer&) = delete; Buffer(const Buffer&) = delete;
Buffer& operator=(Buffer&&) = default; Buffer& operator=(Buffer&&) = default;
Buffer(Buffer&&) = default; Buffer(Buffer&&) = default;
vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt, vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
AmdGpu::NumberFormat nfmt); AmdGpu::NumberFormat nfmt);
/// Increases the likeliness of this being a stream buffer /// Increases the likeliness of this being a stream buffer
void IncreaseStreamScore(int score) noexcept { void IncreaseStreamScore(int score) noexcept {
stream_score += score; stream_score += score;
} }
/// Returns the likeliness of this being a stream buffer /// Returns the likeliness of this being a stream buffer
[[nodiscard]] int StreamScore() const noexcept { [[nodiscard]] int StreamScore() const noexcept {
return stream_score; return stream_score;
} }
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
[[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
} }
/// Returns the base CPU address of the buffer /// Returns the base CPU address of the buffer
[[nodiscard]] VAddr CpuAddr() const noexcept { [[nodiscard]] VAddr CpuAddr() const noexcept {
return cpu_addr; return cpu_addr;
} }
/// Returns the offset relative to the given CPU address /// Returns the offset relative to the given CPU address
[[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept { [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {
return static_cast<u32>(other_cpu_addr - cpu_addr); return static_cast<u32>(other_cpu_addr - cpu_addr);
} }
size_t SizeBytes() const { size_t SizeBytes() const {
return size_bytes; return size_bytes;
} }
vk::Buffer Handle() const noexcept { vk::Buffer Handle() const noexcept {
return buffer; return buffer;
} }
std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask, std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask,
vk::PipelineStageFlagBits2 dst_stage) { vk::PipelineStageFlagBits2 dst_stage) {
if (dst_acess_mask == access_mask && stage == dst_stage) { if (dst_acess_mask == access_mask && stage == dst_stage) {
return {}; return {};
} }
auto barrier = vk::BufferMemoryBarrier2{ auto barrier = vk::BufferMemoryBarrier2{
.srcStageMask = stage, .srcStageMask = stage,
.srcAccessMask = access_mask, .srcAccessMask = access_mask,
.dstStageMask = dst_stage, .dstStageMask = dst_stage,
.dstAccessMask = dst_acess_mask, .dstAccessMask = dst_acess_mask,
.buffer = buffer.buffer, .buffer = buffer.buffer,
.size = size_bytes, .size = size_bytes,
}; };
access_mask = dst_acess_mask; access_mask = dst_acess_mask;
stage = dst_stage; stage = dst_stage;
return barrier; return barrier;
} }
public: public:
VAddr cpu_addr = 0; VAddr cpu_addr = 0;
bool is_picked{}; bool is_picked{};
bool is_coherent{}; bool is_coherent{};
bool is_deleted{}; bool is_deleted{};
int stream_score = 0; int stream_score = 0;
size_t size_bytes = 0; size_t size_bytes = 0;
std::span<u8> mapped_data; std::span<u8> mapped_data;
const Vulkan::Instance* instance; const Vulkan::Instance* instance;
Vulkan::Scheduler* scheduler; Vulkan::Scheduler* scheduler;
MemoryUsage usage; MemoryUsage usage;
UniqueBuffer buffer; UniqueBuffer buffer;
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone}; vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone}; vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
}; };
class StreamBuffer : public Buffer { class StreamBuffer : public Buffer {
public: public:
explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, explicit StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
MemoryUsage usage, u64 size_bytes_); MemoryUsage usage, u64 size_bytes_);
/// Reserves a region of memory from the stream buffer. /// Reserves a region of memory from the stream buffer.
std::pair<u8*, u64> Map(u64 size, u64 alignment = 0); std::pair<u8*, u64> Map(u64 size, u64 alignment = 0);
/// Ensures that reserved bytes of memory are available to the GPU. /// Ensures that reserved bytes of memory are available to the GPU.
void Commit(); void Commit();
/// Maps and commits a memory region with user provided data /// Maps and commits a memory region with user provided data
u64 Copy(VAddr src, size_t size, size_t alignment = 0) { u64 Copy(VAddr src, size_t size, size_t alignment = 0) {
const auto [data, offset] = Map(size, alignment); const auto [data, offset] = Map(size, alignment);
std::memcpy(data, reinterpret_cast<const void*>(src), size); std::memcpy(data, reinterpret_cast<const void*>(src), size);
Commit(); Commit();
return offset; return offset;
} }
u64 GetFreeSize() const { u64 GetFreeSize() const {
return size_bytes - offset - mapped_size; return size_bytes - offset - mapped_size;
} }
private: private:
struct Watch { struct Watch {
u64 tick{}; u64 tick{};
u64 upper_bound{}; u64 upper_bound{};
}; };
/// Increases the amount of watches available. /// Increases the amount of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
/// Waits pending watches until requested upper bound. /// Waits pending watches until requested upper bound.
void WaitPendingOperations(u64 requested_upper_bound); void WaitPendingOperations(u64 requested_upper_bound);
private: private:
u64 offset{}; u64 offset{};
u64 mapped_size{}; u64 mapped_size{};
std::vector<Watch> current_watches; std::vector<Watch> current_watches;
std::size_t current_watch_cursor{}; std::size_t current_watch_cursor{};
std::optional<size_t> invalidation_mark; std::optional<size_t> invalidation_mark;
std::vector<Watch> previous_watches; std::vector<Watch> previous_watches;
std::size_t wait_cursor{}; std::size_t wait_cursor{};
u64 wait_bound{}; u64 wait_bound{};
}; };
} // namespace VideoCore } // namespace VideoCore

File diff suppressed because it is too large Load diff

View file

@ -1,168 +1,168 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <mutex> #include <mutex>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
#include "common/div_ceil.h" #include "common/div_ceil.h"
#include "common/slot_vector.h" #include "common/slot_vector.h"
#include "common/types.h" #include "common/types.h"
#include "video_core/buffer_cache/buffer.h" #include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/memory_tracker_base.h" #include "video_core/buffer_cache/memory_tracker_base.h"
#include "video_core/buffer_cache/range_set.h" #include "video_core/buffer_cache/range_set.h"
#include "video_core/multi_level_page_table.h" #include "video_core/multi_level_page_table.h"
namespace AmdGpu { namespace AmdGpu {
struct Liverpool; struct Liverpool;
} }
namespace Shader { namespace Shader {
namespace Gcn { namespace Gcn {
struct FetchShaderData; struct FetchShaderData;
} }
struct Info; struct Info;
} // namespace Shader } // namespace Shader
namespace VideoCore { namespace VideoCore {
/// Identifier of a buffer stored in the cache's slot vector.
using BufferId = Common::SlotId;

/// Buffer id constructed from index 0.
static constexpr BufferId NULL_BUFFER_ID{0};
class TextureCache; class TextureCache;
class BufferCache { class BufferCache {
public: public:
static constexpr u32 CACHING_PAGEBITS = 12; static constexpr u32 CACHING_PAGEBITS = 12;
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
static constexpr u64 DEVICE_PAGESIZE = 4_KB; static constexpr u64 DEVICE_PAGESIZE = 4_KB;
struct Traits { struct Traits {
using Entry = BufferId; using Entry = BufferId;
static constexpr size_t AddressSpaceBits = 40; static constexpr size_t AddressSpaceBits = 40;
static constexpr size_t FirstLevelBits = 14; static constexpr size_t FirstLevelBits = 14;
static constexpr size_t PageBits = CACHING_PAGEBITS; static constexpr size_t PageBits = CACHING_PAGEBITS;
}; };
using PageTable = MultiLevelPageTable<Traits>; using PageTable = MultiLevelPageTable<Traits>;
struct OverlapResult { struct OverlapResult {
boost::container::small_vector<BufferId, 16> ids; boost::container::small_vector<BufferId, 16> ids;
VAddr begin; VAddr begin;
VAddr end; VAddr end;
bool has_stream_leap = false; bool has_stream_leap = false;
}; };
public: public:
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
PageManager& tracker); PageManager& tracker);
~BufferCache(); ~BufferCache();
/// Returns a pointer to GDS device local buffer. /// Returns a pointer to GDS device local buffer.
[[nodiscard]] const Buffer* GetGdsBuffer() const noexcept { [[nodiscard]] const Buffer* GetGdsBuffer() const noexcept {
return &gds_buffer; return &gds_buffer;
} }
/// Retrieves the buffer with the specified id. /// Retrieves the buffer with the specified id.
[[nodiscard]] Buffer& GetBuffer(BufferId id) { [[nodiscard]] Buffer& GetBuffer(BufferId id) {
return slot_buffers[id]; return slot_buffers[id];
} }
[[nodiscard]] vk::BufferView& NullBufferView() { [[nodiscard]] vk::BufferView& NullBufferView() {
return null_buffer_view; return null_buffer_view;
} }
/// Invalidates any buffer in the logical page range. /// Invalidates any buffer in the logical page range.
void InvalidateMemory(VAddr device_addr, u64 size); void InvalidateMemory(VAddr device_addr, u64 size);
/// Binds host vertex buffers for the current draw. /// Binds host vertex buffers for the current draw.
bool BindVertexBuffers(const Shader::Info& vs_info, bool BindVertexBuffers(const Shader::Info& vs_info,
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader); const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);
/// Bind host index buffer for the current draw. /// Bind host index buffer for the current draw.
u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);
/// Writes a value to GPU buffer. /// Writes a value to GPU buffer.
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data); [[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
/// Obtains a buffer for the specified region. /// Obtains a buffer for the specified region.
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
bool is_texel_buffer = false, bool is_texel_buffer = false,
BufferId buffer_id = {}); BufferId buffer_id = {});
/// Attempts to obtain a buffer without modifying the cache contents. /// Attempts to obtain a buffer without modifying the cache contents.
[[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size, [[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size,
bool prefer_gpu); bool prefer_gpu);
/// Return true when a region is registered on the cache /// Return true when a region is registered on the cache
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
/// Return true when a CPU region is modified from the CPU /// Return true when a CPU region is modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
/// Return true when a CPU region is modified from the GPU /// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
[[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
private: private:
template <typename Func> template <typename Func>
void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) { void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) {
const BufferId buffer_id = page_table[page]; const BufferId buffer_id = page_table[page];
if (!buffer_id) { if (!buffer_id) {
++page; ++page;
continue; continue;
} }
Buffer& buffer = slot_buffers[buffer_id]; Buffer& buffer = slot_buffers[buffer_id];
func(buffer_id, buffer); func(buffer_id, buffer);
const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
page = Common::DivCeil(end_addr, CACHING_PAGESIZE); page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
} }
} }
void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size); void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
[[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
[[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size); [[nodiscard]] BufferId CreateBuffer(VAddr device_addr, u32 wanted_size);
void Register(BufferId buffer_id); void Register(BufferId buffer_id);
void Unregister(BufferId buffer_id); void Unregister(BufferId buffer_id);
template <bool insert> template <bool insert>
void ChangeRegister(BufferId buffer_id); void ChangeRegister(BufferId buffer_id);
void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer); void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer);
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
void DeleteBuffer(BufferId buffer_id); void DeleteBuffer(BufferId buffer_id);
const Vulkan::Instance& instance; const Vulkan::Instance& instance;
Vulkan::Scheduler& scheduler; Vulkan::Scheduler& scheduler;
AmdGpu::Liverpool* liverpool; AmdGpu::Liverpool* liverpool;
TextureCache& texture_cache; TextureCache& texture_cache;
PageManager& tracker; PageManager& tracker;
StreamBuffer staging_buffer; StreamBuffer staging_buffer;
StreamBuffer stream_buffer; StreamBuffer stream_buffer;
Buffer gds_buffer; Buffer gds_buffer;
std::mutex mutex; std::mutex mutex;
Common::SlotVector<Buffer> slot_buffers; Common::SlotVector<Buffer> slot_buffers;
RangeSet gpu_modified_ranges; RangeSet gpu_modified_ranges;
vk::BufferView null_buffer_view; vk::BufferView null_buffer_view;
MemoryTracker memory_tracker; MemoryTracker memory_tracker;
PageTable page_table; PageTable page_table;
}; };
} // namespace VideoCore } // namespace VideoCore

View file

@ -1,175 +1,175 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <deque> #include <deque>
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
#include "video_core/buffer_cache/word_manager.h" #include "video_core/buffer_cache/word_manager.h"
namespace VideoCore { namespace VideoCore {
class MemoryTracker { class MemoryTracker {
public: public:
/// Number of address bits covered by the tracker.
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
/// log2 of the number of bytes handled by one word manager.
static constexpr size_t HIGHER_PAGE_BITS = 22;
/// Number of bytes handled by one word manager.
static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
/// Mask extracting the byte offset inside one word manager's range.
static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
/// Number of word manager ranges in the tracked address space.
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
// NOTE(review): presumably the number of managers allocated per pool growth - confirm at use.
static constexpr size_t MANAGER_POOL_SIZE = 32;
/// Number of words a manager needs to cover HIGHER_PAGE_SIZE bytes.
static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
using Manager = WordManager<WORDS_STACK_NEEDED>;
public: public:
/// Stores the page manager pointer in `tracker`.
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
~MemoryTracker() = default;
/// Returns true if a region has been modified from the CPU
/// Word managers missing for the queried range are created while iterating.
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
    return IteratePages<true>(
        query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
            return manager->template IsRegionModified<Type::CPU>(offset, size);
        });
}
/// Returns true if a region has been modified from the GPU
/// Ranges without a word manager are skipped instead of being created.
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
    return IteratePages<false>(
        query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
            return manager->template IsRegionModified<Type::GPU>(offset, size);
        });
}
/// Mark region as CPU modified, notifying the device_tracker about this change
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
    IteratePages<true>(dirty_cpu_addr, query_size,
                       [](Manager* manager, u64 offset, size_t size) {
                           // The manager is addressed with an absolute CPU address.
                           manager->template ChangeRegionState<Type::CPU, true>(
                               manager->GetCpuAddr() + offset, size);
                       });
}
/// Unmark region as CPU modified, notifying the device_tracker about this change
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
    IteratePages<true>(dirty_cpu_addr, query_size,
                       [](Manager* manager, u64 offset, size_t size) {
                           // The manager is addressed with an absolute CPU address.
                           manager->template ChangeRegionState<Type::CPU, false>(
                               manager->GetCpuAddr() + offset, size);
                       });
}
/// Mark region as modified from the host GPU
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
    IteratePages<true>(dirty_cpu_addr, query_size,
                       [](Manager* manager, u64 offset, size_t size) {
                           // The manager is addressed with an absolute CPU address.
                           manager->template ChangeRegionState<Type::GPU, true>(
                               manager->GetCpuAddr() + offset, size);
                       });
}
/// Unmark region as modified from the host GPU
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
    IteratePages<true>(dirty_cpu_addr, query_size,
                       [](Manager* manager, u64 offset, size_t size) {
                           // The manager is addressed with an absolute CPU address.
                           manager->template ChangeRegionState<Type::GPU, false>(
                               manager->GetCpuAddr() + offset, size);
                       });
}
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func> template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<true>(query_cpu_range, query_size, IteratePages<true>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) { [&func](Manager* manager, u64 offset, size_t size) {
manager->template ForEachModifiedRange<Type::CPU, true>( manager->template ForEachModifiedRange<Type::CPU, true>(
manager->GetCpuAddr() + offset, size, func); manager->GetCpuAddr() + offset, size, func);
}); });
} }
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <bool clear, typename Func> template <bool clear, typename Func>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<false>(query_cpu_range, query_size, IteratePages<false>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) { [&func](Manager* manager, u64 offset, size_t size) {
if constexpr (clear) { if constexpr (clear) {
manager->template ForEachModifiedRange<Type::GPU, true>( manager->template ForEachModifiedRange<Type::GPU, true>(
manager->GetCpuAddr() + offset, size, func); manager->GetCpuAddr() + offset, size, func);
} else { } else {
manager->template ForEachModifiedRange<Type::GPU, false>( manager->template ForEachModifiedRange<Type::GPU, false>(
manager->GetCpuAddr() + offset, size, func); manager->GetCpuAddr() + offset, size, func);
} }
}); });
} }
private: private:
/** /**
* @brief IteratePages Iterates L2 word manager page table. * @brief IteratePages Iterates L2 word manager page table.
* @param cpu_address Start byte cpu address * @param cpu_address Start byte cpu address
* @param size Size in bytes of the region of iterate. * @param size Size in bytes of the region of iterate.
* @param func Callback for each word manager. * @param func Callback for each word manager.
* @return * @return
*/ */
template <bool create_region_on_fail, typename Func> template <bool create_region_on_fail, typename Func>
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type; using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
std::size_t remaining_size{size}; std::size_t remaining_size{size};
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
while (remaining_size > 0) { while (remaining_size > 0) {
const std::size_t copy_amount{ const std::size_t copy_amount{
std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
auto* manager{top_tier[page_index]}; auto* manager{top_tier[page_index]};
if (manager) { if (manager) {
if constexpr (BOOL_BREAK) { if constexpr (BOOL_BREAK) {
if (func(manager, page_offset, copy_amount)) { if (func(manager, page_offset, copy_amount)) {
return true; return true;
} }
} else { } else {
func(manager, page_offset, copy_amount); func(manager, page_offset, copy_amount);
} }
} else if constexpr (create_region_on_fail) { } else if constexpr (create_region_on_fail) {
CreateRegion(page_index); CreateRegion(page_index);
manager = top_tier[page_index]; manager = top_tier[page_index];
if constexpr (BOOL_BREAK) { if constexpr (BOOL_BREAK) {
if (func(manager, page_offset, copy_amount)) { if (func(manager, page_offset, copy_amount)) {
return true; return true;
} }
} else { } else {
func(manager, page_offset, copy_amount); func(manager, page_offset, copy_amount);
} }
} }
page_index++; page_index++;
page_offset = 0; page_offset = 0;
remaining_size -= copy_amount; remaining_size -= copy_amount;
} }
return false; return false;
} }
void CreateRegion(std::size_t page_index) { void CreateRegion(std::size_t page_index) {
const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
if (free_managers.empty()) { if (free_managers.empty()) {
manager_pool.emplace_back(); manager_pool.emplace_back();
auto& last_pool = manager_pool.back(); auto& last_pool = manager_pool.back();
for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE); std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE);
free_managers.push_back(&last_pool[i]); free_managers.push_back(&last_pool[i]);
} }
} }
// Each manager tracks a 4_MB virtual address space. // Each manager tracks a 4_MB virtual address space.
auto* new_manager = free_managers.back(); auto* new_manager = free_managers.back();
new_manager->SetCpuAddress(base_cpu_addr); new_manager->SetCpuAddress(base_cpu_addr);
free_managers.pop_back(); free_managers.pop_back();
top_tier[page_index] = new_manager; top_tier[page_index] = new_manager;
} }
PageManager* tracker; PageManager* tracker;
std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool; std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
std::vector<Manager*> free_managers; std::vector<Manager*> free_managers;
std::array<Manager*, NUM_HIGH_PAGES> top_tier{}; std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
}; };
} // namespace VideoCore } // namespace VideoCore

View file

@ -1,398 +1,398 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <span> #include <span>
#include <utility> #include <utility>
#include "common/div_ceil.h" #include "common/div_ceil.h"
#include "common/types.h" #include "common/types.h"
#include "video_core/page_manager.h" #include "video_core/page_manager.h"
namespace VideoCore { namespace VideoCore {
/// Number of pages whose state is packed into one 64-bit word (one bit per page)
constexpr u64 PAGES_PER_WORD{64};
/// Size in bytes of a single tracked page
constexpr u64 BYTES_PER_PAGE{4_KB};
/// Number of bytes of address space described by one state word
constexpr u64 BYTES_PER_WORD{PAGES_PER_WORD * BYTES_PER_PAGE};
/// Selects which per-page state bitmap an operation reads or modifies
enum class Type {
    CPU,       ///< Bitmap held in Words::cpu
    GPU,       ///< Bitmap held in Words::gpu
    Untracked, ///< Bitmap held in Words::untracked
};
/// Vector tracking modified pages tightly packed with small vector optimization /// Vector tracking modified pages tightly packed with small vector optimization
template <size_t stack_words = 1> template <size_t stack_words = 1>
struct WordsArray { struct WordsArray {
/// Returns the pointer to the words state /// Returns the pointer to the words state
[[nodiscard]] const u64* Pointer(bool is_short) const noexcept { [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
return is_short ? stack.data() : heap; return is_short ? stack.data() : heap;
} }
/// Returns the pointer to the words state /// Returns the pointer to the words state
[[nodiscard]] u64* Pointer(bool is_short) noexcept { [[nodiscard]] u64* Pointer(bool is_short) noexcept {
return is_short ? stack.data() : heap; return is_short ? stack.data() : heap;
} }
std::array<u64, stack_words> stack{}; ///< Small buffers storage std::array<u64, stack_words> stack{}; ///< Small buffers storage
u64* heap; ///< Not-small buffers pointer to the storage u64* heap; ///< Not-small buffers pointer to the storage
}; };
template <size_t stack_words = 1> template <size_t stack_words = 1>
struct Words { struct Words {
explicit Words() = default; explicit Words() = default;
explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD); num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
if (IsShort()) { if (IsShort()) {
cpu.stack.fill(~u64{0}); cpu.stack.fill(~u64{0});
gpu.stack.fill(0); gpu.stack.fill(0);
untracked.stack.fill(~u64{0}); untracked.stack.fill(~u64{0});
} else { } else {
// Share allocation between CPU and GPU pages and set their default values // Share allocation between CPU and GPU pages and set their default values
u64* const alloc = new u64[num_words * 3]; u64* const alloc = new u64[num_words * 3];
cpu.heap = alloc; cpu.heap = alloc;
gpu.heap = alloc + num_words; gpu.heap = alloc + num_words;
untracked.heap = alloc + num_words * 2; untracked.heap = alloc + num_words * 2;
std::fill_n(cpu.heap, num_words, ~u64{0}); std::fill_n(cpu.heap, num_words, ~u64{0});
std::fill_n(gpu.heap, num_words, 0); std::fill_n(gpu.heap, num_words, 0);
std::fill_n(untracked.heap, num_words, ~u64{0}); std::fill_n(untracked.heap, num_words, ~u64{0});
} }
// Clean up tailing bits // Clean up tailing bits
const u64 last_word_size = size_bytes % BYTES_PER_WORD; const u64 last_word_size = size_bytes % BYTES_PER_WORD;
const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
const u64 last_word = (~u64{0} << shift) >> shift; const u64 last_word = (~u64{0} << shift) >> shift;
cpu.Pointer(IsShort())[NumWords() - 1] = last_word; cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
untracked.Pointer(IsShort())[NumWords() - 1] = last_word; untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
} }
~Words() { ~Words() {
Release(); Release();
} }
Words& operator=(Words&& rhs) noexcept { Words& operator=(Words&& rhs) noexcept {
Release(); Release();
size_bytes = rhs.size_bytes; size_bytes = rhs.size_bytes;
num_words = rhs.num_words; num_words = rhs.num_words;
cpu = rhs.cpu; cpu = rhs.cpu;
gpu = rhs.gpu; gpu = rhs.gpu;
untracked = rhs.untracked; untracked = rhs.untracked;
rhs.cpu.heap = nullptr; rhs.cpu.heap = nullptr;
return *this; return *this;
} }
Words(Words&& rhs) noexcept Words(Words&& rhs) noexcept
: size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu}, : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
untracked{rhs.untracked} { untracked{rhs.untracked} {
rhs.cpu.heap = nullptr; rhs.cpu.heap = nullptr;
} }
Words& operator=(const Words&) = delete; Words& operator=(const Words&) = delete;
Words(const Words&) = delete; Words(const Words&) = delete;
/// Returns true when the buffer fits in the small vector optimization /// Returns true when the buffer fits in the small vector optimization
[[nodiscard]] bool IsShort() const noexcept { [[nodiscard]] bool IsShort() const noexcept {
return num_words <= stack_words; return num_words <= stack_words;
} }
/// Returns the number of words of the buffer /// Returns the number of words of the buffer
[[nodiscard]] size_t NumWords() const noexcept { [[nodiscard]] size_t NumWords() const noexcept {
return num_words; return num_words;
} }
/// Release buffer resources /// Release buffer resources
void Release() { void Release() {
if (!IsShort()) { if (!IsShort()) {
// CPU written words is the base for the heap allocation // CPU written words is the base for the heap allocation
delete[] cpu.heap; delete[] cpu.heap;
} }
} }
template <Type type> template <Type type>
std::span<u64> Span() noexcept { std::span<u64> Span() noexcept {
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
return std::span<u64>(cpu.Pointer(IsShort()), num_words); return std::span<u64>(cpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::GPU) { } else if constexpr (type == Type::GPU) {
return std::span<u64>(gpu.Pointer(IsShort()), num_words); return std::span<u64>(gpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::Untracked) { } else if constexpr (type == Type::Untracked) {
return std::span<u64>(untracked.Pointer(IsShort()), num_words); return std::span<u64>(untracked.Pointer(IsShort()), num_words);
} }
} }
template <Type type> template <Type type>
std::span<const u64> Span() const noexcept { std::span<const u64> Span() const noexcept {
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
return std::span<const u64>(cpu.Pointer(IsShort()), num_words); return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::GPU) { } else if constexpr (type == Type::GPU) {
return std::span<const u64>(gpu.Pointer(IsShort()), num_words); return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::Untracked) { } else if constexpr (type == Type::Untracked) {
return std::span<const u64>(untracked.Pointer(IsShort()), num_words); return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
} }
} }
u64 size_bytes = 0; u64 size_bytes = 0;
size_t num_words = 0; size_t num_words = 0;
WordsArray<stack_words> cpu; WordsArray<stack_words> cpu;
WordsArray<stack_words> gpu; WordsArray<stack_words> gpu;
WordsArray<stack_words> untracked; WordsArray<stack_words> untracked;
}; };
template <size_t stack_words = 1> template <size_t stack_words = 1>
class WordManager { class WordManager {
public: public:
explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes) explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes)
: tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {} : tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {}
explicit WordManager() = default; explicit WordManager() = default;
void SetCpuAddress(VAddr new_cpu_addr) { void SetCpuAddress(VAddr new_cpu_addr) {
cpu_addr = new_cpu_addr; cpu_addr = new_cpu_addr;
} }
VAddr GetCpuAddr() const { VAddr GetCpuAddr() const {
return cpu_addr; return cpu_addr;
} }
static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
constexpr size_t number_bits = sizeof(u64) * 8; constexpr size_t number_bits = sizeof(u64) * 8;
const size_t limit_page_end = number_bits - std::min(page_end, number_bits); const size_t limit_page_end = number_bits - std::min(page_end, number_bits);
u64 bits = (word >> page_start) << page_start; u64 bits = (word >> page_start) << page_start;
bits = (bits << limit_page_end) >> limit_page_end; bits = (bits << limit_page_end) >> limit_page_end;
return bits; return bits;
} }
static std::pair<size_t, size_t> GetWordPage(VAddr address) { static std::pair<size_t, size_t> GetWordPage(VAddr address) {
const size_t converted_address = static_cast<size_t>(address); const size_t converted_address = static_cast<size_t>(address);
const size_t word_number = converted_address / BYTES_PER_WORD; const size_t word_number = converted_address / BYTES_PER_WORD;
const size_t amount_pages = converted_address % BYTES_PER_WORD; const size_t amount_pages = converted_address % BYTES_PER_WORD;
return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE); return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE);
} }
template <typename Func> template <typename Func>
void IterateWords(size_t offset, size_t size, Func&& func) const { void IterateWords(size_t offset, size_t size, Func&& func) const {
using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>; using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL)); const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL)); const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
if (start >= SizeBytes() || end <= start) { if (start >= SizeBytes() || end <= start) {
return; return;
} }
auto [start_word, start_page] = GetWordPage(start); auto [start_word, start_page] = GetWordPage(start);
auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
const size_t num_words = NumWords(); const size_t num_words = NumWords();
start_word = std::min(start_word, num_words); start_word = std::min(start_word, num_words);
end_word = std::min(end_word, num_words); end_word = std::min(end_word, num_words);
const size_t diff = end_word - start_word; const size_t diff = end_word - start_word;
end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD; end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD;
end_word = std::min(end_word, num_words); end_word = std::min(end_word, num_words);
end_page += diff * PAGES_PER_WORD; end_page += diff * PAGES_PER_WORD;
constexpr u64 base_mask{~0ULL}; constexpr u64 base_mask{~0ULL};
for (size_t word_index = start_word; word_index < end_word; word_index++) { for (size_t word_index = start_word; word_index < end_word; word_index++) {
const u64 mask = ExtractBits(base_mask, start_page, end_page); const u64 mask = ExtractBits(base_mask, start_page, end_page);
start_page = 0; start_page = 0;
end_page -= PAGES_PER_WORD; end_page -= PAGES_PER_WORD;
if constexpr (BOOL_BREAK) { if constexpr (BOOL_BREAK) {
if (func(word_index, mask)) { if (func(word_index, mask)) {
return; return;
} }
} else { } else {
func(word_index, mask); func(word_index, mask);
} }
} }
} }
template <typename Func> template <typename Func>
void IteratePages(u64 mask, Func&& func) const { void IteratePages(u64 mask, Func&& func) const {
size_t offset = 0; size_t offset = 0;
while (mask != 0) { while (mask != 0) {
const size_t empty_bits = std::countr_zero(mask); const size_t empty_bits = std::countr_zero(mask);
offset += empty_bits; offset += empty_bits;
mask = mask >> empty_bits; mask = mask >> empty_bits;
const size_t continuous_bits = std::countr_one(mask); const size_t continuous_bits = std::countr_one(mask);
func(offset, continuous_bits); func(offset, continuous_bits);
mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0; mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0;
offset += continuous_bits; offset += continuous_bits;
} }
} }
/** /**
* Change the state of a range of pages * Change the state of a range of pages
* *
* @param dirty_addr Base address to mark or unmark as modified * @param dirty_addr Base address to mark or unmark as modified
* @param size Size in bytes to mark or unmark as modified * @param size Size in bytes to mark or unmark as modified
*/ */
template <Type type, bool enable> template <Type type, bool enable>
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
std::span<u64> state_words = words.template Span<type>(); std::span<u64> state_words = words.template Span<type>();
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>(); [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
NotifyPageTracker<!enable>(index, untracked_words[index], mask); NotifyPageTracker<!enable>(index, untracked_words[index], mask);
} }
if constexpr (enable) { if constexpr (enable) {
state_words[index] |= mask; state_words[index] |= mask;
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
untracked_words[index] |= mask; untracked_words[index] |= mask;
} }
} else { } else {
state_words[index] &= ~mask; state_words[index] &= ~mask;
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
untracked_words[index] &= ~mask; untracked_words[index] &= ~mask;
} }
} }
}); });
} }
/** /**
* Loop over each page in the given range, turn off those bits and notify the tracker if * Loop over each page in the given range, turn off those bits and notify the tracker if
* needed. Call the given function on each turned off range. * needed. Call the given function on each turned off range.
* *
* @param query_cpu_range Base CPU address to loop over * @param query_cpu_range Base CPU address to loop over
* @param size Size in bytes of the CPU range to loop over * @param size Size in bytes of the CPU range to loop over
* @param func Function to call for each turned off region * @param func Function to call for each turned off region
*/ */
template <Type type, bool clear, typename Func> template <Type type, bool clear, typename Func>
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
static_assert(type != Type::Untracked); static_assert(type != Type::Untracked);
std::span<u64> state_words = words.template Span<type>(); std::span<u64> state_words = words.template Span<type>();
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>(); [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
const size_t offset = query_cpu_range - cpu_addr; const size_t offset = query_cpu_range - cpu_addr;
bool pending = false; bool pending = false;
size_t pending_offset{}; size_t pending_offset{};
size_t pending_pointer{}; size_t pending_pointer{};
const auto release = [&]() { const auto release = [&]() {
func(cpu_addr + pending_offset * BYTES_PER_PAGE, func(cpu_addr + pending_offset * BYTES_PER_PAGE,
(pending_pointer - pending_offset) * BYTES_PER_PAGE); (pending_pointer - pending_offset) * BYTES_PER_PAGE);
}; };
IterateWords(offset, size, [&](size_t index, u64 mask) { IterateWords(offset, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::GPU) { if constexpr (type == Type::GPU) {
mask &= ~untracked_words[index]; mask &= ~untracked_words[index];
} }
const u64 word = state_words[index] & mask; const u64 word = state_words[index] & mask;
if constexpr (clear) { if constexpr (clear) {
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
NotifyPageTracker<true>(index, untracked_words[index], mask); NotifyPageTracker<true>(index, untracked_words[index], mask);
} }
state_words[index] &= ~mask; state_words[index] &= ~mask;
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
untracked_words[index] &= ~mask; untracked_words[index] &= ~mask;
} }
} }
const size_t base_offset = index * PAGES_PER_WORD; const size_t base_offset = index * PAGES_PER_WORD;
IteratePages(word, [&](size_t pages_offset, size_t pages_size) { IteratePages(word, [&](size_t pages_offset, size_t pages_size) {
const auto reset = [&]() { const auto reset = [&]() {
pending_offset = base_offset + pages_offset; pending_offset = base_offset + pages_offset;
pending_pointer = base_offset + pages_offset + pages_size; pending_pointer = base_offset + pages_offset + pages_size;
}; };
if (!pending) { if (!pending) {
reset(); reset();
pending = true; pending = true;
return; return;
} }
if (pending_pointer == base_offset + pages_offset) { if (pending_pointer == base_offset + pages_offset) {
pending_pointer += pages_size; pending_pointer += pages_size;
return; return;
} }
release(); release();
reset(); reset();
}); });
}); });
if (pending) { if (pending) {
release(); release();
} }
} }
/** /**
* Returns true when a region has been modified * Returns true when a region has been modified
* *
* @param offset Offset in bytes from the start of the buffer * @param offset Offset in bytes from the start of the buffer
* @param size Size in bytes of the region to query for modifications * @param size Size in bytes of the region to query for modifications
*/ */
template <Type type> template <Type type>
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked); static_assert(type != Type::Untracked);
const std::span<const u64> state_words = words.template Span<type>(); const std::span<const u64> state_words = words.template Span<type>();
[[maybe_unused]] const std::span<const u64> untracked_words = [[maybe_unused]] const std::span<const u64> untracked_words =
words.template Span<Type::Untracked>(); words.template Span<Type::Untracked>();
bool result = false; bool result = false;
IterateWords(offset, size, [&](size_t index, u64 mask) { IterateWords(offset, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::GPU) { if constexpr (type == Type::GPU) {
mask &= ~untracked_words[index]; mask &= ~untracked_words[index];
} }
const u64 word = state_words[index] & mask; const u64 word = state_words[index] & mask;
if (word != 0) { if (word != 0) {
result = true; result = true;
return true; return true;
} }
return false; return false;
}); });
return result; return result;
} }
/// Returns the number of words of the manager /// Returns the number of words of the manager
[[nodiscard]] size_t NumWords() const noexcept { [[nodiscard]] size_t NumWords() const noexcept {
return words.NumWords(); return words.NumWords();
} }
/// Returns the size in bytes of the manager /// Returns the size in bytes of the manager
[[nodiscard]] u64 SizeBytes() const noexcept { [[nodiscard]] u64 SizeBytes() const noexcept {
return words.size_bytes; return words.size_bytes;
} }
/// Returns true when the buffer fits in the small vector optimization /// Returns true when the buffer fits in the small vector optimization
[[nodiscard]] bool IsShort() const noexcept { [[nodiscard]] bool IsShort() const noexcept {
return words.IsShort(); return words.IsShort();
} }
private: private:
template <Type type> template <Type type>
u64* Array() noexcept { u64* Array() noexcept {
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
return words.cpu.Pointer(IsShort()); return words.cpu.Pointer(IsShort());
} else if constexpr (type == Type::GPU) { } else if constexpr (type == Type::GPU) {
return words.gpu.Pointer(IsShort()); return words.gpu.Pointer(IsShort());
} else if constexpr (type == Type::Untracked) { } else if constexpr (type == Type::Untracked) {
return words.untracked.Pointer(IsShort()); return words.untracked.Pointer(IsShort());
} }
} }
template <Type type> template <Type type>
const u64* Array() const noexcept { const u64* Array() const noexcept {
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
return words.cpu.Pointer(IsShort()); return words.cpu.Pointer(IsShort());
} else if constexpr (type == Type::GPU) { } else if constexpr (type == Type::GPU) {
return words.gpu.Pointer(IsShort()); return words.gpu.Pointer(IsShort());
} else if constexpr (type == Type::Untracked) { } else if constexpr (type == Type::Untracked) {
return words.untracked.Pointer(IsShort()); return words.untracked.Pointer(IsShort());
} }
} }
/** /**
* Notify tracker about changes in the CPU tracking state of a word in the buffer * Notify tracker about changes in the CPU tracking state of a word in the buffer
* *
* @param word_index Index to the word to notify to the tracker * @param word_index Index to the word to notify to the tracker
* @param current_bits Current state of the word * @param current_bits Current state of the word
* @param new_bits New state of the word * @param new_bits New state of the word
* *
* @tparam add_to_tracker True when the tracker should start tracking the new pages * @tparam add_to_tracker True when the tracker should start tracking the new pages
*/ */
template <bool add_to_tracker> template <bool add_to_tracker>
void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const { void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const {
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
IteratePages(changed_bits, [&](size_t offset, size_t size) { IteratePages(changed_bits, [&](size_t offset, size_t size) {
tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE, tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE,
add_to_tracker ? 1 : -1); add_to_tracker ? 1 : -1);
}); });
} }
PageManager* tracker; PageManager* tracker;
VAddr cpu_addr = 0; VAddr cpu_addr = 0;
Words<stack_words> words; Words<stack_words> words;
}; };
} // namespace VideoCore } // namespace VideoCore