diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100755 new mode 100644 index 2c5598e09..504b4eeea --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -603,6 +603,8 @@ set(MISC_LIBS src/core/libraries/screenshot/screenshot.cpp src/core/libraries/move/move.h src/core/libraries/ulobjmgr/ulobjmgr.cpp src/core/libraries/ulobjmgr/ulobjmgr.h + src/core/libraries/signin_dialog/signindialog.cpp + src/core/libraries/signin_dialog/signindialog.h ) set(DEV_TOOLS src/core/devtools/layer.cpp diff --git a/externals/sirit b/externals/sirit index 427a42c9e..09a1416ab 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 427a42c9ed99b38204d9107bc3dc14e92458acf1 +Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c diff --git a/src/common/io_file.cpp b/src/common/io_file.cpp index 3efadc6ea..6fa9062a7 100644 --- a/src/common/io_file.cpp +++ b/src/common/io_file.cpp @@ -131,9 +131,7 @@ namespace { case SeekOrigin::End: return SEEK_END; default: - LOG_ERROR(Common_Filesystem, "Unsupported origin {}, defaulting to SEEK_SET", - static_cast(origin)); - return SEEK_SET; + UNREACHABLE_MSG("Impossible SeekOrigin {}", static_cast(origin)); } } diff --git a/src/common/io_file.h b/src/common/io_file.h index fb20a2bc5..45787a092 100644 --- a/src/common/io_file.h +++ b/src/common/io_file.h @@ -61,8 +61,6 @@ enum class SeekOrigin : u32 { SetOrigin, // Seeks from the start of the file. CurrentPosition, // Seeks from the current file pointer position. End, // Seeks from the end of the file. - SeekHole, // Seeks from the start of the next hole in the file. - SeekData, // Seeks from the start of the next non-hole region in the file. }; class IOFile final { diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 078f5be89..1ec6d2e19 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -139,6 +139,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Lib, Hmd) \ SUB(Lib, Font) \ SUB(Lib, FontFt) \ + SUB(Lib, SigninDialog) \ CLS(Frontend) \ CLS(Render) \ SUB(Render, Vulkan) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 5ecde11bb..27a87e082 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -104,8 +104,7 @@ enum class Class : u8 { Lib_NpParty, ///< The LibSceNpParty implementation Lib_Zlib, ///< The LibSceZlib implementation. Lib_Hmd, ///< The LibSceHmd implementation. - Lib_Font, ///< The libSceFont implementation. - Lib_FontFt, ///< The libSceFontFt implementation. + Lib_SigninDialog, ///< The LibSigninDialog implementation. Frontend, ///< Emulator UI Render, ///< Video Core Render_Vulkan, ///< Vulkan backend diff --git a/src/common/memory_patcher.cpp b/src/common/memory_patcher.cpp index bb2d23c45..cb51828cc 100644 --- a/src/common/memory_patcher.cpp +++ b/src/common/memory_patcher.cpp @@ -23,7 +23,7 @@ namespace MemoryPatcher { -uintptr_t g_eboot_address; +EXPORT uintptr_t g_eboot_address; uint64_t g_eboot_image_size; std::string g_game_serial; std::string patchFile; diff --git a/src/common/memory_patcher.h b/src/common/memory_patcher.h index 29045a6a2..968903a85 100644 --- a/src/common/memory_patcher.h +++ b/src/common/memory_patcher.h @@ -6,9 +6,15 @@ #include #include +#if defined(WIN32) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT __attribute__((visibility("default"))) +#endif + namespace MemoryPatcher { -extern uintptr_t g_eboot_address; +extern EXPORT uintptr_t g_eboot_address; extern uint64_t g_eboot_image_size; extern std::string g_game_serial; extern std::string patchFile; diff --git a/src/core/address_space.h b/src/core/address_space.h index 7ccc2cd1e..d7f3efc75 100644 --- a/src/core/address_space.h +++ b/src/core/address_space.h @@ -19,8 +19,6 @@ enum class MemoryPermission : u32 { }; DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) -constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL; - constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL; constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL; constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL; diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index c8106b270..8937ef04b 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -464,9 +464,8 @@ static std::pair TryPatch(u8* code, PatchModule* module) { if (needs_trampoline && instruction.length < 5) { // Trampoline is needed but instruction is too short to patch. - // Return false and length to fall back to the illegal instruction handler, - // or to signal to AOT compilation that this instruction should be skipped and - // handled at runtime. + // Return false and length to signal to AOT compilation that this instruction + // should be skipped and handled at runtime. return std::make_pair(false, instruction.length); } @@ -512,136 +511,137 @@ static std::pair TryPatch(u8* code, PatchModule* module) { #if defined(ARCH_X86_64) +static bool Is4ByteExtrqOrInsertq(void* code_address) { + u8* bytes = (u8*)code_address; + if (bytes[0] == 0x66 && bytes[1] == 0x0F && bytes[2] == 0x79) { + return true; // extrq + } else if (bytes[0] == 0xF2 && bytes[1] == 0x0F && bytes[2] == 0x79) { + return true; // insertq + } else { + return false; + } +} + static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { - ZydisDecodedInstruction instruction; - ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; - const auto status = - Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + // We need to decode the instruction to find out what it is. Normally we'd use a fully fleshed + // out decoder like Zydis, however Zydis does a bunch of stuff that impact performance that we + // don't care about. We can get information about the instruction a lot faster by writing a mini + // decoder here, since we know it is definitely an extrq or an insertq. If for some reason we + // need to interpret more instructions in the future (I don't see why we would), we can revert + // to using Zydis. + ZydisMnemonic mnemonic; + u8* bytes = (u8*)code_address; + if (bytes[0] == 0x66) { + mnemonic = ZYDIS_MNEMONIC_EXTRQ; + } else if (bytes[0] == 0xF2) { + mnemonic = ZYDIS_MNEMONIC_INSERTQ; + } else { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", + fmt::ptr(code_address), + ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic) + : "Failed to decode"); + return false; + } - switch (instruction.mnemonic) { + ASSERT(bytes[1] == 0x0F && bytes[2] == 0x79); + + // Note: It's guaranteed that there's no REX prefix in these instructions checked by + // Is4ByteExtrqOrInsertq + u8 modrm = bytes[3]; + u8 rm = modrm & 0b111; + u8 reg = (modrm >> 3) & 0b111; + u8 mod = (modrm >> 6) & 0b11; + + ASSERT(mod == 0b11); // Any instruction we interpret here uses reg/reg addressing only + + int dstIndex = reg; + int srcIndex = rm; + + switch (mnemonic) { case ZYDIS_MNEMONIC_EXTRQ: { - bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && - operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; - if (immediateForm) { - LOG_CRITICAL(Core, "EXTRQ immediate form should have been patched at code address: {}", - fmt::ptr(code_address)); - return false; + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + u64 length = lowQWordSrc & 0x3F; + u64 mask; + if (length == 0) { + length = 64; // for the check below + mask = 0xFFFF'FFFF'FFFF'FFFF; } else { - ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[0].reg.value >= ZYDIS_REGISTER_XMM0 && - operands[0].reg.value <= ZYDIS_REGISTER_XMM15 && - operands[1].reg.value >= ZYDIS_REGISTER_XMM0 && - operands[1].reg.value <= ZYDIS_REGISTER_XMM15, - "Unexpected operand types for EXTRQ instruction"); - - const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; - const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; - - const auto dst = Common::GetXmmPointer(ctx, dstIndex); - const auto src = Common::GetXmmPointer(ctx, srcIndex); - - u64 lowQWordSrc; - memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); - - u64 lowQWordDst; - memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); - - u64 length = lowQWordSrc & 0x3F; - u64 mask; - if (length == 0) { - length = 64; // for the check below - mask = 0xFFFF'FFFF'FFFF'FFFF; - } else { - mask = (1ULL << length) - 1; - } - - u64 index = (lowQWordSrc >> 8) & 0x3F; - if (length + index > 64) { - // Undefined behavior if length + index is bigger than 64 according to the spec, - // we'll warn and continue execution. - LOG_TRACE(Core, - "extrq at {} with length {} and index {} is bigger than 64, " - "undefined behavior", - fmt::ptr(code_address), length, index); - } - - lowQWordDst >>= index; - lowQWordDst &= mask; - - memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); - - Common::IncrementRip(ctx, instruction.length); - - return true; + mask = (1ULL << length) - 1; } - break; + + u64 index = (lowQWordSrc >> 8) & 0x3F; + if (length + index > 64) { + // Undefined behavior if length + index is bigger than 64 according to the spec, + // we'll warn and continue execution. + LOG_TRACE(Core, + "extrq at {} with length {} and index {} is bigger than 64, " + "undefined behavior", + fmt::ptr(code_address), length, index); + } + + lowQWordDst >>= index; + lowQWordDst &= mask; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); + + Common::IncrementRip(ctx, 4); + + return true; } case ZYDIS_MNEMONIC_INSERTQ: { - bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && - operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE; - if (immediateForm) { - LOG_CRITICAL(Core, - "INSERTQ immediate form should have been patched at code address: {}", - fmt::ptr(code_address)); - return false; + const auto dst = Common::GetXmmPointer(ctx, dstIndex); + const auto src = Common::GetXmmPointer(ctx, srcIndex); + + u64 lowQWordSrc, highQWordSrc; + memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); + memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc)); + + u64 lowQWordDst; + memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); + + u64 length = highQWordSrc & 0x3F; + u64 mask; + if (length == 0) { + length = 64; // for the check below + mask = 0xFFFF'FFFF'FFFF'FFFF; } else { - ASSERT_MSG(operands[2].type == ZYDIS_OPERAND_TYPE_UNUSED && - operands[3].type == ZYDIS_OPERAND_TYPE_UNUSED, - "operands 2 and 3 must be unused for register form."); - - ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && - operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER, - "operands 0 and 1 must be registers."); - - const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; - const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0; - - const auto dst = Common::GetXmmPointer(ctx, dstIndex); - const auto src = Common::GetXmmPointer(ctx, srcIndex); - - u64 lowQWordSrc, highQWordSrc; - memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc)); - memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc)); - - u64 lowQWordDst; - memcpy(&lowQWordDst, dst, sizeof(lowQWordDst)); - - u64 length = highQWordSrc & 0x3F; - u64 mask; - if (length == 0) { - length = 64; // for the check below - mask = 0xFFFF'FFFF'FFFF'FFFF; - } else { - mask = (1ULL << length) - 1; - } - - u64 index = (highQWordSrc >> 8) & 0x3F; - if (length + index > 64) { - // Undefined behavior if length + index is bigger than 64 according to the spec, - // we'll warn and continue execution. - LOG_TRACE(Core, - "insertq at {} with length {} and index {} is bigger than 64, " - "undefined behavior", - fmt::ptr(code_address), length, index); - } - - lowQWordSrc &= mask; - lowQWordDst &= ~(mask << index); - lowQWordDst |= lowQWordSrc << index; - - memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); - - Common::IncrementRip(ctx, instruction.length); - - return true; + mask = (1ULL << length) - 1; } - break; + + u64 index = (highQWordSrc >> 8) & 0x3F; + if (length + index > 64) { + // Undefined behavior if length + index is bigger than 64 according to the spec, + // we'll warn and continue execution. + LOG_TRACE(Core, + "insertq at {} with length {} and index {} is bigger than 64, " + "undefined behavior", + fmt::ptr(code_address), length, index); + } + + lowQWordSrc &= mask; + lowQWordDst &= ~(mask << index); + lowQWordDst |= lowQWordSrc << index; + + memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); + + Common::IncrementRip(ctx, 4); + + return true; } default: { - LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", - fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic)); - return false; + UNREACHABLE(); } } @@ -695,9 +695,22 @@ static bool PatchesAccessViolationHandler(void* context, void* /* fault_address static bool PatchesIllegalInstructionHandler(void* context) { void* code_address = Common::GetRip(context); - if (!TryPatchJit(code_address)) { + if (Is4ByteExtrqOrInsertq(code_address)) { + // The instruction is not big enough for a relative jump, don't try to patch it and pass it + // to our illegal instruction interpreter directly return TryExecuteIllegalInstruction(context, code_address); + } else { + if (!TryPatchJit(code_address)) { + ZydisDecodedInstruction instruction; + ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; + const auto status = + Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); + LOG_ERROR(Core, "Failed to patch address {:x} -- mnemonic: {}", (u64)code_address, + ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic) + : "Failed to decode"); + } } + return true; } diff --git a/src/core/devices/random_device.cpp b/src/core/devices/random_device.cpp index 50934e3b8..b2754fe58 100644 --- a/src/core/devices/random_device.cpp +++ b/src/core/devices/random_device.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/logging/log.h" #include "random_device.h" diff --git a/src/core/devices/srandom_device.cpp b/src/core/devices/srandom_device.cpp index ab78ddbe2..5e51b1c39 100644 --- a/src/core/devices/srandom_device.cpp +++ b/src/core/devices/srandom_device.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/logging/log.h" #include "srandom_device.h" diff --git a/src/core/devices/urandom_device.cpp b/src/core/devices/urandom_device.cpp index c001aab83..7318a6ff7 100644 --- a/src/core/devices/urandom_device.cpp +++ b/src/core/devices/urandom_device.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include "common/logging/log.h" #include "urandom_device.h" diff --git a/src/core/devtools/widget/frame_dump.cpp b/src/core/devtools/widget/frame_dump.cpp index 646ccb6d6..2445bdcb5 100644 --- a/src/core/devtools/widget/frame_dump.cpp +++ b/src/core/devtools/widget/frame_dump.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include #include diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 25ac4921c..f2f40e0e3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -505,9 +505,10 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da u32 flags) { LOG_TRACE(Lib_GnmDriver, "called"); - if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && !cmdbuf && (size == 16) && - (shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) && - (instance_sgpr_offset < 0x10u)) { + if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && cmdbuf && (size == 16) && + (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) && + (shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es || + shader_stage == ShaderStages::Ls)) { cmdbuf = WriteHeader(cmdbuf, 2); cmdbuf = WriteBody(cmdbuf, 0u); @@ -535,10 +536,33 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da return -1; } -int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - UNREACHABLE(); - return ORBIS_OK; +int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count, + u32 shader_stage, u32 vertex_sgpr_offset, + u32 instance_sgpr_offset, u32 flags) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (cmdbuf && (size == 11) && (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) && + (shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es || + shader_stage == ShaderStages::Ls)) { + + const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable; + cmdbuf = WriteHeader( + cmdbuf, 6, PM4ShaderType::ShaderGraphics, predicate); + + const auto sgpr_offset = indirect_sgpr_offsets[shader_stage]; + + cmdbuf[0] = data_offset; + cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset; + cmdbuf[3] = max_count; + cmdbuf[4] = sizeof(DrawIndexedIndirectArgs); + cmdbuf[5] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0; + + cmdbuf += 6; + WriteTrailingNop<3>(cmdbuf); + return ORBIS_OK; + } + return -1; } int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 94d06c85f..a3d4968d3 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -51,7 +51,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da u32 max_count, u64 count_addr, u32 shader_stage, u32 vertex_sgpr_offset, u32 instance_sgpr_offset, u32 flags); -int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(); +int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count, + u32 shader_stage, u32 vertex_sgpr_offset, + u32 instance_sgpr_offset, u32 flags); int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced(); s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count, u32 flags); diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index bcfa15a62..ad372325c 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -67,10 +67,16 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { bool write = (flags & 0x3) == ORBIS_KERNEL_O_WRONLY; bool rdwr = (flags & 0x3) == ORBIS_KERNEL_O_RDWR; + if (!read && !write && !rdwr) { + // Start by checking for invalid flags. + *__Error() = POSIX_EINVAL; + return -1; + } + bool nonblock = (flags & ORBIS_KERNEL_O_NONBLOCK) != 0; bool append = (flags & ORBIS_KERNEL_O_APPEND) != 0; - bool fsync = (flags & ORBIS_KERNEL_O_FSYNC) != 0; - bool sync = (flags & ORBIS_KERNEL_O_SYNC) != 0; + // Flags fsync and sync behave the same + bool sync = (flags & ORBIS_KERNEL_O_SYNC) != 0 || (flags & ORBIS_KERNEL_O_FSYNC) != 0; bool create = (flags & ORBIS_KERNEL_O_CREAT) != 0; bool truncate = (flags & ORBIS_KERNEL_O_TRUNC) != 0; bool excl = (flags & ORBIS_KERNEL_O_EXCL) != 0; @@ -78,6 +84,10 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { bool direct = (flags & ORBIS_KERNEL_O_DIRECT) != 0; bool directory = (flags & ORBIS_KERNEL_O_DIRECTORY) != 0; + if (sync || direct || dsync || nonblock) { + LOG_WARNING(Kernel_Fs, "flags {:#x} not fully handled", flags); + } + std::string_view path{raw_path}; u32 handle = h->CreateHandle(); auto* file = h->GetFile(handle); @@ -94,84 +104,127 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) { } } - if (directory) { - file->type = Core::FileSys::FileType::Directory; - file->m_guest_name = path; - file->m_host_name = mnt->GetHostPath(file->m_guest_name); - if (!std::filesystem::is_directory(file->m_host_name)) { // directory doesn't exist + bool read_only = false; + file->m_guest_name = path; + file->m_host_name = mnt->GetHostPath(file->m_guest_name, &read_only); + bool exists = std::filesystem::exists(file->m_host_name); + s32 e = 0; + + if (create) { + if (excl && exists) { + // Error if file exists h->DeleteHandle(handle); - *__Error() = POSIX_ENOENT; + *__Error() = POSIX_EEXIST; + return -1; + } + + if (read_only) { + // Can't create files in a read only directory + h->DeleteHandle(handle); + *__Error() = POSIX_EROFS; + return -1; + } + // Create a file if it doesn't exist + Common::FS::IOFile out(file->m_host_name, Common::FS::FileAccessMode::Write); + } else if (!exists) { + // If we're not creating a file, and it doesn't exist, return ENOENT + h->DeleteHandle(handle); + *__Error() = POSIX_ENOENT; + return -1; + } + + if (std::filesystem::is_directory(file->m_host_name) || directory) { + // Directories can be opened even if the directory flag isn't set. + // In these cases, error behavior is identical to the directory code path. + directory = true; + } + + if (directory) { + if (!std::filesystem::is_directory(file->m_host_name)) { + // If the opened file is not a directory, return ENOTDIR. + // This will trigger when create & directory is specified, this is expected. + h->DeleteHandle(handle); + *__Error() = POSIX_ENOTDIR; + return -1; + } + + file->type = Core::FileSys::FileType::Directory; + + // Populate directory contents + mnt->IterateDirectory(file->m_guest_name, + [&file](const auto& ent_path, const auto ent_is_file) { + auto& dir_entry = file->dirents.emplace_back(); + dir_entry.name = ent_path.filename().string(); + dir_entry.isFile = ent_is_file; + }); + file->dirents_index = 0; + + if (read) { + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read); + } else if (write || rdwr) { + // Cannot open directories with any type of write access + h->DeleteHandle(handle); + *__Error() = POSIX_EISDIR; + return -1; + } + + if (e == EACCES) { + // Hack to bypass some platform limitations, ignore the error and continue as normal. + LOG_WARNING(Kernel_Fs, "Opening directories is not fully supported on this platform"); + e = 0; + } + + if (truncate) { + // Cannot open directories with truncate + h->DeleteHandle(handle); + *__Error() = POSIX_EISDIR; return -1; - } else { - if (create) { - return handle; // dir already exists - } else { - mnt->IterateDirectory(file->m_guest_name, - [&file](const auto& ent_path, const auto ent_is_file) { - auto& dir_entry = file->dirents.emplace_back(); - dir_entry.name = ent_path.filename().string(); - dir_entry.isFile = ent_is_file; - }); - file->dirents_index = 0; - } } } else { - file->m_guest_name = path; - file->m_host_name = mnt->GetHostPath(file->m_guest_name); - bool exists = std::filesystem::exists(file->m_host_name); - int e = 0; + file->type = Core::FileSys::FileType::Regular; - if (create) { - if (excl && exists) { - // Error if file exists - h->DeleteHandle(handle); - *__Error() = POSIX_EEXIST; - return -1; - } - // Create file if it doesn't exist - Common::FS::IOFile out(file->m_host_name, Common::FS::FileAccessMode::Write); - } else if (!exists) { - // File to open doesn't exist, return ENOENT + if (truncate && read_only) { + // Can't open files with truncate flag in a read only directory h->DeleteHandle(handle); - *__Error() = POSIX_ENOENT; + *__Error() = POSIX_EROFS; return -1; + } else if (truncate) { + // Open the file as read-write so we can truncate regardless of flags. + // Since open starts by closing the file, this won't interfere with later open calls. + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); + if (e == 0) { + // If the file was opened successfully, reduce size to 0 + file->f.SetSize(0); + } } if (read) { // Read only e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read); + } else if (read_only) { + // Can't open files with write/read-write access in a read only directory + h->DeleteHandle(handle); + *__Error() = POSIX_EROFS; + return -1; + } else if (append) { + // Append can be specified with rdwr or write, but we treat it as a separate mode. + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append); } else if (write) { // Write only - if (append) { - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append); - } else { - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write); - } + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write); } else if (rdwr) { // Read and write - if (append) { - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append); - } else { - e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); - } - } else { - // Invalid flags - *__Error() = POSIX_EINVAL; - return -1; - } - - if (truncate && e == 0) { - // If the file was opened successfully and truncate was enabled, reduce size to 0 - file->f.SetSize(0); - } - - if (e != 0) { - // Open failed in platform-specific code, errno needs to be converted. - h->DeleteHandle(handle); - SetPosixErrno(e); - return -1; + e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite); } } + + if (e != 0) { + // Open failed in platform-specific code, errno needs to be converted. + h->DeleteHandle(handle); + SetPosixErrno(e); + return -1; + } + file->is_opened = true; return handle; } @@ -365,10 +418,10 @@ s64 PS4_SYSV_ABI posix_lseek(s32 fd, s64 offset, s32 whence) { origin = Common::FS::SeekOrigin::CurrentPosition; } else if (whence == 2) { origin = Common::FS::SeekOrigin::End; - } else if (whence == 3) { - origin = Common::FS::SeekOrigin::SeekHole; - } else if (whence == 4) { - origin = Common::FS::SeekOrigin::SeekData; + } else if (whence == 3 || whence == 4) { + // whence parameter belongs to an unsupported POSIX extension + *__Error() = POSIX_ENOTTY; + return -1; } else { // whence parameter is invalid *__Error() = POSIX_EINVAL; @@ -486,13 +539,13 @@ s32 PS4_SYSV_ABI posix_rmdir(const char* path) { const std::filesystem::path dir_name = mnt->GetHostPath(path, &ro); - if (dir_name.empty() || !std::filesystem::is_directory(dir_name)) { - *__Error() = POSIX_ENOTDIR; + if (ro) { + *__Error() = POSIX_EROFS; return -1; } - if (ro) { - *__Error() = POSIX_EROFS; + if (dir_name.empty() || !std::filesystem::is_directory(dir_name)) { + *__Error() = POSIX_ENOTDIR; return -1; } @@ -523,8 +576,7 @@ s32 PS4_SYSV_ABI sceKernelRmdir(const char* path) { s32 PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) { LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path); auto* mnt = Common::Singleton::Instance(); - bool ro = false; - const auto path_name = mnt->GetHostPath(path, &ro); + const auto path_name = mnt->GetHostPath(path); std::memset(sb, 0, sizeof(OrbisKernelStat)); const bool is_dir = std::filesystem::is_directory(path_name); const bool is_file = std::filesystem::is_regular_file(path_name); @@ -545,9 +597,6 @@ s32 PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) { sb->st_blocks = (sb->st_size + 511) / 512; // TODO incomplete } - if (ro) { - sb->st_mode &= ~0000555u; - } return ORBIS_OK; } diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 8a0c91479..495ddc52f 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -126,9 +126,6 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, size_t infoSize) { LOG_INFO(Kernel_Vmm, "called addr = {}, flags = {:#x}", fmt::ptr(addr), flags); - if (!addr) { - return ORBIS_KERNEL_ERROR_EACCES; - } auto* memory = Core::Memory::Instance(); return memory->VirtualQuery(std::bit_cast(addr), flags, info); } @@ -136,7 +133,6 @@ s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtual s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment) { LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}, flags = {:#x}, alignment = {:#x}", fmt::ptr(*addr), len, flags, alignment); - if (addr == nullptr) { LOG_ERROR(Kernel_Vmm, "Address is invalid!"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -155,9 +151,12 @@ s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u auto* memory = Core::Memory::Instance(); const VAddr in_addr = reinterpret_cast(*addr); const auto map_flags = static_cast(flags); - memory->Reserve(addr, in_addr, len, map_flags, alignment); - return ORBIS_OK; + s32 result = memory->Reserve(addr, in_addr, len, map_flags, alignment); + if (result == 0) { + LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); + } + return result; } int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, int flags, @@ -172,10 +171,12 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 16KB aligned!"); return ORBIS_KERNEL_ERROR_EINVAL; } + if (!Common::Is16KBAligned(directMemoryStart)) { LOG_ERROR(Kernel_Vmm, "Start address is not 16KB aligned!"); return ORBIS_KERNEL_ERROR_EINVAL; } + if (alignment != 0) { if ((!std::has_single_bit(alignment) && !Common::Is16KBAligned(alignment))) { LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!"); @@ -183,14 +184,19 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i } } + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + const VAddr in_addr = reinterpret_cast(*addr); const auto mem_prot = static_cast(prot); const auto map_flags = static_cast(flags); auto* memory = Core::Memory::Instance(); const auto ret = - memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "", false, - directMemoryStart, alignment); + memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, name, + false, directMemoryStart, alignment); LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr)); return ret; @@ -199,7 +205,8 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i int PS4_SYSV_ABI sceKernelMapDirectMemory(void** addr, u64 len, int prot, int flags, s64 directMemoryStart, u64 alignment) { LOG_INFO(Kernel_Vmm, "called, redirected to sceKernelMapNamedDirectMemory"); - return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, ""); + return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, + "anon"); } s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot, @@ -210,17 +217,16 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t return ORBIS_KERNEL_ERROR_EINVAL; } - static constexpr size_t MaxNameSize = 32; - if (std::strlen(name) > MaxNameSize) { - LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); - return ORBIS_KERNEL_ERROR_ENAMETOOLONG; - } - if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return ORBIS_KERNEL_ERROR_EFAULT; } + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + const VAddr in_addr = reinterpret_cast(*addr_in_out); const auto mem_prot = static_cast(prot); const auto map_flags = static_cast(flags); @@ -236,7 +242,7 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, int flags) { - return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, ""); + return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon"); } int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) { @@ -304,7 +310,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_DIRECT: { result = sceKernelMapNamedDirectMemory(&entries[i].start, entries[i].length, entries[i].protection, flags, - static_cast(entries[i].offset), 0, ""); + static_cast(entries[i].offset), 0, "anon"); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, offset = {:#x}, type = {}, " "result = {}", @@ -326,7 +332,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn } case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE: { result = sceKernelMapNamedFlexibleMemory(&entries[i].start, entries[i].length, - entries[i].protection, flags, ""); + entries[i].protection, flags, "anon"); LOG_INFO(Kernel_Vmm, "entry = {}, operation = {}, len = {:#x}, type = {}, " "result = {}", @@ -356,16 +362,16 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn } s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name) { - static constexpr size_t MaxNameSize = 32; - if (std::strlen(name) > MaxNameSize) { - LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); - return ORBIS_KERNEL_ERROR_ENAMETOOLONG; - } - if (name == nullptr) { LOG_ERROR(Kernel_Vmm, "name is invalid!"); return ORBIS_KERNEL_ERROR_EFAULT; } + + if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) { + LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!"); + return ORBIS_KERNEL_ERROR_ENAMETOOLONG; + } + auto* memory = Core::Memory::Instance(); memory->NameVirtualRange(std::bit_cast(addr), len, name); return ORBIS_OK; diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 400b6c3fc..6acb559d1 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -47,6 +47,8 @@ enum MemoryOpTypes : u32 { ORBIS_KERNEL_MAP_OP_TYPE_PROTECT = 4 }; +constexpr u32 ORBIS_KERNEL_MAXIMUM_NAME_LENGTH = 32; + struct OrbisQueryInfo { uintptr_t start; uintptr_t end; @@ -59,14 +61,12 @@ struct OrbisVirtualQueryInfo { size_t offset; s32 protection; s32 memory_type; - union { - BitField<0, 1, u32> is_flexible; - BitField<1, 1, u32> is_direct; - BitField<2, 1, u32> is_stack; - BitField<3, 1, u32> is_pooled; - BitField<4, 1, u32> is_committed; - }; - std::array name; + u32 is_flexible : 1; + u32 is_direct : 1; + u32 is_stack : 1; + u32 is_pooled : 1; + u32 is_committed : 1; + char name[ORBIS_KERNEL_MAXIMUM_NAME_LENGTH]; }; struct OrbisKernelBatchMapEntry { diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index c4127ecf2..e791e74bf 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -289,7 +289,12 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt /* Create thread */ new_thread->native_thr = Core::NativeThread(); int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr); + ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret); + + if (attr != nullptr && *attr != nullptr && (*attr)->cpuset != nullptr) { + new_thread->SetAffinity((*attr)->cpuset); + } if (ret) { *thread = nullptr; } @@ -521,6 +526,69 @@ int PS4_SYSV_ABI posix_pthread_setcancelstate(PthreadCancelState state, return 0; } +int Pthread::SetAffinity(const Cpuset* cpuset) { + const auto processor_count = std::thread::hardware_concurrency(); + if (processor_count < 8) { + return 0; + } + if (cpuset == nullptr) { + return POSIX_EINVAL; + } + + u64 mask = cpuset->bits; + + uintptr_t handle = native_thr.GetHandle(); + if (handle == 0) { + return POSIX_ESRCH; + } + + // We don't use this currently because some games gets performance problems + // when applying affinity even on strong hardware + /* + #ifdef _WIN64 + DWORD_PTR affinity_mask = static_cast(mask); + if (!SetThreadAffinityMask(reinterpret_cast(handle), affinity_mask)) { + return POSIX_EINVAL; + } + + #elif defined(__linux__) + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + + u64 mask = cpuset->bits; + for (int cpu = 0; cpu < std::min(64, CPU_SETSIZE); ++cpu) { + if (mask & (1ULL << cpu)) { + CPU_SET(cpu, &cpu_set); + } + } + + int result = + pthread_setaffinity_np(static_cast(handle), sizeof(cpu_set_t), &cpu_set); + if (result != 0) { + return POSIX_EINVAL; + } + #endif + */ + return 0; +} + +int PS4_SYSV_ABI posix_pthread_setaffinity_np(PthreadT thread, size_t cpusetsize, + const Cpuset* cpusetp) { + if (thread == nullptr || cpusetp == nullptr) { + return POSIX_EINVAL; + } + thread->attr.cpusetsize = cpusetsize; + return thread->SetAffinity(cpusetp); +} + +int PS4_SYSV_ABI scePthreadSetaffinity(PthreadT thread, const Cpuset mask) { + int result = posix_pthread_setaffinity_np(thread, 0x10, &mask); + if (result != 0) { + return ErrnoToSceKernelError(result); + } + return 0; +} + void RegisterThread(Core::Loader::SymbolsResolver* sym) { // Posix LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once); @@ -544,6 +612,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", 1, 1, posix_pthread_once); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_create); + LIB_FUNCTION("5KWrg7-ZqvE", "libkernel", 1, "libkernel", 1, 1, posix_pthread_setaffinity_np); // Orbis LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_once)); @@ -566,6 +635,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_setprio)); LIB_FUNCTION("rNhWz+lvOMU", "libkernel", 1, "libkernel", 1, 1, _sceKernelSetThreadDtors); LIB_FUNCTION("6XG4B33N09g", "libkernel", 1, "libkernel", 1, 1, sched_yield); + LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, scePthreadSetaffinity) } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/threads/pthread.h b/src/core/libraries/kernel/threads/pthread.h index 089156776..09eed11b8 100644 --- a/src/core/libraries/kernel/threads/pthread.h +++ b/src/core/libraries/kernel/threads/pthread.h @@ -332,6 +332,8 @@ struct Pthread { return true; } } + + int SetAffinity(const Cpuset* cpuset); }; using PthreadT = Pthread*; diff --git a/src/core/libraries/libc_internal/printf.h b/src/core/libraries/libc_internal/printf.h index fe63481a0..9c22e922c 100644 --- a/src/core/libraries/libc_internal/printf.h +++ b/src/core/libraries/libc_internal/printf.h @@ -56,7 +56,6 @@ #include #include -#include #include #include #include diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp index 3f5baf640..3826ff793 100644 --- a/src/core/libraries/libs.cpp +++ b/src/core/libraries/libs.cpp @@ -45,6 +45,7 @@ #include "core/libraries/save_data/savedata.h" #include "core/libraries/screenshot/screenshot.h" #include "core/libraries/share_play/shareplay.h" +#include "core/libraries/signin_dialog/signindialog.h" #include "core/libraries/system/commondialog.h" #include "core/libraries/system/msgdialog.h" #include "core/libraries/system/posix.h" @@ -120,6 +121,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) { Libraries::Hmd::RegisterlibSceHmd(sym); Libraries::DiscMap::RegisterlibSceDiscMap(sym); Libraries::Ulobjmgr::RegisterlibSceUlobjmgr(sym); + Libraries::SigninDialog::RegisterlibSceSigninDialog(sym); } } // namespace Libraries diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp index a6ca8744d..edb5caa07 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.cpp @@ -49,13 +49,11 @@ void SaveDialogResult::CopyTo(OrbisSaveDataDialogResult& result) const { result.mode = this->mode; result.result = this->result; result.buttonId = this->button_id; - if (mode == SaveDataDialogMode::LIST || ElfInfo::Instance().FirmwareVer() >= ElfInfo::FW_45) { - if (result.dirName != nullptr) { - result.dirName->data.FromString(this->dir_name); - } - if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) { - result.param->FromSFO(this->param); - } + if (result.dirName != nullptr) { + result.dirName->data.FromString(this->dir_name); + } + if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) { + result.param->FromSFO(this->param); } result.userData = this->user_data; } @@ -345,12 +343,15 @@ SaveDialogUi::SaveDialogUi(SaveDialogUi&& other) noexcept } } -SaveDialogUi& SaveDialogUi::operator=(SaveDialogUi other) { +SaveDialogUi& SaveDialogUi::operator=(SaveDialogUi&& other) noexcept { std::scoped_lock lock(draw_mutex, other.draw_mutex); using std::swap; - swap(state, other.state); - swap(status, other.status); - swap(result, other.result); + state = other.state; + other.state = nullptr; + status = other.status; + other.status = nullptr; + result = other.result; + other.result = nullptr; if (status && *status == Status::RUNNING) { first_render = true; AddLayer(this); diff --git a/src/core/libraries/save_data/dialog/savedatadialog_ui.h b/src/core/libraries/save_data/dialog/savedatadialog_ui.h index aa67e1f5f..dc97268f4 100644 --- a/src/core/libraries/save_data/dialog/savedatadialog_ui.h +++ b/src/core/libraries/save_data/dialog/savedatadialog_ui.h @@ -300,7 +300,8 @@ public: ~SaveDialogUi() override; SaveDialogUi(const SaveDialogUi& other) = delete; SaveDialogUi(SaveDialogUi&& other) noexcept; - SaveDialogUi& operator=(SaveDialogUi other); + SaveDialogUi& operator=(SaveDialogUi& other) = delete; + SaveDialogUi& operator=(SaveDialogUi&& other) noexcept; void Finish(ButtonId buttonId, CommonDialog::Result r = CommonDialog::Result::OK); diff --git a/src/core/libraries/signin_dialog/signindialog.cpp b/src/core/libraries/signin_dialog/signindialog.cpp new file mode 100644 index 000000000..0e4eb63a2 --- /dev/null +++ b/src/core/libraries/signin_dialog/signindialog.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +// Generated By moduleGenerator +#include "common/logging/log.h" +#include "core/libraries/error_codes.h" +#include "core/libraries/libs.h" +#include "signindialog.h" + +namespace Libraries::SigninDialog { + +s32 PS4_SYSV_ABI sceSigninDialogInitialize() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogOpen() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +Status PS4_SYSV_ABI sceSigninDialogGetStatus() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status"); + return Status::FINISHED; +} + +Status PS4_SYSV_ABI sceSigninDialogUpdateStatus() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status"); + return Status::FINISHED; +} + +s32 PS4_SYSV_ABI sceSigninDialogGetResult() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogClose() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +s32 PS4_SYSV_ABI sceSigninDialogTerminate() { + LOG_ERROR(Lib_SigninDialog, "(STUBBED) called"); + return ORBIS_OK; +} + +void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym) { + LIB_FUNCTION("mlYGfmqE3fQ", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogInitialize); + LIB_FUNCTION("JlpJVoRWv7U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogOpen); + LIB_FUNCTION("2m077aeC+PA", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogGetStatus); + LIB_FUNCTION("Bw31liTFT3A", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogUpdateStatus); + LIB_FUNCTION("nqG7rqnYw1U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogGetResult); + LIB_FUNCTION("M3OkENHcyiU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogClose); + LIB_FUNCTION("LXlmS6PvJdU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1, + sceSigninDialogTerminate); +}; + +} // namespace Libraries::SigninDialog diff --git a/src/core/libraries/signin_dialog/signindialog.h b/src/core/libraries/signin_dialog/signindialog.h new file mode 100644 index 000000000..8726ad1f6 --- /dev/null +++ b/src/core/libraries/signin_dialog/signindialog.h @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "common/types.h" + +namespace Core::Loader { +class SymbolsResolver; +} + +enum class Status : u32 { + NONE = 0, + INITIALIZED = 1, + RUNNING = 2, + FINISHED = 3, +}; + +namespace Libraries::SigninDialog { + +s32 PS4_SYSV_ABI sceSigninDialogInitialize(); +s32 PS4_SYSV_ABI sceSigninDialogOpen(); +Status PS4_SYSV_ABI sceSigninDialogGetStatus(); +Status PS4_SYSV_ABI sceSigninDialogUpdateStatus(); +s32 PS4_SYSV_ABI sceSigninDialogGetResult(); +s32 PS4_SYSV_ABI sceSigninDialogClose(); +s32 PS4_SYSV_ABI sceSigninDialogTerminate(); + +void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym); +} // namespace Libraries::SigninDialog diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 494ffa70c..9861e813a 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -75,7 +75,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { // Clamp size to the remaining size of the current VMA. auto vma = FindVMA(virtual_addr); - ASSERT_MSG(vma != vma_map.end(), "Attempted to access invalid GPU address {:#x}", virtual_addr); + ASSERT_MSG(vma->second.Contains(virtual_addr, 0), + "Attempted to access invalid GPU address {:#x}", virtual_addr); u64 clamped_size = vma->second.base + vma->second.size - virtual_addr; ++vma; @@ -96,6 +97,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) { const VAddr virtual_addr = std::bit_cast(address); const auto& vma = FindVMA(virtual_addr)->second; + ASSERT_MSG(vma.Contains(virtual_addr, 0), + "Attempting to access out of bounds memory at address {:#x}", virtual_addr); if (vma.type != VMAType::Direct) { return false; } @@ -145,10 +148,12 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, auto mapping_end = mapping_start + size; // Find the first free, large enough dmem area in the range. - while ((!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) && - dmem_area != dmem_map.end()) { + while (!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) { // The current dmem_area isn't suitable, move to the next one. dmem_area++; + if (dmem_area == dmem_map.end()) { + break; + } // Update local variables based on the new dmem_area mapping_start = Common::AlignUp(dmem_area->second.base, alignment); @@ -172,7 +177,6 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) { std::scoped_lock lk{mutex}; auto dmem_area = CarveDmemArea(phys_addr, size); - ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size); // Release any dmem mappings that reference this physical block. std::vector> remove_list; @@ -216,12 +220,18 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, vma = FindVMA(mapped_addr)->second; } const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size, + "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Add virtual memory area @@ -229,7 +239,7 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; - new_vma.name = ""; + new_vma.name = "anon"; new_vma.type = VMAType::PoolReserved; MergeAdjacent(vma_map, new_vma_handle); @@ -247,19 +257,25 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem // Fixed mapping means the virtual address must exactly match the provided one. if (True(flags & MemoryMapFlags::Fixed)) { - auto& vma = FindVMA(mapped_addr)->second; + auto vma = FindVMA(mapped_addr)->second; // If the VMA is mapped, unmap the region first. if (vma.IsMapped()) { UnmapMemoryImpl(mapped_addr, size); vma = FindVMA(mapped_addr)->second; } const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size, + "Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Add virtual memory area @@ -267,7 +283,7 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; - new_vma.name = ""; + new_vma.name = "anon"; new_vma.type = VMAType::Reserved; MergeAdjacent(vma_map, new_vma_handle); @@ -288,7 +304,9 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. const auto& vma = FindVMA(mapped_addr)->second; const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); // Perform the mapping. void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false); @@ -302,7 +320,10 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) new_vma.is_exec = false; new_vma.phys_base = 0; - rasterizer->MapMemory(mapped_addr, size); + if (IsValidGpuMapping(mapped_addr, size)) { + rasterizer->MapMemory(mapped_addr, size); + } + return ORBIS_OK; } @@ -325,15 +346,34 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M // Fixed mapping means the virtual address must exactly match the provided one. if (True(flags & MemoryMapFlags::Fixed)) { - // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. - const auto& vma = FindVMA(mapped_addr)->second; - const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + auto vma = FindVMA(mapped_addr)->second; + size_t remaining_size = vma.base + vma.size - mapped_addr; + // There's a possible edge case where we're mapping to a partially reserved range. + // To account for this, unmap any reserved areas within this mapping range first. + auto unmap_addr = mapped_addr; + auto unmap_size = size; + while (!vma.IsMapped() && unmap_addr < mapped_addr + size && remaining_size < size) { + auto unmapped = UnmapBytesFromEntry(unmap_addr, vma, unmap_size); + unmap_addr += unmapped; + unmap_size -= unmapped; + vma = FindVMA(unmap_addr)->second; + } + + // This should return SCE_KERNEL_ERROR_ENOMEM but rarely happens. + vma = FindVMA(mapped_addr)->second; + remaining_size = vma.base + vma.size - mapped_addr; + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size, alignment); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } // Perform the mapping. @@ -353,7 +393,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M if (type == VMAType::Flexible) { flexible_usage += size; } - rasterizer->MapMemory(mapped_addr, size); + + if (IsValidGpuMapping(mapped_addr, size)) { + rasterizer->MapMemory(mapped_addr, size); + } return ORBIS_OK; } @@ -366,12 +409,18 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem // Find first free area to map the file. if (False(flags & MemoryMapFlags::Fixed)) { mapped_addr = SearchFree(mapped_addr, size_aligned, 1); + if (mapped_addr == -1) { + // No suitable memory areas to map to + return ORBIS_KERNEL_ERROR_ENOMEM; + } } if (True(flags & MemoryMapFlags::Fixed)) { const auto& vma = FindVMA(virtual_addr)->second; const size_t remaining_size = vma.base + vma.size - virtual_addr; - ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size, + "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}", + vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size); } // Map the file. @@ -404,7 +453,9 @@ void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) { const auto start_in_vma = virtual_addr - vma_base_addr; const auto type = vma_base.type; - rasterizer->UnmapMemory(virtual_addr, size); + if (IsValidGpuMapping(virtual_addr, size)) { + rasterizer->UnmapMemory(virtual_addr, size); + } // Mark region as free and attempt to coalesce it with neighbours. const auto new_it = CarveVMA(virtual_addr, size); @@ -444,7 +495,10 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma if (type == VMAType::Flexible) { flexible_usage -= adjusted_size; } - rasterizer->UnmapMemory(virtual_addr, adjusted_size); + + if (IsValidGpuMapping(virtual_addr, adjusted_size)) { + rasterizer->UnmapMemory(virtual_addr, adjusted_size); + } // Mark region as free and attempt to coalesce it with neighbours. const auto new_it = CarveVMA(virtual_addr, adjusted_size); @@ -471,6 +525,8 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) { do { auto it = FindVMA(virtual_addr + unmapped_bytes); auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(virtual_addr + unmapped_bytes, 0), + "Address {:#x} is out of bounds", virtual_addr + unmapped_bytes); auto unmapped = UnmapBytesFromEntry(virtual_addr + unmapped_bytes, vma_base, size - unmapped_bytes); ASSERT_MSG(unmapped > 0, "Failed to unmap memory, progress is impossible"); @@ -485,7 +541,10 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr const auto it = FindVMA(addr); const auto& vma = it->second; - ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped"); + if (!vma.Contains(addr, 0) || vma.IsFree()) { + LOG_ERROR(Kernel_Vmm, "Address {:#x} is not mapped", addr); + return ORBIS_KERNEL_ERROR_EACCES; + } if (start != nullptr) { *start = reinterpret_cast(vma.base); @@ -555,6 +614,8 @@ s32 MemoryManager::Protect(VAddr addr, size_t size, MemoryProt prot) { do { auto it = FindVMA(addr + protected_bytes); auto& vma_base = it->second; + ASSERT_MSG(vma_base.Contains(addr + protected_bytes, 0), "Address {:#x} is out of bounds", + addr + protected_bytes); auto result = 0; result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot); if (result < 0) { @@ -571,8 +632,16 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, ::Libraries::Kernel::OrbisVirtualQueryInfo* info) { std::scoped_lock lk{mutex}; - auto it = FindVMA(addr); - if (it->second.type == VMAType::Free && flags == 1) { + // FindVMA on addresses before the vma_map return garbage data. + auto query_addr = + addr < impl.SystemManagedVirtualBase() ? impl.SystemManagedVirtualBase() : addr; + if (addr < query_addr && flags == 0) { + LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region"); + return ORBIS_KERNEL_ERROR_EACCES; + } + auto it = FindVMA(query_addr); + + while (it->second.type == VMAType::Free && flags == 1 && it != --vma_map.end()) { ++it; } if (it->second.type == VMAType::Free) { @@ -585,15 +654,17 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, info->end = vma.base + vma.size; info->offset = vma.phys_base; info->protection = static_cast(vma.prot); - info->is_flexible.Assign(vma.type == VMAType::Flexible); - info->is_direct.Assign(vma.type == VMAType::Direct); - info->is_stack.Assign(vma.type == VMAType::Stack); - info->is_pooled.Assign(vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled); - info->is_committed.Assign(vma.IsMapped()); - vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size())); + info->is_flexible = vma.type == VMAType::Flexible ? 1 : 0; + info->is_direct = vma.type == VMAType::Direct ? 1 : 0; + info->is_stack = vma.type == VMAType::Stack ? 1 : 0; + info->is_pooled = vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled ? 1 : 0; + info->is_committed = vma.IsMapped() ? 1 : 0; + + strncpy(info->name, vma.name.data(), ::Libraries::Kernel::ORBIS_KERNEL_MAXIMUM_NAME_LENGTH); + if (vma.type == VMAType::Direct) { const auto dmem_it = FindDmemArea(vma.phys_base); - ASSERT(dmem_it != dmem_map.end()); + ASSERT_MSG(vma.phys_base <= dmem_it->second.GetEnd(), "vma.phys_base is not in dmem_map!"); info->memory_type = dmem_it->second.memory_type; } else { info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION; @@ -607,11 +678,11 @@ int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next, std::scoped_lock lk{mutex}; auto dmem_area = FindDmemArea(addr); - while (dmem_area != dmem_map.end() && dmem_area->second.is_free && find_next) { + while (dmem_area != --dmem_map.end() && dmem_area->second.is_free && find_next) { dmem_area++; } - if (dmem_area == dmem_map.end() || dmem_area->second.is_free) { + if (dmem_area->second.is_free) { LOG_ERROR(Core, "Unable to find allocated direct memory region to query!"); return ORBIS_KERNEL_ERROR_EACCES; } @@ -691,36 +762,56 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) virtual_addr = min_search_address; } + // If the requested address is beyond the maximum our code can handle, throw an assert + auto max_search_address = impl.UserVirtualBase() + impl.UserVirtualSize(); + ASSERT_MSG(virtual_addr <= max_search_address, "Input address {:#x} is out of bounds", + virtual_addr); + auto it = FindVMA(virtual_addr); - ASSERT_MSG(it != vma_map.end(), "Specified mapping address was not found!"); // If the VMA is free and contains the requested mapping we are done. if (it->second.IsFree() && it->second.Contains(virtual_addr, size)) { return virtual_addr; } + // Search for the first free VMA that fits our mapping. - const auto is_suitable = [&] { + while (it != vma_map.end()) { if (!it->second.IsFree()) { - return false; + it++; + continue; } + const auto& vma = it->second; virtual_addr = Common::AlignUp(vma.base, alignment); // Sometimes the alignment itself might be larger than the VMA. if (virtual_addr > vma.base + vma.size) { - return false; + it++; + continue; } + + // Make sure the address is within our defined bounds + if (virtual_addr >= max_search_address) { + // There are no free mappings within our safely usable address space. + break; + } + + // If there's enough space in the VMA, return the address. const size_t remaining_size = vma.base + vma.size - virtual_addr; - return remaining_size >= size; - }; - while (!is_suitable()) { - ++it; + if (remaining_size >= size) { + return virtual_addr; + } + it++; } - return virtual_addr; + + // Couldn't find a suitable VMA, return an error. + LOG_ERROR(Kernel_Vmm, "Couldn't find a free mapping for address {:#x}, size {:#x}", + virtual_addr, size); + return -1; } MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); - ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); + ASSERT_MSG(vma_handle->second.Contains(virtual_addr, 0), "Virtual address not in vm_map"); const VirtualMemoryArea& vma = vma_handle->second; ASSERT_MSG(vma.base <= virtual_addr, "Adding a mapping to already mapped region"); @@ -749,7 +840,7 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) { auto dmem_handle = FindDmemArea(addr); - ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map"); + ASSERT_MSG(addr <= dmem_handle->second.GetEnd(), "Physical address not in dmem_map"); const DirectMemoryArea& area = dmem_handle->second; ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region"); @@ -804,7 +895,7 @@ int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, auto dmem_area = FindDmemArea(addr); - if (dmem_area == dmem_map.end() || dmem_area->second.is_free) { + if (addr > dmem_area->second.GetEnd() || dmem_area->second.is_free) { LOG_ERROR(Core, "Unable to find allocated direct memory region to check type!"); return ORBIS_KERNEL_ERROR_ENOENT; } diff --git a/src/core/memory.h b/src/core/memory.h index a6a55e288..3a204eb96 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -157,6 +157,12 @@ public: return impl.SystemReservedVirtualBase(); } + bool IsValidGpuMapping(VAddr virtual_addr, u64 size) { + // The PS4's GPU can only handle 40 bit addresses. + const VAddr max_gpu_address{0x10000000000}; + return virtual_addr + size < max_gpu_address; + } + bool IsValidAddress(const void* addr) const noexcept { const VAddr virtual_addr = reinterpret_cast(addr); const auto end_it = std::prev(vma_map.end()); @@ -186,7 +192,7 @@ public: int PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot); int MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, - MemoryMapFlags flags, VMAType type, std::string_view name = "", + MemoryMapFlags flags, VMAType type, std::string_view name = "anon", bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0); int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, diff --git a/src/core/module.cpp b/src/core/module.cpp index cbe44457c..f31bbed6c 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -19,8 +19,7 @@ namespace Core { using EntryFunc = PS4_SYSV_ABI int (*)(size_t args, const void* argp, void* param); -static u64 LoadOffset = CODE_BASE_OFFSET; -static constexpr u64 CODE_BASE_INCR = 0x010000000u; +static constexpr u64 ModuleLoadBase = 0x800000000; static u64 GetAlignedSize(const elf_program_header& phdr) { return (phdr.p_align != 0 ? (phdr.p_memsz + (phdr.p_align - 1)) & ~(phdr.p_align - 1) @@ -84,7 +83,7 @@ static std::string StringToNid(std::string_view symbol) { } Module::Module(Core::MemoryManager* memory_, const std::filesystem::path& file_, u32& max_tls_index) - : memory{memory_}, file{file_}, name{file.stem().string()} { + : memory{memory_}, file{file_}, name{file.filename().string()} { elf.Open(file); if (elf.IsElfFile()) { LoadModuleToMemory(max_tls_index); @@ -113,10 +112,8 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { // Map module segments (and possible TLS trampolines) void** out_addr = reinterpret_cast(&base_virtual_addr); - memory->MapMemory(out_addr, memory->SystemReservedVirtualBase() + LoadOffset, - aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite, - MemoryMapFlags::Fixed, VMAType::Code, name, true); - LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR); + memory->MapMemory(out_addr, ModuleLoadBase, aligned_base_size + TrampolineSize, + MemoryProt::CpuReadWrite, MemoryMapFlags::NoFlags, VMAType::Code, name, true); LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr)); #ifdef ARCH_X86_64 @@ -229,7 +226,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) { LOG_INFO(Core_Linker, "program entry addr ..........: {:#018x}", entry_addr); if (MemoryPatcher::g_eboot_address == 0) { - if (name == "eboot") { + if (name == "eboot.bin") { MemoryPatcher::g_eboot_address = base_virtual_addr; MemoryPatcher::g_eboot_image_size = base_size; MemoryPatcher::OnGameLoaded(); diff --git a/src/core/tls.h b/src/core/tls.h index 6edd6a297..46ca8153b 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -5,6 +5,8 @@ #include "common/types.h" +void* memset(void* ptr, int value, size_t num); + namespace Xbyak { class CodeGenerator; } @@ -41,9 +43,18 @@ Tcb* GetTcbBase(); /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); +template +__attribute__((optnone)) void ClearStack() { + volatile void* buf = alloca(size); + memset(const_cast(buf), 0, size); + buf = nullptr; +} + template ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) { EnsureThreadInitialized(); + // clear stack to avoid trash from EnsureThreadInitialized + ClearStack<13_KB>(); return func(std::forward(args)...); } diff --git a/src/emulator.cpp b/src/emulator.cpp index ecc1cee4e..c945ff89a 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -10,7 +10,6 @@ #include "common/logging/log.h" #ifdef ENABLE_QT_GUI #include -#include "common/memory_patcher.h" #endif #include "common/assert.h" #ifdef ENABLE_DISCORD_RPC @@ -20,6 +19,7 @@ #include #endif #include "common/elf_info.h" +#include "common/memory_patcher.h" #include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" @@ -56,27 +56,6 @@ Emulator::Emulator() { WSADATA wsaData; WSAStartup(versionWanted, &wsaData); #endif - - // Create stdin/stdout/stderr - Common::Singleton::Instance()->CreateStdHandles(); - - // Defer until after logging is initialized. - memory = Core::Memory::Instance(); - controller = Common::Singleton::Instance(); - linker = Common::Singleton::Instance(); - - // Load renderdoc module. - VideoCore::LoadRenderDoc(); - - // Start the timer (Play Time) -#ifdef ENABLE_QT_GUI - start_time = std::chrono::steady_clock::now(); - const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); - QString filePath = QString::fromStdString((user_dir / "play_time.txt").string()); - QFile file(filePath); - ASSERT_MSG(file.open(QIODevice::ReadWrite | QIODevice::Text), - "Error opening or creating play_time.txt"); -#endif } Emulator::~Emulator() { @@ -100,58 +79,93 @@ void Emulator::Run(const std::filesystem::path& file, const std::vector::Instance(); - mnt->Mount(game_folder, "/app0"); + mnt->Mount(game_folder, "/app0", true); // Certain games may use /hostapp as well such as CUSA001100 - mnt->Mount(game_folder, "/hostapp"); + mnt->Mount(game_folder, "/hostapp", true); - auto& game_info = Common::ElfInfo::Instance(); + const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo"); + const auto param_sfo_exists = std::filesystem::exists(param_sfo_path); - // Loading param.sfo file if exists + // Load param.sfo details if it exists std::string id; std::string title; std::string app_version; u32 fw_version; Common::PSFAttributes psf_attributes{}; - - const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo"); - if (!std::filesystem::exists(param_sfo_path) || !Config::getSeparateLogFilesEnabled()) { - Common::Log::Initialize(); - Common::Log::Start(); - } - - if (std::filesystem::exists(param_sfo_path)) { + if (param_sfo_exists) { auto* param_sfo = Common::Singleton::Instance(); - const bool success = param_sfo->Open(param_sfo_path); - ASSERT_MSG(success, "Failed to open param.sfo"); + ASSERT_MSG(param_sfo->Open(param_sfo_path), "Failed to open param.sfo"); + const auto content_id = param_sfo->GetString("CONTENT_ID"); ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID"); + id = std::string(*content_id, 7, 9); - - if (Config::getSeparateLogFilesEnabled()) { - Common::Log::Initialize(id + ".log"); - Common::Log::Start(); + title = param_sfo->GetString("TITLE").value_or("Unknown title"); + fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); + app_version = param_sfo->GetString("APP_VER").value_or("Unknown version"); + if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) { + psf_attributes.raw = *raw_attributes; } - LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::g_version); - LOG_INFO(Loader, "Revision {}", Common::g_scm_rev); - LOG_INFO(Loader, "Branch {}", Common::g_scm_branch); - LOG_INFO(Loader, "Description {}", Common::g_scm_desc); - LOG_INFO(Loader, "Remote {}", Common::g_scm_remote_url); + } - LOG_INFO(Config, "General LogType: {}", Config::getLogType()); - LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole()); - LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); - LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); - LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); - LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId()); - LOG_INFO(Config, "Vulkan vkValidation: {}", Config::vkValidationEnabled()); - LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled()); - LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled()); - LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::getVkCrashDiagnosticEnabled()); - LOG_INFO(Config, "Vulkan hostMarkers: {}", Config::getVkHostMarkersEnabled()); - LOG_INFO(Config, "Vulkan guestMarkers: {}", Config::getVkGuestMarkersEnabled()); - LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled()); + // Initialize logging as soon as possible + if (!id.empty() && Config::getSeparateLogFilesEnabled()) { + Common::Log::Initialize(id + ".log"); + } else { + Common::Log::Initialize(); + } + Common::Log::Start(); + LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::g_version); + LOG_INFO(Loader, "Revision {}", Common::g_scm_rev); + LOG_INFO(Loader, "Branch {}", Common::g_scm_branch); + LOG_INFO(Loader, "Description {}", Common::g_scm_desc); + LOG_INFO(Loader, "Remote {}", Common::g_scm_remote_url); + + LOG_INFO(Config, "General LogType: {}", Config::getLogType()); + LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole()); + LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); + LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); + LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); + LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId()); + LOG_INFO(Config, "Vulkan vkValidation: {}", Config::vkValidationEnabled()); + LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled()); + LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled()); + LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::getVkCrashDiagnosticEnabled()); + LOG_INFO(Config, "Vulkan hostMarkers: {}", Config::getVkHostMarkersEnabled()); + LOG_INFO(Config, "Vulkan guestMarkers: {}", Config::getVkGuestMarkersEnabled()); + LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled()); + + if (param_sfo_exists) { + LOG_INFO(Loader, "Game id: {} Title: {}", id, title); + LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); + } + if (!args.empty()) { + const auto argc = std::min(args.size(), 32); + for (auto i = 0; i < argc; i++) { + LOG_INFO(Loader, "Game argument {}: {}", i, args[i]); + } + if (args.size() > 32) { + LOG_ERROR(Loader, "Too many game arguments, only passing the first 32"); + } + } + + // Create stdin/stdout/stderr + Common::Singleton::Instance()->CreateStdHandles(); + + // Initialize components + memory = Core::Memory::Instance(); + controller = Common::Singleton::Instance(); + linker = Common::Singleton::Instance(); + + // Load renderdoc module + VideoCore::LoadRenderDoc(); + + // Initialize patcher and trophies + if (!id.empty()) { + MemoryPatcher::g_game_serial = id; Libraries::NpTrophy::game_serial = id; + const auto trophyDir = Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles"; if (!std::filesystem::exists(trophyDir)) { @@ -160,41 +174,9 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorstart(60000); // 60000 ms = 1 minute -#endif - title = param_sfo->GetString("TITLE").value_or("Unknown title"); - LOG_INFO(Loader, "Game id: {} Title: {}", id, title); - fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); - app_version = param_sfo->GetString("APP_VER").value_or("Unknown version"); - LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); - if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) { - psf_attributes.raw = *raw_attributes; - } - if (!args.empty()) { - int argc = std::min(args.size(), 32); - for (int i = 0; i < argc; i++) { - LOG_INFO(Loader, "Game argument {}: {}", i, args[i]); - } - if (args.size() > 32) { - LOG_ERROR(Loader, "Too many game arguments, only passing the first 32"); - } - } - } - - const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png"); - if (std::filesystem::exists(pic1_path)) { - game_info.splash_path = pic1_path; } + auto& game_info = Common::ElfInfo::Instance(); game_info.initialized = true; game_info.game_serial = id; game_info.title = title; @@ -203,6 +185,11 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorGetHostPath("/app0/sce_sys/pic1.png"); + if (std::filesystem::exists(pic1_path)) { + game_info.splash_path = pic1_path; + } + std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version); std::string window_title = ""; if (Common::g_is_release) { @@ -233,11 +220,15 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorMount(mount_data_dir, "/data"); // should just exist, manually create with game serial + + // Mounting temp folders const auto& mount_temp_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id; - if (!std::filesystem::exists(mount_temp_dir)) { - std::filesystem::create_directory(mount_temp_dir); + if (std::filesystem::exists(mount_temp_dir)) { + // Temp folder should be cleared on each boot. + std::filesystem::remove_all(mount_temp_dir); } - mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir + std::filesystem::create_directory(mount_temp_dir); + mnt->Mount(mount_temp_dir, "/temp0"); mnt->Mount(mount_temp_dir, "/temp"); const auto& mount_download_dir = @@ -282,6 +273,25 @@ void Emulator::Run(const std::filesystem::path& file, const std::vectorstart(60000); // 60000 ms = 1 minute + + start_time = std::chrono::steady_clock::now(); + const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir); + QString filePath = QString::fromStdString((user_dir / "play_time.txt").string()); + QFile file(filePath); + ASSERT_MSG(file.open(QIODevice::ReadWrite | QIODevice::Text), + "Error opening or creating play_time.txt"); + } +#endif + linker->Execute(args); window->InitTimers(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 936f82cd6..9ebb842cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -270,6 +270,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (info.has_image_query) { ctx.AddCapability(spv::Capability::ImageQuery); } + if (info.uses_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) { + ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max"); + ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT); + } if (info.uses_lane_id) { ctx.AddCapability(spv::Capability::GroupNonUniform); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 211899714..c3799fb4b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -75,6 +75,14 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va const auto [scope, semantics]{AtomicArgs(ctx)}; return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } + +Id ImageAtomicF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const auto& texture = ctx.images[handle & 0xFFFF]; + const Id pointer{ctx.OpImageTexelPointer(ctx.image_f32, texture.id, coords, ctx.ConstU32(0U))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.F32[1], pointer, scope, semantics, value); +} } // Anonymous namespace Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { @@ -187,6 +195,40 @@ Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax); } +Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { + if (ctx.profile.supports_image_fp32_atomic_min_max) { + return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMax); + } + + const auto u32_value = ctx.OpBitcast(ctx.U32[1], value); + const auto sign_bit_set = + ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u)); + + const auto result = ctx.OpSelect( + ctx.F32[1], sign_bit_set, + EmitBitCastF32U32(ctx, EmitImageAtomicUMin32(ctx, inst, handle, coords, u32_value)), + EmitBitCastF32U32(ctx, EmitImageAtomicSMax32(ctx, inst, handle, coords, u32_value))); + + return result; +} + +Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { + if (ctx.profile.supports_image_fp32_atomic_min_max) { + return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMin); + } + + const auto u32_value = ctx.OpBitcast(ctx.U32[1], value); + const auto sign_bit_set = + ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u)); + + const auto result = ctx.OpSelect( + ctx.F32[1], sign_bit_set, + EmitBitCastF32U32(ctx, EmitImageAtomicUMax32(ctx, inst, handle, coords, u32_value)), + EmitBitCastF32U32(ctx, EmitImageAtomicSMin32(ctx, inst, handle, coords, u32_value))); + + return result; +} + Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) { // TODO: This is not yet implemented throw NotImplementedException("SPIR-V Instruction"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e4071bb95..83e8afd78 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -529,7 +529,7 @@ void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto // Bounds checking enabled, wrap in a conditional branch. auto compare_index = index; if (N > 1) { - index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1)); + compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1)); } const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size); const Id in_bounds_label = ctx.OpLabel(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 079f1005d..269f372d5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -482,6 +482,8 @@ Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); +Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); +Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 8433251ff..2640030df 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -869,6 +869,7 @@ void EmitContext::DefineImagesAndSamplers() { } if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) { image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); + image_f32 = TypePointer(spv::StorageClass::Image, F32[1]); } if (info.samplers.empty()) { return; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 784748658..38d55e0e4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -207,6 +207,7 @@ public: Id invocation_id{}; Id subgroup_local_invocation_id{}; Id image_u32{}; + Id image_f32{}; Id shared_memory_u8{}; Id shared_memory_u16{}; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index c5a5814a4..e49f95d9a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -517,7 +517,9 @@ void Translator::EmitFetch(const GcnInst& inst) { const auto values = ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1), ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3)); - const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect()); + const auto converted = + IR::ApplyReadNumberConversionVec4(ir, values, buffer.GetNumberConversion()); + const auto swizzled = ApplySwizzle(ir, converted, buffer.DstSelect()); for (u32 i = 0; i < 4; i++) { ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index ed7788d8c..5639bc56a 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -115,8 +115,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return IMAGE_ATOMIC(AtomicOp::Smin, inst); case Opcode::IMAGE_ATOMIC_UMIN: return IMAGE_ATOMIC(AtomicOp::Umin, inst); + case Opcode::IMAGE_ATOMIC_FMIN: + return IMAGE_ATOMIC(AtomicOp::Fmin, inst); case Opcode::IMAGE_ATOMIC_SMAX: return IMAGE_ATOMIC(AtomicOp::Smax, inst); + case Opcode::IMAGE_ATOMIC_FMAX: + return IMAGE_ATOMIC(AtomicOp::Fmax, inst); case Opcode::IMAGE_ATOMIC_UMAX: return IMAGE_ATOMIC(AtomicOp::Umax, inst); case Opcode::IMAGE_ATOMIC_AND: @@ -139,6 +143,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_SAMPLE_C_LZ: case Opcode::IMAGE_SAMPLE_O: case Opcode::IMAGE_SAMPLE_L_O: + case Opcode::IMAGE_SAMPLE_B_O: case Opcode::IMAGE_SAMPLE_LZ_O: case Opcode::IMAGE_SAMPLE_C_O: case Opcode::IMAGE_SAMPLE_C_LZ_O: @@ -466,6 +471,10 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) { return ir.ImageAtomicIMax(handle, body, value, true, info); case AtomicOp::Umax: return ir.ImageAtomicUMax(handle, body, value, info); + case AtomicOp::Fmax: + return ir.ImageAtomicFMax(handle, body, value, info); + case AtomicOp::Fmin: + return ir.ImageAtomicFMin(handle, body, value, info); case AtomicOp::And: return ir.ImageAtomicAnd(handle, body, value, info); case AtomicOp::Or: diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 8dcf9c5c4..12e48c8e4 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -196,6 +196,7 @@ struct Info { bool has_discard{}; bool has_image_gather{}; bool has_image_query{}; + bool uses_atomic_float_min_max{}; bool uses_lane_id{}; bool uses_group_quad{}; bool uses_group_ballot{}; @@ -280,6 +281,11 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept // Fall back to null image if unbound. return AmdGpu::Image::Null(); } + const auto data_fmt = image.GetDataFmt(); + if (is_depth && data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + return AmdGpu::Image::NullDepth(); + } return image; } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index e1ebf2206..01d945178 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1870,6 +1870,16 @@ Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value); } +Value IREmitter::ImageAtomicFMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + return Inst(Opcode::ImageAtomicFMax32, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicFMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + return Inst(Opcode::ImageAtomicFMin32, Flags{info}, handle, coords, value); +} + Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, bool is_signed, TextureInstInfo info) { return is_signed ? ImageAtomicSMax(handle, coords, value, info) diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index d978b3b4f..8f8a12736 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -321,6 +321,10 @@ public: const Value& value, TextureInstInfo info); [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicFMax(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicFMin(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, bool is_signed, TextureInstInfo info); [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 580156f5b..a57310fb9 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -94,6 +94,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::ImageAtomicUMin32: case Opcode::ImageAtomicSMax32: case Opcode::ImageAtomicUMax32: + case Opcode::ImageAtomicFMax32: + case Opcode::ImageAtomicFMin32: case Opcode::ImageAtomicInc32: case Opcode::ImageAtomicDec32: case Opcode::ImageAtomicAnd32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 6f186808c..ab6dbfde9 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -420,6 +420,8 @@ OPCODE(ImageAtomicSMin32, U32, Opaq OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicFMax32, F32, Opaque, Opaque, F32, ) +OPCODE(ImageAtomicFMin32, F32, Opaque, Opaque, F32, ) OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp index 658a495bc..65be02541 100644 --- a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp +++ b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp @@ -206,7 +206,8 @@ static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info) .swizzle = is_inst_typed ? AmdGpu::RemapSwizzle(flags.inst_data_fmt.Value(), AmdGpu::IdentityMapping) : buffer.DstSelect(), - .num_conversion = is_inst_typed ? AmdGpu::MapNumberConversion(flags.inst_num_fmt.Value()) + .num_conversion = is_inst_typed ? AmdGpu::MapNumberConversion(flags.inst_num_fmt.Value(), + flags.inst_data_fmt.Value()) : buffer.GetNumberConversion(), .num_components = AmdGpu::NumComponents(data_format), }; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 778da149f..cc0bf83d3 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -101,6 +101,8 @@ bool IsImageAtomicInstruction(const IR::Inst& inst) { case IR::Opcode::ImageAtomicUMin32: case IR::Opcode::ImageAtomicSMax32: case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicFMax32: + case IR::Opcode::ImageAtomicFMin32: case IR::Opcode::ImageAtomicInc32: case IR::Opcode::ImageAtomicDec32: case IR::Opcode::ImageAtomicAnd32: @@ -361,6 +363,12 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!"); image = AmdGpu::Image::Null(); } + const auto data_fmt = image.GetDataFmt(); + if (inst_info.is_depth && data_fmt != AmdGpu::DataFormat::Format16 && + data_fmt != AmdGpu::DataFormat::Format32) { + LOG_ERROR(Render_Vulkan, "Shader compiled using non-depth image with depth instruction!"); + image = AmdGpu::Image::NullDepth(); + } ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index d739b2da5..f53a0f4d4 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -71,6 +71,10 @@ void Visit(Info& info, const IR::Inst& inst) { case IR::Opcode::ImageQueryLod: info.has_image_query = true; break; + case IR::Opcode::ImageAtomicFMax32: + case IR::Opcode::ImageAtomicFMin32: + info.uses_atomic_float_min_max = true; + break; case IR::Opcode::LaneId: info.uses_lane_id = true; break; diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h index b65b19928..99819cbb9 100644 --- a/src/shader_recompiler/ir/reinterpret.h +++ b/src/shader_recompiler/ir/reinterpret.h @@ -34,6 +34,18 @@ inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value, case AmdGpu::NumberConversion::UnormToUbnorm: // Convert 0...1 to -1...1 return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f)); + case AmdGpu::NumberConversion::Sint8ToSnormNz: { + const IR::U32 additon = ir.IAdd(ir.IMul(ir.BitCast(value), ir.Imm32(2)), ir.Imm32(1)); + const IR::F32 left = ir.ConvertSToF(32, 32, additon); + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + return ir.FPDiv(left, max); + } + case AmdGpu::NumberConversion::Sint16ToSnormNz: { + const IR::U32 additon = ir.IAdd(ir.IMul(ir.BitCast(value), ir.Imm32(2)), ir.Imm32(1)); + const IR::F32 left = ir.ConvertSToF(32, 32, additon); + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + return ir.FPDiv(left, max); + } default: UNREACHABLE(); } @@ -66,6 +78,20 @@ inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value, case AmdGpu::NumberConversion::UnormToUbnorm: // Convert -1...1 to 0...1 return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f)); + case AmdGpu::NumberConversion::Sint8ToSnormNz: { + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + const IR::F32 mul = ir.FPMul(ir.FPClamp(value, ir.Imm32(-1.f), ir.Imm32(1.f)), max); + const IR::F32 left = ir.FPSub(mul, ir.Imm32(1.f)); + const IR::U32 raw = ir.ConvertFToS(32, ir.FPDiv(left, ir.Imm32(2.f))); + return ir.BitCast(raw); + } + case AmdGpu::NumberConversion::Sint16ToSnormNz: { + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + const IR::F32 mul = ir.FPMul(ir.FPClamp(value, ir.Imm32(-1.f), ir.Imm32(1.f)), max); + const IR::F32 left = ir.FPSub(mul, ir.Imm32(1.f)); + const IR::U32 raw = ir.ConvertFToS(32, ir.FPDiv(left, ir.Imm32(2.f))); + return ir.BitCast(raw); + } default: UNREACHABLE(); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 9aac6230a..853e4854d 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -29,6 +29,7 @@ struct Profile { bool supports_native_cube_calc{}; bool supports_trinary_minmax{}; bool supports_robust_buffer_access{}; + bool supports_image_fp32_atomic_min_max{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 517392b98..b8ed42f5b 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -169,10 +169,10 @@ static constexpr u32 MaxColorBuffers = 8; struct PsColorBuffer { AmdGpu::NumberFormat num_format : 4; - AmdGpu::NumberConversion num_conversion : 2; + AmdGpu::NumberConversion num_conversion : 3; AmdGpu::Liverpool::ShaderExportFormat export_format : 4; u32 needs_unorm_fixup : 1; - u32 pad : 21; + u32 pad : 20; AmdGpu::CompMapping swizzle; auto operator<=>(const PsColorBuffer&) const noexcept = default; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 967b952c6..4c8e3367a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -455,14 +455,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_indirect->data_offset; - const auto size = sizeof(DrawIndirectArgs); + const auto stride = sizeof(DrawIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DrawIndirect", cmd_address)); - rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0); + rasterizer->DrawIndirect(false, indirect_args_addr, offset, stride, 1, 0); rasterizer->ScopeMarkerEnd(); } break; @@ -471,7 +471,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_index_indirect->data_offset; - const auto size = sizeof(DrawIndexedIndirectArgs); + const auto stride = sizeof(DrawIndexedIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -479,25 +479,46 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("gfx:{}:DrawIndexIndirect", cmd_address)); - rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, stride, 1, 0); rasterizer->ScopeMarkerEnd(); } break; } - case PM4ItOpcode::DrawIndexIndirectCountMulti: { + case PM4ItOpcode::DrawIndexIndirectMulti: { const auto* draw_index_indirect = reinterpret_cast(header); const auto offset = draw_index_indirect->data_offset; if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } + if (rasterizer) { + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin( + fmt::format("gfx:{}:DrawIndexIndirectMulti", cmd_address)); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, + draw_index_indirect->stride, + draw_index_indirect->count, 0); + rasterizer->ScopeMarkerEnd(); + } + break; + } + case PM4ItOpcode::DrawIndexIndirectCountMulti: { + const auto* draw_index_indirect = + reinterpret_cast(header); + const auto offset = draw_index_indirect->data_offset; + if (DebugState.DumpingCurrentReg()) { + DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); + } if (rasterizer) { const auto cmd_address = reinterpret_cast(header); rasterizer->ScopeMarkerBegin( fmt::format("gfx:{}:DrawIndexIndirectCountMulti", cmd_address)); - rasterizer->DrawIndirect( - true, indirect_args_addr, offset, draw_index_indirect->stride, - draw_index_indirect->count, draw_index_indirect->countAddr); + rasterizer->DrawIndirect(true, indirect_args_addr, offset, + draw_index_indirect->stride, + draw_index_indirect->count, + draw_index_indirect->count_indirect_enable.Value() + ? draw_index_indirect->count_addr + : 0); rasterizer->ScopeMarkerEnd(); } break; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 8f9292f1c..c4bebd05f 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -924,15 +924,11 @@ struct Liverpool { } [[nodiscard]] NumberFormat GetNumberFmt() const { - // There is a small difference between T# and CB number types, account for it. - return RemapNumberFormat(info.number_type == NumberFormat::SnormNz - ? NumberFormat::Srgb - : info.number_type.Value(), - info.format); + return RemapNumberFormat(GetFixedNumberFormat(), info.format); } [[nodiscard]] NumberConversion GetNumberConversion() const { - return MapNumberConversion(info.number_type); + return MapNumberConversion(GetFixedNumberFormat(), info.format); } [[nodiscard]] CompMapping Swizzle() const { @@ -973,6 +969,13 @@ struct Liverpool { const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; return RemapSwizzle(info.format, mrt_swizzle); } + + private: + [[nodiscard]] NumberFormat GetFixedNumberFormat() const { + // There is a small difference between T# and CB number types, account for it. + return info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb + : info.number_type.Value(); + } }; enum ContextRegs : u32 { diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index ae1d32e00..6b55f5b65 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -860,6 +860,24 @@ struct PM4CmdDrawIndexIndirect { }; struct PM4CmdDrawIndexIndirectMulti { + PM4Type3Header header; ///< header + u32 data_offset; ///< Byte aligned offset where the required data structure starts + union { + u32 dw2; + BitField<0, 16, u32> base_vtx_loc; ///< Offset where the CP will write the + ///< BaseVertexLocation it fetched from memory + }; + union { + u32 dw3; + BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the + ///< StartInstanceLocation it fetched from memory + }; + u32 count; ///< Count of data structures to loop through before going to next packet + u32 stride; ///< Stride in memory from one data structure to the next + u32 draw_initiator; ///< Draw Initiator Register +}; + +struct PM4CmdDrawIndexIndirectCountMulti { PM4Type3Header header; ///< header u32 data_offset; ///< Byte aligned offset where the required data structure starts union { @@ -874,14 +892,14 @@ struct PM4CmdDrawIndexIndirectMulti { }; union { u32 dw4; - BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count + BitField<0, 16, u32> draw_index_loc; ///< register offset to write the Draw Index count BitField<30, 1, u32> - countIndirectEnable; ///< Indicates the data structure count is in memory + count_indirect_enable; ///< Indicates the data structure count is in memory BitField<31, 1, u32> - drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC + draw_index_enable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC }; u32 count; ///< Count of data structures to loop through before going to next packet - u64 countAddr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set + u64 count_addr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set u32 stride; ///< Stride in memory from one data structure to the next u32 draw_initiator; ///< Draw Initiator Register }; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 64a85c812..9060074fb 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -68,7 +68,7 @@ struct Buffer { } NumberConversion GetNumberConversion() const noexcept { - return MapNumberConversion(NumberFormat(num_format)); + return MapNumberConversion(NumberFormat(num_format), DataFormat(data_format)); } u32 GetStride() const noexcept { @@ -219,6 +219,19 @@ struct Image { return image; } + static constexpr Image NullDepth() { + Image image{}; + image.data_format = u64(DataFormat::Format32); + image.num_format = u64(NumberFormat::Float); + image.dst_sel_x = u64(CompSwizzle::Red); + image.dst_sel_y = u64(CompSwizzle::Green); + image.dst_sel_z = u64(CompSwizzle::Blue); + image.dst_sel_w = u64(CompSwizzle::Alpha); + image.tiling_index = u64(TilingMode::Texture_MicroTiled); + image.type = u64(ImageType::Color2D); + return image; + } + bool Valid() const { return (type & 0x8u) != 0; } @@ -292,7 +305,7 @@ struct Image { } NumberConversion GetNumberConversion() const noexcept { - return MapNumberConversion(NumberFormat(num_format)); + return MapNumberConversion(NumberFormat(num_format), DataFormat(data_format)); } TilingMode GetTilingMode() const { diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index d1cf19076..ab0df689e 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -197,6 +197,8 @@ enum class NumberConversion : u32 { UintToUscaled = 1, SintToSscaled = 2, UnormToUbnorm = 3, + Sint8ToSnormNz = 5, + Sint16ToSnormNz = 6, }; struct CompMapping { @@ -287,6 +289,7 @@ inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataForma case NumberFormat::Uscaled: return NumberFormat::Uint; case NumberFormat::Sscaled: + case NumberFormat::SnormNz: return NumberFormat::Sint; case NumberFormat::Ubnorm: return NumberFormat::Unorm; @@ -336,14 +339,28 @@ inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizz } } -inline NumberConversion MapNumberConversion(const NumberFormat format) { - switch (format) { +inline NumberConversion MapNumberConversion(const NumberFormat num_fmt, const DataFormat data_fmt) { + switch (num_fmt) { case NumberFormat::Uscaled: return NumberConversion::UintToUscaled; case NumberFormat::Sscaled: return NumberConversion::SintToSscaled; case NumberFormat::Ubnorm: return NumberConversion::UnormToUbnorm; + case NumberFormat::SnormNz: { + switch (data_fmt) { + case DataFormat::Format8: + case DataFormat::Format8_8: + case DataFormat::Format8_8_8_8: + return NumberConversion::Sint8ToSnormNz; + case DataFormat::Format16: + case DataFormat::Format16_16: + case DataFormat::Format16_16_16_16: + return NumberConversion::Sint16ToSnormNz; + default: + UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt)); + } + } default: return NumberConversion::None; } diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index cdf736a89..fb9fd755e 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -177,8 +177,8 @@ void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { if (instance.IsVertexInputDynamicState()) { cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data()); } else { - cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(), - host_sizes.data(), host_strides.data()); + cmdbuf.bindVertexBuffers2(0, num_buffers, host_buffers.data(), host_offsets.data(), + host_sizes.data(), host_strides.data()); } } diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 072807124..1004d850f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -210,7 +210,8 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceRobustness2FeaturesEXT, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT, vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, - vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); + vk::PhysicalDevicePortabilitySubsetFeaturesKHR, + vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>(); features = feature_chain.get().features; const vk::StructureChain properties_chain = physical_device.getProperties2< @@ -272,6 +273,13 @@ bool Instance::CreateDevice() { image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME); amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME); + shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME); + if (shader_atomic_float2) { + shader_atomic_float2_features = + feature_chain.get(); + LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}", + shader_atomic_float2_features.shaderImageFloat32AtomicMinMax); + } const bool calibrated_timestamps = TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false; @@ -330,6 +338,7 @@ bool Instance::CreateDevice() { .geometryShader = features.geometryShader, .tessellationShader = features.tessellationShader, .logicOp = features.logicOp, + .multiDrawIndirect = features.multiDrawIndirect, .depthBiasClamp = features.depthBiasClamp, .fillModeNonSolid = features.fillModeNonSolid, .depthBounds = features.depthBounds, @@ -401,6 +410,10 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{ .legacyVertexAttributes = true, }, + vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{ + .shaderImageFloat32AtomicMinMax = + shader_atomic_float2_features.shaderImageFloat32AtomicMinMax, + }, #ifdef __APPLE__ portability_features, #endif @@ -430,6 +443,9 @@ bool Instance::CreateDevice() { if (!legacy_vertex_attributes) { device_chain.unlink(); } + if (!shader_atomic_float2) { + device_chain.unlink(); + } auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get()); if (device_result != vk::Result::eSuccess) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index bf9af1f24..573473869 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -165,6 +165,12 @@ public: return amd_shader_trinary_minmax; } + /// Returns true when the shaderImageFloat32AtomicMinMax feature of + /// VK_EXT_shader_atomic_float2 is supported. + bool IsShaderAtomicFloatImage32MinMaxSupported() const { + return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -336,6 +342,7 @@ private: vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features; vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features; vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features; + vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features; vk::DriverIdKHR driver_id; vk::UniqueDebugUtilsMessengerEXT debug_callback{}; std::string vendor_name; @@ -360,6 +367,7 @@ private: bool image_load_store_lod{}; bool amd_gcn_shader{}; bool amd_shader_trinary_minmax{}; + bool shader_atomic_float2{}; bool portability_subset{}; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b991cda0..0a0c81d4c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -206,6 +206,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(), .supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(), .supports_robust_buffer_access = instance_.IsRobustBufferAccess2Supported(), + .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index 6bd4b26fa..09dd23cb6 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -270,7 +270,25 @@ Frame* Presenter::PrepareLastFrame() { return frame; } -Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) { +static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat format) { + switch (format) { + case Libraries::VideoOut::PixelFormat::A8B8G8R8Srgb: + return vk::Format::eR8G8B8A8Srgb; + case Libraries::VideoOut::PixelFormat::A8R8G8B8Srgb: + return vk::Format::eB8G8R8A8Srgb; + case Libraries::VideoOut::PixelFormat::A2R10G10B10: + case Libraries::VideoOut::PixelFormat::A2R10G10B10Srgb: + case Libraries::VideoOut::PixelFormat::A2R10G10B10Bt2020Pq: + return vk::Format::eA2R10G10B10UnormPack32; + default: + break; + } + UNREACHABLE_MSG("Unknown format={}", static_cast(format)); + return {}; +} + +Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, + const Libraries::VideoOut::PixelFormat format, bool is_eop) { // Request a free presentation frame. Frame* frame = GetRenderFrame(); @@ -324,7 +342,7 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) cmdbuf); VideoCore::ImageViewInfo info{}; - info.format = image.info.pixel_format; + info.format = GetFrameViewFormat(format); // Exclude alpha from output frame to avoid blending with UI. info.mapping = vk::ComponentMapping{ .r = vk::ComponentSwizzle::eIdentity, diff --git a/src/video_core/renderer_vulkan/vk_presenter.h b/src/video_core/renderer_vulkan/vk_presenter.h index ad2708474..8ed2052ee 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.h +++ b/src/video_core/renderer_vulkan/vk_presenter.h @@ -70,11 +70,12 @@ public: auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; const auto image_id = texture_cache.FindImage(desc); texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); - return PrepareFrameInternal(image_id, is_eop); + return PrepareFrameInternal(image_id, attribute.attrib.pixel_format, is_eop); } Frame* PrepareBlankFrame(bool is_eop) { - return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, is_eop); + return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, + Libraries::VideoOut::PixelFormat::Unknown, is_eop); } VideoCore::Image& RegisterVideoOutSurface( @@ -119,7 +120,8 @@ public: } private: - Frame* PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop = true); + Frame* PrepareFrameInternal(VideoCore::ImageId image_id, + Libraries::VideoOut::PixelFormat format, bool is_eop = true); Frame* GetRenderFrame(); void SetExpectedGameSize(s32 width, s32 height); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4caa781b9..e7b42a34b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -618,8 +618,9 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, + auto& null_image_view = + texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info); + image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view, vk::ImageLayout::eGeneral); } } else { diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 26928eaf7..39322f449 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -16,14 +16,15 @@ using VideoOutFormat = Libraries::VideoOut::PixelFormat; static vk::Format ConvertPixelFormat(const VideoOutFormat format) { switch (format) { - case VideoOutFormat::A8R8G8B8Srgb: - return vk::Format::eB8G8R8A8Srgb; case VideoOutFormat::A8B8G8R8Srgb: + // Remaining formats are mapped to RGBA for internal consistency and changed to BGRA in the + // frame image view. + case VideoOutFormat::A8R8G8B8Srgb: return vk::Format::eR8G8B8A8Srgb; case VideoOutFormat::A2R10G10B10: case VideoOutFormat::A2R10G10B10Srgb: case VideoOutFormat::A2R10G10B10Bt2020Pq: - return vk::Format::eA2R10G10B10UnormPack32; + return vk::Format::eA2B10G10R10UnormPack32; default: break; } diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 23c703d23..6a17490bf 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -34,8 +34,6 @@ struct ImageViewInfo { struct Image; -constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0}; - struct ImageView { ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image, ImageId image_id); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 047bb3dfe..82f4d6413 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -8,6 +8,7 @@ #include "common/debug.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/host_compatibility.h" @@ -23,31 +24,41 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& BufferCache& buffer_cache_, PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, tile_manager{instance, scheduler} { + // Create basic null image at fixed image ID. + const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm); + ASSERT(null_id.index == NULL_IMAGE_ID.index); +} + +TextureCache::~TextureCache() = default; + +ImageId TextureCache::GetNullImage(const vk::Format format) { + const auto existing_image = null_images.find(format); + if (existing_image != null_images.end()) { + return existing_image->second; + } + ImageInfo info{}; - info.pixel_format = vk::Format::eR8G8B8A8Unorm; + info.pixel_format = format; info.type = vk::ImageType::e2D; - info.tiling_idx = u32(AmdGpu::TilingMode::Texture_MicroTiled); + info.tiling_idx = static_cast(AmdGpu::TilingMode::Texture_MicroTiled); info.num_bits = 32; info.UpdateSize(); + const ImageId null_id = slot_images.insert(instance, scheduler, info); - ASSERT(null_id.index == NULL_IMAGE_ID.index); auto& img = slot_images[null_id]; + const vk::Image& null_image = img.image; - Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image"); + Vulkan::SetObjectName(instance.GetDevice(), null_image, + fmt::format("Null Image ({})", vk::to_string(format))); + img.flags = ImageFlagBits::Empty; img.track_addr = img.info.guest_address; img.track_addr_end = img.info.guest_address + img.info.guest_size; - ImageViewInfo view_info; - const auto null_view_id = - slot_image_views.insert(instance, view_info, slot_images[null_id], null_id); - ASSERT(null_view_id.index == NULL_IMAGE_VIEW_ID.index); - const vk::ImageView& null_image_view = slot_image_views[null_view_id].image_view.get(); - Vulkan::SetObjectName(instance.GetDevice(), null_image_view, "Null Image View"); + null_images.emplace(format, null_id); + return null_id; } -TextureCache::~TextureCache() = default; - void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { if (image.hash == 0) { // Initialize hash @@ -296,7 +307,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { const auto& info = desc.info; if (info.guest_address == 0) [[unlikely]] { - return NULL_IMAGE_ID; + return GetNullImage(info.pixel_format); } std::scoped_lock lock{mutex}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f262768ea..b6bf88958 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -246,6 +246,9 @@ private: } } + /// Gets or creates a null image for a particular format. + ImageId GetNullImage(vk::Format format); + /// Create an image from the given parameters [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr); @@ -285,6 +288,7 @@ private: Common::SlotVector slot_images; Common::SlotVector slot_image_views; tsl::robin_map samplers; + tsl::robin_map null_images; PageTable page_table; std::mutex mutex;