Merge branch 'main' into fontlib

This commit is contained in:
georgemoralis 2025-05-10 09:46:43 +03:00 committed by GitHub
commit 54efdf68b7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
67 changed files with 1104 additions and 464 deletions

2
CMakeLists.txt Executable file → Normal file
View file

@ -603,6 +603,8 @@ set(MISC_LIBS src/core/libraries/screenshot/screenshot.cpp
src/core/libraries/move/move.h
src/core/libraries/ulobjmgr/ulobjmgr.cpp
src/core/libraries/ulobjmgr/ulobjmgr.h
src/core/libraries/signin_dialog/signindialog.cpp
src/core/libraries/signin_dialog/signindialog.h
)
set(DEV_TOOLS src/core/devtools/layer.cpp

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 427a42c9ed99b38204d9107bc3dc14e92458acf1
Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c

View file

@ -131,9 +131,7 @@ namespace {
case SeekOrigin::End:
return SEEK_END;
default:
LOG_ERROR(Common_Filesystem, "Unsupported origin {}, defaulting to SEEK_SET",
static_cast<u32>(origin));
return SEEK_SET;
UNREACHABLE_MSG("Impossible SeekOrigin {}", static_cast<u32>(origin));
}
}

View file

@ -61,8 +61,6 @@ enum class SeekOrigin : u32 {
SetOrigin, // Seeks from the start of the file.
CurrentPosition, // Seeks from the current file pointer position.
End, // Seeks from the end of the file.
SeekHole, // Seeks from the start of the next hole in the file.
SeekData, // Seeks from the start of the next non-hole region in the file.
};
class IOFile final {

View file

@ -139,6 +139,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
SUB(Lib, Hmd) \
SUB(Lib, Font) \
SUB(Lib, FontFt) \
SUB(Lib, SigninDialog) \
CLS(Frontend) \
CLS(Render) \
SUB(Render, Vulkan) \

View file

@ -104,8 +104,7 @@ enum class Class : u8 {
Lib_NpParty, ///< The LibSceNpParty implementation
Lib_Zlib, ///< The LibSceZlib implementation.
Lib_Hmd, ///< The LibSceHmd implementation.
Lib_Font, ///< The libSceFont implementation.
Lib_FontFt, ///< The libSceFontFt implementation.
Lib_SigninDialog, ///< The LibSigninDialog implementation.
Frontend, ///< Emulator UI
Render, ///< Video Core
Render_Vulkan, ///< Vulkan backend

View file

@ -23,7 +23,7 @@
namespace MemoryPatcher {
uintptr_t g_eboot_address;
EXPORT uintptr_t g_eboot_address;
uint64_t g_eboot_image_size;
std::string g_game_serial;
std::string patchFile;

View file

@ -6,9 +6,15 @@
#include <string>
#include <vector>
#if defined(WIN32)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT __attribute__((visibility("default")))
#endif
namespace MemoryPatcher {
extern uintptr_t g_eboot_address;
extern EXPORT uintptr_t g_eboot_address;
extern uint64_t g_eboot_image_size;
extern std::string g_game_serial;
extern std::string patchFile;

View file

@ -19,8 +19,6 @@ enum class MemoryPermission : u32 {
};
DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission)
constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL;

View file

@ -464,9 +464,8 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
if (needs_trampoline && instruction.length < 5) {
// Trampoline is needed but instruction is too short to patch.
// Return false and length to fall back to the illegal instruction handler,
// or to signal to AOT compilation that this instruction should be skipped and
// handled at runtime.
// Return false and length to signal to AOT compilation that this instruction
// should be skipped and handled at runtime.
return std::make_pair(false, instruction.length);
}
@ -512,136 +511,137 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
#if defined(ARCH_X86_64)
static bool Is4ByteExtrqOrInsertq(void* code_address) {
u8* bytes = (u8*)code_address;
if (bytes[0] == 0x66 && bytes[1] == 0x0F && bytes[2] == 0x79) {
return true; // extrq
} else if (bytes[0] == 0xF2 && bytes[1] == 0x0F && bytes[2] == 0x79) {
return true; // insertq
} else {
return false;
}
}
static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
const auto status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
// We need to decode the instruction to find out what it is. Normally we'd use a fully fleshed
// out decoder like Zydis, however Zydis does a bunch of stuff that impact performance that we
// don't care about. We can get information about the instruction a lot faster by writing a mini
// decoder here, since we know it is definitely an extrq or an insertq. If for some reason we
// need to interpret more instructions in the future (I don't see why we would), we can revert
// to using Zydis.
ZydisMnemonic mnemonic;
u8* bytes = (u8*)code_address;
if (bytes[0] == 0x66) {
mnemonic = ZYDIS_MNEMONIC_EXTRQ;
} else if (bytes[0] == 0xF2) {
mnemonic = ZYDIS_MNEMONIC_INSERTQ;
} else {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
const auto status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address),
ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic)
: "Failed to decode");
return false;
}
switch (instruction.mnemonic) {
ASSERT(bytes[1] == 0x0F && bytes[2] == 0x79);
// Note: It's guaranteed that there's no REX prefix in these instructions checked by
// Is4ByteExtrqOrInsertq
u8 modrm = bytes[3];
u8 rm = modrm & 0b111;
u8 reg = (modrm >> 3) & 0b111;
u8 mod = (modrm >> 6) & 0b11;
ASSERT(mod == 0b11); // Any instruction we interpret here uses reg/reg addressing only
int dstIndex = reg;
int srcIndex = rm;
switch (mnemonic) {
case ZYDIS_MNEMONIC_EXTRQ: {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
if (immediateForm) {
LOG_CRITICAL(Core, "EXTRQ immediate form should have been patched at code address: {}",
fmt::ptr(code_address));
return false;
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
const auto src = Common::GetXmmPointer(ctx, srcIndex);
u64 lowQWordSrc;
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
u64 lowQWordDst;
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
u64 length = lowQWordSrc & 0x3F;
u64 mask;
if (length == 0) {
length = 64; // for the check below
mask = 0xFFFF'FFFF'FFFF'FFFF;
} else {
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
"Unexpected operand types for EXTRQ instruction");
const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0;
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
const auto src = Common::GetXmmPointer(ctx, srcIndex);
u64 lowQWordSrc;
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
u64 lowQWordDst;
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
u64 length = lowQWordSrc & 0x3F;
u64 mask;
if (length == 0) {
length = 64; // for the check below
mask = 0xFFFF'FFFF'FFFF'FFFF;
} else {
mask = (1ULL << length) - 1;
}
u64 index = (lowQWordSrc >> 8) & 0x3F;
if (length + index > 64) {
// Undefined behavior if length + index is bigger than 64 according to the spec,
// we'll warn and continue execution.
LOG_TRACE(Core,
"extrq at {} with length {} and index {} is bigger than 64, "
"undefined behavior",
fmt::ptr(code_address), length, index);
}
lowQWordDst >>= index;
lowQWordDst &= mask;
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, instruction.length);
return true;
mask = (1ULL << length) - 1;
}
break;
u64 index = (lowQWordSrc >> 8) & 0x3F;
if (length + index > 64) {
// Undefined behavior if length + index is bigger than 64 according to the spec,
// we'll warn and continue execution.
LOG_TRACE(Core,
"extrq at {} with length {} and index {} is bigger than 64, "
"undefined behavior",
fmt::ptr(code_address), length, index);
}
lowQWordDst >>= index;
lowQWordDst &= mask;
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, 4);
return true;
}
case ZYDIS_MNEMONIC_INSERTQ: {
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
if (immediateForm) {
LOG_CRITICAL(Core,
"INSERTQ immediate form should have been patched at code address: {}",
fmt::ptr(code_address));
return false;
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
const auto src = Common::GetXmmPointer(ctx, srcIndex);
u64 lowQWordSrc, highQWordSrc;
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc));
u64 lowQWordDst;
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
u64 length = highQWordSrc & 0x3F;
u64 mask;
if (length == 0) {
length = 64; // for the check below
mask = 0xFFFF'FFFF'FFFF'FFFF;
} else {
ASSERT_MSG(operands[2].type == ZYDIS_OPERAND_TYPE_UNUSED &&
operands[3].type == ZYDIS_OPERAND_TYPE_UNUSED,
"operands 2 and 3 must be unused for register form.");
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER,
"operands 0 and 1 must be registers.");
const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0;
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
const auto src = Common::GetXmmPointer(ctx, srcIndex);
u64 lowQWordSrc, highQWordSrc;
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc));
u64 lowQWordDst;
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
u64 length = highQWordSrc & 0x3F;
u64 mask;
if (length == 0) {
length = 64; // for the check below
mask = 0xFFFF'FFFF'FFFF'FFFF;
} else {
mask = (1ULL << length) - 1;
}
u64 index = (highQWordSrc >> 8) & 0x3F;
if (length + index > 64) {
// Undefined behavior if length + index is bigger than 64 according to the spec,
// we'll warn and continue execution.
LOG_TRACE(Core,
"insertq at {} with length {} and index {} is bigger than 64, "
"undefined behavior",
fmt::ptr(code_address), length, index);
}
lowQWordSrc &= mask;
lowQWordDst &= ~(mask << index);
lowQWordDst |= lowQWordSrc << index;
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, instruction.length);
return true;
mask = (1ULL << length) - 1;
}
break;
u64 index = (highQWordSrc >> 8) & 0x3F;
if (length + index > 64) {
// Undefined behavior if length + index is bigger than 64 according to the spec,
// we'll warn and continue execution.
LOG_TRACE(Core,
"insertq at {} with length {} and index {} is bigger than 64, "
"undefined behavior",
fmt::ptr(code_address), length, index);
}
lowQWordSrc &= mask;
lowQWordDst &= ~(mask << index);
lowQWordDst |= lowQWordSrc << index;
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, 4);
return true;
}
default: {
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}",
fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic));
return false;
UNREACHABLE();
}
}
@ -695,9 +695,22 @@ static bool PatchesAccessViolationHandler(void* context, void* /* fault_address
static bool PatchesIllegalInstructionHandler(void* context) {
void* code_address = Common::GetRip(context);
if (!TryPatchJit(code_address)) {
if (Is4ByteExtrqOrInsertq(code_address)) {
// The instruction is not big enough for a relative jump, don't try to patch it and pass it
// to our illegal instruction interpreter directly
return TryExecuteIllegalInstruction(context, code_address);
} else {
if (!TryPatchJit(code_address)) {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
const auto status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
LOG_ERROR(Core, "Failed to patch address {:x} -- mnemonic: {}", (u64)code_address,
ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic)
: "Failed to decode");
}
}
return true;
}

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstdlib>
#include <ctime>
#include "common/logging/log.h"
#include "random_device.h"

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstdlib>
#include <ctime>
#include "common/logging/log.h"
#include "srandom_device.h"

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstdlib>
#include <ctime>
#include "common/logging/log.h"
#include "urandom_device.h"

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstdio>
#include <ctime>
#include <fmt/chrono.h>
#include <imgui.h>
#include <magic_enum/magic_enum.hpp>

View file

@ -505,9 +505,10 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
u32 flags) {
LOG_TRACE(Lib_GnmDriver, "called");
if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && !cmdbuf && (size == 16) &&
(shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) &&
(instance_sgpr_offset < 0x10u)) {
if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && cmdbuf && (size == 16) &&
(vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) &&
(shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es ||
shader_stage == ShaderStages::Ls)) {
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 2);
cmdbuf = WriteBody(cmdbuf, 0u);
@ -535,10 +536,33 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
return -1;
}
int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
UNREACHABLE();
return ORBIS_OK;
int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count,
u32 shader_stage, u32 vertex_sgpr_offset,
u32 instance_sgpr_offset, u32 flags) {
LOG_TRACE(Lib_GnmDriver, "called");
if (cmdbuf && (size == 11) && (vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u) &&
(shader_stage == ShaderStages::Vs || shader_stage == ShaderStages::Es ||
shader_stage == ShaderStages::Ls)) {
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
cmdbuf = WriteHeader<PM4ItOpcode::DrawIndexIndirectMulti>(
cmdbuf, 6, PM4ShaderType::ShaderGraphics, predicate);
const auto sgpr_offset = indirect_sgpr_offsets[shader_stage];
cmdbuf[0] = data_offset;
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[3] = max_count;
cmdbuf[4] = sizeof(DrawIndexedIndirectArgs);
cmdbuf[5] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
cmdbuf += 6;
WriteTrailingNop<3>(cmdbuf);
return ORBIS_OK;
}
return -1;
}
int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced() {

View file

@ -51,7 +51,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
u32 max_count, u64 count_addr, u32 shader_stage,
u32 vertex_sgpr_offset, u32 instance_sgpr_offset,
u32 flags);
int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti();
int PS4_SYSV_ABI sceGnmDrawIndexIndirectMulti(u32* cmdbuf, u32 size, u32 data_offset, u32 max_count,
u32 shader_stage, u32 vertex_sgpr_offset,
u32 instance_sgpr_offset, u32 flags);
int PS4_SYSV_ABI sceGnmDrawIndexMultiInstanced();
s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset, u32 index_count,
u32 flags);

View file

@ -67,10 +67,16 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) {
bool write = (flags & 0x3) == ORBIS_KERNEL_O_WRONLY;
bool rdwr = (flags & 0x3) == ORBIS_KERNEL_O_RDWR;
if (!read && !write && !rdwr) {
// Start by checking for invalid flags.
*__Error() = POSIX_EINVAL;
return -1;
}
bool nonblock = (flags & ORBIS_KERNEL_O_NONBLOCK) != 0;
bool append = (flags & ORBIS_KERNEL_O_APPEND) != 0;
bool fsync = (flags & ORBIS_KERNEL_O_FSYNC) != 0;
bool sync = (flags & ORBIS_KERNEL_O_SYNC) != 0;
// Flags fsync and sync behave the same
bool sync = (flags & ORBIS_KERNEL_O_SYNC) != 0 || (flags & ORBIS_KERNEL_O_FSYNC) != 0;
bool create = (flags & ORBIS_KERNEL_O_CREAT) != 0;
bool truncate = (flags & ORBIS_KERNEL_O_TRUNC) != 0;
bool excl = (flags & ORBIS_KERNEL_O_EXCL) != 0;
@ -78,6 +84,10 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) {
bool direct = (flags & ORBIS_KERNEL_O_DIRECT) != 0;
bool directory = (flags & ORBIS_KERNEL_O_DIRECTORY) != 0;
if (sync || direct || dsync || nonblock) {
LOG_WARNING(Kernel_Fs, "flags {:#x} not fully handled", flags);
}
std::string_view path{raw_path};
u32 handle = h->CreateHandle();
auto* file = h->GetFile(handle);
@ -94,84 +104,127 @@ s32 PS4_SYSV_ABI open(const char* raw_path, s32 flags, u16 mode) {
}
}
if (directory) {
file->type = Core::FileSys::FileType::Directory;
file->m_guest_name = path;
file->m_host_name = mnt->GetHostPath(file->m_guest_name);
if (!std::filesystem::is_directory(file->m_host_name)) { // directory doesn't exist
bool read_only = false;
file->m_guest_name = path;
file->m_host_name = mnt->GetHostPath(file->m_guest_name, &read_only);
bool exists = std::filesystem::exists(file->m_host_name);
s32 e = 0;
if (create) {
if (excl && exists) {
// Error if file exists
h->DeleteHandle(handle);
*__Error() = POSIX_ENOENT;
*__Error() = POSIX_EEXIST;
return -1;
}
if (read_only) {
// Can't create files in a read only directory
h->DeleteHandle(handle);
*__Error() = POSIX_EROFS;
return -1;
}
// Create a file if it doesn't exist
Common::FS::IOFile out(file->m_host_name, Common::FS::FileAccessMode::Write);
} else if (!exists) {
// If we're not creating a file, and it doesn't exist, return ENOENT
h->DeleteHandle(handle);
*__Error() = POSIX_ENOENT;
return -1;
}
if (std::filesystem::is_directory(file->m_host_name) || directory) {
// Directories can be opened even if the directory flag isn't set.
// In these cases, error behavior is identical to the directory code path.
directory = true;
}
if (directory) {
if (!std::filesystem::is_directory(file->m_host_name)) {
// If the opened file is not a directory, return ENOTDIR.
// This will trigger when create & directory is specified, this is expected.
h->DeleteHandle(handle);
*__Error() = POSIX_ENOTDIR;
return -1;
}
file->type = Core::FileSys::FileType::Directory;
// Populate directory contents
mnt->IterateDirectory(file->m_guest_name,
[&file](const auto& ent_path, const auto ent_is_file) {
auto& dir_entry = file->dirents.emplace_back();
dir_entry.name = ent_path.filename().string();
dir_entry.isFile = ent_is_file;
});
file->dirents_index = 0;
if (read) {
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read);
} else if (write || rdwr) {
// Cannot open directories with any type of write access
h->DeleteHandle(handle);
*__Error() = POSIX_EISDIR;
return -1;
}
if (e == EACCES) {
// Hack to bypass some platform limitations, ignore the error and continue as normal.
LOG_WARNING(Kernel_Fs, "Opening directories is not fully supported on this platform");
e = 0;
}
if (truncate) {
// Cannot open directories with truncate
h->DeleteHandle(handle);
*__Error() = POSIX_EISDIR;
return -1;
} else {
if (create) {
return handle; // dir already exists
} else {
mnt->IterateDirectory(file->m_guest_name,
[&file](const auto& ent_path, const auto ent_is_file) {
auto& dir_entry = file->dirents.emplace_back();
dir_entry.name = ent_path.filename().string();
dir_entry.isFile = ent_is_file;
});
file->dirents_index = 0;
}
}
} else {
file->m_guest_name = path;
file->m_host_name = mnt->GetHostPath(file->m_guest_name);
bool exists = std::filesystem::exists(file->m_host_name);
int e = 0;
file->type = Core::FileSys::FileType::Regular;
if (create) {
if (excl && exists) {
// Error if file exists
h->DeleteHandle(handle);
*__Error() = POSIX_EEXIST;
return -1;
}
// Create file if it doesn't exist
Common::FS::IOFile out(file->m_host_name, Common::FS::FileAccessMode::Write);
} else if (!exists) {
// File to open doesn't exist, return ENOENT
if (truncate && read_only) {
// Can't open files with truncate flag in a read only directory
h->DeleteHandle(handle);
*__Error() = POSIX_ENOENT;
*__Error() = POSIX_EROFS;
return -1;
} else if (truncate) {
// Open the file as read-write so we can truncate regardless of flags.
// Since open starts by closing the file, this won't interfere with later open calls.
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite);
if (e == 0) {
// If the file was opened successfully, reduce size to 0
file->f.SetSize(0);
}
}
if (read) {
// Read only
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Read);
} else if (read_only) {
// Can't open files with write/read-write access in a read only directory
h->DeleteHandle(handle);
*__Error() = POSIX_EROFS;
return -1;
} else if (append) {
// Append can be specified with rdwr or write, but we treat it as a separate mode.
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append);
} else if (write) {
// Write only
if (append) {
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append);
} else {
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write);
}
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Write);
} else if (rdwr) {
// Read and write
if (append) {
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::Append);
} else {
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite);
}
} else {
// Invalid flags
*__Error() = POSIX_EINVAL;
return -1;
}
if (truncate && e == 0) {
// If the file was opened successfully and truncate was enabled, reduce size to 0
file->f.SetSize(0);
}
if (e != 0) {
// Open failed in platform-specific code, errno needs to be converted.
h->DeleteHandle(handle);
SetPosixErrno(e);
return -1;
e = file->f.Open(file->m_host_name, Common::FS::FileAccessMode::ReadWrite);
}
}
if (e != 0) {
// Open failed in platform-specific code, errno needs to be converted.
h->DeleteHandle(handle);
SetPosixErrno(e);
return -1;
}
file->is_opened = true;
return handle;
}
@ -365,10 +418,10 @@ s64 PS4_SYSV_ABI posix_lseek(s32 fd, s64 offset, s32 whence) {
origin = Common::FS::SeekOrigin::CurrentPosition;
} else if (whence == 2) {
origin = Common::FS::SeekOrigin::End;
} else if (whence == 3) {
origin = Common::FS::SeekOrigin::SeekHole;
} else if (whence == 4) {
origin = Common::FS::SeekOrigin::SeekData;
} else if (whence == 3 || whence == 4) {
// whence parameter belongs to an unsupported POSIX extension
*__Error() = POSIX_ENOTTY;
return -1;
} else {
// whence parameter is invalid
*__Error() = POSIX_EINVAL;
@ -486,13 +539,13 @@ s32 PS4_SYSV_ABI posix_rmdir(const char* path) {
const std::filesystem::path dir_name = mnt->GetHostPath(path, &ro);
if (dir_name.empty() || !std::filesystem::is_directory(dir_name)) {
*__Error() = POSIX_ENOTDIR;
if (ro) {
*__Error() = POSIX_EROFS;
return -1;
}
if (ro) {
*__Error() = POSIX_EROFS;
if (dir_name.empty() || !std::filesystem::is_directory(dir_name)) {
*__Error() = POSIX_ENOTDIR;
return -1;
}
@ -523,8 +576,7 @@ s32 PS4_SYSV_ABI sceKernelRmdir(const char* path) {
s32 PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) {
LOG_INFO(Kernel_Fs, "(PARTIAL) path = {}", path);
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
bool ro = false;
const auto path_name = mnt->GetHostPath(path, &ro);
const auto path_name = mnt->GetHostPath(path);
std::memset(sb, 0, sizeof(OrbisKernelStat));
const bool is_dir = std::filesystem::is_directory(path_name);
const bool is_file = std::filesystem::is_regular_file(path_name);
@ -545,9 +597,6 @@ s32 PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) {
sb->st_blocks = (sb->st_size + 511) / 512;
// TODO incomplete
}
if (ro) {
sb->st_mode &= ~0000555u;
}
return ORBIS_OK;
}

View file

@ -126,9 +126,6 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
size_t infoSize) {
LOG_INFO(Kernel_Vmm, "called addr = {}, flags = {:#x}", fmt::ptr(addr), flags);
if (!addr) {
return ORBIS_KERNEL_ERROR_EACCES;
}
auto* memory = Core::Memory::Instance();
return memory->VirtualQuery(std::bit_cast<VAddr>(addr), flags, info);
}
@ -136,7 +133,6 @@ s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtual
s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment) {
LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}, flags = {:#x}, alignment = {:#x}",
fmt::ptr(*addr), len, flags, alignment);
if (addr == nullptr) {
LOG_ERROR(Kernel_Vmm, "Address is invalid!");
return ORBIS_KERNEL_ERROR_EINVAL;
@ -155,9 +151,12 @@ s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u
auto* memory = Core::Memory::Instance();
const VAddr in_addr = reinterpret_cast<VAddr>(*addr);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
memory->Reserve(addr, in_addr, len, map_flags, alignment);
return ORBIS_OK;
s32 result = memory->Reserve(addr, in_addr, len, map_flags, alignment);
if (result == 0) {
LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr));
}
return result;
}
int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, int flags,
@ -172,10 +171,12 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i
LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 16KB aligned!");
return ORBIS_KERNEL_ERROR_EINVAL;
}
if (!Common::Is16KBAligned(directMemoryStart)) {
LOG_ERROR(Kernel_Vmm, "Start address is not 16KB aligned!");
return ORBIS_KERNEL_ERROR_EINVAL;
}
if (alignment != 0) {
if ((!std::has_single_bit(alignment) && !Common::Is16KBAligned(alignment))) {
LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!");
@ -183,14 +184,19 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i
}
}
if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) {
LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!");
return ORBIS_KERNEL_ERROR_ENAMETOOLONG;
}
const VAddr in_addr = reinterpret_cast<VAddr>(*addr);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
auto* memory = Core::Memory::Instance();
const auto ret =
memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, "", false,
directMemoryStart, alignment);
memory->MapMemory(addr, in_addr, len, mem_prot, map_flags, Core::VMAType::Direct, name,
false, directMemoryStart, alignment);
LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr));
return ret;
@ -199,7 +205,8 @@ int PS4_SYSV_ABI sceKernelMapNamedDirectMemory(void** addr, u64 len, int prot, i
int PS4_SYSV_ABI sceKernelMapDirectMemory(void** addr, u64 len, int prot, int flags,
s64 directMemoryStart, u64 alignment) {
LOG_INFO(Kernel_Vmm, "called, redirected to sceKernelMapNamedDirectMemory");
return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment, "");
return sceKernelMapNamedDirectMemory(addr, len, prot, flags, directMemoryStart, alignment,
"anon");
}
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
@ -210,17 +217,16 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
return ORBIS_KERNEL_ERROR_EINVAL;
}
static constexpr size_t MaxNameSize = 32;
if (std::strlen(name) > MaxNameSize) {
LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!");
return ORBIS_KERNEL_ERROR_ENAMETOOLONG;
}
if (name == nullptr) {
LOG_ERROR(Kernel_Vmm, "name is invalid!");
return ORBIS_KERNEL_ERROR_EFAULT;
}
if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) {
LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!");
return ORBIS_KERNEL_ERROR_ENAMETOOLONG;
}
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
@ -236,7 +242,7 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
int flags) {
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "");
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
}
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot) {
@ -304,7 +310,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn
case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_DIRECT: {
result = sceKernelMapNamedDirectMemory(&entries[i].start, entries[i].length,
entries[i].protection, flags,
static_cast<s64>(entries[i].offset), 0, "");
static_cast<s64>(entries[i].offset), 0, "anon");
LOG_INFO(Kernel_Vmm,
"entry = {}, operation = {}, len = {:#x}, offset = {:#x}, type = {}, "
"result = {}",
@ -326,7 +332,7 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn
}
case MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE: {
result = sceKernelMapNamedFlexibleMemory(&entries[i].start, entries[i].length,
entries[i].protection, flags, "");
entries[i].protection, flags, "anon");
LOG_INFO(Kernel_Vmm,
"entry = {}, operation = {}, len = {:#x}, type = {}, "
"result = {}",
@ -356,16 +362,16 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn
}
s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name) {
static constexpr size_t MaxNameSize = 32;
if (std::strlen(name) > MaxNameSize) {
LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!");
return ORBIS_KERNEL_ERROR_ENAMETOOLONG;
}
if (name == nullptr) {
LOG_ERROR(Kernel_Vmm, "name is invalid!");
return ORBIS_KERNEL_ERROR_EFAULT;
}
if (std::strlen(name) >= ORBIS_KERNEL_MAXIMUM_NAME_LENGTH) {
LOG_ERROR(Kernel_Vmm, "name exceeds 32 bytes!");
return ORBIS_KERNEL_ERROR_ENAMETOOLONG;
}
auto* memory = Core::Memory::Instance();
memory->NameVirtualRange(std::bit_cast<VAddr>(addr), len, name);
return ORBIS_OK;

View file

@ -47,6 +47,8 @@ enum MemoryOpTypes : u32 {
ORBIS_KERNEL_MAP_OP_TYPE_PROTECT = 4
};
constexpr u32 ORBIS_KERNEL_MAXIMUM_NAME_LENGTH = 32;
struct OrbisQueryInfo {
uintptr_t start;
uintptr_t end;
@ -59,14 +61,12 @@ struct OrbisVirtualQueryInfo {
size_t offset;
s32 protection;
s32 memory_type;
union {
BitField<0, 1, u32> is_flexible;
BitField<1, 1, u32> is_direct;
BitField<2, 1, u32> is_stack;
BitField<3, 1, u32> is_pooled;
BitField<4, 1, u32> is_committed;
};
std::array<char, 32> name;
u32 is_flexible : 1;
u32 is_direct : 1;
u32 is_stack : 1;
u32 is_pooled : 1;
u32 is_committed : 1;
char name[ORBIS_KERNEL_MAXIMUM_NAME_LENGTH];
};
struct OrbisKernelBatchMapEntry {

View file

@ -289,7 +289,12 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt
/* Create thread */
new_thread->native_thr = Core::NativeThread();
int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr);
ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret);
if (attr != nullptr && *attr != nullptr && (*attr)->cpuset != nullptr) {
new_thread->SetAffinity((*attr)->cpuset);
}
if (ret) {
*thread = nullptr;
}
@ -521,6 +526,69 @@ int PS4_SYSV_ABI posix_pthread_setcancelstate(PthreadCancelState state,
return 0;
}
int Pthread::SetAffinity(const Cpuset* cpuset) {
const auto processor_count = std::thread::hardware_concurrency();
if (processor_count < 8) {
return 0;
}
if (cpuset == nullptr) {
return POSIX_EINVAL;
}
u64 mask = cpuset->bits;
uintptr_t handle = native_thr.GetHandle();
if (handle == 0) {
return POSIX_ESRCH;
}
// We don't use this currently because some games gets performance problems
// when applying affinity even on strong hardware
/*
#ifdef _WIN64
DWORD_PTR affinity_mask = static_cast<DWORD_PTR>(mask);
if (!SetThreadAffinityMask(reinterpret_cast<HANDLE>(handle), affinity_mask)) {
return POSIX_EINVAL;
}
#elif defined(__linux__)
cpu_set_t cpu_set;
CPU_ZERO(&cpu_set);
u64 mask = cpuset->bits;
for (int cpu = 0; cpu < std::min(64, CPU_SETSIZE); ++cpu) {
if (mask & (1ULL << cpu)) {
CPU_SET(cpu, &cpu_set);
}
}
int result =
pthread_setaffinity_np(static_cast<pthread_t>(handle), sizeof(cpu_set_t), &cpu_set);
if (result != 0) {
return POSIX_EINVAL;
}
#endif
*/
return 0;
}
int PS4_SYSV_ABI posix_pthread_setaffinity_np(PthreadT thread, size_t cpusetsize,
const Cpuset* cpusetp) {
if (thread == nullptr || cpusetp == nullptr) {
return POSIX_EINVAL;
}
thread->attr.cpusetsize = cpusetsize;
return thread->SetAffinity(cpusetp);
}
int PS4_SYSV_ABI scePthreadSetaffinity(PthreadT thread, const Cpuset mask) {
int result = posix_pthread_setaffinity_np(thread, 0x10, &mask);
if (result != 0) {
return ErrnoToSceKernelError(result);
}
return 0;
}
void RegisterThread(Core::Loader::SymbolsResolver* sym) {
// Posix
LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once);
@ -544,6 +612,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("Z4QosVuAsA0", "libkernel", 1, "libkernel", 1, 1, posix_pthread_once);
LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self);
LIB_FUNCTION("OxhIB8LB-PQ", "libkernel", 1, "libkernel", 1, 1, posix_pthread_create);
LIB_FUNCTION("5KWrg7-ZqvE", "libkernel", 1, "libkernel", 1, 1, posix_pthread_setaffinity_np);
// Orbis
LIB_FUNCTION("14bOACANTBo", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_once));
@ -566,6 +635,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, ORBIS(posix_pthread_setprio));
LIB_FUNCTION("rNhWz+lvOMU", "libkernel", 1, "libkernel", 1, 1, _sceKernelSetThreadDtors);
LIB_FUNCTION("6XG4B33N09g", "libkernel", 1, "libkernel", 1, 1, sched_yield);
LIB_FUNCTION("bt3CTBKmGyI", "libkernel", 1, "libkernel", 1, 1, scePthreadSetaffinity)
}
} // namespace Libraries::Kernel

View file

@ -332,6 +332,8 @@ struct Pthread {
return true;
}
}
int SetAffinity(const Cpuset* cpuset);
};
using PthreadT = Pthread*;

View file

@ -56,7 +56,6 @@
#include <stdio.h>
#include <cstdarg>
#include <cstdbool>
#include <cstddef>
#include <cstdint>
#include <cstring>

View file

@ -45,6 +45,7 @@
#include "core/libraries/save_data/savedata.h"
#include "core/libraries/screenshot/screenshot.h"
#include "core/libraries/share_play/shareplay.h"
#include "core/libraries/signin_dialog/signindialog.h"
#include "core/libraries/system/commondialog.h"
#include "core/libraries/system/msgdialog.h"
#include "core/libraries/system/posix.h"
@ -120,6 +121,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
Libraries::Hmd::RegisterlibSceHmd(sym);
Libraries::DiscMap::RegisterlibSceDiscMap(sym);
Libraries::Ulobjmgr::RegisterlibSceUlobjmgr(sym);
Libraries::SigninDialog::RegisterlibSceSigninDialog(sym);
}
} // namespace Libraries

View file

@ -49,13 +49,11 @@ void SaveDialogResult::CopyTo(OrbisSaveDataDialogResult& result) const {
result.mode = this->mode;
result.result = this->result;
result.buttonId = this->button_id;
if (mode == SaveDataDialogMode::LIST || ElfInfo::Instance().FirmwareVer() >= ElfInfo::FW_45) {
if (result.dirName != nullptr) {
result.dirName->data.FromString(this->dir_name);
}
if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) {
result.param->FromSFO(this->param);
}
if (result.dirName != nullptr) {
result.dirName->data.FromString(this->dir_name);
}
if (result.param != nullptr && this->param.GetString(SaveParams::MAINTITLE).has_value()) {
result.param->FromSFO(this->param);
}
result.userData = this->user_data;
}
@ -345,12 +343,15 @@ SaveDialogUi::SaveDialogUi(SaveDialogUi&& other) noexcept
}
}
SaveDialogUi& SaveDialogUi::operator=(SaveDialogUi other) {
SaveDialogUi& SaveDialogUi::operator=(SaveDialogUi&& other) noexcept {
std::scoped_lock lock(draw_mutex, other.draw_mutex);
using std::swap;
swap(state, other.state);
swap(status, other.status);
swap(result, other.result);
state = other.state;
other.state = nullptr;
status = other.status;
other.status = nullptr;
result = other.result;
other.result = nullptr;
if (status && *status == Status::RUNNING) {
first_render = true;
AddLayer(this);

View file

@ -300,7 +300,8 @@ public:
~SaveDialogUi() override;
SaveDialogUi(const SaveDialogUi& other) = delete;
SaveDialogUi(SaveDialogUi&& other) noexcept;
SaveDialogUi& operator=(SaveDialogUi other);
SaveDialogUi& operator=(SaveDialogUi& other) = delete;
SaveDialogUi& operator=(SaveDialogUi&& other) noexcept;
void Finish(ButtonId buttonId, CommonDialog::Result r = CommonDialog::Result::OK);

View file

@ -0,0 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// Generated By moduleGenerator
#include "common/logging/log.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/libs.h"
#include "signindialog.h"
namespace Libraries::SigninDialog {
s32 PS4_SYSV_ABI sceSigninDialogInitialize() {
LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceSigninDialogOpen() {
LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
return ORBIS_OK;
}
Status PS4_SYSV_ABI sceSigninDialogGetStatus() {
LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status");
return Status::FINISHED;
}
Status PS4_SYSV_ABI sceSigninDialogUpdateStatus() {
LOG_ERROR(Lib_SigninDialog, "(STUBBED) called, return 'finished' status");
return Status::FINISHED;
}
s32 PS4_SYSV_ABI sceSigninDialogGetResult() {
LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceSigninDialogClose() {
LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceSigninDialogTerminate() {
LOG_ERROR(Lib_SigninDialog, "(STUBBED) called");
return ORBIS_OK;
}
void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("mlYGfmqE3fQ", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
sceSigninDialogInitialize);
LIB_FUNCTION("JlpJVoRWv7U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
sceSigninDialogOpen);
LIB_FUNCTION("2m077aeC+PA", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
sceSigninDialogGetStatus);
LIB_FUNCTION("Bw31liTFT3A", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
sceSigninDialogUpdateStatus);
LIB_FUNCTION("nqG7rqnYw1U", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
sceSigninDialogGetResult);
LIB_FUNCTION("M3OkENHcyiU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
sceSigninDialogClose);
LIB_FUNCTION("LXlmS6PvJdU", "libSceSigninDialog", 1, "libSceSigninDialog", 1, 1,
sceSigninDialogTerminate);
};
} // namespace Libraries::SigninDialog

View file

@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace Core::Loader {
class SymbolsResolver;
}
enum class Status : u32 {
NONE = 0,
INITIALIZED = 1,
RUNNING = 2,
FINISHED = 3,
};
namespace Libraries::SigninDialog {
s32 PS4_SYSV_ABI sceSigninDialogInitialize();
s32 PS4_SYSV_ABI sceSigninDialogOpen();
Status PS4_SYSV_ABI sceSigninDialogGetStatus();
Status PS4_SYSV_ABI sceSigninDialogUpdateStatus();
s32 PS4_SYSV_ABI sceSigninDialogGetResult();
s32 PS4_SYSV_ABI sceSigninDialogClose();
s32 PS4_SYSV_ABI sceSigninDialogTerminate();
void RegisterlibSceSigninDialog(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::SigninDialog

View file

@ -75,7 +75,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
// Clamp size to the remaining size of the current VMA.
auto vma = FindVMA(virtual_addr);
ASSERT_MSG(vma != vma_map.end(), "Attempted to access invalid GPU address {:#x}", virtual_addr);
ASSERT_MSG(vma->second.Contains(virtual_addr, 0),
"Attempted to access invalid GPU address {:#x}", virtual_addr);
u64 clamped_size = vma->second.base + vma->second.size - virtual_addr;
++vma;
@ -96,6 +97,8 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
const VAddr virtual_addr = std::bit_cast<VAddr>(address);
const auto& vma = FindVMA(virtual_addr)->second;
ASSERT_MSG(vma.Contains(virtual_addr, 0),
"Attempting to access out of bounds memory at address {:#x}", virtual_addr);
if (vma.type != VMAType::Direct) {
return false;
}
@ -145,10 +148,12 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
auto mapping_end = mapping_start + size;
// Find the first free, large enough dmem area in the range.
while ((!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) &&
dmem_area != dmem_map.end()) {
while (!dmem_area->second.is_free || dmem_area->second.GetEnd() < mapping_end) {
// The current dmem_area isn't suitable, move to the next one.
dmem_area++;
if (dmem_area == dmem_map.end()) {
break;
}
// Update local variables based on the new dmem_area
mapping_start = Common::AlignUp(dmem_area->second.base, alignment);
@ -172,7 +177,6 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) {
std::scoped_lock lk{mutex};
auto dmem_area = CarveDmemArea(phys_addr, size);
ASSERT(dmem_area != dmem_map.end() && dmem_area->second.size >= size);
// Release any dmem mappings that reference this physical block.
std::vector<std::pair<VAddr, u64>> remove_list;
@ -216,12 +220,18 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size,
vma = FindVMA(mapped_addr)->second;
}
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size,
"Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}",
vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
}
// Find the first free area starting with provided virtual address.
if (False(flags & MemoryMapFlags::Fixed)) {
mapped_addr = SearchFree(mapped_addr, size, alignment);
if (mapped_addr == -1) {
// No suitable memory areas to map to
return ORBIS_KERNEL_ERROR_ENOMEM;
}
}
// Add virtual memory area
@ -229,7 +239,7 @@ int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size,
auto& new_vma = new_vma_handle->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = MemoryProt::NoAccess;
new_vma.name = "";
new_vma.name = "anon";
new_vma.type = VMAType::PoolReserved;
MergeAdjacent(vma_map, new_vma_handle);
@ -247,19 +257,25 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
// Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) {
auto& vma = FindVMA(mapped_addr)->second;
auto vma = FindVMA(mapped_addr)->second;
// If the VMA is mapped, unmap the region first.
if (vma.IsMapped()) {
UnmapMemoryImpl(mapped_addr, size);
vma = FindVMA(mapped_addr)->second;
}
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size);
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size,
"Memory region {:#x} to {:#x} is not large enough to reserve {:#x} to {:#x}",
vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
}
// Find the first free area starting with provided virtual address.
if (False(flags & MemoryMapFlags::Fixed)) {
mapped_addr = SearchFree(mapped_addr, size, alignment);
if (mapped_addr == -1) {
// No suitable memory areas to map to
return ORBIS_KERNEL_ERROR_ENOMEM;
}
}
// Add virtual memory area
@ -267,7 +283,7 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
auto& new_vma = new_vma_handle->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = MemoryProt::NoAccess;
new_vma.name = "";
new_vma.name = "anon";
new_vma.type = VMAType::Reserved;
MergeAdjacent(vma_map, new_vma_handle);
@ -288,7 +304,9 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot)
// This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen.
const auto& vma = FindVMA(mapped_addr)->second;
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size);
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size,
"Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}",
vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
// Perform the mapping.
void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false);
@ -302,7 +320,10 @@ int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot)
new_vma.is_exec = false;
new_vma.phys_base = 0;
rasterizer->MapMemory(mapped_addr, size);
if (IsValidGpuMapping(mapped_addr, size)) {
rasterizer->MapMemory(mapped_addr, size);
}
return ORBIS_OK;
}
@ -325,15 +346,34 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
// Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) {
// This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen.
const auto& vma = FindVMA(mapped_addr)->second;
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size);
auto vma = FindVMA(mapped_addr)->second;
size_t remaining_size = vma.base + vma.size - mapped_addr;
// There's a possible edge case where we're mapping to a partially reserved range.
// To account for this, unmap any reserved areas within this mapping range first.
auto unmap_addr = mapped_addr;
auto unmap_size = size;
while (!vma.IsMapped() && unmap_addr < mapped_addr + size && remaining_size < size) {
auto unmapped = UnmapBytesFromEntry(unmap_addr, vma, unmap_size);
unmap_addr += unmapped;
unmap_size -= unmapped;
vma = FindVMA(unmap_addr)->second;
}
// This should return SCE_KERNEL_ERROR_ENOMEM but rarely happens.
vma = FindVMA(mapped_addr)->second;
remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size,
"Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}",
vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
}
// Find the first free area starting with provided virtual address.
if (False(flags & MemoryMapFlags::Fixed)) {
mapped_addr = SearchFree(mapped_addr, size, alignment);
if (mapped_addr == -1) {
// No suitable memory areas to map to
return ORBIS_KERNEL_ERROR_ENOMEM;
}
}
// Perform the mapping.
@ -353,7 +393,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
if (type == VMAType::Flexible) {
flexible_usage += size;
}
rasterizer->MapMemory(mapped_addr, size);
if (IsValidGpuMapping(mapped_addr, size)) {
rasterizer->MapMemory(mapped_addr, size);
}
return ORBIS_OK;
}
@ -366,12 +409,18 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem
// Find first free area to map the file.
if (False(flags & MemoryMapFlags::Fixed)) {
mapped_addr = SearchFree(mapped_addr, size_aligned, 1);
if (mapped_addr == -1) {
// No suitable memory areas to map to
return ORBIS_KERNEL_ERROR_ENOMEM;
}
}
if (True(flags & MemoryMapFlags::Fixed)) {
const auto& vma = FindVMA(virtual_addr)->second;
const size_t remaining_size = vma.base + vma.size - virtual_addr;
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size);
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size,
"Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}",
vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
}
// Map the file.
@ -404,7 +453,9 @@ void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) {
const auto start_in_vma = virtual_addr - vma_base_addr;
const auto type = vma_base.type;
rasterizer->UnmapMemory(virtual_addr, size);
if (IsValidGpuMapping(virtual_addr, size)) {
rasterizer->UnmapMemory(virtual_addr, size);
}
// Mark region as free and attempt to coalesce it with neighbours.
const auto new_it = CarveVMA(virtual_addr, size);
@ -444,7 +495,10 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
if (type == VMAType::Flexible) {
flexible_usage -= adjusted_size;
}
rasterizer->UnmapMemory(virtual_addr, adjusted_size);
if (IsValidGpuMapping(virtual_addr, adjusted_size)) {
rasterizer->UnmapMemory(virtual_addr, adjusted_size);
}
// Mark region as free and attempt to coalesce it with neighbours.
const auto new_it = CarveVMA(virtual_addr, adjusted_size);
@ -471,6 +525,8 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) {
do {
auto it = FindVMA(virtual_addr + unmapped_bytes);
auto& vma_base = it->second;
ASSERT_MSG(vma_base.Contains(virtual_addr + unmapped_bytes, 0),
"Address {:#x} is out of bounds", virtual_addr + unmapped_bytes);
auto unmapped =
UnmapBytesFromEntry(virtual_addr + unmapped_bytes, vma_base, size - unmapped_bytes);
ASSERT_MSG(unmapped > 0, "Failed to unmap memory, progress is impossible");
@ -485,7 +541,10 @@ int MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr
const auto it = FindVMA(addr);
const auto& vma = it->second;
ASSERT_MSG(vma.type != VMAType::Free, "Provided address is not mapped");
if (!vma.Contains(addr, 0) || vma.IsFree()) {
LOG_ERROR(Kernel_Vmm, "Address {:#x} is not mapped", addr);
return ORBIS_KERNEL_ERROR_EACCES;
}
if (start != nullptr) {
*start = reinterpret_cast<void*>(vma.base);
@ -555,6 +614,8 @@ s32 MemoryManager::Protect(VAddr addr, size_t size, MemoryProt prot) {
do {
auto it = FindVMA(addr + protected_bytes);
auto& vma_base = it->second;
ASSERT_MSG(vma_base.Contains(addr + protected_bytes, 0), "Address {:#x} is out of bounds",
addr + protected_bytes);
auto result = 0;
result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot);
if (result < 0) {
@ -571,8 +632,16 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
::Libraries::Kernel::OrbisVirtualQueryInfo* info) {
std::scoped_lock lk{mutex};
auto it = FindVMA(addr);
if (it->second.type == VMAType::Free && flags == 1) {
// FindVMA on addresses before the vma_map return garbage data.
auto query_addr =
addr < impl.SystemManagedVirtualBase() ? impl.SystemManagedVirtualBase() : addr;
if (addr < query_addr && flags == 0) {
LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region");
return ORBIS_KERNEL_ERROR_EACCES;
}
auto it = FindVMA(query_addr);
while (it->second.type == VMAType::Free && flags == 1 && it != --vma_map.end()) {
++it;
}
if (it->second.type == VMAType::Free) {
@ -585,15 +654,17 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
info->end = vma.base + vma.size;
info->offset = vma.phys_base;
info->protection = static_cast<s32>(vma.prot);
info->is_flexible.Assign(vma.type == VMAType::Flexible);
info->is_direct.Assign(vma.type == VMAType::Direct);
info->is_stack.Assign(vma.type == VMAType::Stack);
info->is_pooled.Assign(vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled);
info->is_committed.Assign(vma.IsMapped());
vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size()));
info->is_flexible = vma.type == VMAType::Flexible ? 1 : 0;
info->is_direct = vma.type == VMAType::Direct ? 1 : 0;
info->is_stack = vma.type == VMAType::Stack ? 1 : 0;
info->is_pooled = vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled ? 1 : 0;
info->is_committed = vma.IsMapped() ? 1 : 0;
strncpy(info->name, vma.name.data(), ::Libraries::Kernel::ORBIS_KERNEL_MAXIMUM_NAME_LENGTH);
if (vma.type == VMAType::Direct) {
const auto dmem_it = FindDmemArea(vma.phys_base);
ASSERT(dmem_it != dmem_map.end());
ASSERT_MSG(vma.phys_base <= dmem_it->second.GetEnd(), "vma.phys_base is not in dmem_map!");
info->memory_type = dmem_it->second.memory_type;
} else {
info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION;
@ -607,11 +678,11 @@ int MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
std::scoped_lock lk{mutex};
auto dmem_area = FindDmemArea(addr);
while (dmem_area != dmem_map.end() && dmem_area->second.is_free && find_next) {
while (dmem_area != --dmem_map.end() && dmem_area->second.is_free && find_next) {
dmem_area++;
}
if (dmem_area == dmem_map.end() || dmem_area->second.is_free) {
if (dmem_area->second.is_free) {
LOG_ERROR(Core, "Unable to find allocated direct memory region to query!");
return ORBIS_KERNEL_ERROR_EACCES;
}
@ -691,36 +762,56 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment)
virtual_addr = min_search_address;
}
// If the requested address is beyond the maximum our code can handle, throw an assert
auto max_search_address = impl.UserVirtualBase() + impl.UserVirtualSize();
ASSERT_MSG(virtual_addr <= max_search_address, "Input address {:#x} is out of bounds",
virtual_addr);
auto it = FindVMA(virtual_addr);
ASSERT_MSG(it != vma_map.end(), "Specified mapping address was not found!");
// If the VMA is free and contains the requested mapping we are done.
if (it->second.IsFree() && it->second.Contains(virtual_addr, size)) {
return virtual_addr;
}
// Search for the first free VMA that fits our mapping.
const auto is_suitable = [&] {
while (it != vma_map.end()) {
if (!it->second.IsFree()) {
return false;
it++;
continue;
}
const auto& vma = it->second;
virtual_addr = Common::AlignUp(vma.base, alignment);
// Sometimes the alignment itself might be larger than the VMA.
if (virtual_addr > vma.base + vma.size) {
return false;
it++;
continue;
}
// Make sure the address is within our defined bounds
if (virtual_addr >= max_search_address) {
// There are no free mappings within our safely usable address space.
break;
}
// If there's enough space in the VMA, return the address.
const size_t remaining_size = vma.base + vma.size - virtual_addr;
return remaining_size >= size;
};
while (!is_suitable()) {
++it;
if (remaining_size >= size) {
return virtual_addr;
}
it++;
}
return virtual_addr;
// Couldn't find a suitable VMA, return an error.
LOG_ERROR(Kernel_Vmm, "Couldn't find a free mapping for address {:#x}, size {:#x}",
virtual_addr, size);
return -1;
}
MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size) {
auto vma_handle = FindVMA(virtual_addr);
ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
ASSERT_MSG(vma_handle->second.Contains(virtual_addr, 0), "Virtual address not in vm_map");
const VirtualMemoryArea& vma = vma_handle->second;
ASSERT_MSG(vma.base <= virtual_addr, "Adding a mapping to already mapped region");
@ -749,7 +840,7 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size
MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, size_t size) {
auto dmem_handle = FindDmemArea(addr);
ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map");
ASSERT_MSG(addr <= dmem_handle->second.GetEnd(), "Physical address not in dmem_map");
const DirectMemoryArea& area = dmem_handle->second;
ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region");
@ -804,7 +895,7 @@ int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut,
auto dmem_area = FindDmemArea(addr);
if (dmem_area == dmem_map.end() || dmem_area->second.is_free) {
if (addr > dmem_area->second.GetEnd() || dmem_area->second.is_free) {
LOG_ERROR(Core, "Unable to find allocated direct memory region to check type!");
return ORBIS_KERNEL_ERROR_ENOENT;
}

View file

@ -157,6 +157,12 @@ public:
return impl.SystemReservedVirtualBase();
}
bool IsValidGpuMapping(VAddr virtual_addr, u64 size) {
// The PS4's GPU can only handle 40 bit addresses.
const VAddr max_gpu_address{0x10000000000};
return virtual_addr + size < max_gpu_address;
}
bool IsValidAddress(const void* addr) const noexcept {
const VAddr virtual_addr = reinterpret_cast<VAddr>(addr);
const auto end_it = std::prev(vma_map.end());
@ -186,7 +192,7 @@ public:
int PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot);
int MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
MemoryMapFlags flags, VMAType type, std::string_view name = "",
MemoryMapFlags flags, VMAType type, std::string_view name = "anon",
bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0);
int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,

View file

@ -19,8 +19,7 @@ namespace Core {
using EntryFunc = PS4_SYSV_ABI int (*)(size_t args, const void* argp, void* param);
static u64 LoadOffset = CODE_BASE_OFFSET;
static constexpr u64 CODE_BASE_INCR = 0x010000000u;
static constexpr u64 ModuleLoadBase = 0x800000000;
static u64 GetAlignedSize(const elf_program_header& phdr) {
return (phdr.p_align != 0 ? (phdr.p_memsz + (phdr.p_align - 1)) & ~(phdr.p_align - 1)
@ -84,7 +83,7 @@ static std::string StringToNid(std::string_view symbol) {
}
Module::Module(Core::MemoryManager* memory_, const std::filesystem::path& file_, u32& max_tls_index)
: memory{memory_}, file{file_}, name{file.stem().string()} {
: memory{memory_}, file{file_}, name{file.filename().string()} {
elf.Open(file);
if (elf.IsElfFile()) {
LoadModuleToMemory(max_tls_index);
@ -113,10 +112,8 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
// Map module segments (and possible TLS trampolines)
void** out_addr = reinterpret_cast<void**>(&base_virtual_addr);
memory->MapMemory(out_addr, memory->SystemReservedVirtualBase() + LoadOffset,
aligned_base_size + TrampolineSize, MemoryProt::CpuReadWrite,
MemoryMapFlags::Fixed, VMAType::Code, name, true);
LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR);
memory->MapMemory(out_addr, ModuleLoadBase, aligned_base_size + TrampolineSize,
MemoryProt::CpuReadWrite, MemoryMapFlags::NoFlags, VMAType::Code, name, true);
LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr));
#ifdef ARCH_X86_64
@ -229,7 +226,7 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
LOG_INFO(Core_Linker, "program entry addr ..........: {:#018x}", entry_addr);
if (MemoryPatcher::g_eboot_address == 0) {
if (name == "eboot") {
if (name == "eboot.bin") {
MemoryPatcher::g_eboot_address = base_virtual_addr;
MemoryPatcher::g_eboot_image_size = base_size;
MemoryPatcher::OnGameLoaded();

View file

@ -5,6 +5,8 @@
#include "common/types.h"
void* memset(void* ptr, int value, size_t num);
namespace Xbyak {
class CodeGenerator;
}
@ -41,9 +43,18 @@ Tcb* GetTcbBase();
/// Makes sure TLS is initialized for the thread before entering guest.
void EnsureThreadInitialized();
template <size_t size>
__attribute__((optnone)) void ClearStack() {
volatile void* buf = alloca(size);
memset(const_cast<void*>(buf), 0, size);
buf = nullptr;
}
template <class ReturnType, class... FuncArgs, class... CallArgs>
ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) {
EnsureThreadInitialized();
// clear stack to avoid trash from EnsureThreadInitialized
ClearStack<13_KB>();
return func(std::forward<CallArgs>(args)...);
}

View file

@ -10,7 +10,6 @@
#include "common/logging/log.h"
#ifdef ENABLE_QT_GUI
#include <QtCore>
#include "common/memory_patcher.h"
#endif
#include "common/assert.h"
#ifdef ENABLE_DISCORD_RPC
@ -20,6 +19,7 @@
#include <WinSock2.h>
#endif
#include "common/elf_info.h"
#include "common/memory_patcher.h"
#include "common/ntapi.h"
#include "common/path_util.h"
#include "common/polyfill_thread.h"
@ -56,27 +56,6 @@ Emulator::Emulator() {
WSADATA wsaData;
WSAStartup(versionWanted, &wsaData);
#endif
// Create stdin/stdout/stderr
Common::Singleton<FileSys::HandleTable>::Instance()->CreateStdHandles();
// Defer until after logging is initialized.
memory = Core::Memory::Instance();
controller = Common::Singleton<Input::GameController>::Instance();
linker = Common::Singleton<Core::Linker>::Instance();
// Load renderdoc module.
VideoCore::LoadRenderDoc();
// Start the timer (Play Time)
#ifdef ENABLE_QT_GUI
start_time = std::chrono::steady_clock::now();
const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
QString filePath = QString::fromStdString((user_dir / "play_time.txt").string());
QFile file(filePath);
ASSERT_MSG(file.open(QIODevice::ReadWrite | QIODevice::Text),
"Error opening or creating play_time.txt");
#endif
}
Emulator::~Emulator() {
@ -100,58 +79,93 @@ void Emulator::Run(const std::filesystem::path& file, const std::vector<std::str
// Applications expect to be run from /app0 so mount the file's parent path as app0.
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
mnt->Mount(game_folder, "/app0");
mnt->Mount(game_folder, "/app0", true);
// Certain games may use /hostapp as well such as CUSA001100
mnt->Mount(game_folder, "/hostapp");
mnt->Mount(game_folder, "/hostapp", true);
auto& game_info = Common::ElfInfo::Instance();
const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo");
const auto param_sfo_exists = std::filesystem::exists(param_sfo_path);
// Loading param.sfo file if exists
// Load param.sfo details if it exists
std::string id;
std::string title;
std::string app_version;
u32 fw_version;
Common::PSFAttributes psf_attributes{};
const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo");
if (!std::filesystem::exists(param_sfo_path) || !Config::getSeparateLogFilesEnabled()) {
Common::Log::Initialize();
Common::Log::Start();
}
if (std::filesystem::exists(param_sfo_path)) {
if (param_sfo_exists) {
auto* param_sfo = Common::Singleton<PSF>::Instance();
const bool success = param_sfo->Open(param_sfo_path);
ASSERT_MSG(success, "Failed to open param.sfo");
ASSERT_MSG(param_sfo->Open(param_sfo_path), "Failed to open param.sfo");
const auto content_id = param_sfo->GetString("CONTENT_ID");
ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID");
id = std::string(*content_id, 7, 9);
if (Config::getSeparateLogFilesEnabled()) {
Common::Log::Initialize(id + ".log");
Common::Log::Start();
title = param_sfo->GetString("TITLE").value_or("Unknown title");
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) {
psf_attributes.raw = *raw_attributes;
}
LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::g_version);
LOG_INFO(Loader, "Revision {}", Common::g_scm_rev);
LOG_INFO(Loader, "Branch {}", Common::g_scm_branch);
LOG_INFO(Loader, "Description {}", Common::g_scm_desc);
LOG_INFO(Loader, "Remote {}", Common::g_scm_remote_url);
}
LOG_INFO(Config, "General LogType: {}", Config::getLogType());
LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId());
LOG_INFO(Config, "Vulkan vkValidation: {}", Config::vkValidationEnabled());
LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled());
LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled());
LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::getVkCrashDiagnosticEnabled());
LOG_INFO(Config, "Vulkan hostMarkers: {}", Config::getVkHostMarkersEnabled());
LOG_INFO(Config, "Vulkan guestMarkers: {}", Config::getVkGuestMarkersEnabled());
LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled());
// Initialize logging as soon as possible
if (!id.empty() && Config::getSeparateLogFilesEnabled()) {
Common::Log::Initialize(id + ".log");
} else {
Common::Log::Initialize();
}
Common::Log::Start();
LOG_INFO(Loader, "Starting shadps4 emulator v{} ", Common::g_version);
LOG_INFO(Loader, "Revision {}", Common::g_scm_rev);
LOG_INFO(Loader, "Branch {}", Common::g_scm_branch);
LOG_INFO(Loader, "Description {}", Common::g_scm_desc);
LOG_INFO(Loader, "Remote {}", Common::g_scm_remote_url);
LOG_INFO(Config, "General LogType: {}", Config::getLogType());
LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
LOG_INFO(Config, "Vulkan gpuId: {}", Config::getGpuId());
LOG_INFO(Config, "Vulkan vkValidation: {}", Config::vkValidationEnabled());
LOG_INFO(Config, "Vulkan vkValidationSync: {}", Config::vkValidationSyncEnabled());
LOG_INFO(Config, "Vulkan vkValidationGpu: {}", Config::vkValidationGpuEnabled());
LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::getVkCrashDiagnosticEnabled());
LOG_INFO(Config, "Vulkan hostMarkers: {}", Config::getVkHostMarkersEnabled());
LOG_INFO(Config, "Vulkan guestMarkers: {}", Config::getVkGuestMarkersEnabled());
LOG_INFO(Config, "Vulkan rdocEnable: {}", Config::isRdocEnabled());
if (param_sfo_exists) {
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
}
if (!args.empty()) {
const auto argc = std::min<size_t>(args.size(), 32);
for (auto i = 0; i < argc; i++) {
LOG_INFO(Loader, "Game argument {}: {}", i, args[i]);
}
if (args.size() > 32) {
LOG_ERROR(Loader, "Too many game arguments, only passing the first 32");
}
}
// Create stdin/stdout/stderr
Common::Singleton<FileSys::HandleTable>::Instance()->CreateStdHandles();
// Initialize components
memory = Core::Memory::Instance();
controller = Common::Singleton<Input::GameController>::Instance();
linker = Common::Singleton<Core::Linker>::Instance();
// Load renderdoc module
VideoCore::LoadRenderDoc();
// Initialize patcher and trophies
if (!id.empty()) {
MemoryPatcher::g_game_serial = id;
Libraries::NpTrophy::game_serial = id;
const auto trophyDir =
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles";
if (!std::filesystem::exists(trophyDir)) {
@ -160,41 +174,9 @@ void Emulator::Run(const std::filesystem::path& file, const std::vector<std::str
LOG_ERROR(Loader, "Couldn't extract trophies");
}
}
#ifdef ENABLE_QT_GUI
MemoryPatcher::g_game_serial = id;
// Timer for 'Play Time'
QTimer* timer = new QTimer();
QObject::connect(timer, &QTimer::timeout, [this, id]() {
UpdatePlayTime(id);
start_time = std::chrono::steady_clock::now();
});
timer->start(60000); // 60000 ms = 1 minute
#endif
title = param_sfo->GetString("TITLE").value_or("Unknown title");
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) {
psf_attributes.raw = *raw_attributes;
}
if (!args.empty()) {
int argc = std::min<int>(args.size(), 32);
for (int i = 0; i < argc; i++) {
LOG_INFO(Loader, "Game argument {}: {}", i, args[i]);
}
if (args.size() > 32) {
LOG_ERROR(Loader, "Too many game arguments, only passing the first 32");
}
}
}
const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png");
if (std::filesystem::exists(pic1_path)) {
game_info.splash_path = pic1_path;
}
auto& game_info = Common::ElfInfo::Instance();
game_info.initialized = true;
game_info.game_serial = id;
game_info.title = title;
@ -203,6 +185,11 @@ void Emulator::Run(const std::filesystem::path& file, const std::vector<std::str
game_info.raw_firmware_ver = fw_version;
game_info.psf_attributes = psf_attributes;
const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png");
if (std::filesystem::exists(pic1_path)) {
game_info.splash_path = pic1_path;
}
std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version);
std::string window_title = "";
if (Common::g_is_release) {
@ -233,11 +220,15 @@ void Emulator::Run(const std::filesystem::path& file, const std::vector<std::str
std::filesystem::create_directory(mount_data_dir);
}
mnt->Mount(mount_data_dir, "/data"); // should just exist, manually create with game serial
// Mounting temp folders
const auto& mount_temp_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id;
if (!std::filesystem::exists(mount_temp_dir)) {
std::filesystem::create_directory(mount_temp_dir);
if (std::filesystem::exists(mount_temp_dir)) {
// Temp folder should be cleared on each boot.
std::filesystem::remove_all(mount_temp_dir);
}
mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir
std::filesystem::create_directory(mount_temp_dir);
mnt->Mount(mount_temp_dir, "/temp0");
mnt->Mount(mount_temp_dir, "/temp");
const auto& mount_download_dir =
@ -282,6 +273,25 @@ void Emulator::Run(const std::filesystem::path& file, const std::vector<std::str
}
#endif
// Start the timer (Play Time)
#ifdef ENABLE_QT_GUI
if (!id.empty()) {
auto* timer = new QTimer();
QObject::connect(timer, &QTimer::timeout, [this, id]() {
UpdatePlayTime(id);
start_time = std::chrono::steady_clock::now();
});
timer->start(60000); // 60000 ms = 1 minute
start_time = std::chrono::steady_clock::now();
const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
QString filePath = QString::fromStdString((user_dir / "play_time.txt").string());
QFile file(filePath);
ASSERT_MSG(file.open(QIODevice::ReadWrite | QIODevice::Text),
"Error opening or creating play_time.txt");
}
#endif
linker->Execute(args);
window->InitTimers();

View file

@ -270,6 +270,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (info.has_image_query) {
ctx.AddCapability(spv::Capability::ImageQuery);
}
if (info.uses_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) {
ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT);
}
if (info.uses_lane_id) {
ctx.AddCapability(spv::Capability::GroupNonUniform);
}

View file

@ -75,6 +75,14 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
}
Id ImageAtomicF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id pointer{ctx.OpImageTexelPointer(ctx.image_f32, texture.id, coords, ctx.ConstU32(0U))};
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.F32[1], pointer, scope, semantics, value);
}
} // Anonymous namespace
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
@ -187,6 +195,40 @@ Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax);
}
Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
if (ctx.profile.supports_image_fp32_atomic_min_max) {
return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMax);
}
const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
const auto sign_bit_set =
ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
const auto result = ctx.OpSelect(
ctx.F32[1], sign_bit_set,
EmitBitCastF32U32(ctx, EmitImageAtomicUMin32(ctx, inst, handle, coords, u32_value)),
EmitBitCastF32U32(ctx, EmitImageAtomicSMax32(ctx, inst, handle, coords, u32_value)));
return result;
}
Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
if (ctx.profile.supports_image_fp32_atomic_min_max) {
return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMin);
}
const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
const auto sign_bit_set =
ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
const auto result = ctx.OpSelect(
ctx.F32[1], sign_bit_set,
EmitBitCastF32U32(ctx, EmitImageAtomicUMax32(ctx, inst, handle, coords, u32_value)),
EmitBitCastF32U32(ctx, EmitImageAtomicSMin32(ctx, inst, handle, coords, u32_value)));
return result;
}
Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");

View file

@ -529,7 +529,7 @@ void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto
// Bounds checking enabled, wrap in a conditional branch.
auto compare_index = index;
if (N > 1) {
index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
}
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
const Id in_bounds_label = ctx.OpLabel();

View file

@ -482,6 +482,8 @@ Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);

View file

@ -869,6 +869,7 @@ void EmitContext::DefineImagesAndSamplers() {
}
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
image_f32 = TypePointer(spv::StorageClass::Image, F32[1]);
}
if (info.samplers.empty()) {
return;

View file

@ -207,6 +207,7 @@ public:
Id invocation_id{};
Id subgroup_local_invocation_id{};
Id image_u32{};
Id image_f32{};
Id shared_memory_u8{};
Id shared_memory_u16{};

View file

@ -517,7 +517,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
const auto values =
ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1),
ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3));
const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect());
const auto converted =
IR::ApplyReadNumberConversionVec4(ir, values, buffer.GetNumberConversion());
const auto swizzled = ApplySwizzle(ir, converted, buffer.DstSelect());
for (u32 i = 0; i < 4; i++) {
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
}

View file

@ -115,8 +115,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return IMAGE_ATOMIC(AtomicOp::Smin, inst);
case Opcode::IMAGE_ATOMIC_UMIN:
return IMAGE_ATOMIC(AtomicOp::Umin, inst);
case Opcode::IMAGE_ATOMIC_FMIN:
return IMAGE_ATOMIC(AtomicOp::Fmin, inst);
case Opcode::IMAGE_ATOMIC_SMAX:
return IMAGE_ATOMIC(AtomicOp::Smax, inst);
case Opcode::IMAGE_ATOMIC_FMAX:
return IMAGE_ATOMIC(AtomicOp::Fmax, inst);
case Opcode::IMAGE_ATOMIC_UMAX:
return IMAGE_ATOMIC(AtomicOp::Umax, inst);
case Opcode::IMAGE_ATOMIC_AND:
@ -139,6 +143,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::IMAGE_SAMPLE_C_LZ:
case Opcode::IMAGE_SAMPLE_O:
case Opcode::IMAGE_SAMPLE_L_O:
case Opcode::IMAGE_SAMPLE_B_O:
case Opcode::IMAGE_SAMPLE_LZ_O:
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_C_LZ_O:
@ -466,6 +471,10 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
return ir.ImageAtomicIMax(handle, body, value, true, info);
case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, info);
case AtomicOp::Fmax:
return ir.ImageAtomicFMax(handle, body, value, info);
case AtomicOp::Fmin:
return ir.ImageAtomicFMin(handle, body, value, info);
case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, info);
case AtomicOp::Or:

View file

@ -196,6 +196,7 @@ struct Info {
bool has_discard{};
bool has_image_gather{};
bool has_image_query{};
bool uses_atomic_float_min_max{};
bool uses_lane_id{};
bool uses_group_quad{};
bool uses_group_ballot{};
@ -280,6 +281,11 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept
// Fall back to null image if unbound.
return AmdGpu::Image::Null();
}
const auto data_fmt = image.GetDataFmt();
if (is_depth && data_fmt != AmdGpu::DataFormat::Format16 &&
data_fmt != AmdGpu::DataFormat::Format32) {
return AmdGpu::Image::NullDepth();
}
return image;
}

View file

@ -1870,6 +1870,16 @@ Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const
return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicFMax(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicFMax32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicFMin(const Value& handle, const Value& coords, const Value& value,
TextureInstInfo info) {
return Inst(Opcode::ImageAtomicFMin32, Flags{info}, handle, coords, value);
}
Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
bool is_signed, TextureInstInfo info) {
return is_signed ? ImageAtomicSMax(handle, coords, value, info)

View file

@ -321,6 +321,10 @@ public:
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicFMax(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicFMin(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
const Value& value, bool is_signed, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,

View file

@ -94,6 +94,8 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::ImageAtomicUMin32:
case Opcode::ImageAtomicSMax32:
case Opcode::ImageAtomicUMax32:
case Opcode::ImageAtomicFMax32:
case Opcode::ImageAtomicFMin32:
case Opcode::ImageAtomicInc32:
case Opcode::ImageAtomicDec32:
case Opcode::ImageAtomicAnd32:

View file

@ -420,6 +420,8 @@ OPCODE(ImageAtomicSMin32, U32, Opaq
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicFMax32, F32, Opaque, Opaque, F32, )
OPCODE(ImageAtomicFMin32, F32, Opaque, Opaque, F32, )
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )

View file

@ -206,7 +206,8 @@ static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info)
.swizzle = is_inst_typed
? AmdGpu::RemapSwizzle(flags.inst_data_fmt.Value(), AmdGpu::IdentityMapping)
: buffer.DstSelect(),
.num_conversion = is_inst_typed ? AmdGpu::MapNumberConversion(flags.inst_num_fmt.Value())
.num_conversion = is_inst_typed ? AmdGpu::MapNumberConversion(flags.inst_num_fmt.Value(),
flags.inst_data_fmt.Value())
: buffer.GetNumberConversion(),
.num_components = AmdGpu::NumComponents(data_format),
};

View file

@ -101,6 +101,8 @@ bool IsImageAtomicInstruction(const IR::Inst& inst) {
case IR::Opcode::ImageAtomicUMin32:
case IR::Opcode::ImageAtomicSMax32:
case IR::Opcode::ImageAtomicUMax32:
case IR::Opcode::ImageAtomicFMax32:
case IR::Opcode::ImageAtomicFMin32:
case IR::Opcode::ImageAtomicInc32:
case IR::Opcode::ImageAtomicDec32:
case IR::Opcode::ImageAtomicAnd32:
@ -361,6 +363,12 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
image = AmdGpu::Image::Null();
}
const auto data_fmt = image.GetDataFmt();
if (inst_info.is_depth && data_fmt != AmdGpu::DataFormat::Format16 &&
data_fmt != AmdGpu::DataFormat::Format32) {
LOG_ERROR(Render_Vulkan, "Shader compiled using non-depth image with depth instruction!");
image = AmdGpu::Image::NullDepth();
}
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;

View file

@ -71,6 +71,10 @@ void Visit(Info& info, const IR::Inst& inst) {
case IR::Opcode::ImageQueryLod:
info.has_image_query = true;
break;
case IR::Opcode::ImageAtomicFMax32:
case IR::Opcode::ImageAtomicFMin32:
info.uses_atomic_float_min_max = true;
break;
case IR::Opcode::LaneId:
info.uses_lane_id = true;
break;

View file

@ -34,6 +34,18 @@ inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
case AmdGpu::NumberConversion::UnormToUbnorm:
// Convert 0...1 to -1...1
return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f));
case AmdGpu::NumberConversion::Sint8ToSnormNz: {
const IR::U32 additon = ir.IAdd(ir.IMul(ir.BitCast<U32>(value), ir.Imm32(2)), ir.Imm32(1));
const IR::F32 left = ir.ConvertSToF(32, 32, additon);
const IR::F32 max = ir.Imm32(float(std::numeric_limits<u8>::max()));
return ir.FPDiv(left, max);
}
case AmdGpu::NumberConversion::Sint16ToSnormNz: {
const IR::U32 additon = ir.IAdd(ir.IMul(ir.BitCast<U32>(value), ir.Imm32(2)), ir.Imm32(1));
const IR::F32 left = ir.ConvertSToF(32, 32, additon);
const IR::F32 max = ir.Imm32(float(std::numeric_limits<u16>::max()));
return ir.FPDiv(left, max);
}
default:
UNREACHABLE();
}
@ -66,6 +78,20 @@ inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value,
case AmdGpu::NumberConversion::UnormToUbnorm:
// Convert -1...1 to 0...1
return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f));
case AmdGpu::NumberConversion::Sint8ToSnormNz: {
const IR::F32 max = ir.Imm32(float(std::numeric_limits<u8>::max()));
const IR::F32 mul = ir.FPMul(ir.FPClamp(value, ir.Imm32(-1.f), ir.Imm32(1.f)), max);
const IR::F32 left = ir.FPSub(mul, ir.Imm32(1.f));
const IR::U32 raw = ir.ConvertFToS(32, ir.FPDiv(left, ir.Imm32(2.f)));
return ir.BitCast<F32>(raw);
}
case AmdGpu::NumberConversion::Sint16ToSnormNz: {
const IR::F32 max = ir.Imm32(float(std::numeric_limits<u16>::max()));
const IR::F32 mul = ir.FPMul(ir.FPClamp(value, ir.Imm32(-1.f), ir.Imm32(1.f)), max);
const IR::F32 left = ir.FPSub(mul, ir.Imm32(1.f));
const IR::U32 raw = ir.ConvertFToS(32, ir.FPDiv(left, ir.Imm32(2.f)));
return ir.BitCast<F32>(raw);
}
default:
UNREACHABLE();
}

View file

@ -29,6 +29,7 @@ struct Profile {
bool supports_native_cube_calc{};
bool supports_trinary_minmax{};
bool supports_robust_buffer_access{};
bool supports_image_fp32_atomic_min_max{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};
bool needs_manual_interpolation{};

View file

@ -169,10 +169,10 @@ static constexpr u32 MaxColorBuffers = 8;
struct PsColorBuffer {
AmdGpu::NumberFormat num_format : 4;
AmdGpu::NumberConversion num_conversion : 2;
AmdGpu::NumberConversion num_conversion : 3;
AmdGpu::Liverpool::ShaderExportFormat export_format : 4;
u32 needs_unorm_fixup : 1;
u32 pad : 21;
u32 pad : 20;
AmdGpu::CompMapping swizzle;
auto operator<=>(const PsColorBuffer&) const noexcept = default;

View file

@ -455,14 +455,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::DrawIndirect: {
const auto* draw_indirect = reinterpret_cast<const PM4CmdDrawIndirect*>(header);
const auto offset = draw_indirect->data_offset;
const auto size = sizeof(DrawIndirectArgs);
const auto stride = sizeof(DrawIndirectArgs);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("gfx:{}:DrawIndirect", cmd_address));
rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0);
rasterizer->DrawIndirect(false, indirect_args_addr, offset, stride, 1, 0);
rasterizer->ScopeMarkerEnd();
}
break;
@ -471,7 +471,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto* draw_index_indirect =
reinterpret_cast<const PM4CmdDrawIndexIndirect*>(header);
const auto offset = draw_index_indirect->data_offset;
const auto size = sizeof(DrawIndexedIndirectArgs);
const auto stride = sizeof(DrawIndexedIndirectArgs);
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
@ -479,25 +479,46 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("gfx:{}:DrawIndexIndirect", cmd_address));
rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0);
rasterizer->DrawIndirect(true, indirect_args_addr, offset, stride, 1, 0);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::DrawIndexIndirectCountMulti: {
case PM4ItOpcode::DrawIndexIndirectMulti: {
const auto* draw_index_indirect =
reinterpret_cast<const PM4CmdDrawIndexIndirectMulti*>(header);
const auto offset = draw_index_indirect->data_offset;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("gfx:{}:DrawIndexIndirectMulti", cmd_address));
rasterizer->DrawIndirect(true, indirect_args_addr, offset,
draw_index_indirect->stride,
draw_index_indirect->count, 0);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::DrawIndexIndirectCountMulti: {
const auto* draw_index_indirect =
reinterpret_cast<const PM4CmdDrawIndexIndirectCountMulti*>(header);
const auto offset = draw_index_indirect->data_offset;
if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
}
if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("gfx:{}:DrawIndexIndirectCountMulti", cmd_address));
rasterizer->DrawIndirect(
true, indirect_args_addr, offset, draw_index_indirect->stride,
draw_index_indirect->count, draw_index_indirect->countAddr);
rasterizer->DrawIndirect(true, indirect_args_addr, offset,
draw_index_indirect->stride,
draw_index_indirect->count,
draw_index_indirect->count_indirect_enable.Value()
? draw_index_indirect->count_addr
: 0);
rasterizer->ScopeMarkerEnd();
}
break;

View file

@ -924,15 +924,11 @@ struct Liverpool {
}
[[nodiscard]] NumberFormat GetNumberFmt() const {
// There is a small difference between T# and CB number types, account for it.
return RemapNumberFormat(info.number_type == NumberFormat::SnormNz
? NumberFormat::Srgb
: info.number_type.Value(),
info.format);
return RemapNumberFormat(GetFixedNumberFormat(), info.format);
}
[[nodiscard]] NumberConversion GetNumberConversion() const {
return MapNumberConversion(info.number_type);
return MapNumberConversion(GetFixedNumberFormat(), info.format);
}
[[nodiscard]] CompMapping Swizzle() const {
@ -973,6 +969,13 @@ struct Liverpool {
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
return RemapSwizzle(info.format, mrt_swizzle);
}
private:
[[nodiscard]] NumberFormat GetFixedNumberFormat() const {
// There is a small difference between T# and CB number types, account for it.
return info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb
: info.number_type.Value();
}
};
enum ContextRegs : u32 {

View file

@ -860,6 +860,24 @@ struct PM4CmdDrawIndexIndirect {
};
struct PM4CmdDrawIndexIndirectMulti {
PM4Type3Header header; ///< header
u32 data_offset; ///< Byte aligned offset where the required data structure starts
union {
u32 dw2;
BitField<0, 16, u32> base_vtx_loc; ///< Offset where the CP will write the
///< BaseVertexLocation it fetched from memory
};
union {
u32 dw3;
BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the
///< StartInstanceLocation it fetched from memory
};
u32 count; ///< Count of data structures to loop through before going to next packet
u32 stride; ///< Stride in memory from one data structure to the next
u32 draw_initiator; ///< Draw Initiator Register
};
struct PM4CmdDrawIndexIndirectCountMulti {
PM4Type3Header header; ///< header
u32 data_offset; ///< Byte aligned offset where the required data structure starts
union {
@ -874,14 +892,14 @@ struct PM4CmdDrawIndexIndirectMulti {
};
union {
u32 dw4;
BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count
BitField<0, 16, u32> draw_index_loc; ///< register offset to write the Draw Index count
BitField<30, 1, u32>
countIndirectEnable; ///< Indicates the data structure count is in memory
count_indirect_enable; ///< Indicates the data structure count is in memory
BitField<31, 1, u32>
drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC
draw_index_enable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC
};
u32 count; ///< Count of data structures to loop through before going to next packet
u64 countAddr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set
u64 count_addr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set
u32 stride; ///< Stride in memory from one data structure to the next
u32 draw_initiator; ///< Draw Initiator Register
};

View file

@ -68,7 +68,7 @@ struct Buffer {
}
NumberConversion GetNumberConversion() const noexcept {
return MapNumberConversion(NumberFormat(num_format));
return MapNumberConversion(NumberFormat(num_format), DataFormat(data_format));
}
u32 GetStride() const noexcept {
@ -219,6 +219,19 @@ struct Image {
return image;
}
static constexpr Image NullDepth() {
Image image{};
image.data_format = u64(DataFormat::Format32);
image.num_format = u64(NumberFormat::Float);
image.dst_sel_x = u64(CompSwizzle::Red);
image.dst_sel_y = u64(CompSwizzle::Green);
image.dst_sel_z = u64(CompSwizzle::Blue);
image.dst_sel_w = u64(CompSwizzle::Alpha);
image.tiling_index = u64(TilingMode::Texture_MicroTiled);
image.type = u64(ImageType::Color2D);
return image;
}
bool Valid() const {
return (type & 0x8u) != 0;
}
@ -292,7 +305,7 @@ struct Image {
}
NumberConversion GetNumberConversion() const noexcept {
return MapNumberConversion(NumberFormat(num_format));
return MapNumberConversion(NumberFormat(num_format), DataFormat(data_format));
}
TilingMode GetTilingMode() const {

View file

@ -197,6 +197,8 @@ enum class NumberConversion : u32 {
UintToUscaled = 1,
SintToSscaled = 2,
UnormToUbnorm = 3,
Sint8ToSnormNz = 5,
Sint16ToSnormNz = 6,
};
struct CompMapping {
@ -287,6 +289,7 @@ inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataForma
case NumberFormat::Uscaled:
return NumberFormat::Uint;
case NumberFormat::Sscaled:
case NumberFormat::SnormNz:
return NumberFormat::Sint;
case NumberFormat::Ubnorm:
return NumberFormat::Unorm;
@ -336,14 +339,28 @@ inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizz
}
}
inline NumberConversion MapNumberConversion(const NumberFormat format) {
switch (format) {
inline NumberConversion MapNumberConversion(const NumberFormat num_fmt, const DataFormat data_fmt) {
switch (num_fmt) {
case NumberFormat::Uscaled:
return NumberConversion::UintToUscaled;
case NumberFormat::Sscaled:
return NumberConversion::SintToSscaled;
case NumberFormat::Ubnorm:
return NumberConversion::UnormToUbnorm;
case NumberFormat::SnormNz: {
switch (data_fmt) {
case DataFormat::Format8:
case DataFormat::Format8_8:
case DataFormat::Format8_8_8_8:
return NumberConversion::Sint8ToSnormNz;
case DataFormat::Format16:
case DataFormat::Format16_16:
case DataFormat::Format16_16_16_16:
return NumberConversion::Sint16ToSnormNz;
default:
UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt));
}
}
default:
return NumberConversion::None;
}

View file

@ -177,8 +177,8 @@ void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
if (instance.IsVertexInputDynamicState()) {
cmdbuf.bindVertexBuffers(0, num_buffers, host_buffers.data(), host_offsets.data());
} else {
cmdbuf.bindVertexBuffers2EXT(0, num_buffers, host_buffers.data(), host_offsets.data(),
host_sizes.data(), host_strides.data());
cmdbuf.bindVertexBuffers2(0, num_buffers, host_buffers.data(), host_offsets.data(),
host_sizes.data(), host_strides.data());
}
}

View file

@ -210,7 +210,8 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceRobustness2FeaturesEXT,
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
features = feature_chain.get().features;
const vk::StructureChain properties_chain = physical_device.getProperties2<
@ -272,6 +273,13 @@ bool Instance::CreateDevice() {
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME);
shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME);
if (shader_atomic_float2) {
shader_atomic_float2_features =
feature_chain.get<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
}
const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
@ -330,6 +338,7 @@ bool Instance::CreateDevice() {
.geometryShader = features.geometryShader,
.tessellationShader = features.tessellationShader,
.logicOp = features.logicOp,
.multiDrawIndirect = features.multiDrawIndirect,
.depthBiasClamp = features.depthBiasClamp,
.fillModeNonSolid = features.fillModeNonSolid,
.depthBounds = features.depthBounds,
@ -401,6 +410,10 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
.legacyVertexAttributes = true,
},
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
.shaderImageFloat32AtomicMinMax =
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
},
#ifdef __APPLE__
portability_features,
#endif
@ -430,6 +443,9 @@ bool Instance::CreateDevice() {
if (!legacy_vertex_attributes) {
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
}
if (!shader_atomic_float2) {
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) {

View file

@ -165,6 +165,12 @@ public:
return amd_shader_trinary_minmax;
}
/// Returns true when the shaderImageFloat32AtomicMinMax feature of
/// VK_EXT_shader_atomic_float2 is supported.
bool IsShaderAtomicFloatImage32MinMaxSupported() const {
return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@ -336,6 +342,7 @@ private:
vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
std::string vendor_name;
@ -360,6 +367,7 @@ private:
bool image_load_store_lod{};
bool amd_gcn_shader{};
bool amd_shader_trinary_minmax{};
bool shader_atomic_float2{};
bool portability_subset{};
};

View file

@ -206,6 +206,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
.supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
.supports_robust_buffer_access = instance_.IsRobustBufferAccess2Supported(),
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||

View file

@ -270,7 +270,25 @@ Frame* Presenter::PrepareLastFrame() {
return frame;
}
Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) {
static vk::Format GetFrameViewFormat(const Libraries::VideoOut::PixelFormat format) {
switch (format) {
case Libraries::VideoOut::PixelFormat::A8B8G8R8Srgb:
return vk::Format::eR8G8B8A8Srgb;
case Libraries::VideoOut::PixelFormat::A8R8G8B8Srgb:
return vk::Format::eB8G8R8A8Srgb;
case Libraries::VideoOut::PixelFormat::A2R10G10B10:
case Libraries::VideoOut::PixelFormat::A2R10G10B10Srgb:
case Libraries::VideoOut::PixelFormat::A2R10G10B10Bt2020Pq:
return vk::Format::eA2R10G10B10UnormPack32;
default:
break;
}
UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
return {};
}
Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id,
const Libraries::VideoOut::PixelFormat format, bool is_eop) {
// Request a free presentation frame.
Frame* frame = GetRenderFrame();
@ -324,7 +342,7 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop)
cmdbuf);
VideoCore::ImageViewInfo info{};
info.format = image.info.pixel_format;
info.format = GetFrameViewFormat(format);
// Exclude alpha from output frame to avoid blending with UI.
info.mapping = vk::ComponentMapping{
.r = vk::ComponentSwizzle::eIdentity,

View file

@ -70,11 +70,12 @@ public:
auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(desc);
texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler);
return PrepareFrameInternal(image_id, is_eop);
return PrepareFrameInternal(image_id, attribute.attrib.pixel_format, is_eop);
}
Frame* PrepareBlankFrame(bool is_eop) {
return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID, is_eop);
return PrepareFrameInternal(VideoCore::NULL_IMAGE_ID,
Libraries::VideoOut::PixelFormat::Unknown, is_eop);
}
VideoCore::Image& RegisterVideoOutSurface(
@ -119,7 +120,8 @@ public:
}
private:
Frame* PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop = true);
Frame* PrepareFrameInternal(VideoCore::ImageId image_id,
Libraries::VideoOut::PixelFormat format, bool is_eop = true);
Frame* GetRenderFrame();
void SetExpectedGameSize(s32 width, s32 height);

View file

@ -618,8 +618,9 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
if (instance.IsNullDescriptorSupported()) {
image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
} else {
auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID);
image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view,
auto& null_image_view =
texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info);
image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view,
vk::ImageLayout::eGeneral);
}
} else {

View file

@ -16,14 +16,15 @@ using VideoOutFormat = Libraries::VideoOut::PixelFormat;
static vk::Format ConvertPixelFormat(const VideoOutFormat format) {
switch (format) {
case VideoOutFormat::A8R8G8B8Srgb:
return vk::Format::eB8G8R8A8Srgb;
case VideoOutFormat::A8B8G8R8Srgb:
// Remaining formats are mapped to RGBA for internal consistency and changed to BGRA in the
// frame image view.
case VideoOutFormat::A8R8G8B8Srgb:
return vk::Format::eR8G8B8A8Srgb;
case VideoOutFormat::A2R10G10B10:
case VideoOutFormat::A2R10G10B10Srgb:
case VideoOutFormat::A2R10G10B10Bt2020Pq:
return vk::Format::eA2R10G10B10UnormPack32;
return vk::Format::eA2B10G10R10UnormPack32;
default:
break;
}

View file

@ -34,8 +34,6 @@ struct ImageViewInfo {
struct Image;
constexpr Common::SlotId NULL_IMAGE_VIEW_ID{0};
struct ImageView {
ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info, Image& image,
ImageId image_id);

View file

@ -8,6 +8,7 @@
#include "common/debug.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/host_compatibility.h"
@ -23,31 +24,41 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
BufferCache& buffer_cache_, PageManager& tracker_)
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
tile_manager{instance, scheduler} {
// Create basic null image at fixed image ID.
const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm);
ASSERT(null_id.index == NULL_IMAGE_ID.index);
}
TextureCache::~TextureCache() = default;
ImageId TextureCache::GetNullImage(const vk::Format format) {
const auto existing_image = null_images.find(format);
if (existing_image != null_images.end()) {
return existing_image->second;
}
ImageInfo info{};
info.pixel_format = vk::Format::eR8G8B8A8Unorm;
info.pixel_format = format;
info.type = vk::ImageType::e2D;
info.tiling_idx = u32(AmdGpu::TilingMode::Texture_MicroTiled);
info.tiling_idx = static_cast<u32>(AmdGpu::TilingMode::Texture_MicroTiled);
info.num_bits = 32;
info.UpdateSize();
const ImageId null_id = slot_images.insert(instance, scheduler, info);
ASSERT(null_id.index == NULL_IMAGE_ID.index);
auto& img = slot_images[null_id];
const vk::Image& null_image = img.image;
Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image");
Vulkan::SetObjectName(instance.GetDevice(), null_image,
fmt::format("Null Image ({})", vk::to_string(format)));
img.flags = ImageFlagBits::Empty;
img.track_addr = img.info.guest_address;
img.track_addr_end = img.info.guest_address + img.info.guest_size;
ImageViewInfo view_info;
const auto null_view_id =
slot_image_views.insert(instance, view_info, slot_images[null_id], null_id);
ASSERT(null_view_id.index == NULL_IMAGE_VIEW_ID.index);
const vk::ImageView& null_image_view = slot_image_views[null_view_id].image_view.get();
Vulkan::SetObjectName(instance.GetDevice(), null_image_view, "Null Image View");
null_images.emplace(format, null_id);
return null_id;
}
TextureCache::~TextureCache() = default;
void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) {
if (image.hash == 0) {
// Initialize hash
@ -296,7 +307,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
const auto& info = desc.info;
if (info.guest_address == 0) [[unlikely]] {
return NULL_IMAGE_ID;
return GetNullImage(info.pixel_format);
}
std::scoped_lock lock{mutex};

View file

@ -246,6 +246,9 @@ private:
}
}
/// Gets or creates a null image for a particular format.
ImageId GetNullImage(vk::Format format);
/// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);
@ -285,6 +288,7 @@ private:
Common::SlotVector<Image> slot_images;
Common::SlotVector<ImageView> slot_image_views;
tsl::robin_map<u64, Sampler> samplers;
tsl::robin_map<vk::Format, ImageId> null_images;
PageTable page_table;
std::mutex mutex;