This commit is contained in:
Antonio 2024-09-03 16:23:04 -04:00
commit ed01f2f371
58 changed files with 1979 additions and 697 deletions

View file

@ -212,31 +212,38 @@ static void RestoreRegisters(Xbyak::CodeGenerator& c,
}
/// Switches to the patch stack and stores all registers.
static void SaveContext(Xbyak::CodeGenerator& c) {
static void SaveContext(Xbyak::CodeGenerator& c, bool save_flags = false) {
SaveStack(c);
for (int reg = Xbyak::Operand::RAX; reg <= Xbyak::Operand::R15; reg++) {
c.push(Xbyak::Reg64(reg));
}
for (int reg = 0; reg <= 7; reg++) {
c.sub(rsp, 32);
c.lea(rsp, ptr[rsp - 32]);
c.vmovdqu(ptr[rsp], Xbyak::Ymm(reg));
}
if (save_flags) {
c.pushfq();
}
}
/// Restores all registers and restores the original stack.
/// If the destination is a register, it is not restored to preserve the output.
static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst) {
static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst,
bool restore_flags = false) {
if (restore_flags) {
c.popfq();
}
for (int reg = 7; reg >= 0; reg--) {
if ((!dst.isXMM() && !dst.isYMM()) || dst.getIdx() != reg) {
c.vmovdqu(Xbyak::Ymm(reg), ptr[rsp]);
}
c.add(rsp, 32);
c.lea(rsp, ptr[rsp + 32]);
}
for (int reg = Xbyak::Operand::R15; reg >= Xbyak::Operand::RAX; reg--) {
if (!dst.isREG() || dst.getIdx() != reg) {
c.pop(Xbyak::Reg64(reg));
} else {
c.add(rsp, 8);
c.lea(rsp, ptr[rsp + 8]);
}
}
RestoreStack(c);
@ -307,9 +314,24 @@ static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
SaveRegisters(c, {scratch});
// BLSI sets CF to zero if source is zero, otherwise it sets CF to one.
Xbyak::Label set_carry, clear_carry, end;
c.mov(scratch, *src);
c.neg(scratch);
c.neg(scratch); // NEG, like BLSI, clears CF if the source is zero and sets it otherwise
c.jc(set_carry);
c.jmp(clear_carry);
c.L(set_carry);
c.and_(scratch, *src);
c.stc(); // setting/clearing carry needs to happen after the AND because that clears CF
c.jmp(end);
c.L(clear_carry);
c.and_(scratch, *src);
// We don't need to clear carry here since AND does that for us
c.L(end);
c.mov(dst, scratch);
RestoreRegisters(c, {scratch});
@ -323,9 +345,26 @@ static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGener
SaveRegisters(c, {scratch});
Xbyak::Label set_carry, clear_carry, end;
// BLSMSK sets CF to zero if source is NOT zero, otherwise it sets CF to one.
c.mov(scratch, *src);
c.test(scratch, scratch);
c.jz(set_carry);
c.jmp(clear_carry);
c.L(set_carry);
c.dec(scratch);
c.xor_(scratch, *src);
c.stc();
c.jmp(end);
c.L(clear_carry);
c.dec(scratch);
c.xor_(scratch, *src);
// We don't need to clear carry here since XOR does that for us
c.L(end);
c.mov(dst, scratch);
RestoreRegisters(c, {scratch});
@ -339,9 +378,26 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
SaveRegisters(c, {scratch});
Xbyak::Label set_carry, clear_carry, end;
// BLSR sets CF to zero if source is NOT zero, otherwise it sets CF to one.
c.mov(scratch, *src);
c.test(scratch, scratch);
c.jz(set_carry);
c.jmp(clear_carry);
c.L(set_carry);
c.dec(scratch);
c.and_(scratch, *src);
c.stc();
c.jmp(end);
c.L(clear_carry);
c.dec(scratch);
c.and_(scratch, *src);
// We don't need to clear carry here since AND does that for us
c.L(end);
c.mov(dst, scratch);
RestoreRegisters(c, {scratch});
@ -361,7 +417,7 @@ static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGe
const auto float_count = dst.getBit() / 32;
const auto byte_count = float_count * 4;
SaveContext(c);
SaveContext(c, true);
// Allocate stack space for outputs and load into first parameter.
c.sub(rsp, byte_count);
@ -397,7 +453,7 @@ static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGe
}
c.add(rsp, byte_count);
RestoreContext(c, dst);
RestoreContext(c, dst, true);
}
using SingleToHalfFloatConverter = half_float::half (*)(float);
@ -425,7 +481,7 @@ static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGe
const auto float_count = src.getBit() / 32;
const auto byte_count = float_count * 4;
SaveContext(c);
SaveContext(c, true);
if (dst->isXMM()) {
// Allocate stack space for outputs and load into first parameter.
@ -472,7 +528,7 @@ static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGe
c.add(rsp, byte_count);
}
RestoreContext(c, *dst);
RestoreContext(c, *dst, true);
}
static bool FilterRosetta2Only(const ZydisDecodedOperand*) {

View file

@ -499,7 +499,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() {
}
void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
LOG_INFO(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
if (gnm_vqid == 0) {
return;
@ -2054,7 +2054,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, u32* dcb_gpu_addrs
u32* dcb_sizes_in_bytes, u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes, u32 vo_handle,
u32 buf_idx, u32 flip_mode, u32 flip_arg) {
LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx);
LOG_DEBUG(Lib_GnmDriver, "called [buf = {}]", buf_idx);
auto* cmdbuf = dcb_gpu_addrs[count - 1];
const auto size_dw = dcb_sizes_in_bytes[count - 1] / 4;
@ -2078,7 +2078,7 @@ int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() {
s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[],
u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes) {
LOG_INFO(Lib_GnmDriver, "called");
LOG_DEBUG(Lib_GnmDriver, "called");
if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
@ -2154,7 +2154,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
}
int PS4_SYSV_ABI sceGnmSubmitDone() {
LOG_INFO(Lib_GnmDriver, "called");
LOG_DEBUG(Lib_GnmDriver, "called");
if (!liverpool->IsGpuIdle()) {
submission_lock = true;
}

View file

@ -56,7 +56,7 @@ struct OrbisVirtualQueryInfo {
BitField<1, 1, u32> is_direct;
BitField<2, 1, u32> is_stack;
BitField<3, 1, u32> is_pooled;
BitField<4, 1, u32> is_commited;
BitField<4, 1, u32> is_committed;
};
std::array<char, 32> name;
};

View file

@ -565,7 +565,7 @@ int PS4_SYSV_ABI sceUserServiceGetLoginFlag() {
}
s32 PS4_SYSV_ABI sceUserServiceGetLoginUserIdList(OrbisUserServiceLoginUserIdList* userIdList) {
LOG_INFO(Lib_UserService, "called");
LOG_DEBUG(Lib_UserService, "called");
if (userIdList == nullptr) {
LOG_ERROR(Lib_UserService, "user_id is null");
return ORBIS_USER_SERVICE_ERROR_INVALID_ARGUMENT;

View file

@ -140,8 +140,8 @@ s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode
return ORBIS_VIDEO_OUT_ERROR_INVALID_INDEX;
}
LOG_INFO(Lib_VideoOut, "bufferIndex = {}, flipMode = {}, flipArg = {}", bufferIndex, flipMode,
flipArg);
LOG_DEBUG(Lib_VideoOut, "bufferIndex = {}, flipMode = {}, flipArg = {}", bufferIndex, flipMode,
flipArg);
if (!driver->SubmitFlip(port, bufferIndex, flipArg)) {
LOG_ERROR(Lib_VideoOut, "Flip queue is full");

View file

@ -421,16 +421,20 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
const auto& vma = it->second;
info->start = vma.base;
info->end = vma.base + vma.size;
info->offset = vma.phys_base;
info->protection = static_cast<s32>(vma.prot);
info->is_flexible.Assign(vma.type == VMAType::Flexible);
info->is_direct.Assign(vma.type == VMAType::Direct);
info->is_commited.Assign(vma.type != VMAType::Free && vma.type != VMAType::Reserved);
info->is_stack.Assign(vma.type == VMAType::Stack);
info->is_pooled.Assign(vma.type == VMAType::Pooled);
info->is_committed.Assign(vma.type != VMAType::Free && vma.type != VMAType::Reserved);
vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size()));
if (vma.type == VMAType::Direct) {
const auto dmem_it = FindDmemArea(vma.phys_base);
ASSERT(dmem_it != dmem_map.end());
info->offset = vma.phys_base;
info->memory_type = dmem_it->second.memory_type;
} else {
info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION;
}
return ORBIS_OK;