mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-05 10:13:15 +00:00
''
This commit is contained in:
commit
ed01f2f371
58 changed files with 1979 additions and 697 deletions
|
@ -212,31 +212,38 @@ static void RestoreRegisters(Xbyak::CodeGenerator& c,
|
|||
}
|
||||
|
||||
/// Switches to the patch stack and stores all registers.
|
||||
static void SaveContext(Xbyak::CodeGenerator& c) {
|
||||
static void SaveContext(Xbyak::CodeGenerator& c, bool save_flags = false) {
|
||||
SaveStack(c);
|
||||
for (int reg = Xbyak::Operand::RAX; reg <= Xbyak::Operand::R15; reg++) {
|
||||
c.push(Xbyak::Reg64(reg));
|
||||
}
|
||||
for (int reg = 0; reg <= 7; reg++) {
|
||||
c.sub(rsp, 32);
|
||||
c.lea(rsp, ptr[rsp - 32]);
|
||||
c.vmovdqu(ptr[rsp], Xbyak::Ymm(reg));
|
||||
}
|
||||
if (save_flags) {
|
||||
c.pushfq();
|
||||
}
|
||||
}
|
||||
|
||||
/// Restores all registers and restores the original stack.
|
||||
/// If the destination is a register, it is not restored to preserve the output.
|
||||
static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst) {
|
||||
static void RestoreContext(Xbyak::CodeGenerator& c, const Xbyak::Operand& dst,
|
||||
bool restore_flags = false) {
|
||||
if (restore_flags) {
|
||||
c.popfq();
|
||||
}
|
||||
for (int reg = 7; reg >= 0; reg--) {
|
||||
if ((!dst.isXMM() && !dst.isYMM()) || dst.getIdx() != reg) {
|
||||
c.vmovdqu(Xbyak::Ymm(reg), ptr[rsp]);
|
||||
}
|
||||
c.add(rsp, 32);
|
||||
c.lea(rsp, ptr[rsp + 32]);
|
||||
}
|
||||
for (int reg = Xbyak::Operand::R15; reg >= Xbyak::Operand::RAX; reg--) {
|
||||
if (!dst.isREG() || dst.getIdx() != reg) {
|
||||
c.pop(Xbyak::Reg64(reg));
|
||||
} else {
|
||||
c.add(rsp, 8);
|
||||
c.lea(rsp, ptr[rsp + 8]);
|
||||
}
|
||||
}
|
||||
RestoreStack(c);
|
||||
|
@ -307,9 +314,24 @@ static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
|
|||
|
||||
SaveRegisters(c, {scratch});
|
||||
|
||||
// BLSI sets CF to zero if source is zero, otherwise it sets CF to one.
|
||||
Xbyak::Label set_carry, clear_carry, end;
|
||||
|
||||
c.mov(scratch, *src);
|
||||
c.neg(scratch);
|
||||
c.neg(scratch); // NEG, like BLSI, clears CF if the source is zero and sets it otherwise
|
||||
c.jc(set_carry);
|
||||
c.jmp(clear_carry);
|
||||
|
||||
c.L(set_carry);
|
||||
c.and_(scratch, *src);
|
||||
c.stc(); // setting/clearing carry needs to happen after the AND because that clears CF
|
||||
c.jmp(end);
|
||||
|
||||
c.L(clear_carry);
|
||||
c.and_(scratch, *src);
|
||||
// We don't need to clear carry here since AND does that for us
|
||||
|
||||
c.L(end);
|
||||
c.mov(dst, scratch);
|
||||
|
||||
RestoreRegisters(c, {scratch});
|
||||
|
@ -323,9 +345,26 @@ static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGener
|
|||
|
||||
SaveRegisters(c, {scratch});
|
||||
|
||||
Xbyak::Label set_carry, clear_carry, end;
|
||||
|
||||
// BLSMSK sets CF to zero if source is NOT zero, otherwise it sets CF to one.
|
||||
c.mov(scratch, *src);
|
||||
c.test(scratch, scratch);
|
||||
c.jz(set_carry);
|
||||
c.jmp(clear_carry);
|
||||
|
||||
c.L(set_carry);
|
||||
c.dec(scratch);
|
||||
c.xor_(scratch, *src);
|
||||
c.stc();
|
||||
c.jmp(end);
|
||||
|
||||
c.L(clear_carry);
|
||||
c.dec(scratch);
|
||||
c.xor_(scratch, *src);
|
||||
// We don't need to clear carry here since XOR does that for us
|
||||
|
||||
c.L(end);
|
||||
c.mov(dst, scratch);
|
||||
|
||||
RestoreRegisters(c, {scratch});
|
||||
|
@ -339,9 +378,26 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat
|
|||
|
||||
SaveRegisters(c, {scratch});
|
||||
|
||||
Xbyak::Label set_carry, clear_carry, end;
|
||||
|
||||
// BLSR sets CF to zero if source is NOT zero, otherwise it sets CF to one.
|
||||
c.mov(scratch, *src);
|
||||
c.test(scratch, scratch);
|
||||
c.jz(set_carry);
|
||||
c.jmp(clear_carry);
|
||||
|
||||
c.L(set_carry);
|
||||
c.dec(scratch);
|
||||
c.and_(scratch, *src);
|
||||
c.stc();
|
||||
c.jmp(end);
|
||||
|
||||
c.L(clear_carry);
|
||||
c.dec(scratch);
|
||||
c.and_(scratch, *src);
|
||||
// We don't need to clear carry here since AND does that for us
|
||||
|
||||
c.L(end);
|
||||
c.mov(dst, scratch);
|
||||
|
||||
RestoreRegisters(c, {scratch});
|
||||
|
@ -361,7 +417,7 @@ static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGe
|
|||
const auto float_count = dst.getBit() / 32;
|
||||
const auto byte_count = float_count * 4;
|
||||
|
||||
SaveContext(c);
|
||||
SaveContext(c, true);
|
||||
|
||||
// Allocate stack space for outputs and load into first parameter.
|
||||
c.sub(rsp, byte_count);
|
||||
|
@ -397,7 +453,7 @@ static void GenerateVCVTPH2PS(const ZydisDecodedOperand* operands, Xbyak::CodeGe
|
|||
}
|
||||
c.add(rsp, byte_count);
|
||||
|
||||
RestoreContext(c, dst);
|
||||
RestoreContext(c, dst, true);
|
||||
}
|
||||
|
||||
using SingleToHalfFloatConverter = half_float::half (*)(float);
|
||||
|
@ -425,7 +481,7 @@ static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGe
|
|||
const auto float_count = src.getBit() / 32;
|
||||
const auto byte_count = float_count * 4;
|
||||
|
||||
SaveContext(c);
|
||||
SaveContext(c, true);
|
||||
|
||||
if (dst->isXMM()) {
|
||||
// Allocate stack space for outputs and load into first parameter.
|
||||
|
@ -472,7 +528,7 @@ static void GenerateVCVTPS2PH(const ZydisDecodedOperand* operands, Xbyak::CodeGe
|
|||
c.add(rsp, byte_count);
|
||||
}
|
||||
|
||||
RestoreContext(c, *dst);
|
||||
RestoreContext(c, *dst, true);
|
||||
}
|
||||
|
||||
static bool FilterRosetta2Only(const ZydisDecodedOperand*) {
|
||||
|
|
|
@ -499,7 +499,7 @@ int PS4_SYSV_ABI sceGnmDestroyWorkloadStream() {
|
|||
}
|
||||
|
||||
void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
|
||||
LOG_INFO(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
|
||||
LOG_DEBUG(Lib_GnmDriver, "vqid {}, offset_dw {}", gnm_vqid, next_offs_dw);
|
||||
|
||||
if (gnm_vqid == 0) {
|
||||
return;
|
||||
|
@ -2054,7 +2054,7 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, u32* dcb_gpu_addrs
|
|||
u32* dcb_sizes_in_bytes, u32* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes, u32 vo_handle,
|
||||
u32 buf_idx, u32 flip_mode, u32 flip_arg) {
|
||||
LOG_INFO(Lib_GnmDriver, "called [buf = {}]", buf_idx);
|
||||
LOG_DEBUG(Lib_GnmDriver, "called [buf = {}]", buf_idx);
|
||||
|
||||
auto* cmdbuf = dcb_gpu_addrs[count - 1];
|
||||
const auto size_dw = dcb_sizes_in_bytes[count - 1] / 4;
|
||||
|
@ -2078,7 +2078,7 @@ int PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload() {
|
|||
s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[],
|
||||
u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes) {
|
||||
LOG_INFO(Lib_GnmDriver, "called");
|
||||
LOG_DEBUG(Lib_GnmDriver, "called");
|
||||
|
||||
if (!dcb_gpu_addrs || !dcb_sizes_in_bytes) {
|
||||
LOG_ERROR(Lib_GnmDriver, "dcbGpuAddrs and dcbSizesInBytes must not be NULL");
|
||||
|
@ -2154,7 +2154,7 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload() {
|
|||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSubmitDone() {
|
||||
LOG_INFO(Lib_GnmDriver, "called");
|
||||
LOG_DEBUG(Lib_GnmDriver, "called");
|
||||
if (!liverpool->IsGpuIdle()) {
|
||||
submission_lock = true;
|
||||
}
|
||||
|
|
|
@ -56,7 +56,7 @@ struct OrbisVirtualQueryInfo {
|
|||
BitField<1, 1, u32> is_direct;
|
||||
BitField<2, 1, u32> is_stack;
|
||||
BitField<3, 1, u32> is_pooled;
|
||||
BitField<4, 1, u32> is_commited;
|
||||
BitField<4, 1, u32> is_committed;
|
||||
};
|
||||
std::array<char, 32> name;
|
||||
};
|
||||
|
|
|
@ -565,7 +565,7 @@ int PS4_SYSV_ABI sceUserServiceGetLoginFlag() {
|
|||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceUserServiceGetLoginUserIdList(OrbisUserServiceLoginUserIdList* userIdList) {
|
||||
LOG_INFO(Lib_UserService, "called");
|
||||
LOG_DEBUG(Lib_UserService, "called");
|
||||
if (userIdList == nullptr) {
|
||||
LOG_ERROR(Lib_UserService, "user_id is null");
|
||||
return ORBIS_USER_SERVICE_ERROR_INVALID_ARGUMENT;
|
||||
|
|
|
@ -140,8 +140,8 @@ s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode
|
|||
return ORBIS_VIDEO_OUT_ERROR_INVALID_INDEX;
|
||||
}
|
||||
|
||||
LOG_INFO(Lib_VideoOut, "bufferIndex = {}, flipMode = {}, flipArg = {}", bufferIndex, flipMode,
|
||||
flipArg);
|
||||
LOG_DEBUG(Lib_VideoOut, "bufferIndex = {}, flipMode = {}, flipArg = {}", bufferIndex, flipMode,
|
||||
flipArg);
|
||||
|
||||
if (!driver->SubmitFlip(port, bufferIndex, flipArg)) {
|
||||
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
|
||||
|
|
|
@ -421,16 +421,20 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
|
|||
const auto& vma = it->second;
|
||||
info->start = vma.base;
|
||||
info->end = vma.base + vma.size;
|
||||
info->offset = vma.phys_base;
|
||||
info->protection = static_cast<s32>(vma.prot);
|
||||
info->is_flexible.Assign(vma.type == VMAType::Flexible);
|
||||
info->is_direct.Assign(vma.type == VMAType::Direct);
|
||||
info->is_commited.Assign(vma.type != VMAType::Free && vma.type != VMAType::Reserved);
|
||||
info->is_stack.Assign(vma.type == VMAType::Stack);
|
||||
info->is_pooled.Assign(vma.type == VMAType::Pooled);
|
||||
info->is_committed.Assign(vma.type != VMAType::Free && vma.type != VMAType::Reserved);
|
||||
vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size()));
|
||||
if (vma.type == VMAType::Direct) {
|
||||
const auto dmem_it = FindDmemArea(vma.phys_base);
|
||||
ASSERT(dmem_it != dmem_map.end());
|
||||
info->offset = vma.phys_base;
|
||||
info->memory_type = dmem_it->second.memory_type;
|
||||
} else {
|
||||
info->memory_type = ::Libraries::Kernel::SCE_KERNEL_WB_ONION;
|
||||
}
|
||||
|
||||
return ORBIS_OK;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue