mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-14 08:12:16 +00:00
Optimize games that hit unpatchable EXTRQ/INSERTQ (#2888)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
* Make signal handler faster * I love clang-format * Use faster decoding * MacOS CI
This commit is contained in:
parent
3b7c36e1ba
commit
58df609ba0
1 changed files with 136 additions and 123 deletions
|
@ -464,9 +464,8 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
|
|||
|
||||
if (needs_trampoline && instruction.length < 5) {
|
||||
// Trampoline is needed but instruction is too short to patch.
|
||||
// Return false and length to fall back to the illegal instruction handler,
|
||||
// or to signal to AOT compilation that this instruction should be skipped and
|
||||
// handled at runtime.
|
||||
// Return false and length to signal to AOT compilation that this instruction
|
||||
// should be skipped and handled at runtime.
|
||||
return std::make_pair(false, instruction.length);
|
||||
}
|
||||
|
||||
|
@ -512,136 +511,137 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
|
|||
|
||||
#if defined(ARCH_X86_64)
|
||||
|
||||
static bool Is4ByteExtrqOrInsertq(void* code_address) {
|
||||
u8* bytes = (u8*)code_address;
|
||||
if (bytes[0] == 0x66 && bytes[1] == 0x0F && bytes[2] == 0x79) {
|
||||
return true; // extrq
|
||||
} else if (bytes[0] == 0xF2 && bytes[1] == 0x0F && bytes[2] == 0x79) {
|
||||
return true; // insertq
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
|
||||
ZydisDecodedInstruction instruction;
|
||||
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
|
||||
const auto status =
|
||||
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
|
||||
// We need to decode the instruction to find out what it is. Normally we'd use a fully fleshed
|
||||
// out decoder like Zydis, however Zydis does a bunch of stuff that impact performance that we
|
||||
// don't care about. We can get information about the instruction a lot faster by writing a mini
|
||||
// decoder here, since we know it is definitely an extrq or an insertq. If for some reason we
|
||||
// need to interpret more instructions in the future (I don't see why we would), we can revert
|
||||
// to using Zydis.
|
||||
ZydisMnemonic mnemonic;
|
||||
u8* bytes = (u8*)code_address;
|
||||
if (bytes[0] == 0x66) {
|
||||
mnemonic = ZYDIS_MNEMONIC_EXTRQ;
|
||||
} else if (bytes[0] == 0xF2) {
|
||||
mnemonic = ZYDIS_MNEMONIC_INSERTQ;
|
||||
} else {
|
||||
ZydisDecodedInstruction instruction;
|
||||
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
|
||||
const auto status =
|
||||
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
|
||||
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}",
|
||||
fmt::ptr(code_address),
|
||||
ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic)
|
||||
: "Failed to decode");
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (instruction.mnemonic) {
|
||||
ASSERT(bytes[1] == 0x0F && bytes[2] == 0x79);
|
||||
|
||||
// Note: It's guaranteed that there's no REX prefix in these instructions checked by
|
||||
// Is4ByteExtrqOrInsertq
|
||||
u8 modrm = bytes[3];
|
||||
u8 rm = modrm & 0b111;
|
||||
u8 reg = (modrm >> 3) & 0b111;
|
||||
u8 mod = (modrm >> 6) & 0b11;
|
||||
|
||||
ASSERT(mod == 0b11); // Any instruction we interpret here uses reg/reg addressing only
|
||||
|
||||
int dstIndex = reg;
|
||||
int srcIndex = rm;
|
||||
|
||||
switch (mnemonic) {
|
||||
case ZYDIS_MNEMONIC_EXTRQ: {
|
||||
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
|
||||
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
|
||||
if (immediateForm) {
|
||||
LOG_CRITICAL(Core, "EXTRQ immediate form should have been patched at code address: {}",
|
||||
fmt::ptr(code_address));
|
||||
return false;
|
||||
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
|
||||
const auto src = Common::GetXmmPointer(ctx, srcIndex);
|
||||
|
||||
u64 lowQWordSrc;
|
||||
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
|
||||
|
||||
u64 lowQWordDst;
|
||||
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
|
||||
|
||||
u64 length = lowQWordSrc & 0x3F;
|
||||
u64 mask;
|
||||
if (length == 0) {
|
||||
length = 64; // for the check below
|
||||
mask = 0xFFFF'FFFF'FFFF'FFFF;
|
||||
} else {
|
||||
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
|
||||
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
|
||||
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
|
||||
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
|
||||
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
|
||||
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
|
||||
"Unexpected operand types for EXTRQ instruction");
|
||||
|
||||
const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0;
|
||||
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;
|
||||
|
||||
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
|
||||
const auto src = Common::GetXmmPointer(ctx, srcIndex);
|
||||
|
||||
u64 lowQWordSrc;
|
||||
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
|
||||
|
||||
u64 lowQWordDst;
|
||||
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
|
||||
|
||||
u64 length = lowQWordSrc & 0x3F;
|
||||
u64 mask;
|
||||
if (length == 0) {
|
||||
length = 64; // for the check below
|
||||
mask = 0xFFFF'FFFF'FFFF'FFFF;
|
||||
} else {
|
||||
mask = (1ULL << length) - 1;
|
||||
}
|
||||
|
||||
u64 index = (lowQWordSrc >> 8) & 0x3F;
|
||||
if (length + index > 64) {
|
||||
// Undefined behavior if length + index is bigger than 64 according to the spec,
|
||||
// we'll warn and continue execution.
|
||||
LOG_TRACE(Core,
|
||||
"extrq at {} with length {} and index {} is bigger than 64, "
|
||||
"undefined behavior",
|
||||
fmt::ptr(code_address), length, index);
|
||||
}
|
||||
|
||||
lowQWordDst >>= index;
|
||||
lowQWordDst &= mask;
|
||||
|
||||
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
|
||||
|
||||
Common::IncrementRip(ctx, instruction.length);
|
||||
|
||||
return true;
|
||||
mask = (1ULL << length) - 1;
|
||||
}
|
||||
break;
|
||||
|
||||
u64 index = (lowQWordSrc >> 8) & 0x3F;
|
||||
if (length + index > 64) {
|
||||
// Undefined behavior if length + index is bigger than 64 according to the spec,
|
||||
// we'll warn and continue execution.
|
||||
LOG_TRACE(Core,
|
||||
"extrq at {} with length {} and index {} is bigger than 64, "
|
||||
"undefined behavior",
|
||||
fmt::ptr(code_address), length, index);
|
||||
}
|
||||
|
||||
lowQWordDst >>= index;
|
||||
lowQWordDst &= mask;
|
||||
|
||||
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
|
||||
|
||||
Common::IncrementRip(ctx, 4);
|
||||
|
||||
return true;
|
||||
}
|
||||
case ZYDIS_MNEMONIC_INSERTQ: {
|
||||
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
|
||||
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
|
||||
if (immediateForm) {
|
||||
LOG_CRITICAL(Core,
|
||||
"INSERTQ immediate form should have been patched at code address: {}",
|
||||
fmt::ptr(code_address));
|
||||
return false;
|
||||
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
|
||||
const auto src = Common::GetXmmPointer(ctx, srcIndex);
|
||||
|
||||
u64 lowQWordSrc, highQWordSrc;
|
||||
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
|
||||
memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc));
|
||||
|
||||
u64 lowQWordDst;
|
||||
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
|
||||
|
||||
u64 length = highQWordSrc & 0x3F;
|
||||
u64 mask;
|
||||
if (length == 0) {
|
||||
length = 64; // for the check below
|
||||
mask = 0xFFFF'FFFF'FFFF'FFFF;
|
||||
} else {
|
||||
ASSERT_MSG(operands[2].type == ZYDIS_OPERAND_TYPE_UNUSED &&
|
||||
operands[3].type == ZYDIS_OPERAND_TYPE_UNUSED,
|
||||
"operands 2 and 3 must be unused for register form.");
|
||||
|
||||
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
|
||||
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER,
|
||||
"operands 0 and 1 must be registers.");
|
||||
|
||||
const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0;
|
||||
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;
|
||||
|
||||
const auto dst = Common::GetXmmPointer(ctx, dstIndex);
|
||||
const auto src = Common::GetXmmPointer(ctx, srcIndex);
|
||||
|
||||
u64 lowQWordSrc, highQWordSrc;
|
||||
memcpy(&lowQWordSrc, src, sizeof(lowQWordSrc));
|
||||
memcpy(&highQWordSrc, (u8*)src + 8, sizeof(highQWordSrc));
|
||||
|
||||
u64 lowQWordDst;
|
||||
memcpy(&lowQWordDst, dst, sizeof(lowQWordDst));
|
||||
|
||||
u64 length = highQWordSrc & 0x3F;
|
||||
u64 mask;
|
||||
if (length == 0) {
|
||||
length = 64; // for the check below
|
||||
mask = 0xFFFF'FFFF'FFFF'FFFF;
|
||||
} else {
|
||||
mask = (1ULL << length) - 1;
|
||||
}
|
||||
|
||||
u64 index = (highQWordSrc >> 8) & 0x3F;
|
||||
if (length + index > 64) {
|
||||
// Undefined behavior if length + index is bigger than 64 according to the spec,
|
||||
// we'll warn and continue execution.
|
||||
LOG_TRACE(Core,
|
||||
"insertq at {} with length {} and index {} is bigger than 64, "
|
||||
"undefined behavior",
|
||||
fmt::ptr(code_address), length, index);
|
||||
}
|
||||
|
||||
lowQWordSrc &= mask;
|
||||
lowQWordDst &= ~(mask << index);
|
||||
lowQWordDst |= lowQWordSrc << index;
|
||||
|
||||
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
|
||||
|
||||
Common::IncrementRip(ctx, instruction.length);
|
||||
|
||||
return true;
|
||||
mask = (1ULL << length) - 1;
|
||||
}
|
||||
break;
|
||||
|
||||
u64 index = (highQWordSrc >> 8) & 0x3F;
|
||||
if (length + index > 64) {
|
||||
// Undefined behavior if length + index is bigger than 64 according to the spec,
|
||||
// we'll warn and continue execution.
|
||||
LOG_TRACE(Core,
|
||||
"insertq at {} with length {} and index {} is bigger than 64, "
|
||||
"undefined behavior",
|
||||
fmt::ptr(code_address), length, index);
|
||||
}
|
||||
|
||||
lowQWordSrc &= mask;
|
||||
lowQWordDst &= ~(mask << index);
|
||||
lowQWordDst |= lowQWordSrc << index;
|
||||
|
||||
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
|
||||
|
||||
Common::IncrementRip(ctx, 4);
|
||||
|
||||
return true;
|
||||
}
|
||||
default: {
|
||||
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}",
|
||||
fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic));
|
||||
return false;
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -695,9 +695,22 @@ static bool PatchesAccessViolationHandler(void* context, void* /* fault_address
|
|||
|
||||
static bool PatchesIllegalInstructionHandler(void* context) {
|
||||
void* code_address = Common::GetRip(context);
|
||||
if (!TryPatchJit(code_address)) {
|
||||
if (Is4ByteExtrqOrInsertq(code_address)) {
|
||||
// The instruction is not big enough for a relative jump, don't try to patch it and pass it
|
||||
// to our illegal instruction interpreter directly
|
||||
return TryExecuteIllegalInstruction(context, code_address);
|
||||
} else {
|
||||
if (!TryPatchJit(code_address)) {
|
||||
ZydisDecodedInstruction instruction;
|
||||
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
|
||||
const auto status =
|
||||
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
|
||||
LOG_ERROR(Core, "Failed to patch address {:x} -- mnemonic: {}", (u64)code_address,
|
||||
ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic)
|
||||
: "Failed to decode");
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue