Optimize games that hit unpatchable EXTRQ/INSERTQ (#2888)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions

* Make signal handler faster

* I love clang-format

* Use faster decoding

* MacOS CI
This commit is contained in:
Paris Oplopoios 2025-05-08 19:59:12 +03:00 committed by GitHub
parent 3b7c36e1ba
commit 58df609ba0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -464,9 +464,8 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
if (needs_trampoline && instruction.length < 5) { if (needs_trampoline && instruction.length < 5) {
// Trampoline is needed but instruction is too short to patch. // Trampoline is needed but instruction is too short to patch.
// Return false and length to fall back to the illegal instruction handler, // Return false and length to signal to AOT compilation that this instruction
// or to signal to AOT compilation that this instruction should be skipped and // should be skipped and handled at runtime.
// handled at runtime.
return std::make_pair(false, instruction.length); return std::make_pair(false, instruction.length);
} }
@ -512,32 +511,58 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
// Returns true when the bytes at code_address begin with 66 0F 79 (extrq) or F2 0F 79
// (insertq). Only the first three bytes are inspected, and the second and third bytes are
// read only after the prefix byte has matched.
static bool Is4ByteExtrqOrInsertq(void* code_address) {
    const u8* opcode = (const u8*)code_address;
    switch (opcode[0]) {
    case 0x66: // extrq
    case 0xF2: // insertq
        // Both encodings share the same two opcode bytes after the prefix.
        return opcode[1] == 0x0F && opcode[2] == 0x79;
    default:
        return false;
    }
}
static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
// We need to decode the instruction to find out what it is. Normally we'd use a fully fledged
// decoder like Zydis; however, Zydis does a lot of work that costs performance and that we
// don't need here. We can get the information about the instruction much faster with a mini
// decoder, since we know it is definitely an extrq or an insertq. If for some reason we
// need to interpret more instructions in the future (I don't see why we would), we can revert
// to using Zydis.
ZydisMnemonic mnemonic;
u8* bytes = (u8*)code_address;
if (bytes[0] == 0x66) {
mnemonic = ZYDIS_MNEMONIC_EXTRQ;
} else if (bytes[0] == 0xF2) {
mnemonic = ZYDIS_MNEMONIC_INSERTQ;
} else {
ZydisDecodedInstruction instruction; ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT]; ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
const auto status = const auto status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address); Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}",
switch (instruction.mnemonic) { fmt::ptr(code_address),
case ZYDIS_MNEMONIC_EXTRQ: { ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic)
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && : "Failed to decode");
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
if (immediateForm) {
LOG_CRITICAL(Core, "EXTRQ immediate form should have been patched at code address: {}",
fmt::ptr(code_address));
return false; return false;
} else { }
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[0].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[0].reg.value <= ZYDIS_REGISTER_XMM15 &&
operands[1].reg.value >= ZYDIS_REGISTER_XMM0 &&
operands[1].reg.value <= ZYDIS_REGISTER_XMM15,
"Unexpected operand types for EXTRQ instruction");
const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0; ASSERT(bytes[1] == 0x0F && bytes[2] == 0x79);
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;
// Note: It's guaranteed that there's no REX prefix in these instructions checked by
// Is4ByteExtrqOrInsertq
u8 modrm = bytes[3];
u8 rm = modrm & 0b111;
u8 reg = (modrm >> 3) & 0b111;
u8 mod = (modrm >> 6) & 0b11;
ASSERT(mod == 0b11); // Any instruction we interpret here uses reg/reg addressing only
int dstIndex = reg;
int srcIndex = rm;
switch (mnemonic) {
case ZYDIS_MNEMONIC_EXTRQ: {
const auto dst = Common::GetXmmPointer(ctx, dstIndex); const auto dst = Common::GetXmmPointer(ctx, dstIndex);
const auto src = Common::GetXmmPointer(ctx, srcIndex); const auto src = Common::GetXmmPointer(ctx, srcIndex);
@ -571,32 +596,11 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, instruction.length); Common::IncrementRip(ctx, 4);
return true; return true;
} }
break;
}
case ZYDIS_MNEMONIC_INSERTQ: { case ZYDIS_MNEMONIC_INSERTQ: {
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
if (immediateForm) {
LOG_CRITICAL(Core,
"INSERTQ immediate form should have been patched at code address: {}",
fmt::ptr(code_address));
return false;
} else {
ASSERT_MSG(operands[2].type == ZYDIS_OPERAND_TYPE_UNUSED &&
operands[3].type == ZYDIS_OPERAND_TYPE_UNUSED,
"operands 2 and 3 must be unused for register form.");
ASSERT_MSG(operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER &&
operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER,
"operands 0 and 1 must be registers.");
const auto dstIndex = operands[0].reg.value - ZYDIS_REGISTER_XMM0;
const auto srcIndex = operands[1].reg.value - ZYDIS_REGISTER_XMM0;
const auto dst = Common::GetXmmPointer(ctx, dstIndex); const auto dst = Common::GetXmmPointer(ctx, dstIndex);
const auto src = Common::GetXmmPointer(ctx, srcIndex); const auto src = Common::GetXmmPointer(ctx, srcIndex);
@ -632,16 +636,12 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, instruction.length); Common::IncrementRip(ctx, 4);
return true; return true;
} }
break;
}
default: { default: {
LOG_ERROR(Core, "Unhandled illegal instruction at code address {}: {}", UNREACHABLE();
fmt::ptr(code_address), ZydisMnemonicGetString(instruction.mnemonic));
return false;
} }
} }
@ -695,9 +695,22 @@ static bool PatchesAccessViolationHandler(void* context, void* /* fault_address
// Handler for illegal-instruction faults (shown here as an old/new side-by-side diff; the
// notes below describe the new, right-hand version).
// Takes the code address from Common::GetRip(context). If Is4ByteExtrqOrInsertq matches, the
// instruction is handed straight to TryExecuteIllegalInstruction and that result is returned.
// Otherwise TryPatchJit is attempted; on failure the instruction is decoded only to log its
// mnemonic (or "Failed to decode").
// NOTE(review): the patch-failure path still falls through to `return true` — confirm that
// reporting the fault as handled is intended; the caller's use of the return value is not
// visible here.
static bool PatchesIllegalInstructionHandler(void* context) { static bool PatchesIllegalInstructionHandler(void* context) {
void* code_address = Common::GetRip(context); void* code_address = Common::GetRip(context);
if (!TryPatchJit(code_address)) { if (Is4ByteExtrqOrInsertq(code_address)) {
// The instruction is not big enough for a relative jump, don't try to patch it and pass it
// to our illegal instruction interpreter directly
return TryExecuteIllegalInstruction(context, code_address); return TryExecuteIllegalInstruction(context, code_address);
} else {
if (!TryPatchJit(code_address)) {
ZydisDecodedInstruction instruction;
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT];
const auto status =
Common::Decoder::Instance()->decodeInstruction(instruction, operands, code_address);
LOG_ERROR(Core, "Failed to patch address {:x} -- mnemonic: {}", (u64)code_address,
ZYAN_SUCCESS(status) ? ZydisMnemonicGetString(instruction.mnemonic)
: "Failed to decode");
} }
}
return true; return true;
} }