From 0a11060964fc3cc8a50aab94c4ae970de7a10ab1 Mon Sep 17 00:00:00 2001 From: offtkp Date: Wed, 9 Jul 2025 12:20:57 +0300 Subject: [PATCH] Zero top bits in INSERTQ/EXTRQ --- src/core/cpu_patches.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 8512858e9..a82ec4ff2 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -177,8 +177,7 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan c.mov(scratch2, mask); c.and_(scratch1, scratch2); - // Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't - // care to preserve them + // Writeback to xmm register, extrq instruction says top 64-bits are undefined but zeroed on AMD CPUs c.vmovq(xmm_dst, scratch1); c.pop(scratch2); @@ -307,8 +306,8 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper // dst |= src c.or_(scratch2, scratch1); - // Insert scratch2 into low 64 bits of dst, upper 64 bits are unaffected - c.vpinsrq(xmm_dst, xmm_dst, scratch2, 0); + // Insert scratch2 into low 64 bits of dst, upper 64 bits are undefined but zeroed on AMD CPUs + c.vmovq(xmm_dst, scratch2); c.pop(mask); c.pop(scratch2); @@ -374,7 +373,7 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper c.and_(scratch2, mask); c.or_(scratch2, scratch1); - // Upper 64 bits are undefined in insertq + // Upper 64 bits are undefined in insertq but AMD CPUs zero them c.vmovq(xmm_dst, scratch2); c.pop(mask); @@ -635,6 +634,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { lowQWordDst >>= index; lowQWordDst &= mask; + memset((u8*)dst + sizeof(u64), 0, sizeof(u64)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); Common::IncrementRip(ctx, 4); @@ -675,6 +675,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { lowQWordDst &= ~(mask << index); lowQWordDst |= lowQWordSrc << index; + memset((u8*)dst + sizeof(u64), 0, sizeof(u64)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); Common::IncrementRip(ctx, 4);