diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index e4f65cd31..8512858e9 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -163,9 +163,7 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan mask = (1ULL << length) - 1; } - if (length + index > 64) { - mask = 0xFFFF'FFFF'FFFF'FFFF; - } + ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64."); // Get lower qword from xmm register c.vmovq(scratch1, xmm_dst); @@ -179,8 +177,8 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan c.mov(scratch2, mask); c.and_(scratch1, scratch2); - // Writeback to xmm register, extrq instruction says top 64-bits are undefined but zeroed on - // AMD CPUs + // Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't + // care to preserve them c.vmovq(xmm_dst, scratch1); c.pop(scratch2); @@ -289,9 +287,7 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper mask_value = (1ULL << length) - 1; } - if (length + index > 64) { - mask_value = 0xFFFF'FFFF'FFFF'FFFF; - } + ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64."); c.vmovq(scratch1, xmm_src); c.vmovq(scratch2, xmm_dst); @@ -311,9 +307,8 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper // dst |= src c.or_(scratch2, scratch1); - // Insert scratch2 into low 64 bits of dst, upper 64 bits are undefined but zeroed on AMD - // CPUs - c.vmovq(xmm_dst, scratch2); + // Insert scratch2 into low 64 bits of dst, upper 64 bits are unaffected + c.vpinsrq(xmm_dst, xmm_dst, scratch2, 0); c.pop(mask); c.pop(scratch2); @@ -379,7 +374,7 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper c.and_(scratch2, mask); c.or_(scratch2, scratch1); - // Upper 64 bits are undefined in insertq but AMD CPUs zero them + // Upper 64 bits are undefined in insertq c.vmovq(xmm_dst, scratch2); c.pop(mask); @@ -640,7 +635,6 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { lowQWordDst >>= index; lowQWordDst &= mask; - memset((u8*)dst + sizeof(u64), 0, sizeof(u64)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); Common::IncrementRip(ctx, 4); @@ -681,7 +675,6 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { lowQWordDst &= ~(mask << index); lowQWordDst |= lowQWordSrc << index; - memset((u8*)dst + sizeof(u64), 0, sizeof(u64)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); Common::IncrementRip(ctx, 4);