Zero top bits in INSERTQ/EXTRQ (#3217)

* Zero top bits in INSERTQ/EXTRQ

* Clang-format

* Don't assert
This commit is contained in:
Paris Oplopoios 2025-07-09 13:55:21 +03:00 committed by GitHub
parent df4314f831
commit f5336358ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -163,7 +163,9 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan
mask = (1ULL << length) - 1; mask = (1ULL << length) - 1;
} }
ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64."); if (length + index > 64) {
mask = 0xFFFF'FFFF'FFFF'FFFF;
}
// Get lower qword from xmm register // Get lower qword from xmm register
c.vmovq(scratch1, xmm_dst); c.vmovq(scratch1, xmm_dst);
@ -177,8 +179,8 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan
c.mov(scratch2, mask); c.mov(scratch2, mask);
c.and_(scratch1, scratch2); c.and_(scratch1, scratch2);
// Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't care to preserve them // Writeback to xmm register, extrq instruction says top 64-bits are undefined but zeroed on AMD CPUs
c.vmovq(xmm_dst, scratch1); c.vmovq(xmm_dst, scratch1);
c.pop(scratch2); c.pop(scratch2);
@ -287,7 +289,9 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
mask_value = (1ULL << length) - 1; mask_value = (1ULL << length) - 1;
} }
ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64."); if (length + index > 64) {
mask_value = 0xFFFF'FFFF'FFFF'FFFF;
}
c.vmovq(scratch1, xmm_src); c.vmovq(scratch1, xmm_src);
c.vmovq(scratch2, xmm_dst); c.vmovq(scratch2, xmm_dst);
@ -307,8 +311,9 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
// dst |= src // dst |= src
c.or_(scratch2, scratch1); c.or_(scratch2, scratch1);
// Insert scratch2 into low 64 bits of dst, upper 64 bits are unaffected // Insert scratch2 into low 64 bits of dst, upper 64 bits are undefined but zeroed on AMD
c.vpinsrq(xmm_dst, xmm_dst, scratch2, 0); // CPUs
c.vmovq(xmm_dst, scratch2);
c.pop(mask); c.pop(mask);
c.pop(scratch2); c.pop(scratch2);
@ -374,7 +379,7 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
c.and_(scratch2, mask); c.and_(scratch2, mask);
c.or_(scratch2, scratch1); c.or_(scratch2, scratch1);
// Upper 64 bits are undefined in insertq // Upper 64 bits are undefined in insertq but AMD CPUs zero them
c.vmovq(xmm_dst, scratch2); c.vmovq(xmm_dst, scratch2);
c.pop(mask); c.pop(mask);
@ -635,6 +640,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
lowQWordDst >>= index; lowQWordDst >>= index;
lowQWordDst &= mask; lowQWordDst &= mask;
memset((u8*)dst + sizeof(u64), 0, sizeof(u64));
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, 4); Common::IncrementRip(ctx, 4);
@ -675,6 +681,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
lowQWordDst &= ~(mask << index); lowQWordDst &= ~(mask << index);
lowQWordDst |= lowQWordSrc << index; lowQWordDst |= lowQWordSrc << index;
memset((u8*)dst + sizeof(u64), 0, sizeof(u64));
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
Common::IncrementRip(ctx, 4); Common::IncrementRip(ctx, 4);