ARMeilleure: Add initial support for AVX512(EVEX encoding) (#3663)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection. Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as shorthands for `F+VL` and `F+VL+DQ`.
* ARMeilleure: Add initial support for EVEX instruction encoding. Does not implement rounding or exception controls.
* ARMeilleure: Add `X86Vpternlogd`. Accelerates the vector-`Not` instruction.
* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}.
* ARMeilleure: Add check for `XCR0` flags. Add XCR0 register checks for AVX and AVX512F, following the guidelines in sections 14.3 and 15.2 of the Intel Architecture Software Developer's Manual.
* ARMeilleure: Increment InternalVersion.
* ARMeilleure: Remove redundant `ReProtect` and `Dispose`; formatting.
* ARMeilleure: Move XCR0 procedure to GetXcr0Eax.
* ARMeilleure: Add `XCR0` to `FeatureInfo` structure.
* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly. Avoids an additional allocation.
* ARMeilleure: Formatting fixes.
This commit is contained in:
parent
d7310d7a1c
commit
295fbd0542
10 changed files with 193 additions and 9 deletions
|
@ -1033,7 +1033,13 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Debug.Assert(opCode != BadOp, "Invalid opcode value.");
|
||||
|
||||
if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
|
||||
if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
|
||||
{
|
||||
WriteEvexInst(dest, src1, src2, type, flags, opCode);
|
||||
|
||||
opCode &= 0xff;
|
||||
}
|
||||
else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
|
||||
{
|
||||
// In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits.
|
||||
|
||||
|
@ -1152,6 +1158,103 @@ namespace ARMeilleure.CodeGen.X86
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes the four-byte EVEX prefix: the 0x62 escape byte followed by the P0, P1 and P2 payload bytes.
/// Only the prefix is written here; nothing else is emitted by this method.
/// </summary>
/// <param name="dest">Register operand; bits 3 and 4 of its index select the (inverted) R and R' fields.</param>
/// <param name="src1">Register operand; bits 3 and 4 of its index select the (inverted) B and X fields.</param>
/// <param name="src2">Register operand; its index is stored, inverted, in the vvvv and V' fields.</param>
/// <param name="type">Operand type; sets the W bit when <c>Is64Bits(type)</c> is true.</param>
/// <param name="flags">Instruction flags; the bits under <see cref="InstructionFlags.PrefixMask"/> select the pp field.</param>
/// <param name="opCode">Opcode including its escape bytes; <c>opCode &gt;&gt; 8</c> must be 0xf00, 0xf38 or 0xf3a.</param>
/// <param name="broadcast">Value of the embedded-broadcast bit (bit 4 of P2).</param>
/// <param name="registerWidth">Vector width in bits: 128, 256 or 512. Selects the L'L field.</param>
/// <param name="maskRegisterIdx">Mask register index in the range 0-7, stored in the aaa field.</param>
/// <param name="zeroElements">Value of the z bit (zeroing- rather than merging-masking).</param>
private void WriteEvexInst(
    Operand dest,
    Operand src1,
    Operand src2,
    OperandType type,
    InstructionFlags flags,
    int opCode,
    bool broadcast = false,
    int registerWidth = 128,
    int maskRegisterIdx = 0,
    bool zeroElements = false)
{
    int destIdx = dest.GetRegister().Index;
    int src1Idx = src1.GetRegister().Index;
    int src2Idx = src2.GetRegister().Index;

    // NOTE(review): X/B are derived from src1 while vvvv/V' are derived from src2. EVEX expects X/B to
    // extend whichever register is placed in ModRM.rm; the ModRM emission is not visible from this
    // method, so confirm the operand mapping against the caller.

    // EVEX escape byte.
    WriteByte(0x62);

    // P0
    // All four register-extension bits are stored inverted: the bit is set when the index bit is clear.
    // Extend dest register
    bool r = (destIdx & 8) == 0;
    // Extend src register
    bool x = (src1Idx & 16) == 0;
    // Extend src register
    bool b = (src1Idx & 8) == 0;
    // Extend dest register
    bool rp = (destIdx & 16) == 0;
    // Escape code index
    byte mm = 0b00;

    switch ((ushort)(opCode >> 8))
    {
        case 0xf00: mm = 0b01; break;
        case 0xf38: mm = 0b10; break;
        case 0xf3a: mm = 0b11; break;

        default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
    }

    WriteByte(
        (byte)(
            (r ? 0x80 : 0) |
            (x ? 0x40 : 0) |
            (b ? 0x20 : 0) |
            (rp ? 0x10 : 0) |
            mm));

    // P1
    // Specify 64-bit lane mode
    bool w = Is64Bits(type);
    // Src2 register index. EVEX.vvvv holds the low four index bits in one's-complement form, matching
    // the inverted V' bit written in P2; writing the raw index would select the wrong register.
    int vvvv = ~src2Idx & 0b1111;
    // Opcode prefix
    byte pp = (flags & InstructionFlags.PrefixMask) switch
    {
        InstructionFlags.Prefix66 => 0b01,
        InstructionFlags.PrefixF3 => 0b10,
        InstructionFlags.PrefixF2 => 0b11,
        _ => 0
    };

    WriteByte(
        (byte)(
            (w ? 0x80 : 0) |
            (vvvv << 3) |
            0b100 | // Bit 2 of P1 is always written as 1.
            pp));

    // P2
    // Specifies register-width
    byte ll = 0b00;

    switch (registerWidth)
    {
        case 128: ll = 0b00; break;
        case 256: ll = 0b01; break;
        case 512: ll = 0b10; break;

        default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
    }

    // Extend src2 register (stored inverted, like vvvv)
    bool vp = (src2Idx & 16) == 0;
    // Mask register index; the unsigned comparison also rejects negative indices.
    Debug.Assert((uint)maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
    byte aaa = (byte)(maskRegisterIdx & 0b111);

    WriteByte(
        (byte)(
            // Mask register determines what elements to zero, rather than what elements to merge
            (zeroElements ? 0x80 : 0) |
            (ll << 5) |
            // Embedded broadcast in the case of a memory operand
            (broadcast ? 0x10 : 0) |
            (vp ? 8 : 0) |
            aaa));
}
|
||||
|
||||
private void WriteCompactInst(Operand operand, int opCode)
|
||||
{
|
||||
int regIndex = operand.GetRegister().Index;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue