ARMeilleure: Add initial support for AVX512(EVEX encoding) (#3663)

* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection

  Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as short-hands for `F+VL` and `F+VL+DQ`.

* ARMeilleure: Add initial support for EVEX instruction encoding

  Does not implement rounding or exception controls.

* ARMeilleure: Add `X86Vpternlogd`

  Accelerates the vector-`Not` instruction.

* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}

* ARMeilleure: Add check for `XCR0` flags

  Add XCR0 register checks for AVX and AVX512F, following the guidelines from sections 14.3 and 15.2 of the Intel Architecture Software Developer's Manual.

* ARMeilleure: Increment InternalVersion

* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting

* ARMeilleure: Move XCR0 procedure to GetXcr0Eax

* ARMeilleure: Add `XCR0` to `FeatureInfo` structure

* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly

  Avoids an additional allocation.

* ARMeilleure: Formatting fixes
2022-12-18 11:46:13 -08:00 · 2022-12-18 11:46:13 -08:00 · 295fbd0542
commit 295fbd0542
parent d7310d7a1c
10 changed files with 193 additions and 9 deletions
--- a/ARMeilleure/CodeGen/X86/Assembler.cs
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@ -1033,7 +1033,13 @@ namespace ARMeilleure.CodeGen.X86

            Debug.Assert(opCode != BadOp, "Invalid opcode value.");

-            if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
+            if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
+            {
+                WriteEvexInst(dest, src1, src2, type, flags, opCode);
+
+                opCode &= 0xff;
+            }
+            else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
            {
                // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits.

@ -1152,6 +1158,103 @@ namespace ARMeilleure.CodeGen.X86
            }
        }

+        /// <summary>
+        /// Emits the four-byte EVEX prefix: the 0x62 escape byte followed by the P0, P1 and P2 payload bytes.
+        /// All three operands are read through <c>GetRegister()</c>, so they must be register operands.
+        /// </summary>
+        /// <param name="dest">Register operand whose index bits 3 and 4 are encoded (inverted) as R and R'.</param>
+        /// <param name="src1">Register operand whose index bits 3 and 4 are encoded (inverted) as B and X.</param>
+        /// <param name="src2">Register operand encoded (inverted) into the vvvv field and the V' bit.</param>
+        /// <param name="type">Operand type; selects the W bit through <c>Is64Bits</c>.</param>
+        /// <param name="flags">Instruction flags; the bits under <c>PrefixMask</c> select the pp field.</param>
+        /// <param name="opCode">Full opcode; the bits above the low byte select the opcode map (mm field).</param>
+        /// <param name="broadcast">Value of the embedded-broadcast bit (bit 4 of P2).</param>
+        /// <param name="registerWidth">Vector width in bits: 128, 256 or 512.</param>
+        /// <param name="maskRegisterIdx">Mask register index, expected to be below 8.</param>
+        /// <param name="zeroElements">Value of the z bit (bit 7 of P2): zeroing instead of merging masking.</param>
+        private void WriteEvexInst(
+            Operand dest,
+            Operand src1,
+            Operand src2,
+            OperandType type,
+            InstructionFlags flags,
+            int opCode,
+            bool broadcast = false,
+            int registerWidth = 128,
+            int maskRegisterIdx = 0,
+            bool zeroElements = false)
+        {
+            int destIdx = dest.GetRegister().Index;
+            int src1Idx = src1.GetRegister().Index;
+            int src2Idx = src2.GetRegister().Index;
+
+            // NOTE(review): X/B are derived from src1 while vvvv/V' come from src2. Confirm this matches
+            // the operand that the caller places in ModRM.rm.
+
+            WriteByte(0x62);
+
+            // P0
+            // Register extension bits are stored inverted, hence the "== 0" tests.
+            // Extend dest register
+            bool r = (destIdx & 8) == 0;
+            // Extend src register
+            bool x = (src1Idx & 16) == 0;
+            // Extend src register
+            bool b = (src1Idx & 8) == 0;
+            // Extend dest register
+            bool rp = (destIdx & 16) == 0;
+            // Escape code index
+            byte mm = 0b00;
+
+            switch ((ushort)(opCode >> 8))
+            {
+                case 0xf00: mm = 0b01; break;
+                case 0xf38: mm = 0b10; break;
+                case 0xf3a: mm = 0b11; break;
+
+                default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
+            }
+
+            WriteByte(
+                (byte)(
+                    (r ? 0x80 : 0) |
+                    (x ? 0x40 : 0) |
+                    (b ? 0x20 : 0) |
+                    (rp ? 0x10 : 0) |
+                    mm));
+
+            // P1
+            // Specify 64-bit lane mode
+            bool w = Is64Bits(type);
+            // Src2 register index. EVEX stores vvvv in ones' complement form, like the extension bits,
+            // so the low four index bits must be inverted before being written.
+            byte vvvv = (byte)(~src2Idx & 0b1111);
+            // Opcode prefix
+            byte pp = (flags & InstructionFlags.PrefixMask) switch
+            {
+                InstructionFlags.Prefix66 => 0b01,
+                InstructionFlags.PrefixF3 => 0b10,
+                InstructionFlags.PrefixF2 => 0b11,
+                _ => 0
+            };
+            // Bit 2 of P1 is always written as 1.
+            WriteByte(
+                (byte)(
+                    (w ? 0x80 : 0) |
+                    (vvvv << 3) |
+                    0b100 |
+                    pp));
+
+            // P2
+            // Mask register determines what elements to zero, rather than what elements to merge
+            bool z = zeroElements;
+            // Specifies register-width
+            byte ll = 0b00;
+            switch (registerWidth)
+            {
+                case 128: ll = 0b00; break;
+                case 256: ll = 0b01; break;
+                case 512: ll = 0b10; break;
+
+                default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
+            }
+            // Embedded broadcast in the case of a memory operand
+            bool bcast = broadcast;
+            // Extend src2 register (stored inverted, consistent with vvvv)
+            bool vp = (src2Idx & 16) == 0;
+            // Mask register index
+            Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
+            byte aaa = (byte)(maskRegisterIdx & 0b111);
+
+            WriteByte(
+                (byte)(
+                    (z ? 0x80 : 0) |
+                    (ll << 5) |
+                    (bcast ? 0x10 : 0) |
+                    (vp ? 8 : 0) |
+                    aaa));
+        }
+
        private void WriteCompactInst(Operand operand, int opCode)
        {
            int regIndex = operand.GetRegister().Index;