emitter: Pass OpArg by reference where possible

This commit is contained in:
Lioncash 2015-08-20 04:14:51 -04:00
parent 0609b89f74
commit 55461125ba
2 changed files with 721 additions and 721 deletions

View file

@ -344,19 +344,19 @@ private:
void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
void WriteMulDivType(int bits, OpArg src, int ext);
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext);
void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext);
void WriteMXCSR(OpArg arg, int ext);
void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
@ -413,8 +413,8 @@ public:
// Stack control
void PUSH(X64Reg reg);
void POP(X64Reg reg);
void PUSH(int bits, const OpArg &reg);
void POP(int bits, const OpArg &reg);
void PUSH(int bits, const OpArg& reg);
void POP(int bits, const OpArg& reg);
void PUSHF();
void POPF();
@ -424,21 +424,21 @@ public:
void UD2();
FixupBranch J(bool force5bytes = false);
void JMP(const u8 * addr, bool force5Bytes = false);
void JMP(const u8* addr, bool force5Bytes = false);
void JMP(OpArg arg);
void JMPptr(const OpArg &arg);
void JMPptr(const OpArg& arg);
void JMPself(); // Infinite loop!
#ifdef CALL
#undef CALL
#endif
void CALL(const void *fnptr);
void CALL(const void* fnptr);
void CALLptr(OpArg arg);
FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
//void J_CC(CCFlags conditionCode, JumpTarget target);
void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);
void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
void SetJumpTarget(const FixupBranch &branch);
void SetJumpTarget(const FixupBranch& branch);
void SETcc(CCFlags flag, OpArg dest);
// Note: CMOV brings little, if any, benefit on current CPUs.
@ -450,8 +450,8 @@ public:
void SFENCE();
// Bit scan
void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit
void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit
void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit
void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit
// Cache control
enum PrefetchLevel
@ -462,37 +462,37 @@ public:
PF_T2, //Levels 3+ (aliased to T0 on AMD)
};
void PREFETCH(PrefetchLevel level, OpArg arg);
void MOVNTI(int bits, OpArg dest, X64Reg src);
void MOVNTDQ(OpArg arg, X64Reg regOp);
void MOVNTPS(OpArg arg, X64Reg regOp);
void MOVNTPD(OpArg arg, X64Reg regOp);
void MOVNTI(int bits, const OpArg& dest, X64Reg src);
void MOVNTDQ(const OpArg& arg, X64Reg regOp);
void MOVNTPS(const OpArg& arg, X64Reg regOp);
void MOVNTPD(const OpArg& arg, X64Reg regOp);
// Multiplication / division
void MUL(int bits, OpArg src); //UNSIGNED
void IMUL(int bits, OpArg src); //SIGNED
void IMUL(int bits, X64Reg regOp, OpArg src);
void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm);
void DIV(int bits, OpArg src);
void IDIV(int bits, OpArg src);
void MUL(int bits, const OpArg& src); // Unsigned multiply
void IMUL(int bits, const OpArg& src); // Signed multiply
void IMUL(int bits, X64Reg regOp, const OpArg& src);
void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
void DIV(int bits, const OpArg& src);
void IDIV(int bits, const OpArg& src);
// Shift
void ROL(int bits, OpArg dest, OpArg shift);
void ROR(int bits, OpArg dest, OpArg shift);
void RCL(int bits, OpArg dest, OpArg shift);
void RCR(int bits, OpArg dest, OpArg shift);
void SHL(int bits, OpArg dest, OpArg shift);
void SHR(int bits, OpArg dest, OpArg shift);
void SAR(int bits, OpArg dest, OpArg shift);
void ROL(int bits, const OpArg& dest, const OpArg& shift);
void ROR(int bits, const OpArg& dest, const OpArg& shift);
void RCL(int bits, const OpArg& dest, const OpArg& shift);
void RCR(int bits, const OpArg& dest, const OpArg& shift);
void SHL(int bits, const OpArg& dest, const OpArg& shift);
void SHR(int bits, const OpArg& dest, const OpArg& shift);
void SAR(int bits, const OpArg& dest, const OpArg& shift);
// Bit Test
void BT(int bits, OpArg dest, OpArg index);
void BTS(int bits, OpArg dest, OpArg index);
void BTR(int bits, OpArg dest, OpArg index);
void BTC(int bits, OpArg dest, OpArg index);
void BT(int bits, const OpArg& dest, const OpArg& index);
void BTS(int bits, const OpArg& dest, const OpArg& index);
void BTR(int bits, const OpArg& dest, const OpArg& index);
void BTC(int bits, const OpArg& dest, const OpArg& index);
// Double-Precision Shift
void SHRD(int bits, OpArg dest, OpArg src, OpArg shift);
void SHLD(int bits, OpArg dest, OpArg src, OpArg shift);
void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
// Extend EAX into EDX in various ways
void CWD(int bits = 16);
@ -506,23 +506,23 @@ public:
void LEA(int bits, X64Reg dest, OpArg src);
// Integer arithmetic
void NEG (int bits, OpArg src);
void ADD (int bits, const OpArg &a1, const OpArg &a2);
void ADC (int bits, const OpArg &a1, const OpArg &a2);
void SUB (int bits, const OpArg &a1, const OpArg &a2);
void SBB (int bits, const OpArg &a1, const OpArg &a2);
void AND (int bits, const OpArg &a1, const OpArg &a2);
void CMP (int bits, const OpArg &a1, const OpArg &a2);
void NEG(int bits, const OpArg& src);
void ADD(int bits, const OpArg& a1, const OpArg& a2);
void ADC(int bits, const OpArg& a1, const OpArg& a2);
void SUB(int bits, const OpArg& a1, const OpArg& a2);
void SBB(int bits, const OpArg& a1, const OpArg& a2);
void AND(int bits, const OpArg& a1, const OpArg& a2);
void CMP(int bits, const OpArg& a1, const OpArg& a2);
// Bit operations
void NOT (int bits, OpArg src);
void OR (int bits, const OpArg &a1, const OpArg &a2);
void XOR (int bits, const OpArg &a1, const OpArg &a2);
void MOV (int bits, const OpArg &a1, const OpArg &a2);
void TEST(int bits, const OpArg &a1, const OpArg &a2);
// NOT on the operand described by src; bits is the operand width (presumably 8/16/32/64 - confirm against the implementation).
void NOT(int bits, const OpArg& src);
void OR(int bits, const OpArg& a1, const OpArg& a2);
void XOR(int bits, const OpArg& a1, const OpArg& a2);
void MOV(int bits, const OpArg& a1, const OpArg& a2);
void TEST(int bits, const OpArg& a1, const OpArg& a2);
// Are these useful at all? Consider removing.
void XCHG(int bits, const OpArg &a1, const OpArg &a2);
void XCHG(int bits, const OpArg& a1, const OpArg& a2);
void XCHG_AHAL();
// Byte swapping (32 and 64-bit only).
@ -536,13 +536,13 @@ public:
void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
// Available only on AMD >= Phenom or Intel >= Haswell
void LZCNT(int bits, X64Reg dest, OpArg src);
void LZCNT(int bits, X64Reg dest, const OpArg& src);
// Note: this one is actually part of BMI1
void TZCNT(int bits, X64Reg dest, OpArg src);
void TZCNT(int bits, X64Reg dest, const OpArg& src);
// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
void STMXCSR(OpArg memloc);
void LDMXCSR(OpArg memloc);
void STMXCSR(const OpArg& memloc);
void LDMXCSR(const OpArg& memloc);
// Prefixes
void LOCK();
@ -569,259 +569,259 @@ public:
x87_FPUBusy = 0x8000,
};
void FLD(int bits, OpArg src);
void FST(int bits, OpArg dest);
void FSTP(int bits, OpArg dest);
void FLD(int bits, const OpArg& src);
void FST(int bits, const OpArg& dest);
void FSTP(int bits, const OpArg& dest);
void FNSTSW_AX();
void FWAIT();
// SSE/SSE2: Floating point arithmetic
void ADDSS(X64Reg regOp, OpArg arg);
void ADDSD(X64Reg regOp, OpArg arg);
void SUBSS(X64Reg regOp, OpArg arg);
void SUBSD(X64Reg regOp, OpArg arg);
void MULSS(X64Reg regOp, OpArg arg);
void MULSD(X64Reg regOp, OpArg arg);
void DIVSS(X64Reg regOp, OpArg arg);
void DIVSD(X64Reg regOp, OpArg arg);
void MINSS(X64Reg regOp, OpArg arg);
void MINSD(X64Reg regOp, OpArg arg);
void MAXSS(X64Reg regOp, OpArg arg);
void MAXSD(X64Reg regOp, OpArg arg);
void SQRTSS(X64Reg regOp, OpArg arg);
void SQRTSD(X64Reg regOp, OpArg arg);
void RSQRTSS(X64Reg regOp, OpArg arg);
void ADDSS(X64Reg regOp, const OpArg& arg);
void ADDSD(X64Reg regOp, const OpArg& arg);
void SUBSS(X64Reg regOp, const OpArg& arg);
void SUBSD(X64Reg regOp, const OpArg& arg);
void MULSS(X64Reg regOp, const OpArg& arg);
void MULSD(X64Reg regOp, const OpArg& arg);
void DIVSS(X64Reg regOp, const OpArg& arg);
void DIVSD(X64Reg regOp, const OpArg& arg);
void MINSS(X64Reg regOp, const OpArg& arg);
void MINSD(X64Reg regOp, const OpArg& arg);
void MAXSS(X64Reg regOp, const OpArg& arg);
void MAXSD(X64Reg regOp, const OpArg& arg);
void SQRTSS(X64Reg regOp, const OpArg& arg);
void SQRTSD(X64Reg regOp, const OpArg& arg);
void RSQRTSS(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Floating point bitwise (yes)
void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);
// Convenience forms of CMPSS (arg taken by value): each one forwards regOp and
// arg to CMPSS and supplies the CMP_* constant named in its mnemonic as the
// `compare` argument.
void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); }
void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); }
void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); }
void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); }
void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); }
void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }
// Convenience forms of CMPSS (arg taken by const reference): each one forwards
// regOp and arg to CMPSS and supplies the CMP_* constant named in its mnemonic
// as the `compare` argument.
void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); }
void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); }
void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); }
void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); }
void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); }
void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); }
void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); }
// SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
void ADDPS(X64Reg regOp, OpArg arg);
void ADDPD(X64Reg regOp, OpArg arg);
void SUBPS(X64Reg regOp, OpArg arg);
void SUBPD(X64Reg regOp, OpArg arg);
void CMPPS(X64Reg regOp, OpArg arg, u8 compare);
void CMPPD(X64Reg regOp, OpArg arg, u8 compare);
void MULPS(X64Reg regOp, OpArg arg);
void MULPD(X64Reg regOp, OpArg arg);
void DIVPS(X64Reg regOp, OpArg arg);
void DIVPD(X64Reg regOp, OpArg arg);
void MINPS(X64Reg regOp, OpArg arg);
void MINPD(X64Reg regOp, OpArg arg);
void MAXPS(X64Reg regOp, OpArg arg);
void MAXPD(X64Reg regOp, OpArg arg);
void SQRTPS(X64Reg regOp, OpArg arg);
void SQRTPD(X64Reg regOp, OpArg arg);
void RCPPS(X64Reg regOp, OpArg arg);
void RSQRTPS(X64Reg regOp, OpArg arg);
void ADDPS(X64Reg regOp, const OpArg& arg);
void ADDPD(X64Reg regOp, const OpArg& arg);
void SUBPS(X64Reg regOp, const OpArg& arg);
void SUBPD(X64Reg regOp, const OpArg& arg);
void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare);
void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare);
void MULPS(X64Reg regOp, const OpArg& arg);
void MULPD(X64Reg regOp, const OpArg& arg);
void DIVPS(X64Reg regOp, const OpArg& arg);
void DIVPD(X64Reg regOp, const OpArg& arg);
void MINPS(X64Reg regOp, const OpArg& arg);
void MINPD(X64Reg regOp, const OpArg& arg);
void MAXPS(X64Reg regOp, const OpArg& arg);
void MAXPD(X64Reg regOp, const OpArg& arg);
void SQRTPS(X64Reg regOp, const OpArg& arg);
void SQRTPD(X64Reg regOp, const OpArg& arg);
void RCPPS(X64Reg regOp, const OpArg& arg);
void RSQRTPS(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
void ANDPS(X64Reg regOp, OpArg arg);
void ANDPD(X64Reg regOp, OpArg arg);
void ANDNPS(X64Reg regOp, OpArg arg);
void ANDNPD(X64Reg regOp, OpArg arg);
void ORPS(X64Reg regOp, OpArg arg);
void ORPD(X64Reg regOp, OpArg arg);
void XORPS(X64Reg regOp, OpArg arg);
void XORPD(X64Reg regOp, OpArg arg);
void ANDPS(X64Reg regOp, const OpArg& arg);
void ANDPD(X64Reg regOp, const OpArg& arg);
void ANDNPS(X64Reg regOp, const OpArg& arg);
void ANDNPD(X64Reg regOp, const OpArg& arg);
void ORPS(X64Reg regOp, const OpArg& arg);
void ORPD(X64Reg regOp, const OpArg& arg);
void XORPS(X64Reg regOp, const OpArg& arg);
void XORPD(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle);
void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle);
void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);
// SSE/SSE2: Useful alternative to shuffle in some cases.
void MOVDDUP(X64Reg regOp, OpArg arg);
void MOVDDUP(X64Reg regOp, const OpArg& arg);
// TODO: Actually implement
#if 0
// SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
void ADDSUBPS(X64Reg dest, OpArg src);
void ADDSUBPD(X64Reg dest, OpArg src);
void HADDPD(X64Reg dest, OpArg src);
void HSUBPS(X64Reg dest, OpArg src);
void HSUBPD(X64Reg dest, OpArg src);
void ADDSUBPS(X64Reg dest, const OpArg& src);
void ADDSUBPD(X64Reg dest, const OpArg& src);
void HADDPD(X64Reg dest, const OpArg& src);
void HSUBPS(X64Reg dest, const OpArg& src);
void HSUBPD(X64Reg dest, const OpArg& src);
// SSE4: Further horizontal operations - dot products. These are weirdly flexible: the arg contains both a read mask and a write "mask".
void DPPD(X64Reg dest, OpArg src, u8 arg);
void DPPD(X64Reg dest, const OpArg& src, u8 arg);
// These are probably useful for VFPU emulation.
void INSERTPS(X64Reg dest, OpArg src, u8 arg);
void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
void INSERTPS(X64Reg dest, const OpArg& src, u8 arg);
void EXTRACTPS(const OpArg& dest, X64Reg src, u8 arg);
#endif
// SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
void HADDPS(X64Reg dest, OpArg src);
void HADDPS(X64Reg dest, const OpArg& src);
// SSE4: Further horizontal operations - dot products. These are weirdly flexible: the arg contains both a read mask and a write "mask".
void DPPS(X64Reg dest, OpArg src, u8 arg);
void DPPS(X64Reg dest, const OpArg& src, u8 arg);
void UNPCKLPS(X64Reg dest, OpArg src);
void UNPCKHPS(X64Reg dest, OpArg src);
void UNPCKLPD(X64Reg dest, OpArg src);
void UNPCKHPD(X64Reg dest, OpArg src);
void UNPCKLPS(X64Reg dest, const OpArg& src);
void UNPCKHPS(X64Reg dest, const OpArg& src);
void UNPCKLPD(X64Reg dest, const OpArg& src);
void UNPCKHPD(X64Reg dest, const OpArg& src);
// SSE/SSE2: Compares.
void COMISS(X64Reg regOp, OpArg arg);
void COMISD(X64Reg regOp, OpArg arg);
void UCOMISS(X64Reg regOp, OpArg arg);
void UCOMISD(X64Reg regOp, OpArg arg);
void COMISS(X64Reg regOp, const OpArg& arg);
void COMISD(X64Reg regOp, const OpArg& arg);
void UCOMISS(X64Reg regOp, const OpArg& arg);
void UCOMISD(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Moves. Use the right data type for your data, in most cases.
void MOVAPS(X64Reg regOp, OpArg arg);
void MOVAPD(X64Reg regOp, OpArg arg);
void MOVAPS(OpArg arg, X64Reg regOp);
void MOVAPD(OpArg arg, X64Reg regOp);
void MOVAPS(X64Reg regOp, const OpArg& arg);
void MOVAPD(X64Reg regOp, const OpArg& arg);
void MOVAPS(const OpArg& arg, X64Reg regOp);
void MOVAPD(const OpArg& arg, X64Reg regOp);
void MOVUPS(X64Reg regOp, OpArg arg);
void MOVUPD(X64Reg regOp, OpArg arg);
void MOVUPS(OpArg arg, X64Reg regOp);
void MOVUPD(OpArg arg, X64Reg regOp);
void MOVUPS(X64Reg regOp, const OpArg& arg);
void MOVUPD(X64Reg regOp, const OpArg& arg);
void MOVUPS(const OpArg& arg, X64Reg regOp);
void MOVUPD(const OpArg& arg, X64Reg regOp);
void MOVDQA(X64Reg regOp, OpArg arg);
void MOVDQA(OpArg arg, X64Reg regOp);
void MOVDQU(X64Reg regOp, OpArg arg);
void MOVDQU(OpArg arg, X64Reg regOp);
void MOVDQA(X64Reg regOp, const OpArg& arg);
void MOVDQA(const OpArg& arg, X64Reg regOp);
void MOVDQU(X64Reg regOp, const OpArg& arg);
void MOVDQU(const OpArg& arg, X64Reg regOp);
void MOVSS(X64Reg regOp, OpArg arg);
void MOVSD(X64Reg regOp, OpArg arg);
void MOVSS(OpArg arg, X64Reg regOp);
void MOVSD(OpArg arg, X64Reg regOp);
void MOVSS(X64Reg regOp, const OpArg& arg);
void MOVSD(X64Reg regOp, const OpArg& arg);
void MOVSS(const OpArg& arg, X64Reg regOp);
void MOVSD(const OpArg& arg, X64Reg regOp);
void MOVLPS(X64Reg regOp, OpArg arg);
void MOVLPD(X64Reg regOp, OpArg arg);
void MOVLPS(OpArg arg, X64Reg regOp);
void MOVLPD(OpArg arg, X64Reg regOp);
void MOVLPS(X64Reg regOp, const OpArg& arg);
void MOVLPD(X64Reg regOp, const OpArg& arg);
void MOVLPS(const OpArg& arg, X64Reg regOp);
void MOVLPD(const OpArg& arg, X64Reg regOp);
void MOVHPS(X64Reg regOp, OpArg arg);
void MOVHPD(X64Reg regOp, OpArg arg);
void MOVHPS(OpArg arg, X64Reg regOp);
void MOVHPD(OpArg arg, X64Reg regOp);
void MOVHPS(X64Reg regOp, const OpArg& arg);
void MOVHPD(X64Reg regOp, const OpArg& arg);
void MOVHPS(const OpArg& arg, X64Reg regOp);
void MOVHPD(const OpArg& arg, X64Reg regOp);
void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
void MOVD_xmm(X64Reg dest, const OpArg &arg);
void MOVD_xmm(X64Reg dest, const OpArg& arg);
void MOVQ_xmm(X64Reg dest, OpArg arg);
void MOVD_xmm(const OpArg &arg, X64Reg src);
void MOVD_xmm(const OpArg& arg, X64Reg src);
void MOVQ_xmm(OpArg arg, X64Reg src);
// SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
void MOVMSKPS(X64Reg dest, OpArg arg);
void MOVMSKPD(X64Reg dest, OpArg arg);
void MOVMSKPS(X64Reg dest, const OpArg& arg);
void MOVMSKPD(X64Reg dest, const OpArg& arg);
// SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
void MASKMOVDQU(X64Reg dest, X64Reg src);
void LDDQU(X64Reg dest, OpArg src);
void LDDQU(X64Reg dest, const OpArg& src);
// SSE/SSE2: Data type conversions.
void CVTPS2PD(X64Reg dest, OpArg src);
void CVTPD2PS(X64Reg dest, OpArg src);
void CVTSS2SD(X64Reg dest, OpArg src);
void CVTSI2SS(X64Reg dest, OpArg src);
void CVTSD2SS(X64Reg dest, OpArg src);
void CVTSI2SD(X64Reg dest, OpArg src);
void CVTDQ2PD(X64Reg regOp, OpArg arg);
void CVTPD2DQ(X64Reg regOp, OpArg arg);
void CVTDQ2PS(X64Reg regOp, OpArg arg);
void CVTPS2DQ(X64Reg regOp, OpArg arg);
void CVTPS2PD(X64Reg dest, const OpArg& src);
void CVTPD2PS(X64Reg dest, const OpArg& src);
void CVTSS2SD(X64Reg dest, const OpArg& src);
void CVTSI2SS(X64Reg dest, const OpArg& src);
void CVTSD2SS(X64Reg dest, const OpArg& src);
void CVTSI2SD(X64Reg dest, const OpArg& src);
void CVTDQ2PD(X64Reg regOp, const OpArg& arg);
void CVTPD2DQ(X64Reg regOp, const OpArg& arg);
void CVTDQ2PS(X64Reg regOp, const OpArg& arg);
void CVTPS2DQ(X64Reg regOp, const OpArg& arg);
void CVTTPS2DQ(X64Reg regOp, OpArg arg);
void CVTTPD2DQ(X64Reg regOp, OpArg arg);
void CVTTPS2DQ(X64Reg regOp, const OpArg& arg);
void CVTTPD2DQ(X64Reg regOp, const OpArg& arg);
// Destinations are X64 regs (rax, rbx, ...) for these instructions.
void CVTSS2SI(X64Reg xregdest, OpArg src);
void CVTSD2SI(X64Reg xregdest, OpArg src);
void CVTTSS2SI(X64Reg xregdest, OpArg arg);
void CVTTSD2SI(X64Reg xregdest, OpArg arg);
void CVTSS2SI(X64Reg xregdest, const OpArg& src);
void CVTSD2SI(X64Reg xregdest, const OpArg& src);
void CVTTSS2SI(X64Reg xregdest, const OpArg& arg);
void CVTTSD2SI(X64Reg xregdest, const OpArg& arg);
// SSE2: Packed integer instructions
void PACKSSDW(X64Reg dest, OpArg arg);
void PACKSSWB(X64Reg dest, OpArg arg);
void PACKUSDW(X64Reg dest, OpArg arg);
void PACKUSWB(X64Reg dest, OpArg arg);
void PACKSSDW(X64Reg dest, const OpArg& arg);
void PACKSSWB(X64Reg dest, const OpArg& arg);
void PACKUSDW(X64Reg dest, const OpArg& arg);
void PACKUSWB(X64Reg dest, const OpArg& arg);
void PUNPCKLBW(X64Reg dest, const OpArg &arg);
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
void PTEST(X64Reg dest, OpArg arg);
void PAND(X64Reg dest, OpArg arg);
void PANDN(X64Reg dest, OpArg arg);
void PXOR(X64Reg dest, OpArg arg);
void POR(X64Reg dest, OpArg arg);
void PTEST(X64Reg dest, const OpArg& arg);
void PAND(X64Reg dest, const OpArg& arg);
void PANDN(X64Reg dest, const OpArg& arg);
void PXOR(X64Reg dest, const OpArg& arg);
void POR(X64Reg dest, const OpArg& arg);
void PADDB(X64Reg dest, OpArg arg);
void PADDW(X64Reg dest, OpArg arg);
void PADDD(X64Reg dest, OpArg arg);
void PADDQ(X64Reg dest, OpArg arg);
void PADDB(X64Reg dest, const OpArg& arg);
void PADDW(X64Reg dest, const OpArg& arg);
void PADDD(X64Reg dest, const OpArg& arg);
void PADDQ(X64Reg dest, const OpArg& arg);
void PADDSB(X64Reg dest, OpArg arg);
void PADDSW(X64Reg dest, OpArg arg);
void PADDUSB(X64Reg dest, OpArg arg);
void PADDUSW(X64Reg dest, OpArg arg);
void PADDSB(X64Reg dest, const OpArg& arg);
void PADDSW(X64Reg dest, const OpArg& arg);
void PADDUSB(X64Reg dest, const OpArg& arg);
void PADDUSW(X64Reg dest, const OpArg& arg);
void PSUBB(X64Reg dest, OpArg arg);
void PSUBW(X64Reg dest, OpArg arg);
void PSUBD(X64Reg dest, OpArg arg);
void PSUBQ(X64Reg dest, OpArg arg);
void PSUBB(X64Reg dest, const OpArg& arg);
void PSUBW(X64Reg dest, const OpArg& arg);
void PSUBD(X64Reg dest, const OpArg& arg);
void PSUBQ(X64Reg dest, const OpArg& arg);
void PSUBSB(X64Reg dest, OpArg arg);
void PSUBSW(X64Reg dest, OpArg arg);
void PSUBUSB(X64Reg dest, OpArg arg);
void PSUBUSW(X64Reg dest, OpArg arg);
void PSUBSB(X64Reg dest, const OpArg& arg);
void PSUBSW(X64Reg dest, const OpArg& arg);
void PSUBUSB(X64Reg dest, const OpArg& arg);
void PSUBUSW(X64Reg dest, const OpArg& arg);
void PAVGB(X64Reg dest, OpArg arg);
void PAVGW(X64Reg dest, OpArg arg);
void PAVGB(X64Reg dest, const OpArg& arg);
void PAVGW(X64Reg dest, const OpArg& arg);
void PCMPEQB(X64Reg dest, OpArg arg);
void PCMPEQW(X64Reg dest, OpArg arg);
void PCMPEQD(X64Reg dest, OpArg arg);
void PCMPEQB(X64Reg dest, const OpArg& arg);
void PCMPEQW(X64Reg dest, const OpArg& arg);
void PCMPEQD(X64Reg dest, const OpArg& arg);
void PCMPGTB(X64Reg dest, OpArg arg);
void PCMPGTW(X64Reg dest, OpArg arg);
void PCMPGTD(X64Reg dest, OpArg arg);
void PCMPGTB(X64Reg dest, const OpArg& arg);
void PCMPGTW(X64Reg dest, const OpArg& arg);
void PCMPGTD(X64Reg dest, const OpArg& arg);
void PEXTRW(X64Reg dest, OpArg arg, u8 subreg);
void PINSRW(X64Reg dest, OpArg arg, u8 subreg);
void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg);
void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg);
void PMADDWD(X64Reg dest, OpArg arg);
void PSADBW(X64Reg dest, OpArg arg);
void PMADDWD(X64Reg dest, const OpArg& arg);
void PSADBW(X64Reg dest, const OpArg& arg);
void PMAXSW(X64Reg dest, OpArg arg);
void PMAXUB(X64Reg dest, OpArg arg);
void PMINSW(X64Reg dest, OpArg arg);
void PMINUB(X64Reg dest, OpArg arg);
void PMAXSW(X64Reg dest, const OpArg& arg);
void PMAXUB(X64Reg dest, const OpArg& arg);
void PMINSW(X64Reg dest, const OpArg& arg);
void PMINUB(X64Reg dest, const OpArg& arg);
// SSE4: More MAX/MIN instructions.
void PMINSB(X64Reg dest, OpArg arg);
void PMINSD(X64Reg dest, OpArg arg);
void PMINUW(X64Reg dest, OpArg arg);
void PMINUD(X64Reg dest, OpArg arg);
void PMAXSB(X64Reg dest, OpArg arg);
void PMAXSD(X64Reg dest, OpArg arg);
void PMAXUW(X64Reg dest, OpArg arg);
void PMAXUD(X64Reg dest, OpArg arg);
void PMINSB(X64Reg dest, const OpArg& arg);
void PMINSD(X64Reg dest, const OpArg& arg);
void PMINUW(X64Reg dest, const OpArg& arg);
void PMINUD(X64Reg dest, const OpArg& arg);
void PMAXSB(X64Reg dest, const OpArg& arg);
void PMAXSD(X64Reg dest, const OpArg& arg);
void PMAXUW(X64Reg dest, const OpArg& arg);
void PMAXUD(X64Reg dest, const OpArg& arg);
void PMOVMSKB(X64Reg dest, OpArg arg);
void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle);
void PSHUFB(X64Reg dest, OpArg arg);
void PMOVMSKB(X64Reg dest, const OpArg& arg);
void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle);
void PSHUFB(X64Reg dest, const OpArg& arg);
void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle);
void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle);
void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle);
void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle);
void PSRLW(X64Reg reg, int shift);
void PSRLD(X64Reg reg, int shift);
void PSRLQ(X64Reg reg, int shift);
void PSRLQ(X64Reg reg, OpArg arg);
void PSRLQ(X64Reg reg, const OpArg& arg);
void PSRLDQ(X64Reg reg, int shift);
void PSLLW(X64Reg reg, int shift);
@ -833,198 +833,198 @@ public:
void PSRAD(X64Reg reg, int shift);
// SSE4: data type conversions
void PMOVSXBW(X64Reg dest, OpArg arg);
void PMOVSXBD(X64Reg dest, OpArg arg);
void PMOVSXBQ(X64Reg dest, OpArg arg);
void PMOVSXWD(X64Reg dest, OpArg arg);
void PMOVSXWQ(X64Reg dest, OpArg arg);
void PMOVSXDQ(X64Reg dest, OpArg arg);
void PMOVZXBW(X64Reg dest, OpArg arg);
void PMOVZXBD(X64Reg dest, OpArg arg);
void PMOVZXBQ(X64Reg dest, OpArg arg);
void PMOVZXWD(X64Reg dest, OpArg arg);
void PMOVZXWQ(X64Reg dest, OpArg arg);
void PMOVZXDQ(X64Reg dest, OpArg arg);
void PMOVSXBW(X64Reg dest, const OpArg& arg);
void PMOVSXBD(X64Reg dest, const OpArg& arg);
void PMOVSXBQ(X64Reg dest, const OpArg& arg);
void PMOVSXWD(X64Reg dest, const OpArg& arg);
void PMOVSXWQ(X64Reg dest, const OpArg& arg);
void PMOVSXDQ(X64Reg dest, const OpArg& arg);
void PMOVZXBW(X64Reg dest, const OpArg& arg);
void PMOVZXBD(X64Reg dest, const OpArg& arg);
void PMOVZXBQ(X64Reg dest, const OpArg& arg);
void PMOVZXWD(X64Reg dest, const OpArg& arg);
void PMOVZXWQ(X64Reg dest, const OpArg& arg);
void PMOVZXDQ(X64Reg dest, const OpArg& arg);
// SSE4: variable blend instructions (xmm0 implicit argument)
void PBLENDVB(X64Reg dest, OpArg arg);
void BLENDVPS(X64Reg dest, OpArg arg);
void BLENDVPD(X64Reg dest, OpArg arg);
void PBLENDVB(X64Reg dest, const OpArg& arg);
void BLENDVPS(X64Reg dest, const OpArg& arg);
void BLENDVPD(X64Reg dest, const OpArg& arg);
void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);
// SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
void ROUNDSS(X64Reg dest, OpArg arg, u8 mode);
void ROUNDSD(X64Reg dest, OpArg arg, u8 mode);
void ROUNDPS(X64Reg dest, OpArg arg, u8 mode);
void ROUNDPD(X64Reg dest, OpArg arg, u8 mode);
void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode);
void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode);
void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);
// ROUNDSS helpers with the mode fixed (arg taken by value): each forwards dest
// and arg to ROUNDSS with the FROUND_* constant named in the helper.
void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
// ROUNDSS helpers with the mode fixed (arg taken by const reference): each
// forwards dest and arg to ROUNDSS with the FROUND_* constant named in the helper.
void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
// ROUNDSD helpers with the mode fixed (arg taken by value): each forwards dest
// and arg to ROUNDSD with the FROUND_* constant named in the helper.
void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
// ROUNDSD helpers with the mode fixed (arg taken by const reference): each
// forwards dest and arg to ROUNDSD with the FROUND_* constant named in the helper.
void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
// ROUNDPS helpers with the mode fixed (arg taken by value): each forwards dest
// and arg to ROUNDPS with the FROUND_* constant named in the helper.
void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
// ROUNDPS helpers with the mode fixed (arg taken by const reference): each
// forwards dest and arg to ROUNDPS with the FROUND_* constant named in the helper.
void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
// ROUNDPD helpers with the mode fixed (arg taken by value): each forwards dest
// and arg to ROUNDPD with the FROUND_* constant named in the helper.
void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
// ROUNDPD helpers with the mode fixed (arg taken by const reference): each
// forwards dest and arg to ROUNDPD with the FROUND_* constant named in the helper.
void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
// AVX
void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle);
void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle);
void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// VAND/VANDN/VOR/VXOR in PS and PD variants: two register operands followed by an
// OpArg operand accepted by const reference. Declarations only.
void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// VPAND/VPANDN/VPOR/VPXOR: two register operands followed by an OpArg operand
// accepted by const reference. Declarations only.
void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// FMA3
void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// VEX GPR instructions
// All take an operand width in `bits` and accept their OpArg operand by const
// reference. Note that the position of `arg` differs between instructions
// (e.g. SARX/SHLX/SHRX/BZHI/BEXTR place it before the second register, while
// PEXT/PDEP/MULX/ANDN place it last); RORX takes an 8-bit rotate amount instead
// of a second register. Declarations only.
void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate);
void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
void BLSR(int bits, X64Reg regOp, const OpArg& arg);
void BLSMSK(int bits, X64Reg regOp, const OpArg& arg);
void BLSI(int bits, X64Reg regOp, const OpArg& arg);
void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// Takes no operands. NOTE(review): presumably emits the x86 RDTSC instruction;
// the definition is not visible here - confirm against the implementation.
void RDTSC();
// Utility functions
// The difference between this and CALL is that this aligns the stack
// where appropriate.
// Untyped entry point: the callee is identified by a const void* address.
void ABI_CallFunction(const void* func);

// Typed convenience overload for pointers to zero-argument functions of any
// return type T. Forwards exactly once to the const void* overload.
template <typename T>
void ABI_CallFunction(T (*func)()) {
    // The explicit cast erases the function-pointer type so that overload
    // resolution picks the untyped overload rather than this template again.
    ABI_CallFunction(reinterpret_cast<const void*>(func));
}

// Convenience overload for a callee address held as a const u8*.
void ABI_CallFunction(const u8* func) {
    // The cast to const void* is required: without it this overload would be
    // the best match for its own argument and recurse forever.
    ABI_CallFunction(static_cast<const void*>(func));
}
// Variants taking a 16-bit constant parameter: C16 passes a single u16, CC16
// passes a u32 followed by a u16. `func` is the untyped callee address.
void ABI_CallFunctionC16(const void* func, u16 param1);
void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);
// These only support u32 parameters, but that's enough for a lot of uses.
// These will destroy the 1 or 2 first "parameter regs".
void ABI_CallFunctionC(const void *func, u32 param1);
void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2);
void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3);
void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3);
void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4);
void ABI_CallFunctionP(const void *func, void *param1);
void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2);
void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3);
void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3);
void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2);
void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3);
void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1);
void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2);
void ABI_CallFunctionC(const void* func, u32 param1);
void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2);
void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3);
void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3);
void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4);
void ABI_CallFunctionP(const void* func, void* param1);
void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2);
void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3);
void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3);
void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2);
void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3);
void ABI_CallFunctionA(const void* func, const OpArg& arg1);
void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2);
// Pass a register as a parameter.
// R takes one X64Reg argument, RR takes two; `func` is the untyped callee address.
void ABI_CallFunctionR(const void* func, X64Reg reg1);
void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2);
// Typed convenience overload: accepts a pointer to a one-argument function
// (return type Tr, argument type T1) together with a u32 constant, and forwards
// exactly once to ABI_CallFunctionC(const void*, u32).
template <typename Tr, typename T1>
void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
    // The explicit cast erases the function-pointer type so that overload
    // resolution picks the untyped overload rather than this template again.
    ABI_CallFunctionC(reinterpret_cast<const void*>(func), param1);
}
// A function that doesn't have any control over what it will do to regs,