Sources: Run clang-format on everything.

This commit is contained in:
Emmanuel Gil Peyrot 2016-09-18 09:38:01 +09:00
parent fe948af095
commit dc8479928c
386 changed files with 19560 additions and 18080 deletions

View file

@ -22,7 +22,8 @@ using namespace Gen;
// Shared code between Win64 and Unix64
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
size_t shadow = 0;
#if defined(_WIN32)
shadow = 0x20;
@ -49,17 +50,19 @@ void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_
*xmm_offsetp = subtraction - xmm_base_subtraction;
}
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
size_t needed_frame_size) {
size_t shadow, subtraction, xmm_offset;
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
&xmm_offset);
for (int r : mask & ABI_ALL_GPRS)
for (int r : mask& ABI_ALL_GPRS)
PUSH((X64Reg)r);
if (subtraction)
SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
for (int x : mask & ABI_ALL_FPRS) {
for (int x : mask& ABI_ALL_FPRS) {
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
xmm_offset += 16;
}
@ -67,12 +70,14 @@ size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_align
return shadow;
}
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
size_t needed_frame_size) {
size_t shadow, subtraction, xmm_offset;
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
&xmm_offset);
for (int x : mask & ABI_ALL_FPRS) {
MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset));
for (int x : mask& ABI_ALL_FPRS) {
MOVAPD((X64Reg)(x - 16), MDisp(RSP, (int)xmm_offset));
xmm_offset += 16;
}
@ -86,10 +91,9 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignmen
}
// Common functions
void XEmitter::ABI_CallFunction(const void *func) {
void XEmitter::ABI_CallFunction(const void* func) {
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -98,11 +102,10 @@ void XEmitter::ABI_CallFunction(const void *func) {
}
}
void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
void XEmitter::ABI_CallFunctionC16(const void* func, u16 param1) {
MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -111,25 +114,11 @@ void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
}
}
void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
void XEmitter::ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -138,12 +127,23 @@ void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
}
}
void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
void XEmitter::ABI_CallFunctionC(const void* func, u32 param1) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCC(const void* func, u32 param1, u32 param2) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -152,13 +152,12 @@ void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
}
}
void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
void XEmitter::ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -167,13 +166,12 @@ void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32
}
}
void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
void XEmitter::ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(64, R(ABI_PARAM3), ImmPtr(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -182,14 +180,14 @@ void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, voi
}
}
void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) {
void XEmitter::ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3,
void* param4) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
MOV(64, R(ABI_PARAM4), ImmPtr(param4));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -198,11 +196,10 @@ void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u3
}
}
void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
void XEmitter::ABI_CallFunctionP(const void* func, void* param1) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -211,13 +208,12 @@ void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
}
}
void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
void XEmitter::ABI_CallFunctionPA(const void* func, void* param1, const Gen::OpArg& arg2) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -226,15 +222,15 @@ void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpA
}
}
void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
void XEmitter::ABI_CallFunctionPAA(const void* func, void* param1, const Gen::OpArg& arg2,
const Gen::OpArg& arg3) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
if (!arg3.IsSimpleReg(ABI_PARAM3))
MOV(32, R(ABI_PARAM3), arg3);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -243,13 +239,12 @@ void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::Op
}
}
void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
void XEmitter::ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
MOV(64, R(ABI_PARAM2), ImmPtr(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -259,12 +254,11 @@ void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2,
}
// Pass a register as a parameter.
void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
void XEmitter::ABI_CallFunctionR(const void* func, X64Reg reg1) {
if (reg1 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(reg1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -274,7 +268,7 @@ void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
}
// Pass two registers as parameters.
void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
void XEmitter::ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2) {
if (reg2 != ABI_PARAM1) {
if (reg1 != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg1));
@ -287,8 +281,7 @@ void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
MOV(64, R(ABI_PARAM1), R(reg1));
}
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -297,14 +290,12 @@ void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
}
}
void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
{
void XEmitter::ABI_CallFunctionAC(const void* func, const Gen::OpArg& arg1, u32 param2) {
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -313,15 +304,14 @@ void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32
}
}
void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
{
void XEmitter::ABI_CallFunctionACC(const void* func, const Gen::OpArg& arg1, u32 param2,
u32 param3) {
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(64, R(ABI_PARAM3), Imm64(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -330,13 +320,11 @@ void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32
}
}
void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
{
void XEmitter::ABI_CallFunctionA(const void* func, const Gen::OpArg& arg1) {
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
@ -345,15 +333,14 @@ void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
}
}
void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
{
void XEmitter::ABI_CallFunctionAA(const void* func, const Gen::OpArg& arg1,
const Gen::OpArg& arg2) {
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));

View file

@ -12,7 +12,8 @@
// Windows 64-bit
// * 4-reg "fastcall" variant, very new-skool stack handling
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself
// calls_
// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
// Scratch: RAX RCX RDX R8 R9 R10 R11
// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
@ -35,10 +36,10 @@
#define ABI_PARAM4 R9
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
#define ABI_ALL_CALLER_SAVED \
(BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
#else //64-bit Unix / OS X
#define ABI_ALL_CALLER_SAVED \
(BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \
XMM4 + 16, XMM5 + 16})
#else // 64-bit Unix / OS X
#define ABI_PARAM1 RDI
#define ABI_PARAM2 RSI
@ -49,9 +50,7 @@
// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
// don't actually clobber them.
#define ABI_ALL_CALLER_SAVED \
(BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
ABI_ALL_FPRS)
#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS)
#endif // WIN32
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)

View file

@ -15,8 +15,8 @@ namespace Common {
#ifndef _MSC_VER
#ifdef __FreeBSD__
#include <sys/types.h>
#include <machine/cpufunc.h>
#include <sys/types.h>
#endif
static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
@ -26,15 +26,9 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
#else
info[0] = function_id; // eax
info[2] = subfunction_id; // ecx
__asm__(
"cpuid"
: "=a" (info[0]),
"=b" (info[1]),
"=c" (info[2]),
"=d" (info[3])
: "a" (function_id),
"c" (subfunction_id)
);
__asm__("cpuid"
: "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
: "a"(function_id), "c"(subfunction_id));
#endif
}
@ -88,14 +82,22 @@ static CPUCaps Detect() {
if (max_std_fn >= 1) {
__cpuid(cpu_id, 0x00000001);
if ((cpu_id[3] >> 25) & 1) caps.sse = true;
if ((cpu_id[3] >> 26) & 1) caps.sse2 = true;
if ((cpu_id[2]) & 1) caps.sse3 = true;
if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true;
if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true;
if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true;
if ((cpu_id[2] >> 22) & 1) caps.movbe = true;
if ((cpu_id[2] >> 25) & 1) caps.aes = true;
if ((cpu_id[3] >> 25) & 1)
caps.sse = true;
if ((cpu_id[3] >> 26) & 1)
caps.sse2 = true;
if ((cpu_id[2]) & 1)
caps.sse3 = true;
if ((cpu_id[2] >> 9) & 1)
caps.ssse3 = true;
if ((cpu_id[2] >> 19) & 1)
caps.sse4_1 = true;
if ((cpu_id[2] >> 20) & 1)
caps.sse4_2 = true;
if ((cpu_id[2] >> 22) & 1)
caps.movbe = true;
if ((cpu_id[2] >> 25) & 1)
caps.aes = true;
if ((cpu_id[3] >> 24) & 1) {
caps.fxsave_fxrstor = true;
@ -140,10 +142,14 @@ static CPUCaps Detect() {
if (max_ex_fn >= 0x80000001) {
// Check for more features
__cpuid(cpu_id, 0x80000001);
if (cpu_id[2] & 1) caps.lahf_sahf_64 = true;
if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true;
if ((cpu_id[2] >> 16) & 1) caps.fma4 = true;
if ((cpu_id[3] >> 29) & 1) caps.long_mode = true;
if (cpu_id[2] & 1)
caps.lahf_sahf_64 = true;
if ((cpu_id[2] >> 5) & 1)
caps.lzcnt = true;
if ((cpu_id[2] >> 16) & 1)
caps.fma4 = true;
if ((cpu_id[3] >> 29) & 1)
caps.long_mode = true;
}
return caps;
@ -162,24 +168,38 @@ std::string GetCPUCapsString() {
sum += caps.brand_string;
sum += ")";
if (caps.sse) sum += ", SSE";
if (caps.sse)
sum += ", SSE";
if (caps.sse2) {
sum += ", SSE2";
if (!caps.flush_to_zero) sum += " (without DAZ)";
if (!caps.flush_to_zero)
sum += " (without DAZ)";
}
if (caps.sse3) sum += ", SSE3";
if (caps.ssse3) sum += ", SSSE3";
if (caps.sse4_1) sum += ", SSE4.1";
if (caps.sse4_2) sum += ", SSE4.2";
if (caps.avx) sum += ", AVX";
if (caps.avx2) sum += ", AVX2";
if (caps.bmi1) sum += ", BMI1";
if (caps.bmi2) sum += ", BMI2";
if (caps.fma) sum += ", FMA";
if (caps.aes) sum += ", AES";
if (caps.movbe) sum += ", MOVBE";
if (caps.long_mode) sum += ", 64-bit support";
if (caps.sse3)
sum += ", SSE3";
if (caps.ssse3)
sum += ", SSSE3";
if (caps.sse4_1)
sum += ", SSE4.1";
if (caps.sse4_2)
sum += ", SSE4.2";
if (caps.avx)
sum += ", AVX";
if (caps.avx2)
sum += ", AVX2";
if (caps.bmi1)
sum += ", BMI1";
if (caps.bmi2)
sum += ", BMI2";
if (caps.fma)
sum += ", FMA";
if (caps.aes)
sum += ", AES";
if (caps.movbe)
sum += ", MOVBE";
if (caps.long_mode)
sum += ", 64-bit support";
return sum;
}

File diff suppressed because it is too large Load diff

View file

@ -21,8 +21,8 @@
#include "common/assert.h"
#include "common/bit_set.h"
#include "common/common_types.h"
#include "common/code_block.h"
#include "common/common_types.h"
#if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64)
#define _ARCH_64
@ -34,75 +34,145 @@
#define PTRBITS 32
#endif
namespace Gen
{
namespace Gen {
enum X64Reg
{
EAX = 0, EBX = 3, ECX = 1, EDX = 2,
ESI = 6, EDI = 7, EBP = 5, ESP = 4,
enum X64Reg {
EAX = 0,
EBX = 3,
ECX = 1,
EDX = 2,
ESI = 6,
EDI = 7,
EBP = 5,
ESP = 4,
RAX = 0, RBX = 3, RCX = 1, RDX = 2,
RSI = 6, RDI = 7, RBP = 5, RSP = 4,
R8 = 8, R9 = 9, R10 = 10,R11 = 11,
R12 = 12,R13 = 13,R14 = 14,R15 = 15,
RAX = 0,
RBX = 3,
RCX = 1,
RDX = 2,
RSI = 6,
RDI = 7,
RBP = 5,
RSP = 4,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
AL = 0, BL = 3, CL = 1, DL = 2,
SIL = 6, DIL = 7, BPL = 5, SPL = 4,
AH = 0x104, BH = 0x107, CH = 0x105, DH = 0x106,
AL = 0,
BL = 3,
CL = 1,
DL = 2,
SIL = 6,
DIL = 7,
BPL = 5,
SPL = 4,
AH = 0x104,
BH = 0x107,
CH = 0x105,
DH = 0x106,
AX = 0, BX = 3, CX = 1, DX = 2,
SI = 6, DI = 7, BP = 5, SP = 4,
AX = 0,
BX = 3,
CX = 1,
DX = 2,
SI = 6,
DI = 7,
BP = 5,
SP = 4,
XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
XMM0 = 0,
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15,
YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15,
YMM0 = 0,
YMM1,
YMM2,
YMM3,
YMM4,
YMM5,
YMM6,
YMM7,
YMM8,
YMM9,
YMM10,
YMM11,
YMM12,
YMM13,
YMM14,
YMM15,
INVALID_REG = 0xFFFFFFFF
};
enum CCFlags
{
CC_O = 0,
CC_NO = 1,
CC_B = 2, CC_C = 2, CC_NAE = 2,
CC_NB = 3, CC_NC = 3, CC_AE = 3,
CC_Z = 4, CC_E = 4,
CC_NZ = 5, CC_NE = 5,
CC_BE = 6, CC_NA = 6,
CC_NBE = 7, CC_A = 7,
CC_S = 8,
CC_NS = 9,
CC_P = 0xA, CC_PE = 0xA,
CC_NP = 0xB, CC_PO = 0xB,
CC_L = 0xC, CC_NGE = 0xC,
CC_NL = 0xD, CC_GE = 0xD,
CC_LE = 0xE, CC_NG = 0xE,
CC_NLE = 0xF, CC_G = 0xF
enum CCFlags {
CC_O = 0,
CC_NO = 1,
CC_B = 2,
CC_C = 2,
CC_NAE = 2,
CC_NB = 3,
CC_NC = 3,
CC_AE = 3,
CC_Z = 4,
CC_E = 4,
CC_NZ = 5,
CC_NE = 5,
CC_BE = 6,
CC_NA = 6,
CC_NBE = 7,
CC_A = 7,
CC_S = 8,
CC_NS = 9,
CC_P = 0xA,
CC_PE = 0xA,
CC_NP = 0xB,
CC_PO = 0xB,
CC_L = 0xC,
CC_NGE = 0xC,
CC_NL = 0xD,
CC_GE = 0xD,
CC_LE = 0xE,
CC_NG = 0xE,
CC_NLE = 0xF,
CC_G = 0xF
};
enum
{
enum {
NUMGPRs = 16,
NUMXMMs = 16,
};
enum
{
enum {
SCALE_NONE = 0,
SCALE_1 = 1,
SCALE_2 = 2,
SCALE_4 = 4,
SCALE_8 = 8,
SCALE_ATREG = 16,
//SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
// SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
SCALE_NOBASE_2 = 34,
SCALE_NOBASE_4 = 36,
SCALE_NOBASE_8 = 40,
SCALE_RIP = 0xFF,
SCALE_IMM8 = 0xF0,
SCALE_IMM8 = 0xF0,
SCALE_IMM16 = 0xF1,
SCALE_IMM32 = 0xF2,
SCALE_IMM64 = 0xF3,
@ -114,7 +184,7 @@ enum NormalOp {
nrmSUB,
nrmSBB,
nrmAND,
nrmOR ,
nrmOR,
nrmXOR,
nrmMOV,
nrmTEST,
@ -157,68 +227,74 @@ enum FloatRound {
class XEmitter;
// RIP addressing does not benefit from micro op fusion on Core arch
struct OpArg
{
struct OpArg {
friend class XEmitter;
constexpr OpArg() = default; // dummy op arg, used for storage
constexpr OpArg() = default; // dummy op arg, used for storage
constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
: scale(static_cast<u8>(scale_))
, offsetOrBaseReg(static_cast<u16>(rmReg))
, indexReg(static_cast<u16>(scaledReg))
, offset(offset_)
{
: scale(static_cast<u8>(scale_)), offsetOrBaseReg(static_cast<u16>(rmReg)),
indexReg(static_cast<u16>(scaledReg)), offset(offset_) {
}
constexpr bool operator==(const OpArg &b) const
{
return operandReg == b.operandReg &&
scale == b.scale &&
offsetOrBaseReg == b.offsetOrBaseReg &&
indexReg == b.indexReg &&
offset == b.offset;
constexpr bool operator==(const OpArg& b) const {
return operandReg == b.operandReg && scale == b.scale &&
offsetOrBaseReg == b.offsetOrBaseReg && indexReg == b.indexReg && offset == b.offset;
}
void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const;
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const;
void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const;
void WriteRex(XEmitter* emit, int opBits, int bits, int customOp = -1) const;
void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
int W = 0) const;
void WriteRest(XEmitter* emit, int extraBytes = 0, X64Reg operandReg = INVALID_REG,
bool warn_64bit_offset = true) const;
void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits);
void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
int bits) const;
constexpr bool IsImm() const { return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64; }
constexpr bool IsSimpleReg() const { return scale == SCALE_NONE; }
constexpr bool IsSimpleReg(X64Reg reg) const
{
constexpr bool IsImm() const {
return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 ||
scale == SCALE_IMM64;
}
constexpr bool IsSimpleReg() const {
return scale == SCALE_NONE;
}
constexpr bool IsSimpleReg(X64Reg reg) const {
return IsSimpleReg() && GetSimpleReg() == reg;
}
int GetImmBits() const
{
switch (scale)
{
case SCALE_IMM8: return 8;
case SCALE_IMM16: return 16;
case SCALE_IMM32: return 32;
case SCALE_IMM64: return 64;
default: return -1;
int GetImmBits() const {
switch (scale) {
case SCALE_IMM8:
return 8;
case SCALE_IMM16:
return 16;
case SCALE_IMM32:
return 32;
case SCALE_IMM64:
return 64;
default:
return -1;
}
}
void SetImmBits(int bits) {
switch (bits)
{
case 8: scale = SCALE_IMM8; break;
case 16: scale = SCALE_IMM16; break;
case 32: scale = SCALE_IMM32; break;
case 64: scale = SCALE_IMM64; break;
switch (bits) {
case 8:
scale = SCALE_IMM8;
break;
case 16:
scale = SCALE_IMM16;
break;
case 32:
scale = SCALE_IMM32;
break;
case 64:
scale = SCALE_IMM64;
break;
}
}
constexpr X64Reg GetSimpleReg() const
{
return scale == SCALE_NONE
? static_cast<X64Reg>(offsetOrBaseReg)
: INVALID_REG;
constexpr X64Reg GetSimpleReg() const {
return scale == SCALE_NONE ? static_cast<X64Reg>(offsetOrBaseReg) : INVALID_REG;
}
constexpr u32 GetImmValue() const {
@ -234,41 +310,50 @@ private:
u8 scale = 0;
u16 offsetOrBaseReg = 0;
u16 indexReg = 0;
u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available.
u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available.
u16 operandReg = 0;
};
template <typename T>
inline OpArg M(const T *ptr) { return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP)); }
constexpr OpArg R(X64Reg value) { return OpArg(0, SCALE_NONE, value); }
constexpr OpArg MatR(X64Reg value) { return OpArg(0, SCALE_ATREG, value); }
inline OpArg M(const T* ptr) {
return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP));
}
constexpr OpArg R(X64Reg value) {
return OpArg(0, SCALE_NONE, value);
}
constexpr OpArg MatR(X64Reg value) {
return OpArg(0, SCALE_ATREG, value);
}
constexpr OpArg MDisp(X64Reg value, int offset)
{
constexpr OpArg MDisp(X64Reg value, int offset) {
return OpArg(static_cast<u32>(offset), SCALE_ATREG, value);
}
constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
{
constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) {
return OpArg(offset, scale, base, scaled);
}
constexpr OpArg MScaled(X64Reg scaled, int scale, int offset)
{
return scale == SCALE_1
? OpArg(offset, SCALE_ATREG, scaled)
: OpArg(offset, scale | 0x20, RAX, scaled);
constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) {
return scale == SCALE_1 ? OpArg(offset, SCALE_ATREG, scaled)
: OpArg(offset, scale | 0x20, RAX, scaled);
}
constexpr OpArg MRegSum(X64Reg base, X64Reg offset)
{
constexpr OpArg MRegSum(X64Reg base, X64Reg offset) {
return MComplex(base, offset, 1, 0);
}
constexpr OpArg Imm8 (u8 imm) { return OpArg(imm, SCALE_IMM8); }
constexpr OpArg Imm16(u16 imm) { return OpArg(imm, SCALE_IMM16); } //rarely used
constexpr OpArg Imm32(u32 imm) { return OpArg(imm, SCALE_IMM32); }
constexpr OpArg Imm64(u64 imm) { return OpArg(imm, SCALE_IMM64); }
constexpr OpArg Imm8(u8 imm) {
return OpArg(imm, SCALE_IMM8);
}
constexpr OpArg Imm16(u16 imm) {
return OpArg(imm, SCALE_IMM16);
} // rarely used
constexpr OpArg Imm32(u32 imm) {
return OpArg(imm, SCALE_IMM32);
}
constexpr OpArg Imm64(u64 imm) {
return OpArg(imm, SCALE_IMM64);
}
constexpr OpArg UImmAuto(u32 imm) {
return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8);
}
@ -277,8 +362,7 @@ constexpr OpArg SImmAuto(s32 imm) {
}
template <typename T>
OpArg ImmPtr(const T* imm)
{
OpArg ImmPtr(const T* imm) {
#ifdef _ARCH_64
return Imm64(reinterpret_cast<u64>(imm));
#else
@ -286,36 +370,31 @@ OpArg ImmPtr(const T* imm)
#endif
}
inline u32 PtrOffset(const void* ptr, const void* base)
{
inline u32 PtrOffset(const void* ptr, const void* base) {
#ifdef _ARCH_64
s64 distance = (s64)ptr-(s64)base;
if (distance >= 0x80000000LL ||
distance < -0x80000000LL)
{
s64 distance = (s64)ptr - (s64)base;
if (distance >= 0x80000000LL || distance < -0x80000000LL) {
ASSERT_MSG(0, "pointer offset out of range");
return 0;
}
return (u32)distance;
#else
return (u32)ptr-(u32)base;
return (u32)ptr - (u32)base;
#endif
}
//usage: int a[]; ARRAY_OFFSET(a,10)
#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0]))
//usage: struct {int e;} s; STRUCT_OFFSET(s,e)
#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))
// usage: int a[]; ARRAY_OFFSET(a,10)
#define ARRAY_OFFSET(array, index) ((u32)((u64) & (array)[index] - (u64) & (array)[0]))
// usage: struct {int e;} s; STRUCT_OFFSET(s,e)
#define STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str)))
struct FixupBranch
{
u8 *ptr;
int type; //0 = 8bit 1 = 32bit
struct FixupBranch {
u8* ptr;
int type; // 0 = 8bit 1 = 32bit
};
enum SSECompare
{
enum SSECompare {
EQ = 0,
LT,
LE,
@ -326,11 +405,10 @@ enum SSECompare
ORD,
};
class XEmitter
{
friend struct OpArg; // for Write8 etc
class XEmitter {
friend struct OpArg; // for Write8 etc
private:
u8 *code;
u8* code;
bool flags_locked;
void CheckFlags();
@ -347,14 +425,19 @@ private:
void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
int extrabytes = 0);
void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
int extrabytes = 0);
void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
int extrabytes = 0);
void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
int extrabytes = 0);
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
void WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
protected:
void Write8(u8 value);
@ -363,26 +446,38 @@ protected:
void Write64(u64 value);
public:
XEmitter() { code = nullptr; flags_locked = false; }
XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; }
virtual ~XEmitter() {}
XEmitter() {
code = nullptr;
flags_locked = false;
}
XEmitter(u8* code_ptr) {
code = code_ptr;
flags_locked = false;
}
virtual ~XEmitter() {
}
void WriteModRM(int mod, int rm, int reg);
void WriteSIB(int scale, int index, int base);
void SetCodePtr(u8 *ptr);
void SetCodePtr(u8* ptr);
void ReserveCodeSpace(int bytes);
const u8 *AlignCode4();
const u8 *AlignCode16();
const u8 *AlignCodePage();
const u8 *GetCodePtr() const;
u8 *GetWritableCodePtr();
const u8* AlignCode4();
const u8* AlignCode16();
const u8* AlignCodePage();
const u8* GetCodePtr() const;
u8* GetWritableCodePtr();
void LockFlags() { flags_locked = true; }
void UnlockFlags() { flags_locked = false; }
void LockFlags() {
flags_locked = true;
}
void UnlockFlags() {
flags_locked = false;
}
// Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU
// INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr.,
// INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other
// string instr.,
// INC and DEC are slow on Intel Core, but not on AMD. They create a
// false flag dependency because they only update a subset of the flags.
// XCHG is SLOW and should be avoided.
@ -401,11 +496,11 @@ public:
void CLC();
void CMC();
// These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and AMD!
// These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and
// AMD!
void LAHF(); // 3 cycle vector path
void SAHF(); // direct path fast
// Stack control
void PUSH(X64Reg reg);
void POP(X64Reg reg);
@ -422,7 +517,7 @@ public:
void JMP(const u8* addr, bool force5Bytes = false);
void JMPptr(const OpArg& arg);
void JMPself(); //infinite loop!
void JMPself(); // infinite loop!
#ifdef CALL
#undef CALL
#endif
@ -450,12 +545,11 @@ public:
void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit
// Cache control
enum PrefetchLevel
{
PF_NTA, //Non-temporal (data used once and only once)
PF_T0, //All cache levels
PF_T1, //Levels 2+ (aliased to T0 on AMD)
PF_T2, //Levels 3+ (aliased to T0 on AMD)
enum PrefetchLevel {
PF_NTA, // Non-temporal (data used once and only once)
PF_T0, // All cache levels
PF_T1, // Levels 2+ (aliased to T0 on AMD)
PF_T2, // Levels 3+ (aliased to T0 on AMD)
};
void PREFETCH(PrefetchLevel level, OpArg arg);
void MOVNTI(int bits, const OpArg& dest, X64Reg src);
@ -464,8 +558,8 @@ public:
void MOVNTPD(const OpArg& arg, X64Reg regOp);
// Multiplication / division
void MUL(int bits, const OpArg& src); //UNSIGNED
void IMUL(int bits, const OpArg& src); //SIGNED
void MUL(int bits, const OpArg& src); // UNSIGNED
void IMUL(int bits, const OpArg& src); // SIGNED
void IMUL(int bits, X64Reg regOp, const OpArg& src);
void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
void DIV(int bits, const OpArg& src);
@ -492,11 +586,19 @@ public:
// Extend EAX into EDX in various ways
void CWD(int bits = 16);
void CDQ() {CWD(32);}
void CQO() {CWD(64);}
void CDQ() {
CWD(32);
}
void CQO() {
CWD(64);
}
void CBW(int bits = 8);
void CWDE() {CBW(16);}
void CDQE() {CBW(32);}
void CWDE() {
CBW(16);
}
void CDQE() {
CBW(32);
}
// Load effective address
void LEA(int bits, X64Reg dest, OpArg src);
@ -511,7 +613,7 @@ public:
void CMP(int bits, const OpArg& a1, const OpArg& a2);
// Bit operations
void NOT (int bits, const OpArg& src);
void NOT(int bits, const OpArg& src);
void OR(int bits, const OpArg& a1, const OpArg& a2);
void XOR(int bits, const OpArg& a1, const OpArg& a2);
void MOV(int bits, const OpArg& a1, const OpArg& a2);
@ -525,7 +627,8 @@ public:
void BSWAP(int bits, X64Reg reg);
// Sign/zero extension
void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary
void MOVSX(int dbits, int sbits, X64Reg dest,
OpArg src); // automatically uses MOVSXD if necessary
void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);
// Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe.
@ -593,13 +696,27 @@ public:
void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);
void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); }
void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); }
void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); }
void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); }
void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); }
void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); }
void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); }
void CMPEQSS(X64Reg regOp, const OpArg& arg) {
CMPSS(regOp, arg, CMP_EQ);
}
void CMPLTSS(X64Reg regOp, const OpArg& arg) {
CMPSS(regOp, arg, CMP_LT);
}
void CMPLESS(X64Reg regOp, const OpArg& arg) {
CMPSS(regOp, arg, CMP_LE);
}
void CMPUNORDSS(X64Reg regOp, const OpArg& arg) {
CMPSS(regOp, arg, CMP_UNORD);
}
void CMPNEQSS(X64Reg regOp, const OpArg& arg) {
CMPSS(regOp, arg, CMP_NEQ);
}
void CMPNLTSS(X64Reg regOp, const OpArg& arg) {
CMPSS(regOp, arg, CMP_NLT);
}
void CMPORDSS(X64Reg regOp, const OpArg& arg) {
CMPSS(regOp, arg, CMP_ORD);
}
// SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
void ADDPS(X64Reg regOp, const OpArg& arg);
@ -638,10 +755,12 @@ public:
// SSE/SSE2: Useful alternative to shuffle in some cases.
void MOVDDUP(X64Reg regOp, const OpArg& arg);
// SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
// SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily
// on Ivy.
void HADDPS(X64Reg dest, const OpArg& src);
// SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
// SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg
// contains both a read mask and a write "mask".
void DPPS(X64Reg dest, const OpArg& src, u8 arg);
void UNPCKLPS(X64Reg dest, const OpArg& src);
@ -694,11 +813,13 @@ public:
void MOVD_xmm(const OpArg& arg, X64Reg src);
void MOVQ_xmm(OpArg arg, X64Reg src);
// SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
// SSE/SSE2: Generates a mask from the high bits of the components of the packed register in
// question.
void MOVMSKPS(X64Reg dest, const OpArg& arg);
void MOVMSKPD(X64Reg dest, const OpArg& arg);
// SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
// SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a
// weird one.
void MASKMOVDQU(X64Reg dest, X64Reg src);
void LDDQU(X64Reg dest, const OpArg& src);
@ -729,10 +850,10 @@ public:
void PACKUSDW(X64Reg dest, const OpArg& arg);
void PACKUSWB(X64Reg dest, const OpArg& arg);
void PUNPCKLBW(X64Reg dest, const OpArg &arg);
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
void PUNPCKLBW(X64Reg dest, const OpArg& arg);
void PUNPCKLWD(X64Reg dest, const OpArg& arg);
void PUNPCKLDQ(X64Reg dest, const OpArg& arg);
void PUNPCKLQDQ(X64Reg dest, const OpArg& arg);
void PTEST(X64Reg dest, const OpArg& arg);
void PAND(X64Reg dest, const OpArg& arg);
@ -839,25 +960,57 @@ public:
void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);
void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
void ROUNDNEARSS(X64Reg dest, const OpArg& arg) {
ROUNDSS(dest, arg, FROUND_NEAREST);
}
void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) {
ROUNDSS(dest, arg, FROUND_FLOOR);
}
void ROUNDCEILSS(X64Reg dest, const OpArg& arg) {
ROUNDSS(dest, arg, FROUND_CEIL);
}
void ROUNDZEROSS(X64Reg dest, const OpArg& arg) {
ROUNDSS(dest, arg, FROUND_ZERO);
}
void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
void ROUNDNEARSD(X64Reg dest, const OpArg& arg) {
ROUNDSD(dest, arg, FROUND_NEAREST);
}
void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) {
ROUNDSD(dest, arg, FROUND_FLOOR);
}
void ROUNDCEILSD(X64Reg dest, const OpArg& arg) {
ROUNDSD(dest, arg, FROUND_CEIL);
}
void ROUNDZEROSD(X64Reg dest, const OpArg& arg) {
ROUNDSD(dest, arg, FROUND_ZERO);
}
void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
void ROUNDNEARPS(X64Reg dest, const OpArg& arg) {
ROUNDPS(dest, arg, FROUND_NEAREST);
}
void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) {
ROUNDPS(dest, arg, FROUND_FLOOR);
}
void ROUNDCEILPS(X64Reg dest, const OpArg& arg) {
ROUNDPS(dest, arg, FROUND_CEIL);
}
void ROUNDZEROPS(X64Reg dest, const OpArg& arg) {
ROUNDPS(dest, arg, FROUND_ZERO);
}
void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
void ROUNDNEARPD(X64Reg dest, const OpArg& arg) {
ROUNDPD(dest, arg, FROUND_NEAREST);
}
void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) {
ROUNDPD(dest, arg, FROUND_FLOOR);
}
void ROUNDCEILPD(X64Reg dest, const OpArg& arg) {
ROUNDPD(dest, arg, FROUND_CEIL);
}
void ROUNDZEROPD(X64Reg dest, const OpArg& arg) {
ROUNDPD(dest, arg, FROUND_ZERO);
}
// AVX
void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
@ -981,7 +1134,6 @@ public:
void ABI_CallFunctionC16(const void* func, u16 param1);
void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);
// These only support u32 parameters, but that's enough for a lot of uses.
// These will destroy the 1 or 2 first "parameter regs".
void ABI_CallFunctionC(const void* func, u32 param1);
@ -1012,29 +1164,38 @@ public:
*
* @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs)
* @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8
* @param needed_frame_size Additional space needed, e.g., for function arguments passed on the stack
* @param needed_frame_size Additional space needed, e.g., for function arguments passed on the
* stack
* @return Size of the shadow space, i.e., offset of the frame
*/
size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
size_t needed_frame_size = 0);
/**
* Restores specified registers and adjusts the stack to its original alignment, i.e., the alignment before
* Restores specified registers and adjusts the stack to its original alignment, i.e., the
* alignment before
* the matching PushRegistersAndAdjustStack.
*
* @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are GPRs)
* @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must be 0 or 8
* @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are
* GPRs)
* @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must
* be 0 or 8
* @param needed_frame_size Additional space that was needed
* @warning Stack must be currently 16-byte aligned
*/
void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
#ifdef _M_IX86
static int ABI_GetNumXMMRegs() { return 8; }
#else
static int ABI_GetNumXMMRegs() { return 16; }
#endif
}; // class XEmitter
void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
size_t needed_frame_size = 0);
#ifdef _M_IX86
static int ABI_GetNumXMMRegs() {
return 8;
}
#else
static int ABI_GetNumXMMRegs() {
return 16;
}
#endif
}; // class XEmitter
// Everything that needs to generate X86 code should inherit from this.
// You get memory management for free, plus, you can use all the MOV etc functions without
@ -1045,4 +1206,4 @@ public:
void PoisonMemory() override;
};
} // namespace
} // namespace