Sources: Run clang-format on everything.

parent fe948af095
commit dc8479928c

386 changed files with 19560 additions and 18080 deletions
@@ -22,7 +22,8 @@ using namespace Gen;

 // Shared code between Win64 and Unix64

-void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
+void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
+                                      size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
     size_t shadow = 0;
 #if defined(_WIN32)
     shadow = 0x20;

@@ -49,17 +50,19 @@ void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_
     *xmm_offsetp = subtraction - xmm_base_subtraction;
 }

-size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
+size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
+                                                 size_t needed_frame_size) {
     size_t shadow, subtraction, xmm_offset;
-    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
+    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
+                           &xmm_offset);

-    for (int r : mask & ABI_ALL_GPRS)
+    for (int r : mask& ABI_ALL_GPRS)
         PUSH((X64Reg)r);

     if (subtraction)
         SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));

-    for (int x : mask & ABI_ALL_FPRS) {
+    for (int x : mask& ABI_ALL_FPRS) {
         MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
         xmm_offset += 16;
     }
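Note on the two loops above: the BitSet32 register masks pack both register files into one 32-bit word - bits 0-15 name GPRs and bits 16-31 name XMM registers, which is why the FPR loop subtracts 16 before casting to X64Reg. (The odd "mask&" spacing is a clang-format artifact around the overloaded operator&; the code is unchanged in meaning.) A minimal, self-contained sketch of the encoding, with a hypothetical mask:

#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical mask: save RBX (bit 3), RBP (bit 5) and XMM1 (bit 1 + 16).
    uint32_t mask = (1u << 3) | (1u << 5) | (1u << (1 + 16));
    for (int bit = 0; bit < 32; ++bit) {
        if (!(mask & (1u << bit)))
            continue;
        if (bit < 16)
            std::printf("PUSH GPR #%d\n", bit); // low half: general-purpose registers
        else
            std::printf("store XMM%d\n", bit - 16); // high half: XMM, hence the x - 16 above
    }
    return 0;
}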
@@ -67,12 +70,14 @@ size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_align
     return shadow;
 }

-void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
+void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
+                                              size_t needed_frame_size) {
     size_t shadow, subtraction, xmm_offset;
-    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
+    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
+                           &xmm_offset);

-    for (int x : mask & ABI_ALL_FPRS) {
-        MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset));
+    for (int x : mask& ABI_ALL_FPRS) {
+        MOVAPD((X64Reg)(x - 16), MDisp(RSP, (int)xmm_offset));
         xmm_offset += 16;
     }

@@ -86,10 +91,9 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignmen
 }

 // Common functions
-void XEmitter::ABI_CallFunction(const void *func) {
+void XEmitter::ABI_CallFunction(const void* func) {
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));
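For context: the distance test repeated throughout these call helpers decides between a 5-byte rel32 CALL and the MOV RAX, imm64 + CALLptr fallback. The displacement is taken relative to the end of the would-be call instruction (hence the +5), and in unsigned 64-bit arithmetic the wrapped value falls inside [0x0000000080000000, 0xFFFFFFFF80000000) exactly when it does not fit a signed 32-bit immediate. A self-contained check of that property (function name hypothetical):

#include <cassert>
#include <cstdint>

// Returns true when 'target' is NOT reachable with a rel32 call emitted at
// 'site', mirroring the test above.
bool needs_far_call(uint64_t site, uint64_t target) {
    uint64_t distance = target - (site + 5); // rel32 is relative to the end of the 5-byte call
    return distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL;
}

int main() {
    assert(!needs_far_call(0x1000, 0x2000));                 // nearby, forwards: direct CALL
    assert(!needs_far_call(0x2000, 0x1000));                 // nearby, backwards: direct CALL
    assert(needs_far_call(0x1000, 0x1000 + 0x100000000ULL)); // > 2 GiB away: far call
    return 0;
}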
@@ -98,11 +102,10 @@ void XEmitter::ABI_CallFunction(const void *func) {
     }
 }

-void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
+void XEmitter::ABI_CallFunctionC16(const void* func, u16 param1) {
     MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -111,25 +114,11 @@ void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
     }
 }

-void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
+void XEmitter::ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2) {
     MOV(32, R(ABI_PARAM1), Imm32(param1));
     MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
-        // Far call
-        MOV(64, R(RAX), ImmPtr(func));
-        CALLptr(R(RAX));
-    } else {
-        CALL(func);
-    }
-}
-
-void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
-    MOV(32, R(ABI_PARAM1), Imm32(param1));
-    u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -138,12 +127,23 @@ void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
     }
 }

-void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
+void XEmitter::ABI_CallFunctionC(const void* func, u32 param1) {
     MOV(32, R(ABI_PARAM1), Imm32(param1));
+    u64 distance = u64(func) - (u64(code) + 5);
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
+        // Far call
+        MOV(64, R(RAX), ImmPtr(func));
+        CALLptr(R(RAX));
+    } else {
+        CALL(func);
+    }
+}
+
+void XEmitter::ABI_CallFunctionCC(const void* func, u32 param1, u32 param2) {
+    MOV(32, R(ABI_PARAM1), Imm32(param1));
     MOV(32, R(ABI_PARAM2), Imm32(param2));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -152,13 +152,12 @@ void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
     }
 }

-void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
+void XEmitter::ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3) {
     MOV(32, R(ABI_PARAM1), Imm32(param1));
     MOV(32, R(ABI_PARAM2), Imm32(param2));
     MOV(32, R(ABI_PARAM3), Imm32(param3));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -167,13 +166,12 @@ void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32
     }
 }

-void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
+void XEmitter::ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3) {
     MOV(32, R(ABI_PARAM1), Imm32(param1));
     MOV(32, R(ABI_PARAM2), Imm32(param2));
     MOV(64, R(ABI_PARAM3), ImmPtr(param3));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -182,14 +180,14 @@ void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, voi
     }
 }

-void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) {
+void XEmitter::ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3,
+                                    void* param4) {
     MOV(32, R(ABI_PARAM1), Imm32(param1));
     MOV(32, R(ABI_PARAM2), Imm32(param2));
     MOV(32, R(ABI_PARAM3), Imm32(param3));
     MOV(64, R(ABI_PARAM4), ImmPtr(param4));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -198,11 +196,10 @@ void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u3
     }
 }

-void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
+void XEmitter::ABI_CallFunctionP(const void* func, void* param1) {
     MOV(64, R(ABI_PARAM1), ImmPtr(param1));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -211,13 +208,12 @@ void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
     }
 }

-void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
+void XEmitter::ABI_CallFunctionPA(const void* func, void* param1, const Gen::OpArg& arg2) {
     MOV(64, R(ABI_PARAM1), ImmPtr(param1));
     if (!arg2.IsSimpleReg(ABI_PARAM2))
         MOV(32, R(ABI_PARAM2), arg2);
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -226,15 +222,15 @@ void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpA
     }
 }

-void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
+void XEmitter::ABI_CallFunctionPAA(const void* func, void* param1, const Gen::OpArg& arg2,
+                                   const Gen::OpArg& arg3) {
     MOV(64, R(ABI_PARAM1), ImmPtr(param1));
     if (!arg2.IsSimpleReg(ABI_PARAM2))
         MOV(32, R(ABI_PARAM2), arg2);
     if (!arg3.IsSimpleReg(ABI_PARAM3))
         MOV(32, R(ABI_PARAM3), arg3);
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -243,13 +239,12 @@ void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::Op
     }
 }

-void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
+void XEmitter::ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3) {
     MOV(64, R(ABI_PARAM1), ImmPtr(param1));
     MOV(64, R(ABI_PARAM2), ImmPtr(param2));
     MOV(32, R(ABI_PARAM3), Imm32(param3));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -259,12 +254,11 @@ void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2,
 }

 // Pass a register as a parameter.
-void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
+void XEmitter::ABI_CallFunctionR(const void* func, X64Reg reg1) {
     if (reg1 != ABI_PARAM1)
         MOV(32, R(ABI_PARAM1), R(reg1));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -274,7 +268,7 @@ void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
 }

 // Pass two registers as parameters.
-void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
+void XEmitter::ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2) {
     if (reg2 != ABI_PARAM1) {
         if (reg1 != ABI_PARAM1)
             MOV(64, R(ABI_PARAM1), R(reg1));

@@ -287,8 +281,7 @@ void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
             MOV(64, R(ABI_PARAM1), R(reg1));
     }
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -297,14 +290,12 @@ void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
     }
 }

-void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
-{
+void XEmitter::ABI_CallFunctionAC(const void* func, const Gen::OpArg& arg1, u32 param2) {
     if (!arg1.IsSimpleReg(ABI_PARAM1))
         MOV(32, R(ABI_PARAM1), arg1);
     MOV(32, R(ABI_PARAM2), Imm32(param2));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -313,15 +304,14 @@ void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32
     }
 }

-void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
-{
+void XEmitter::ABI_CallFunctionACC(const void* func, const Gen::OpArg& arg1, u32 param2,
+                                   u32 param3) {
     if (!arg1.IsSimpleReg(ABI_PARAM1))
         MOV(32, R(ABI_PARAM1), arg1);
     MOV(32, R(ABI_PARAM2), Imm32(param2));
     MOV(64, R(ABI_PARAM3), Imm64(param3));
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -330,13 +320,11 @@ void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32
     }
 }

-void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
-{
+void XEmitter::ABI_CallFunctionA(const void* func, const Gen::OpArg& arg1) {
     if (!arg1.IsSimpleReg(ABI_PARAM1))
         MOV(32, R(ABI_PARAM1), arg1);
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -345,15 +333,14 @@ void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
     }
 }

-void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
-{
+void XEmitter::ABI_CallFunctionAA(const void* func, const Gen::OpArg& arg1,
+                                  const Gen::OpArg& arg2) {
     if (!arg1.IsSimpleReg(ABI_PARAM1))
         MOV(32, R(ABI_PARAM1), arg1);
     if (!arg2.IsSimpleReg(ABI_PARAM2))
         MOV(32, R(ABI_PARAM2), arg2);
     u64 distance = u64(func) - (u64(code) + 5);
-    if (distance >= 0x0000000080000000ULL
-        && distance < 0xFFFFFFFF80000000ULL) {
+    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
         // Far call
         MOV(64, R(RAX), ImmPtr(func));
         CALLptr(R(RAX));

@@ -12,7 +12,8 @@

 // Windows 64-bit
 // * 4-reg "fastcall" variant, very new-skool stack handling
-// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
+// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself
+// calls_
 // * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
 // Scratch: RAX RCX RDX R8 R9 R10 R11
 // Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
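For readers unfamiliar with the shadow-space convention referenced above: a Win64 caller always reserves 0x20 bytes - one 8-byte slot per register parameter (RCX, RDX, R8, R9) - directly below the return address, and the callee may spill into them; this is where the 0x20 in ABI_CalculateFrameSize comes from. A hypothetical sketch of the resulting frame arithmetic (illustrative only, not the exact computation the emitter performs):

#include <cstddef>

// What a Win64 caller must reserve before a CALL: the 0x20-byte shadow area,
// plus any stack arguments beyond the fourth parameter, rounded so that RSP
// stays 16-byte aligned at the call site.
constexpr std::size_t shadow = 0x20;

constexpr std::size_t FrameBytes(std::size_t stack_args) {
    std::size_t needed = shadow + stack_args * 8;
    return (needed + 15) & ~std::size_t{15}; // round up to 16-byte alignment
}

static_assert(FrameBytes(0) == 0x20, "just the shadow area");
static_assert(FrameBytes(2) == 0x30, "two stack arguments on top");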
@@ -35,10 +36,10 @@
 #define ABI_PARAM4 R9

 // xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
-#define ABI_ALL_CALLER_SAVED \
-    (BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
-      XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
-#else //64-bit Unix / OS X
+#define ABI_ALL_CALLER_SAVED                                                                      \
+    (BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16,       \
+              XMM4 + 16, XMM5 + 16})
+#else // 64-bit Unix / OS X

 #define ABI_PARAM1 RDI
 #define ABI_PARAM2 RSI

@@ -49,9 +50,7 @@

 // TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
 // don't actually clobber them.
-#define ABI_ALL_CALLER_SAVED \
-    (BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
-     ABI_ALL_FPRS)
+#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS)
 #endif // WIN32

 #define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)

@@ -15,8 +15,8 @@ namespace Common {
 #ifndef _MSC_VER

 #ifdef __FreeBSD__
-#include <sys/types.h>
 #include <machine/cpufunc.h>
+#include <sys/types.h>
 #endif

 static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {

@@ -26,15 +26,9 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
 #else
     info[0] = function_id;    // eax
     info[2] = subfunction_id; // ecx
-    __asm__(
-        "cpuid"
-        : "=a" (info[0]),
-        "=b" (info[1]),
-        "=c" (info[2]),
-        "=d" (info[3])
-        : "a" (function_id),
-        "c" (subfunction_id)
-        );
+    __asm__("cpuid"
+            : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
+            : "a"(function_id), "c"(subfunction_id));
 #endif
 }

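A standalone usage sketch of the wrapper above (GCC/Clang on x86 only; the name cpuidex is hypothetical). Leaf 0 returns the vendor string scattered across EBX, EDX, ECX, in that order:

#include <cstdio>
#include <cstring>

static inline void cpuidex(int info[4], int function_id, int subfunction_id) {
    __asm__("cpuid"
            : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
            : "a"(function_id), "c"(subfunction_id));
}

int main() {
    int info[4];
    cpuidex(info, 0, 0);
    char vendor[13] = {};
    std::memcpy(vendor + 0, &info[1], 4); // EBX -> "Genu" / "Auth"
    std::memcpy(vendor + 4, &info[3], 4); // EDX -> "ineI" / "enti"
    std::memcpy(vendor + 8, &info[2], 4); // ECX -> "ntel" / "cAMD"
    std::printf("CPU vendor: %s\n", vendor);
    return 0;
}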
@@ -88,14 +82,22 @@ static CPUCaps Detect() {
     if (max_std_fn >= 1) {
         __cpuid(cpu_id, 0x00000001);

-        if ((cpu_id[3] >> 25) & 1) caps.sse = true;
-        if ((cpu_id[3] >> 26) & 1) caps.sse2 = true;
-        if ((cpu_id[2]) & 1) caps.sse3 = true;
-        if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true;
-        if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true;
-        if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true;
-        if ((cpu_id[2] >> 22) & 1) caps.movbe = true;
-        if ((cpu_id[2] >> 25) & 1) caps.aes = true;
+        if ((cpu_id[3] >> 25) & 1)
+            caps.sse = true;
+        if ((cpu_id[3] >> 26) & 1)
+            caps.sse2 = true;
+        if ((cpu_id[2]) & 1)
+            caps.sse3 = true;
+        if ((cpu_id[2] >> 9) & 1)
+            caps.ssse3 = true;
+        if ((cpu_id[2] >> 19) & 1)
+            caps.sse4_1 = true;
+        if ((cpu_id[2] >> 20) & 1)
+            caps.sse4_2 = true;
+        if ((cpu_id[2] >> 22) & 1)
+            caps.movbe = true;
+        if ((cpu_id[2] >> 25) & 1)
+            caps.aes = true;

         if ((cpu_id[3] >> 24) & 1) {
             caps.fxsave_fxrstor = true;
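The bit positions tested above come straight from the CPUID.01H feature words: EDX carries the legacy flags (bit 25 = SSE, bit 26 = SSE2) and ECX the newer ones (bit 0 = SSE3, bit 9 = SSSE3, bit 19 = SSE4.1, bit 20 = SSE4.2, bit 22 = MOVBE, bit 25 = AES). A tiny helper (hypothetical name) stating the pattern once:

#include <cstdint>

constexpr bool TestBit(uint32_t reg, int bit) {
    return ((reg >> bit) & 1) != 0;
}

// e.g. caps.sse4_1 = TestBit(cpu_id[2], 19);
static_assert(TestBit(1u << 19, 19), "bit 19 set");
static_assert(!TestBit(1u << 19, 20), "neighbouring bit clear");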
@@ -140,10 +142,14 @@ static CPUCaps Detect() {
     if (max_ex_fn >= 0x80000001) {
         // Check for more features
         __cpuid(cpu_id, 0x80000001);
-        if (cpu_id[2] & 1) caps.lahf_sahf_64 = true;
-        if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true;
-        if ((cpu_id[2] >> 16) & 1) caps.fma4 = true;
-        if ((cpu_id[3] >> 29) & 1) caps.long_mode = true;
+        if (cpu_id[2] & 1)
+            caps.lahf_sahf_64 = true;
+        if ((cpu_id[2] >> 5) & 1)
+            caps.lzcnt = true;
+        if ((cpu_id[2] >> 16) & 1)
+            caps.fma4 = true;
+        if ((cpu_id[3] >> 29) & 1)
+            caps.long_mode = true;
     }

     return caps;

@@ -162,24 +168,38 @@ std::string GetCPUCapsString() {
     sum += caps.brand_string;
     sum += ")";

-    if (caps.sse) sum += ", SSE";
+    if (caps.sse)
+        sum += ", SSE";
     if (caps.sse2) {
         sum += ", SSE2";
-        if (!caps.flush_to_zero) sum += " (without DAZ)";
+        if (!caps.flush_to_zero)
+            sum += " (without DAZ)";
     }

-    if (caps.sse3) sum += ", SSE3";
-    if (caps.ssse3) sum += ", SSSE3";
-    if (caps.sse4_1) sum += ", SSE4.1";
-    if (caps.sse4_2) sum += ", SSE4.2";
-    if (caps.avx) sum += ", AVX";
-    if (caps.avx2) sum += ", AVX2";
-    if (caps.bmi1) sum += ", BMI1";
-    if (caps.bmi2) sum += ", BMI2";
-    if (caps.fma) sum += ", FMA";
-    if (caps.aes) sum += ", AES";
-    if (caps.movbe) sum += ", MOVBE";
-    if (caps.long_mode) sum += ", 64-bit support";
+    if (caps.sse3)
+        sum += ", SSE3";
+    if (caps.ssse3)
+        sum += ", SSSE3";
+    if (caps.sse4_1)
+        sum += ", SSE4.1";
+    if (caps.sse4_2)
+        sum += ", SSE4.2";
+    if (caps.avx)
+        sum += ", AVX";
+    if (caps.avx2)
+        sum += ", AVX2";
+    if (caps.bmi1)
+        sum += ", BMI1";
+    if (caps.bmi2)
+        sum += ", BMI2";
+    if (caps.fma)
+        sum += ", FMA";
+    if (caps.aes)
+        sum += ", AES";
+    if (caps.movbe)
+        sum += ", MOVBE";
+    if (caps.long_mode)
+        sum += ", 64-bit support";

     return sum;
 }

File diff suppressed because it is too large
@@ -21,8 +21,8 @@

 #include "common/assert.h"
 #include "common/bit_set.h"
-#include "common/common_types.h"
 #include "common/code_block.h"
+#include "common/common_types.h"

 #if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64)
 #define _ARCH_64
@@ -34,75 +36,145 @@
 #define PTRBITS 32
 #endif

-namespace Gen
-{
+namespace Gen {

-enum X64Reg
-{
-    EAX = 0, EBX = 3, ECX = 1, EDX = 2,
-    ESI = 6, EDI = 7, EBP = 5, ESP = 4,
+enum X64Reg {
+    EAX = 0,
+    EBX = 3,
+    ECX = 1,
+    EDX = 2,
+    ESI = 6,
+    EDI = 7,
+    EBP = 5,
+    ESP = 4,

-    RAX = 0, RBX = 3, RCX = 1, RDX = 2,
-    RSI = 6, RDI = 7, RBP = 5, RSP = 4,
-    R8 = 8, R9 = 9, R10 = 10,R11 = 11,
-    R12 = 12,R13 = 13,R14 = 14,R15 = 15,
+    RAX = 0,
+    RBX = 3,
+    RCX = 1,
+    RDX = 2,
+    RSI = 6,
+    RDI = 7,
+    RBP = 5,
+    RSP = 4,
+    R8 = 8,
+    R9 = 9,
+    R10 = 10,
+    R11 = 11,
+    R12 = 12,
+    R13 = 13,
+    R14 = 14,
+    R15 = 15,

-    AL = 0, BL = 3, CL = 1, DL = 2,
-    SIL = 6, DIL = 7, BPL = 5, SPL = 4,
-    AH = 0x104, BH = 0x107, CH = 0x105, DH = 0x106,
+    AL = 0,
+    BL = 3,
+    CL = 1,
+    DL = 2,
+    SIL = 6,
+    DIL = 7,
+    BPL = 5,
+    SPL = 4,
+    AH = 0x104,
+    BH = 0x107,
+    CH = 0x105,
+    DH = 0x106,

-    AX = 0, BX = 3, CX = 1, DX = 2,
-    SI = 6, DI = 7, BP = 5, SP = 4,
+    AX = 0,
+    BX = 3,
+    CX = 1,
+    DX = 2,
+    SI = 6,
+    DI = 7,
+    BP = 5,
+    SP = 4,

-    XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
+    XMM0 = 0,
+    XMM1,
+    XMM2,
+    XMM3,
+    XMM4,
+    XMM5,
+    XMM6,
+    XMM7,
+    XMM8,
+    XMM9,
+    XMM10,
+    XMM11,
+    XMM12,
+    XMM13,
+    XMM14,
+    XMM15,

-    YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
-    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15,
+    YMM0 = 0,
+    YMM1,
+    YMM2,
+    YMM3,
+    YMM4,
+    YMM5,
+    YMM6,
+    YMM7,
+    YMM8,
+    YMM9,
+    YMM10,
+    YMM11,
+    YMM12,
+    YMM13,
+    YMM14,
+    YMM15,

     INVALID_REG = 0xFFFFFFFF
 };

-enum CCFlags
-{
-    CC_O   = 0,
-    CC_NO  = 1,
-    CC_B   = 2, CC_C   = 2, CC_NAE = 2,
-    CC_NB  = 3, CC_NC  = 3, CC_AE  = 3,
-    CC_Z   = 4, CC_E   = 4,
-    CC_NZ  = 5, CC_NE  = 5,
-    CC_BE  = 6, CC_NA  = 6,
-    CC_NBE = 7, CC_A   = 7,
-    CC_S   = 8,
-    CC_NS  = 9,
-    CC_P   = 0xA, CC_PE  = 0xA,
-    CC_NP  = 0xB, CC_PO  = 0xB,
-    CC_L   = 0xC, CC_NGE = 0xC,
-    CC_NL  = 0xD, CC_GE  = 0xD,
-    CC_LE  = 0xE, CC_NG  = 0xE,
-    CC_NLE = 0xF, CC_G   = 0xF
+enum CCFlags {
+    CC_O = 0,
+    CC_NO = 1,
+    CC_B = 2,
+    CC_C = 2,
+    CC_NAE = 2,
+    CC_NB = 3,
+    CC_NC = 3,
+    CC_AE = 3,
+    CC_Z = 4,
+    CC_E = 4,
+    CC_NZ = 5,
+    CC_NE = 5,
+    CC_BE = 6,
+    CC_NA = 6,
+    CC_NBE = 7,
+    CC_A = 7,
+    CC_S = 8,
+    CC_NS = 9,
+    CC_P = 0xA,
+    CC_PE = 0xA,
+    CC_NP = 0xB,
+    CC_PO = 0xB,
+    CC_L = 0xC,
+    CC_NGE = 0xC,
+    CC_NL = 0xD,
+    CC_GE = 0xD,
+    CC_LE = 0xE,
+    CC_NG = 0xE,
+    CC_NLE = 0xF,
+    CC_G = 0xF
 };

-enum
-{
+enum {
     NUMGPRs = 16,
     NUMXMMs = 16,
 };

-enum
-{
+enum {
     SCALE_NONE = 0,
     SCALE_1 = 1,
     SCALE_2 = 2,
     SCALE_4 = 4,
     SCALE_8 = 8,
     SCALE_ATREG = 16,
-    //SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
+    // SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
     SCALE_NOBASE_2 = 34,
     SCALE_NOBASE_4 = 36,
     SCALE_NOBASE_8 = 40,
     SCALE_RIP = 0xFF,
-    SCALE_IMM8  = 0xF0,
+    SCALE_IMM8 = 0xF0,
     SCALE_IMM16 = 0xF1,
     SCALE_IMM32 = 0xF2,
     SCALE_IMM64 = 0xF3,
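The enumerator values above are the hardware register numbers, which is why RAX through R15 run 0-15 and why the XMM/YMM sets reuse the same range; the 0x104-0x107 values on AH/BH/CH/DH presumably flag the legacy high-byte forms that cannot be combined with a REX prefix. When an instruction is encoded, the low three bits of a register number go into the ModRM byte and the fourth bit travels in the REX prefix. A self-contained sketch of that standard x86-64 encoding (not code from this header):

#include <cstdint>

// ModRM = mod:2 | reg:3 | rm:3; mod = 3 selects register-direct operands.
constexpr uint8_t ModRM(int mod, int reg, int rm) {
    return static_cast<uint8_t>((mod << 6) | ((reg & 7) << 3) | (rm & 7));
}

// REX = 0100WRXB; here only the R (reg extension) and B (rm extension) bits.
constexpr uint8_t RexRB(int reg, int rm) {
    return static_cast<uint8_t>(0x40 | ((reg >> 3) << 2) | (rm >> 3));
}

static_assert(ModRM(3, 0 /*RAX*/, 3 /*RBX*/) == 0xC3, "e.g. the ModRM of 'mov rbx, rax'");
static_assert(RexRB(8 /*R8*/, 0 /*RAX*/) == 0x44, "REX.R carries the high bit of R8");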
@@ -114,7 +184,7 @@ enum NormalOp {
     nrmSUB,
     nrmSBB,
     nrmAND,
-    nrmOR ,
+    nrmOR,
     nrmXOR,
     nrmMOV,
     nrmTEST,
@@ -157,68 +227,74 @@ enum FloatRound {
 class XEmitter;

 // RIP addressing does not benefit from micro op fusion on Core arch
-struct OpArg
-{
+struct OpArg {
     friend class XEmitter;

-    constexpr OpArg() = default;  // dummy op arg, used for storage
+    constexpr OpArg() = default; // dummy op arg, used for storage
     constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
-        : scale(static_cast<u8>(scale_))
-        , offsetOrBaseReg(static_cast<u16>(rmReg))
-        , indexReg(static_cast<u16>(scaledReg))
-        , offset(offset_)
-    {
+        : scale(static_cast<u8>(scale_)), offsetOrBaseReg(static_cast<u16>(rmReg)),
+          indexReg(static_cast<u16>(scaledReg)), offset(offset_) {
     }

-    constexpr bool operator==(const OpArg &b) const
-    {
-        return operandReg == b.operandReg &&
-               scale == b.scale &&
-               offsetOrBaseReg == b.offsetOrBaseReg &&
-               indexReg == b.indexReg &&
-               offset == b.offset;
+    constexpr bool operator==(const OpArg& b) const {
+        return operandReg == b.operandReg && scale == b.scale &&
+               offsetOrBaseReg == b.offsetOrBaseReg && indexReg == b.indexReg && offset == b.offset;
     }

-    void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
-    void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const;
-    void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const;
-    void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
-    void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const;
+    void WriteRex(XEmitter* emit, int opBits, int bits, int customOp = -1) const;
+    void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
+                  int W = 0) const;
+    void WriteRest(XEmitter* emit, int extraBytes = 0, X64Reg operandReg = INVALID_REG,
+                   bool warn_64bit_offset = true) const;
+    void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits);
+    void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
+                       int bits) const;

-    constexpr bool IsImm() const { return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64; }
-    constexpr bool IsSimpleReg() const { return scale == SCALE_NONE; }
-    constexpr bool IsSimpleReg(X64Reg reg) const
-    {
+    constexpr bool IsImm() const {
+        return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 ||
+               scale == SCALE_IMM64;
+    }
+    constexpr bool IsSimpleReg() const {
+        return scale == SCALE_NONE;
+    }
+    constexpr bool IsSimpleReg(X64Reg reg) const {
         return IsSimpleReg() && GetSimpleReg() == reg;
     }

-    int GetImmBits() const
-    {
-        switch (scale)
-        {
-        case SCALE_IMM8: return 8;
-        case SCALE_IMM16: return 16;
-        case SCALE_IMM32: return 32;
-        case SCALE_IMM64: return 64;
-        default: return -1;
+    int GetImmBits() const {
+        switch (scale) {
+        case SCALE_IMM8:
+            return 8;
+        case SCALE_IMM16:
+            return 16;
+        case SCALE_IMM32:
+            return 32;
+        case SCALE_IMM64:
+            return 64;
+        default:
+            return -1;
         }
     }

     void SetImmBits(int bits) {
-        switch (bits)
-        {
-        case 8: scale = SCALE_IMM8; break;
-        case 16: scale = SCALE_IMM16; break;
-        case 32: scale = SCALE_IMM32; break;
-        case 64: scale = SCALE_IMM64; break;
+        switch (bits) {
+        case 8:
+            scale = SCALE_IMM8;
+            break;
+        case 16:
+            scale = SCALE_IMM16;
+            break;
+        case 32:
+            scale = SCALE_IMM32;
+            break;
+        case 64:
+            scale = SCALE_IMM64;
+            break;
         }
     }

-    constexpr X64Reg GetSimpleReg() const
-    {
-        return scale == SCALE_NONE
-            ? static_cast<X64Reg>(offsetOrBaseReg)
-            : INVALID_REG;
+    constexpr X64Reg GetSimpleReg() const {
+        return scale == SCALE_NONE ? static_cast<X64Reg>(offsetOrBaseReg) : INVALID_REG;
     }

     constexpr u32 GetImmValue() const {
@@ -234,41 +310,50 @@ private:
     u8 scale = 0;
     u16 offsetOrBaseReg = 0;
     u16 indexReg = 0;
-    u64 offset = 0;  // use RIP-relative as much as possible - 64-bit immediates are not available.
+    u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available.
     u16 operandReg = 0;
 };

 template <typename T>
-inline OpArg M(const T *ptr) { return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP)); }
-constexpr OpArg R(X64Reg value) { return OpArg(0, SCALE_NONE, value); }
-constexpr OpArg MatR(X64Reg value) { return OpArg(0, SCALE_ATREG, value); }
+inline OpArg M(const T* ptr) {
+    return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP));
+}
+constexpr OpArg R(X64Reg value) {
+    return OpArg(0, SCALE_NONE, value);
+}
+constexpr OpArg MatR(X64Reg value) {
+    return OpArg(0, SCALE_ATREG, value);
+}

-constexpr OpArg MDisp(X64Reg value, int offset)
-{
+constexpr OpArg MDisp(X64Reg value, int offset) {
     return OpArg(static_cast<u32>(offset), SCALE_ATREG, value);
 }

-constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
-{
+constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) {
     return OpArg(offset, scale, base, scaled);
 }

-constexpr OpArg MScaled(X64Reg scaled, int scale, int offset)
-{
-    return scale == SCALE_1
-        ? OpArg(offset, SCALE_ATREG, scaled)
-        : OpArg(offset, scale | 0x20, RAX, scaled);
+constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) {
+    return scale == SCALE_1 ? OpArg(offset, SCALE_ATREG, scaled)
+                            : OpArg(offset, scale | 0x20, RAX, scaled);
 }

-constexpr OpArg MRegSum(X64Reg base, X64Reg offset)
-{
+constexpr OpArg MRegSum(X64Reg base, X64Reg offset) {
     return MComplex(base, offset, 1, 0);
 }

-constexpr OpArg Imm8 (u8 imm) { return OpArg(imm, SCALE_IMM8); }
-constexpr OpArg Imm16(u16 imm) { return OpArg(imm, SCALE_IMM16); } //rarely used
-constexpr OpArg Imm32(u32 imm) { return OpArg(imm, SCALE_IMM32); }
-constexpr OpArg Imm64(u64 imm) { return OpArg(imm, SCALE_IMM64); }
+constexpr OpArg Imm8(u8 imm) {
+    return OpArg(imm, SCALE_IMM8);
+}
+constexpr OpArg Imm16(u16 imm) {
+    return OpArg(imm, SCALE_IMM16);
+} // rarely used
+constexpr OpArg Imm32(u32 imm) {
+    return OpArg(imm, SCALE_IMM32);
+}
+constexpr OpArg Imm64(u64 imm) {
+    return OpArg(imm, SCALE_IMM64);
+}
 constexpr OpArg UImmAuto(u32 imm) {
     return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8);
 }
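Taken together, the constexpr helpers above are the vocabulary for building operands. An illustrative mapping onto x86 addressing modes (kept in comment form, since it leans on the emitter's own types):

//   R(RAX)                          register operand             rax
//   MatR(RBX)                       memory at register           [rbx]
//   MDisp(RBP, -8)                  base + displacement          [rbp - 8]
//   MComplex(RBX, RCX, SCALE_4, 16) base + index*scale + disp    [rbx + rcx*4 + 16]
//   MScaled(RCX, SCALE_8, 0)        index*scale only             [rcx*8]
//   MRegSum(RBX, RCX)               base + index                 [rbx + rcx]
//   M(&some_global)                 RIP-relative                 [rip + offset]
//
// e.g. MOV(64, R(RAX), MComplex(RBX, RCX, SCALE_4, 16)); // mov rax, [rbx + rcx*4 + 16]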
@@ -277,8 +362,7 @@ constexpr OpArg SImmAuto(s32 imm) {
 }

 template <typename T>
-OpArg ImmPtr(const T* imm)
-{
+OpArg ImmPtr(const T* imm) {
 #ifdef _ARCH_64
     return Imm64(reinterpret_cast<u64>(imm));
 #else
@@ -286,36 +370,31 @@ OpArg ImmPtr(const T* imm)
 #endif
 }

-inline u32 PtrOffset(const void* ptr, const void* base)
-{
+inline u32 PtrOffset(const void* ptr, const void* base) {
 #ifdef _ARCH_64
-    s64 distance = (s64)ptr-(s64)base;
-    if (distance >= 0x80000000LL ||
-        distance < -0x80000000LL)
-    {
+    s64 distance = (s64)ptr - (s64)base;
+    if (distance >= 0x80000000LL || distance < -0x80000000LL) {
         ASSERT_MSG(0, "pointer offset out of range");
         return 0;
     }

     return (u32)distance;
 #else
-    return (u32)ptr-(u32)base;
+    return (u32)ptr - (u32)base;
 #endif
 }

-//usage: int a[]; ARRAY_OFFSET(a,10)
-#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0]))
-//usage: struct {int e;} s; STRUCT_OFFSET(s,e)
-#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))
+// usage: int a[]; ARRAY_OFFSET(a,10)
+#define ARRAY_OFFSET(array, index) ((u32)((u64) & (array)[index] - (u64) & (array)[0]))
+// usage: struct {int e;} s; STRUCT_OFFSET(s,e)
+#define STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str)))

-struct FixupBranch
-{
-    u8 *ptr;
-    int type; //0 = 8bit 1 = 32bit
+struct FixupBranch {
+    u8* ptr;
+    int type; // 0 = 8bit 1 = 32bit
 };

-enum SSECompare
-{
+enum SSECompare {
     EQ = 0,
     LT,
     LE,
@@ -326,11 +405,10 @@ enum SSECompare
     ORD,
 };

-class XEmitter
-{
-    friend struct OpArg;  // for Write8 etc
+class XEmitter {
+    friend struct OpArg; // for Write8 etc
 private:
-    u8 *code;
+    u8* code;
     bool flags_locked;

     void CheckFlags();

@@ -347,14 +425,19 @@ private:
     void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
     void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
     void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
-    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
-    void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
-    void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
-    void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
+                    int extrabytes = 0);
+    void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
+                    int extrabytes = 0);
+    void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
+                     int extrabytes = 0);
+    void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
+                     int extrabytes = 0);
     void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
-    void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
+    void WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);

-    void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
+    void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
+                                size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);

 protected:
     void Write8(u8 value);

@@ -363,26 +446,38 @@ protected:
     void Write64(u64 value);

 public:
-    XEmitter() { code = nullptr; flags_locked = false; }
-    XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; }
-    virtual ~XEmitter() {}
+    XEmitter() {
+        code = nullptr;
+        flags_locked = false;
+    }
+    XEmitter(u8* code_ptr) {
+        code = code_ptr;
+        flags_locked = false;
+    }
+    virtual ~XEmitter() {
+    }

     void WriteModRM(int mod, int rm, int reg);
     void WriteSIB(int scale, int index, int base);

-    void SetCodePtr(u8 *ptr);
+    void SetCodePtr(u8* ptr);
     void ReserveCodeSpace(int bytes);
-    const u8 *AlignCode4();
-    const u8 *AlignCode16();
-    const u8 *AlignCodePage();
-    const u8 *GetCodePtr() const;
-    u8 *GetWritableCodePtr();
+    const u8* AlignCode4();
+    const u8* AlignCode16();
+    const u8* AlignCodePage();
+    const u8* GetCodePtr() const;
+    u8* GetWritableCodePtr();

-    void LockFlags() { flags_locked = true; }
-    void UnlockFlags() { flags_locked = false; }
+    void LockFlags() {
+        flags_locked = true;
+    }
+    void UnlockFlags() {
+        flags_locked = false;
+    }

     // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU
-    // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr.,
+    // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other
+    // string instr.,
     // INC and DEC are slow on Intel Core, but not on AMD. They create a
     // false flag dependency because they only update a subset of the flags.
     // XCHG is SLOW and should be avoided.
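A hypothetical usage sketch of the constructors and code-pointer accessors above (comment form; it is not repository code, and it assumes the emitter's usual RET(), which this excerpt does not show):

//   u8 buffer[4096];                 // in practice a CodeBlock / mmap'd RWX region
//   XEmitter emit(buffer);           // or: XEmitter emit; emit.SetCodePtr(buffer);
//   emit.MOV(32, R(EAX), Imm32(42)); // emit "mov eax, 42"
//   emit.RET();
//   size_t length = emit.GetCodePtr() - buffer; // bytes emitted so far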
@@ -401,11 +496,11 @@ public:
     void CLC();
     void CMC();

-    // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and AMD!
+    // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and
+    // AMD!
     void LAHF(); // 3 cycle vector path
     void SAHF(); // direct path fast

     // Stack control
     void PUSH(X64Reg reg);
     void POP(X64Reg reg);

@@ -422,7 +517,7 @@ public:
     void JMP(const u8* addr, bool force5Bytes = false);
     void JMPptr(const OpArg& arg);
-    void JMPself(); //infinite loop!
+    void JMPself(); // infinite loop!
 #ifdef CALL
 #undef CALL
 #endif

@@ -450,12 +545,11 @@ public:
     void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit

     // Cache control
-    enum PrefetchLevel
-    {
-        PF_NTA, //Non-temporal (data used once and only once)
-        PF_T0,  //All cache levels
-        PF_T1,  //Levels 2+ (aliased to T0 on AMD)
-        PF_T2,  //Levels 3+ (aliased to T0 on AMD)
+    enum PrefetchLevel {
+        PF_NTA, // Non-temporal (data used once and only once)
+        PF_T0,  // All cache levels
+        PF_T1,  // Levels 2+ (aliased to T0 on AMD)
+        PF_T2,  // Levels 3+ (aliased to T0 on AMD)
     };
     void PREFETCH(PrefetchLevel level, OpArg arg);
     void MOVNTI(int bits, const OpArg& dest, X64Reg src);

@@ -464,8 +558,8 @@ public:
     void MOVNTPD(const OpArg& arg, X64Reg regOp);

     // Multiplication / division
-    void MUL(int bits, const OpArg& src); //UNSIGNED
-    void IMUL(int bits, const OpArg& src); //SIGNED
+    void MUL(int bits, const OpArg& src);  // UNSIGNED
+    void IMUL(int bits, const OpArg& src); // SIGNED
     void IMUL(int bits, X64Reg regOp, const OpArg& src);
     void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
     void DIV(int bits, const OpArg& src);

@@ -492,11 +586,19 @@ public:

     // Extend EAX into EDX in various ways
     void CWD(int bits = 16);
-    void CDQ() {CWD(32);}
-    void CQO() {CWD(64);}
+    void CDQ() {
+        CWD(32);
+    }
+    void CQO() {
+        CWD(64);
+    }
     void CBW(int bits = 8);
-    void CWDE() {CBW(16);}
-    void CDQE() {CBW(32);}
+    void CWDE() {
+        CBW(16);
+    }
+    void CDQE() {
+        CBW(32);
+    }

     // Load effective address
     void LEA(int bits, X64Reg dest, OpArg src);

@@ -511,7 +613,7 @@ public:
     void CMP(int bits, const OpArg& a1, const OpArg& a2);

     // Bit operations
-    void NOT (int bits, const OpArg& src);
+    void NOT(int bits, const OpArg& src);
     void OR(int bits, const OpArg& a1, const OpArg& a2);
     void XOR(int bits, const OpArg& a1, const OpArg& a2);
     void MOV(int bits, const OpArg& a1, const OpArg& a2);

@@ -525,7 +627,8 @@ public:
     void BSWAP(int bits, X64Reg reg);

     // Sign/zero extension
-    void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary
+    void MOVSX(int dbits, int sbits, X64Reg dest,
+               OpArg src); // automatically uses MOVSXD if necessary
     void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);

     // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe.

@@ -593,13 +696,27 @@ public:
     void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
     void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);

-    void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); }
-    void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); }
-    void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); }
-    void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); }
-    void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); }
-    void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); }
-    void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); }
+    void CMPEQSS(X64Reg regOp, const OpArg& arg) {
+        CMPSS(regOp, arg, CMP_EQ);
+    }
+    void CMPLTSS(X64Reg regOp, const OpArg& arg) {
+        CMPSS(regOp, arg, CMP_LT);
+    }
+    void CMPLESS(X64Reg regOp, const OpArg& arg) {
+        CMPSS(regOp, arg, CMP_LE);
+    }
+    void CMPUNORDSS(X64Reg regOp, const OpArg& arg) {
+        CMPSS(regOp, arg, CMP_UNORD);
+    }
+    void CMPNEQSS(X64Reg regOp, const OpArg& arg) {
+        CMPSS(regOp, arg, CMP_NEQ);
+    }
+    void CMPNLTSS(X64Reg regOp, const OpArg& arg) {
+        CMPSS(regOp, arg, CMP_NLT);
+    }
+    void CMPORDSS(X64Reg regOp, const OpArg& arg) {
+        CMPSS(regOp, arg, CMP_ORD);
+    }

     // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
     void ADDPS(X64Reg regOp, const OpArg& arg);
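The compare byte these wrappers forward is the standard SSE compare-predicate immediate, which lines up with the SSECompare enum earlier in this header; the CMP_* constants are presumably aliases of the same values. For reference:

//   0 = EQ, 1 = LT, 2 = LE, 3 = UNORD, 4 = NEQ, 5 = NLT, 6 = NLE, 7 = ORD
// so CMPEQSS(XMM0, arg) assembles to "cmpss xmm0, arg, 0".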
@@ -638,10 +755,12 @@ public:
     // SSE/SSE2: Useful alternative to shuffle in some cases.
     void MOVDDUP(X64Reg regOp, const OpArg& arg);

-    // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
+    // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily
+    // on Ivy.
     void HADDPS(X64Reg dest, const OpArg& src);

-    // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
+    // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg
+    // contains both a read mask and a write "mask".
     void DPPS(X64Reg dest, const OpArg& src, u8 arg);

     void UNPCKLPS(X64Reg dest, const OpArg& src);

@@ -694,11 +813,13 @@ public:
     void MOVD_xmm(const OpArg& arg, X64Reg src);
     void MOVQ_xmm(OpArg arg, X64Reg src);

-    // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
+    // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in
+    // question.
     void MOVMSKPS(X64Reg dest, const OpArg& arg);
     void MOVMSKPD(X64Reg dest, const OpArg& arg);

-    // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
+    // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a
+    // weird one.
     void MASKMOVDQU(X64Reg dest, X64Reg src);
     void LDDQU(X64Reg dest, const OpArg& src);

@@ -729,10 +850,10 @@ public:
     void PACKUSDW(X64Reg dest, const OpArg& arg);
     void PACKUSWB(X64Reg dest, const OpArg& arg);

-    void PUNPCKLBW(X64Reg dest, const OpArg &arg);
-    void PUNPCKLWD(X64Reg dest, const OpArg &arg);
-    void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
-    void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
+    void PUNPCKLBW(X64Reg dest, const OpArg& arg);
+    void PUNPCKLWD(X64Reg dest, const OpArg& arg);
+    void PUNPCKLDQ(X64Reg dest, const OpArg& arg);
+    void PUNPCKLQDQ(X64Reg dest, const OpArg& arg);

     void PTEST(X64Reg dest, const OpArg& arg);
     void PAND(X64Reg dest, const OpArg& arg);

@@ -839,25 +960,57 @@ public:
     void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
     void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);

-    void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
-    void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
-    void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
-    void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARSS(X64Reg dest, const OpArg& arg) {
+        ROUNDSS(dest, arg, FROUND_NEAREST);
+    }
+    void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) {
+        ROUNDSS(dest, arg, FROUND_FLOOR);
+    }
+    void ROUNDCEILSS(X64Reg dest, const OpArg& arg) {
+        ROUNDSS(dest, arg, FROUND_CEIL);
+    }
+    void ROUNDZEROSS(X64Reg dest, const OpArg& arg) {
+        ROUNDSS(dest, arg, FROUND_ZERO);
+    }

-    void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
-    void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
-    void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
-    void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARSD(X64Reg dest, const OpArg& arg) {
+        ROUNDSD(dest, arg, FROUND_NEAREST);
+    }
+    void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) {
+        ROUNDSD(dest, arg, FROUND_FLOOR);
+    }
+    void ROUNDCEILSD(X64Reg dest, const OpArg& arg) {
+        ROUNDSD(dest, arg, FROUND_CEIL);
+    }
+    void ROUNDZEROSD(X64Reg dest, const OpArg& arg) {
+        ROUNDSD(dest, arg, FROUND_ZERO);
+    }

-    void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
-    void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
-    void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
-    void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARPS(X64Reg dest, const OpArg& arg) {
+        ROUNDPS(dest, arg, FROUND_NEAREST);
+    }
+    void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) {
+        ROUNDPS(dest, arg, FROUND_FLOOR);
+    }
+    void ROUNDCEILPS(X64Reg dest, const OpArg& arg) {
+        ROUNDPS(dest, arg, FROUND_CEIL);
+    }
+    void ROUNDZEROPS(X64Reg dest, const OpArg& arg) {
+        ROUNDPS(dest, arg, FROUND_ZERO);
+    }

-    void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
-    void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
-    void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
-    void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARPD(X64Reg dest, const OpArg& arg) {
+        ROUNDPD(dest, arg, FROUND_NEAREST);
+    }
+    void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) {
+        ROUNDPD(dest, arg, FROUND_FLOOR);
+    }
+    void ROUNDCEILPD(X64Reg dest, const OpArg& arg) {
+        ROUNDPD(dest, arg, FROUND_CEIL);
+    }
+    void ROUNDZEROPD(X64Reg dest, const OpArg& arg) {
+        ROUNDPD(dest, arg, FROUND_ZERO);
+    }

     // AVX
     void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
@@ -981,7 +1134,6 @@ public:
     void ABI_CallFunctionC16(const void* func, u16 param1);
     void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);

-
     // These only support u32 parameters, but that's enough for a lot of uses.
     // These will destroy the 1 or 2 first "parameter regs".
     void ABI_CallFunctionC(const void* func, u32 param1);
@@ -1012,29 +1164,38 @@ public:
      *
      * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs)
      * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8
-     * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the stack
+     * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the
+     * stack
      * @return Size of the shadow space, i.e., offset of the frame
      */
-    size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
+    size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
+                                           size_t needed_frame_size = 0);

     /**
-     * Restores specified registers and adjusts the stack to its original alignment, i.e., the alignment before
+     * Restores specified registers and adjusts the stack to its original alignment, i.e., the
+     * alignment before
      * the matching PushRegistersAndAdjustStack.
      *
-     * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are GPRs)
-     * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must be 0 or 8
+     * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are
+     * GPRs)
+     * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must
+     * be 0 or 8
      * @param needed_frame_size Additional space that was needed
      * @warning Stack must be currently 16-byte aligned
      */
-    void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
-
-#ifdef _M_IX86
-    static int ABI_GetNumXMMRegs() { return 8; }
-#else
-    static int ABI_GetNumXMMRegs() { return 16; }
-#endif
-}; // class XEmitter
+    void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
+                                        size_t needed_frame_size = 0);

+#ifdef _M_IX86
+    static int ABI_GetNumXMMRegs() {
+        return 8;
+    }
+#else
+    static int ABI_GetNumXMMRegs() {
+        return 16;
+    }
+#endif
+}; // class XEmitter

 // Everything that needs to generate X86 code should inherit from this.
 // You get memory management for free, plus, you can use all the MOV etc functions without
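A hypothetical pairing of the two helpers documented above (illustrative, not repository code). The same mask and alignment value must reach both calls so the pop exactly mirrors the push:

//   BitSet32 saved = ABI_ALL_CALLER_SAVED;
//   size_t shadow = ABI_PushRegistersAndAdjustStack(saved, /*rsp_alignment=*/8);
//   ABI_CallFunction(reinterpret_cast<const void*>(&SomeHelper)); // SomeHelper is hypothetical
//   ABI_PopRegistersAndAdjustStack(saved, /*rsp_alignment=*/8);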
@@ -1045,4 +1206,4 @@ public:
     void PoisonMemory() override;
 };

-}  // namespace
+} // namespace