Add support for guest Fz (Fpcr) mode through host Ftz and Daz (Mxcsr) modes (fast paths). (#1630)
* Add support for guest Fz (Fpcr) mode through host Ftz and Daz (Mxcsr) modes (fast paths).
* Ptc.InternalVersion = 1630
* Nits.
* Address comments.
* Update Ptc.cs
* Address comment.
This commit is contained in:
parent
668720b088
commit
567ea726e1
14 changed files with 221 additions and 27 deletions
|
@ -72,7 +72,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
{
|
||||
_instTable = new InstructionInfo[(int)X86Instruction.Count];
|
||||
|
||||
// Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
|
||||
// Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
|
||||
Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None));
|
||||
Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex));
|
||||
|
@ -131,6 +131,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None));
|
||||
Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
|
||||
Add(X86Instruction.Ldmxcsr, new InstructionInfo(0x02000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
|
||||
Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None));
|
||||
Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex));
|
||||
|
@ -257,6 +258,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex));
|
||||
Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
||||
Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
||||
Add(X86Instruction.Stmxcsr, new InstructionInfo(0x03000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
|
||||
Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None));
|
||||
Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex));
|
||||
|
@ -526,6 +528,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
WriteInstruction(dest, null, OperandType.None, X86Instruction.Jmp);
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits <see cref="X86Instruction.Ldmxcsr"/> with <paramref name="dest"/> as its only operand
/// (no second operand is supplied) and a 32-bit integer operand type.
/// </summary>
/// <param name="dest">
/// Operand handed to the encoder. The Mxcsr intrinsic lowering passes a 32-bit, RSP-relative memory operand here.
/// NOTE(review): architecturally LDMXCSR reads this operand (it loads MXCSR from m32), so "dest" is a naming
/// convention rather than a description of the data flow.
/// </param>
public void Ldmxcsr(Operand dest)
|
||||
{
|
||||
WriteInstruction(dest, null, OperandType.I32, X86Instruction.Ldmxcsr);
|
||||
}
|
||||
|
||||
public void Lea(Operand dest, Operand source, OperandType type)
|
||||
{
|
||||
WriteInstruction(dest, source, type, X86Instruction.Lea);
|
||||
|
@ -796,6 +803,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
WriteOpCode(dest, null, null, OperandType.None, info.Flags, info.OpRRM | (int)condition);
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits <see cref="X86Instruction.Stmxcsr"/> with <paramref name="dest"/> as its only operand
/// (no second operand is supplied) and a 32-bit integer operand type.
/// </summary>
/// <param name="dest">
/// Operand handed to the encoder. The Mxcsr intrinsic lowering passes a 32-bit, RSP-relative memory operand here,
/// which is then modified in place before being reloaded.
/// </param>
public void Stmxcsr(Operand dest)
|
||||
{
|
||||
WriteInstruction(dest, null, OperandType.I32, X86Instruction.Stmxcsr);
|
||||
}
|
||||
|
||||
public void Sub(Operand dest, Operand source, OperandType type)
|
||||
{
|
||||
WriteInstruction(dest, source, type, X86Instruction.Sub);
|
||||
|
|
|
@ -250,6 +250,40 @@ namespace ARMeilleure.CodeGen.X86
|
|||
break;
|
||||
}
|
||||
|
||||
// Lowers the X86Mxcsrmb / X86Mxcsrub intrinsics as a read-modify-write of MXCSR through a
// 32-bit stack slot: Stmxcsr into the slot, Or/And the slot in memory, Ldmxcsr from the slot.
case IntrinsicType.Mxcsr:
|
||||
{
|
||||
// Source 0 is the stack slot offset and source 1 the bit mask; both are asserted below
// to be I32 constants.
Operand offset = operation.GetSource(0);
|
||||
Operand bits = operation.GetSource(1);
|
||||
|
||||
Debug.Assert(offset.Kind == OperandKind.Constant && bits.Kind == OperandKind.Constant);
|
||||
Debug.Assert(offset.Type == OperandType.I32 && bits.Type == OperandType.I32);
|
||||
|
||||
// NOTE(review): presumably the allocated slots sit after the call arguments region in the
// frame, hence the added CallArgsRegionSize - confirm against the stack layout.
int offs = offset.AsInt32() + context.CallArgsRegionSize;
|
||||
|
||||
Operand rsp = Register(X86Register.Rsp);
|
||||
|
||||
// 32-bit memory operand at [rsp + offs], with no index register.
MemoryOperand memOp = MemoryOp(OperandType.I32, rsp, null, Multiplier.x1, offs);
|
||||
|
||||
Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding);
|
||||
|
||||
// The same memory operand is used for the store, the in-memory update and the reload.
context.Assembler.Stmxcsr(memOp);
|
||||
|
||||
if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrmb)
|
||||
{
|
||||
// "Mask bits": OR the requested bits into the stored value (sets them).
context.Assembler.Or(memOp, bits, OperandType.I32);
|
||||
}
|
||||
else /* if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrub) */
|
||||
{
|
||||
// "Unmask bits": AND the stored value with the complement of the bits (clears them).
Operand notBits = Const(~bits.AsInt32());
|
||||
|
||||
context.Assembler.And(memOp, notBits, OperandType.I32);
|
||||
}
|
||||
|
||||
// Reload the updated value from the slot.
context.Assembler.Ldmxcsr(memOp);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case IntrinsicType.PopCount:
|
||||
{
|
||||
Operand dest = operation.Destination;
|
||||
|
|
|
@ -76,6 +76,8 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Mxcsrmb, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Mask bits.
|
||||
Add(Intrinsic.X86Mxcsrub, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Unmask bits.
|
||||
Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
|
||||
|
|
|
@ -3,6 +3,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
enum IntrinsicType
|
||||
{
|
||||
Comis_,
|
||||
Mxcsr,
|
||||
PopCount,
|
||||
Unary,
|
||||
UnaryToGpr,
|
||||
|
|
|
@ -114,6 +114,16 @@ namespace ARMeilleure.CodeGen.X86
|
|||
node = HandleVectorInsert8(block.Operations, node, operation);
|
||||
}
|
||||
break;
|
||||
|
||||
// Intrinsic operations: the two MXCSR intrinsics get a stack slot allocated for them here.
case Instruction.Extended:
|
||||
IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
|
||||
|
||||
if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrmb || intrinOp.Intrinsic == Intrinsic.X86Mxcsrub)
|
||||
{
|
||||
// Allocate an I32 slot and rewrite the sources to { slot offset, original source 0 },
// so the slot offset becomes source 0 and the original operand becomes source 1.
int stackOffset = stackAlloc.Allocate(OperandType.I32);
|
||||
operation.SetSources(new Operand[] { Const(stackOffset), operation.GetSource(0) });
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
{
|
||||
enum X86Instruction
|
||||
{
|
||||
None,
|
||||
Add,
|
||||
Addpd,
|
||||
Addps,
|
||||
|
@ -60,6 +61,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Imul128,
|
||||
Insertps,
|
||||
Jmp,
|
||||
Ldmxcsr,
|
||||
Lea,
|
||||
Maxpd,
|
||||
Maxps,
|
||||
|
@ -186,6 +188,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Sqrtps,
|
||||
Sqrtsd,
|
||||
Sqrtss,
|
||||
Stmxcsr,
|
||||
Sub,
|
||||
Subpd,
|
||||
Subps,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue