CodeGen Optimisations (LSRA and Translator) (#978)
* Start of JIT garbage collection improvements - thread static pool for Operand, MemoryOperand, Operation - Operands and Operations are always to be constructed via their static helper classes, so they can be pooled. - removing LinkedList from Node for sources/destinations (replaced with List<>s for now, but probably could do arrays since size is bounded) - removing params constructors from Node - LinkedList<> to List<> with Clear() for Operand assignments/uses - ThreadStaticPool is very simple and basically just exists for the purpose of our specific translation allocation problem. Right now it will stay at the worst case allocation count for that thread (so far) - the pool can never shrink. - Still some cases of Operand[] that haven't been removed yet. Will need to evaluate them (e.g. is there a reasonable max number of params for Calls?) * ConcurrentStack instead of ConcurrentQueue for Rejit * Optimize some parts of LSRA - BitMap now operates on 64-bit int rather than 32-bit - BitMap is now pooled in a ThreadStatic pool (within LSRA) - BitMap is now its own iterator. Marginally speeds up iterating through the bits. - A few cases where enumerators were generated have been converted to forms that generate less garbage. - New data structure for sorting _usePositions in LiveIntervals. Much faster split, NextUseAfter, initial insertion. Random insertion is slightly slower. - That last one is WIP since you need to insert the values backwards. It would be ideal if it just flipped it for you, uncomplicating things on the caller side. * Use a static pool of thread static pools. (yes.) Prevents each execution thread creating its own lowCq pool and making me cry. * Move constant value to top, change naming convention. * Fix iteration of memory operands. * Increase max thread count. * Address Feedback
This commit is contained in:
parent
7475e180b4
commit
8226997bc7
24 changed files with 868 additions and 347 deletions
|
@ -9,6 +9,9 @@ using System;
|
|||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Numerics;
|
||||
|
||||
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace ARMeilleure.CodeGen.X86
|
||||
{
|
||||
|
@ -550,7 +553,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
|
||||
{
|
||||
MemoryOperand memOp = new MemoryOperand(OperandType.I64, src1);
|
||||
MemoryOperand memOp = MemoryOp(OperandType.I64, src1);
|
||||
|
||||
context.Assembler.Cmpxchg16b(memOp);
|
||||
}
|
||||
|
@ -561,7 +564,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
EnsureSameType(src2, src3);
|
||||
|
||||
MemoryOperand memOp = new MemoryOperand(src3.Type, src1);
|
||||
MemoryOperand memOp = MemoryOp(src3.Type, src1);
|
||||
|
||||
context.Assembler.Cmpxchg(memOp, src3);
|
||||
}
|
||||
|
@ -751,7 +754,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
// operand size constant to the destination register.
|
||||
context.JumpToNear(X86Condition.NotEqual);
|
||||
|
||||
context.Assembler.Mov(dest, new Operand(operandSize | operandMask), OperandType.I32);
|
||||
context.Assembler.Mov(dest, Const(operandSize | operandMask), OperandType.I32);
|
||||
|
||||
context.JumpHere();
|
||||
|
||||
|
@ -759,7 +762,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
// starting from the least significant bit. However we are supposed to
|
||||
// return the number of 0 bits on the high end. So, we invert the result
|
||||
// of the BSR using XOR to get the correct value.
|
||||
context.Assembler.Xor(dest, new Operand(operandMask), OperandType.I32);
|
||||
context.Assembler.Xor(dest, Const(operandMask), OperandType.I32);
|
||||
}
|
||||
|
||||
private static void GenerateCpuId(CodeGenContext context, Operation operation)
|
||||
|
@ -828,7 +831,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Operand rsp = Register(X86Register.Rsp);
|
||||
|
||||
MemoryOperand memOp = new MemoryOperand(dest.Type, rsp, null, Multiplier.x1, offs);
|
||||
MemoryOperand memOp = MemoryOp(dest.Type, rsp, null, Multiplier.x1, offs);
|
||||
|
||||
GenerateLoad(context, memOp, dest);
|
||||
}
|
||||
|
@ -1027,7 +1030,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Operand rsp = Register(X86Register.Rsp);
|
||||
|
||||
MemoryOperand memOp = new MemoryOperand(source.Type, rsp, null, Multiplier.x1, offs);
|
||||
MemoryOperand memOp = MemoryOp(source.Type, rsp, null, Multiplier.x1, offs);
|
||||
|
||||
GenerateStore(context, memOp, source);
|
||||
}
|
||||
|
@ -1043,7 +1046,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
Operand rsp = Register(X86Register.Rsp);
|
||||
|
||||
MemoryOperand memOp = new MemoryOperand(OperandType.I64, rsp, null, Multiplier.x1, offs);
|
||||
MemoryOperand memOp = MemoryOp(OperandType.I64, rsp, null, Multiplier.x1, offs);
|
||||
|
||||
context.Assembler.Lea(dest, memOp, OperandType.I64);
|
||||
}
|
||||
|
@ -1247,7 +1250,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
if ((index & 1) != 0)
|
||||
{
|
||||
context.Assembler.Shr(dest, new Operand(8), OperandType.I32);
|
||||
context.Assembler.Shr(dest, Const(8), OperandType.I32);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1286,7 +1289,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word));
|
||||
|
||||
// Move next word down.
|
||||
context.Assembler.Ror(src2, new Operand(16), src2.Type);
|
||||
context.Assembler.Ror(src2, Const(16), src2.Type);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1594,7 +1597,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
while (mask != 0)
|
||||
{
|
||||
int bit = BitUtils.LowestBitSet(mask);
|
||||
int bit = BitOperations.TrailingZeroCount(mask);
|
||||
|
||||
context.Assembler.Push(Register((X86Register)bit));
|
||||
|
||||
|
@ -1614,7 +1617,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
if (reservedStackSize != 0)
|
||||
{
|
||||
context.Assembler.Sub(rsp, new Operand(reservedStackSize), OperandType.I64);
|
||||
context.Assembler.Sub(rsp, Const(reservedStackSize), OperandType.I64);
|
||||
}
|
||||
|
||||
int offset = reservedStackSize;
|
||||
|
@ -1623,11 +1626,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
while (mask != 0)
|
||||
{
|
||||
int bit = BitUtils.LowestBitSet(mask);
|
||||
int bit = BitOperations.TrailingZeroCount(mask);
|
||||
|
||||
offset -= 16;
|
||||
|
||||
MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);
|
||||
MemoryOperand memOp = MemoryOp(OperandType.V128, rsp, null, Multiplier.x1, offset);
|
||||
|
||||
context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));
|
||||
|
||||
|
@ -1653,11 +1656,11 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
while (mask != 0)
|
||||
{
|
||||
int bit = BitUtils.LowestBitSet(mask);
|
||||
int bit = BitOperations.TrailingZeroCount(mask);
|
||||
|
||||
offset -= 16;
|
||||
|
||||
MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);
|
||||
MemoryOperand memOp = MemoryOp(OperandType.V128, rsp, null, Multiplier.x1, offset);
|
||||
|
||||
context.Assembler.Movdqu(Xmm((X86Register)bit), memOp);
|
||||
|
||||
|
@ -1666,7 +1669,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
if (reservedStackSize != 0)
|
||||
{
|
||||
context.Assembler.Add(rsp, new Operand(reservedStackSize), OperandType.I64);
|
||||
context.Assembler.Add(rsp, Const(reservedStackSize), OperandType.I64);
|
||||
}
|
||||
|
||||
mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
|
||||
|
@ -1698,7 +1701,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
for (int offset = PageSize; offset < size; offset += PageSize)
|
||||
{
|
||||
Operand memOp = new MemoryOperand(OperandType.I32, rsp, null, Multiplier.x1, -offset);
|
||||
Operand memOp = MemoryOp(OperandType.I32, rsp, null, Multiplier.x1, -offset);
|
||||
|
||||
context.Assembler.Mov(temp, memOp, OperandType.I32);
|
||||
}
|
||||
|
@ -1711,17 +1714,17 @@ namespace ARMeilleure.CodeGen.X86
|
|||
return operand as MemoryOperand;
|
||||
}
|
||||
|
||||
return new MemoryOperand(type, operand);
|
||||
return MemoryOp(type, operand);
|
||||
}
|
||||
|
||||
private static Operand Register(X86Register register, OperandType type = OperandType.I64)
|
||||
{
|
||||
return new Operand((int)register, RegisterType.Integer, type);
|
||||
return OperandHelper.Register((int)register, RegisterType.Integer, type);
|
||||
}
|
||||
|
||||
private static Operand Xmm(X86Register register)
|
||||
{
|
||||
return new Operand((int)register, RegisterType.Vector, OperandType.V128);
|
||||
return OperandHelper.Register((int)register, RegisterType.Vector, OperandType.V128);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue