CodeGen Optimisations (LSRA and Translator) (#978)

* Start of JIT garbage collection improvements

- thread static pool for Operand, MemoryOperand, Operation
- Operands and Operations are always to be constructed via their static
helper classes, so they can be pooled.
- removing LinkedList from Node for sources/destinations (replaced with
List<>s for now, but probably could do arrays since size is bounded)
- removing params constructors from Node
- LinkedList<> to List<> with Clear() for Operand assignments/uses
- ThreadStaticPool is very simple and basically just exists for the
purpose of our specific translation allocation problem. Right now it
will stay at the worst case allocation count for that thread (so far) -
the pool can never shrink.

- Still some cases of Operand[] that haven't been removed yet. Will need
to evaluate them (eg. is there a reasonable max number of params for
Calls?)

* ConcurrentStack instead of ConcurrentQueue for Rejit

* Optimize some parts of LSRA

- BitMap now operates on 64-bit int rather than 32-bit
- BitMap is now pooled in a ThreadStatic pool (within lrsa)
- BitMap now is now its own iterator. Marginally speeds up iterating
through the bits.
- A few cases where enumerators were generated have been converted to
forms that generate less garbage.
- New data structure for sorting _usePositions in LiveIntervals. Much
faster split, NextUseAfter, initial insertion. Random insertion is
slightly slower.
- That last one is WIP since you need to insert the values backwards. It
would be ideal if it just flipped it for you, uncomplicating things on
the caller side.

* Use a static pool of thread static pools. (yes.)

Prevents each execution thread creating its own lowCq pool and making me cry.

* Move constant value to top, change naming convention.

* Fix iteration of memory operands.

* Increase max thread count.

* Address Feedback
This commit is contained in:
riperiperi 2020-03-18 11:44:32 +00:00 committed by GitHub
parent 7475e180b4
commit 8226997bc7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 868 additions and 347 deletions

View file

@ -5,6 +5,7 @@ using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Numerics;
namespace ARMeilleure.CodeGen.RegisterAllocators
{
@ -32,7 +33,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
private int _operationsCount;
private class AllocationContext
private class AllocationContext : IDisposable
{
public RegisterMasks Masks { get; }
@ -49,8 +50,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
StackAlloc = stackAlloc;
Masks = masks;
Active = new BitMap(intervalsCount);
Inactive = new BitMap(intervalsCount);
Active = BitMapPool.Allocate(intervalsCount);
Inactive = BitMapPool.Allocate(intervalsCount);
}
public void MoveActiveToInactive(int bit)
@ -69,6 +70,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
dest.Set(bit);
}
public void Dispose()
{
BitMapPool.Release();
}
}
public AllocationResult RunPass(
@ -121,10 +127,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
InsertSplitCopies();
InsertSplitCopiesAtEdges(cfg);
return new AllocationResult(
AllocationResult result = new AllocationResult(
context.IntUsedRegisters,
context.VecUsedRegisters,
context.StackAlloc.TotalSize);
context.Dispose();
return result;
}
private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
@ -618,15 +628,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
bool hasSingleOrNoSuccessor = block.Next == null || block.Branch == null;
foreach (BasicBlock successor in Successors(block))
for (int i = 0; i < 2; i++)
{
// This used to use an enumerable, but it ended up generating a lot of garbage, so now it is a loop.
BasicBlock successor = (i == 0) ? block.Next : block.Branch;
if (successor == null)
{
continue;
}
int succIndex = successor.Index;
// If the current node is a split node, then the actual successor node
// (the successor before the split) should be right after it.
if (IsSplitEdgeBlock(successor))
{
succIndex = Successors(successor).First().Index;
succIndex = FirstSuccessor(successor).Index;
}
CopyResolver copyResolver = new CopyResolver();
@ -699,8 +716,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
Operand register = GetRegister(current);
foreach (int usePosition in current.UsePositions())
IList<int> usePositions = current.UsePositions();
for (int i = usePositions.Count - 1; i >= 0; i--)
{
int usePosition = -usePositions[i];
(_, Node operation) = GetOperationNode(usePosition);
for (int index = 0; index < operation.SourcesCount; index++)
@ -743,7 +762,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");
return new Operand(
return OperandHelper.Register(
interval.Register.Index,
interval.Register.Type,
interval.Local.Type);
@ -778,8 +797,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
_operationNodes.Add((block.Operations, node));
foreach (Operand dest in Destinations(node))
for (int i = 0; i < node.DestinationsCount; i++)
{
Operand dest = node.GetDestination(i);
if (dest.Kind == OperandKind.LocalVariable && visited.Add(dest))
{
dest.NumberLocal(_intervals.Count);
@ -815,12 +835,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
// Compute local live sets.
for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
{
BitMap liveGen = new BitMap(mapSize);
BitMap liveKill = new BitMap(mapSize);
BitMap liveGen = BitMapPool.Allocate(mapSize);
BitMap liveKill = BitMapPool.Allocate(mapSize);
for (Node node = block.Operations.First; node != null; node = node.ListNext)
{
foreach (Operand source in Sources(node))
Sources(node, (source) =>
{
int id = GetOperandId(source);
@ -828,10 +848,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
liveGen.Set(id);
}
}
});
foreach (Operand dest in Destinations(node))
for (int i = 0; i < node.DestinationsCount; i++)
{
Operand dest = node.GetDestination(i);
liveKill.Set(GetOperandId(dest));
}
}
@ -846,8 +867,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
for (int index = 0; index < cfg.Blocks.Count; index++)
{
blkLiveIn [index] = new BitMap(mapSize);
blkLiveOut[index] = new BitMap(mapSize);
blkLiveIn [index] = BitMapPool.Allocate(mapSize);
blkLiveOut[index] = BitMapPool.Allocate(mapSize);
}
bool modified;
@ -862,12 +883,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
BitMap liveOut = blkLiveOut[block.Index];
foreach (BasicBlock successor in Successors(block))
if ((block.Next != null && liveOut.Set(blkLiveIn[block.Next.Index])) ||
(block.Branch != null && liveOut.Set(blkLiveIn[block.Branch.Index])))
{
if (liveOut.Set(blkLiveIn[successor.Index]))
{
modified = true;
}
modified = true;
}
BitMap liveIn = blkLiveIn[block.Index];
@ -920,21 +939,22 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
operationPos -= InstructionGap;
foreach (Operand dest in Destinations(node))
for (int i = 0; i < node.DestinationsCount; i++)
{
Operand dest = node.GetDestination(i);
LiveInterval interval = _intervals[GetOperandId(dest)];
interval.SetStart(operationPos + 1);
interval.AddUsePosition(operationPos + 1);
}
foreach (Operand source in Sources(node))
Sources(node, (source) =>
{
LiveInterval interval = _intervals[GetOperandId(source)];
interval.AddRange(blockStart, operationPos + 1);
interval.AddUsePosition(operationPos);
}
});
if (node is Operation operation && operation.Instruction == Instruction.Call)
{
@ -949,7 +969,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
while (mask != 0)
{
int regIndex = BitUtils.LowestBitSet(mask);
int regIndex = BitOperations.TrailingZeroCount(mask);
Register callerSavedReg = new Register(regIndex, regType);
@ -982,17 +1002,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0);
}
private static IEnumerable<BasicBlock> Successors(BasicBlock block)
private static BasicBlock FirstSuccessor(BasicBlock block)
{
if (block.Next != null)
{
yield return block.Next;
}
if (block.Branch != null)
{
yield return block.Branch;
}
return block.Next ?? block.Branch;
}
private static IEnumerable<Node> BottomOperations(BasicBlock block)
@ -1007,15 +1019,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
}
private static IEnumerable<Operand> Destinations(Node node)
{
for (int index = 0; index < node.DestinationsCount; index++)
{
yield return node.GetDestination(index);
}
}
private static IEnumerable<Operand> Sources(Node node)
private static void Sources(Node node, Action<Operand> action)
{
for (int index = 0; index < node.SourcesCount; index++)
{
@ -1023,7 +1027,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
if (IsLocalOrRegister(source.Kind))
{
yield return source;
action(source);
}
else if (source.Kind == OperandKind.Memory)
{
@ -1031,12 +1035,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
if (memOp.BaseAddress != null)
{
yield return memOp.BaseAddress;
action(memOp.BaseAddress);
}
if (memOp.Index != null)
{
yield return memOp.Index;
action(memOp.Index);
}
}
}