Add a new JIT compiler for CPU code (#693)

* Start of the ARMeilleure project

* Refactoring around the old IRAdapter, now renamed to PreAllocator

* Optimize the LowestBitSet method

* Add CLZ support and fix CLS implementation

* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks

* Implement the ByteSwap IR instruction, and some refactoring on the assembler

* Implement the DivideUI IR instruction and fix 64-bits IDIV

* Correct constant operand type on CSINC

* Move division instructions implementation to InstEmitDiv

* Fix destination type for the ConditionalSelect IR instruction

* Implement UMULH and SMULH, with new IR instructions

* Fix some issues with shift instructions

* Fix constant types for BFM instructions

* Fix up new tests using the new V128 struct

* Update tests

* Move DIV tests to a separate file

* Add support for calls, and some instructions that depends on them

* Start adding support for SIMD & FP types, along with some of the related ARM instructions

* Fix some typos and the divide instruction with FP operands

* Fix wrong method call on Clz_V

* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes

* Implement SIMD logical instructions and more misc. fixes

* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations

* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes

* Implement SIMD shift instruction and fix Dup_V

* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table

* Fix check with tolerance on tester

* Implement FP & SIMD comparison instructions, and some fixes

* Update FCVT (Scalar) encoding on the table to support the Half-float variants

* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes

* Use old memory access methods, made a start on SIMD memory insts support, some fixes

* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes

* Fix arguments count with struct return values, other fixes

* More instructions

* Misc. fixes and integrate LDj3SNuD fixes

* Update tests

* Add a faster linear scan allocator, unwinding support on windows, and other changes

* Update Ryujinx.HLE

* Update Ryujinx.Graphics

* Fix V128 return pointer passing, RCX is clobbered

* Update Ryujinx.Tests

* Update ITimeZoneService

* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks

* Use generic GetFunctionPointerForDelegate method and other tweaks

* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics

* Remove some unused code on the assembler

* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler

* Add hardware capability detection

* Fix regression on Sha1h and revert Fcm** changes

* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator

* Fix silly mistake introduced on last commit on CpuId

* Generate inline stack probes when the stack allocation is too large

* Initial support for the System-V ABI

* Support multiple destination operands

* Fix SSE2 VectorInsert8 path, and other fixes

* Change placement of XMM callee save and restore code to match other compilers

* Rename Dest to Destination and Inst to Instruction

* Fix a regression related to calls and the V128 type

* Add an extra space on comments to match code style

* Some refactoring

* Fix vector insert FP32 SSE2 path

* Port over the ARM32 instructions

* Avoid memory protection races on JIT Cache

* Another fix on VectorInsert FP32 (thanks to LDj3SNuD

* Float operands don't need to use the same register when VEX is supported

* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks

* Some nits, small improvements on the pre allocator

* CpuThreadState is gone

* Allow changing CPU emulators with a config entry

* Add runtime identifiers on the ARMeilleure project

* Allow switching between CPUs through a config entry (pt. 2)

* Change win10-x64 to win-x64 on projects

* Update the Ryujinx project to use ARMeilleure

* Ensure that the selected register is valid on the hybrid allocator

* Allow exiting on returns to 0 (should fix test regression)

* Remove register assignments for most used variables on the hybrid allocator

* Do not use fixed registers as spill temp

* Add missing namespace and remove unneeded using

* Address PR feedback

* Fix types, etc

* Enable AssumeStrictAbiCompliance by default

* Ensure that Spill and Fill don't load or store any more than necessary
This commit is contained in:
gdkchan 2019-08-08 15:56:22 -03:00 committed by emmauss
parent 1ba58e9942
commit a731ab3a2a
310 changed files with 37389 additions and 2086 deletions

View file

@ -0,0 +1,153 @@
using ARMeilleure.Decoders;
using ARMeilleure.Instructions;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Memory;
using ARMeilleure.State;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Translation
{
class ArmEmitterContext : EmitterContext
{
private Dictionary<ulong, Operand> _labels;
private OpCode _optOpLastCompare;
private OpCode _optOpLastFlagSet;
private Operand _optCmpTempN;
private Operand _optCmpTempM;
private Block _currBlock;
public Block CurrBlock
{
get
{
return _currBlock;
}
set
{
_currBlock = value;
ResetBlockState();
}
}
public OpCode CurrOp { get; set; }
public MemoryManager Memory { get; }
public Aarch32Mode Mode { get; }
public ArmEmitterContext(MemoryManager memory, Aarch32Mode mode)
{
Memory = memory;
Mode = mode;
_labels = new Dictionary<ulong, Operand>();
}
public Operand GetLabel(ulong address)
{
if (!_labels.TryGetValue(address, out Operand label))
{
label = Label();
_labels.Add(address, label);
}
return label;
}
public void MarkComparison(Operand n, Operand m)
{
_optOpLastCompare = CurrOp;
_optCmpTempN = Copy(n);
_optCmpTempM = Copy(m);
}
public void MarkFlagSet(PState stateFlag)
{
// Set this only if any of the NZCV flag bits were modified.
// This is used to ensure that when emiting a direct IL branch
// instruction for compare + branch sequences, we're not expecting
// to use comparison values from an old instruction, when in fact
// the flags were already overwritten by another instruction further along.
if (stateFlag >= PState.VFlag)
{
_optOpLastFlagSet = CurrOp;
}
}
private void ResetBlockState()
{
_optOpLastCompare = null;
_optOpLastFlagSet = null;
}
public Operand TryGetComparisonResult(Condition condition)
{
if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)
{
return null;
}
Operand n = _optCmpTempN;
Operand m = _optCmpTempM;
InstName cmpName = _optOpLastCompare.Instruction.Name;
if (cmpName == InstName.Subs)
{
switch (condition)
{
case Condition.Eq: return ICompareEqual (n, m);
case Condition.Ne: return ICompareNotEqual (n, m);
case Condition.GeUn: return ICompareGreaterOrEqualUI(n, m);
case Condition.LtUn: return ICompareLessUI (n, m);
case Condition.GtUn: return ICompareGreaterUI (n, m);
case Condition.LeUn: return ICompareLessOrEqualUI (n, m);
case Condition.Ge: return ICompareGreaterOrEqual (n, m);
case Condition.Lt: return ICompareLess (n, m);
case Condition.Gt: return ICompareGreater (n, m);
case Condition.Le: return ICompareLessOrEqual (n, m);
}
}
else if (cmpName == InstName.Adds && _optOpLastCompare is IOpCodeAluImm op)
{
// There are several limitations that needs to be taken into account for CMN comparisons:
// - The unsigned comparisons are not valid, as they depend on the
// carry flag value, and they will have different values for addition and
// subtraction. For addition, it's carry, and for subtraction, it's borrow.
// So, we need to make sure we're not doing a unsigned compare for the CMN case.
// - We can only do the optimization for the immediate variants,
// because when the second operand value is exactly INT_MIN, we can't
// negate the value as theres no positive counterpart.
// Such invalid values can't be encoded on the immediate encodings.
if (op.RegisterSize == RegisterSize.Int32)
{
m = Const((int)-op.Immediate);
}
else
{
m = Const(-op.Immediate);
}
switch (condition)
{
case Condition.Eq: return ICompareEqual (n, m);
case Condition.Ne: return ICompareNotEqual (n, m);
case Condition.Ge: return ICompareGreaterOrEqual(n, m);
case Condition.Lt: return ICompareLess (n, m);
case Condition.Gt: return ICompareGreater (n, m);
case Condition.Le: return ICompareLessOrEqual (n, m);
}
}
return null;
}
}
}

View file

@ -0,0 +1,47 @@
using ARMeilleure.CodeGen;
using ARMeilleure.CodeGen.X86;
using ARMeilleure.Diagnostics;
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation
{
static class Compiler
{
public static T Compile<T>(
ControlFlowGraph cfg,
OperandType[] funcArgTypes,
OperandType funcReturnType,
CompilerOptions options)
{
Logger.StartPass(PassName.Dominance);
Dominance.FindDominators(cfg);
Dominance.FindDominanceFrontiers(cfg);
Logger.EndPass(PassName.Dominance);
Logger.StartPass(PassName.SsaConstruction);
if ((options & CompilerOptions.SsaForm) != 0)
{
Ssa.Construct(cfg);
}
else
{
RegisterToLocal.Rename(cfg);
}
Logger.EndPass(PassName.SsaConstruction, cfg);
CompilerContext cctx = new CompilerContext(cfg, funcArgTypes, funcReturnType, options);
CompiledFunction func = CodeGenerator.Generate(cctx);
IntPtr codePtr = JitCache.Map(func);
return Marshal.GetDelegateForFunctionPointer<T>(codePtr);
}
}
}

View file

@ -0,0 +1,26 @@
using ARMeilleure.IntermediateRepresentation;
namespace ARMeilleure.Translation
{
struct CompilerContext
{
public ControlFlowGraph Cfg { get; }
public OperandType[] FuncArgTypes { get; }
public OperandType FuncReturnType { get; }
public CompilerOptions Options { get; }
public CompilerContext(
ControlFlowGraph cfg,
OperandType[] funcArgTypes,
OperandType funcReturnType,
CompilerOptions options)
{
Cfg = cfg;
FuncArgTypes = funcArgTypes;
FuncReturnType = funcReturnType;
Options = options;
}
}
}

View file

@ -0,0 +1,16 @@
using System;
namespace ARMeilleure.Translation
{
[Flags]
enum CompilerOptions
{
None = 0,
SsaForm = 1 << 0,
Optimize = 1 << 1,
Lsra = 1 << 2,
MediumCq = SsaForm | Optimize,
HighCq = SsaForm | Optimize | Lsra
}
}

View file

@ -0,0 +1,158 @@
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace ARMeilleure.Translation
{
class ControlFlowGraph
{
public BasicBlock Entry { get; }
public LinkedList<BasicBlock> Blocks { get; }
public BasicBlock[] PostOrderBlocks { get; }
public int[] PostOrderMap { get; }
public ControlFlowGraph(BasicBlock entry, LinkedList<BasicBlock> blocks)
{
Entry = entry;
Blocks = blocks;
RemoveUnreachableBlocks(blocks);
HashSet<BasicBlock> visited = new HashSet<BasicBlock>();
Stack<BasicBlock> blockStack = new Stack<BasicBlock>();
PostOrderBlocks = new BasicBlock[blocks.Count];
PostOrderMap = new int[blocks.Count];
visited.Add(entry);
blockStack.Push(entry);
int index = 0;
while (blockStack.TryPop(out BasicBlock block))
{
if (block.Next != null && visited.Add(block.Next))
{
blockStack.Push(block);
blockStack.Push(block.Next);
}
else if (block.Branch != null && visited.Add(block.Branch))
{
blockStack.Push(block);
blockStack.Push(block.Branch);
}
else
{
PostOrderMap[block.Index] = index;
PostOrderBlocks[index++] = block;
}
}
}
private void RemoveUnreachableBlocks(LinkedList<BasicBlock> blocks)
{
HashSet<BasicBlock> visited = new HashSet<BasicBlock>();
Queue<BasicBlock> workQueue = new Queue<BasicBlock>();
visited.Add(Entry);
workQueue.Enqueue(Entry);
while (workQueue.TryDequeue(out BasicBlock block))
{
Debug.Assert(block.Index != -1, "Invalid block index.");
if (block.Next != null && visited.Add(block.Next))
{
workQueue.Enqueue(block.Next);
}
if (block.Branch != null && visited.Add(block.Branch))
{
workQueue.Enqueue(block.Branch);
}
}
if (visited.Count < blocks.Count)
{
// Remove unreachable blocks and renumber.
int index = 0;
for (LinkedListNode<BasicBlock> node = blocks.First; node != null;)
{
LinkedListNode<BasicBlock> nextNode = node.Next;
BasicBlock block = node.Value;
if (!visited.Contains(block))
{
block.Next = null;
block.Branch = null;
blocks.Remove(node);
}
else
{
block.Index = index++;
}
node = nextNode;
}
}
}
public BasicBlock SplitEdge(BasicBlock predecessor, BasicBlock successor)
{
BasicBlock splitBlock = new BasicBlock(Blocks.Count);
if (predecessor.Next == successor)
{
predecessor.Next = splitBlock;
}
if (predecessor.Branch == successor)
{
predecessor.Branch = splitBlock;
}
if (splitBlock.Predecessors.Count == 0)
{
throw new ArgumentException("Predecessor and successor are not connected.");
}
// Insert the new block on the list of blocks.
BasicBlock succPrev = successor.Node.Previous?.Value;
if (succPrev != null && succPrev != predecessor && succPrev.Next == successor)
{
// Can't insert after the predecessor or before the successor.
// Here, we insert it before the successor by also spliting another
// edge (the one between the block before "successor" and "successor").
BasicBlock splitBlock2 = new BasicBlock(splitBlock.Index + 1);
succPrev.Next = splitBlock2;
splitBlock2.Branch = successor;
splitBlock2.Operations.AddLast(new Operation(Instruction.Branch, null));
Blocks.AddBefore(successor.Node, splitBlock2);
}
splitBlock.Next = successor;
Blocks.AddBefore(successor.Node, splitBlock);
return splitBlock;
}
}
}

View file

@ -0,0 +1,26 @@
using System;
using System.Collections.Concurrent;
using System.Reflection;
namespace ARMeilleure.Translation
{
static class DelegateCache
{
private static ConcurrentDictionary<string, Delegate> _delegates;
static DelegateCache()
{
_delegates = new ConcurrentDictionary<string, Delegate>();
}
public static Delegate GetOrAdd(Delegate dlg)
{
return _delegates.GetOrAdd(GetKey(dlg.Method), (key) => dlg);
}
private static string GetKey(MethodInfo info)
{
return $"{info.DeclaringType.FullName}.{info.Name}";
}
}
}

View file

@ -0,0 +1,95 @@
using ARMeilleure.IntermediateRepresentation;
using System.Diagnostics;
namespace ARMeilleure.Translation
{
static class Dominance
{
// Those methods are an implementation of the algorithms on "A Simple, Fast Dominance Algorithm".
// https://www.cs.rice.edu/~keith/EMBED/dom.pdf
public static void FindDominators(ControlFlowGraph cfg)
{
BasicBlock Intersect(BasicBlock block1, BasicBlock block2)
{
while (block1 != block2)
{
while (cfg.PostOrderMap[block1.Index] < cfg.PostOrderMap[block2.Index])
{
block1 = block1.ImmediateDominator;
}
while (cfg.PostOrderMap[block2.Index] < cfg.PostOrderMap[block1.Index])
{
block2 = block2.ImmediateDominator;
}
}
return block1;
}
cfg.Entry.ImmediateDominator = cfg.Entry;
Debug.Assert(cfg.Entry == cfg.PostOrderBlocks[cfg.PostOrderBlocks.Length - 1]);
bool modified;
do
{
modified = false;
for (int blkIndex = cfg.PostOrderBlocks.Length - 2; blkIndex >= 0; blkIndex--)
{
BasicBlock block = cfg.PostOrderBlocks[blkIndex];
BasicBlock newIDom = null;
foreach (BasicBlock predecessor in block.Predecessors)
{
if (predecessor.ImmediateDominator != null)
{
if (newIDom != null)
{
newIDom = Intersect(predecessor, newIDom);
}
else
{
newIDom = predecessor;
}
}
}
if (block.ImmediateDominator != newIDom)
{
block.ImmediateDominator = newIDom;
modified = true;
}
}
}
while (modified);
}
public static void FindDominanceFrontiers(ControlFlowGraph cfg)
{
foreach (BasicBlock block in cfg.Blocks)
{
if (block.Predecessors.Count < 2)
{
continue;
}
for (int pBlkIndex = 0; pBlkIndex < block.Predecessors.Count; pBlkIndex++)
{
BasicBlock current = block.Predecessors[pBlkIndex];
while (current != block.ImmediateDominator)
{
current.DominanceFrontiers.Add(block);
current = current.ImmediateDominator;
}
}
}
}
}
}

View file

@ -0,0 +1,562 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Translation
{
class EmitterContext
{
private Dictionary<Operand, BasicBlock> _irLabels;
private LinkedList<BasicBlock> _irBlocks;
private BasicBlock _irBlock;
private bool _needsNewBlock;
public EmitterContext()
{
_irLabels = new Dictionary<Operand, BasicBlock>();
_irBlocks = new LinkedList<BasicBlock>();
_needsNewBlock = true;
}
public Operand Add(Operand op1, Operand op2)
{
return Add(Instruction.Add, Local(op1.Type), op1, op2);
}
public Operand BitwiseAnd(Operand op1, Operand op2)
{
return Add(Instruction.BitwiseAnd, Local(op1.Type), op1, op2);
}
public Operand BitwiseExclusiveOr(Operand op1, Operand op2)
{
return Add(Instruction.BitwiseExclusiveOr, Local(op1.Type), op1, op2);
}
public Operand BitwiseNot(Operand op1)
{
return Add(Instruction.BitwiseNot, Local(op1.Type), op1);
}
public Operand BitwiseOr(Operand op1, Operand op2)
{
return Add(Instruction.BitwiseOr, Local(op1.Type), op1, op2);
}
public void Branch(Operand label)
{
Add(Instruction.Branch, null);
BranchToLabel(label);
}
public void BranchIfFalse(Operand label, Operand op1)
{
Add(Instruction.BranchIfFalse, null, op1);
BranchToLabel(label);
}
public void BranchIfTrue(Operand label, Operand op1)
{
Add(Instruction.BranchIfTrue, null, op1);
BranchToLabel(label);
}
public Operand ByteSwap(Operand op1)
{
return Add(Instruction.ByteSwap, Local(op1.Type), op1);
}
public Operand Call(Delegate func, params Operand[] callArgs)
{
// Add the delegate to the cache to ensure it will not be garbage collected.
func = DelegateCache.GetOrAdd(func);
IntPtr ptr = Marshal.GetFunctionPointerForDelegate<Delegate>(func);
OperandType returnType = GetOperandType(func.Method.ReturnType);
return Call(Const(ptr.ToInt64()), returnType, callArgs);
}
private static Dictionary<TypeCode, OperandType> _typeCodeToOperandTypeMap =
new Dictionary<TypeCode, OperandType>()
{
{ TypeCode.Boolean, OperandType.I32 },
{ TypeCode.Byte, OperandType.I32 },
{ TypeCode.Char, OperandType.I32 },
{ TypeCode.Double, OperandType.FP64 },
{ TypeCode.Int16, OperandType.I32 },
{ TypeCode.Int32, OperandType.I32 },
{ TypeCode.Int64, OperandType.I64 },
{ TypeCode.SByte, OperandType.I32 },
{ TypeCode.Single, OperandType.FP32 },
{ TypeCode.UInt16, OperandType.I32 },
{ TypeCode.UInt32, OperandType.I32 },
{ TypeCode.UInt64, OperandType.I64 }
};
private static OperandType GetOperandType(Type type)
{
if (_typeCodeToOperandTypeMap.TryGetValue(Type.GetTypeCode(type), out OperandType ot))
{
return ot;
}
else if (type == typeof(V128))
{
return OperandType.V128;
}
else if (type == typeof(void))
{
return OperandType.None;
}
throw new ArgumentException($"Invalid type \"{type.Name}\".");
}
public Operand Call(Operand address, OperandType returnType, params Operand[] callArgs)
{
Operand[] args = new Operand[callArgs.Length + 1];
args[0] = address;
Array.Copy(callArgs, 0, args, 1, callArgs.Length);
if (returnType != OperandType.None)
{
return Add(Instruction.Call, Local(returnType), args);
}
else
{
return Add(Instruction.Call, null, args);
}
}
public Operand CompareAndSwap128(Operand address, Operand expected, Operand desired)
{
return Add(Instruction.CompareAndSwap128, Local(OperandType.V128), address, expected, desired);
}
public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3)
{
return Add(Instruction.ConditionalSelect, Local(op2.Type), op1, op2, op3);
}
public Operand ConvertI64ToI32(Operand op1)
{
if (op1.Type != OperandType.I64)
{
throw new ArgumentException($"Invalid operand type \"{op1.Type}\".");
}
return Add(Instruction.ConvertI64ToI32, Local(OperandType.I32), op1);
}
public Operand ConvertToFP(OperandType type, Operand op1)
{
return Add(Instruction.ConvertToFP, Local(type), op1);
}
public Operand ConvertToFPUI(OperandType type, Operand op1)
{
return Add(Instruction.ConvertToFPUI, Local(type), op1);
}
public Operand Copy(Operand op1)
{
return Add(Instruction.Copy, Local(op1.Type), op1);
}
public Operand Copy(Operand dest, Operand op1)
{
if (dest.Kind != OperandKind.Register)
{
throw new ArgumentException($"Invalid dest operand kind \"{dest.Kind}\".");
}
return Add(Instruction.Copy, dest, op1);
}
public Operand CountLeadingZeros(Operand op1)
{
return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1);
}
internal Operand CpuId()
{
return Add(Instruction.CpuId, Local(OperandType.I64));
}
public Operand Divide(Operand op1, Operand op2)
{
return Add(Instruction.Divide, Local(op1.Type), op1, op2);
}
public Operand DivideUI(Operand op1, Operand op2)
{
return Add(Instruction.DivideUI, Local(op1.Type), op1, op2);
}
public Operand ICompareEqual(Operand op1, Operand op2)
{
return Add(Instruction.CompareEqual, Local(OperandType.I32), op1, op2);
}
public Operand ICompareGreater(Operand op1, Operand op2)
{
return Add(Instruction.CompareGreater, Local(OperandType.I32), op1, op2);
}
public Operand ICompareGreaterOrEqual(Operand op1, Operand op2)
{
return Add(Instruction.CompareGreaterOrEqual, Local(OperandType.I32), op1, op2);
}
public Operand ICompareGreaterOrEqualUI(Operand op1, Operand op2)
{
return Add(Instruction.CompareGreaterOrEqualUI, Local(OperandType.I32), op1, op2);
}
public Operand ICompareGreaterUI(Operand op1, Operand op2)
{
return Add(Instruction.CompareGreaterUI, Local(OperandType.I32), op1, op2);
}
public Operand ICompareLess(Operand op1, Operand op2)
{
return Add(Instruction.CompareLess, Local(OperandType.I32), op1, op2);
}
public Operand ICompareLessOrEqual(Operand op1, Operand op2)
{
return Add(Instruction.CompareLessOrEqual, Local(OperandType.I32), op1, op2);
}
public Operand ICompareLessOrEqualUI(Operand op1, Operand op2)
{
return Add(Instruction.CompareLessOrEqualUI, Local(OperandType.I32), op1, op2);
}
public Operand ICompareLessUI(Operand op1, Operand op2)
{
return Add(Instruction.CompareLessUI, Local(OperandType.I32), op1, op2);
}
public Operand ICompareNotEqual(Operand op1, Operand op2)
{
return Add(Instruction.CompareNotEqual, Local(OperandType.I32), op1, op2);
}
public Operand Load(OperandType type, Operand address)
{
return Add(Instruction.Load, Local(type), address);
}
public Operand Load16(Operand address)
{
return Add(Instruction.Load16, Local(OperandType.I32), address);
}
public Operand Load8(Operand address)
{
return Add(Instruction.Load8, Local(OperandType.I32), address);
}
public Operand LoadArgument(OperandType type, int index)
{
return Add(Instruction.LoadArgument, Local(type), Const(index));
}
public void LoadFromContext()
{
_needsNewBlock = true;
Add(Instruction.LoadFromContext);
}
public Operand Multiply(Operand op1, Operand op2)
{
return Add(Instruction.Multiply, Local(op1.Type), op1, op2);
}
public Operand Multiply64HighSI(Operand op1, Operand op2)
{
return Add(Instruction.Multiply64HighSI, Local(OperandType.I64), op1, op2);
}
public Operand Multiply64HighUI(Operand op1, Operand op2)
{
return Add(Instruction.Multiply64HighUI, Local(OperandType.I64), op1, op2);
}
public Operand Negate(Operand op1)
{
return Add(Instruction.Negate, Local(op1.Type), op1);
}
public void Return()
{
Add(Instruction.Return);
_needsNewBlock = true;
}
public void Return(Operand op1)
{
Add(Instruction.Return, null, op1);
_needsNewBlock = true;
}
public Operand RotateRight(Operand op1, Operand op2)
{
return Add(Instruction.RotateRight, Local(op1.Type), op1, op2);
}
public Operand ShiftLeft(Operand op1, Operand op2)
{
return Add(Instruction.ShiftLeft, Local(op1.Type), op1, op2);
}
public Operand ShiftRightSI(Operand op1, Operand op2)
{
return Add(Instruction.ShiftRightSI, Local(op1.Type), op1, op2);
}
public Operand ShiftRightUI(Operand op1, Operand op2)
{
return Add(Instruction.ShiftRightUI, Local(op1.Type), op1, op2);
}
public Operand SignExtend16(OperandType type, Operand op1)
{
return Add(Instruction.SignExtend16, Local(type), op1);
}
public Operand SignExtend32(OperandType type, Operand op1)
{
return Add(Instruction.SignExtend32, Local(type), op1);
}
public Operand SignExtend8(OperandType type, Operand op1)
{
return Add(Instruction.SignExtend8, Local(type), op1);
}
public void Store(Operand address, Operand value)
{
Add(Instruction.Store, null, address, value);
}
public void Store16(Operand address, Operand value)
{
Add(Instruction.Store16, null, address, value);
}
public void Store8(Operand address, Operand value)
{
Add(Instruction.Store8, null, address, value);
}
public void StoreToContext()
{
Add(Instruction.StoreToContext);
_needsNewBlock = true;
}
public Operand Subtract(Operand op1, Operand op2)
{
return Add(Instruction.Subtract, Local(op1.Type), op1, op2);
}
public Operand VectorCreateScalar(Operand value)
{
return Add(Instruction.VectorCreateScalar, Local(OperandType.V128), value);
}
public Operand VectorExtract(OperandType type, Operand vector, int index)
{
return Add(Instruction.VectorExtract, Local(type), vector, Const(index));
}
public Operand VectorExtract16(Operand vector, int index)
{
return Add(Instruction.VectorExtract16, Local(OperandType.I32), vector, Const(index));
}
public Operand VectorExtract8(Operand vector, int index)
{
return Add(Instruction.VectorExtract8, Local(OperandType.I32), vector, Const(index));
}
public Operand VectorInsert(Operand vector, Operand value, int index)
{
return Add(Instruction.VectorInsert, Local(OperandType.V128), vector, value, Const(index));
}
public Operand VectorInsert16(Operand vector, Operand value, int index)
{
return Add(Instruction.VectorInsert16, Local(OperandType.V128), vector, value, Const(index));
}
public Operand VectorInsert8(Operand vector, Operand value, int index)
{
return Add(Instruction.VectorInsert8, Local(OperandType.V128), vector, value, Const(index));
}
public Operand VectorZero()
{
return Add(Instruction.VectorZero, Local(OperandType.V128));
}
public Operand VectorZeroUpper64(Operand vector)
{
return Add(Instruction.VectorZeroUpper64, Local(OperandType.V128), vector);
}
public Operand VectorZeroUpper96(Operand vector)
{
return Add(Instruction.VectorZeroUpper96, Local(OperandType.V128), vector);
}
public Operand ZeroExtend16(OperandType type, Operand op1)
{
return Add(Instruction.ZeroExtend16, Local(type), op1);
}
public Operand ZeroExtend32(OperandType type, Operand op1)
{
return Add(Instruction.ZeroExtend32, Local(type), op1);
}
public Operand ZeroExtend8(OperandType type, Operand op1)
{
return Add(Instruction.ZeroExtend8, Local(type), op1);
}
private Operand Add(Instruction inst, Operand dest = null, params Operand[] sources)
{
if (_needsNewBlock)
{
NewNextBlock();
}
Operation operation = new Operation(inst, dest, sources);
_irBlock.Operations.AddLast(operation);
return dest;
}
public Operand AddIntrinsic(Intrinsic intrin, params Operand[] args)
{
return Add(intrin, Local(OperandType.V128), args);
}
public Operand AddIntrinsicInt(Intrinsic intrin, params Operand[] args)
{
return Add(intrin, Local(OperandType.I32), args);
}
public Operand AddIntrinsicLong(Intrinsic intrin, params Operand[] args)
{
return Add(intrin, Local(OperandType.I64), args);
}
private Operand Add(Intrinsic intrin, Operand dest, params Operand[] sources)
{
if (_needsNewBlock)
{
NewNextBlock();
}
IntrinsicOperation operation = new IntrinsicOperation(intrin, dest, sources);
_irBlock.Operations.AddLast(operation);
return dest;
}
private void BranchToLabel(Operand label)
{
if (!_irLabels.TryGetValue(label, out BasicBlock branchBlock))
{
branchBlock = new BasicBlock();
_irLabels.Add(label, branchBlock);
}
_irBlock.Branch = branchBlock;
_needsNewBlock = true;
}
public void MarkLabel(Operand label)
{
if (_irLabels.TryGetValue(label, out BasicBlock nextBlock))
{
nextBlock.Index = _irBlocks.Count;
nextBlock.Node = _irBlocks.AddLast(nextBlock);
NextBlock(nextBlock);
}
else
{
NewNextBlock();
_irLabels.Add(label, _irBlock);
}
}
private void NewNextBlock()
{
BasicBlock block = new BasicBlock(_irBlocks.Count);
block.Node = _irBlocks.AddLast(block);
NextBlock(block);
}
private void NextBlock(BasicBlock nextBlock)
{
if (_irBlock != null && !EndsWithUnconditional(_irBlock))
{
_irBlock.Next = nextBlock;
}
_irBlock = nextBlock;
_needsNewBlock = false;
}
private static bool EndsWithUnconditional(BasicBlock block)
{
Operation lastOp = block.GetLastOp() as Operation;
if (lastOp == null)
{
return false;
}
return lastOp.Instruction == Instruction.Branch ||
lastOp.Instruction == Instruction.Return;
}
public ControlFlowGraph GetControlFlowGraph()
{
return new ControlFlowGraph(_irBlocks.First.Value, _irBlocks);
}
}
}

View file

@ -0,0 +1,6 @@
using System;
namespace ARMeilleure.Translation
{
delegate ulong GuestFunction(IntPtr nativeContextPtr);
}

View file

@ -0,0 +1,9 @@
using ARMeilleure.State;
namespace ARMeilleure.Translation
{
public interface ITranslator
{
void Execute(IExecutionContext context, ulong address);
}
}

View file

@ -0,0 +1,135 @@
using ARMeilleure.CodeGen;
using ARMeilleure.Memory;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation
{
static class JitCache
{
private const int PageSize = 4 * 1024;
private const int PageMask = PageSize - 1;
private const int CodeAlignment = 4; // Bytes
private const int CacheSize = 512 * 1024 * 1024;
private static IntPtr _basePointer;
private static int _offset;
private static List<JitCacheEntry> _cacheEntries;
private static object _lock;
static JitCache()
{
_basePointer = MemoryManagement.Allocate(CacheSize);
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
JitUnwindWindows.InstallFunctionTableHandler(_basePointer, CacheSize);
// The first page is used for the table based SEH structs.
_offset = PageSize;
}
_cacheEntries = new List<JitCacheEntry>();
_lock = new object();
}
public static IntPtr Map(CompiledFunction func)
{
byte[] code = func.Code;
lock (_lock)
{
int funcOffset = Allocate(code.Length);
IntPtr funcPtr = _basePointer + funcOffset;
Marshal.Copy(code, 0, funcPtr, code.Length);
ReprotectRange(funcOffset, code.Length);
Add(new JitCacheEntry(funcOffset, code.Length, func.UnwindInfo));
return funcPtr;
}
}
private static void ReprotectRange(int offset, int size)
{
// Map pages that are already full as RX.
// Map pages that are not full yet as RWX.
// On unix, the address must be page aligned.
int endOffs = offset + size;
int pageStart = offset & ~PageMask;
int pageEnd = endOffs & ~PageMask;
int fullPagesSize = pageEnd - pageStart;
if (fullPagesSize != 0)
{
IntPtr funcPtr = _basePointer + pageStart;
MemoryManagement.Reprotect(funcPtr, (ulong)fullPagesSize, MemoryProtection.ReadAndExecute);
}
int remaining = endOffs - pageEnd;
if (remaining != 0)
{
IntPtr funcPtr = _basePointer + pageEnd;
MemoryManagement.Reprotect(funcPtr, (ulong)remaining, MemoryProtection.ReadWriteExecute);
}
}
private static int Allocate(int codeSize)
{
codeSize = checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
int allocOffset = _offset;
_offset += codeSize;
if ((ulong)(uint)_offset > CacheSize)
{
throw new OutOfMemoryException();
}
return allocOffset;
}
private static void Add(JitCacheEntry entry)
{
_cacheEntries.Add(entry);
}
public static bool TryFind(int offset, out JitCacheEntry entry)
{
lock (_lock)
{
foreach (JitCacheEntry cacheEntry in _cacheEntries)
{
int endOffset = cacheEntry.Offset + cacheEntry.Size;
if (offset >= cacheEntry.Offset && offset < endOffset)
{
entry = cacheEntry;
return true;
}
}
}
entry = default(JitCacheEntry);
return false;
}
}
}

View file

@ -0,0 +1,19 @@
using ARMeilleure.CodeGen.Unwinding;
namespace ARMeilleure.Translation
{
struct JitCacheEntry
{
public int Offset { get; }
public int Size { get; }
public UnwindInfo UnwindInfo { get; }
public JitCacheEntry(int offset, int size, UnwindInfo unwindInfo)
{
Offset = offset;
Size = size;
UnwindInfo = unwindInfo;
}
}
}

View file

@ -0,0 +1,164 @@
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation
{
static class JitUnwindWindows
{
private const int MaxUnwindCodesArraySize = 9 + 10 * 2 + 3;
private struct RuntimeFunction
{
public uint BeginAddress;
public uint EndAddress;
public uint UnwindData;
}
private struct UnwindInfo
{
public byte VersionAndFlags;
public byte SizeOfProlog;
public byte CountOfUnwindCodes;
public byte FrameRegister;
public unsafe fixed ushort UnwindCodes[MaxUnwindCodesArraySize];
}
private enum UnwindOperation
{
PushNonvol = 0,
AllocLarge = 1,
AllocSmall = 2,
SetFpreg = 3,
SaveNonvol = 4,
SaveNonvolFar = 5,
SaveXmm128 = 8,
SaveXmm128Far = 9,
PushMachframe = 10
}
private unsafe delegate RuntimeFunction* GetRuntimeFunctionCallback(ulong controlPc, IntPtr context);
[DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
private static unsafe extern bool RtlInstallFunctionTableCallback(
ulong tableIdentifier,
ulong baseAddress,
uint length,
GetRuntimeFunctionCallback callback,
IntPtr context,
string outOfProcessCallbackDll);
private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback;
private static int _sizeOfRuntimeFunction;
private unsafe static RuntimeFunction* _runtimeFunction;
private unsafe static UnwindInfo* _unwindInfo;
public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength)
{
ulong codeCachePtr = (ulong)codeCachePointer.ToInt64();
_sizeOfRuntimeFunction = Marshal.SizeOf<RuntimeFunction>();
bool result;
unsafe
{
_runtimeFunction = (RuntimeFunction*)codeCachePointer;
_unwindInfo = (UnwindInfo*)(codeCachePointer + _sizeOfRuntimeFunction);
_getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler);
result = RtlInstallFunctionTableCallback(
codeCachePtr | 3,
codeCachePtr,
codeCacheLength,
_getRuntimeFunctionCallback,
codeCachePointer,
null);
}
if (!result)
{
throw new InvalidOperationException("Failure installing function table callback.");
}
}
private static unsafe RuntimeFunction* FunctionTableHandler(ulong controlPc, IntPtr context)
{
int offset = (int)((long)controlPc - context.ToInt64());
if (!JitCache.TryFind(offset, out JitCacheEntry funcEntry))
{
// Not found.
return null;
}
var unwindInfo = funcEntry.UnwindInfo;
int codeIndex = 0;
int spOffset = unwindInfo.FixedAllocSize;
foreach (var entry in unwindInfo.PushEntries)
{
if (entry.Type == RegisterType.Vector)
{
spOffset -= 16;
}
}
for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
{
var entry = unwindInfo.PushEntries[index];
if (entry.Type == RegisterType.Vector)
{
ushort uwop = PackUwop(UnwindOperation.SaveXmm128, entry.StreamEndOffset, entry.Index);
_unwindInfo->UnwindCodes[codeIndex++] = uwop;
_unwindInfo->UnwindCodes[codeIndex++] = (ushort)spOffset;
spOffset += 16;
}
}
_unwindInfo->UnwindCodes[0] = PackUwop(UnwindOperation.AllocLarge, unwindInfo.PrologueSize, 1);
_unwindInfo->UnwindCodes[1] = (ushort)(unwindInfo.FixedAllocSize >> 0);
_unwindInfo->UnwindCodes[2] = (ushort)(unwindInfo.FixedAllocSize >> 16);
codeIndex += 3;
for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
{
var entry = unwindInfo.PushEntries[index];
if (entry.Type == RegisterType.Integer)
{
ushort uwop = PackUwop(UnwindOperation.PushNonvol, entry.StreamEndOffset, entry.Index);
_unwindInfo->UnwindCodes[codeIndex++] = uwop;
}
}
_unwindInfo->VersionAndFlags = 1;
_unwindInfo->SizeOfProlog = (byte)unwindInfo.PrologueSize;
_unwindInfo->CountOfUnwindCodes = (byte)codeIndex;
_unwindInfo->FrameRegister = 0;
_runtimeFunction->BeginAddress = (uint)funcEntry.Offset;
_runtimeFunction->EndAddress = (uint)(funcEntry.Offset + funcEntry.Size);
_runtimeFunction->UnwindData = (uint)_sizeOfRuntimeFunction;
return _runtimeFunction;
}
private static ushort PackUwop(UnwindOperation uwop, int prologOffset, int opInfo)
{
return (ushort)(prologOffset | ((int)uwop << 8) | (opInfo << 12));
}
}
}

View file

@ -0,0 +1,39 @@
using System.Collections.Concurrent;
namespace ARMeilleure.Translation
{
class PriorityQueue<T>
{
private ConcurrentQueue<T>[] _queues;
public PriorityQueue(int priorities)
{
_queues = new ConcurrentQueue<T>[priorities];
for (int index = 0; index < priorities; index++)
{
_queues[index] = new ConcurrentQueue<T>();
}
}
public void Enqueue(int priority, T value)
{
_queues[priority].Enqueue(value);
}
public bool TryDequeue(out T value)
{
for (int index = 0; index < _queues.Length; index++)
{
if (_queues[index].TryDequeue(out value))
{
return true;
}
}
value = default(T);
return false;
}
}
}

View file

@ -0,0 +1,52 @@
using ARMeilleure.IntermediateRepresentation;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Translation
{
static class RegisterToLocal
{
public static void Rename(ControlFlowGraph cfg)
{
Dictionary<Register, Operand> registerToLocalMap = new Dictionary<Register, Operand>();
Operand GetLocal(Operand op)
{
Register register = op.GetRegister();
if (!registerToLocalMap.TryGetValue(register, out Operand local))
{
local = Local(op.Type);
registerToLocalMap.Add(register, local);
}
return local;
}
foreach (BasicBlock block in cfg.Blocks)
{
foreach (Node node in block.Operations)
{
Operand dest = node.Destination;
if (dest != null && dest.Kind == OperandKind.Register)
{
node.Destination = GetLocal(dest);
}
for (int index = 0; index < node.SourcesCount; index++)
{
Operand source = node.GetSource(index);
if (source.Kind == OperandKind.Register)
{
node.SetSource(index, GetLocal(source));
}
}
}
}
}
}
}

View file

@ -0,0 +1,413 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using System;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Translation
{
static class RegisterUsage
{
private const long CallerSavedIntRegistersMask = 0x7fL << 9;
private const long PStateNzcvFlagsMask = 0xfL << 60;
private const long CallerSavedVecRegistersMask = 0xffffL << 16;
private const int RegsCount = 32;
private const int RegsMask = RegsCount - 1;
private struct RegisterMask : IEquatable<RegisterMask>
{
public long IntMask { get; set; }
public long VecMask { get; set; }
public RegisterMask(long intMask, long vecMask)
{
IntMask = intMask;
VecMask = vecMask;
}
public static RegisterMask operator &(RegisterMask x, RegisterMask y)
{
return new RegisterMask(x.IntMask & y.IntMask, x.VecMask & y.VecMask);
}
public static RegisterMask operator |(RegisterMask x, RegisterMask y)
{
return new RegisterMask(x.IntMask | y.IntMask, x.VecMask | y.VecMask);
}
public static RegisterMask operator ~(RegisterMask x)
{
return new RegisterMask(~x.IntMask, ~x.VecMask);
}
public static bool operator ==(RegisterMask x, RegisterMask y)
{
return x.Equals(y);
}
public static bool operator !=(RegisterMask x, RegisterMask y)
{
return !x.Equals(y);
}
public override bool Equals(object obj)
{
return obj is RegisterMask regMask && Equals(regMask);
}
public bool Equals(RegisterMask other)
{
return IntMask == other.IntMask && VecMask == other.VecMask;
}
public override int GetHashCode()
{
return HashCode.Combine(IntMask, VecMask);
}
}
public static void RunPass(ControlFlowGraph cfg, bool isCompleteFunction)
{
// Compute local register inputs and outputs used inside blocks.
RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count];
RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count];
foreach (BasicBlock block in cfg.Blocks)
{
foreach (Node node in block.Operations)
{
Operation operation = node as Operation;
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand source = operation.GetSource(srcIndex);
if (source.Kind != OperandKind.Register)
{
continue;
}
Register register = source.GetRegister();
localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
}
if (operation.Destination != null && operation.Destination.Kind == OperandKind.Register)
{
localOutputs[block.Index] |= GetMask(operation.Destination.GetRegister());
}
}
}
// Compute global register inputs and outputs used across blocks.
RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Count];
RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Count];
RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Count];
bool modified;
bool firstPass = true;
do
{
modified = false;
// Compute register outputs.
for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
{
BasicBlock block = cfg.PostOrderBlocks[index];
if (block.Predecessors.Count != 0 && !HasContextLoad(block))
{
BasicBlock predecessor = block.Predecessors[0];
RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
RegisterMask outputs = globalOutputs[predecessor.Index];
for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
{
predecessor = block.Predecessors[pIndex];
cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
outputs |= globalOutputs[predecessor.Index];
}
globalInputs[block.Index] |= outputs & ~cmnOutputs;
if (!firstPass)
{
cmnOutputs &= globalCmnOutputs[block.Index];
}
if (Exchange(globalCmnOutputs, block.Index, cmnOutputs))
{
modified = true;
}
outputs |= localOutputs[block.Index];
if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
{
modified = true;
}
}
else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index]))
{
modified = true;
}
}
// Compute register inputs.
for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
{
BasicBlock block = cfg.PostOrderBlocks[index];
RegisterMask inputs = localInputs[block.Index];
if (block.Next != null)
{
inputs |= globalInputs[block.Next.Index];
}
if (block.Branch != null)
{
inputs |= globalInputs[block.Branch.Index];
}
inputs &= ~globalCmnOutputs[block.Index];
if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
{
modified = true;
}
}
firstPass = false;
}
while (modified);
// Insert load and store context instructions where needed.
foreach (BasicBlock block in cfg.Blocks)
{
bool hasContextLoad = HasContextLoad(block);
if (hasContextLoad)
{
block.Operations.RemoveFirst();
}
// The only block without any predecessor should be the entry block.
// It always needs a context load as it is the first block to run.
if (block.Predecessors.Count == 0 || hasContextLoad)
{
LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector);
LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer);
}
bool hasContextStore = HasContextStore(block);
if (hasContextStore)
{
block.Operations.RemoveLast();
}
if (EndsWithReturn(block) || hasContextStore)
{
StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, isCompleteFunction);
StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, isCompleteFunction);
}
}
}
private static bool HasContextLoad(BasicBlock block)
{
return StartsWith(block, Instruction.LoadFromContext) && block.Operations.First.Value.SourcesCount == 0;
}
private static bool HasContextStore(BasicBlock block)
{
return EndsWith(block, Instruction.StoreToContext) && block.GetLastOp().SourcesCount == 0;
}
private static bool StartsWith(BasicBlock block, Instruction inst)
{
if (block.Operations.Count == 0)
{
return false;
}
return block.Operations.First.Value is Operation operation && operation.Instruction == inst;
}
private static bool EndsWith(BasicBlock block, Instruction inst)
{
if (block.Operations.Count == 0)
{
return false;
}
return block.Operations.Last.Value is Operation operation && operation.Instruction == inst;
}
private static RegisterMask GetMask(Register register)
{
long intMask = 0;
long vecMask = 0;
switch (register.Type)
{
case RegisterType.Flag: intMask = (1L << RegsCount) << register.Index; break;
case RegisterType.Integer: intMask = 1L << register.Index; break;
case RegisterType.Vector: vecMask = 1L << register.Index; break;
}
return new RegisterMask(intMask, vecMask);
}
private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
{
RegisterMask oldValue = masks[blkIndex];
masks[blkIndex] = value;
return oldValue != value;
}
private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType)
{
Operand arg0 = Local(OperandType.I64);
for (int bit = 63; bit >= 0; bit--)
{
long mask = 1L << bit;
if ((inputs & mask) == 0)
{
continue;
}
Operand dest = GetRegFromBit(bit, baseType);
long offset = NativeContext.GetRegisterOffset(dest.GetRegister());
Operand addr = Local(OperandType.I64);
Operation loadOp = new Operation(Instruction.Load, dest, addr);
block.Operations.AddFirst(loadOp);
Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));
block.Operations.AddFirst(calcOffsOp);
}
Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));
block.Operations.AddFirst(loadArg0);
}
private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, bool isCompleteFunction)
{
if (Optimizations.AssumeStrictAbiCompliance && isCompleteFunction)
{
if (baseType == RegisterType.Integer || baseType == RegisterType.Flag)
{
outputs = ClearCallerSavedIntRegs(outputs);
}
else /* if (baseType == RegisterType.Vector) */
{
outputs = ClearCallerSavedVecRegs(outputs);
}
}
Operand arg0 = Local(OperandType.I64);
Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));
block.Append(loadArg0);
for (int bit = 0; bit < 64; bit++)
{
long mask = 1L << bit;
if ((outputs & mask) == 0)
{
continue;
}
Operand source = GetRegFromBit(bit, baseType);
long offset = NativeContext.GetRegisterOffset(source.GetRegister());
Operand addr = Local(OperandType.I64);
Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));
block.Append(calcOffsOp);
Operation storeOp = new Operation(Instruction.Store, null, addr, source);
block.Append(storeOp);
}
}
private static Operand GetRegFromBit(int bit, RegisterType baseType)
{
if (bit < RegsCount)
{
return new Operand(bit, baseType, GetOperandType(baseType));
}
else if (baseType == RegisterType.Integer)
{
return new Operand(bit & RegsMask, RegisterType.Flag, OperandType.I32);
}
else
{
throw new ArgumentOutOfRangeException(nameof(bit));
}
}
private static OperandType GetOperandType(RegisterType type)
{
switch (type)
{
case RegisterType.Flag: return OperandType.I32;
case RegisterType.Integer: return OperandType.I64;
case RegisterType.Vector: return OperandType.V128;
}
throw new ArgumentException($"Invalid register type \"{type}\".");
}
private static bool EndsWithReturn(BasicBlock block)
{
if (!(block.GetLastOp() is Operation operation))
{
return false;
}
return operation.Instruction == Instruction.Return;
}
private static long ClearCallerSavedIntRegs(long mask)
{
// TODO: ARM32 support.
mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
return mask;
}
private static long ClearCallerSavedVecRegs(long mask)
{
// TODO: ARM32 support.
mask &= ~CallerSavedVecRegistersMask;
return mask;
}
}
}

View file

@ -0,0 +1,293 @@
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Translation
{
static partial class Ssa
{
private class DefMap
{
private Dictionary<Register, Operand> _map;
private BitMap _phiMasks;
public DefMap()
{
_map = new Dictionary<Register, Operand>();
_phiMasks = new BitMap(RegisterConsts.TotalCount);
}
public bool TryAddOperand(Register reg, Operand operand)
{
return _map.TryAdd(reg, operand);
}
public bool TryGetOperand(Register reg, out Operand operand)
{
return _map.TryGetValue(reg, out operand);
}
public bool AddPhi(Register reg)
{
return _phiMasks.Set(GetIdFromRegister(reg));
}
public bool HasPhi(Register reg)
{
return _phiMasks.IsSet(GetIdFromRegister(reg));
}
}
public static void Construct(ControlFlowGraph cfg)
{
DefMap[] globalDefs = new DefMap[cfg.Blocks.Count];
foreach (BasicBlock block in cfg.Blocks)
{
globalDefs[block.Index] = new DefMap();
}
Queue<BasicBlock> dfPhiBlocks = new Queue<BasicBlock>();
// First pass, get all defs and locals uses.
foreach (BasicBlock block in cfg.Blocks)
{
Operand[] localDefs = new Operand[RegisterConsts.TotalCount];
LinkedListNode<Node> node = block.Operations.First;
Operand RenameLocal(Operand operand)
{
if (operand != null && operand.Kind == OperandKind.Register)
{
Operand local = localDefs[GetIdFromRegister(operand.GetRegister())];
operand = local ?? operand;
}
return operand;
}
while (node != null)
{
if (node.Value is Operation operation)
{
for (int index = 0; index < operation.SourcesCount; index++)
{
operation.SetSource(index, RenameLocal(operation.GetSource(index)));
}
Operand dest = operation.Destination;
if (dest != null && dest.Kind == OperandKind.Register)
{
Operand local = Local(dest.Type);
localDefs[GetIdFromRegister(dest.GetRegister())] = local;
operation.Destination = local;
}
}
node = node.Next;
}
for (int index = 0; index < RegisterConsts.TotalCount; index++)
{
Operand local = localDefs[index];
if (local == null)
{
continue;
}
Register reg = GetRegisterFromId(index);
globalDefs[block.Index].TryAddOperand(reg, local);
dfPhiBlocks.Enqueue(block);
while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock))
{
foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers)
{
if (globalDefs[domFrontier.Index].AddPhi(reg))
{
dfPhiBlocks.Enqueue(domFrontier);
}
}
}
}
}
// Second pass, rename variables with definitions on different blocks.
foreach (BasicBlock block in cfg.Blocks)
{
Operand[] localDefs = new Operand[RegisterConsts.TotalCount];
LinkedListNode<Node> node = block.Operations.First;
Operand RenameGlobal(Operand operand)
{
if (operand != null && operand.Kind == OperandKind.Register)
{
int key = GetIdFromRegister(operand.GetRegister());
Operand local = localDefs[key];
if (local == null)
{
local = FindDef(globalDefs, block, operand);
localDefs[key] = local;
}
operand = local;
}
return operand;
}
while (node != null)
{
if (node.Value is Operation operation)
{
for (int index = 0; index < operation.SourcesCount; index++)
{
operation.SetSource(index, RenameGlobal(operation.GetSource(index)));
}
}
node = node.Next;
}
}
}
private static Operand FindDef(DefMap[] globalDefs, BasicBlock current, Operand operand)
{
if (globalDefs[current.Index].HasPhi(operand.GetRegister()))
{
return InsertPhi(globalDefs, current, operand);
}
if (current != current.ImmediateDominator)
{
return FindDefOnPred(globalDefs, current.ImmediateDominator, operand);
}
return Undef();
}
private static Operand FindDefOnPred(DefMap[] globalDefs, BasicBlock current, Operand operand)
{
BasicBlock previous;
do
{
DefMap defMap = globalDefs[current.Index];
Register reg = operand.GetRegister();
if (defMap.TryGetOperand(reg, out Operand lastDef))
{
return lastDef;
}
if (defMap.HasPhi(reg))
{
return InsertPhi(globalDefs, current, operand);
}
previous = current;
current = current.ImmediateDominator;
}
while (previous != current);
return Undef();
}
private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Operand operand)
{
// This block has a Phi that has not been materialized yet, but that
// would define a new version of the variable we're looking for. We need
// to materialize the Phi, add all the block/operand pairs into the Phi, and
// then use the definition from that Phi.
Operand local = Local(operand.Type);
PhiNode phi = new PhiNode(local, block.Predecessors.Count);
AddPhi(block, phi);
globalDefs[block.Index].TryAddOperand(operand.GetRegister(), local);
for (int index = 0; index < block.Predecessors.Count; index++)
{
BasicBlock predecessor = block.Predecessors[index];
phi.SetBlock(index, predecessor);
phi.SetSource(index, FindDefOnPred(globalDefs, predecessor, operand));
}
return local;
}
private static void AddPhi(BasicBlock block, PhiNode phi)
{
LinkedListNode<Node> node = block.Operations.First;
if (node != null)
{
while (node.Next?.Value is PhiNode)
{
node = node.Next;
}
}
if (node?.Value is PhiNode)
{
block.Operations.AddAfter(node, phi);
}
else
{
block.Operations.AddFirst(phi);
}
}
private static int GetIdFromRegister(Register reg)
{
if (reg.Type == RegisterType.Integer)
{
return reg.Index;
}
else if (reg.Type == RegisterType.Vector)
{
return RegisterConsts.IntRegsCount + reg.Index;
}
else /* if (reg.Type == RegisterType.Flag) */
{
return RegisterConsts.IntAndVecRegsCount + reg.Index;
}
}
private static Register GetRegisterFromId(int id)
{
if (id < RegisterConsts.IntRegsCount)
{
return new Register(id, RegisterType.Integer);
}
else if (id < RegisterConsts.IntAndVecRegsCount)
{
return new Register(id - RegisterConsts.IntRegsCount, RegisterType.Vector);
}
else /* if (id < RegisterConsts.TotalCount) */
{
return new Register(id - RegisterConsts.IntAndVecRegsCount, RegisterType.Flag);
}
}
}
}

View file

@ -0,0 +1,46 @@
using ARMeilleure.IntermediateRepresentation;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Translation
{
static partial class Ssa
{
public static void Deconstruct(ControlFlowGraph cfg)
{
foreach (BasicBlock block in cfg.Blocks)
{
LinkedListNode<Node> node = block.Operations.First;
while (node?.Value is PhiNode phi)
{
LinkedListNode<Node> nextNode = node.Next;
Operand local = Local(phi.Destination.Type);
for (int index = 0; index < phi.SourcesCount; index++)
{
BasicBlock predecessor = phi.GetBlock(index);
Operand source = phi.GetSource(index);
predecessor.Append(new Operation(Instruction.Copy, local, source));
phi.SetSource(index, null);
}
Operation copyOp = new Operation(Instruction.Copy, phi.Destination, local);
block.Operations.AddBefore(node, copyOp);
phi.Destination = null;
block.Operations.Remove(node);
node = nextNode;
}
}
}
}
}

View file

@ -0,0 +1,30 @@
using System.Threading;
namespace ARMeilleure.Translation
{
class TranslatedFunction
{
private const int MinCallsForRejit = 100;
private GuestFunction _func;
private bool _rejit;
private int _callCount;
public TranslatedFunction(GuestFunction func, bool rejit)
{
_func = func;
_rejit = rejit;
}
public ulong Execute(State.ExecutionContext context)
{
return _func(context.NativeContextPtr);
}
public bool ShouldRejit()
{
return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit;
}
}
}

View file

@ -0,0 +1,253 @@
using ARMeilleure.Decoders;
using ARMeilleure.Diagnostics;
using ARMeilleure.Instructions;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Memory;
using ARMeilleure.State;
using System;
using System.Collections.Concurrent;
using System.Threading;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Translation
{
public class Translator : ITranslator
{
private const ulong CallFlag = InstEmitFlowHelper.CallFlag;
private MemoryManager _memory;
private ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
private PriorityQueue<ulong> _backgroundQueue;
private AutoResetEvent _backgroundTranslatorEvent;
private volatile int _threadCount;
public Translator(MemoryManager memory)
{
_memory = memory;
_funcs = new ConcurrentDictionary<ulong, TranslatedFunction>();
_backgroundQueue = new PriorityQueue<ulong>(2);
_backgroundTranslatorEvent = new AutoResetEvent(false);
}
private void TranslateQueuedSubs()
{
while (_threadCount != 0)
{
if (_backgroundQueue.TryDequeue(out ulong address))
{
TranslatedFunction func = Translate(address, ExecutionMode.Aarch64, highCq: true);
_funcs.AddOrUpdate(address, func, (key, oldFunc) => func);
}
else
{
_backgroundTranslatorEvent.WaitOne();
}
}
}
public void Execute(IExecutionContext ctx, ulong address)
{
State.ExecutionContext context = (State.ExecutionContext)ctx;
if (Interlocked.Increment(ref _threadCount) == 1)
{
Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs);
backgroundTranslatorThread.Priority = ThreadPriority.Lowest;
backgroundTranslatorThread.Start();
}
Statistics.InitializeTimer();
NativeInterface.RegisterThread(context, _memory);
do
{
address = ExecuteSingle(context, address);
}
while (context.Running && (address & ~1UL) != 0);
NativeInterface.UnregisterThread();
if (Interlocked.Decrement(ref _threadCount) == 0)
{
_backgroundTranslatorEvent.Set();
}
}
public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
{
TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
Statistics.StartTimer();
ulong nextAddr = func.Execute(context);
Statistics.StopTimer(address);
return nextAddr;
}
private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
{
// TODO: Investigate how we should handle code at unaligned addresses.
// Currently, those low bits are used to store special flags.
bool isCallTarget = (address & CallFlag) != 0;
address &= ~CallFlag;
if (!_funcs.TryGetValue(address, out TranslatedFunction func))
{
func = Translate(address, mode, highCq: false);
_funcs.TryAdd(address, func);
}
else if (isCallTarget && func.ShouldRejit())
{
_backgroundQueue.Enqueue(0, address);
_backgroundTranslatorEvent.Set();
}
return func;
}
private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
{
ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User);
Logger.StartPass(PassName.Decoding);
Block[] blocks = highCq
? Decoder.DecodeFunction (_memory, address, mode)
: Decoder.DecodeBasicBlock(_memory, address, mode);
Logger.EndPass(PassName.Decoding);
Logger.StartPass(PassName.Translation);
EmitSynchronization(context);
if (blocks[0].Address != address)
{
context.Branch(context.GetLabel(address));
}
ControlFlowGraph cfg = EmitAndGetCFG(context, blocks);
Logger.EndPass(PassName.Translation);
Logger.StartPass(PassName.RegisterUsage);
RegisterUsage.RunPass(cfg, isCompleteFunction: false);
Logger.EndPass(PassName.RegisterUsage);
OperandType[] argTypes = new OperandType[] { OperandType.I64 };
CompilerOptions options = highCq
? CompilerOptions.HighCq
: CompilerOptions.None;
GuestFunction func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options);
return new TranslatedFunction(func, rejit: !highCq);
}
private static ControlFlowGraph EmitAndGetCFG(ArmEmitterContext context, Block[] blocks)
{
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
Block block = blocks[blkIndex];
context.CurrBlock = block;
context.MarkLabel(context.GetLabel(block.Address));
for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
{
OpCode opCode = block.OpCodes[opcIndex];
context.CurrOp = opCode;
bool isLastOp = opcIndex == block.OpCodes.Count - 1;
if (isLastOp && block.Branch != null && block.Branch.Address <= block.Address)
{
EmitSynchronization(context);
}
Operand lblPredicateSkip = null;
if (opCode is OpCode32 op && op.Cond < Condition.Al)
{
lblPredicateSkip = Label();
InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
}
if (opCode.Instruction.Emitter != null)
{
opCode.Instruction.Emitter(context);
}
else
{
throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
}
if (lblPredicateSkip != null)
{
context.MarkLabel(lblPredicateSkip);
// If this is the last op on the block, and there's no "next" block
// after this one, then we have to return right now, with the address
// of the next instruction to be executed (in the case that the condition
// is false, and the branch was not taken, as all basic blocks should end
// with some kind of branch).
if (isLastOp && block.Next == null)
{
context.Return(Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
}
}
}
}
return context.GetControlFlowGraph();
}
private static void EmitSynchronization(EmitterContext context)
{
long countOffs = NativeContext.GetCounterOffset();
Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs));
Operand count = context.Load(OperandType.I32, countAddr);
Operand lblNonZero = Label();
Operand lblExit = Label();
context.BranchIfTrue(lblNonZero, count);
context.Call(new _Void(NativeInterface.CheckSynchronization));
context.Branch(lblExit);
context.MarkLabel(lblNonZero);
count = context.Subtract(count, Const(1));
context.Store(countAddr, count);
context.MarkLabel(lblExit);
}
}
}