Add multi-level function table (#2228)
* Add AddressTable<T> * Use AddressTable<T> for dispatch * Remove JumpTable & co. * Add fallback for out of range addresses * Add PPTC support * Add documentation to `AddressTable<T>` * Make AddressTable<T> configurable * Fix table walk * Fix IsMapped check * Remove CountTableCapacity * Add PPTC support for fast path * Rename IsMapped to IsValid * Remove stale comment * Change format of address in exception message * Add TranslatorStubs * Split DispatchStub Avoids recompilation of stubs during tests. * Add hint for 64bit or 32bit * Add documentation to `Symbol` * Add documentation to `TranslatorStubs` Make `TranslatorStubs` disposable as well. * Add documentation to `SymbolType` * Add `AddressTableEventSource` to monitor function table size Add an EventSource which measures the amount of unmanaged bytes allocated by AddressTable<T> instances. dotnet-counters monitor -n Ryujinx --counters ARMeilleure * Add `AllowLcqInFunctionTable` optimization toggle This is to reduce the impact this change has on the test duration. Before everytime a test was ran, the FunctionTable would be initialized and populated so that the newly compiled test would get registered to it. * Implement unmanaged dispatcher Uses the DispatchStub to dispatch into the next translation, which allows execution to stay in unmanaged for longer and skips a ConcurrentDictionary look up when the target translation has been registered to the FunctionTable. * Remove redundant null check * Tune levels of FunctionTable Uses 5 levels instead of 4 and change unit of AddressTableEventSource from KB to MB. * Use 64-bit function table Improves codegen for direct branches: mov qword [rax+0x408],0x10603560 - mov rcx,sub_10603560_OFFSET - mov ecx,[rcx] - mov ecx,ecx - mov rdx,JIT_CACHE_BASE - add rdx,rcx + mov rcx,sub_10603560 + mov rdx,[rcx] mov rcx,rax Improves codegen for dispatch stub: and rax,byte +0x1f - mov eax,[rcx+rax*4] - mov eax,eax - mov rcx,JIT_CACHE_BASE - lea rax,[rcx+rax] + mov rax,[rcx+rax*8] mov rcx,rbx * Remove `JitCacheSymbol` & `JitCache.Offset` * Turn `Translator.Translate` into an instance method We do not have to add more parameter to this method and related ones as new structures are added & needed for translation. * Add symbol only when PTC is enabled Address LDj3SNuD's feedback * Change `NativeContext.Running` to a 32-bit integer * Fix PageTable symbol for host mapped
This commit is contained in:
parent
f3b0b4831c
commit
9d7627af64
36 changed files with 1020 additions and 1272 deletions
|
@ -24,12 +24,27 @@ namespace ARMeilleure.Translation
|
|||
{
|
||||
public class Translator
|
||||
{
|
||||
private const int CountTableCapacity = 4 * 1024 * 1024;
|
||||
private static readonly AddressTable<ulong>.Level[] Levels64Bit =
|
||||
new AddressTable<ulong>.Level[]
|
||||
{
|
||||
new(31, 17),
|
||||
new(23, 8),
|
||||
new(15, 8),
|
||||
new( 7, 8),
|
||||
new( 2, 5)
|
||||
};
|
||||
|
||||
private static readonly AddressTable<ulong>.Level[] Levels32Bit =
|
||||
new AddressTable<ulong>.Level[]
|
||||
{
|
||||
new(31, 17),
|
||||
new(23, 8),
|
||||
new(15, 8),
|
||||
new( 7, 8),
|
||||
new( 1, 6)
|
||||
};
|
||||
|
||||
private readonly IJitMemoryAllocator _allocator;
|
||||
private readonly IMemoryManager _memory;
|
||||
|
||||
private readonly ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
|
||||
private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;
|
||||
|
||||
private readonly ConcurrentDictionary<ulong, object> _backgroundSet;
|
||||
|
@ -37,21 +52,22 @@ namespace ARMeilleure.Translation
|
|||
private readonly AutoResetEvent _backgroundTranslatorEvent;
|
||||
private readonly ReaderWriterLock _backgroundTranslatorLock;
|
||||
|
||||
private JumpTable _jumpTable;
|
||||
internal JumpTable JumpTable => _jumpTable;
|
||||
internal ConcurrentDictionary<ulong, TranslatedFunction> Functions { get; }
|
||||
internal AddressTable<ulong> FunctionTable { get; }
|
||||
internal EntryTable<uint> CountTable { get; }
|
||||
internal TranslatorStubs Stubs { get; }
|
||||
internal IMemoryManager Memory { get; }
|
||||
|
||||
private volatile int _threadCount;
|
||||
|
||||
// FIXME: Remove this once the init logic of the emulator will be redone.
|
||||
public static readonly ManualResetEvent IsReadyForTranslation = new(false);
|
||||
|
||||
public Translator(IJitMemoryAllocator allocator, IMemoryManager memory)
|
||||
public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for64Bits)
|
||||
{
|
||||
_allocator = allocator;
|
||||
_memory = memory;
|
||||
Memory = memory;
|
||||
|
||||
_funcs = new ConcurrentDictionary<ulong, TranslatedFunction>();
|
||||
_oldFuncs = new ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>>();
|
||||
|
||||
_backgroundSet = new ConcurrentDictionary<ulong, object>();
|
||||
|
@ -59,11 +75,14 @@ namespace ARMeilleure.Translation
|
|||
_backgroundTranslatorEvent = new AutoResetEvent(false);
|
||||
_backgroundTranslatorLock = new ReaderWriterLock();
|
||||
|
||||
CountTable = new EntryTable<uint>();
|
||||
|
||||
JitCache.Initialize(allocator);
|
||||
|
||||
DirectCallStubs.InitializeStubs();
|
||||
CountTable = new EntryTable<uint>();
|
||||
Functions = new ConcurrentDictionary<ulong, TranslatedFunction>();
|
||||
FunctionTable = new AddressTable<ulong>(for64Bits ? Levels64Bit : Levels32Bit);
|
||||
Stubs = new TranslatorStubs(this);
|
||||
|
||||
FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
|
||||
|
||||
if (memory.Type.IsHostMapped())
|
||||
{
|
||||
|
@ -80,27 +99,21 @@ namespace ARMeilleure.Translation
|
|||
if (_backgroundStack.TryPop(out RejitRequest request) &&
|
||||
_backgroundSet.TryRemove(request.Address, out _))
|
||||
{
|
||||
TranslatedFunction func = Translate(
|
||||
_memory,
|
||||
_jumpTable,
|
||||
CountTable,
|
||||
request.Address,
|
||||
request.Mode,
|
||||
highCq: true);
|
||||
TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true);
|
||||
|
||||
_funcs.AddOrUpdate(request.Address, func, (key, oldFunc) =>
|
||||
Functions.AddOrUpdate(request.Address, func, (key, oldFunc) =>
|
||||
{
|
||||
EnqueueForDeletion(key, oldFunc);
|
||||
return func;
|
||||
});
|
||||
|
||||
_jumpTable.RegisterFunction(request.Address, func);
|
||||
|
||||
if (PtcProfiler.Enabled)
|
||||
{
|
||||
PtcProfiler.UpdateEntry(request.Address, request.Mode, highCq: true);
|
||||
}
|
||||
|
||||
RegisterFunction(request.Address, func);
|
||||
|
||||
_backgroundTranslatorLock.ReleaseReaderLock();
|
||||
}
|
||||
else
|
||||
|
@ -120,14 +133,11 @@ namespace ARMeilleure.Translation
|
|||
{
|
||||
IsReadyForTranslation.WaitOne();
|
||||
|
||||
Debug.Assert(_jumpTable == null);
|
||||
_jumpTable = new JumpTable(_allocator);
|
||||
|
||||
if (Ptc.State == PtcState.Enabled)
|
||||
{
|
||||
Debug.Assert(_funcs.Count == 0);
|
||||
Ptc.LoadTranslations(_funcs, _memory, _jumpTable, CountTable);
|
||||
Ptc.MakeAndSaveTranslations(_funcs, _memory, _jumpTable, CountTable);
|
||||
Debug.Assert(Functions.Count == 0);
|
||||
Ptc.LoadTranslations(this);
|
||||
Ptc.MakeAndSaveTranslations(this);
|
||||
}
|
||||
|
||||
PtcProfiler.Start();
|
||||
|
@ -160,13 +170,20 @@ namespace ARMeilleure.Translation
|
|||
|
||||
Statistics.InitializeTimer();
|
||||
|
||||
NativeInterface.RegisterThread(context, _memory, this);
|
||||
NativeInterface.RegisterThread(context, Memory, this);
|
||||
|
||||
do
|
||||
if (Optimizations.UseUnmanagedDispatchLoop)
|
||||
{
|
||||
address = ExecuteSingle(context, address);
|
||||
Stubs.DispatchLoop(context.NativeContextPtr, address);
|
||||
}
|
||||
else
|
||||
{
|
||||
do
|
||||
{
|
||||
address = ExecuteSingle(context, address);
|
||||
}
|
||||
while (context.Running && address != 0);
|
||||
}
|
||||
while (context.Running && address != 0);
|
||||
|
||||
NativeInterface.UnregisterThread();
|
||||
|
||||
|
@ -178,9 +195,8 @@ namespace ARMeilleure.Translation
|
|||
|
||||
DisposePools();
|
||||
|
||||
_jumpTable.Dispose();
|
||||
_jumpTable = null;
|
||||
|
||||
Stubs.Dispose();
|
||||
FunctionTable.Dispose();
|
||||
CountTable.Dispose();
|
||||
|
||||
GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
|
||||
|
@ -202,40 +218,51 @@ namespace ARMeilleure.Translation
|
|||
|
||||
internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
|
||||
{
|
||||
if (!_funcs.TryGetValue(address, out TranslatedFunction func))
|
||||
if (!Functions.TryGetValue(address, out TranslatedFunction func))
|
||||
{
|
||||
func = Translate(_memory, _jumpTable, CountTable, address, mode, highCq: false);
|
||||
func = Translate(address, mode, highCq: false);
|
||||
|
||||
TranslatedFunction getFunc = _funcs.GetOrAdd(address, func);
|
||||
TranslatedFunction oldFunc = Functions.GetOrAdd(address, func);
|
||||
|
||||
if (getFunc != func)
|
||||
if (oldFunc != func)
|
||||
{
|
||||
JitCache.Unmap(func.FuncPtr);
|
||||
func = getFunc;
|
||||
func = oldFunc;
|
||||
}
|
||||
|
||||
if (PtcProfiler.Enabled)
|
||||
{
|
||||
PtcProfiler.AddEntry(address, mode, highCq: false);
|
||||
}
|
||||
|
||||
RegisterFunction(address, func);
|
||||
}
|
||||
|
||||
return func;
|
||||
}
|
||||
|
||||
internal static TranslatedFunction Translate(
|
||||
IMemoryManager memory,
|
||||
JumpTable jumpTable,
|
||||
EntryTable<uint> countTable,
|
||||
ulong address,
|
||||
ExecutionMode mode,
|
||||
bool highCq)
|
||||
internal void RegisterFunction(ulong guestAddress, TranslatedFunction func)
|
||||
{
|
||||
var context = new ArmEmitterContext(memory, jumpTable, countTable, address, highCq, Aarch32Mode.User);
|
||||
if (FunctionTable.IsValid(guestAddress) && (Optimizations.AllowLcqInFunctionTable || func.HighCq))
|
||||
{
|
||||
Volatile.Write(ref FunctionTable.GetValue(guestAddress), (ulong)func.FuncPtr);
|
||||
}
|
||||
}
|
||||
|
||||
internal TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
|
||||
{
|
||||
var context = new ArmEmitterContext(
|
||||
Memory,
|
||||
CountTable,
|
||||
FunctionTable,
|
||||
Stubs,
|
||||
address,
|
||||
highCq,
|
||||
mode: Aarch32Mode.User);
|
||||
|
||||
Logger.StartPass(PassName.Decoding);
|
||||
|
||||
Block[] blocks = Decoder.Decode(memory, address, mode, highCq, singleBlock: false);
|
||||
Block[] blocks = Decoder.Decode(Memory, address, mode, highCq, singleBlock: false);
|
||||
|
||||
Logger.EndPass(PassName.Decoding);
|
||||
|
||||
|
@ -268,7 +295,7 @@ namespace ARMeilleure.Translation
|
|||
|
||||
GuestFunction func;
|
||||
|
||||
if (Ptc.State == PtcState.Disabled)
|
||||
if (!context.HasPtc)
|
||||
{
|
||||
func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options);
|
||||
|
||||
|
@ -282,7 +309,7 @@ namespace ARMeilleure.Translation
|
|||
|
||||
ResetPool(highCq ? 1 : 0);
|
||||
|
||||
Hash128 hash = Ptc.ComputeHash(memory, address, funcSize);
|
||||
Hash128 hash = Ptc.ComputeHash(Memory, address, funcSize);
|
||||
|
||||
Ptc.WriteInfoCodeRelocUnwindInfo(address, funcSize, hash, highCq, ptcInfo);
|
||||
}
|
||||
|
@ -360,7 +387,11 @@ namespace ARMeilleure.Translation
|
|||
|
||||
if (block.Exit)
|
||||
{
|
||||
InstEmitFlowHelper.EmitTailContinue(context, Const(block.Address));
|
||||
// Left option here as it may be useful if we need to return to managed rather than tail call in
|
||||
// future. (eg. for debug)
|
||||
bool useReturns = false;
|
||||
|
||||
InstEmitFlowHelper.EmitVirtualJump(context, Const(block.Address), isReturn: useReturns);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -416,7 +447,10 @@ namespace ARMeilleure.Translation
|
|||
|
||||
Operand lblEnd = Label();
|
||||
|
||||
Operand address = Const(ref counter.Value, Ptc.CountTableIndex);
|
||||
Operand address = !context.HasPtc ?
|
||||
Const(ref counter.Value) :
|
||||
Const(ref counter.Value, Ptc.CountTableSymbol);
|
||||
|
||||
Operand curCount = context.Load(OperandType.I32, address);
|
||||
Operand count = context.Add(curCount, Const(1));
|
||||
context.Store(address, count);
|
||||
|
@ -477,14 +511,14 @@ namespace ARMeilleure.Translation
|
|||
// Ensure no attempt will be made to compile new functions due to rejit.
|
||||
ClearRejitQueue(allowRequeue: false);
|
||||
|
||||
foreach (var func in _funcs.Values)
|
||||
foreach (var func in Functions.Values)
|
||||
{
|
||||
JitCache.Unmap(func.FuncPtr);
|
||||
|
||||
func.CallCounter?.Dispose();
|
||||
}
|
||||
|
||||
_funcs.Clear();
|
||||
Functions.Clear();
|
||||
|
||||
while (_oldFuncs.TryDequeue(out var kv))
|
||||
{
|
||||
|
@ -502,7 +536,7 @@ namespace ARMeilleure.Translation
|
|||
{
|
||||
while (_backgroundStack.TryPop(out var request))
|
||||
{
|
||||
if (_funcs.TryGetValue(request.Address, out var func) && func.CallCounter != null)
|
||||
if (Functions.TryGetValue(request.Address, out var func) && func.CallCounter != null)
|
||||
{
|
||||
Volatile.Write(ref func.CallCounter.Value, 0);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue