Add multi-level function table (#2228)

* Add AddressTable<T>

* Use AddressTable<T> for dispatch

* Remove JumpTable & co.

* Add fallback for out of range addresses

* Add PPTC support

* Add documentation to `AddressTable<T>`

* Make AddressTable<T> configurable

* Fix table walk

* Fix IsMapped check

* Remove CountTableCapacity

* Add PPTC support for fast path

* Rename IsMapped to IsValid

* Remove stale comment

* Change format of address in exception message

* Add TranslatorStubs

* Split DispatchStub

Avoids recompilation of stubs during tests.

* Add hint for 64bit or 32bit

* Add documentation to `Symbol`

* Add documentation to `TranslatorStubs`

Make `TranslatorStubs` disposable as well.

* Add documentation to `SymbolType`

* Add `AddressTableEventSource` to monitor function table size

Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.

 dotnet-counters monitor -n Ryujinx --counters ARMeilleure

* Add `AllowLcqInFunctionTable` optimization toggle

This is to reduce the impact this change has on the test duration.
Previously, every time a test was run, the FunctionTable would be
initialized and populated so that the newly compiled test would get
registered to it.

* Implement unmanaged dispatcher

Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged code for longer and skips a
ConcurrentDictionary lookup when the target translation has been
registered to the FunctionTable.

* Remove redundant null check

* Tune levels of FunctionTable

Uses 5 levels instead of 4 and changes the unit of
AddressTableEventSource from KB to MB.

* Use 64-bit function table

Improves codegen for direct branches:

    mov qword [rax+0x408],0x10603560
 -  mov rcx,sub_10603560_OFFSET
 -  mov ecx,[rcx]
 -  mov ecx,ecx
 -  mov rdx,JIT_CACHE_BASE
 -  add rdx,rcx
 +  mov rcx,sub_10603560
 +  mov rdx,[rcx]
    mov rcx,rax

Improves codegen for dispatch stub:

    and rax,byte +0x1f
 -  mov eax,[rcx+rax*4]
 -  mov eax,eax
 -  mov rcx,JIT_CACHE_BASE
 -  lea rax,[rcx+rax]
 +  mov rax,[rcx+rax*8]
    mov rcx,rbx

* Remove `JitCacheSymbol` & `JitCache.Offset`

* Turn `Translator.Translate` into an instance method

This way we do not have to add more parameters to this method and
related ones as new structures are added & needed for translation.

* Add symbol only when PTC is enabled

Address LDj3SNuD's feedback

* Change `NativeContext.Running` to a 32-bit integer

* Fix PageTable symbol for host mapped
This commit is contained in:
FICTURE7 2021-05-30 01:06:28 +04:00 committed by GitHub
parent f3b0b4831c
commit 9d7627af64
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
36 changed files with 1020 additions and 1272 deletions

View file

@ -24,12 +24,27 @@ namespace ARMeilleure.Translation
{
public class Translator
{
private const int CountTableCapacity = 4 * 1024 * 1024;
// Level layout passed to the AddressTable<ulong> constructor when the translator is created with
// for64Bits set to true.
// NOTE(review): each Level looks like (lowest address bit, number of bits) - the two values of every entry
// sum to the first value of the entry above it (2+5=7, 7+8=15, 15+8=23, 23+8=31), so the five levels
// would cover address bits 2..47. Confirm against AddressTable<T>.Level.
private static readonly AddressTable<ulong>.Level[] Levels64Bit =
new AddressTable<ulong>.Level[]
{
new(31, 17),
new(23, 8),
new(15, 8),
new( 7, 8),
new( 2, 5)
};
// Level layout passed to the AddressTable<ulong> constructor when the translator is created with
// for64Bits set to false.
// Only the last entry differs from Levels64Bit: (1, 6) instead of (2, 5).
// NOTE(review): if Level is (lowest address bit, number of bits), the last level starts one bit lower so
// that 2-byte aligned guest addresses get distinct entries - presumably for Thumb code; confirm.
private static readonly AddressTable<ulong>.Level[] Levels32Bit =
new AddressTable<ulong>.Level[]
{
new(31, 17),
new(23, 8),
new(15, 8),
new( 7, 8),
new( 1, 6)
};
private readonly IJitMemoryAllocator _allocator;
private readonly IMemoryManager _memory;
private readonly ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;
private readonly ConcurrentDictionary<ulong, object> _backgroundSet;
@ -37,21 +52,22 @@ namespace ARMeilleure.Translation
private readonly AutoResetEvent _backgroundTranslatorEvent;
private readonly ReaderWriterLock _backgroundTranslatorLock;
private JumpTable _jumpTable;
internal JumpTable JumpTable => _jumpTable;
internal ConcurrentDictionary<ulong, TranslatedFunction> Functions { get; }
internal AddressTable<ulong> FunctionTable { get; }
internal EntryTable<uint> CountTable { get; }
internal TranslatorStubs Stubs { get; }
internal IMemoryManager Memory { get; }
private volatile int _threadCount;
// FIXME: Remove this once the init logic of the emulator will be redone.
public static readonly ManualResetEvent IsReadyForTranslation = new(false);
public Translator(IJitMemoryAllocator allocator, IMemoryManager memory)
public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for64Bits)
{
_allocator = allocator;
_memory = memory;
Memory = memory;
_funcs = new ConcurrentDictionary<ulong, TranslatedFunction>();
_oldFuncs = new ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>>();
_backgroundSet = new ConcurrentDictionary<ulong, object>();
@ -59,11 +75,14 @@ namespace ARMeilleure.Translation
_backgroundTranslatorEvent = new AutoResetEvent(false);
_backgroundTranslatorLock = new ReaderWriterLock();
CountTable = new EntryTable<uint>();
JitCache.Initialize(allocator);
DirectCallStubs.InitializeStubs();
CountTable = new EntryTable<uint>();
Functions = new ConcurrentDictionary<ulong, TranslatedFunction>();
FunctionTable = new AddressTable<ulong>(for64Bits ? Levels64Bit : Levels32Bit);
Stubs = new TranslatorStubs(this);
FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
if (memory.Type.IsHostMapped())
{
@ -80,27 +99,21 @@ namespace ARMeilleure.Translation
if (_backgroundStack.TryPop(out RejitRequest request) &&
_backgroundSet.TryRemove(request.Address, out _))
{
TranslatedFunction func = Translate(
_memory,
_jumpTable,
CountTable,
request.Address,
request.Mode,
highCq: true);
TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true);
_funcs.AddOrUpdate(request.Address, func, (key, oldFunc) =>
Functions.AddOrUpdate(request.Address, func, (key, oldFunc) =>
{
EnqueueForDeletion(key, oldFunc);
return func;
});
_jumpTable.RegisterFunction(request.Address, func);
if (PtcProfiler.Enabled)
{
PtcProfiler.UpdateEntry(request.Address, request.Mode, highCq: true);
}
RegisterFunction(request.Address, func);
_backgroundTranslatorLock.ReleaseReaderLock();
}
else
@ -120,14 +133,11 @@ namespace ARMeilleure.Translation
{
IsReadyForTranslation.WaitOne();
Debug.Assert(_jumpTable == null);
_jumpTable = new JumpTable(_allocator);
if (Ptc.State == PtcState.Enabled)
{
Debug.Assert(_funcs.Count == 0);
Ptc.LoadTranslations(_funcs, _memory, _jumpTable, CountTable);
Ptc.MakeAndSaveTranslations(_funcs, _memory, _jumpTable, CountTable);
Debug.Assert(Functions.Count == 0);
Ptc.LoadTranslations(this);
Ptc.MakeAndSaveTranslations(this);
}
PtcProfiler.Start();
@ -160,13 +170,20 @@ namespace ARMeilleure.Translation
Statistics.InitializeTimer();
NativeInterface.RegisterThread(context, _memory, this);
NativeInterface.RegisterThread(context, Memory, this);
do
if (Optimizations.UseUnmanagedDispatchLoop)
{
address = ExecuteSingle(context, address);
Stubs.DispatchLoop(context.NativeContextPtr, address);
}
else
{
do
{
address = ExecuteSingle(context, address);
}
while (context.Running && address != 0);
}
while (context.Running && address != 0);
NativeInterface.UnregisterThread();
@ -178,9 +195,8 @@ namespace ARMeilleure.Translation
DisposePools();
_jumpTable.Dispose();
_jumpTable = null;
Stubs.Dispose();
FunctionTable.Dispose();
CountTable.Dispose();
GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
@ -202,40 +218,51 @@ namespace ARMeilleure.Translation
// Returns the translation stored for `address`. When none is stored yet, the guest code is translated
// with highCq: false, added to the function dictionary, recorded with the PTC profiler (if enabled) and
// passed to RegisterFunction.
internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
{
if (!_funcs.TryGetValue(address, out TranslatedFunction func))
if (!Functions.TryGetValue(address, out TranslatedFunction func))
{
func = Translate(_memory, _jumpTable, CountTable, address, mode, highCq: false);
func = Translate(address, mode, highCq: false);
TranslatedFunction getFunc = _funcs.GetOrAdd(address, func);
TranslatedFunction oldFunc = Functions.GetOrAdd(address, func);
// GetOrAdd handed back a different instance, so an entry for this address already existed:
// unmap the code that was just generated and continue with the existing translation.
if (getFunc != func)
if (oldFunc != func)
{
JitCache.Unmap(func.FuncPtr);
func = getFunc;
func = oldFunc;
}
if (PtcProfiler.Enabled)
{
PtcProfiler.AddEntry(address, mode, highCq: false);
}
// Registers whichever translation ended up in the dictionary (ours or the pre-existing one).
RegisterFunction(address, func);
}
return func;
}
internal static TranslatedFunction Translate(
IMemoryManager memory,
JumpTable jumpTable,
EntryTable<uint> countTable,
ulong address,
ExecutionMode mode,
bool highCq)
// Writes the native entry point of `func` into the FunctionTable slot for `guestAddress`.
// The slot is only written when FunctionTable.IsValid accepts the address and either
// Optimizations.AllowLcqInFunctionTable is set or func.HighCq is true; otherwise this does nothing.
internal void RegisterFunction(ulong guestAddress, TranslatedFunction func)
{
var context = new ArmEmitterContext(memory, jumpTable, countTable, address, highCq, Aarch32Mode.User);
if (FunctionTable.IsValid(guestAddress) && (Optimizations.AllowLcqInFunctionTable || func.HighCq))
{
// The function pointer is stored as a ulong with a volatile write.
Volatile.Write(ref FunctionTable.GetValue(guestAddress), (ulong)func.FuncPtr);
}
}
internal TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
{
var context = new ArmEmitterContext(
Memory,
CountTable,
FunctionTable,
Stubs,
address,
highCq,
mode: Aarch32Mode.User);
Logger.StartPass(PassName.Decoding);
Block[] blocks = Decoder.Decode(memory, address, mode, highCq, singleBlock: false);
Block[] blocks = Decoder.Decode(Memory, address, mode, highCq, singleBlock: false);
Logger.EndPass(PassName.Decoding);
@ -268,7 +295,7 @@ namespace ARMeilleure.Translation
GuestFunction func;
if (Ptc.State == PtcState.Disabled)
if (!context.HasPtc)
{
func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options);
@ -282,7 +309,7 @@ namespace ARMeilleure.Translation
ResetPool(highCq ? 1 : 0);
Hash128 hash = Ptc.ComputeHash(memory, address, funcSize);
Hash128 hash = Ptc.ComputeHash(Memory, address, funcSize);
Ptc.WriteInfoCodeRelocUnwindInfo(address, funcSize, hash, highCq, ptcInfo);
}
@ -360,7 +387,11 @@ namespace ARMeilleure.Translation
if (block.Exit)
{
InstEmitFlowHelper.EmitTailContinue(context, Const(block.Address));
// Left option here as it may be useful if we need to return to managed rather than tail call in
// future. (eg. for debug)
bool useReturns = false;
InstEmitFlowHelper.EmitVirtualJump(context, Const(block.Address), isReturn: useReturns);
}
else
{
@ -416,7 +447,10 @@ namespace ARMeilleure.Translation
Operand lblEnd = Label();
Operand address = Const(ref counter.Value, Ptc.CountTableIndex);
Operand address = !context.HasPtc ?
Const(ref counter.Value) :
Const(ref counter.Value, Ptc.CountTableSymbol);
Operand curCount = context.Load(OperandType.I32, address);
Operand count = context.Add(curCount, Const(1));
context.Store(address, count);
@ -477,14 +511,14 @@ namespace ARMeilleure.Translation
// Ensure no attempt will be made to compile new functions due to rejit.
ClearRejitQueue(allowRequeue: false);
foreach (var func in _funcs.Values)
foreach (var func in Functions.Values)
{
JitCache.Unmap(func.FuncPtr);
func.CallCounter?.Dispose();
}
_funcs.Clear();
Functions.Clear();
while (_oldFuncs.TryDequeue(out var kv))
{
@ -502,7 +536,7 @@ namespace ARMeilleure.Translation
{
while (_backgroundStack.TryPop(out var request))
{
if (_funcs.TryGetValue(request.Address, out var func) && func.CallCounter != null)
if (Functions.TryGetValue(request.Address, out var func) && func.CallCounter != null)
{
Volatile.Write(ref func.CallCounter.Value, 0);
}