ARM exclusive monitor and multicore fixes (#589)

* Implement ARM exclusive load/store with compare exchange insts, and enable multicore by default

* Fix comment typo

* Support Linux and OSX on MemoryAlloc and CompareExchange128, some cleanup

* Use intel syntax on assembly code

* Adjust identation

* Add CPUID check and fix exclusive reservation granule size

* Update schema multicore scheduling default value

* Make the cpu id check code lower case aswell
This commit is contained in:
gdkchan 2019-02-18 20:52:06 -03:00 committed by jduncanator
parent dd00a4b62d
commit 932224f051
19 changed files with 954 additions and 261 deletions

View file

@ -0,0 +1,151 @@
using System;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
static class CompareExchange128
{
private struct Int128
{
public ulong Low { get; }
public ulong High { get; }
public Int128(ulong low, ulong high)
{
Low = low;
High = high;
}
}
private delegate Int128 InterlockedCompareExchange(IntPtr address, Int128 expected, Int128 desired);
private delegate int GetCpuId();
private static InterlockedCompareExchange _interlockedCompareExchange;
static CompareExchange128()
{
if (RuntimeInformation.OSArchitecture != Architecture.X64 || !IsCmpxchg16bSupported())
{
throw new PlatformNotSupportedException();
}
byte[] interlockedCompareExchange128Code;
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
interlockedCompareExchange128Code = new byte[]
{
0x53, // push rbx
0x49, 0x8b, 0x00, // mov rax, [r8]
0x49, 0x8b, 0x19, // mov rbx, [r9]
0x49, 0x89, 0xca, // mov r10, rcx
0x49, 0x89, 0xd3, // mov r11, rdx
0x49, 0x8b, 0x49, 0x08, // mov rcx, [r9+8]
0x49, 0x8b, 0x50, 0x08, // mov rdx, [r8+8]
0xf0, 0x49, 0x0f, 0xc7, 0x0b, // lock cmpxchg16b [r11]
0x49, 0x89, 0x02, // mov [r10], rax
0x4c, 0x89, 0xd0, // mov rax, r10
0x49, 0x89, 0x52, 0x08, // mov [r10+8], rdx
0x5b, // pop rbx
0xc3 // ret
};
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
interlockedCompareExchange128Code = new byte[]
{
0x53, // push rbx
0x49, 0x89, 0xd1, // mov r9, rdx
0x48, 0x89, 0xcb, // mov rbx, rcx
0x48, 0x89, 0xf0, // mov rax, rsi
0x4c, 0x89, 0xca, // mov rdx, r9
0x4c, 0x89, 0xc1, // mov rcx, r8
0xf0, 0x48, 0x0f, 0xc7, 0x0f, // lock cmpxchg16b [rdi]
0x5b, // pop rbx
0xc3 // ret
};
}
else
{
throw new PlatformNotSupportedException();
}
IntPtr funcPtr = MapCodeAsExecutable(interlockedCompareExchange128Code);
_interlockedCompareExchange = Marshal.GetDelegateForFunctionPointer<InterlockedCompareExchange>(funcPtr);
}
private static bool IsCmpxchg16bSupported()
{
byte[] getCpuIdCode = new byte[]
{
0x53, // push rbx
0xb8, 0x01, 0x00, 0x00, 0x00, // mov eax, 0x1
0x0f, 0xa2, // cpuid
0x89, 0xc8, // mov eax, ecx
0x5b, // pop rbx
0xc3 // ret
};
IntPtr funcPtr = MapCodeAsExecutable(getCpuIdCode);
GetCpuId getCpuId = Marshal.GetDelegateForFunctionPointer<GetCpuId>(funcPtr);
int cpuId = getCpuId();
MemoryAlloc.Free(funcPtr);
return (cpuId & (1 << 13)) != 0;
}
private static IntPtr MapCodeAsExecutable(byte[] code)
{
ulong codeLength = (ulong)code.Length;
IntPtr funcPtr = MemoryAlloc.Allocate(codeLength);
unsafe
{
fixed (byte* codePtr = code)
{
byte* dest = (byte*)funcPtr;
long size = (long)codeLength;
Buffer.MemoryCopy(codePtr, dest, size, size);
}
}
MemoryAlloc.Reprotect(funcPtr, codeLength, MemoryProtection.Execute);
return funcPtr;
}
public static bool InterlockedCompareExchange128(
IntPtr address,
ulong expectedLow,
ulong expectedHigh,
ulong desiredLow,
ulong desiredHigh)
{
Int128 expected = new Int128(expectedLow, expectedHigh);
Int128 desired = new Int128(desiredLow, desiredHigh);
Int128 old = _interlockedCompareExchange(address, expected, desired);
return old.Low == expected.Low && old.High == expected.High;
}
public static void InterlockedRead128(IntPtr address, out ulong low, out ulong high)
{
Int128 zero = new Int128(0, 0);
Int128 old = _interlockedCompareExchange(address, zero, zero);
low = old.Low;
high = old.High;
}
}
}

View file

@ -0,0 +1,114 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
public static class MemoryAlloc
{
public static bool HasWriteWatchSupport => RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
public static IntPtr Allocate(ulong size)
{
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
IntPtr sizeNint = new IntPtr((long)size);
return MemoryAllocWindows.Allocate(sizeNint);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return MemoryAllocUnix.Allocate(size);
}
else
{
throw new PlatformNotSupportedException();
}
}
public static IntPtr AllocateWriteTracked(ulong size)
{
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
IntPtr sizeNint = new IntPtr((long)size);
return MemoryAllocWindows.AllocateWriteTracked(sizeNint);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return MemoryAllocUnix.Allocate(size);
}
else
{
throw new PlatformNotSupportedException();
}
}
public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission)
{
bool result;
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
IntPtr sizeNint = new IntPtr((long)size);
result = MemoryAllocWindows.Reprotect(address, sizeNint, permission);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
result = MemoryAllocUnix.Reprotect(address, size, permission);
}
else
{
throw new PlatformNotSupportedException();
}
if (!result)
{
throw new MemoryProtectionException(permission);
}
}
public static bool Free(IntPtr address)
{
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
return MemoryAllocWindows.Free(address);
}
else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
{
return MemoryAllocUnix.Free(address);
}
else
{
throw new PlatformNotSupportedException();
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool GetModifiedPages(
IntPtr address,
IntPtr size,
IntPtr[] addresses,
out ulong count)
{
//This is only supported on windows, but returning
//false (failed) is also valid for platforms without
//write tracking support on the OS.
if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
return MemoryAllocWindows.GetModifiedPages(address, size, addresses, out count);
}
else
{
count = 0;
return false;
}
}
}
}

View file

@ -0,0 +1,70 @@
using Mono.Unix.Native;
using System;
namespace ChocolArm64.Memory
{
static class MemoryAllocUnix
{
public static IntPtr Allocate(ulong size)
{
ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;
const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;
IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);
if (ptr == IntPtr.Zero)
{
throw new OutOfMemoryException();
}
unsafe
{
ptr = new IntPtr(ptr.ToInt64() + (long)pageSize);
*((ulong*)ptr - 1) = size;
}
return ptr;
}
public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection)
{
MmapProts prot = GetProtection(protection);
return Syscall.mprotect(address, size, prot) == 0;
}
private static MmapProts GetProtection(Memory.MemoryProtection protection)
{
switch (protection)
{
case Memory.MemoryProtection.None: return MmapProts.PROT_NONE;
case Memory.MemoryProtection.Read: return MmapProts.PROT_READ;
case Memory.MemoryProtection.ReadAndWrite: return MmapProts.PROT_READ | MmapProts.PROT_WRITE;
case Memory.MemoryProtection.ReadAndExecute: return MmapProts.PROT_READ | MmapProts.PROT_EXEC;
case Memory.MemoryProtection.Execute: return MmapProts.PROT_EXEC;
default: throw new ArgumentException($"Invalid permission \"{protection}\".");
}
}
public static bool Free(IntPtr address)
{
ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
ulong size;
unsafe
{
size = *((ulong*)address - 1);
address = new IntPtr(address.ToInt64() - (long)pageSize);
}
return Syscall.munmap(address, size + pageSize) == 0;
}
}
}

View file

@ -0,0 +1,155 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
static class MemoryAllocWindows
{
[Flags]
private enum AllocationType : uint
{
Commit = 0x1000,
Reserve = 0x2000,
Decommit = 0x4000,
Release = 0x8000,
Reset = 0x80000,
Physical = 0x400000,
TopDown = 0x100000,
WriteWatch = 0x200000,
LargePages = 0x20000000
}
[Flags]
private enum MemoryProtection
{
NoAccess = 0x01,
ReadOnly = 0x02,
ReadWrite = 0x04,
WriteCopy = 0x08,
Execute = 0x10,
ExecuteRead = 0x20,
ExecuteReadWrite = 0x40,
ExecuteWriteCopy = 0x80,
GuardModifierflag = 0x100,
NoCacheModifierflag = 0x200,
WriteCombineModifierflag = 0x400
}
private enum WriteWatchFlags : uint
{
None = 0,
Reset = 1
}
[DllImport("kernel32.dll")]
private static extern IntPtr VirtualAlloc(
IntPtr lpAddress,
IntPtr dwSize,
AllocationType flAllocationType,
MemoryProtection flProtect);
[DllImport("kernel32.dll")]
private static extern bool VirtualProtect(
IntPtr lpAddress,
IntPtr dwSize,
MemoryProtection flNewProtect,
out MemoryProtection lpflOldProtect);
[DllImport("kernel32.dll")]
private static extern bool VirtualFree(
IntPtr lpAddress,
uint dwSize,
AllocationType dwFreeType);
[DllImport("kernel32.dll")]
private static extern int GetWriteWatch(
WriteWatchFlags dwFlags,
IntPtr lpBaseAddress,
IntPtr dwRegionSize,
IntPtr[] lpAddresses,
ref ulong lpdwCount,
out uint lpdwGranularity);
public static IntPtr Allocate(IntPtr size)
{
const AllocationType flags =
AllocationType.Reserve |
AllocationType.Commit;
IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
if (ptr == IntPtr.Zero)
{
throw new OutOfMemoryException();
}
return ptr;
}
public static IntPtr AllocateWriteTracked(IntPtr size)
{
const AllocationType flags =
AllocationType.Reserve |
AllocationType.Commit |
AllocationType.WriteWatch;
IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
if (ptr == IntPtr.Zero)
{
throw new OutOfMemoryException();
}
return ptr;
}
public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection)
{
MemoryProtection prot = GetProtection(protection);
return VirtualProtect(address, size, prot, out _);
}
private static MemoryProtection GetProtection(Memory.MemoryProtection protection)
{
switch (protection)
{
case Memory.MemoryProtection.None: return MemoryProtection.NoAccess;
case Memory.MemoryProtection.Read: return MemoryProtection.ReadOnly;
case Memory.MemoryProtection.ReadAndWrite: return MemoryProtection.ReadWrite;
case Memory.MemoryProtection.ReadAndExecute: return MemoryProtection.ExecuteRead;
case Memory.MemoryProtection.Execute: return MemoryProtection.Execute;
default: throw new ArgumentException($"Invalid permission \"{protection}\".");
}
}
public static bool Free(IntPtr address)
{
return VirtualFree(address, 0, AllocationType.Release);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool GetModifiedPages(
IntPtr address,
IntPtr size,
IntPtr[] addresses,
out ulong count)
{
ulong pagesCount = (ulong)addresses.Length;
int result = GetWriteWatch(
WriteWatchFlags.Reset,
address,
size,
addresses,
ref pagesCount,
out uint granularity);
count = pagesCount;
return result == 0;
}
}
}

View file

@ -1,16 +1,16 @@
using ChocolArm64.Events;
using ChocolArm64.Exceptions;
using ChocolArm64.Instructions;
using ChocolArm64.State;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Threading;
using static ChocolArm64.Memory.CompareExchange128;
namespace ChocolArm64.Memory
{
public unsafe class MemoryManager : IMemory, IDisposable
@ -30,21 +30,6 @@ namespace ChocolArm64.Memory
private const int PtLvl0Bit = PageBits + PtLvl1Bits;
private const int PtLvl1Bit = PageBits;
private const long ErgMask = (4 << CpuThreadState.ErgSizeLog2) - 1;
private class ArmMonitor
{
public long Position;
public bool ExState;
public bool HasExclusiveAccess(long position)
{
return Position == position && ExState;
}
}
private Dictionary<int, ArmMonitor> _monitors;
private ConcurrentDictionary<long, IntPtr> _observedPages;
public IntPtr Ram { get; private set; }
@ -59,8 +44,6 @@ namespace ChocolArm64.Memory
public MemoryManager(IntPtr ram)
{
_monitors = new Dictionary<int, ArmMonitor>();
_observedPages = new ConcurrentDictionary<long, IntPtr>();
Ram = ram;
@ -75,104 +58,139 @@ namespace ChocolArm64.Memory
}
}
public void RemoveMonitor(int core)
internal bool AtomicCompareExchange2xInt32(
long position,
int expectedLow,
int expectedHigh,
int desiredLow,
int desiredHigh)
{
lock (_monitors)
{
ClearExclusive(core);
long expected = (uint)expectedLow;
long desired = (uint)desiredLow;
_monitors.Remove(core);
}
expected |= (long)expectedHigh << 32;
desired |= (long)desiredHigh << 32;
return AtomicCompareExchangeInt64(position, expected, desired);
}
public void SetExclusive(int core, long position)
internal bool AtomicCompareExchangeInt128(
long position,
ulong expectedLow,
ulong expectedHigh,
ulong desiredLow,
ulong desiredHigh)
{
position &= ~ErgMask;
lock (_monitors)
if ((position & 0xf) != 0)
{
foreach (ArmMonitor mon in _monitors.Values)
{
if (mon.Position == position && mon.ExState)
{
mon.ExState = false;
}
}
if (!_monitors.TryGetValue(core, out ArmMonitor threadMon))
{
threadMon = new ArmMonitor();
_monitors.Add(core, threadMon);
}
threadMon.Position = position;
threadMon.ExState = true;
AbortWithAlignmentFault(position);
}
IntPtr ptr = new IntPtr(TranslateWrite(position));
return InterlockedCompareExchange128(ptr, expectedLow, expectedHigh, desiredLow, desiredHigh);
}
public bool TestExclusive(int core, long position)
internal Vector128<float> AtomicReadInt128(long position)
{
//Note: Any call to this method also should be followed by a
//call to ClearExclusiveForStore if this method returns true.
position &= ~ErgMask;
Monitor.Enter(_monitors);
if (!_monitors.TryGetValue(core, out ArmMonitor threadMon))
if ((position & 0xf) != 0)
{
Monitor.Exit(_monitors);
return false;
AbortWithAlignmentFault(position);
}
bool exState = threadMon.HasExclusiveAccess(position);
IntPtr ptr = new IntPtr(Translate(position));
if (!exState)
{
Monitor.Exit(_monitors);
}
InterlockedRead128(ptr, out ulong low, out ulong high);
return exState;
Vector128<float> vector = default(Vector128<float>);
vector = VectorHelper.VectorInsertInt(low, vector, 0, 3);
vector = VectorHelper.VectorInsertInt(high, vector, 1, 3);
return vector;
}
public void ClearExclusiveForStore(int core)
public bool AtomicCompareExchangeByte(long position, byte expected, byte desired)
{
if (_monitors.TryGetValue(core, out ArmMonitor threadMon))
{
threadMon.ExState = false;
}
int* ptr = (int*)Translate(position);
Monitor.Exit(_monitors);
int currentValue = *ptr;
int expected32 = (currentValue & ~byte.MaxValue) | expected;
int desired32 = (currentValue & ~byte.MaxValue) | desired;
return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
}
public void ClearExclusive(int core)
public bool AtomicCompareExchangeInt16(long position, short expected, short desired)
{
lock (_monitors)
if ((position & 1) != 0)
{
if (_monitors.TryGetValue(core, out ArmMonitor threadMon))
{
threadMon.ExState = false;
}
AbortWithAlignmentFault(position);
}
int* ptr = (int*)Translate(position);
int currentValue = *ptr;
int expected32 = (currentValue & ~ushort.MaxValue) | (ushort)expected;
int desired32 = (currentValue & ~ushort.MaxValue) | (ushort)desired;
return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
}
public void WriteInt32ToSharedAddr(long position, int value)
public bool AtomicCompareExchangeInt32(long position, int expected, int desired)
{
long maskedPosition = position & ~ErgMask;
lock (_monitors)
if ((position & 3) != 0)
{
foreach (ArmMonitor mon in _monitors.Values)
{
if (mon.Position == maskedPosition && mon.ExState)
{
mon.ExState = false;
}
}
WriteInt32(position, value);
AbortWithAlignmentFault(position);
}
int* ptr = (int*)TranslateWrite(position);
return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected;
}
public bool AtomicCompareExchangeInt64(long position, long expected, long desired)
{
if ((position & 7) != 0)
{
AbortWithAlignmentFault(position);
}
long* ptr = (long*)TranslateWrite(position);
return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected;
}
public int AtomicIncrementInt32(long position)
{
if ((position & 3) != 0)
{
AbortWithAlignmentFault(position);
}
int* ptr = (int*)TranslateWrite(position);
return Interlocked.Increment(ref *ptr);
}
public int AtomicDecrementInt32(long position)
{
if ((position & 3) != 0)
{
AbortWithAlignmentFault(position);
}
int* ptr = (int*)TranslateWrite(position);
return Interlocked.Decrement(ref *ptr);
}
private void AbortWithAlignmentFault(long position)
{
//TODO: Abort mode and exception support on the CPU.
throw new InvalidOperationException($"Tried to compare exchange a misaligned address 0x{position:X16}.");
}
public sbyte ReadSByte(long position)

View file

@ -0,0 +1,16 @@
using System;
namespace ChocolArm64.Memory
{
[Flags]
public enum MemoryProtection
{
None = 0,
Read = 1 << 0,
Write = 1 << 1,
Execute = 1 << 2,
ReadAndWrite = Read | Write,
ReadAndExecute = Read | Execute
}
}

View file

@ -0,0 +1,10 @@
using System;
namespace ChocolArm64.Memory
{
class MemoryProtectionException : Exception
{
public MemoryProtectionException(MemoryProtection protection) :
base($"Failed to set memory protection to \"{protection}\".") { }
}
}