GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings (#6794)

* GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings

Essentially a retread of #4540, but on the GPU project now instead of the backend. This gives us much more control over, and knowledge of, where the buffer backing has changed, and lets us pre-emptively flush pages to host memory for quicker readback. It will enable other things in the future, but we'll get there when we get there.
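In sketch form, the mechanism looks roughly like this (a hypothetical illustration with made-up names; the real logic lives in the new BufferPreFlush/BufferBackingState classes mentioned in the commit list below):

```csharp
using System;
using System.Collections.Generic;

// Minimal sketch of the pre-flush idea: once the CPU has read back a region of
// a buffer, remember that region, and when the GPU writes it again, copy it to
// host memory immediately instead of letting the next readback stall on the GPU.
class PreFlushTracker
{
    private readonly List<(ulong Address, ulong Size)> _flushedRegions = new();

    // Record that the CPU flushed (read back) this region.
    public void RecordFlush(ulong address, ulong size)
    {
        _flushedRegions.Add((address, size));
    }

    // After a GPU write, return the overlapping regions that should be
    // pre-emptively copied to host memory, before the next syncpoint.
    public IEnumerable<(ulong Address, ulong Size)> GetPreFlushCopies(ulong address, ulong size)
    {
        foreach ((ulong regionAddress, ulong regionSize) in _flushedRegions)
        {
            ulong start = Math.Max(regionAddress, address);
            ulong end = Math.Min(regionAddress + regionSize, address + size);

            if (start < end)
            {
                yield return (start, end - start); // Overlap: pre-flush this range.
            }
        }
    }
}
```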

Performance is greatly improved in Hyrule Warriors: Age of Calamity and notably improved on average in TOTK. Performance in BOTW is restored to how it was before #4911, perhaps a bit better.

- Rewrites much of the buffer migration logic. The way disposal works might still want tightening up.
- Fixes an issue where the copy for texture pre-flush would happen _after_ the syncpoint.

TODO: remove a page from pre-flush if it isn't flushed after a certain number of copies.
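A minimal sketch of that TODO, which the "Add copy deactivation" commit below appears to address (the names and the threshold value are assumptions):

```csharp
// Sketch of copy deactivation: a page that keeps getting pre-flush copies
// without ever being read back stops earning its bandwidth, so drop it
// from pre-flush until it is flushed again.
class PreFlushPage
{
    private const int DeactivationCopyCount = 8; // Assumed threshold.

    private int _copiesSinceLastFlush;

    public bool Active { get; private set; } = true;

    public void OnCopy()
    {
        if (++_copiesSinceLastFlush >= DeactivationCopyCount)
        {
            Active = false; // Copied repeatedly but never flushed: deactivate.
        }
    }

    public void OnFlush()
    {
        _copiesSinceLastFlush = 0; // The copies are being used; keep the page active.
    }
}
```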

* Add copy deactivation

* Fix dependent virtual buffers

* Remove logging

* Fix format issues (maybe)

* Vulkan: Remove backing swap

* Add explicit memory access types for most buffers

* Fix typo

* Add device local force expiry, change buffer inheritance behaviour

* General cleanup, OGL fix

* BufferPreFlush comments

* BufferBackingState comments

* Add an extra precaution to BufferMigration

This is very unlikely, but it's important to cover loose ends like this.

* Address some feedback

* Docs
Authored by riperiperi on 2024-05-19 20:53:37 +01:00, committed by GitHub.
Commit eb1ce41b00 (parent 2f427deb67).
29 changed files with 1342 additions and 523 deletions.

src/Ryujinx.Graphics.Vulkan/BufferHolder.cs

@@ -1,4 +1,3 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System;
@@ -31,40 +30,29 @@ namespace Ryujinx.Graphics.Vulkan
private readonly VulkanRenderer _gd;
private readonly Device _device;
private MemoryAllocation _allocation;
private Auto<DisposableBuffer> _buffer;
private Auto<MemoryAllocation> _allocationAuto;
private readonly MemoryAllocation _allocation;
private readonly Auto<DisposableBuffer> _buffer;
private readonly Auto<MemoryAllocation> _allocationAuto;
private readonly bool _allocationImported;
private ulong _bufferHandle;
private readonly ulong _bufferHandle;
private CacheByRange<BufferHolder> _cachedConvertedBuffers;
public int Size { get; }
private IntPtr _map;
private readonly IntPtr _map;
private MultiFenceHolder _waitable;
private readonly MultiFenceHolder _waitable;
private bool _lastAccessIsWrite;
private BufferAllocationType _baseType;
private BufferAllocationType _currentType;
private bool _swapQueued;
public BufferAllocationType DesiredType { get; private set; }
private int _setCount;
private int _writeCount;
private int _flushCount;
private int _flushTemp;
private int _lastFlushWrite = -1;
private readonly BufferAllocationType _baseType;
private readonly BufferAllocationType _activeType;
private readonly ReaderWriterLockSlim _flushLock;
private FenceHolder _flushFence;
private int _flushWaiting;
private List<Action> _swapActions;
private byte[] _pendingData;
private BufferMirrorRangeList _pendingDataRanges;
private Dictionary<ulong, StagingBufferReserved> _mirrors;
@@ -83,8 +71,7 @@ namespace Ryujinx.Graphics.Vulkan
_map = allocation.HostPointer;
_baseType = type;
_currentType = currentType;
DesiredType = currentType;
_activeType = currentType;
_flushLock = new ReaderWriterLockSlim();
_useMirrors = gd.IsTBDR;
@@ -104,8 +91,7 @@ namespace Ryujinx.Graphics.Vulkan
_map = _allocation.HostPointer + offset;
_baseType = type;
_currentType = currentType;
DesiredType = currentType;
_activeType = currentType;
_flushLock = new ReaderWriterLockSlim();
}
@@ -120,164 +106,11 @@ namespace Ryujinx.Graphics.Vulkan
Size = size;
_baseType = BufferAllocationType.Sparse;
_currentType = BufferAllocationType.Sparse;
DesiredType = BufferAllocationType.Sparse;
_activeType = BufferAllocationType.Sparse;
_flushLock = new ReaderWriterLockSlim();
}
public bool TryBackingSwap(ref CommandBufferScoped? cbs)
{
if (_swapQueued && DesiredType != _currentType)
{
// Only swap if the buffer is not used in any queued command buffer.
bool isRented = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool);
if (!isRented && _gd.CommandBufferPool.OwnedByCurrentThread && !_flushLock.IsReadLockHeld && (_pendingData == null || cbs != null))
{
var currentAllocation = _allocationAuto;
var currentBuffer = _buffer;
IntPtr currentMap = _map;
(VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) = _gd.BufferManager.CreateBacking(_gd, Size, DesiredType, false, false, _currentType);
if (buffer.Handle != 0)
{
if (cbs != null)
{
ClearMirrors(cbs.Value, 0, Size);
}
_flushLock.EnterWriteLock();
ClearFlushFence();
_waitable = new MultiFenceHolder(Size);
_allocation = allocation;
_allocationAuto = new Auto<MemoryAllocation>(allocation);
_buffer = new Auto<DisposableBuffer>(new DisposableBuffer(_gd.Api, _device, buffer), this, _waitable, _allocationAuto);
_bufferHandle = buffer.Handle;
_map = allocation.HostPointer;
if (_map != IntPtr.Zero && currentMap != IntPtr.Zero)
{
// Copy data directly. Readbacks don't have to wait if this is done.
unsafe
{
new Span<byte>((void*)currentMap, Size).CopyTo(new Span<byte>((void*)_map, Size));
}
}
else
{
cbs ??= _gd.CommandBufferPool.Rent();
CommandBufferScoped cbsV = cbs.Value;
Copy(_gd, cbsV, currentBuffer, _buffer, 0, 0, Size);
// Need to wait for the data to reach the new buffer before data can be flushed.
_flushFence = _gd.CommandBufferPool.GetFence(cbsV.CommandBufferIndex);
_flushFence.Get();
}
Logger.Debug?.PrintMsg(LogClass.Gpu, $"Converted {Size} buffer {_currentType} to {resultType}");
_currentType = resultType;
if (_swapActions != null)
{
foreach (var action in _swapActions)
{
action();
}
_swapActions.Clear();
}
currentBuffer.Dispose();
currentAllocation.Dispose();
_gd.PipelineInternal.SwapBuffer(currentBuffer, _buffer);
_flushLock.ExitWriteLock();
}
_swapQueued = false;
return true;
}
return false;
}
_swapQueued = false;
return true;
}
private void ConsiderBackingSwap()
{
if (_baseType == BufferAllocationType.Auto)
{
// When flushed, wait for a bit more info to make a decision.
bool wasFlushed = _flushTemp > 0;
int multiplier = wasFlushed ? 2 : 0;
if (_writeCount >= (WriteCountThreshold << multiplier) || _setCount >= (SetCountThreshold << multiplier) || _flushCount >= (FlushCountThreshold << multiplier))
{
if (_flushCount > 0 || _flushTemp-- > 0)
{
// Buffers that flush should ideally be mapped in host address space for easy copies.
// If the buffer is large it will do better on GPU memory, as there will be more writes than data flushes (typically individual pages).
// If it is small, then it's likely most of the buffer will be flushed so we want it on host memory, as access is cached.
bool hostMappingSensitive = _gd.Vendor == Vendor.Nvidia;
bool deviceLocalMapped = Size > DeviceLocalSizeThreshold || (wasFlushed && _writeCount > _flushCount * 10 && hostMappingSensitive) || _currentType == BufferAllocationType.DeviceLocalMapped;
DesiredType = deviceLocalMapped ? BufferAllocationType.DeviceLocalMapped : BufferAllocationType.HostMapped;
// It's harder for a buffer that is flushed to revert to another type of mapping.
if (_flushCount > 0)
{
_flushTemp = 1000;
}
}
else if (_writeCount >= (WriteCountThreshold << multiplier))
{
// Buffers that are written often should ideally be in the device local heap. (Storage buffers)
DesiredType = BufferAllocationType.DeviceLocal;
}
else if (_setCount > (SetCountThreshold << multiplier))
{
// Buffers that have their data set often should ideally be host mapped. (Constant buffers)
DesiredType = BufferAllocationType.HostMapped;
}
_lastFlushWrite = -1;
_flushCount = 0;
_writeCount = 0;
_setCount = 0;
}
if (!_swapQueued && DesiredType != _currentType)
{
_swapQueued = true;
_gd.PipelineInternal.AddBackingSwap(this);
}
}
}
public void Pin()
{
if (_baseType == BufferAllocationType.Auto)
{
_baseType = _currentType;
}
}
public unsafe Auto<DisposableBufferView> CreateView(VkFormat format, int offset, int size, Action invalidateView)
{
var bufferViewCreateInfo = new BufferViewCreateInfo
@@ -291,19 +124,9 @@ namespace Ryujinx.Graphics.Vulkan
_gd.Api.CreateBufferView(_device, bufferViewCreateInfo, null, out var bufferView).ThrowOnError();
(_swapActions ??= new List<Action>()).Add(invalidateView);
return new Auto<DisposableBufferView>(new DisposableBufferView(_gd.Api, _device, bufferView), this, _waitable, _buffer);
}
public void InheritMetrics(BufferHolder other)
{
_setCount = other._setCount;
_writeCount = other._writeCount;
_flushCount = other._flushCount;
_flushTemp = other._flushTemp;
}
public unsafe void InsertBarrier(CommandBuffer commandBuffer, bool isWrite)
{
// If the last access is write, we always need a barrier to be sure we will read or modify
@@ -423,18 +246,8 @@ namespace Ryujinx.Graphics.Vulkan
{
if (isWrite)
{
_writeCount++;
SignalWrite(0, Size);
}
else if (isSSBO)
{
// Always consider SSBO access for swapping to device local memory.
_writeCount++;
ConsiderBackingSwap();
}
return _buffer;
}
@@ -443,8 +256,6 @@ namespace Ryujinx.Graphics.Vulkan
{
if (isWrite)
{
_writeCount++;
SignalWrite(offset, size);
}
@@ -543,8 +354,6 @@ namespace Ryujinx.Graphics.Vulkan
public void SignalWrite(int offset, int size)
{
ConsiderBackingSwap();
if (offset == 0 && size == Size)
{
_cachedConvertedBuffers.Clear();
@@ -624,13 +433,6 @@ namespace Ryujinx.Graphics.Vulkan
WaitForFlushFence();
if (_lastFlushWrite != _writeCount)
{
// If it's on the same page as the last flush, ignore it.
_lastFlushWrite = _writeCount;
_flushCount++;
}
Span<byte> result;
if (_map != IntPtr.Zero)
@@ -711,8 +513,7 @@ namespace Ryujinx.Graphics.Vulkan
return;
}
_setCount++;
bool allowMirror = _useMirrors && allowCbsWait && cbs != null && _currentType <= BufferAllocationType.HostMapped;
bool allowMirror = _useMirrors && allowCbsWait && cbs != null && _activeType <= BufferAllocationType.HostMapped;
if (_map != IntPtr.Zero)
{
@@ -863,8 +664,6 @@ namespace Ryujinx.Graphics.Vulkan
var dstBuffer = GetBuffer(cbs.CommandBuffer, dstOffset, data.Length, true).Get(cbs, dstOffset, data.Length, true).Value;
_writeCount--;
InsertBufferBarrier(
_gd,
cbs.CommandBuffer,
@@ -1100,8 +899,6 @@ namespace Ryujinx.Graphics.Vulkan
public void Dispose()
{
_swapQueued = false;
_gd.PipelineInternal?.FlushCommandsIfWeightExceeding(_buffer, (ulong)Size);
_buffer.Dispose();

src/Ryujinx.Graphics.Vulkan/BufferManager.cs

@@ -165,10 +165,6 @@ namespace Ryujinx.Graphics.Vulkan
if (TryGetBuffer(range.Handle, out var existingHolder))
{
// Since this buffer now also owns the memory from the referenced buffer,
// we pin it to ensure the memory location will not change.
existingHolder.Pin();
(var memory, var offset) = existingHolder.GetDeviceMemoryAndOffset();
memoryBinds[index] = new SparseMemoryBind()
@@ -235,10 +231,9 @@ namespace Ryujinx.Graphics.Vulkan
int size,
bool sparseCompatible = false,
BufferAllocationType baseType = BufferAllocationType.HostMapped,
BufferHandle storageHint = default,
bool forceMirrors = false)
{
return CreateWithHandle(gd, size, out _, sparseCompatible, baseType, storageHint, forceMirrors);
return CreateWithHandle(gd, size, out _, sparseCompatible, baseType, forceMirrors);
}
public BufferHandle CreateWithHandle(
@@ -247,10 +242,9 @@ namespace Ryujinx.Graphics.Vulkan
out BufferHolder holder,
bool sparseCompatible = false,
BufferAllocationType baseType = BufferAllocationType.HostMapped,
BufferHandle storageHint = default,
bool forceMirrors = false)
{
holder = Create(gd, size, forConditionalRendering: false, sparseCompatible, baseType, storageHint);
holder = Create(gd, size, forConditionalRendering: false, sparseCompatible, baseType);
if (holder == null)
{
return BufferHandle.Null;
@@ -387,31 +381,13 @@ namespace Ryujinx.Graphics.Vulkan
int size,
bool forConditionalRendering = false,
bool sparseCompatible = false,
BufferAllocationType baseType = BufferAllocationType.HostMapped,
BufferHandle storageHint = default)
BufferAllocationType baseType = BufferAllocationType.HostMapped)
{
BufferAllocationType type = baseType;
BufferHolder storageHintHolder = null;
if (baseType == BufferAllocationType.Auto)
{
if (gd.IsSharedMemory)
{
baseType = BufferAllocationType.HostMapped;
type = baseType;
}
else
{
type = size >= BufferHolder.DeviceLocalSizeThreshold ? BufferAllocationType.DeviceLocal : BufferAllocationType.HostMapped;
}
if (storageHint != BufferHandle.Null)
{
if (TryGetBuffer(storageHint, out storageHintHolder))
{
type = storageHintHolder.DesiredType;
}
}
type = BufferAllocationType.HostMapped;
}
(VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) =
@@ -421,11 +397,6 @@ namespace Ryujinx.Graphics.Vulkan
{
var holder = new BufferHolder(gd, _device, buffer, allocation, size, baseType, resultType);
if (storageHintHolder != null)
{
holder.InheritMetrics(storageHintHolder);
}
return holder;
}

src/Ryujinx.Graphics.Vulkan/EnumConversion.cs

@@ -424,10 +424,20 @@ namespace Ryujinx.Graphics.Vulkan
public static BufferAllocationType Convert(this BufferAccess access)
{
if (access.HasFlag(BufferAccess.FlushPersistent) || access.HasFlag(BufferAccess.Stream))
BufferAccess memType = access & BufferAccess.MemoryTypeMask;
if (memType == BufferAccess.HostMemory || access.HasFlag(BufferAccess.Stream))
{
return BufferAllocationType.HostMapped;
}
else if (memType == BufferAccess.DeviceMemory)
{
return BufferAllocationType.DeviceLocal;
}
else if (memType == BufferAccess.DeviceMemoryMapped)
{
return BufferAllocationType.DeviceLocalMapped;
}
return BufferAllocationType.Auto;
}
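For illustration only (not part of the diff): assuming Stream sits outside MemoryTypeMask, as the masking above implies, and that the zero value of BufferAccess carries no memory-type bits, the new mapping behaves like this:

```csharp
// Explicit memory types now come straight from the BufferAccess flags:
BufferAllocationType t1 = BufferAccess.DeviceMemory.Convert();       // DeviceLocal
BufferAllocationType t2 = BufferAccess.DeviceMemoryMapped.Convert(); // DeviceLocalMapped

// Stream overrides the memory-type bits and forces host mapping:
BufferAllocationType t3 = (BufferAccess.DeviceMemoryMapped | BufferAccess.Stream).Convert(); // HostMapped

// With no explicit memory type requested, the buffer stays on the Auto path:
BufferAllocationType t4 = default(BufferAccess).Convert(); // Auto
```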

src/Ryujinx.Graphics.Vulkan/PipelineFull.cs

@@ -222,20 +222,6 @@ namespace Ryujinx.Graphics.Vulkan
}
}
private void TryBackingSwaps()
{
CommandBufferScoped? cbs = null;
_backingSwaps.RemoveAll(holder => holder.TryBackingSwap(ref cbs));
cbs?.Dispose();
}
public void AddBackingSwap(BufferHolder holder)
{
_backingSwaps.Add(holder);
}
public void Restore()
{
if (Pipeline != null)
@@ -291,8 +277,6 @@ namespace Ryujinx.Graphics.Vulkan
Gd.ResetCounterPool();
TryBackingSwaps();
Restore();
}

src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs

@@ -486,12 +486,7 @@ namespace Ryujinx.Graphics.Vulkan
public BufferHandle CreateBuffer(int size, BufferAccess access)
{
return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), default, access == BufferAccess.Stream);
}
public BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint)
{
return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), storageHint);
return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), access.HasFlag(BufferAccess.Stream));
}
public BufferHandle CreateBuffer(nint pointer, int size)
@@ -675,9 +670,23 @@ namespace Ryujinx.Graphics.Vulkan
var limits = _physicalDevice.PhysicalDeviceProperties.Limits;
var mainQueueProperties = _physicalDevice.QueueFamilyProperties[QueueFamilyIndex];
SystemMemoryType memoryType;
if (IsSharedMemory)
{
memoryType = SystemMemoryType.UnifiedMemory;
}
else
{
memoryType = Vendor == Vendor.Nvidia ?
SystemMemoryType.DedicatedMemorySlowStorage :
SystemMemoryType.DedicatedMemory;
}
return new Capabilities(
api: TargetApi.Vulkan,
GpuVendor,
memoryType: memoryType,
hasFrontFacingBug: IsIntelWindows,
hasVectorIndexingBug: Vendor == Vendor.Qualcomm,
needsFragmentOutputSpecialization: IsMoltenVk,