GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings (#6794)
* GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings

  Essentially retreading #4540, but the migration now lives in the GPU project instead of the backend. This gives us much more control over, and knowledge of, where the buffer backing has been changed, and allows us to pre-emptively flush pages to host memory for quicker readback. It will allow us to do other things in the future, but we'll get there when we get there.

  Performance greatly improved in Hyrule Warriors: Age of Calamity.
  Performance notably improved in TOTK (average).
  Performance for BOTW restored to how it was before #4911, perhaps a bit better.

  - Rewrites a large part of the buffer migration code. Might want to tighten up how disposal works.
  - Fixed an issue where the copy for texture pre-flush would happen _after_ the syncpoint.

  TODO: remove a page from pre-flush if it isn't flushed after a certain number of copies.

* Add copy deactivation
* Fix dependent virtual buffers
* Remove logging
* Fix format issues (maybe)
* Vulkan: Remove backing swap
* Add explicit memory access types for most buffers
* Fix typo
* Add device local force expiry, change buffer inheritance behaviour
* General cleanup, OGL fix
* BufferPreFlush comments
* BufferBackingState comments
* Add an extra precaution to BufferMigration

  This is very unlikely, but it's important to cover loose ends like this.

* Address some feedback
* Docs
This commit is contained in:
parent 2f427deb67
commit eb1ce41b00
29 changed files with 1342 additions and 523 deletions
|
@ -1,4 +1,3 @@
|
|||
using Ryujinx.Common.Logging;
|
||||
using Ryujinx.Graphics.GAL;
|
||||
using Silk.NET.Vulkan;
|
||||
using System;
|
||||
|
@ -31,40 +30,29 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
private readonly VulkanRenderer _gd;
|
||||
private readonly Device _device;
|
||||
private MemoryAllocation _allocation;
|
||||
private Auto<DisposableBuffer> _buffer;
|
||||
private Auto<MemoryAllocation> _allocationAuto;
|
||||
private readonly MemoryAllocation _allocation;
|
||||
private readonly Auto<DisposableBuffer> _buffer;
|
||||
private readonly Auto<MemoryAllocation> _allocationAuto;
|
||||
private readonly bool _allocationImported;
|
||||
private ulong _bufferHandle;
|
||||
private readonly ulong _bufferHandle;
|
||||
|
||||
private CacheByRange<BufferHolder> _cachedConvertedBuffers;
|
||||
|
||||
public int Size { get; }
|
||||
|
||||
private IntPtr _map;
|
||||
private readonly IntPtr _map;
|
||||
|
||||
private MultiFenceHolder _waitable;
|
||||
private readonly MultiFenceHolder _waitable;
|
||||
|
||||
private bool _lastAccessIsWrite;
|
||||
|
||||
private BufferAllocationType _baseType;
|
||||
private BufferAllocationType _currentType;
|
||||
private bool _swapQueued;
|
||||
|
||||
public BufferAllocationType DesiredType { get; private set; }
|
||||
|
||||
private int _setCount;
|
||||
private int _writeCount;
|
||||
private int _flushCount;
|
||||
private int _flushTemp;
|
||||
private int _lastFlushWrite = -1;
|
||||
private readonly BufferAllocationType _baseType;
|
||||
private readonly BufferAllocationType _activeType;
|
||||
|
||||
private readonly ReaderWriterLockSlim _flushLock;
|
||||
private FenceHolder _flushFence;
|
||||
private int _flushWaiting;
|
||||
|
||||
private List<Action> _swapActions;
|
||||
|
||||
private byte[] _pendingData;
|
||||
private BufferMirrorRangeList _pendingDataRanges;
|
||||
private Dictionary<ulong, StagingBufferReserved> _mirrors;
|
||||
|
@ -83,8 +71,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
_map = allocation.HostPointer;
|
||||
|
||||
_baseType = type;
|
||||
_currentType = currentType;
|
||||
DesiredType = currentType;
|
||||
_activeType = currentType;
|
||||
|
||||
_flushLock = new ReaderWriterLockSlim();
|
||||
_useMirrors = gd.IsTBDR;
|
||||
|
@ -104,8 +91,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
_map = _allocation.HostPointer + offset;
|
||||
|
||||
_baseType = type;
|
||||
_currentType = currentType;
|
||||
DesiredType = currentType;
|
||||
_activeType = currentType;
|
||||
|
||||
_flushLock = new ReaderWriterLockSlim();
|
||||
}
|
||||
|
@ -120,164 +106,11 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
Size = size;
|
||||
|
||||
_baseType = BufferAllocationType.Sparse;
|
||||
_currentType = BufferAllocationType.Sparse;
|
||||
DesiredType = BufferAllocationType.Sparse;
|
||||
_activeType = BufferAllocationType.Sparse;
|
||||
|
||||
_flushLock = new ReaderWriterLockSlim();
|
||||
}
|
||||
|
||||
public bool TryBackingSwap(ref CommandBufferScoped? cbs)
|
||||
{
|
||||
if (_swapQueued && DesiredType != _currentType)
|
||||
{
|
||||
// Only swap if the buffer is not used in any queued command buffer.
|
||||
bool isRented = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool);
|
||||
|
||||
if (!isRented && _gd.CommandBufferPool.OwnedByCurrentThread && !_flushLock.IsReadLockHeld && (_pendingData == null || cbs != null))
|
||||
{
|
||||
var currentAllocation = _allocationAuto;
|
||||
var currentBuffer = _buffer;
|
||||
IntPtr currentMap = _map;
|
||||
|
||||
(VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) = _gd.BufferManager.CreateBacking(_gd, Size, DesiredType, false, false, _currentType);
|
||||
|
||||
if (buffer.Handle != 0)
|
||||
{
|
||||
if (cbs != null)
|
||||
{
|
||||
ClearMirrors(cbs.Value, 0, Size);
|
||||
}
|
||||
|
||||
_flushLock.EnterWriteLock();
|
||||
|
||||
ClearFlushFence();
|
||||
|
||||
_waitable = new MultiFenceHolder(Size);
|
||||
|
||||
_allocation = allocation;
|
||||
_allocationAuto = new Auto<MemoryAllocation>(allocation);
|
||||
_buffer = new Auto<DisposableBuffer>(new DisposableBuffer(_gd.Api, _device, buffer), this, _waitable, _allocationAuto);
|
||||
_bufferHandle = buffer.Handle;
|
||||
_map = allocation.HostPointer;
|
||||
|
||||
if (_map != IntPtr.Zero && currentMap != IntPtr.Zero)
|
||||
{
|
||||
// Copy data directly. Readbacks don't have to wait if this is done.
|
||||
|
||||
unsafe
|
||||
{
|
||||
new Span<byte>((void*)currentMap, Size).CopyTo(new Span<byte>((void*)_map, Size));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cbs ??= _gd.CommandBufferPool.Rent();
|
||||
|
||||
CommandBufferScoped cbsV = cbs.Value;
|
||||
|
||||
Copy(_gd, cbsV, currentBuffer, _buffer, 0, 0, Size);
|
||||
|
||||
// Need to wait for the data to reach the new buffer before data can be flushed.
|
||||
|
||||
_flushFence = _gd.CommandBufferPool.GetFence(cbsV.CommandBufferIndex);
|
||||
_flushFence.Get();
|
||||
}
|
||||
|
||||
Logger.Debug?.PrintMsg(LogClass.Gpu, $"Converted {Size} buffer {_currentType} to {resultType}");
|
||||
|
||||
_currentType = resultType;
|
||||
|
||||
if (_swapActions != null)
|
||||
{
|
||||
foreach (var action in _swapActions)
|
||||
{
|
||||
action();
|
||||
}
|
||||
|
||||
_swapActions.Clear();
|
||||
}
|
||||
|
||||
currentBuffer.Dispose();
|
||||
currentAllocation.Dispose();
|
||||
|
||||
_gd.PipelineInternal.SwapBuffer(currentBuffer, _buffer);
|
||||
|
||||
_flushLock.ExitWriteLock();
|
||||
}
|
||||
|
||||
_swapQueued = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
_swapQueued = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private void ConsiderBackingSwap()
|
||||
{
|
||||
if (_baseType == BufferAllocationType.Auto)
|
||||
{
|
||||
// When flushed, wait for a bit more info to make a decision.
|
||||
bool wasFlushed = _flushTemp > 0;
|
||||
int multiplier = wasFlushed ? 2 : 0;
|
||||
if (_writeCount >= (WriteCountThreshold << multiplier) || _setCount >= (SetCountThreshold << multiplier) || _flushCount >= (FlushCountThreshold << multiplier))
|
||||
{
|
||||
if (_flushCount > 0 || _flushTemp-- > 0)
|
||||
{
|
||||
// Buffers that flush should ideally be mapped in host address space for easy copies.
|
||||
// If the buffer is large it will do better on GPU memory, as there will be more writes than data flushes (typically individual pages).
|
||||
// If it is small, then it's likely most of the buffer will be flushed so we want it on host memory, as access is cached.
|
||||
|
||||
bool hostMappingSensitive = _gd.Vendor == Vendor.Nvidia;
|
||||
bool deviceLocalMapped = Size > DeviceLocalSizeThreshold || (wasFlushed && _writeCount > _flushCount * 10 && hostMappingSensitive) || _currentType == BufferAllocationType.DeviceLocalMapped;
|
||||
|
||||
DesiredType = deviceLocalMapped ? BufferAllocationType.DeviceLocalMapped : BufferAllocationType.HostMapped;
|
||||
|
||||
// It's harder for a buffer that is flushed to revert to another type of mapping.
|
||||
if (_flushCount > 0)
|
||||
{
|
||||
_flushTemp = 1000;
|
||||
}
|
||||
}
|
||||
else if (_writeCount >= (WriteCountThreshold << multiplier))
|
||||
{
|
||||
// Buffers that are written often should ideally be in the device local heap. (Storage buffers)
|
||||
DesiredType = BufferAllocationType.DeviceLocal;
|
||||
}
|
||||
else if (_setCount > (SetCountThreshold << multiplier))
|
||||
{
|
||||
// Buffers that have their data set often should ideally be host mapped. (Constant buffers)
|
||||
DesiredType = BufferAllocationType.HostMapped;
|
||||
}
|
||||
|
||||
_lastFlushWrite = -1;
|
||||
_flushCount = 0;
|
||||
_writeCount = 0;
|
||||
_setCount = 0;
|
||||
}
|
||||
|
||||
if (!_swapQueued && DesiredType != _currentType)
|
||||
{
|
||||
_swapQueued = true;
|
||||
|
||||
_gd.PipelineInternal.AddBackingSwap(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void Pin()
|
||||
{
|
||||
if (_baseType == BufferAllocationType.Auto)
|
||||
{
|
||||
_baseType = _currentType;
|
||||
}
|
||||
}
|
||||
|
||||
public unsafe Auto<DisposableBufferView> CreateView(VkFormat format, int offset, int size, Action invalidateView)
|
||||
{
|
||||
var bufferViewCreateInfo = new BufferViewCreateInfo
|
||||
|
@ -291,19 +124,9 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
_gd.Api.CreateBufferView(_device, bufferViewCreateInfo, null, out var bufferView).ThrowOnError();
|
||||
|
||||
(_swapActions ??= new List<Action>()).Add(invalidateView);
|
||||
|
||||
return new Auto<DisposableBufferView>(new DisposableBufferView(_gd.Api, _device, bufferView), this, _waitable, _buffer);
|
||||
}
|
||||
|
||||
public void InheritMetrics(BufferHolder other)
|
||||
{
|
||||
_setCount = other._setCount;
|
||||
_writeCount = other._writeCount;
|
||||
_flushCount = other._flushCount;
|
||||
_flushTemp = other._flushTemp;
|
||||
}
|
||||
|
||||
public unsafe void InsertBarrier(CommandBuffer commandBuffer, bool isWrite)
|
||||
{
|
||||
// If the last access is write, we always need a barrier to be sure we will read or modify
|
||||
|
@ -423,18 +246,8 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
{
|
||||
if (isWrite)
|
||||
{
|
||||
_writeCount++;
|
||||
|
||||
SignalWrite(0, Size);
|
||||
}
|
||||
else if (isSSBO)
|
||||
{
|
||||
// Always consider SSBO access for swapping to device local memory.
|
||||
|
||||
_writeCount++;
|
||||
|
||||
ConsiderBackingSwap();
|
||||
}
|
||||
|
||||
return _buffer;
|
||||
}
|
||||
|
@ -443,8 +256,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
{
|
||||
if (isWrite)
|
||||
{
|
||||
_writeCount++;
|
||||
|
||||
SignalWrite(offset, size);
|
||||
}
|
||||
|
||||
|
@ -543,8 +354,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
public void SignalWrite(int offset, int size)
|
||||
{
|
||||
ConsiderBackingSwap();
|
||||
|
||||
if (offset == 0 && size == Size)
|
||||
{
|
||||
_cachedConvertedBuffers.Clear();
|
||||
|
@ -624,13 +433,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
WaitForFlushFence();
|
||||
|
||||
if (_lastFlushWrite != _writeCount)
|
||||
{
|
||||
// If it's on the same page as the last flush, ignore it.
|
||||
_lastFlushWrite = _writeCount;
|
||||
_flushCount++;
|
||||
}
|
||||
|
||||
Span<byte> result;
|
||||
|
||||
if (_map != IntPtr.Zero)
|
||||
|
@ -711,8 +513,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
return;
|
||||
}
|
||||
|
||||
_setCount++;
|
||||
bool allowMirror = _useMirrors && allowCbsWait && cbs != null && _currentType <= BufferAllocationType.HostMapped;
|
||||
bool allowMirror = _useMirrors && allowCbsWait && cbs != null && _activeType <= BufferAllocationType.HostMapped;
|
||||
|
||||
if (_map != IntPtr.Zero)
|
||||
{
|
||||
|
@ -863,8 +664,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
var dstBuffer = GetBuffer(cbs.CommandBuffer, dstOffset, data.Length, true).Get(cbs, dstOffset, data.Length, true).Value;
|
||||
|
||||
_writeCount--;
|
||||
|
||||
InsertBufferBarrier(
|
||||
_gd,
|
||||
cbs.CommandBuffer,
|
||||
|
@ -1100,8 +899,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
public void Dispose()
|
||||
{
|
||||
_swapQueued = false;
|
||||
|
||||
_gd.PipelineInternal?.FlushCommandsIfWeightExceeding(_buffer, (ulong)Size);
|
||||
|
||||
_buffer.Dispose();
|
||||
|
|
|
@ -165,10 +165,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
if (TryGetBuffer(range.Handle, out var existingHolder))
|
||||
{
|
||||
// Since this buffer now also owns the memory from the referenced buffer,
|
||||
// we pin it to ensure the memory location will not change.
|
||||
existingHolder.Pin();
|
||||
|
||||
(var memory, var offset) = existingHolder.GetDeviceMemoryAndOffset();
|
||||
|
||||
memoryBinds[index] = new SparseMemoryBind()
|
||||
|
@ -235,10 +231,9 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
int size,
|
||||
bool sparseCompatible = false,
|
||||
BufferAllocationType baseType = BufferAllocationType.HostMapped,
|
||||
BufferHandle storageHint = default,
|
||||
bool forceMirrors = false)
|
||||
{
|
||||
return CreateWithHandle(gd, size, out _, sparseCompatible, baseType, storageHint, forceMirrors);
|
||||
return CreateWithHandle(gd, size, out _, sparseCompatible, baseType, forceMirrors);
|
||||
}
|
||||
|
||||
public BufferHandle CreateWithHandle(
|
||||
|
@ -247,10 +242,9 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
out BufferHolder holder,
|
||||
bool sparseCompatible = false,
|
||||
BufferAllocationType baseType = BufferAllocationType.HostMapped,
|
||||
BufferHandle storageHint = default,
|
||||
bool forceMirrors = false)
|
||||
{
|
||||
holder = Create(gd, size, forConditionalRendering: false, sparseCompatible, baseType, storageHint);
|
||||
holder = Create(gd, size, forConditionalRendering: false, sparseCompatible, baseType);
|
||||
if (holder == null)
|
||||
{
|
||||
return BufferHandle.Null;
|
||||
|
@ -387,31 +381,13 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
int size,
|
||||
bool forConditionalRendering = false,
|
||||
bool sparseCompatible = false,
|
||||
BufferAllocationType baseType = BufferAllocationType.HostMapped,
|
||||
BufferHandle storageHint = default)
|
||||
BufferAllocationType baseType = BufferAllocationType.HostMapped)
|
||||
{
|
||||
BufferAllocationType type = baseType;
|
||||
BufferHolder storageHintHolder = null;
|
||||
|
||||
if (baseType == BufferAllocationType.Auto)
|
||||
{
|
||||
if (gd.IsSharedMemory)
|
||||
{
|
||||
baseType = BufferAllocationType.HostMapped;
|
||||
type = baseType;
|
||||
}
|
||||
else
|
||||
{
|
||||
type = size >= BufferHolder.DeviceLocalSizeThreshold ? BufferAllocationType.DeviceLocal : BufferAllocationType.HostMapped;
|
||||
}
|
||||
|
||||
if (storageHint != BufferHandle.Null)
|
||||
{
|
||||
if (TryGetBuffer(storageHint, out storageHintHolder))
|
||||
{
|
||||
type = storageHintHolder.DesiredType;
|
||||
}
|
||||
}
|
||||
type = BufferAllocationType.HostMapped;
|
||||
}
|
||||
|
||||
(VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) =
|
||||
|
@ -421,11 +397,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
{
|
||||
var holder = new BufferHolder(gd, _device, buffer, allocation, size, baseType, resultType);
|
||||
|
||||
if (storageHintHolder != null)
|
||||
{
|
||||
holder.InheritMetrics(storageHintHolder);
|
||||
}
|
||||
|
||||
return holder;
|
||||
}
|
||||
|
||||
|
|
|
@ -424,10 +424,20 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
public static BufferAllocationType Convert(this BufferAccess access)
|
||||
{
|
||||
if (access.HasFlag(BufferAccess.FlushPersistent) || access.HasFlag(BufferAccess.Stream))
|
||||
BufferAccess memType = access & BufferAccess.MemoryTypeMask;
|
||||
|
||||
if (memType == BufferAccess.HostMemory || access.HasFlag(BufferAccess.Stream))
|
||||
{
|
||||
return BufferAllocationType.HostMapped;
|
||||
}
|
||||
else if (memType == BufferAccess.DeviceMemory)
|
||||
{
|
||||
return BufferAllocationType.DeviceLocal;
|
||||
}
|
||||
else if (memType == BufferAccess.DeviceMemoryMapped)
|
||||
{
|
||||
return BufferAllocationType.DeviceLocalMapped;
|
||||
}
|
||||
|
||||
return BufferAllocationType.Auto;
|
||||
}
|
||||
|
|
|
@ -222,20 +222,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
}
|
||||
}
|
||||
|
||||
private void TryBackingSwaps()
|
||||
{
|
||||
CommandBufferScoped? cbs = null;
|
||||
|
||||
_backingSwaps.RemoveAll(holder => holder.TryBackingSwap(ref cbs));
|
||||
|
||||
cbs?.Dispose();
|
||||
}
|
||||
|
||||
public void AddBackingSwap(BufferHolder holder)
|
||||
{
|
||||
_backingSwaps.Add(holder);
|
||||
}
|
||||
|
||||
public void Restore()
|
||||
{
|
||||
if (Pipeline != null)
|
||||
|
@ -291,8 +277,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
Gd.ResetCounterPool();
|
||||
|
||||
TryBackingSwaps();
|
||||
|
||||
Restore();
|
||||
}
|
||||
|
||||
|
|
|
@ -486,12 +486,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
|
||||
public BufferHandle CreateBuffer(int size, BufferAccess access)
|
||||
{
|
||||
return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), default, access == BufferAccess.Stream);
|
||||
}
|
||||
|
||||
public BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint)
|
||||
{
|
||||
return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), storageHint);
|
||||
return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), access.HasFlag(BufferAccess.Stream));
|
||||
}
|
||||
|
||||
public BufferHandle CreateBuffer(nint pointer, int size)
|
||||
|
@ -675,9 +670,23 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
var limits = _physicalDevice.PhysicalDeviceProperties.Limits;
|
||||
var mainQueueProperties = _physicalDevice.QueueFamilyProperties[QueueFamilyIndex];
|
||||
|
||||
SystemMemoryType memoryType;
|
||||
|
||||
if (IsSharedMemory)
|
||||
{
|
||||
memoryType = SystemMemoryType.UnifiedMemory;
|
||||
}
|
||||
else
|
||||
{
|
||||
memoryType = Vendor == Vendor.Nvidia ?
|
||||
SystemMemoryType.DedicatedMemorySlowStorage :
|
||||
SystemMemoryType.DedicatedMemory;
|
||||
}
|
||||
|
||||
return new Capabilities(
|
||||
api: TargetApi.Vulkan,
|
||||
GpuVendor,
|
||||
memoryType: memoryType,
|
||||
hasFrontFacingBug: IsIntelWindows,
|
||||
hasVectorIndexingBug: Vendor == Vendor.Qualcomm,
|
||||
needsFragmentOutputSpecialization: IsMoltenVk,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue