Add support for HLE macros and accelerate MultiDrawElementsIndirectCount #2 (#2557)

* Add support for HLE macros and accelerate MultiDrawElementsIndirectCount

* Add missing barrier

* Fix index buffer count

* Add support check for each macro hle before use

* Add missing xml doc

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
This commit is contained in:
mpnico 2021-08-26 23:50:28 +02:00 committed by GitHub
parent 5cab8ea4ad
commit 8e1adb95cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 552 additions and 40 deletions

View file

@ -161,6 +161,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <param name="argument">Method call argument</param>
public void SetReference(int argument)
{
_context.Renderer.Pipeline.CommandBufferBarrier();
_context.CreateHostSyncIfNeeded();
}
@ -195,10 +197,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// Pushes an argument to a macro.
/// </summary>
/// <param name="index">Index of the macro</param>
/// <param name="gpuVa">GPU virtual address where the command word is located</param>
/// <param name="argument">Argument to be pushed to the macro</param>
public void MmePushArgument(int index, int argument)
public void MmePushArgument(int index, ulong gpuVa, int argument)
{
_macros[index].PushArgument(argument);
_macros[index].PushArgument(gpuVa, argument);
}
/// <summary>
@ -208,7 +211,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <param name="argument">Initial argument passed to the macro</param>
public void MmeStart(int index, int argument)
{
_macros[index].StartExecution(argument);
_macros[index].StartExecution(_context, _parent, _macroCode, argument);
}
/// <summary>

View file

@ -54,11 +54,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <summary>
/// Fetch the command buffer.
/// </summary>
public void Fetch(MemoryManager memoryManager)
/// <param name="flush">If true, flushes potential GPU written data before reading the command buffer</param>
public void Fetch(MemoryManager memoryManager, bool flush = true)
{
if (Words == null)
{
Words = MemoryMarshal.Cast<byte, int>(memoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, true)).ToArray();
Words = MemoryMarshal.Cast<byte, int>(memoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, flush)).ToArray();
}
}
}
@ -73,6 +74,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
private readonly AutoResetEvent _event;
private bool _interrupt;
private int _flushSkips;
/// <summary>
/// Creates a new instance of the GPU General Purpose FIFO device.
@ -188,8 +190,16 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
// Process command buffers.
while (_ibEnable && !_interrupt && _commandBufferQueue.TryDequeue(out CommandBuffer entry))
{
bool flushCommandBuffer = true;
if (_flushSkips != 0)
{
_flushSkips--;
flushCommandBuffer = false;
}
_currentCommandBuffer = entry;
_currentCommandBuffer.Fetch(entry.Processor.MemoryManager);
_currentCommandBuffer.Fetch(entry.Processor.MemoryManager, flushCommandBuffer);
// If we are changing the current channel,
// we need to force all the host state to be updated.
@ -199,12 +209,24 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
entry.Processor.ForceAllDirty();
}
entry.Processor.Process(_currentCommandBuffer.Words);
entry.Processor.Process(entry.EntryAddress, _currentCommandBuffer.Words);
}
_interrupt = false;
}
/// <summary>
/// Sets the number of flushes that should be skipped for subsequent command buffers.
/// </summary>
/// <remarks>
/// This can improve performance when command buffer data only needs to be consumed by the GPU.
/// </remarks>
/// <param name="count">The amount of flushes that should be skipped</param>
internal void SetFlushSkips(int count)
{
_flushSkips = count;
}
/// <summary>
/// Interrupts command processing. This will break out of the DispatchCalls loop.
/// </summary>

View file

@ -28,6 +28,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// </summary>
public MemoryManager MemoryManager => _channel.MemoryManager;
/// <summary>
/// 3D Engine.
/// </summary>
public ThreedClass ThreedClass => _3dClass;
/// <summary>
/// Internal GPFIFO state.
/// </summary>
@ -70,13 +75,16 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <summary>
/// Processes a command buffer.
/// </summary>
/// <param name="baseGpuVa">Base GPU virtual address of the command buffer</param>
/// <param name="commandBuffer">Command buffer</param>
public void Process(ReadOnlySpan<int> commandBuffer)
public void Process(ulong baseGpuVa, ReadOnlySpan<int> commandBuffer)
{
for (int index = 0; index < commandBuffer.Length; index++)
{
int command = commandBuffer[index];
ulong gpuVa = baseGpuVa + (ulong)index * 4;
if (_state.MethodCount != 0)
{
if (TryFastI2mBufferUpdate(commandBuffer, ref index))
@ -84,7 +92,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
continue;
}
Send(_state.Method, command, _state.SubChannel, _state.MethodCount <= 1);
Send(gpuVa, _state.Method, command, _state.SubChannel, _state.MethodCount <= 1);
if (!_state.NonIncrementing)
{
@ -120,7 +128,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
_state.NonIncrementing = meth.SecOp == SecOp.NonIncMethod;
break;
case SecOp.ImmdDataMethod:
Send(meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true);
Send(gpuVa, meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true);
break;
}
}
@ -198,8 +206,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <summary>
/// Sends a uncompressed method for processing by the graphics pipeline.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the command word is located</param>
/// <param name="meth">Method to be processed</param>
private void Send(int offset, int argument, int subChannel, bool isLastCall)
private void Send(ulong gpuVa, int offset, int argument, int subChannel, bool isLastCall)
{
if (offset < 0x60)
{
@ -243,7 +252,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
if ((offset & 1) != 0)
{
_fifoClass.MmePushArgument(macroIndex, argument);
_fifoClass.MmePushArgument(macroIndex, gpuVa, argument);
}
else
{

View file

@ -4,6 +4,33 @@ using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// FIFO word.
/// </summary>
struct FifoWord
{
/// <summary>
/// GPU virtual address where the word is located in memory.
/// </summary>
public ulong GpuVa { get; }
/// <summary>
/// Word value.
/// </summary>
public int Word { get; }
/// <summary>
/// Creates a new FIFO word.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the word is located in memory</param>
/// <param name="word">Word value</param>
public FifoWord(ulong gpuVa, int word)
{
GpuVa = gpuVa;
Word = word;
}
}
/// <summary>
/// Macro Execution Engine interface.
/// </summary>
@ -12,7 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Arguments FIFO.
/// </summary>
Queue<int> Fifo { get; }
Queue<FifoWord> Fifo { get; }
/// <summary>
/// Should execute the GPU Macro code being passed.

View file

@ -1,4 +1,6 @@
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Memory;
using System;
namespace Ryujinx.Graphics.Gpu.Engine.MME
@ -13,10 +15,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// </summary>
public int Position { get; }
private IMacroEE _executionEngine;
private bool _executionPending;
private int _argument;
private readonly IMacroEE _executionEngine;
private MacroHLEFunctionName _hleFunction;
/// <summary>
/// Creates a new instance of the GPU cached macro program.
@ -26,28 +28,47 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
{
Position = position;
_executionEngine = null;
_executionPending = false;
_argument = 0;
if (GraphicsConfig.EnableMacroJit)
{
_executionEngine = new MacroJit();
}
else
{
_executionEngine = new MacroInterpreter();
}
_hleFunction = MacroHLEFunctionName.None;
}
/// <summary>
/// Sets the first argument for the macro call.
/// </summary>
/// <param name="context">GPU context where the macro code is being executed</param>
/// <param name="processor">GPU GP FIFO command processor</param>
/// <param name="code">Code to be executed</param>
/// <param name="argument">First argument</param>
public void StartExecution(int argument)
public void StartExecution(GpuContext context, GPFifoProcessor processor, ReadOnlySpan<int> code, int argument)
{
_argument = argument;
_executionPending = true;
if (_executionEngine == null)
{
if (GraphicsConfig.EnableMacroHLE && MacroHLETable.TryGetMacroHLEFunction(code.Slice(Position), context.Capabilities, out _hleFunction))
{
_executionEngine = new MacroHLE(processor, _hleFunction);
}
else if (GraphicsConfig.EnableMacroJit)
{
_executionEngine = new MacroJit();
}
else
{
_executionEngine = new MacroInterpreter();
}
}
if (_hleFunction == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
{
// We don't consume the parameter buffer value, so we don't need to flush it.
// Doing so improves performance if the value was written by a GPU shader.
context.GPFifo.SetFlushSkips(2);
}
}
/// <summary>
@ -60,7 +81,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
if (_executionPending)
{
_executionPending = false;
_executionEngine?.Execute(code.Slice(Position), state, _argument);
}
}
@ -68,10 +88,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Pushes an argument to the macro call argument FIFO.
/// </summary>
/// <param name="gpuVa">GPU virtual address where the command word is located</param>
/// <param name="argument">Argument to be pushed</param>
public void PushArgument(int argument)
public void PushArgument(ulong gpuVa, int argument)
{
_executionEngine?.Fifo.Enqueue(argument);
_executionEngine?.Fifo.Enqueue(new FifoWord(gpuVa, argument));
}
}
}

View file

@ -0,0 +1,142 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Memory;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Macro High-level emulation.
/// </summary>
class MacroHLE : IMacroEE
{
private readonly GPFifoProcessor _processor;
private readonly MacroHLEFunctionName _functionName;
/// <summary>
/// Arguments FIFO.
/// </summary>
public Queue<FifoWord> Fifo { get; }
/// <summary>
/// Creates a new instance of the HLE macro handler.
/// </summary>
/// <param name="context">GPU context the macro is being executed on</param>
/// <param name="memoryManager">GPU memory manager</param>
/// <param name="engine">3D engine where this macro is being called</param>
/// <param name="functionName">Name of the HLE macro function to be called</param>
public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName)
{
_processor = processor;
_functionName = functionName;
Fifo = new Queue<FifoWord>();
}
/// <summary>
/// Executes a macro program until it exits.
/// </summary>
/// <param name="code">Code of the program to execute</param>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
{
switch (_functionName)
{
case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
MultiDrawElementsIndirectCount(state, arg0);
break;
default:
throw new NotImplementedException(_functionName.ToString());
}
}
/// <summary>
/// Performs a indirect multi-draw, with parameters from a GPU buffer.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0)
{
int arg1 = FetchParam().Word;
int arg2 = FetchParam().Word;
int arg3 = FetchParam().Word;
int startOffset = arg0;
int endOffset = arg1;
var topology = (PrimitiveTopology)arg2;
int paddingWords = arg3;
int maxDrawCount = endOffset - startOffset;
int stride = paddingWords * 4 + 0x14;
int indirectBufferSize = maxDrawCount * stride;
ulong parameterBufferGpuVa = FetchParam().GpuVa;
ulong indirectBufferGpuVa = 0;
int indexCount = 0;
for (int i = 0; i < maxDrawCount; i++)
{
var count = FetchParam();
var instanceCount = FetchParam();
var firstIndex = FetchParam();
var baseVertex = FetchParam();
var baseInstance = FetchParam();
if (i == 0)
{
indirectBufferGpuVa = count.GpuVa;
}
indexCount = Math.Max(indexCount, count.Word + firstIndex.Word);
if (i != maxDrawCount - 1)
{
for (int j = 0; j < paddingWords; j++)
{
FetchParam();
}
}
}
// It should be empty at this point, but clear it just to be safe.
Fifo.Clear();
var parameterBuffer = _processor.MemoryManager.Physical.BufferCache.GetGpuBufferRange(_processor.MemoryManager, parameterBufferGpuVa, 4);
var indirectBuffer = _processor.MemoryManager.Physical.BufferCache.GetGpuBufferRange(_processor.MemoryManager, indirectBufferGpuVa, (ulong)indirectBufferSize);
_processor.ThreedClass.MultiDrawIndirectCount(indexCount, topology, indirectBuffer, parameterBuffer, maxDrawCount, stride);
}
/// <summary>
/// Fetches a arguments from the arguments FIFO.
/// </summary>
/// <returns>The call argument, or a 0 value with null address if the FIFO is empty</returns>
private FifoWord FetchParam()
{
if (!Fifo.TryDequeue(out var value))
{
Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
return new FifoWord(0UL, 0);
}
return value;
}
/// <summary>
/// Performs a GPU method call.
/// </summary>
/// <param name="state">Current GPU state</param>
/// <param name="methAddr">Address, in words, of the method</param>
/// <param name="value">Call argument</param>
private static void Send(IDeviceState state, int methAddr, int value)
{
state.Write(methAddr * 4, value);
}
}
}

View file

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Name of the High-level implementation of a Macro function.
/// </summary>
enum MacroHLEFunctionName
{
None,
MultiDrawElementsIndirectCount
}
}

View file

@ -0,0 +1,89 @@
using Ryujinx.Common;
using Ryujinx.Graphics.GAL;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Table with information about High-level implementations of GPU Macro code.
/// </summary>
static class MacroHLETable
{
/// <summary>
/// Macroo High-level implementation table entry.
/// </summary>
struct TableEntry
{
/// <summary>
/// Name of the Macro function.
/// </summary>
public MacroHLEFunctionName Name { get; }
/// <summary>
/// Hash of the original binary Macro function code.
/// </summary>
public Hash128 Hash { get; }
/// <summary>
/// Size (in bytes) of the original binary Macro function code.
/// </summary>
public int Length { get; }
/// <summary>
/// Creates a new table entry.
/// </summary>
/// <param name="name">Name of the Macro function</param>
/// <param name="hash">Hash of the original binary Macro function code</param>
/// <param name="length">Size (in bytes) of the original binary Macro function code</param>
public TableEntry(MacroHLEFunctionName name, Hash128 hash, int length)
{
Name = name;
Hash = hash;
Length = length;
}
}
private static readonly TableEntry[] Table = new TableEntry[]
{
new TableEntry(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C)
};
private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name)
{
if (name == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
{
return caps.SupportsIndirectParameters;
}
return false;
}
/// <summary>
/// Checks if there's a fast, High-level implementation of the specified Macro code available.
/// </summary>
/// <param name="code">Macro code to be checked</param>
/// <param name="caps">Renderer capabilities to check for this macro HLE support</param>
/// <param name="name">Name of the function if a implementation is available and supported, otherwise <see cref="MacroHLEFunctionName.None"/></param>
/// <returns>True if there is a implementation available and supported, false otherwise</returns>
public static bool TryGetMacroHLEFunction(ReadOnlySpan<int> code, Capabilities caps, out MacroHLEFunctionName name)
{
var mc = MemoryMarshal.Cast<int, byte>(code);
for (int i = 0; i < Table.Length; i++)
{
ref var entry = ref Table[i];
var hash = XXHash128.ComputeHash(mc.Slice(0, entry.Length));
if (hash == entry.Hash)
{
name = entry.Name;
return IsMacroHLESupported(caps, name);
}
}
name = MacroHLEFunctionName.None;
return false;
}
}
}

View file

@ -13,7 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Arguments FIFO.
/// </summary>
public Queue<int> Fifo { get; }
public Queue<FifoWord> Fifo { get; }
private int[] _gprs;
@ -34,7 +34,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// </summary>
public MacroInterpreter()
{
Fifo = new Queue<int>();
Fifo = new Queue<FifoWord>();
_gprs = new int[8];
}
@ -364,14 +364,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <returns>The call argument, or 0 if the FIFO is empty</returns>
private int FetchParam()
{
if (!Fifo.TryDequeue(out int value))
if (!Fifo.TryDequeue(out var value))
{
Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
return 0;
}
return value;
return value.Word;
}
/// <summary>

View file

@ -14,7 +14,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Arguments FIFO.
/// </summary>
public Queue<int> Fifo => _context.Fifo;
public Queue<FifoWord> Fifo => _context.Fifo;
private MacroJitCompiler.MacroExecute _execute;

View file

@ -12,22 +12,22 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Arguments FIFO.
/// </summary>
public Queue<int> Fifo { get; } = new Queue<int>();
public Queue<FifoWord> Fifo { get; } = new Queue<FifoWord>();
/// <summary>
/// Fetches a arguments from the arguments FIFO.
/// </summary>
/// <returns></returns>
/// <returns>The call argument, or 0 if the FIFO is empty</returns>
public int FetchParam()
{
if (!Fifo.TryDequeue(out int value))
if (!Fifo.TryDequeue(out var value))
{
Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
return 0;
}
return value;
return value.Word;
}
/// <summary>

View file

@ -26,6 +26,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
private int _instanceIndex;
private const int IndexBufferCountMethodOffset = 0x5f8;
/// <summary>
/// Creates a new instance of the draw manager.
/// </summary>
@ -304,6 +306,63 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_drawState.DrawIndexed = oldDrawIndexed;
}
/// <summary>
/// Performs a indirect multi-draw, with parameters from a GPU buffer.
/// </summary>
/// <param name="engine">3D engine where this method is being called</param>
/// <param name="topology">Primitive topology</param>
/// <param name="indirectBuffer">GPU buffer with the draw parameters, such as count, first index, etc</param>
/// <param name="parameterBuffer">GPU buffer with the draw count</param>
/// <param name="maxDrawCount">Maximum number of draws that can be made</param>
/// <param name="stride">Distance in bytes between each element on the <paramref name="indirectBuffer"/> array</param>
public void MultiDrawIndirectCount(
ThreedClass engine,
int indexCount,
PrimitiveTopology topology,
BufferRange indirectBuffer,
BufferRange parameterBuffer,
int maxDrawCount,
int stride)
{
engine.Write(IndexBufferCountMethodOffset * 4, indexCount);
_context.Renderer.Pipeline.SetPrimitiveTopology(topology);
_drawState.Topology = topology;
ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable(
_context,
_channel.MemoryManager,
_state.State.RenderEnableAddress,
_state.State.RenderEnableCondition);
if (renderEnable == ConditionalRenderEnabled.False)
{
_drawState.DrawIndexed = false;
return;
}
_drawState.FirstIndex = _state.State.IndexBufferState.First;
_drawState.IndexCount = indexCount;
engine.UpdateState();
if (_drawState.DrawIndexed)
{
_context.Renderer.Pipeline.MultiDrawIndexedIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride);
}
else
{
_context.Renderer.Pipeline.MultiDrawIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride);
}
_drawState.DrawIndexed = false;
if (renderEnable == ConditionalRenderEnabled.Host)
{
_context.Renderer.Pipeline.EndHostConditionalRendering();
}
}
/// <summary>
/// Perform any deferred draws.
/// This is used for instanced draws.

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
using System;
using System.Collections.Generic;
@ -433,5 +434,25 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
return 0;
}
/// <summary>
/// Performs a indirect multi-draw, with parameters from a GPU buffer.
/// </summary>
/// <param name="indexCount">Index Buffer Count</param>
/// <param name="topology">Primitive topology</param>
/// <param name="indirectBuffer">GPU buffer with the draw parameters, such as count, first index, etc</param>
/// <param name="parameterBuffer">GPU buffer with the draw count</param>
/// <param name="maxDrawCount">Maximum number of draws that can be made</param>
/// <param name="stride">Distance in bytes between each element on the <paramref name="indirectBuffer"/> array</param>
public void MultiDrawIndirectCount(
int indexCount,
PrimitiveTopology topology,
BufferRange indirectBuffer,
BufferRange parameterBuffer,
int maxDrawCount,
int stride)
{
_drawManager.MultiDrawIndirectCount(this, indexCount, topology, indirectBuffer, parameterBuffer, maxDrawCount, stride);
}
}
}