Refactor shader GPU state and memory access (#1203)
* Refactor shader GPU state and memory access
* Fix NVDEC project build
* Address PR feedback and add missing XML comments
This commit is contained in:
parent
7f500e7cae
commit
b8eb6abecc
35 changed files with 633 additions and 684 deletions
|
@ -79,7 +79,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
|
||||
}
|
||||
|
||||
Operand alignMask = Const(-config.QueryInfo(QueryInfoName.StorageBufferOffsetAlignment));
|
||||
Operand alignMask = Const(-config.GpuAccessor.QueryStorageBufferOffsetAlignment());
|
||||
|
||||
Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
|
||||
Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
|
||||
|
@ -131,9 +131,9 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
|
||||
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
|
||||
|
||||
bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.QueryInfoBool(QueryInfoName.SupportsNonConstantTextureOffset);
|
||||
bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QuerySupportsNonConstantTextureOffset();
|
||||
|
||||
bool isRect = config.QueryInfoBool(QueryInfoName.IsTextureRectangle, texOp.Handle);
|
||||
bool isRect = config.GpuAccessor.QueryIsTextureRectangle(texOp.Handle);
|
||||
|
||||
if (!(hasInvalidOffset || isRect))
|
||||
{
|
||||
|
|
|
@ -69,7 +69,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
Operand baseAddrTrunc = Local();
|
||||
|
||||
Operand alignMask = Const(-config.QueryInfo(QueryInfoName.StorageBufferOffsetAlignment));
|
||||
Operand alignMask = Const(-config.GpuAccessor.QueryStorageBufferOffsetAlignment());
|
||||
|
||||
Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
|
||||
|
||||
|
@ -140,7 +140,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
Operand baseAddrTrunc = Local();
|
||||
|
||||
Operand alignMask = Const(-config.QueryInfo(QueryInfoName.StorageBufferOffsetAlignment));
|
||||
Operand alignMask = Const(-config.GpuAccessor.QueryStorageBufferOffsetAlignment());
|
||||
|
||||
Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
|
||||
|
||||
|
|
|
@ -18,11 +18,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
public bool OmapSampleMask { get; }
|
||||
public bool OmapDepth { get; }
|
||||
|
||||
public IGpuAccessor GpuAccessor { get; }
|
||||
|
||||
public TranslationFlags Flags { get; }
|
||||
|
||||
private TranslatorCallbacks _callbacks;
|
||||
|
||||
public ShaderConfig(TranslationFlags flags, TranslatorCallbacks callbacks)
|
||||
public ShaderConfig(IGpuAccessor gpuAccessor, TranslationFlags flags)
|
||||
{
|
||||
Stage = ShaderStage.Compute;
|
||||
OutputTopology = OutputTopology.PointList;
|
||||
|
@ -32,11 +32,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
OmapTargets = null;
|
||||
OmapSampleMask = false;
|
||||
OmapDepth = false;
|
||||
GpuAccessor = gpuAccessor;
|
||||
Flags = flags;
|
||||
_callbacks = callbacks;
|
||||
}
|
||||
|
||||
public ShaderConfig(ShaderHeader header, TranslationFlags flags, TranslatorCallbacks callbacks)
|
||||
public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationFlags flags)
|
||||
{
|
||||
Stage = header.Stage;
|
||||
OutputTopology = header.OutputTopology;
|
||||
|
@ -46,8 +46,8 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
OmapTargets = header.OmapTargets;
|
||||
OmapSampleMask = header.OmapSampleMask;
|
||||
OmapDepth = header.OmapDepth;
|
||||
GpuAccessor = gpuAccessor;
|
||||
Flags = flags;
|
||||
_callbacks = callbacks;
|
||||
}
|
||||
|
||||
public int GetDepthRegister()
|
||||
|
@ -68,51 +68,5 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
// The depth register is always two registers after the last color output.
|
||||
return count + 1;
|
||||
}
|
||||
|
||||
public bool QueryInfoBool(QueryInfoName info, int index = 0)
|
||||
{
|
||||
return Convert.ToBoolean(QueryInfo(info, index));
|
||||
}
|
||||
|
||||
public int QueryInfo(QueryInfoName info, int index = 0)
|
||||
{
|
||||
if (_callbacks.QueryInfo != null)
|
||||
{
|
||||
return _callbacks.QueryInfo(info, index);
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (info)
|
||||
{
|
||||
case QueryInfoName.ComputeLocalSizeX:
|
||||
case QueryInfoName.ComputeLocalSizeY:
|
||||
case QueryInfoName.ComputeLocalSizeZ:
|
||||
return 1;
|
||||
case QueryInfoName.ComputeLocalMemorySize:
|
||||
return 0x1000;
|
||||
case QueryInfoName.ComputeSharedMemorySize:
|
||||
return 0xc000;
|
||||
case QueryInfoName.IsTextureBuffer:
|
||||
return Convert.ToInt32(false);
|
||||
case QueryInfoName.IsTextureRectangle:
|
||||
return Convert.ToInt32(false);
|
||||
case QueryInfoName.PrimitiveTopology:
|
||||
return (int)InputTopology.Points;
|
||||
case QueryInfoName.StorageBufferOffsetAlignment:
|
||||
return 16;
|
||||
case QueryInfoName.SupportsNonConstantTextureOffset:
|
||||
return Convert.ToInt32(true);
|
||||
case QueryInfoName.TextureFormat:
|
||||
return (int)TextureFormat.R8G8B8A8Unorm;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
public void PrintLog(string message)
|
||||
{
|
||||
_callbacks.PrintLog?.Invoke(message);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,6 +1,5 @@
|
|||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
|
@ -110,15 +109,13 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
public bool OmapSampleMask { get; }
|
||||
public bool OmapDepth { get; }
|
||||
|
||||
public ShaderHeader(ReadOnlySpan<byte> code)
|
||||
public ShaderHeader(IGpuAccessor gpuAccessor, ulong address)
|
||||
{
|
||||
ReadOnlySpan<int> header = MemoryMarshal.Cast<byte, int>(code);
|
||||
|
||||
int commonWord0 = header[0];
|
||||
int commonWord1 = header[1];
|
||||
int commonWord2 = header[2];
|
||||
int commonWord3 = header[3];
|
||||
int commonWord4 = header[4];
|
||||
int commonWord0 = gpuAccessor.MemoryRead<int>(address + 0);
|
||||
int commonWord1 = gpuAccessor.MemoryRead<int>(address + 4);
|
||||
int commonWord2 = gpuAccessor.MemoryRead<int>(address + 8);
|
||||
int commonWord3 = gpuAccessor.MemoryRead<int>(address + 12);
|
||||
int commonWord4 = gpuAccessor.MemoryRead<int>(address + 16);
|
||||
|
||||
SphType = commonWord0.Extract(0, 5);
|
||||
Version = commonWord0.Extract(5, 5);
|
||||
|
@ -163,22 +160,19 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
ImapTypes = new ImapPixelType[32];
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
for (ulong i = 0; i < 32; i++)
|
||||
{
|
||||
for (int j = 0; j < 4; j++)
|
||||
{
|
||||
byte imap = (byte)(header[6 + i] >> (j * 8));
|
||||
byte imap = gpuAccessor.MemoryRead<byte>(address + 0x18 + i);
|
||||
|
||||
ImapTypes[i * 4 + j] = new ImapPixelType(
|
||||
(PixelImap)((imap >> 0) & 3),
|
||||
(PixelImap)((imap >> 2) & 3),
|
||||
(PixelImap)((imap >> 4) & 3),
|
||||
(PixelImap)((imap >> 6) & 3));
|
||||
}
|
||||
ImapTypes[i] = new ImapPixelType(
|
||||
(PixelImap)((imap >> 0) & 3),
|
||||
(PixelImap)((imap >> 2) & 3),
|
||||
(PixelImap)((imap >> 4) & 3),
|
||||
(PixelImap)((imap >> 6) & 3));
|
||||
}
|
||||
|
||||
int type2OmapTarget = header[18];
|
||||
int type2Omap = header[19];
|
||||
int type2OmapTarget = gpuAccessor.MemoryRead<int>(address + 0x48);
|
||||
int type2Omap = gpuAccessor.MemoryRead<int>(address + 0x4c);
|
||||
|
||||
OmapTargets = new OmapTarget[8];
|
||||
|
||||
|
|
|
@ -14,46 +14,22 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
private const int HeaderSize = 0x50;
|
||||
|
||||
public static ReadOnlySpan<byte> ExtractCode(ReadOnlySpan<byte> code, bool compute, out int headerSize)
|
||||
public static ShaderProgram Translate(ulong address, IGpuAccessor gpuAccessor, TranslationFlags flags)
|
||||
{
|
||||
headerSize = compute ? 0 : HeaderSize;
|
||||
|
||||
Block[] cfg = Decoder.Decode(code, (ulong)headerSize);
|
||||
|
||||
if (cfg == null)
|
||||
{
|
||||
return code;
|
||||
}
|
||||
|
||||
ulong endAddress = 0;
|
||||
|
||||
foreach (Block block in cfg)
|
||||
{
|
||||
if (endAddress < block.EndAddress)
|
||||
{
|
||||
endAddress = block.EndAddress;
|
||||
}
|
||||
}
|
||||
|
||||
return code.Slice(0, headerSize + (int)endAddress);
|
||||
}
|
||||
|
||||
public static ShaderProgram Translate(ReadOnlySpan<byte> code, TranslatorCallbacks callbacks, TranslationFlags flags)
|
||||
{
|
||||
Operation[] ops = DecodeShader(code, callbacks, flags, out ShaderConfig config, out int size);
|
||||
Operation[] ops = DecodeShader(address, gpuAccessor, flags, out ShaderConfig config, out int size);
|
||||
|
||||
return Translate(ops, config, size);
|
||||
}
|
||||
|
||||
public static ShaderProgram Translate(ReadOnlySpan<byte> vpACode, ReadOnlySpan<byte> vpBCode, TranslatorCallbacks callbacks, TranslationFlags flags)
|
||||
public static ShaderProgram Translate(ulong addressA, ulong addressB, IGpuAccessor gpuAccessor, TranslationFlags flags)
|
||||
{
|
||||
Operation[] vpAOps = DecodeShader(vpACode, callbacks, flags, out _, out _);
|
||||
Operation[] vpBOps = DecodeShader(vpBCode, callbacks, flags, out ShaderConfig config, out int sizeB);
|
||||
Operation[] opsA = DecodeShader(addressA, gpuAccessor, flags, out _, out int sizeA);
|
||||
Operation[] opsB = DecodeShader(addressB, gpuAccessor, flags, out ShaderConfig config, out int sizeB);
|
||||
|
||||
return Translate(Combine(vpAOps, vpBOps), config, sizeB);
|
||||
return Translate(Combine(opsA, opsB), config, sizeB, sizeA);
|
||||
}
|
||||
|
||||
private static ShaderProgram Translate(Operation[] ops, ShaderConfig config, int size)
|
||||
private static ShaderProgram Translate(Operation[] ops, ShaderConfig config, int size, int sizeA = 0)
|
||||
{
|
||||
BasicBlock[] blocks = ControlFlowGraph.MakeCfg(ops);
|
||||
|
||||
|
@ -83,34 +59,34 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
string glslCode = program.Code;
|
||||
|
||||
return new ShaderProgram(spInfo, config.Stage, glslCode, size);
|
||||
return new ShaderProgram(spInfo, config.Stage, glslCode, size, sizeA);
|
||||
}
|
||||
|
||||
private static Operation[] DecodeShader(
|
||||
ReadOnlySpan<byte> code,
|
||||
TranslatorCallbacks callbacks,
|
||||
TranslationFlags flags,
|
||||
out ShaderConfig config,
|
||||
out int size)
|
||||
ulong address,
|
||||
IGpuAccessor gpuAccessor,
|
||||
TranslationFlags flags,
|
||||
out ShaderConfig config,
|
||||
out int size)
|
||||
{
|
||||
Block[] cfg;
|
||||
|
||||
if ((flags & TranslationFlags.Compute) != 0)
|
||||
{
|
||||
config = new ShaderConfig(flags, callbacks);
|
||||
config = new ShaderConfig(gpuAccessor, flags);
|
||||
|
||||
cfg = Decoder.Decode(code, 0);
|
||||
cfg = Decoder.Decode(gpuAccessor, address);
|
||||
}
|
||||
else
|
||||
{
|
||||
config = new ShaderConfig(new ShaderHeader(code), flags, callbacks);
|
||||
config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, flags);
|
||||
|
||||
cfg = Decoder.Decode(code, HeaderSize);
|
||||
cfg = Decoder.Decode(gpuAccessor, address + HeaderSize);
|
||||
}
|
||||
|
||||
if (cfg == null)
|
||||
{
|
||||
config.PrintLog("Invalid branch detected, failed to build CFG.");
|
||||
gpuAccessor.Log("Invalid branch detected, failed to build CFG.");
|
||||
|
||||
size = 0;
|
||||
|
||||
|
@ -150,7 +126,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
instName = "???";
|
||||
|
||||
config.PrintLog($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
|
||||
gpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
|
||||
}
|
||||
|
||||
string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
public struct TranslatorCallbacks
|
||||
{
|
||||
internal Func<QueryInfoName, int, int> QueryInfo { get; }
|
||||
|
||||
internal Action<string> PrintLog { get; }
|
||||
|
||||
public TranslatorCallbacks(Func<QueryInfoName, int, int> queryInfoCallback, Action<string> printLogCallback)
|
||||
{
|
||||
QueryInfo = queryInfoCallback;
|
||||
PrintLog = printLogCallback;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue