Move solution and projects to src
This commit is contained in:
parent
cd124bda58
commit
cee7121058
3466 changed files with 55 additions and 55 deletions
25
src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs
Normal file
25
src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs
Normal file
|
@ -0,0 +1,25 @@
|
|||
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Packed type descriptor. The low byte (<see cref="ElementTypeMask"/>) selects the element type,
    /// bits 8-9 (<see cref="ElementCountMask"/>) hold the element count and bit 10 is the array flag.
    /// </summary>
    enum AggregateType
    {
        // Element types; these live in the bits covered by ElementTypeMask.
        Invalid = 0,
        Void = 1,
        Bool = 2,
        FP32 = 3,
        FP64 = 4,
        S32 = 5,
        U32 = 6,

        // Mask that isolates the element type.
        ElementTypeMask = 0xff,

        // Bit position and mask of the two element count bits.
        ElementCountShift = 8,
        ElementCountMask = 3 << ElementCountShift,

        // Element counts, encoded as (count - 1) inside the element count bits.
        Scalar = 0 << ElementCountShift,
        Vector2 = 1 << ElementCountShift,
        Vector3 = 2 << ElementCountShift,
        Vector4 = 3 << ElementCountShift,

        // Array flag: the first bit above the element count bits (bit 10).
        Array = 1 << (ElementCountShift + 2)
    }
}
|
36
src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs
Normal file
36
src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs
Normal file
|
@ -0,0 +1,36 @@
|
|||
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Numeric identifiers of the shader attributes known to the translator.
    /// Consecutive components are 4 apart and the range ends below are computed with 16 per slot.
    /// </summary>
    /// <remarks>
    /// NOTE(review): these look like byte offsets into the hardware attribute map; confirm against the decoder.
    /// </remarks>
    static class AttributeConsts
    {
        // Fixed function attributes.
        public const int PrimitiveId = 0x060;
        public const int Layer = 0x064;
        public const int PositionX = 0x070;
        public const int PositionY = 0x074;
        public const int FrontColorDiffuseR = 0x280;
        public const int BackColorDiffuseR = 0x2a0;

        // Clip distances, one every 4 units starting at ClipDistance0.
        public const int ClipDistance0 = 0x2c0;
        public const int ClipDistance1 = 0x2c4;
        public const int ClipDistance2 = 0x2c8;
        public const int ClipDistance3 = 0x2cc;
        public const int ClipDistance4 = 0x2d0;
        public const int ClipDistance5 = 0x2d4;
        public const int ClipDistance6 = 0x2d8;
        public const int ClipDistance7 = 0x2dc;

        public const int FogCoord = 0x2e8;
        public const int TessCoordX = 0x2f0;
        public const int TessCoordY = 0x2f4;
        public const int InstanceId = 0x2f8;
        public const int VertexId = 0x2fc;

        // Texture coordinate range: TexCoordCount slots of 16 units each.
        public const int TexCoordCount = 10;
        public const int TexCoordBase = 0x300;
        public const int TexCoordEnd = TexCoordBase + (TexCoordCount * 16);

        public const int FrontFacing = 0x3fc;

        // User attribute range: UserAttributesCount slots of 16 units each.
        public const int UserAttributesCount = 32;
        public const int UserAttributeBase = 0x80;
        public const int UserAttributeEnd = UserAttributeBase + (UserAttributesCount * 16);

        // Per-patch user attribute range.
        public const int UserAttributePerPatchBase = 0x18;
        public const int UserAttributePerPatchEnd = 0x200;
    }
}
|
176
src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs
Normal file
176
src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs
Normal file
|
@ -0,0 +1,176 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
class ControlFlowGraph
|
||||
{
|
||||
/// <summary>All blocks of the graph, exactly as passed to the constructor.</summary>
public BasicBlock[] Blocks { get; }

/// <summary>Blocks reachable from the first block, in post-order.</summary>
public BasicBlock[] PostOrderBlocks { get; }

/// <summary>Maps a block's <c>Index</c> to its position inside <see cref="PostOrderBlocks"/>.</summary>
public int[] PostOrderMap { get; }

/// <summary>
/// Builds the graph and computes the post-order of every block reachable from <c>blocks[0]</c>
/// through <c>Next</c> and <c>Branch</c> links, using an explicit stack rather than recursion.
/// </summary>
/// <param name="blocks">Blocks of the function; the first element is the entry block.</param>
public ControlFlowGraph(BasicBlock[] blocks)
{
    Blocks = blocks;
    PostOrderMap = new int[blocks.Length];

    var seen = new HashSet<BasicBlock>();
    var pending = new Stack<BasicBlock>();
    var ordered = new List<BasicBlock>(blocks.Length);

    BasicBlock entry = blocks[0];

    seen.Add(entry);
    pending.Push(entry);

    while (pending.Count > 0)
    {
        BasicBlock current = pending.Pop();

        // Pick the first successor that has not been visited yet; Next is tried before Branch.
        BasicBlock successor = null;

        if (current.Next != null && seen.Add(current.Next))
        {
            successor = current.Next;
        }
        else if (current.Branch != null && seen.Add(current.Branch))
        {
            successor = current.Branch;
        }

        if (successor != null)
        {
            // Revisit the current block once the successor has been fully processed.
            pending.Push(current);
            pending.Push(successor);

            continue;
        }

        // No unvisited successor is left, so the block takes the next post-order slot.
        PostOrderMap[current.Index] = ordered.Count;
        ordered.Add(current);
    }

    PostOrderBlocks = ordered.ToArray();
}
|
||||
|
||||
/// <summary>
/// Splits a linear list of operations into basic blocks, links them through
/// <c>Next</c> (fall through) and <c>Branch</c> (jump target), drops blocks without
/// predecessors and wraps the result in a <see cref="ControlFlowGraph"/>.
/// </summary>
/// <param name="operations">Operations in program order, including label markers and branches.</param>
/// <returns>The control flow graph of the remaining blocks.</returns>
public static ControlFlowGraph Create(Operation[] operations)
{
    var blockByLabel = new Dictionary<Operand, BasicBlock>();
    var blocks = new List<BasicBlock>();

    BasicBlock current = null;

    // Makes "following" the current block, linking it as the fall through
    // successor unless the previous block ends with an unconditional instruction.
    void Advance(BasicBlock following)
    {
        bool fallsThrough = current != null && !EndsWithUnconditionalInst(current.GetLastOp());

        if (fallsThrough)
        {
            current.Next = following;
        }

        current = following;
    }

    // Creates a block at the end of the list and advances to it.
    void AppendFreshBlock()
    {
        var fresh = new BasicBlock(blocks.Count);

        blocks.Add(fresh);

        Advance(fresh);
    }

    bool startNewBlock = true;

    foreach (Operation op in operations)
    {
        if (op.Inst == Instruction.MarkLabel)
        {
            Operand label = op.Dest;

            if (!blockByLabel.TryGetValue(label, out BasicBlock labelled))
            {
                // First time this label is seen: the block is created here.
                AppendFreshBlock();

                blockByLabel.Add(label, current);
            }
            else
            {
                // The block was already created by an earlier branch to this label; place it now.
                labelled.Index = blocks.Count;

                blocks.Add(labelled);

                Advance(labelled);
            }
        }
        else
        {
            if (startNewBlock)
            {
                AppendFreshBlock();
            }

            current.Operations.AddLast(op);
        }

        // Any kind of branch terminates the current block.
        switch (op.Inst)
        {
            case Instruction.Branch:
            case Instruction.BranchIfTrue:
            case Instruction.BranchIfFalse:
                startNewBlock = true;
                break;
            default:
                startNewBlock = false;
                break;
        }

        if (!startNewBlock)
        {
            continue;
        }

        Operand targetLabel = op.Dest;

        if (!blockByLabel.TryGetValue(targetLabel, out BasicBlock target))
        {
            // Forward branch: reserve the block, it gets its index when the label is reached.
            target = new BasicBlock();

            blockByLabel.Add(targetLabel, target);
        }

        current.Branch = target;
    }

    // Remove unreachable blocks. Unlinking a block can leave its successors
    // without predecessors, so the scan repeats until nothing is removed.
    bool removedAny;

    do
    {
        removedAny = false;

        int position = 1;

        while (position < blocks.Count)
        {
            BasicBlock candidate = blocks[position];

            if (candidate.Predecessors.Count != 0)
            {
                candidate.Index = position;
                position++;

                continue;
            }

            candidate.Next = null;
            candidate.Branch = null;
            blocks.RemoveAt(position);
            removedAny = true;
        }
    }
    while (removedAny);

    return new ControlFlowGraph(blocks.ToArray());
}
|
||||
|
||||
/// <summary>
/// Tells whether a node is an operation after which execution never falls through:
/// an unconditional branch, a discard or a return.
/// </summary>
/// <param name="node">Node to inspect; anything that is not an <see cref="Operation"/> yields false.</param>
/// <returns>True for Branch, Discard and Return operations, false otherwise.</returns>
private static bool EndsWithUnconditionalInst(INode node)
{
    if (!(node is Operation operation))
    {
        return false;
    }

    Instruction inst = operation.Inst;

    return inst == Instruction.Branch ||
           inst == Instruction.Discard ||
           inst == Instruction.Return;
}
|
||||
}
|
||||
}
|
94
src/Ryujinx.Graphics.Shader/Translation/Dominance.cs
Normal file
94
src/Ryujinx.Graphics.Shader/Translation/Dominance.cs
Normal file
|
@ -0,0 +1,94 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
static class Dominance
|
||||
{
|
||||
// The methods of this class implement the algorithms described in
// "A Simple, Fast Dominance Algorithm".
// https://www.cs.rice.edu/~keith/EMBED/dom.pdf

/// <summary>
/// Computes the immediate dominator of every block in the graph's post-order list
/// and stores it in <c>BasicBlock.ImmediateDominator</c>. The entry block dominates itself.
/// </summary>
/// <param name="cfg">Graph whose blocks are updated in place.</param>
public static void FindDominators(ControlFlowGraph cfg)
{
    // Walks both blocks up the dominator tree until they meet, always
    // moving the one with the lower post-order number.
    BasicBlock Intersect(BasicBlock first, BasicBlock second)
    {
        int[] order = cfg.PostOrderMap;

        while (first != second)
        {
            while (order[first.Index] < order[second.Index])
            {
                first = first.ImmediateDominator;
            }

            while (order[second.Index] < order[first.Index])
            {
                second = second.ImmediateDominator;
            }
        }

        return first;
    }

    BasicBlock entry = cfg.Blocks[0];

    entry.ImmediateDominator = entry;

    BasicBlock[] postOrder = cfg.PostOrderBlocks;

    bool changed = true;

    // Iterate to a fixed point, visiting blocks in reverse post-order and
    // skipping the last post-order entry.
    while (changed)
    {
        changed = false;

        for (int position = postOrder.Length - 2; position >= 0; position--)
        {
            BasicBlock block = postOrder[position];

            BasicBlock candidate = null;

            foreach (BasicBlock predecessor in block.Predecessors)
            {
                // Predecessors without a dominator yet have not been processed.
                if (predecessor.ImmediateDominator == null)
                {
                    continue;
                }

                candidate = candidate != null ? Intersect(predecessor, candidate) : predecessor;
            }

            if (block.ImmediateDominator != candidate)
            {
                block.ImmediateDominator = candidate;

                changed = true;
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Fills <c>BasicBlock.DominanceFrontiers</c>. For every block with two or more
/// predecessors, the block is added to the frontier of each predecessor and of the
/// blocks up its dominator chain, stopping at the block's immediate dominator.
/// </summary>
/// <param name="blocks">Blocks whose immediate dominators were already computed.</param>
public static void FindDominanceFrontiers(BasicBlock[] blocks)
{
    foreach (BasicBlock block in blocks)
    {
        // Only join points can be part of a dominance frontier.
        if (block.Predecessors.Count < 2)
        {
            continue;
        }

        foreach (BasicBlock predecessor in block.Predecessors)
        {
            for (BasicBlock runner = predecessor;
                 runner != block.ImmediateDominator;
                 runner = runner.ImmediateDominator)
            {
                runner.DominanceFrontiers.Add(block);
            }
        }
    }
}
|
||||
}
|
||||
}
|
492
src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs
Normal file
492
src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs
Normal file
|
@ -0,0 +1,492 @@
|
|||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
class EmitterContext
|
||||
{
|
||||
/// <summary>Decoded program given to the constructor.</summary>
public DecodedProgram Program { get; }

/// <summary>Shader configuration given to the constructor.</summary>
public ShaderConfig Config { get; }

/// <summary>Value of the constructor's <c>isNonMain</c> argument; when set, <c>PrepareForReturn</c> emits nothing.</summary>
public bool IsNonMain { get; }

/// <summary>Block currently being processed; assigned by the code driving this context.</summary>
public Block CurrBlock { get; set; }

/// <summary>Instruction currently being processed; its raw opcode is what <c>GetOp</c> reinterprets.</summary>
public InstOp CurrOp { get; set; }

/// <summary>Number of operations emitted so far.</summary>
public int OperationsCount
{
    get { return _operations.Count; }
}
|
||||
|
||||
/// <summary>
/// Describes the guard emitted on entry of a block that is a BRX target: the block body only
/// runs when <see cref="Selector"/> equals <see cref="ExpectedValue"/>, otherwise
/// control moves on to <see cref="NextTargetAddress"/>.
/// </summary>
private readonly struct BrxTarget
{
    // Operand compared on block entry; null means the block has no guard.
    public readonly Operand Selector;

    // Value the selector must have for this block to run.
    public readonly int ExpectedValue;

    // Address of the block tried when the selector does not match.
    public readonly ulong NextTargetAddress;

    public BrxTarget(Operand selector, int expectedValue, ulong nextTargetAddress)
    {
        (Selector, ExpectedValue, NextTargetAddress) = (selector, expectedValue, nextTargetAddress);
    }
}
|
||||
|
||||
/// <summary>
/// Label operand of a block together with its optional BRX guard information.
/// </summary>
private class BlockLabel
{
    // Label operand that marks the start of the block.
    public readonly Operand Label;

    // Left at its default (null selector) unless SetBrxTarget is called for the block.
    public BrxTarget BrxTarget;

    public BlockLabel(Operand label) => Label = label;
}
|
||||
|
||||
// Operations emitted so far, in emission order.
private readonly List<Operation> _operations;

// Block labels keyed by address, created on demand by EnsureBlockLabel.
private readonly Dictionary<ulong, BlockLabel> _labels;

/// <summary>
/// Creates an emitter for one function of the program and emits the start-of-shader code.
/// </summary>
/// <param name="program">Decoded program being translated.</param>
/// <param name="config">Shader configuration queried and updated during emission.</param>
/// <param name="isNonMain">Stored in <see cref="IsNonMain"/>.</param>
public EmitterContext(DecodedProgram program, ShaderConfig config, bool isNonMain)
{
    _operations = new List<Operation>();
    _labels = new Dictionary<ulong, BlockLabel>();

    Program = program;
    Config = config;
    IsNonMain = isNonMain;

    // Runs last because it reads Config and appends to the operation list.
    EmitStart();
}
|
||||
|
||||
/// <summary>
/// Emits code that must run at the very start of the shader. Currently this is only the
/// point size write for Vulkan vertex shaders that are not translated with the VertexA flag.
/// </summary>
private void EmitStart()
{
    bool writesPointSize =
        Config.Stage == ShaderStage.Vertex &&
        Config.Options.TargetApi == TargetApi.Vulkan &&
        (Config.Options.Flags & TranslationFlags.VertexA) == 0;

    if (!writesPointSize)
    {
        return;
    }

    // Vulkan requires the point size to be always written on the shader if the primitive topology is points.
    Operand pointSize = ConstF(Config.GpuAccessor.QueryPointSize());

    this.Store(StorageKind.Output, IoVariable.PointSize, null, pointSize);
}
|
||||
|
||||
/// <summary>
/// Reinterprets the raw opcode of <see cref="CurrOp"/> as <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Unmanaged type of the same size as a <c>ulong</c> (checked by a debug assertion).</typeparam>
/// <returns>The opcode bits viewed as <typeparamref name="T"/>.</returns>
public T GetOp<T>() where T : unmanaged
{
    Debug.Assert(Unsafe.SizeOf<T>() == sizeof(ulong));

    // Copy to a local so that a reference to it can be reinterpreted.
    ulong rawOpCode = CurrOp.RawOpCode;

    return Unsafe.As<ulong, T>(ref rawOpCode);
}
|
||||
|
||||
/// <summary>
/// Appends a new operation to the emitted list.
/// </summary>
/// <param name="inst">Instruction of the operation.</param>
/// <param name="dest">Destination operand, or null when there is none.</param>
/// <param name="sources">Source operands.</param>
/// <returns><paramref name="dest"/>, unchanged.</returns>
public Operand Add(Instruction inst, Operand dest = null, params Operand[] sources)
{
    _operations.Add(new Operation(inst, dest, sources));

    return dest;
}
|
||||
|
||||
/// <summary>
/// Appends a new operation that carries a storage kind to the emitted list.
/// </summary>
/// <param name="inst">Instruction of the operation.</param>
/// <param name="storageKind">Storage kind the operation works on.</param>
/// <param name="dest">Destination operand, or null when there is none.</param>
/// <param name="sources">Source operands.</param>
/// <returns><paramref name="dest"/>, unchanged.</returns>
public Operand Add(Instruction inst, StorageKind storageKind, Operand dest = null, params Operand[] sources)
{
    _operations.Add(new Operation(inst, storageKind, dest, sources));

    return dest;
}
|
||||
|
||||
/// <summary>
/// Appends a new operation with two destination operands to the emitted list.
/// </summary>
/// <param name="inst">Instruction of the operation.</param>
/// <param name="dest">The two destination operands.</param>
/// <param name="sources">Source operands.</param>
/// <returns><paramref name="dest"/>, unchanged.</returns>
public (Operand, Operand) Add(Instruction inst, (Operand, Operand) dest, params Operand[] sources)
{
    Operand[] destinations = { dest.Item1, dest.Item2 };

    _operations.Add(new Operation(inst, 0, destinations, sources));

    return dest;
}
|
||||
|
||||
/// <summary>
/// Appends an already constructed operation to the emitted list.
/// </summary>
/// <param name="operation">Operation to append.</param>
public void Add(Operation operation) => _operations.Add(operation);
|
||||
|
||||
/// <summary>
/// Creates a texture operation with <see cref="TextureFormat.Unknown"/> as its format.
/// All other arguments are forwarded to the overload that takes a format.
/// </summary>
/// <returns>The new texture operation; it is not appended to the emitted list.</returns>
public TextureOperation CreateTextureOperation(
    Instruction inst,
    SamplerType type,
    TextureFlags flags,
    int handle,
    int compIndex,
    Operand[] dests,
    params Operand[] sources)
    => CreateTextureOperation(inst, type, TextureFormat.Unknown, flags, handle, compIndex, dests, sources);
|
||||
|
||||
/// <summary>
/// Creates a texture operation and, unless it is bindless, registers the texture
/// as used on the shader configuration with the default constant buffer slot.
/// </summary>
/// <param name="inst">Texture instruction.</param>
/// <param name="type">Sampler type.</param>
/// <param name="format">Texture format.</param>
/// <param name="flags">Texture flags; the Bindless flag suppresses the registration.</param>
/// <param name="handle">Texture handle.</param>
/// <param name="compIndex">Component index passed through to the operation.</param>
/// <param name="dests">Destination operands.</param>
/// <param name="sources">Source operands.</param>
/// <returns>The new texture operation; it is not appended to the emitted list.</returns>
public TextureOperation CreateTextureOperation(
    Instruction inst,
    SamplerType type,
    TextureFormat format,
    TextureFlags flags,
    int handle,
    int compIndex,
    Operand[] dests,
    params Operand[] sources)
{
    bool isBindless = flags.HasFlag(TextureFlags.Bindless);

    if (!isBindless)
    {
        Config.SetUsedTexture(inst, type, format, flags, TextureOperation.DefaultCbufSlot, handle);
    }

    var operation = new TextureOperation(inst, type, format, flags, handle, compIndex, dests, sources);

    return operation;
}
|
||||
|
||||
/// <summary>
/// Records feature usage implied by reading an attribute: the instance id on vertex
/// shaders, and the X/Y position (fragment coordinate) on fragment shaders.
/// </summary>
/// <param name="attribute">Attribute identifier, one of the <see cref="AttributeConsts"/> values.</param>
public void FlagAttributeRead(int attribute)
{
    if (Config.Stage == ShaderStage.Vertex && attribute == AttributeConsts.InstanceId)
    {
        Config.SetUsedFeature(FeatureFlags.InstanceId);

        return;
    }

    if (Config.Stage != ShaderStage.Fragment)
    {
        return;
    }

    if (attribute == AttributeConsts.PositionX || attribute == AttributeConsts.PositionY)
    {
        Config.SetUsedFeature(FeatureFlags.FragCoordXY);
    }
}
|
||||
|
||||
/// <summary>
/// Records state implied by writing an attribute: which clip distance was written
/// (vertex shaders only), and use of the render target layer (all stages but fragment).
/// </summary>
/// <param name="attribute">Attribute identifier, one of the <see cref="AttributeConsts"/> values.</param>
public void FlagAttributeWritten(int attribute)
{
    if (Config.Stage == ShaderStage.Vertex)
    {
        // The eight clip distances are spaced 4 apart, from ClipDistance0 up to ClipDistance7.
        bool isClipDistance =
            attribute >= AttributeConsts.ClipDistance0 &&
            attribute <= AttributeConsts.ClipDistance7 &&
            ((attribute - AttributeConsts.ClipDistance0) & 3) == 0;

        if (isClipDistance)
        {
            Config.SetClipDistanceWritten((attribute - AttributeConsts.ClipDistance0) / 4);
        }
    }

    bool writesLayer = Config.Stage != ShaderStage.Fragment && attribute == AttributeConsts.Layer;

    if (writesLayer)
    {
        Config.SetUsedFeature(FeatureFlags.RtLayer);
    }
}
|
||||
|
||||
/// <summary>
/// Emits a MarkLabel operation, which places the label at the current position.
/// </summary>
/// <param name="label">Label operand to place.</param>
public void MarkLabel(Operand label) => Add(Instruction.MarkLabel, label);
|
||||
|
||||
/// <summary>
/// Gets the label operand for the block at the given address, creating it on first use.
/// </summary>
/// <param name="address">Address of the block.</param>
/// <returns>The label operand associated with the address.</returns>
public Operand GetLabel(ulong address) => EnsureBlockLabel(address).Label;
|
||||
|
||||
/// <summary>
/// Attaches BRX guard information to the block at the given address; <see cref="EnterBlock"/>
/// uses it to emit the selector comparison. A debug assertion checks that none was set before.
/// </summary>
/// <param name="address">Address of the guarded block.</param>
/// <param name="selector">Operand compared against <paramref name="targetValue"/>.</param>
/// <param name="targetValue">Value the selector must have for the block to run.</param>
/// <param name="nextTargetAddress">Address branched to when the comparison fails.</param>
public void SetBrxTarget(ulong address, Operand selector, int targetValue, ulong nextTargetAddress)
{
    BlockLabel target = EnsureBlockLabel(address);

    Debug.Assert(target.BrxTarget.Selector == null);

    target.BrxTarget = new BrxTarget(selector, targetValue, nextTargetAddress);
}
|
||||
|
||||
/// <summary>
/// Starts the block at the given address: places its label and, when the block has BRX
/// guard information, emits a branch to the next target taken if the selector does not match.
/// </summary>
/// <param name="address">Address of the block being entered.</param>
public void EnterBlock(ulong address)
{
    BlockLabel entered = EnsureBlockLabel(address);

    MarkLabel(entered.Label);

    BrxTarget guard = entered.BrxTarget;

    if (guard.Selector == null)
    {
        return;
    }

    // The label is resolved before the comparison is emitted, matching the argument order of the branch.
    Operand nextTargetLabel = GetLabel(guard.NextTargetAddress);
    Operand selectorMatches = this.ICompareEqual(guard.Selector, Const(guard.ExpectedValue));

    this.BranchIfFalse(nextTargetLabel, selectorMatches);
}
|
||||
|
||||
/// <summary>
/// Looks up the label entry for an address, creating and caching one with a new label operand if missing.
/// </summary>
/// <param name="address">Address of the block.</param>
/// <returns>The cached or newly created entry.</returns>
private BlockLabel EnsureBlockLabel(ulong address)
{
    if (_labels.TryGetValue(address, out BlockLabel existing))
    {
        return existing;
    }

    var created = new BlockLabel(Label());

    _labels.Add(address, created);

    return created;
}
|
||||
|
||||
/// <summary>
/// Emits the position and layer fix-ups applied before a vertex-pipeline shader returns:
/// <list type="bullet">
/// <item>with the viewport transform disabled, X and Y become <c>value * viewInverse - 1</c>;</item>
/// <item>on Vulkan with -1 to 1 depth, Z becomes <c>z * 0.5 + w * 0.5</c>;</item>
/// <item>outside geometry shaders, a configured layer input attribute is copied to the layer output.</item>
/// </list>
/// </summary>
public void PrepareForVertexReturn()
{
    if (Config.GpuAccessor.QueryViewportTransformDisable())
    {
        Operand posX = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));
        Operand posY = this.Load(StorageKind.Output, IoVariable.Position, null, Const(1));
        Operand scaleX = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(0));
        Operand scaleY = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(1));
        Operand minusOne = ConstF(-1.0f);

        this.Store(StorageKind.Output, IoVariable.Position, null, Const(0), this.FPFusedMultiplyAdd(posX, scaleX, minusOne));
        this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), this.FPFusedMultiplyAdd(posY, scaleY, minusOne));
    }

    bool remapDepth =
        Config.Options.TargetApi == TargetApi.Vulkan &&
        Config.GpuAccessor.QueryTransformDepthMinusOneToOne();

    if (remapDepth)
    {
        Operand posZ = this.Load(StorageKind.Output, IoVariable.Position, null, Const(2));
        Operand posW = this.Load(StorageKind.Output, IoVariable.Position, null, Const(3));
        Operand halfOfW = this.FPMultiply(posW, ConstF(0.5f));

        this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(posZ, ConstF(0.5f), halfOfW));
    }

    bool forwardLayer = Config.Stage != ShaderStage.Geometry && Config.HasLayerInputAttribute;

    if (forwardLayer)
    {
        Config.SetUsedFeature(FeatureFlags.RtLayer);

        // The attribute index packs the vector index in the upper bits and the component in the low two.
        int vectorIndex = Config.GpLayerInputAttribute >> 2;
        int componentIndex = Config.GpLayerInputAttribute & 3;

        Operand layerValue = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(vectorIndex), Const(componentIndex));

        this.Store(StorageKind.Output, IoVariable.Layer, null, layerValue);
    }
}
|
||||
|
||||
/// <summary>
/// Saves copies of the position components that <see cref="PrepareForVertexReturn()"/> is about
/// to overwrite, then performs the regular vertex return fix-ups.
/// </summary>
/// <param name="oldXLocal">Local holding the previous X, or null when the viewport transform is not disabled.</param>
/// <param name="oldYLocal">Local holding the previous Y, or null when the viewport transform is not disabled.</param>
/// <param name="oldZLocal">Local holding the previous Z, or null when the depth is not remapped.</param>
public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal)
{
    oldXLocal = null;
    oldYLocal = null;
    oldZLocal = null;

    if (Config.GpuAccessor.QueryViewportTransformDisable())
    {
        oldXLocal = Local();
        this.Copy(oldXLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(0)));

        oldYLocal = Local();
        this.Copy(oldYLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(1)));
    }

    bool remapDepth =
        Config.Options.TargetApi == TargetApi.Vulkan &&
        Config.GpuAccessor.QueryTransformDepthMinusOneToOne();

    if (remapDepth)
    {
        oldZLocal = Local();
        this.Copy(oldZLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(2)));
    }

    PrepareForVertexReturn();
}
|
||||
|
||||
/// <summary>
/// Emits the stage specific epilogue that runs before the main function returns.
/// Nothing is emitted for non-main functions.
/// <list type="bullet">
/// <item>Last vertex-pipeline stage (vertex or tessellation evaluation, without VertexA): vertex return fix-ups.</item>
/// <item>Geometry: passthrough emulation when the shader uses passthrough and the host lacks support for it.</item>
/// <item>Fragment: alpha to coverage dither, depth output, alpha test and color outputs.</item>
/// </list>
/// </summary>
public void PrepareForReturn()
{
    if (IsNonMain)
    {
        return;
    }

    if (Config.LastInVertexPipeline &&
        (Config.Stage == ShaderStage.Vertex || Config.Stage == ShaderStage.TessellationEvaluation) &&
        (Config.Options.Flags & TranslationFlags.VertexA) == 0)
    {
        PrepareForVertexReturn();
    }
    else if (Config.Stage == ShaderStage.Geometry)
    {
        // Copies the position of one input vertex to the output: four loads first, then four stores.
        void WritePositionOutput(int primIndex)
        {
            Operand[] xyzw = new Operand[4];

            for (int c = 0; c < xyzw.Length; c++)
            {
                xyzw[c] = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(c));
            }

            for (int c = 0; c < xyzw.Length; c++)
            {
                this.Store(StorageKind.Output, IoVariable.Position, null, Const(c), xyzw[c]);
            }
        }

        // Same as above, for one user defined attribute of one input vertex.
        void WriteUserDefinedOutput(int index, int primIndex)
        {
            Operand[] xyzw = new Operand[4];

            for (int c = 0; c < xyzw.Length; c++)
            {
                xyzw[c] = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(c));
            }

            for (int c = 0; c < xyzw.Length; c++)
            {
                this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(c), xyzw[c]);
            }
        }

        if (Config.GpPassthrough && !Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
        {
            int vertexCount = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices();

            for (int vertex = 0; vertex < vertexCount; vertex++)
            {
                WritePositionOutput(vertex);

                // Walk the set bits of the passthrough mask, lowest first.
                int remaining = Config.PassthroughAttributes;

                while (remaining != 0)
                {
                    int attrIndex = BitOperations.TrailingZeroCount(remaining);

                    WriteUserDefinedOutput(attrIndex, vertex);
                    Config.SetOutputUserAttribute(attrIndex);

                    remaining &= ~(1 << attrIndex);
                }

                this.EmitVertex();
            }

            this.EndPrimitive();
        }
    }
    else if (Config.Stage == ShaderStage.Fragment)
    {
        GenerateAlphaToCoverageDitherDiscard();

        bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat();

        if (Config.OmapDepth)
        {
            Operand depth = Register(Config.GetDepthRegister(), RegisterType.Gpr);

            this.Store(StorageKind.Output, IoVariable.FragmentOutputDepth, null, depth);
        }

        AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare();

        // Bit 3 of the output map is the alpha component of the first render target.
        if (alphaTestOp != AlphaTestOp.Always && (Config.OmapTargets & 8) != 0)
        {
            if (alphaTestOp == AlphaTestOp.Never)
            {
                this.Discard();
            }
            else
            {
                Instruction comparator;

                switch (alphaTestOp)
                {
                    case AlphaTestOp.Equal:
                        comparator = Instruction.CompareEqual;
                        break;
                    case AlphaTestOp.Greater:
                        comparator = Instruction.CompareGreater;
                        break;
                    case AlphaTestOp.GreaterOrEqual:
                        comparator = Instruction.CompareGreaterOrEqual;
                        break;
                    case AlphaTestOp.Less:
                        comparator = Instruction.CompareLess;
                        break;
                    case AlphaTestOp.LessOrEqual:
                        comparator = Instruction.CompareLessOrEqual;
                        break;
                    case AlphaTestOp.NotEqual:
                        comparator = Instruction.CompareNotEqual;
                        break;
                    default:
                        comparator = 0;
                        break;
                }

                Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\".");

                Operand alpha = Register(3, RegisterType.Gpr);
                Operand alphaRef = ConstF(Config.GpuAccessor.QueryAlphaTestReference());
                Operand alphaPass = Add(Instruction.FP32 | comparator, Local(), alpha, alphaRef);
                Operand alphaPassLabel = Label();

                // Skip the discard when the test passes.
                this.BranchIfTrue(alphaPassLabel, alphaPass);
                this.Discard();
                this.MarkLabel(alphaPassLabel);
            }
        }

        // Color values sit in consecutive registers, four per enabled render target.
        int firstRegister = 0;

        for (int target = 0; target < 8; target++)
        {
            for (int channel = 0; channel < 4; channel++)
            {
                if ((Config.OmapTargets & (1 << (target * 4 + channel))) == 0)
                {
                    continue;
                }

                Operand value = Register(firstRegister + channel, RegisterType.Gpr);

                // Perform B <-> R swap if needed, for BGRA formats (not supported on OpenGL).
                bool mayNeedSwap = !supportsBgra && (channel == 0 || channel == 2);

                if (!mayNeedSwap)
                {
                    this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(target), Const(channel), value);

                    continue;
                }

                Operand isBgra = this.Load(StorageKind.Input, IoVariable.FragmentOutputIsBgra, null, Const(target));

                Operand swapLabel = Label();
                Operand doneLabel = Label();

                this.BranchIfTrue(swapLabel, isBgra);

                this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(target), Const(channel), value);
                this.Branch(doneLabel);

                MarkLabel(swapLabel);

                // Channel 0 goes to 2 and channel 2 goes to 0.
                this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(target), Const(2 - channel), value);

                MarkLabel(doneLabel);
            }

            if ((Config.OmapTargets & (0xf << (target * 4))) != 0)
            {
                Config.SetOutputUserAttribute(target);
                firstRegister += 4;
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Emits the alpha to coverage dither test for fragment shaders. The saturated alpha is
/// quantized to 8 levels and combined with the low bit of the fragment X and Y coordinates
/// to pick one bit of a 32-bit dither mask; the fragment is discarded when that bit is 0.
/// </summary>
private void GenerateAlphaToCoverageDitherDiscard()
{
    // Only needed when the feature is enabled and alpha is written.
    bool active = Config.GpuAccessor.QueryAlphaToCoverageDitherEnable() && (Config.OmapTargets & 8) != 0;

    if (!active)
    {
        return;
    }

    // 11 11 11 10 10 10 10 00
    // 11 01 01 01 01 00 00 00
    Operand ditherMask = Const(unchecked((int)0xfbb99110u));

    Operand fragCoordX = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(0));
    Operand fragCoordY = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(1));

    // Low bit of each coordinate, packed as (y << 1) | x.
    Operand xInt = this.FP32ConvertToU32(fragCoordX);
    Operand xBit = this.BitwiseAnd(xInt, Const(1));
    Operand yInt = this.FP32ConvertToU32(fragCoordY);
    Operand yBit = this.BitwiseAnd(yInt, Const(1));
    Operand yShifted = this.ShiftLeft(yBit, Const(1));
    Operand pixelBits = this.BitwiseOr(xBit, yShifted);

    // Alpha quantized to the 0-7 range.
    Operand alpha = Register(3, RegisterType.Gpr);
    Operand saturatedAlpha = this.FPSaturate(alpha);
    Operand scaledAlpha = this.FPMultiply(saturatedAlpha, ConstF(8));
    Operand scaledAlphaInt = this.FP32ConvertToU32(scaledAlpha);
    Operand alphaLevel = this.IMinimumU32(scaledAlphaInt, Const(7));

    // Mask bit index is (level << 2) | pixelBits.
    Operand levelShifted = this.ShiftLeft(alphaLevel, Const(2));
    Operand bitIndex = this.BitwiseOr(levelShifted, pixelBits);
    Operand shiftedMask = this.ShiftRightU32(ditherMask, bitIndex);
    Operand opaque = this.BitwiseAnd(shiftedMask, Const(1));

    Operand keepLabel = Label();

    this.BranchIfTrue(keepLabel, opaque);
    this.Discard();
    this.MarkLabel(keepLabel);
}
|
||||
|
||||
/// <summary>
/// Gets a snapshot of the operations emitted so far.
/// </summary>
/// <returns>A new array with the operations in emission order.</returns>
public Operation[] GetOperations() => _operations.ToArray();
|
||||
}
|
||||
}
|
819
src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
Normal file
819
src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
Normal file
|
@ -0,0 +1,819 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
static class EmitterContextInsts
|
||||
{
|
||||
/// <summary>Emits an AtomicAdd on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicAdd, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits an AtomicAnd on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicAnd, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits an AtomicCompareAndSwap on the given storage kind with sources a, b, c, d and returns the new local that receives its result.</summary>
public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c, Operand d)
    => context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), a, b, c, d);
|
||||
|
||||
/// <summary>Emits an AtomicMaxS32 on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicMaxS32, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits an AtomicMaxU32 on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicMaxU32, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits an AtomicMinS32 on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicMinS32, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits an AtomicMinU32 on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicMinU32, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits an AtomicOr on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicOr, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits an AtomicSwap on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicSwap, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits an AtomicXor on the given storage kind with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
    => context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits a Ballot operation on <paramref name="a"/> and returns the new local that receives its result.</summary>
public static Operand Ballot(this EmitterContext context, Operand a)
    => context.Add(Instruction.Ballot, Local(), a);
|
||||
|
||||
/// <summary>Emits a Barrier operation. It has no destination, so the returned operand is null.</summary>
public static Operand Barrier(this EmitterContext context)
    => context.Add(Instruction.Barrier);
|
||||
|
||||
/// <summary>Emits a BitCount operation on <paramref name="a"/> and returns the new local that receives its result.</summary>
public static Operand BitCount(this EmitterContext context, Operand a)
    => context.Add(Instruction.BitCount, Local(), a);
|
||||
|
||||
/// <summary>Emits a BitfieldExtractS32 operation with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits a BitfieldExtractU32 operation with sources a, b, c and returns the new local that receives its result.</summary>
public static Operand BitfieldExtractU32(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.BitfieldExtractU32, Local(), a, b, c);
|
||||
|
||||
/// <summary>Emits a BitfieldInsert operation with sources a, b, c, d and returns the new local that receives its result.</summary>
public static Operand BitfieldInsert(this EmitterContext context, Operand a, Operand b, Operand c, Operand d)
    => context.Add(Instruction.BitfieldInsert, Local(), a, b, c, d);
|
||||
|
||||
/// <summary>Emits a BitfieldReverse operation on <paramref name="a"/> and returns the new local that receives its result.</summary>
public static Operand BitfieldReverse(this EmitterContext context, Operand a)
    => context.Add(Instruction.BitfieldReverse, Local(), a);
|
||||
|
||||
/// <summary>Emits a BitwiseAnd operation on a and b and returns the new local that receives its result.</summary>
public static Operand BitwiseAnd(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.BitwiseAnd, Local(), a, b);
|
||||
|
||||
/// <summary>Emits a BitwiseExclusiveOr operation on a and b and returns the new local that receives its result.</summary>
public static Operand BitwiseExclusiveOr(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.BitwiseExclusiveOr, Local(), a, b);
|
||||
|
||||
/// <summary>
/// Conditionally inverts an operand at translation time.
/// </summary>
/// <param name="context">Emitter that receives the operation.</param>
/// <param name="a">Operand to invert.</param>
/// <param name="invert">When true a BitwiseNot is emitted; when false nothing is emitted.</param>
/// <returns>The inverted value when <paramref name="invert"/> is true, otherwise <paramref name="a"/> itself.</returns>
public static Operand BitwiseNot(this EmitterContext context, Operand a, bool invert)
{
    return invert ? context.BitwiseNot(a) : a;
}
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.BitwiseNot"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand BitwiseNot(this EmitterContext context, Operand a)
    => context.Add(Instruction.BitwiseNot, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.BitwiseOr"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand BitwiseOr(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.BitwiseOr, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Branch"/>, passing <paramref name="d"/> as the operand that follows the instruction.</summary>
public static Operand Branch(this EmitterContext context, Operand d)
    => context.Add(Instruction.Branch, d);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.BranchIfFalse"/> with operands <paramref name="d"/> then <paramref name="a"/>.</summary>
public static Operand BranchIfFalse(this EmitterContext context, Operand d, Operand a)
    => context.Add(Instruction.BranchIfFalse, d, a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.BranchIfTrue"/> with operands <paramref name="d"/> then <paramref name="a"/>.</summary>
public static Operand BranchIfTrue(this EmitterContext context, Operand d, Operand a)
    => context.Add(Instruction.BranchIfTrue, d, a);
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.Call"/>. The source list is <c>Const(funcId)</c> followed by <paramref name="args"/>.
/// </summary>
/// <param name="funcId">Function id, placed first in the source list as a constant.</param>
/// <param name="returns">When true a <c>Local()</c> destination is supplied; otherwise the destination is null.</param>
/// <param name="args">Call arguments, copied after the function id.</param>
public static Operand Call(this EmitterContext context, int funcId, bool returns, params Operand[] args)
{
    // One extra leading slot for the function id constant.
    Operand[] sources = new Operand[args.Length + 1];

    sources[0] = Const(funcId);
    args.CopyTo(sources, 1);

    Operand dest = returns ? Local() : null;

    return context.Add(Instruction.Call, dest, sources);
}
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConditionalSelect"/> with a <c>Local()</c> destination and sources a, b, c.</summary>
public static Operand ConditionalSelect(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.ConditionalSelect, Local(), a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Copy"/> of <paramref name="a"/> into a <c>Local()</c> destination.</summary>
public static Operand Copy(this EmitterContext context, Operand a)
    => context.Add(Instruction.Copy, Local(), a);
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.Copy"/> from <paramref name="a"/> into <paramref name="d"/>.
/// Nothing is emitted when <paramref name="d"/> is of type <see cref="OperandType.Constant"/>.
/// </summary>
public static void Copy(this EmitterContext context, Operand d, Operand a)
{
    if (d.Type != OperandType.Constant)
    {
        context.Add(Instruction.Copy, d, a);
    }
}
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Discard"/> with no destination or sources.</summary>
public static Operand Discard(this EmitterContext context)
    => context.Add(Instruction.Discard);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.EmitVertex"/> with no destination or sources.</summary>
public static Operand EmitVertex(this EmitterContext context)
    => context.Add(Instruction.EmitVertex);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.EndPrimitive"/> with no destination or sources.</summary>
public static Operand EndPrimitive(this EmitterContext context)
    => context.Add(Instruction.EndPrimitive);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.FindLSB"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FindLSB(this EmitterContext context, Operand a)
    => context.Add(Instruction.FindLSB, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.FindMSBS32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FindMSBS32(this EmitterContext context, Operand a)
    => context.Add(Instruction.FindMSBS32, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.FindMSBU32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FindMSBU32(this EmitterContext context, Operand a)
    => context.Add(Instruction.FindMSBU32, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertFP32ToFP64"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FP32ConvertToFP64(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertFP32ToFP64, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertFP64ToFP32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FP64ConvertToFP32(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertFP64ToFP32, Local(), a);
|
||||
|
||||
/// <summary>
/// Applies the conditional <c>FPAbsolute</c> (controlled by <paramref name="abs"/>) and then the conditional
/// <c>FPNegate</c> (controlled by <paramref name="neg"/>) to <paramref name="a"/>.
/// </summary>
public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg, Instruction fpType = Instruction.FP32)
{
    Operand afterAbs = context.FPAbsolute(a, abs, fpType);

    return context.FPNegate(afterAbs, neg, fpType);
}
|
||||
|
||||
/// <summary>Emits the unconditional <c>FPAbsolute</c> only when <paramref name="abs"/> is true.</summary>
/// <returns>The emitted result when <paramref name="abs"/> is true; otherwise <paramref name="a"/> unchanged.</returns>
public static Operand FPAbsolute(this EmitterContext context, Operand a, bool abs, Instruction fpType = Instruction.FP32)
    => abs ? context.FPAbsolute(a, fpType) : a;
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Absolute</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPAbsolute(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Absolute, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Add</c> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand FPAdd(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Add, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Ceiling</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPCeiling(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Ceiling, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.CompareEqual</c> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand FPCompareEqual(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.CompareEqual, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.CompareLess</c> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand FPCompareLess(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.CompareLess, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertFP32ToS32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FP32ConvertToS32(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertFP32ToS32, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertFP32ToU32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FP32ConvertToU32(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertFP32ToU32, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertFP64ToS32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FP64ConvertToS32(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertFP64ToS32, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertFP64ToU32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FP64ConvertToU32(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertFP64ToU32, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>Instruction.FP32 | Instruction.Cosine</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPCosine(this EmitterContext context, Operand a)
    => context.Add(Instruction.FP32 | Instruction.Cosine, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Divide</c> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand FPDivide(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Divide, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <c>Instruction.FP32 | Instruction.ExponentB2</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPExponentB2(this EmitterContext context, Operand a)
    => context.Add(Instruction.FP32 | Instruction.ExponentB2, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Floor</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPFloor(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Floor, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.FusedMultiplyAdd</c> with a <c>Local()</c> destination and sources a, b, c.</summary>
public static Operand FPFusedMultiplyAdd(this EmitterContext context, Operand a, Operand b, Operand c, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.FusedMultiplyAdd, Local(), a, b, c);
|
||||
|
||||
/// <summary>Adds <c>Instruction.FP32 | Instruction.LogarithmB2</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPLogarithmB2(this EmitterContext context, Operand a)
    => context.Add(Instruction.FP32 | Instruction.LogarithmB2, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Maximum</c> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand FPMaximum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Maximum, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Minimum</c> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand FPMinimum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Minimum, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Multiply</c> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand FPMultiply(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Multiply, Local(), a, b);
|
||||
|
||||
/// <summary>Emits the unconditional <c>FPNegate</c> only when <paramref name="neg"/> is true.</summary>
/// <returns>The emitted result when <paramref name="neg"/> is true; otherwise <paramref name="a"/> unchanged.</returns>
public static Operand FPNegate(this EmitterContext context, Operand a, bool neg, Instruction fpType = Instruction.FP32)
    => neg ? context.FPNegate(a, fpType) : a;
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Negate</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPNegate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Negate, Local(), a);
|
||||
|
||||
/// <summary>
/// Emits a division of one by <paramref name="a"/> through <c>FPDivide</c>. The numerator is
/// <c>PackDouble2x32(1.0)</c> when <paramref name="fpType"/> is <see cref="Instruction.FP64"/>, otherwise <c>ConstF(1)</c>.
/// </summary>
public static Operand FPReciprocal(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
{
    Operand one = fpType == Instruction.FP64 ? context.PackDouble2x32(1.0) : ConstF(1);

    return context.FPDivide(one, a, fpType);
}
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.ReciprocalSquareRoot</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPReciprocalSquareRoot(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.ReciprocalSquareRoot, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Round</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPRound(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Round, Local(), a);
|
||||
|
||||
/// <summary>Emits the unconditional <c>FPSaturate</c> only when <paramref name="sat"/> is true.</summary>
/// <returns>The emitted result when <paramref name="sat"/> is true; otherwise <paramref name="a"/> unchanged.</returns>
public static Operand FPSaturate(this EmitterContext context, Operand a, bool sat, Instruction fpType = Instruction.FP32)
    => sat ? context.FPSaturate(a, fpType) : a;
|
||||
|
||||
/// <summary>
/// Adds <c>fpType | Instruction.Clamp</c> on <paramref name="a"/> with bounds 0 and 1. For
/// <see cref="Instruction.FP64"/> the bounds are built with <c>PackDouble2x32</c>; otherwise with <c>ConstF</c>.
/// </summary>
public static Operand FPSaturate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
{
    bool isDouble = fpType == Instruction.FP64;

    // Destination first, then lower and upper bound, matching the original argument evaluation order.
    Operand dest = Local();
    Operand lower = isDouble ? context.PackDouble2x32(0.0) : ConstF(0);
    Operand upper = isDouble ? context.PackDouble2x32(1.0) : ConstF(1);

    return context.Add(fpType | Instruction.Clamp, dest, a, lower, upper);
}
|
||||
|
||||
/// <summary>Adds <c>Instruction.FP32 | Instruction.Sine</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPSine(this EmitterContext context, Operand a)
    => context.Add(Instruction.FP32 | Instruction.Sine, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>Instruction.FP32 | Instruction.SquareRoot</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPSquareRoot(this EmitterContext context, Operand a)
    => context.Add(Instruction.FP32 | Instruction.SquareRoot, Local(), a);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.Truncate</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand FPTruncate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.Truncate, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.SwizzleAdd"/> with a <c>Local()</c> destination and sources a, b and <c>Const(mask)</c>.</summary>
public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask)
    => context.Add(Instruction.SwizzleAdd, Local(), a, b, Const(mask));
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.FSIBegin"/>; the value returned by <c>context.Add</c> is discarded.</summary>
public static void FSIBegin(this EmitterContext context)
    => context.Add(Instruction.FSIBegin);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.FSIEnd"/>; the value returned by <c>context.Add</c> is discarded.</summary>
public static void FSIEnd(this EmitterContext context)
    => context.Add(Instruction.FSIEnd);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.GroupMemoryBarrier"/> with no destination or sources.</summary>
public static Operand GroupMemoryBarrier(this EmitterContext context)
    => context.Add(Instruction.GroupMemoryBarrier);
|
||||
|
||||
/// <summary>
/// Applies the conditional <c>IAbsolute</c> (controlled by <paramref name="abs"/>) and then the conditional
/// <c>INegate</c> (controlled by <paramref name="neg"/>) to <paramref name="a"/>.
/// </summary>
public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
{
    Operand afterAbs = context.IAbsolute(a, abs);

    return context.INegate(afterAbs, neg);
}
|
||||
|
||||
/// <summary>Emits the unconditional <c>IAbsolute</c> only when <paramref name="abs"/> is true.</summary>
/// <returns>The emitted result when <paramref name="abs"/> is true; otherwise <paramref name="a"/> unchanged.</returns>
public static Operand IAbsolute(this EmitterContext context, Operand a, bool abs)
    => abs ? context.IAbsolute(a) : a;
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Absolute"/> (no FP type flag) with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand IAbsolute(this EmitterContext context, Operand a)
    => context.Add(Instruction.Absolute, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Add"/> (no FP type flag) with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand IAdd(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.Add, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Clamp"/> with a <c>Local()</c> destination and sources a, b, c.</summary>
public static Operand IClampS32(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.Clamp, Local(), a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ClampU32"/> with a <c>Local()</c> destination and sources a, b, c.</summary>
public static Operand IClampU32(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.ClampU32, Local(), a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareEqual"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareEqual(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareEqual, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareGreater"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareGreater(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareGreater, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareGreaterOrEqual"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareGreaterOrEqual(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareGreaterOrEqual, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareGreaterOrEqualU32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareGreaterOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareGreaterOrEqualU32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareGreaterU32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareGreaterUnsigned(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareGreaterU32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareLess"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareLess(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareLess, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareLessOrEqual"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareLessOrEqual(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareLessOrEqual, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareLessOrEqualU32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareLessOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareLessOrEqualU32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareLessU32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareLessUnsigned(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareLessU32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.CompareNotEqual"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ICompareNotEqual(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.CompareNotEqual, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertS32ToFP32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand IConvertS32ToFP32(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertS32ToFP32, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertS32ToFP64"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand IConvertS32ToFP64(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertS32ToFP64, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertU32ToFP32"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand IConvertU32ToFP32(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertU32ToFP32, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ConvertU32ToFP64"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand IConvertU32ToFP64(this EmitterContext context, Operand a)
    => context.Add(Instruction.ConvertU32ToFP64, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Maximum"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand IMaximumS32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.Maximum, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.MaximumU32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand IMaximumU32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.MaximumU32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Minimum"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand IMinimumS32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.Minimum, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.MinimumU32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand IMinimumU32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.MinimumU32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Multiply"/> (no FP type flag) with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand IMultiply(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.Multiply, Local(), a, b);
|
||||
|
||||
/// <summary>Emits the unconditional <c>INegate</c> only when <paramref name="neg"/> is true.</summary>
/// <returns>The emitted result when <paramref name="neg"/> is true; otherwise <paramref name="a"/> unchanged.</returns>
public static Operand INegate(this EmitterContext context, Operand a, bool neg)
    => neg ? context.INegate(a) : a;
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Negate"/> (no FP type flag) with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand INegate(this EmitterContext context, Operand a)
    => context.Add(Instruction.Negate, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Subtract"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ISubtract(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.Subtract, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <c>fpType | Instruction.IsNan</c> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand IsNan(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
    => context.Add(fpType | Instruction.IsNan, Local(), a);
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.Load"/> for <paramref name="storageKind"/>. Sources are <c>Const((int)ioVariable)</c>,
/// followed by <paramref name="primVertex"/> only when it is not null.
/// </summary>
public static Operand Load(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand primVertex = null)
{
    Operand dest = Local();
    Operand variable = Const((int)ioVariable);

    if (primVertex != null)
    {
        return context.Add(Instruction.Load, storageKind, dest, variable, primVertex);
    }

    return context.Add(Instruction.Load, storageKind, dest, variable);
}
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.Load"/> for <paramref name="storageKind"/>. Sources are <c>Const((int)ioVariable)</c>,
/// then <paramref name="primVertex"/> (only when not null), then <paramref name="elemIndex"/>.
/// </summary>
public static Operand Load(
    this EmitterContext context,
    StorageKind storageKind,
    IoVariable ioVariable,
    Operand primVertex,
    Operand elemIndex)
{
    Operand dest = Local();
    Operand variable = Const((int)ioVariable);

    if (primVertex != null)
    {
        return context.Add(Instruction.Load, storageKind, dest, variable, primVertex, elemIndex);
    }

    return context.Add(Instruction.Load, storageKind, dest, variable, elemIndex);
}
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.Load"/> for <paramref name="storageKind"/>. Sources are <c>Const((int)ioVariable)</c>,
/// then <paramref name="primVertex"/> (only when not null), then <paramref name="arrayIndex"/> and <paramref name="elemIndex"/>.
/// </summary>
public static Operand Load(
    this EmitterContext context,
    StorageKind storageKind,
    IoVariable ioVariable,
    Operand primVertex,
    Operand arrayIndex,
    Operand elemIndex)
{
    Operand dest = Local();
    Operand variable = Const((int)ioVariable);

    if (primVertex != null)
    {
        return context.Add(Instruction.Load, storageKind, dest, variable, primVertex, arrayIndex, elemIndex);
    }

    return context.Add(Instruction.Load, storageKind, dest, variable, arrayIndex, elemIndex);
}
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.LoadConstant"/> with a <c>Local()</c> destination and sources a, b, after
/// recording usage on <c>context.Config</c>.
/// </summary>
/// <remarks>
/// When <paramref name="a"/> is a constant operand, its value is passed to <c>SetUsedConstantBuffer</c>;
/// otherwise <see cref="FeatureFlags.CbIndexing"/> is flagged through <c>SetUsedFeature</c>.
/// </remarks>
public static Operand LoadConstant(this EmitterContext context, Operand a, Operand b)
{
    if (a.Type != OperandType.Constant)
    {
        context.Config.SetUsedFeature(FeatureFlags.CbIndexing);
    }
    else
    {
        context.Config.SetUsedConstantBuffer(a.Value);
    }

    return context.Add(Instruction.LoadConstant, Local(), a, b);
}
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.LoadGlobal"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.LoadGlobal, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.LoadLocal"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand LoadLocal(this EmitterContext context, Operand a)
    => context.Add(Instruction.LoadLocal, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.LoadShared"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand LoadShared(this EmitterContext context, Operand a)
    => context.Add(Instruction.LoadShared, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.MemoryBarrier"/> with no destination or sources.</summary>
public static Operand MemoryBarrier(this EmitterContext context)
    => context.Add(Instruction.MemoryBarrier);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.MultiplyHighS32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand MultiplyHighS32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.MultiplyHighS32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.MultiplyHighU32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand MultiplyHighU32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.MultiplyHighU32, Local(), a, b);
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.PackDouble2x32"/> whose two constant sources are the low and high 32 bits of
/// the 64-bit pattern of <paramref name="value"/>, in that order.
/// </summary>
public static Operand PackDouble2x32(this EmitterContext context, double value)
{
    long bits = BitConverter.DoubleToInt64Bits(value);

    int low = (int)bits;
    int high = (int)(bits >> 32);

    return context.Add(Instruction.PackDouble2x32, Local(), Const(low), Const(high));
}
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.PackDouble2x32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand PackDouble2x32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.PackDouble2x32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.PackHalf2x16"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand PackHalf2x16(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.PackHalf2x16, Local(), a, b);
|
||||
|
||||
/// <summary>Calls <c>context.PrepareForReturn()</c> and then adds <see cref="Instruction.Return"/> with no operands.</summary>
public static void Return(this EmitterContext context)
{
    // The preparation step must run before the return instruction is added.
    context.PrepareForReturn();

    context.Add(Instruction.Return);
}
|
||||
|
||||
/// <summary>
/// Calls <c>context.PrepareForReturn()</c> and then adds <see cref="Instruction.Return"/> with a null
/// destination and <paramref name="returnValue"/> as its only source.
/// </summary>
public static void Return(this EmitterContext context, Operand returnValue)
{
    // The preparation step must run before the return instruction is added.
    context.PrepareForReturn();

    context.Add(Instruction.Return, null, returnValue);
}
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ShiftLeft"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ShiftLeft(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.ShiftLeft, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ShiftRightS32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ShiftRightS32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.ShiftRightS32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ShiftRightU32"/> with a <c>Local()</c> destination and sources a, b.</summary>
public static Operand ShiftRightU32(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.ShiftRightU32, Local(), a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.Shuffle"/> with a pair of <c>Local()</c> destinations and sources a, b, c.</summary>
public static (Operand, Operand) Shuffle(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.Shuffle, (Local(), Local()), a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ShuffleDown"/> with a pair of <c>Local()</c> destinations and sources a, b, c.</summary>
public static (Operand, Operand) ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.ShuffleDown, (Local(), Local()), a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ShuffleUp"/> with a pair of <c>Local()</c> destinations and sources a, b, c.</summary>
public static (Operand, Operand) ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.ShuffleUp, (Local(), Local()), a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.ShuffleXor"/> with a pair of <c>Local()</c> destinations and sources a, b, c.</summary>
public static (Operand, Operand) ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.Store"/> for <paramref name="storageKind"/> with a null destination. Sources are
/// <c>Const((int)ioVariable)</c>, then <paramref name="invocationId"/> (only when not null), then <paramref name="value"/>.
/// </summary>
public static Operand Store(
    this EmitterContext context,
    StorageKind storageKind,
    IoVariable ioVariable,
    Operand invocationId,
    Operand value)
{
    Operand variable = Const((int)ioVariable);

    if (invocationId != null)
    {
        return context.Add(Instruction.Store, storageKind, null, variable, invocationId, value);
    }

    return context.Add(Instruction.Store, storageKind, null, variable, value);
}
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.Store"/> for <paramref name="storageKind"/> with a null destination. Sources are
/// <c>Const((int)ioVariable)</c>, then <paramref name="invocationId"/> (only when not null), then
/// <paramref name="elemIndex"/> and <paramref name="value"/>.
/// </summary>
public static Operand Store(
    this EmitterContext context,
    StorageKind storageKind,
    IoVariable ioVariable,
    Operand invocationId,
    Operand elemIndex,
    Operand value)
{
    Operand variable = Const((int)ioVariable);

    if (invocationId != null)
    {
        return context.Add(Instruction.Store, storageKind, null, variable, invocationId, elemIndex, value);
    }

    return context.Add(Instruction.Store, storageKind, null, variable, elemIndex, value);
}
|
||||
|
||||
/// <summary>
/// Adds <see cref="Instruction.Store"/> for <paramref name="storageKind"/> with a null destination. Sources are
/// <c>Const((int)ioVariable)</c>, then <paramref name="invocationId"/> (only when not null), then
/// <paramref name="arrayIndex"/>, <paramref name="elemIndex"/> and <paramref name="value"/>.
/// </summary>
public static Operand Store(
    this EmitterContext context,
    StorageKind storageKind,
    IoVariable ioVariable,
    Operand invocationId,
    Operand arrayIndex,
    Operand elemIndex,
    Operand value)
{
    Operand variable = Const((int)ioVariable);

    if (invocationId != null)
    {
        return context.Add(Instruction.Store, storageKind, null, variable, invocationId, arrayIndex, elemIndex, value);
    }

    return context.Add(Instruction.Store, storageKind, null, variable, arrayIndex, elemIndex, value);
}
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.StoreGlobal"/> with a null destination and sources a, b, c.</summary>
public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.StoreGlobal, null, a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.StoreGlobal16"/> with a null destination and sources a, b, c.</summary>
public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.StoreGlobal16, null, a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.StoreGlobal8"/> with a null destination and sources a, b, c.</summary>
public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.StoreGlobal8, null, a, b, c);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.StoreLocal"/> with a null destination and sources a, b.</summary>
public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.StoreLocal, null, a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.StoreShared"/> with a null destination and sources a, b.</summary>
public static Operand StoreShared(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.StoreShared, null, a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.StoreShared16"/> with a null destination and sources a, b.</summary>
public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.StoreShared16, null, a, b);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.StoreShared8"/> with a null destination and sources a, b.</summary>
public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.StoreShared8, null, a, b);
|
||||
|
||||
/// <summary>Emits <c>UnpackDouble2x32</c> on <paramref name="a"/> with index 1.</summary>
public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
    => context.UnpackDouble2x32(a, 1);
|
||||
|
||||
/// <summary>Emits <c>UnpackDouble2x32</c> on <paramref name="a"/> with index 0.</summary>
public static Operand UnpackDouble2x32Low(this EmitterContext context, Operand a)
    => context.UnpackDouble2x32(a, 0);
|
||||
|
||||
/// <summary>
/// Adds an <see cref="Operation"/> for <see cref="Instruction.UnpackDouble2x32"/> built with
/// <paramref name="index"/>, a <c>Local()</c> destination and source <paramref name="a"/>.
/// </summary>
/// <returns>The destination operand created here (not the value returned by <c>context.Add</c>).</returns>
private static Operand UnpackDouble2x32(this EmitterContext context, Operand a, int index)
{
    Operand result = Local();
    Operation unpack = new Operation(Instruction.UnpackDouble2x32, index, result, a);

    context.Add(unpack);

    return result;
}
|
||||
|
||||
/// <summary>Emits <c>UnpackHalf2x16</c> on <paramref name="a"/> with index 1.</summary>
public static Operand UnpackHalf2x16High(this EmitterContext context, Operand a)
    => context.UnpackHalf2x16(a, 1);
|
||||
|
||||
/// <summary>Emits <c>UnpackHalf2x16</c> on <paramref name="a"/> with index 0.</summary>
public static Operand UnpackHalf2x16Low(this EmitterContext context, Operand a)
    => context.UnpackHalf2x16(a, 0);
|
||||
|
||||
/// <summary>
/// Adds an <see cref="Operation"/> for <see cref="Instruction.UnpackHalf2x16"/> built with
/// <paramref name="index"/>, a <c>Local()</c> destination and source <paramref name="a"/>.
/// </summary>
/// <returns>The destination operand created here (not the value returned by <c>context.Add</c>).</returns>
private static Operand UnpackHalf2x16(this EmitterContext context, Operand a, int index)
{
    Operand result = Local();
    Operation unpack = new Operation(Instruction.UnpackHalf2x16, index, result, a);

    context.Add(unpack);

    return result;
}
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.VoteAll"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand VoteAll(this EmitterContext context, Operand a)
    => context.Add(Instruction.VoteAll, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.VoteAllEqual"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand VoteAllEqual(this EmitterContext context, Operand a)
    => context.Add(Instruction.VoteAllEqual, Local(), a);
|
||||
|
||||
/// <summary>Adds <see cref="Instruction.VoteAny"/> with a <c>Local()</c> destination and source <paramref name="a"/>.</summary>
public static Operand VoteAny(this EmitterContext context, Operand a)
    => context.Add(Instruction.VoteAny, Local(), a);
|
||||
}
|
||||
}
|
27
src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs
Normal file
27
src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs
Normal file
|
@ -0,0 +1,27 @@
|
|||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Features used by the shader that are important for the code generator to know in advance.
    /// These typically change the declarations in the shader header.
    /// </summary>
    /// <remarks>
    /// Each member occupies a single bit (bits 0 through 9), so values can be combined with bitwise OR.
    /// </remarks>
    [Flags]
    public enum FeatureFlags
    {
        None = 0x000,

        // Affected by resolution scaling.
        IntegerSampling = 0x001, // bit 0
        FragCoordXY = 0x002,     // bit 1

        Bindless = 0x004,        // bit 2
        InstanceId = 0x008,      // bit 3
        DrawParameters = 0x010,  // bit 4
        RtLayer = 0x020,         // bit 5
        CbIndexing = 0x040,      // bit 6
        IaIndexing = 0x080,      // bit 7
        OaIndexing = 0x100,      // bit 8
        FixedFuncAttr = 0x200    // bit 9
    }
}
|
866
src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs
Normal file
866
src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs
Normal file
|
@ -0,0 +1,866 @@
|
|||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
static class FunctionMatch
|
||||
{
|
||||
// Pattern trees used by the matching pass. Each one is created once by its field initializer
// and is only read afterwards, so the array references are marked readonly.
private static readonly IPatternTreeNode[] _fsiGetAddressTree = PatternTrees.GetFsiGetAddress();
private static readonly IPatternTreeNode[] _fsiGetAddressV2Tree = PatternTrees.GetFsiGetAddressV2();
private static readonly IPatternTreeNode[] _fsiIsLastWarpThreadPatternTree = PatternTrees.GetFsiIsLastWarpThread();
private static readonly IPatternTreeNode[] _fsiBeginPatternTree = PatternTrees.GetFsiBeginPattern();
private static readonly IPatternTreeNode[] _fsiEndPatternTree = PatternTrees.GetFsiEndPattern();
|
||||
|
||||
/// <summary>
/// Looks through the non-main functions of <paramref name="program"/> for one matching either FSI
/// "get address" pattern with a valid register allocation. If one is found it is marked
/// <see cref="FunctionType.Unused"/>, and the remaining functions are then checked against the FSI
/// begin/end patterns and typed accordingly.
/// </summary>
/// <param name="program">Decoded program whose functions are inspected and re-typed in place.</param>
public static void RunPass(DecodedProgram program)
{
    // Slots 1..3 receive destination registers from the matched tree; slot 0 is not written in this method.
    // The array is shared across iterations and is later handed to MatchesFsi.
    byte[] regs = new byte[4];
    bool foundGetAddress = false;

    foreach (DecodedFunction candidate in program)
    {
        if (candidate == program.MainFunction)
        {
            continue;
        }

        // Unlike regs, the fourth register is reset for every candidate.
        int reg4 = 0;

        TreeNode[] tree = BuildTree(candidate.Blocks);

        if (Matches(_fsiGetAddressTree, tree))
        {
            regs[1] = tree[0].GetRd();
            regs[2] = tree[2].GetRd();
            regs[3] = tree[1].GetRd();
            reg4 = tree[3].GetRd();
        }
        else if (Matches(_fsiGetAddressV2Tree, tree))
        {
            // The V2 pattern yields the same registers from different node positions.
            regs[1] = tree[2].GetRd();
            regs[2] = tree[1].GetRd();
            regs[3] = tree[0].GetRd();
            reg4 = tree[3].GetRd();
        }

        // Ensure the register allocation is valid.
        // If so, then we have a match.
        bool allDistinct =
            regs[1] != regs[2] &&
            regs[2] != regs[3] &&
            regs[1] != regs[3];

        // reg4 must be the register right after regs[1], and that register must not be regs[2] or regs[3].
        bool pairLayoutValid =
            regs[1] + 1 != regs[2] &&
            regs[1] + 1 != regs[3] &&
            regs[1] + 1 == reg4;

        bool noZeroRegister =
            regs[2] != RegisterConsts.RegisterZeroIndex &&
            regs[3] != RegisterConsts.RegisterZeroIndex &&
            reg4 != RegisterConsts.RegisterZeroIndex;

        if (allDistinct && pairLayoutValid && noZeroRegister)
        {
            foundGetAddress = true;
            candidate.Type = FunctionType.Unused;
            break;
        }
    }

    foreach (DecodedFunction candidate in program)
    {
        // Begin/end matching only applies once a get-address function has been identified.
        if (candidate.IsCompilerGenerated || candidate == program.MainFunction || !foundGetAddress)
        {
            continue;
        }

        TreeNode[] tree = BuildTree(candidate.Blocks);

        if (MatchesFsi(_fsiBeginPatternTree, program, candidate, tree, regs))
        {
            candidate.Type = FunctionType.BuiltInFSIBegin;
        }
        else if (MatchesFsi(_fsiEndPatternTree, program, candidate, tree, regs))
        {
            candidate.Type = FunctionType.BuiltInFSIEnd;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Immutable reference to a <see cref="TreeNode"/> together with an index and an inversion flag.
/// </summary>
private readonly struct TreeNodeUse
{
    /// <summary>Referenced node; may be null (the tree builder passes null when no defining node is known).</summary>
    public TreeNode Node { get; }

    /// <summary>Index value supplied by the creator of this use.</summary>
    public int Index { get; }

    /// <summary>True after an odd number of <see cref="Flip"/> calls; false for uses made with the public constructor.</summary>
    public bool Inverted { get; }

    private TreeNodeUse(int index, bool inverted, TreeNode node)
    {
        Node = node;
        Index = index;
        Inverted = inverted;
    }

    /// <summary>Creates a non-inverted use.</summary>
    public TreeNodeUse(int index, TreeNode node) : this(index, false, node)
    {
    }

    /// <summary>Returns a copy of this use with <see cref="Inverted"/> toggled.</summary>
    public TreeNodeUse Flip() => new TreeNodeUse(Index, !Inverted, Node);
}
|
||||
|
||||
/// <summary>Kind of a <see cref="TreeNode"/>: an instruction node or a label node.</summary>
private enum TreeNodeType : byte
{
    /// <summary>Node created from an instruction.</summary>
    Op = 0,

    /// <summary>Node created without an instruction, used as a label.</summary>
    Label = 1
}
|
||||
|
||||
/// <summary>
/// Node of the tree produced by <c>BuildTree</c>: either an instruction (<see cref="TreeNodeType.Op"/>)
/// or a label (<see cref="TreeNodeType.Label"/>).
/// </summary>
private class TreeNode
{
    /// <summary>Instruction for op nodes; left at its default value for label nodes.</summary>
    public readonly InstOp Op;

    /// <summary>Uses attached to an op node; left null for label nodes.</summary>
    public readonly List<TreeNodeUse> Uses;

    public TreeNodeType Type { get; }

    /// <summary>Order value supplied by the creator of the node.</summary>
    public byte Order { get; }

    /// <summary>Creates a label node. <see cref="Op"/> and <see cref="Uses"/> are not assigned.</summary>
    public TreeNode(byte order)
    {
        Order = order;
        Type = TreeNodeType.Label;
    }

    /// <summary>Creates an op node for <paramref name="op"/> with an empty use list.</summary>
    public TreeNode(InstOp op, byte order)
    {
        Order = order;
        Type = TreeNodeType.Op;
        Op = op;
        Uses = new List<TreeNodeUse>();
    }

    /// <summary>Returns bits 3 to 5 of the raw opcode.</summary>
    public byte GetPd() => (byte)((Op.RawOpCode >> 3) & 7);

    /// <summary>Returns the raw opcode cast to a byte (its low 8 bits).</summary>
    public byte GetRd() => (byte)Op.RawOpCode;
}
|
||||
|
||||
/// <summary>
/// Builds the instruction trees for a list of blocks.
/// </summary>
/// <remarks>
/// Each instruction becomes one node. When an instruction reads a predicate or register that was
/// written by a previously visited instruction, the node of the writer is removed from the top level
/// list and is referenced from the reader as one of its uses. Reads of registers without a known writer
/// are encoded as uses with a null node and index -(register + 2), while reads of the "always true"
/// predicate and of the zero register are encoded with index -1.
/// </remarks>
/// <param name="blocks">Blocks with the instructions, visited in array order</param>
/// <returns>Nodes that were not consumed by any other node, in visit order</returns>
private static TreeNode[] BuildTree(Block[] blocks)
{
    var roots = new List<TreeNode>();
    var labelByAddress = new Dictionary<ulong, TreeNode>();

    // Last definition seen for each predicate and general purpose register.
    // An entry with a null node means that nothing visited so far wrote the register.
    var lastPredDef = new TreeNodeUse[RegisterConsts.PredsCount];
    var lastGprDef = new TreeNodeUse[RegisterConsts.GprsCount];

    // Common part of predicate and register reads.
    TreeNodeUse Consume(TreeNodeUse[] defs, byte regIndex)
    {
        TreeNodeUse def = defs[regIndex];

        if (def.Node == null)
        {
            // Value comes from outside, encode the register index on the use index.
            return new TreeNodeUse(-(regIndex + 2), null);
        }

        // The writer now lives below the reader, so it is no longer a top level node.
        roots.Remove(def.Node);

        return def;
    }

    TreeNodeUse ReadPred(byte predIndex, bool inverted)
    {
        if (predIndex == RegisterConsts.PredicateTrueIndex)
        {
            return new TreeNodeUse(-1, null);
        }

        TreeNodeUse use = Consume(lastPredDef, predIndex);

        return inverted ? use.Flip() : use;
    }

    TreeNodeUse ReadGpr(byte regIndex)
    {
        if (regIndex == RegisterConsts.RegisterZeroIndex)
        {
            return new TreeNodeUse(-1, null);
        }

        return Consume(lastGprDef, regIndex);
    }

    // Incremented for each label created, and also used as order of the order dependant instructions.
    byte labelCount = 0;

    foreach (Block block in blocks)
    {
        if (block.Predecessors.Count > 1)
        {
            var blockLabel = new TreeNode(labelCount++);

            roots.Add(blockLabel);
            labelByAddress.Add(block.Address, blockLabel);
        }

        for (int i = 0; i < block.OpCodes.Count; i++)
        {
            InstOp op = block.OpCodes[i];
            ulong raw = op.RawOpCode;
            InstProps props = op.Props;

            byte nodeOrder = 0;

            if (IsOrderDependant(op.Name))
            {
                nodeOrder = labelCount;
            }

            var node = new TreeNode(op, nodeOrder);

            // Uses: instruction predicate, source predicate, then sources A, B and C, then branch target label.

            if (!props.HasFlag(InstProps.NoPred))
            {
                byte predIndex = (byte)((raw >> 16) & 7);
                bool predInv = (raw & (1UL << 19)) != 0;

                node.Uses.Add(ReadPred(predIndex, predInv));
            }

            if (props.HasFlag(InstProps.Ps))
            {
                byte predIndex = (byte)((raw >> 39) & 7);
                bool predInv = (raw & (1UL << 42)) != 0;

                node.Uses.Add(ReadPred(predIndex, predInv));
            }

            if (props.HasFlag(InstProps.Ra))
            {
                node.Uses.Add(ReadGpr((byte)(raw >> 8)));
            }

            if ((props & (InstProps.Rb | InstProps.Rb2)) != 0)
            {
                // The second encoding of source B lives on the lowest bits, the regular one at bit 20.
                int rbShift = props.HasFlag(InstProps.Rb2) ? 0 : 20;

                node.Uses.Add(ReadGpr((byte)(raw >> rbShift)));
            }

            if (props.HasFlag(InstProps.Rc))
            {
                node.Uses.Add(ReadGpr((byte)(raw >> 39)));
            }

            if (op.Name == InstName.Bra && labelByAddress.TryGetValue(op.GetAbsoluteAddress(), out TreeNode target))
            {
                node.Uses.Add(new TreeNodeUse(0, target));
            }

            // Definitions: predicate destination first, then register destination.

            int defIndex = 0;

            InstProps pdType = props & InstProps.PdMask;

            if (pdType != 0)
            {
                int pdShift;

                switch (pdType)
                {
                    case InstProps.Pd:
                        pdShift = 3;
                        break;
                    case InstProps.LPd:
                        pdShift = 48;
                        break;
                    case InstProps.SPd:
                        pdShift = 30;
                        break;
                    case InstProps.TPd:
                        pdShift = 51;
                        break;
                    case InstProps.VPd:
                        pdShift = 45;
                        break;
                    default:
                        throw new InvalidOperationException($"Table has unknown predicate destination {pdType}.");
                }

                byte predIndex = (byte)((raw >> pdShift) & 7);
                int outputIndex = defIndex++;

                if (predIndex != RegisterConsts.PredicateTrueIndex)
                {
                    lastPredDef[predIndex] = new TreeNodeUse(outputIndex, node);
                }
            }

            if (props.HasFlag(InstProps.Rd))
            {
                byte rd = (byte)raw;
                int outputIndex = defIndex++;

                if (rd != RegisterConsts.RegisterZeroIndex)
                {
                    lastGprDef[rd] = new TreeNodeUse(outputIndex, node);
                }
            }

            roots.Add(node);
        }
    }

    return roots.ToArray();
}
|
||||
|
||||
/// <summary>
/// Checks if an instruction is one of the atomic, load or surface load instructions
/// whose tree nodes receive the current order value instead of zero.
/// </summary>
/// <param name="name">Name of the instruction</param>
/// <returns>True if the instruction is on the list, false otherwise</returns>
private static bool IsOrderDependant(InstName name)
{
    return name is
        InstName.Atom or
        InstName.AtomCas or
        InstName.Atoms or
        InstName.AtomsCas or
        InstName.Ld or
        InstName.Ldg or
        InstName.Ldl or
        InstName.Lds or
        InstName.Suatom or
        InstName.SuatomB or
        InstName.SuatomB2 or
        InstName.SuatomCas or
        InstName.SuatomCasB or
        InstName.Suld or
        InstName.SuldB or
        InstName.SuldD or
        InstName.SuldDB;
}
|
||||
|
||||
/// <summary>
/// Node of a pattern tree, which describes the instruction tree node that is expected at a given position.
/// </summary>
private interface IPatternTreeNode
{
    /// <summary>Name that the instruction must have.</summary>
    InstName Name { get; }

    /// <summary>Node type that the tree node must have.</summary>
    TreeNodeType Type { get; }

    /// <summary>Order value that the tree node must have.</summary>
    byte Order { get; }

    /// <summary>Value that the <c>InstProps.Ib</c> flag of the instruction must have.</summary>
    bool IsImm { get; }

    /// <summary>Uses that the tree node must have, in order.</summary>
    List<PatternTreeNodeUse> Uses { get; }

    /// <summary>
    /// Checks if an instruction satisfies the instruction specific conditions of the node.
    /// </summary>
    /// <param name="opInfo">Instruction to check</param>
    /// <returns>True if the instruction is accepted, false otherwise</returns>
    bool Matches(in InstOp opInfo);
}
|
||||
|
||||
/// <summary>
/// Use of a value on a pattern tree.
/// </summary>
private readonly struct PatternTreeNodeUse
{
    /// <summary>Index of the value being used.</summary>
    public int Index { get; }

    /// <summary>True if the use is expected to be inverted.</summary>
    public bool Inverted { get; }

    /// <summary>Pattern node that produces the value, or null when no node is expected.</summary>
    public IPatternTreeNode Node { get; }

    /// <summary>Same use, with the inverted state toggled.</summary>
    public PatternTreeNodeUse Inv
    {
        get
        {
            return new PatternTreeNodeUse(Index, !Inverted, Node);
        }
    }

    /// <summary>
    /// Creates a use that is not inverted.
    /// </summary>
    /// <param name="index">Index of the value being used</param>
    /// <param name="node">Pattern node that produces the value, can be null</param>
    public PatternTreeNodeUse(int index, IPatternTreeNode node) : this(index, false, node)
    {
    }

    private PatternTreeNodeUse(int index, bool inverted, IPatternTreeNode node)
    {
        Node = node;
        Inverted = inverted;
        Index = index;
    }
}
|
||||
|
||||
/// <summary>
/// Pattern tree node whose instruction specific conditions are checked by a delegate.
/// </summary>
/// <typeparam name="T">Type that the raw instruction encoding is reinterpreted as before calling the delegate</typeparam>
private class PatternTreeNode<T> : IPatternTreeNode
{
    private readonly Func<T, bool> _match;

    public InstName Name { get; }
    public TreeNodeType Type { get; }
    public byte Order { get; }
    public bool IsImm { get; }
    public List<PatternTreeNodeUse> Uses { get; } = new List<PatternTreeNodeUse>();

    /// <summary>Use of the output with index 0 of this node.</summary>
    public PatternTreeNodeUse Out => new PatternTreeNodeUse(0, this);

    /// <summary>
    /// Creates a pattern node without uses.
    /// </summary>
    /// <param name="name">Name that the instruction must have</param>
    /// <param name="match">Delegate with the instruction specific conditions</param>
    /// <param name="type">Node type that the tree node must have</param>
    /// <param name="order">Order value that the tree node must have</param>
    /// <param name="isImm">Value that the <c>InstProps.Ib</c> flag of the instruction must have</param>
    public PatternTreeNode(InstName name, Func<T, bool> match, TreeNodeType type = TreeNodeType.Op, byte order = 0, bool isImm = false)
    {
        _match = match;
        Name = name;
        Type = type;
        Order = order;
        IsImm = isImm;
    }

    /// <summary>
    /// Appends a use to the node.
    /// </summary>
    /// <param name="use">Use to append</param>
    /// <returns>This node, to allow chaining calls</returns>
    public PatternTreeNode<T> Use(PatternTreeNodeUse use)
    {
        Uses.Add(use);

        return this;
    }

    /// <summary>
    /// Creates a use of one of the outputs of this node.
    /// </summary>
    /// <param name="index">Index of the output</param>
    /// <returns>Use of the output</returns>
    public PatternTreeNodeUse OutAt(int index) => new PatternTreeNodeUse(index, this);

    /// <summary>
    /// Checks if the instruction has the expected name and passes the conditions of the delegate.
    /// </summary>
    /// <param name="opInfo">Instruction to check</param>
    /// <returns>True if the instruction is accepted, false otherwise</returns>
    public bool Matches(in InstOp opInfo)
    {
        if (opInfo.Name != Name)
        {
            return false;
        }

        // A local copy is needed to take a reference to the encoding.
        ulong encoding = opInfo.RawOpCode;

        return _match(Unsafe.As<ulong, T>(ref encoding));
    }
}
|
||||
|
||||
/// <summary>
/// Checks if a function matches one of the FSI patterns that start with a call.
/// </summary>
/// <remarks>
/// The last instruction of the first block must be a call to a function that matches
/// the "is last warp thread" pattern. On a match, the caller is removed from the callee.
/// </remarks>
/// <param name="pattern">Pattern that the function must match</param>
/// <param name="program">Program used to find the called function</param>
/// <param name="function">Function to check</param>
/// <param name="functionTree">Tree that was built for <paramref name="function"/></param>
/// <param name="externalRegs">Expected external registers, element 0 is overwritten by this method</param>
/// <returns>True if the function matches, false otherwise</returns>
private static bool MatchesFsi(
    IPatternTreeNode[] pattern,
    DecodedProgram program,
    DecodedFunction function,
    TreeNode[] functionTree,
    byte[] externalRegs)
{
    if (function.Blocks.Length == 0)
    {
        return false;
    }

    InstOp lastOp = function.Blocks[0].GetLastOp();

    if (lastOp.Name != InstName.Cal)
    {
        return false;
    }

    DecodedFunction callee = program.GetFunctionByAddress(lastOp.GetAbsoluteAddress());

    if (callee == null)
    {
        return false;
    }

    TreeNode[] calleeTree = BuildTree(callee.Blocks);

    if (!Matches(_fsiIsLastWarpThreadPatternTree, calleeTree))
    {
        return false;
    }

    // The predicate written by the first node of the callee is what the pattern refers to as call argument 0.
    externalRegs[0] = calleeTree[0].GetPd();

    if (!Matches(pattern, functionTree, externalRegs))
    {
        return false;
    }

    callee.RemoveCaller(function);

    return true;
}
|
||||
|
||||
/// <summary>
/// Checks if a list of trees matches a list of pattern trees.
/// </summary>
/// <param name="pTree">Pattern trees</param>
/// <param name="cTree">Trees to check, compared with the pattern at the same position</param>
/// <param name="externalRegs">Expected external registers, forwarded to the node comparison</param>
/// <returns>True if both lists have the same length and all trees match, false otherwise</returns>
private static bool Matches(IPatternTreeNode[] pTree, TreeNode[] cTree, byte[] externalRegs = null)
{
    int count = pTree.Length;

    if (count != cTree.Length)
    {
        return false;
    }

    int matched = 0;

    // Stops at the first tree that does not match.
    while (matched < count && Matches(pTree[matched], cTree[matched], externalRegs))
    {
        matched++;
    }

    return matched == count;
}
|
||||
|
||||
/// <summary>
/// Checks if a tree node, and all the nodes that it uses, match a pattern node.
/// </summary>
/// <remarks>
/// Pattern uses with an index of -2 or lower refer to an entry of <paramref name="externalRegs"/>.
/// If the array is null or does not have that entry, the node is considered to not match.
/// </remarks>
/// <param name="pTreeNode">Pattern node</param>
/// <param name="cTreeNode">Tree node to check</param>
/// <param name="externalRegs">Expected external registers, can be null if the pattern does not refer to any</param>
/// <returns>True if the node matches, false otherwise</returns>
private static bool Matches(IPatternTreeNode pTreeNode, TreeNode cTreeNode, byte[] externalRegs)
{
    if (!pTreeNode.Matches(in cTreeNode.Op) ||
        pTreeNode.Type != cTreeNode.Type ||
        pTreeNode.Order != cTreeNode.Order ||
        pTreeNode.IsImm != cTreeNode.Op.Props.HasFlag(InstProps.Ib))
    {
        return false;
    }

    // Only instruction nodes have uses to compare.
    if (pTreeNode.Type == TreeNodeType.Op)
    {
        if (pTreeNode.Uses.Count != cTreeNode.Uses.Count)
        {
            return false;
        }

        for (int index = 0; index < pTreeNode.Uses.Count; index++)
        {
            var pUse = pTreeNode.Uses[index];
            var cUse = cTreeNode.Uses[index];

            if (pUse.Index <= -2)
            {
                int externalIndex = -pUse.Index - 2;

                // The array is optional on the callers, a pattern that needs a missing entry can't match.
                if (externalRegs == null || (uint)externalIndex >= (uint)externalRegs.Length)
                {
                    return false;
                }

                if (externalRegs[externalIndex] != (-cUse.Index - 2))
                {
                    return false;
                }
            }
            else if (pUse.Index != cUse.Index)
            {
                return false;
            }

            if (pUse.Inverted != cUse.Inverted || (pUse.Node == null) != (cUse.Node == null))
            {
                return false;
            }

            if (pUse.Node != null && !Matches(pUse.Node, cUse.Node, externalRegs))
            {
                return false;
            }
        }
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Builders for the pattern trees, and for the pattern nodes that they are made of.
/// </summary>
private static class PatternTrees
{
    public static IPatternTreeNode[] GetFsiGetAddress()
    {
        PatternTreeNodeUse affinity = S2r(SReg.Affinity).Use(PT).Out;
        PatternTreeNodeUse orderingTicket = S2r(SReg.OrderingTicket).Use(PT).Out;

        PatternTreeNodeUse affinityLow8 = Lop32i(LogicOp.And, 0xff).Use(PT).Use(affinity).Out;
        PatternTreeNodeUse ticketLow8 = Lop32i(LogicOp.And, 0xff).Use(PT).Use(orderingTicket).Out;
        PatternTreeNodeUse innerIscadd = Iscadd(cc: false, 8).Use(PT).Use(affinityLow8).Use(ticketLow8).Out;
        PatternTreeNodeUse ticketMasked = Lop32i(LogicOp.And, 0xfe00).Use(PT).Use(orderingTicket).Out;

        return new IPatternTreeNode[]
        {
            Iscadd(cc: true, 2, 0, 404).Use(PT).Use(innerIscadd),
            ShrU32W(16).Use(PT).Use(orderingTicket),
            Iadd32i(0x200).Use(PT).Use(ticketMasked),
            Iadd(x: true, 0, 405).Use(PT).Use(RZ),
            Ret().Use(PT)
        };
    }

    public static IPatternTreeNode[] GetFsiGetAddressV2()
    {
        PatternTreeNodeUse affinity = S2r(SReg.Affinity).Use(PT).Out;
        PatternTreeNodeUse orderingTicket = S2r(SReg.OrderingTicket).Use(PT).Out;

        PatternTreeNodeUse ticketMasked = Lop32i(LogicOp.And, 0xfe00).Use(PT).Use(orderingTicket).Out;
        PatternTreeNodeUse ticketLow8 = Lop32i(LogicOp.And, 0xff).Use(PT).Use(orderingTicket).Out;
        PatternTreeNodeUse bfiResult = Bfi(0x808).Use(PT).Use(affinity).Use(ticketLow8).Out;

        return new IPatternTreeNode[]
        {
            ShrU32W(16).Use(PT).Use(orderingTicket),
            Iadd32i(0x200).Use(PT).Use(ticketMasked),
            Iscadd(cc: true, 2, 0, 404).Use(PT).Use(bfiResult),
            Iadd(x: true, 0, 405).Use(PT).Use(RZ),
            Ret().Use(PT)
        };
    }

    public static IPatternTreeNode[] GetFsiIsLastWarpThread()
    {
        PatternTreeNodeUse threadKill = S2r(SReg.ThreadKill).Use(PT).Out;
        PatternTreeNodeUse laneId = S2r(SReg.LaneId).Use(PT).Out;

        // Built from the innermost node to the outermost one.
        PatternTreeNodeUse lopResult = Lop(negB: true, LogicOp.PassB).Use(PT).Use(RZ).Use(threadKill).OutAt(1);
        PatternTreeNodeUse isetpNeResult = IsetpU32(IComp.Ne).Use(PT).Use(PT).Use(lopResult).Use(RZ).Out;
        PatternTreeNodeUse voteResult = Vote(VoteMode.Any).Use(PT).Use(isetpNeResult).OutAt(1);
        PatternTreeNodeUse floResult = FloU32().Use(PT).Use(voteResult).Out;

        return new IPatternTreeNode[]
        {
            IsetpU32(IComp.Eq).Use(PT).Use(PT).Use(floResult).Use(laneId),
            Ret().Use(PT)
        };
    }

    public static IPatternTreeNode[] GetFsiBeginPattern()
    {
        PatternTreeNodeUse addressLow = CallArg(1);

        // Compares the result of shifting the value right by 16 with the call argument 3.
        static PatternTreeNodeUse HighU16Equals(PatternTreeNodeUse x)
        {
            PatternTreeNodeUse expected = CallArg(3);
            PatternTreeNodeUse high = ShrU32W(16).Use(PT).Use(x).Out;

            return IsetpU32(IComp.Eq).Use(PT).Use(PT).Use(high).Use(expected).Out;
        }

        PatternTreeNodeUse firstLoad = LdgE(CacheOpLd.Cg, LsSize.B32).Use(PT).Use(addressLow).Out;
        PatternTreeNodeUse secondLoad = LdgE(CacheOpLd.Cg, LsSize.B32, 1).Use(PT).Use(addressLow).Out;

        // The label is both a top level node and the target of the branch that follows it.
        PatternTreeNode<byte> label = Label();

        return new IPatternTreeNode[]
        {
            Cal(),
            Ret().Use(CallArg(0).Inv),
            Ret().Use(HighU16Equals(firstLoad)),
            label,
            Bra().Use(HighU16Equals(secondLoad).Inv).Use(label.Out),
            Ret().Use(PT)
        };
    }

    public static IPatternTreeNode[] GetFsiEndPattern()
    {
        PatternTreeNodeUse voteResult = Vote(VoteMode.All).Use(PT).Use(PT).OutAt(1);
        PatternTreeNodeUse popcResult = Popc().Use(PT).Use(voteResult).Out;
        PatternTreeNodeUse threadKill = S2r(SReg.ThreadKill).Use(PT).Out;
        PatternTreeNodeUse laneId = S2r(SReg.LaneId).Use(PT).Out;

        PatternTreeNodeUse addressLow = CallArg(1);
        PatternTreeNodeUse increment = CallArg(2);

        PatternTreeNodeUse isetpNeResult = IsetpU32(IComp.Ne).Use(PT).Use(PT).Use(threadKill).Use(RZ).Out;
        PatternTreeNodeUse floResult = FloU32().Use(PT).Use(voteResult).Out;
        PatternTreeNodeUse isetpEqResult = IsetpU32(IComp.Eq).Use(PT).Use(PT).Use(floResult).Use(laneId).Out;

        PatternTreeNodeUse xmadMrg = Xmad(XmadCop.Cfull, mrg: true, hiloB: true)
            .Use(PT)
            .Use(increment)
            .Use(popcResult)
            .Use(RZ).Out;

        PatternTreeNodeUse xmadLow = Xmad(XmadCop.Cfull)
            .Use(PT)
            .Use(increment)
            .Use(popcResult)
            .Use(RZ).Out;

        PatternTreeNodeUse xmadResult = Xmad(XmadCop.Cbcc, psl: true, hiloA: true, hiloB: true)
            .Use(PT)
            .Use(increment)
            .Use(xmadMrg)
            .Use(xmadLow).Out;

        return new IPatternTreeNode[]
        {
            Cal(),
            Ret().Use(CallArg(0).Inv),
            Membar(Decoders.Membar.Vc).Use(PT),
            Ret().Use(isetpNeResult),
            RedE(RedOp.Add, AtomSize.U32).Use(isetpEqResult).Use(addressLow).Use(xmadResult),
            Ret().Use(PT)
        };
    }

    private static PatternTreeNode<InstBfiI> Bfi(int imm) =>
        new PatternTreeNode<InstBfiI>(InstName.Bfi, inst => !inst.WriteCC && inst.Imm20 == imm, isImm: true);

    private static PatternTreeNode<InstBra> Bra() =>
        new PatternTreeNode<InstBra>(InstName.Bra, inst => inst.Ccc == Ccc.T && !inst.Ca);

    private static PatternTreeNode<InstCal> Cal() =>
        new PatternTreeNode<InstCal>(InstName.Cal, inst => !inst.Ca && inst.Inc);

    private static PatternTreeNode<InstFloR> FloU32() =>
        new PatternTreeNode<InstFloR>(InstName.Flo, inst => !inst.Signed && !inst.Sh && !inst.NegB && !inst.WriteCC);

    private static PatternTreeNode<InstIaddC> Iadd(bool x, int cbufSlot, int cbufOffset) =>
        new PatternTreeNode<InstIaddC>(InstName.Iadd, inst =>
            !inst.Sat &&
            !inst.WriteCC &&
            inst.X == x &&
            inst.AvgMode == AvgMode.NoNeg &&
            inst.CbufSlot == cbufSlot &&
            inst.CbufOffset == cbufOffset);

    private static PatternTreeNode<InstIadd32i> Iadd32i(int imm) =>
        new PatternTreeNode<InstIadd32i>(InstName.Iadd32i, inst =>
            !inst.Sat &&
            !inst.WriteCC &&
            !inst.X &&
            inst.AvgMode == AvgMode.NoNeg &&
            inst.Imm32 == imm);

    private static PatternTreeNode<InstIscaddR> Iscadd(bool cc, int imm) =>
        new PatternTreeNode<InstIscaddR>(InstName.Iscadd, inst =>
            inst.WriteCC == cc &&
            inst.AvgMode == AvgMode.NoNeg &&
            inst.Imm5 == imm);

    private static PatternTreeNode<InstIscaddC> Iscadd(bool cc, int imm, int cbufSlot, int cbufOffset) =>
        new PatternTreeNode<InstIscaddC>(InstName.Iscadd, inst =>
            inst.WriteCC == cc &&
            inst.AvgMode == AvgMode.NoNeg &&
            inst.Imm5 == imm &&
            inst.CbufSlot == cbufSlot &&
            inst.CbufOffset == cbufOffset);

    private static PatternTreeNode<InstIsetpR> IsetpU32(IComp comp) =>
        new PatternTreeNode<InstIsetpR>(InstName.Isetp, inst => !inst.Signed && inst.IComp == comp && inst.Bop == BoolOp.And);

    // Labels have no instruction, so the delegate accepts anything and only the node type is checked.
    private static PatternTreeNode<byte> Label() =>
        new PatternTreeNode<byte>(InstName.Invalid, inst => true, type: TreeNodeType.Label);

    private static PatternTreeNode<InstLopR> Lop(bool negB, LogicOp logicOp) =>
        new PatternTreeNode<InstLopR>(InstName.Lop, inst =>
            !inst.NegA &&
            inst.NegB == negB &&
            !inst.WriteCC &&
            !inst.X &&
            inst.Lop == logicOp &&
            inst.PredicateOp == PredicateOp.F);

    private static PatternTreeNode<InstLop32i> Lop32i(LogicOp logicOp, int imm) =>
        new PatternTreeNode<InstLop32i>(InstName.Lop32i, inst =>
            !inst.NegA &&
            !inst.NegB &&
            !inst.X &&
            !inst.WriteCC &&
            inst.LogicOp == logicOp &&
            inst.Imm32 == imm);

    private static PatternTreeNode<InstMembar> Membar(Membar membar) =>
        new PatternTreeNode<InstMembar>(InstName.Membar, inst => inst.Membar == membar);

    private static PatternTreeNode<InstPopcR> Popc() =>
        new PatternTreeNode<InstPopcR>(InstName.Popc, inst => !inst.NegB);

    private static PatternTreeNode<InstRet> Ret() =>
        new PatternTreeNode<InstRet>(InstName.Ret, inst => inst.Ccc == Ccc.T);

    private static PatternTreeNode<InstS2r> S2r(SReg reg) =>
        new PatternTreeNode<InstS2r>(InstName.S2r, inst => inst.SReg == reg);

    private static PatternTreeNode<InstShrI> ShrU32W(int imm) =>
        new PatternTreeNode<InstShrI>(
            InstName.Shr,
            inst => !inst.Signed && !inst.Brev && inst.M && inst.XMode == 0 && inst.Imm20 == imm,
            isImm: true);

    private static PatternTreeNode<InstLdg> LdgE(CacheOpLd cacheOp, LsSize size, byte order = 0) =>
        new PatternTreeNode<InstLdg>(
            InstName.Ldg,
            inst => inst.E && inst.CacheOp == cacheOp && inst.LsSize == size,
            order: order);

    private static PatternTreeNode<InstRed> RedE(RedOp redOp, AtomSize size, byte order = 0) =>
        new PatternTreeNode<InstRed>(
            InstName.Red,
            inst => inst.E && inst.RedOp == redOp && inst.RedSize == size,
            order: order);

    private static PatternTreeNode<InstVote> Vote(VoteMode mode) =>
        new PatternTreeNode<InstVote>(InstName.Vote, inst => inst.VoteMode == mode);

    private static PatternTreeNode<InstXmadR> Xmad(XmadCop cop, bool psl = false, bool mrg = false, bool hiloA = false, bool hiloB = false) =>
        new PatternTreeNode<InstXmadR>(InstName.Xmad, inst =>
            inst.XmadCop == cop &&
            inst.Psl == psl &&
            inst.Mrg == mrg &&
            inst.HiloA == hiloA &&
            inst.HiloB == hiloB);

    // The "always true" predicate and the zero register share the same encoding on the tree.
    private static PatternTreeNodeUse PT => PTOrRZ();
    private static PatternTreeNodeUse RZ => PTOrRZ();
    private static PatternTreeNodeUse Undef => new PatternTreeNodeUse(0, null);

    // Value that comes from outside the function, matched against the external registers array.
    private static PatternTreeNodeUse CallArg(int index) => new PatternTreeNodeUse(-(index + 2), null);

    private static PatternTreeNodeUse PTOrRZ() => new PatternTreeNodeUse(-1, null);
}
|
||||
|
||||
/// <summary>
/// Writes a tree node, and all the nodes that it uses, to the console.
/// </summary>
/// <param name="node">Node to write</param>
/// <param name="indentation">Text written before each line of the uses of the node</param>
private static void PrintTreeNode(TreeNode node, string indentation)
{
    Console.WriteLine($" {node.Op.Name}");

    // Label nodes are created without a list of uses, and they can be reached
    // from the branches that target them, so there is nothing else to write here.
    if (node.Uses == null)
    {
        return;
    }

    for (int i = 0; i < node.Uses.Count; i++)
    {
        TreeNodeUse use = node.Uses[i];
        bool last = i == node.Uses.Count - 1;
        char separator = last ? '`' : '|';

        if (use.Node != null)
        {
            // The node name is written on the same line by the recursive call.
            Console.Write($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})");
            PrintTreeNode(use.Node, indentation + (last ? " " : " | "));
        }
        else
        {
            Console.WriteLine($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index}) NULL");
        }
    }
}
|
||||
|
||||
/// <summary>
/// Writes a pattern tree node, and all the nodes that it uses, to the console.
/// </summary>
/// <param name="node">Pattern node to write</param>
/// <param name="indentation">Text written before each line of the uses of the node</param>
private static void PrintTreeNode(IPatternTreeNode node, string indentation)
{
    Console.WriteLine($" {node.Name}");

    int lastIndex = node.Uses.Count - 1;

    for (int useIndex = 0; useIndex <= lastIndex; useIndex++)
    {
        PatternTreeNodeUse use = node.Uses[useIndex];
        bool last = useIndex == lastIndex;
        char separator = last ? '`' : '|';

        if (use.Node == null)
        {
            Console.WriteLine($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index}) NULL");

            continue;
        }

        // The node name is written on the same line by the recursive call.
        Console.Write($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})");
        PrintTreeNode(use.Node, indentation + (last ? " " : " | "));
    }
}
|
||||
}
|
||||
}
|
52
src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
Normal file
52
src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
Normal file
|
@ -0,0 +1,52 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Constants and helpers with the constant buffer offsets of the storage and UBE descriptors.
/// </summary>
static class GlobalMemory
{
    private const int StorageDescsBaseOffset = 0x44; // In words.

    public const int StorageDescSize = 4; // In words.
    public const int StorageMaxCount = 16;

    public const int StorageDescsSize = StorageDescSize * StorageMaxCount;

    public const int UbeBaseOffset = 0x98; // In words.
    public const int UbeMaxCount = 9;
    public const int UbeDescsSize = StorageDescSize * UbeMaxCount;
    public const int UbeFirstCbuf = 8;

    /// <summary>
    /// Checks if an instruction accesses global memory.
    /// </summary>
    /// <param name="inst">Instruction to check</param>
    /// <param name="storageKind">Storage kind of the instruction, only taken into account for atomics</param>
    /// <returns>True for global memory atomics, loads and stores, false otherwise</returns>
    public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind)
    {
        if (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory)
        {
            return true;
        }

        return inst == Instruction.LoadGlobal ||
               inst == Instruction.StoreGlobal ||
               inst == Instruction.StoreGlobal16 ||
               inst == Instruction.StoreGlobal8;
    }

    /// <summary>
    /// Gets the offset of a storage descriptor.
    /// </summary>
    /// <param name="stage">Shader stage that owns the descriptor</param>
    /// <param name="slot">Index of the descriptor</param>
    /// <returns>Offset of the descriptor, in words</returns>
    public static int GetStorageCbOffset(ShaderStage stage, int slot) => GetStorageBaseCbOffset(stage) + slot * StorageDescSize;

    /// <summary>
    /// Gets the offset of the first storage descriptor of a shader stage.
    /// </summary>
    /// <param name="stage">Shader stage</param>
    /// <returns>Offset of the first descriptor, in words, or 0 for stages that are not handled</returns>
    public static int GetStorageBaseCbOffset(ShaderStage stage)
    {
        // Index of the block of descriptors used by the stage.
        // Compute uses the same index as tessellation evaluation.
        int blockIndex;

        switch (stage)
        {
            case ShaderStage.Vertex:
                blockIndex = 0;
                break;
            case ShaderStage.TessellationControl:
                blockIndex = 1;
                break;
            case ShaderStage.Compute:
            case ShaderStage.TessellationEvaluation:
                blockIndex = 2;
                break;
            case ShaderStage.Geometry:
                blockIndex = 3;
                break;
            case ShaderStage.Fragment:
                blockIndex = 4;
                break;
            default:
                return 0;
        }

        return StorageDescsBaseOffset + blockIndex * StorageDescsSize;
    }

    /// <summary>
    /// Gets the offset of a UBE descriptor.
    /// </summary>
    /// <param name="slot">Index of the descriptor</param>
    /// <returns>Offset of the descriptor, in words</returns>
    public static int GetConstantUbeOffset(int slot) => UbeBaseOffset + slot * StorageDescSize;
}
|
||||
}
|
|
@ -0,0 +1,263 @@
|
|||
using Ryujinx.Graphics.Shader.Instructions;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
class BindlessElimination
|
||||
{
|
||||
/// <summary>
/// Turns the bindless texture and image accesses of a block into regular accesses,
/// for the cases where the location of the handle on the constant buffer can be found.
/// </summary>
/// <param name="block">Block with the operations to process</param>
/// <param name="config">Shader configuration, notified of the textures that are used</param>
public static void RunPass(BasicBlock block, ShaderConfig config)
{
    // We can turn a bindless access into a regular access by recognizing the pattern
    // produced by the compiler for separate texture and sampler.
    // We check for the following conditions:
    // - The handle is a constant buffer value.
    // - The handle is the result of a bitwise OR logical operation.
    // - Both sources of the OR operation come from a constant buffer.
    for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
    {
        if (node.Value is not TextureOperation texOp || (texOp.Flags & TextureFlags.Bindless) == 0)
        {
            continue;
        }

        if (texOp.Inst is Instruction.Lod or Instruction.TextureSample or Instruction.TextureSize)
        {
            EliminateBindlessTexture(block, config, texOp);
        }
        else if (texOp.Inst is Instruction.ImageLoad or Instruction.ImageStore or Instruction.ImageAtomic)
        {
            EliminateBindlessImage(block, config, texOp);
        }
    }
}

// Handles the texture instructions, where the handle can be a combination of texture and sampler handles.
private static void EliminateBindlessTexture(BasicBlock block, ShaderConfig config, TextureOperation texOp)
{
    Operand handle = Utils.FindLastOperation(texOp.GetSource(0), block);

    // Some instructions do not encode an accurate sampler type:
    // - Most instructions use the same type for 1D and Buffer.
    // - Query instructions may not have any type.
    // For those cases, we need to try getting the type from current GPU state,
    // as long as bindless elimination is successful and we know where the texture descriptor is located.
    bool rewriteSamplerType =
        texOp.Type == SamplerType.TextureBuffer ||
        texOp.Inst == Instruction.TextureSize;

    if (handle.Type == OperandType.ConstantBuffer)
    {
        SetHandle(config, texOp, handle.GetCbufOffset(), handle.GetCbufSlot(), rewriteSamplerType, isImage: false);

        return;
    }

    if (handle.AsgOp is not Operation combineOp || combineOp.Inst != Instruction.BitwiseOr)
    {
        return;
    }

    Operand lhs = Utils.FindLastOperation(combineOp.GetSource(0), block);
    Operand rhs = Utils.FindLastOperation(combineOp.GetSource(1), block);

    // For cases where we have a constant, ensure that the constant is always
    // the second operand.
    // Since this is a commutative operation, both are fine,
    // and having a "canonical" representation simplifies some checks below.
    if (lhs.Type == OperandType.Constant && rhs.Type != OperandType.Constant)
    {
        (lhs, rhs) = (rhs, lhs);
    }

    TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;

    // Try to match the following patterns:
    // Masked pattern:
    // - samplerHandle = samplerHandle & 0xFFF00000;
    // - textureHandle = textureHandle & 0xFFFFF;
    // - combinedHandle = samplerHandle | textureHandle;
    // Where samplerHandle and textureHandle come from a constant buffer.
    // Shifted pattern:
    // - samplerHandle = samplerId << 20;
    // - combinedHandle = samplerHandle | textureHandle;
    // Where samplerId and textureHandle come from a constant buffer.
    // Constant pattern:
    // - combinedHandle = samplerHandleConstant | textureHandle;
    // Where samplerHandleConstant is a constant value, and textureHandle comes from a constant buffer.
    if (lhs.AsgOp is Operation lhsOp)
    {
        if (rhs.AsgOp is Operation rhsOp &&
            lhsOp.Inst == Instruction.BitwiseAnd &&
            rhsOp.Inst == Instruction.BitwiseAnd)
        {
            lhs = GetSourceForMaskedHandle(lhsOp, 0xFFFFF);
            rhs = GetSourceForMaskedHandle(rhsOp, 0xFFF00000);

            // The OR operation is commutative, so we can also try to swap the operands to get a match.
            if (lhs == null || rhs == null)
            {
                lhs = GetSourceForMaskedHandle(rhsOp, 0xFFFFF);
                rhs = GetSourceForMaskedHandle(lhsOp, 0xFFF00000);
            }

            if (lhs == null || rhs == null)
            {
                return;
            }
        }
        else if (lhsOp.Inst == Instruction.ShiftLeft)
        {
            Operand shift = lhsOp.GetSource(1);

            if (shift.Type == OperandType.Constant && shift.Value == 20)
            {
                lhs = rhs;
                rhs = lhsOp.GetSource(0);
                handleType = TextureHandleType.SeparateSamplerId;
            }
        }
    }
    else if (rhs.AsgOp is Operation shiftOp && shiftOp.Inst == Instruction.ShiftLeft)
    {
        Operand shift = shiftOp.GetSource(1);

        if (shift.Type == OperandType.Constant && shift.Value == 20)
        {
            rhs = shiftOp.GetSource(0);
            handleType = TextureHandleType.SeparateSamplerId;
        }
    }
    else if (rhs.Type == OperandType.Constant && (rhs.Value & 0xfffff) == 0)
    {
        handleType = TextureHandleType.SeparateConstantSamplerHandle;
    }

    if (lhs.Type != OperandType.ConstantBuffer)
    {
        return;
    }

    if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
    {
        // The constant is packed in place of the constant buffer offset of the second handle.
        SetHandle(
            config,
            texOp,
            TextureHandle.PackOffsets(lhs.GetCbufOffset(), ((rhs.Value >> 20) & 0xfff), handleType),
            TextureHandle.PackSlots(lhs.GetCbufSlot(), 0),
            rewriteSamplerType,
            isImage: false);

        return;
    }

    if (rhs.Type == OperandType.ConstantBuffer)
    {
        SetHandle(
            config,
            texOp,
            TextureHandle.PackOffsets(lhs.GetCbufOffset(), rhs.GetCbufOffset(), handleType),
            TextureHandle.PackSlots(lhs.GetCbufSlot(), rhs.GetCbufSlot()),
            rewriteSamplerType,
            isImage: false);
    }
}

// Handles the image instructions, where only handles that come directly from a constant buffer are supported.
private static void EliminateBindlessImage(BasicBlock block, ShaderConfig config, TextureOperation texOp)
{
    Operand handle = Utils.FindLastOperation(texOp.GetSource(0), block);

    if (handle.Type != OperandType.ConstantBuffer)
    {
        return;
    }

    int cbufOffset = handle.GetCbufOffset();
    int cbufSlot = handle.GetCbufSlot();

    if (texOp.Format == TextureFormat.Unknown)
    {
        // The instruction has no format, get one from the configuration.
        if (texOp.Inst == Instruction.ImageAtomic)
        {
            texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot);
        }
        else
        {
            texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
        }
    }

    bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;

    SetHandle(config, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
}
|
||||
|
||||
/// <summary>
/// Gets the constant buffer operand of a bitwise AND operation that masks a handle.
/// </summary>
/// <param name="asgOp">Operation, assumed to have been already checked to be a bitwise AND</param>
/// <param name="mask">Mask that the other operand must have, when it is a constant</param>
/// <returns>The constant buffer operand, or null if the sources do not have the expected form</returns>
private static Operand GetSourceForMaskedHandle(Operation asgOp, uint mask)
{
    Operand first = asgOp.GetSource(0);
    Operand second = asgOp.GetSource(1);

    return (first.Type, second.Type) switch
    {
        // We can't check if the mask matches here as both operands are from a constant buffer.
        // Be optimistic and assume it matches. Avoid constant buffer 1 as official drivers
        // use this one to store compiler constants.
        (OperandType.ConstantBuffer, OperandType.ConstantBuffer) => first.GetCbufSlot() == 1 ? second : first,
        (OperandType.ConstantBuffer, OperandType.Constant) when (uint)second.Value == mask => first,
        (OperandType.Constant, OperandType.ConstantBuffer) when (uint)first.Value == mask => second,
        _ => null
    };
}
|
||||
|
||||
/// <summary>
/// Sets the handle of a texture operation, optionally fixes its sampler type,
/// and registers the texture as used on the shader configuration.
/// </summary>
/// <param name="config">Shader configuration</param>
/// <param name="texOp">Texture operation to modify</param>
/// <param name="cbufOffset">Constant buffer offset of the handle</param>
/// <param name="cbufSlot">Constant buffer slot of the handle</param>
/// <param name="rewriteSamplerType">True to query the sampler type and update the operation with it</param>
/// <param name="isImage">True if the operation is an image operation, false for textures</param>
private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType, bool isImage)
{
    texOp.SetHandle(cbufOffset, cbufSlot);

    if (rewriteSamplerType)
    {
        SamplerType queriedType = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);

        if (texOp.Inst.IsTextureQuery())
        {
            texOp.Type = queriedType;
        }
        else if (texOp.Type == SamplerType.TextureBuffer && queriedType == SamplerType.Texture1D)
        {
            // Index where the next extra source is inserted, right after the existing coordinate.
            int insertIndex = 1;

            if (InstEmit.Sample1DAs2D)
            {
                queriedType = SamplerType.Texture2D;
                texOp.InsertSource(insertIndex++, OperandHelper.Const(0));
            }

            bool hasIntCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
            bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;

            if (!isImage && hasIntCoords && !hasLodLevel)
            {
                // IntCoords textures must always have explicit LOD.
                texOp.SetLodLevelFlag();
                texOp.InsertSource(insertIndex, OperandHelper.Const(0));
            }

            texOp.Type = queriedType;
        }
    }

    config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, cbufSlot, cbufOffset);
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
/// <summary>
/// Optimization pass that turns bindless texture accesses into indexed accesses.
/// </summary>
static class BindlessToIndexed
{
    /// <summary>
    /// Runs the pass on all the texture operations of a block.
    /// </summary>
    /// <param name="block">Block with the operations to process</param>
    /// <param name="config">Shader configuration, notified of the textures that are used</param>
    public static void RunPass(BasicBlock block, ShaderConfig config)
    {
        // We can turn a bindless texture access into an indexed access,
        // as long as the following conditions are true:
        // - The handle is loaded using a LDC instruction.
        // - The handle is loaded from the constant buffer with the handles (CB2 for NVN).
        // - The load has a constant offset.
        // The base offset of the array of handles on the constant buffer is the constant offset.
        for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
        {
            if (node.Value is not TextureOperation texOp || (texOp.Flags & TextureFlags.Bindless) == 0)
            {
                continue;
            }

            if (texOp.GetSource(0).AsgOp is not Operation loadOp || loadOp.Inst != Instruction.LoadConstant)
            {
                continue;
            }

            Operand loadSlot = loadOp.GetSource(0);
            Operand loadOffset = loadOp.GetSource(1);

            bool loadsFromHandlesBuffer = loadSlot.Type == OperandType.Constant && loadSlot.Value == 2;

            if (!loadsFromHandlesBuffer)
            {
                continue;
            }

            // The offset of the load must be (index + constant) >> shift.
            if (loadOffset.AsgOp is not Operation shiftOp || shiftOp.Inst != Instruction.ShiftRightU32)
            {
                continue;
            }

            if (shiftOp.GetSource(0).AsgOp is not Operation addOp || addOp.Inst != Instruction.Add)
            {
                continue;
            }

            Operand baseOffset = addOp.GetSource(1);

            if (baseOffset.Type != OperandType.Constant)
            {
                continue;
            }

            TurnIntoIndexed(config, texOp, baseOffset.Value / 4);

            // The new handle source is the non-constant part of the add, shifted right by 3.
            Operand index = Local();

            block.Operations.AddBefore(node, new Operation(Instruction.ShiftRightU32, index, addOp.GetSource(0), Const(3)));

            texOp.SetSource(0, index);
        }
    }

    // Makes the operation indexed, and registers the texture with the handle as used.
    private static void TurnIntoIndexed(ShaderConfig config, TextureOperation texOp, int handle)
    {
        texOp.TurnIntoIndexed(handle);

        config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, handle);
    }
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
/// <summary>
/// Pass that drops a branch at the end of a block when the following non-empty block
/// is just an unconditional branch to the same destination.
/// </summary>
static class BranchElimination
{
    /// <summary>
    /// Clears the block branch target if its final branch is redundant.
    /// </summary>
    /// <param name="block">Block to check</param>
    /// <returns>True if the branch target was cleared, false otherwise</returns>
    public static bool RunPass(BasicBlock block)
    {
        if (!block.HasBranch)
        {
            return false;
        }

        Operation lastOp = (Operation)block.GetLastOp();

        if (!IsRedundantBranch(lastOp, Next(block)))
        {
            return false;
        }

        block.Branch = null;

        return true;
    }

    /// <summary>
    /// Checks if a branch is made redundant by the block that follows it.
    /// </summary>
    /// <param name="current">Last operation of the current block</param>
    /// <param name="nextBlock">Next non-empty block, or null if there is none</param>
    /// <returns>True if both operations have the same destination and the next one is a Branch</returns>
    private static bool IsRedundantBranch(Operation current, BasicBlock nextBlock)
    {
        // Here we check that:
        // - The current block ends with a branch.
        // - The next block only contains a branch.
        // - The branch on the next block is unconditional.
        // - Both branches are jumping to the same location.
        // In this case, the branch on the current block can be removed,
        // as the next block is going to jump to the same place anyway.
        return nextBlock?.Operations.First?.Value is Operation { Inst: Instruction.Branch } next &&
               current.Dest == next.Dest;
    }

    /// <summary>
    /// Finds the first block after <paramref name="block"/> that has at least one operation.
    /// </summary>
    /// <param name="block">Block where the search starts (exclusive)</param>
    /// <returns>The next non-empty block, or null if all remaining blocks are empty</returns>
    /// <exception cref="InvalidOperationException">An empty block reports that it has a branch</exception>
    private static BasicBlock Next(BasicBlock block)
    {
        BasicBlock candidate = block.Next;

        for (; candidate != null && candidate.Operations.Count == 0; candidate = candidate.Next)
        {
            if (candidate.HasBranch)
            {
                throw new InvalidOperationException("Found a bogus empty block that \"ends with a branch\".");
            }
        }

        return candidate;
    }
}
|
||||
}
|
|
@ -0,0 +1,346 @@
|
|||
using Ryujinx.Common.Utilities;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
static class ConstantFolding
|
||||
{
|
||||
/// <summary>
/// Folds an operation whose sources are all constants, replacing it with a copy of the result.
/// Operations with non-constant sources, or with an instruction not listed here, are left untouched.
/// </summary>
/// <remarks>
/// Folding never throws for edge-case inputs: clamps with inverted bounds produce the upper bound
/// (same as "min(max(x, lo), hi)"), and "int.MinValue / -1" wraps around instead of overflowing.
/// </remarks>
/// <param name="operation">Operation to be folded in place</param>
public static void RunPass(Operation operation)
{
    if (!AreAllSourcesConstant(operation))
    {
        return;
    }

    switch (operation.Inst)
    {
        case Instruction.Add:
            EvaluateBinary(operation, (x, y) => x + y);
            break;

        case Instruction.BitCount:
            EvaluateUnary(operation, (x) => BitCount(x));
            break;

        case Instruction.BitwiseAnd:
            EvaluateBinary(operation, (x, y) => x & y);
            break;

        case Instruction.BitwiseExclusiveOr:
            EvaluateBinary(operation, (x, y) => x ^ y);
            break;

        case Instruction.BitwiseNot:
            EvaluateUnary(operation, (x) => ~x);
            break;

        case Instruction.BitwiseOr:
            EvaluateBinary(operation, (x, y) => x | y);
            break;

        case Instruction.BitfieldExtractS32:
            BitfieldExtractS32(operation);
            break;

        case Instruction.BitfieldExtractU32:
            BitfieldExtractU32(operation);
            break;

        case Instruction.Clamp:
            // Math.Clamp throws when min > max, so that case is resolved by hand.
            EvaluateTernary(operation, (x, y, z) => y > z ? z : Math.Clamp(x, y, z));
            break;

        case Instruction.ClampU32:
            // Same guard as above, using unsigned comparison of the bounds.
            EvaluateTernary(operation, (x, y, z) => (uint)y > (uint)z ? z : (int)Math.Clamp((uint)x, (uint)y, (uint)z));
            break;

        case Instruction.CompareEqual:
            EvaluateBinary(operation, (x, y) => x == y);
            break;

        case Instruction.CompareGreater:
            EvaluateBinary(operation, (x, y) => x > y);
            break;

        case Instruction.CompareGreaterOrEqual:
            EvaluateBinary(operation, (x, y) => x >= y);
            break;

        case Instruction.CompareGreaterOrEqualU32:
            EvaluateBinary(operation, (x, y) => (uint)x >= (uint)y);
            break;

        case Instruction.CompareGreaterU32:
            EvaluateBinary(operation, (x, y) => (uint)x > (uint)y);
            break;

        case Instruction.CompareLess:
            EvaluateBinary(operation, (x, y) => x < y);
            break;

        case Instruction.CompareLessOrEqual:
            EvaluateBinary(operation, (x, y) => x <= y);
            break;

        case Instruction.CompareLessOrEqualU32:
            EvaluateBinary(operation, (x, y) => (uint)x <= (uint)y);
            break;

        case Instruction.CompareLessU32:
            EvaluateBinary(operation, (x, y) => (uint)x < (uint)y);
            break;

        case Instruction.CompareNotEqual:
            EvaluateBinary(operation, (x, y) => x != y);
            break;

        case Instruction.Divide:
            // Division by zero folds to 0. Division by -1 is folded as a wrapping negation,
            // because "int.MinValue / -1" throws OverflowException on .NET.
            EvaluateBinary(operation, (x, y) => y == 0 ? 0 : (y == -1 ? unchecked(-x) : x / y));
            break;

        case Instruction.FP32 | Instruction.Add:
            EvaluateFPBinary(operation, (x, y) => x + y);
            break;

        case Instruction.FP32 | Instruction.Clamp:
            // Math.Clamp throws when min > max (never true when a bound is NaN).
            EvaluateFPTernary(operation, (x, y, z) => y > z ? z : Math.Clamp(x, y, z));
            break;

        case Instruction.FP32 | Instruction.CompareEqual:
            EvaluateFPBinary(operation, (x, y) => x == y);
            break;

        case Instruction.FP32 | Instruction.CompareGreater:
            EvaluateFPBinary(operation, (x, y) => x > y);
            break;

        case Instruction.FP32 | Instruction.CompareGreaterOrEqual:
            EvaluateFPBinary(operation, (x, y) => x >= y);
            break;

        case Instruction.FP32 | Instruction.CompareLess:
            EvaluateFPBinary(operation, (x, y) => x < y);
            break;

        case Instruction.FP32 | Instruction.CompareLessOrEqual:
            EvaluateFPBinary(operation, (x, y) => x <= y);
            break;

        case Instruction.FP32 | Instruction.CompareNotEqual:
            EvaluateFPBinary(operation, (x, y) => x != y);
            break;

        case Instruction.FP32 | Instruction.Divide:
            EvaluateFPBinary(operation, (x, y) => x / y);
            break;

        case Instruction.FP32 | Instruction.Multiply:
            EvaluateFPBinary(operation, (x, y) => x * y);
            break;

        case Instruction.FP32 | Instruction.Negate:
            EvaluateFPUnary(operation, (x) => -x);
            break;

        case Instruction.FP32 | Instruction.Subtract:
            EvaluateFPBinary(operation, (x, y) => x - y);
            break;

        case Instruction.IsNan:
            EvaluateFPUnary(operation, (x) => float.IsNaN(x));
            break;

        case Instruction.LoadConstant:
            // Constant slot and offset: the load becomes a direct constant buffer operand.
            operation.TurnIntoCopy(Cbuf(operation.GetSource(0).Value, operation.GetSource(1).Value));
            break;

        case Instruction.Maximum:
            EvaluateBinary(operation, (x, y) => Math.Max(x, y));
            break;

        case Instruction.MaximumU32:
            EvaluateBinary(operation, (x, y) => (int)Math.Max((uint)x, (uint)y));
            break;

        case Instruction.Minimum:
            EvaluateBinary(operation, (x, y) => Math.Min(x, y));
            break;

        case Instruction.MinimumU32:
            EvaluateBinary(operation, (x, y) => (int)Math.Min((uint)x, (uint)y));
            break;

        case Instruction.Multiply:
            EvaluateBinary(operation, (x, y) => x * y);
            break;

        case Instruction.Negate:
            EvaluateUnary(operation, (x) => -x);
            break;

        case Instruction.ShiftLeft:
            EvaluateBinary(operation, (x, y) => x << y);
            break;

        case Instruction.ShiftRightS32:
            EvaluateBinary(operation, (x, y) => x >> y);
            break;

        case Instruction.ShiftRightU32:
            EvaluateBinary(operation, (x, y) => (int)((uint)x >> y));
            break;

        case Instruction.Subtract:
            EvaluateBinary(operation, (x, y) => x - y);
            break;

        case Instruction.UnpackHalf2x16:
            UnpackHalf2x16(operation);
            break;
    }
}
|
||||
|
||||
/// <summary>
/// Checks if every source operand of the operation is a constant.
/// </summary>
/// <param name="operation">Operation to check</param>
/// <returns>True if all sources are constants (also true when there are no sources)</returns>
private static bool AreAllSourcesConstant(Operation operation)
{
    int index = 0;

    // Stop at the first source that is not a constant.
    while (index < operation.SourcesCount && operation.GetSource(index).Type == OperandType.Constant)
    {
        index++;
    }

    return index == operation.SourcesCount;
}
|
||||
|
||||
/// <summary>
/// Counts the number of bits set on a 32-bit value.
/// </summary>
/// <param name="value">Value to count the set bits of</param>
/// <returns>Number of set bits, from 0 to 32</returns>
private static int BitCount(int value)
{
    // Reinterpret as unsigned so that the sign bit is counted like any other bit.
    return System.Numerics.BitOperations.PopCount(unchecked((uint)value));
}
|
||||
|
||||
/// <summary>
/// Folds a signed bitfield extract: the field is extracted and then sign-extended
/// from its length (third source) to 32 bits.
/// </summary>
/// <param name="operation">Operation with constant value, LSB and length sources</param>
private static void BitfieldExtractS32(Operation operation)
{
    int field = GetBitfieldExtractValue(operation);
    int unusedBits = 32 - operation.GetSource(2).Value;

    // Move the field to the top of the word, then arithmetic shift it back to replicate its top bit.
    int signExtended = (field << unusedBits) >> unusedBits;

    operation.TurnIntoCopy(Const(signExtended));
}
|
||||
|
||||
/// <summary>
/// Folds an unsigned bitfield extract into a copy of the extracted value.
/// </summary>
/// <param name="operation">Operation with constant value, LSB and length sources</param>
private static void BitfieldExtractU32(Operation operation)
{
    int field = GetBitfieldExtractValue(operation);

    operation.TurnIntoCopy(Const(field));
}
|
||||
|
||||
/// <summary>
/// Extracts a bitfield using the three constant sources of the operation:
/// the value (source 0), the LSB position (source 1) and the length (source 2).
/// </summary>
/// <param name="operation">Bitfield extract operation</param>
/// <returns>Result of the "Extract" helper for those arguments</returns>
private static int GetBitfieldExtractValue(Operation operation)
{
    int source = operation.GetSource(0).Value;
    int firstBit = operation.GetSource(1).Value;
    int bitCount = operation.GetSource(2).Value;

    return source.Extract(firstBit, bitCount);
}
|
||||
|
||||
/// <summary>
/// Folds a half unpack: takes the 16-bit half selected by the operation index
/// from the constant source and replaces the operation with a copy of it as a float constant.
/// </summary>
/// <param name="operation">Unpack operation with a constant source</param>
private static void UnpackHalf2x16(Operation operation)
{
    int packed = operation.GetSource(0).Value;

    // Each index selects a 16-bit lane of the packed value.
    int bitOffset = operation.Index * 16;
    ushort halfBits = (ushort)((packed >> bitOffset) & 0xffff);

    Half half = BitConverter.UInt16BitsToHalf(halfBits);

    operation.TurnIntoCopy(ConstF((float)half));
}
|
||||
|
||||
/// <summary>
/// Folds a floating-point negation into a copy of the negated constant.
/// </summary>
/// <remarks>
/// NOTE(review): RunPass folds "FP32 | Negate" through EvaluateFPUnary; this helper appears unused there.
/// </remarks>
/// <param name="operation">Operation with a constant floating-point source</param>
private static void FPNegate(Operation operation)
{
    float negated = -operation.GetSource(0).AsFloat();

    operation.TurnIntoCopy(ConstF(negated));
}
|
||||
|
||||
/// <summary>
/// Folds a unary integer operation by applying <paramref name="op"/> to its constant source.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Function that computes the result</param>
private static void EvaluateUnary(Operation operation, Func<int, int> op)
{
    int folded = op(operation.GetSource(0).Value);

    operation.TurnIntoCopy(Const(folded));
}
|
||||
|
||||
/// <summary>
/// Folds a unary floating-point operation by applying <paramref name="op"/> to its constant source.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Function that computes the result</param>
private static void EvaluateFPUnary(Operation operation, Func<float, float> op)
{
    float folded = op(operation.GetSource(0).AsFloat());

    operation.TurnIntoCopy(ConstF(folded));
}
|
||||
|
||||
/// <summary>
/// Folds a unary floating-point predicate into the IR true or false constant.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Predicate evaluated on the constant source</param>
private static void EvaluateFPUnary(Operation operation, Func<float, bool> op)
{
    float input = operation.GetSource(0).AsFloat();

    if (op(input))
    {
        operation.TurnIntoCopy(Const(IrConsts.True));
    }
    else
    {
        operation.TurnIntoCopy(Const(IrConsts.False));
    }
}
|
||||
|
||||
/// <summary>
/// Folds a binary integer operation by applying <paramref name="op"/> to its two constant sources.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Function that computes the result from source 0 and source 1</param>
private static void EvaluateBinary(Operation operation, Func<int, int, int> op)
{
    int lhs = operation.GetSource(0).Value;
    int rhs = operation.GetSource(1).Value;
    int folded = op(lhs, rhs);

    operation.TurnIntoCopy(Const(folded));
}
|
||||
|
||||
/// <summary>
/// Folds a binary integer comparison into the IR true or false constant.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Predicate evaluated on source 0 and source 1</param>
private static void EvaluateBinary(Operation operation, Func<int, int, bool> op)
{
    int lhs = operation.GetSource(0).Value;
    int rhs = operation.GetSource(1).Value;

    if (op(lhs, rhs))
    {
        operation.TurnIntoCopy(Const(IrConsts.True));
    }
    else
    {
        operation.TurnIntoCopy(Const(IrConsts.False));
    }
}
|
||||
|
||||
/// <summary>
/// Folds a binary floating-point operation by applying <paramref name="op"/> to its two constant sources.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Function that computes the result from source 0 and source 1</param>
private static void EvaluateFPBinary(Operation operation, Func<float, float, float> op)
{
    float lhs = operation.GetSource(0).AsFloat();
    float rhs = operation.GetSource(1).AsFloat();
    float folded = op(lhs, rhs);

    operation.TurnIntoCopy(ConstF(folded));
}
|
||||
|
||||
/// <summary>
/// Folds a binary floating-point comparison into the IR true or false constant.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Predicate evaluated on source 0 and source 1</param>
private static void EvaluateFPBinary(Operation operation, Func<float, float, bool> op)
{
    float lhs = operation.GetSource(0).AsFloat();
    float rhs = operation.GetSource(1).AsFloat();

    if (op(lhs, rhs))
    {
        operation.TurnIntoCopy(Const(IrConsts.True));
    }
    else
    {
        operation.TurnIntoCopy(Const(IrConsts.False));
    }
}
|
||||
|
||||
/// <summary>
/// Folds a ternary integer operation by applying <paramref name="op"/> to its three constant sources.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Function that computes the result from sources 0, 1 and 2</param>
private static void EvaluateTernary(Operation operation, Func<int, int, int, int> op)
{
    int first = operation.GetSource(0).Value;
    int second = operation.GetSource(1).Value;
    int third = operation.GetSource(2).Value;
    int folded = op(first, second, third);

    operation.TurnIntoCopy(Const(folded));
}
|
||||
|
||||
/// <summary>
/// Folds a ternary floating-point operation by applying <paramref name="op"/> to its three constant sources.
/// </summary>
/// <param name="operation">Operation to replace with a copy of the result</param>
/// <param name="op">Function that computes the result from sources 0, 1 and 2</param>
private static void EvaluateFPTernary(Operation operation, Func<float, float, float, float> op)
{
    float first = operation.GetSource(0).AsFloat();
    float second = operation.GetSource(1).AsFloat();
    float third = operation.GetSource(2).AsFloat();
    float folded = op(first, second, third);

    operation.TurnIntoCopy(ConstF(folded));
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,433 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
static class GlobalToStorage
|
||||
{
|
||||
/// <summary>
/// Replaces global memory accesses of a block with storage buffer accesses (or, on compute,
/// global loads with constant buffer loads) when the base address can be traced back to a
/// descriptor on constant buffer 0. Also accumulates which descriptors are referenced.
/// </summary>
/// <param name="block">Block to be processed; its operation list is modified in place</param>
/// <param name="config">Shader configuration</param>
/// <param name="sbUseMask">Bit mask of storage buffer descriptors referenced by operands, updated by this call</param>
/// <param name="ubeUseMask">Bit mask of UBE descriptors referenced by operands (compute only), updated by this call</param>
public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
{
    int sbStart = GetStorageBaseCbOffset(config.Stage);
    int sbEnd = sbStart + StorageDescsSize;

    int ubeStart = UbeBaseOffset;
    int ubeEnd = UbeBaseOffset + UbeDescsSize;

    for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
    {
        // Record every descriptor that is read directly as a source operand.
        for (int index = 0; index < node.Value.SourcesCount; index++)
        {
            Operand src = node.Value.GetSource(index);

            int storageIndex = GetStorageIndex(src, sbStart, sbEnd);

            if (storageIndex >= 0)
            {
                sbUseMask |= 1 << storageIndex;
            }

            if (config.Stage == ShaderStage.Compute)
            {
                int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd);

                if (constantIndex >= 0)
                {
                    ubeUseMask |= 1 << constantIndex;
                }
            }
        }

        if (!(node.Value is Operation operation))
        {
            continue;
        }

        if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
        {
            Operand source = operation.GetSource(0);

            int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);

            if (storageIndex >= 0)
            {
                // Storage buffers are implemented using global memory access.
                // If we know from where the base address of the access is loaded,
                // we can guess which storage buffer it is accessing.
                // We can then replace the global memory access with a storage
                // buffer access.
                node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
            }
            else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
            {
                // Here we effectively try to replace a LDG instruction with LDC.
                // The hardware only supports a limited amount of constant buffers
                // so NVN "emulates" more constant buffers using global memory access.
                // Here we try to replace the global access back to a constant buffer
                // load.
                // ubeEnd already includes the base offset, so it is the exclusive end of the range as is.
                storageIndex = SearchForStorageBase(block, source, ubeStart, ubeEnd);

                if (storageIndex >= 0)
                {
                    node = ReplaceLdgWithLdc(node, config, storageIndex);
                }
            }
        }
    }

    config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
}
|
||||
|
||||
/// <summary>
/// Replaces a global memory operation with the equivalent storage buffer operation.
/// </summary>
/// <param name="block">Block that contains the operation</param>
/// <param name="node">List node of the global memory operation</param>
/// <param name="config">Shader configuration where the storage buffer use is registered</param>
/// <param name="storageIndex">Index of the storage buffer being accessed</param>
/// <returns>List node of the new operation, which takes the place of <paramref name="node"/></returns>
private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
{
    Operation globalOp = (Operation)node.Value;
    Instruction inst = globalOp.Inst;

    bool isAtomic = inst.IsAtomic();
    bool isStg16Or8 = inst is Instruction.StoreGlobal16 or Instruction.StoreGlobal8;
    bool isWrite = isAtomic || inst == Instruction.StoreGlobal || isStg16Or8;

    config.SetUsedStorageBuffer(storageIndex, isWrite);

    int sourcesCount = globalOp.SourcesCount;
    Operand[] sources = new Operand[sourcesCount];

    // The address source is replaced by the buffer index followed by the offset inside the buffer.
    sources[0] = Const(storageIndex);
    sources[1] = GetStorageOffset(block, node, config, storageIndex, globalOp.GetSource(0), isStg16Or8);

    for (int index = 2; index < sourcesCount; index++)
    {
        sources[index] = globalOp.GetSource(index);
    }

    Operation storageOp;

    if (isAtomic)
    {
        storageOp = new Operation(inst, StorageKind.StorageBuffer, globalOp.Dest, sources);
    }
    else if (inst == Instruction.LoadGlobal)
    {
        storageOp = new Operation(Instruction.LoadStorage, globalOp.Dest, sources);
    }
    else
    {
        Instruction storeInst;

        switch (inst)
        {
            case Instruction.StoreGlobal16:
                storeInst = Instruction.StoreStorage16;
                break;
            case Instruction.StoreGlobal8:
                storeInst = Instruction.StoreStorage8;
                break;
            default:
                storeInst = Instruction.StoreStorage;
                break;
        }

        storageOp = new Operation(storeInst, null, sources);
    }

    // Detach the old operation from all of its sources before it is removed.
    for (int index = 0; index < sourcesCount; index++)
    {
        globalOp.SetSource(index, null);
    }

    LinkedList<INode> list = node.List;
    LinkedListNode<INode> replacement = list.AddBefore(node, storageOp);

    list.Remove(node);

    return replacement;
}
|
||||
|
||||
/// <summary>
/// Emits, before <paramref name="node"/>, the code that computes the offset of a global memory
/// address inside a storage buffer, and returns the operand holding that offset.
/// </summary>
/// <param name="block">Block that contains the operation</param>
/// <param name="node">List node of the operation that uses the address</param>
/// <param name="config">Shader configuration</param>
/// <param name="storageIndex">Index of the storage buffer being accessed</param>
/// <param name="addrLow">Operand with the address used by the access (first source of the operation)</param>
/// <param name="isStg16Or8">True for 16 or 8-bit stores, which take the byte offset</param>
/// <returns>Byte offset if <paramref name="isStg16Or8"/> is true, otherwise the byte offset shifted right by 2</returns>
private static Operand GetStorageOffset(
    BasicBlock block,
    LinkedListNode<INode> node,
    ShaderConfig config,
    int storageIndex,
    Operand addrLow,
    bool isStg16Or8)
{
    int baseAddressCbOffset = GetStorageCbOffset(config.Stage, storageIndex);

    bool storageAligned =
        !config.GpuAccessor.QueryHasUnalignedStorageBuffer() &&
        config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() <= Constants.StorageAlignment;

    Operand byteOffset = null;
    int constantOffset = 0;

    if (storageAligned)
    {
        // Try to recover the offset directly from the address calculation.
        (byteOffset, constantOffset) = GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), baseAddressCbOffset);
    }

    LinkedList<INode> list = node.List;

    if (byteOffset == null)
    {
        // Fallback: subtract the base address, truncated to the host alignment, from the address.
        Operand baseAddrLow = Cbuf(0, baseAddressCbOffset);
        Operand baseAddrTrunc = Local();
        Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

        list.AddBefore(node, new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask));

        Operand difference = Local();

        list.AddBefore(node, new Operation(Instruction.Subtract, difference, addrLow, baseAddrTrunc));

        byteOffset = difference;
    }
    else
    {
        ReplaceAddressAlignment(list, addrLow, byteOffset, constantOffset);

        if (constantOffset != 0)
        {
            Operand sum = Local();

            list.AddBefore(node, new Operation(Instruction.Add, sum, byteOffset, Const(constantOffset)));

            byteOffset = sum;
        }
    }

    if (isStg16Or8)
    {
        return byteOffset;
    }

    Operand wordOffset = Local();

    list.AddBefore(node, new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)));

    return wordOffset;
}
|
||||
|
||||
/// <summary>
/// Checks if an operand is a constant buffer operand on slot 0 with the given offset.
/// </summary>
/// <param name="operand">Operand to check</param>
/// <param name="offset">Expected constant buffer offset</param>
/// <returns>True if the operand refers to that location of constant buffer 0</returns>
private static bool IsCb0Offset(Operand operand, int offset)
{
    if (operand.Type != OperandType.ConstantBuffer)
    {
        return false;
    }

    return operand.GetCbufSlot() == 0 && operand.GetCbufOffset() == offset;
}
|
||||
|
||||
/// <summary>
/// Rewrites "address &amp; 3" operations found on <paramref name="list"/> so that they use
/// "byteOffset + constantOffset" in place of the address.
/// </summary>
/// <param name="list">Operation list of the block; only uses found on this list are rewritten</param>
/// <param name="address">Address operand whose uses are inspected</param>
/// <param name="byteOffset">Operand with the offset that replaces the address</param>
/// <param name="constantOffset">Constant added to <paramref name="byteOffset"/></param>
private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
{
    // When we emit 16/8-bit LDG, we add extra code to determine the address alignment.
    // Eliminate the storage buffer base address from this too, leaving only the byte offset.

    // SetSource below replaces a source of a user of "address", which presumably updates its use list.
    // Iterate over a snapshot so the collection is never modified while it is being enumerated.
    List<INode> uses = new List<INode>(address.UseOps);

    foreach (INode useNode in uses)
    {
        if (!(useNode is Operation op) || op.Inst != Instruction.BitwiseAnd)
        {
            continue;
        }

        Operand src1 = op.GetSource(0);
        Operand src2 = op.GetSource(1);

        int addressIndex = -1;

        if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3)
        {
            addressIndex = 0;
        }
        else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3)
        {
            addressIndex = 1;
        }

        if (addressIndex == -1)
        {
            continue;
        }

        LinkedListNode<INode> node = list.Find(op);

        // Add offset calculation before the use. Needs to be on the same block.
        if (node != null)
        {
            Operand offset = Local();
            Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));
            list.AddBefore(node, addOp);

            op.SetSource(addressIndex, offset);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Tries to split an address into "storage base address + byte offset + constant offset",
/// where the base address is the value at <paramref name="baseAddressCbOffset"/> on constant buffer 0.
/// </summary>
/// <param name="block">Block used to resolve operands to their last assignment</param>
/// <param name="address">Address operand to analyze</param>
/// <param name="baseAddressCbOffset">Constant buffer 0 offset of the storage buffer base address</param>
/// <returns>Byte offset operand and constant offset, or (null, 0) if the pattern is not recognized</returns>
private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int baseAddressCbOffset)
{
    if (IsCb0Offset(address, baseAddressCbOffset))
    {
        // The address is the base address itself: offset is zero.
        return (Const(0), 0);
    }

    (Operand withoutConstant, int constantOffset) = GetStorageConstantOffset(block, address);

    Operand resolved = Utils.FindLastOperation(withoutConstant, block);

    if (IsCb0Offset(resolved, baseAddressCbOffset))
    {
        // Base address plus a constant only.
        return (Const(0), constantOffset);
    }

    if (resolved.AsgOp is Operation { Inst: Instruction.Add } offsetAdd)
    {
        Operand lhs = offsetAdd.GetSource(0);
        Operand rhs = Utils.FindLastOperation(offsetAdd.GetSource(1), block);

        // Whichever side is the base address, the other side is the byte offset.
        if (IsCb0Offset(rhs, baseAddressCbOffset))
        {
            return (lhs, constantOffset);
        }

        if (IsCb0Offset(lhs, baseAddressCbOffset))
        {
            return (rhs, constantOffset);
        }
    }

    return (null, 0);
}
|
||||
|
||||
/// <summary>
/// Splits an address assigned by "x + constant" into its two parts.
/// </summary>
/// <param name="block">Not used by this method</param>
/// <param name="address">Address operand to analyze</param>
/// <returns>
/// The first Add source and the constant value if the address is assigned by an Add
/// whose second source is a constant, otherwise the address itself and 0
/// </returns>
private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
{
    if (address.AsgOp is Operation { Inst: Instruction.Add } offsetAdd)
    {
        Operand lhs = offsetAdd.GetSource(0);
        Operand rhs = offsetAdd.GetSource(1);

        if (rhs.Type == OperandType.Constant)
        {
            return (lhs, rhs.Value);
        }
    }

    return (address, 0);
}
|
||||
|
||||
/// <summary>
/// Replaces a global memory load with a constant buffer load.
/// </summary>
/// <param name="node">List node of the global load operation</param>
/// <param name="config">Shader configuration where the constant buffer use is registered</param>
/// <param name="storageIndex">Index of the UBE descriptor that holds the base address</param>
/// <returns>List node of the new load, which takes the place of <paramref name="node"/></returns>
private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
{
    Operation ldgOp = (Operation)node.Value;
    LinkedList<INode> list = node.List;

    int sourcesCount = ldgOp.SourcesCount;
    Operand[] sources = new Operand[sourcesCount];

    int cbSlot = UbeFirstCbuf + storageIndex;

    sources[0] = Const(cbSlot);

    // Word offset = (address - (base address & -alignment)) >> 2, computed right before the load.
    Operand addrLow = ldgOp.GetSource(0);
    Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize);
    Operand baseAddrTrunc = Local();
    Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

    list.AddBefore(node, new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask));

    Operand byteOffset = Local();
    Operand wordOffset = Local();

    Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
    Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));

    list.AddBefore(node, subOp);
    list.AddBefore(node, shrOp);

    sources[1] = wordOffset;

    config.SetUsedConstantBuffer(cbSlot);

    for (int index = 2; index < sourcesCount; index++)
    {
        sources[index] = ldgOp.GetSource(index);
    }

    Operation ldcOp = new Operation(Instruction.LoadConstant, ldgOp.Dest, sources);

    // Detach the old operation from all of its sources before it is removed.
    for (int index = 0; index < sourcesCount; index++)
    {
        ldgOp.SetSource(index, null);
    }

    LinkedListNode<INode> replacement = list.AddBefore(node, ldcOp);

    list.Remove(node);

    return replacement;
}
|
||||
|
||||
/// <summary>
/// Searches the calculation of a global memory address for a descriptor located on
/// constant buffer 0, inside the offset range from <paramref name="sbStart"/> to <paramref name="sbEnd"/>.
/// </summary>
/// <param name="block">Block used to resolve operands to their last assignment</param>
/// <param name="globalAddress">Address operand of the global memory access</param>
/// <param name="sbStart">Inclusive start offset of the descriptors</param>
/// <param name="sbEnd">Exclusive end offset of the descriptors</param>
/// <returns>Index of the descriptor, or -1 if none was found</returns>
private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd)
{
    Operand resolved = Utils.FindLastOperation(globalAddress, block);

    if (resolved.Type == OperandType.ConstantBuffer)
    {
        return GetStorageIndex(resolved, sbStart, sbEnd);
    }

    if (resolved.AsgOp is not Operation { Inst: Instruction.Add } addOp)
    {
        return -1;
    }

    Operand lhs = addOp.GetSource(0);
    Operand rhs = addOp.GetSource(1);

    bool lhsIsLocalPlusConst = lhs.Type == OperandType.LocalVariable && rhs.Type == OperandType.Constant;
    bool rhsIsLocalPlusConst = rhs.Type == OperandType.LocalVariable && lhs.Type == OperandType.Constant;

    if (lhsIsLocalPlusConst || rhsIsLocalPlusConst)
    {
        // "local + constant": look through it, at the Add that assigns the local variable.
        Operand local = lhsIsLocalPlusConst ? lhs : rhs;

        if (Utils.FindLastOperation(local, block).AsgOp is not Operation { Inst: Instruction.Add } innerAdd)
        {
            return -1;
        }

        addOp = innerAdd;
    }

    for (int index = 0; index < addOp.SourcesCount; index++)
    {
        int storageIndex = GetStorageIndex(addOp.GetSource(index), sbStart, sbEnd);

        if (storageIndex != -1)
        {
            return storageIndex;
        }
    }

    return -1;
}
|
||||
|
||||
/// <summary>
/// Gets the index of the descriptor referenced by a constant buffer operand.
/// </summary>
/// <param name="operand">Operand to check</param>
/// <param name="sbStart">Inclusive start offset of the descriptors on constant buffer 0</param>
/// <param name="sbEnd">Exclusive end offset of the descriptors on constant buffer 0</param>
/// <returns>Descriptor index, or -1 if the operand is not inside the descriptor range of slot 0</returns>
private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
{
    if (operand.Type != OperandType.ConstantBuffer)
    {
        return -1;
    }

    int slot = operand.GetCbufSlot();
    int offset = operand.GetCbufOffset();

    bool insideRange = slot == 0 && offset >= sbStart && offset < sbEnd;

    return insideRange ? (offset - sbStart) / StorageDescSize : -1;
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,380 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
static class Optimizer
|
||||
{
|
||||
/// <summary>
/// Runs all optimizations: the iterative passes, then the pattern-based passes
/// (global to storage, bindless to indexed, bindless elimination), then the iterative passes again.
/// </summary>
/// <param name="blocks">Blocks of the function to optimize</param>
/// <param name="config">Shader configuration</param>
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
{
    RunOptimizationPasses(blocks);

    int sbUseMask = 0;
    int ubeUseMask = 0;

    // These passes look for specific patterns and only need to run once.
    foreach (BasicBlock block in blocks)
    {
        GlobalToStorage.RunPass(block, config, ref sbUseMask, ref ubeUseMask);
        BindlessToIndexed.RunPass(block, config);
        BindlessElimination.RunPass(block, config);
    }

    config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);

    // The passes above may expose new opportunities, so optimize one last time.
    RunOptimizationPasses(blocks);
}
|
||||
|
||||
/// <summary>
/// Repeats unused node removal, phi and copy propagation, constant folding, simplification,
/// pack and ShuffleXor matching, and branch elimination over all blocks until nothing changes.
/// </summary>
/// <param name="blocks">Blocks to optimize in place</param>
private static void RunOptimizationPasses(BasicBlock[] blocks)
{
    bool changed;

    do
    {
        changed = false;

        foreach (BasicBlock block in blocks)
        {
            LinkedListNode<INode> next;

            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = next)
            {
                // The current node may be removed below, so its successor is taken first.
                next = node.Next;

                INode value = node.Value;
                bool unused = IsUnused(value);

                if (unused || value is not Operation operation)
                {
                    if (!unused && value is PhiNode phi)
                    {
                        unused = PropagatePhi(phi);
                    }

                    if (unused)
                    {
                        RemoveNode(block, node);

                        changed = true;
                    }

                    continue;
                }

                ConstantFolding.RunPass(operation);
                Simplification.RunPass(operation);

                if (!DestIsLocalVar(operation))
                {
                    continue;
                }

                // The instruction is read after the passes above, which may have changed it.
                switch (operation.Inst)
                {
                    case Instruction.Copy:
                        PropagateCopy(operation);
                        RemoveNode(block, node);

                        changed = true;
                        break;

                    case Instruction.PackHalf2x16 when PropagatePack(operation):
                    case Instruction.ShuffleXor when MatchDdxOrDdy(operation):
                        if (DestHasNoUses(operation))
                        {
                            RemoveNode(block, node);
                        }

                        changed = true;
                        break;
                }
            }

            if (BranchElimination.RunPass(block))
            {
                RemoveNode(block, block.Operations.Last);

                changed = true;
            }
        }
    }
    while (changed);
}
|
||||
|
||||
/// <summary>
/// Replaces every use of the copy destination with the copy source.
/// </summary>
/// <param name="copyOp">Copy operation to propagate</param>
private static void PropagateCopy(Operation copyOp)
{
    Operand copyDest = copyOp.Dest;
    Operand copySrc = copyOp.GetSource(0);

    // Work on a snapshot of the uses, taken before any source is replaced.
    INode[] users = copyDest.UseOps.ToArray();

    for (int userIndex = 0; userIndex < users.Length; userIndex++)
    {
        INode user = users[userIndex];

        for (int srcIndex = 0; srcIndex < user.SourcesCount; srcIndex++)
        {
            if (user.GetSource(srcIndex) == copyDest)
            {
                user.SetSource(srcIndex, copySrc);
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Replaces every use of a phi destination with the phi source when all phi sources are the same operand.
/// </summary>
/// <param name="phi">Phi node to check</param>
/// <returns>True if the value was propagated (the phi can then be removed), false otherwise</returns>
private static bool PropagatePhi(PhiNode phi)
{
    Operand commonSrc = phi.GetSource(0);

    int checkedIndex = 1;

    while (checkedIndex < phi.SourcesCount)
    {
        if (!IsSameOperand(commonSrc, phi.GetSource(checkedIndex)))
        {
            return false;
        }

        checkedIndex++;
    }

    // All sources are equal, so every use of the destination can take the source directly.
    Operand phiDest = phi.Dest;

    // Work on a snapshot of the uses, taken before any source is replaced.
    INode[] users = phiDest.UseOps.ToArray();

    for (int userIndex = 0; userIndex < users.Length; userIndex++)
    {
        INode user = users[userIndex];

        for (int srcIndex = 0; srcIndex < user.SourcesCount; srcIndex++)
        {
            if (user.GetSource(srcIndex) == phiDest)
            {
                user.SetSource(srcIndex, commonSrc);
            }
        }
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Checks whether two operands can be treated as the same value.
/// Only constants and constant buffer operands with equal type and value qualify.
/// </summary>
/// <param name="x">First operand</param>
/// <param name="y">Second operand</param>
/// <returns>True if both operands have the same type and value, and that type is Constant or ConstantBuffer</returns>
private static bool IsSameOperand(Operand x, Operand y)
{
    bool sameTypeAndValue = x.Type == y.Type && x.Value == y.Value;

    if (!sameTypeAndValue)
    {
        return false;
    }

    // TODO: Handle Load operations with the same storage and the same constant parameters.
    switch (x.Type)
    {
        case OperandType.Constant:
        case OperandType.ConstantBuffer:
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
/// <summary>
/// Replaces UnpackHalf2x16 uses of a pack result with a copy of the matching pack source.
/// The source picked depends on the Index of the unpack operation.
/// </summary>
/// <param name="packOp">Pack operation whose destination uses are inspected</param>
/// <returns>True if at least one unpack operation was turned into a copy</returns>
private static bool PropagatePack(Operation packOp)
{
    Operand packed = packOp.Dest;
    Operand lowSource = packOp.GetSource(0);
    Operand highSource = packOp.GetSource(1);

    bool anyReplaced = false;

    // Snapshot the uses up front; the list presumably changes when an unpack becomes a copy.
    foreach (INode user in packed.UseOps.ToArray())
    {
        if (user is Operation unpackOp &&
            unpackOp.Inst == Instruction.UnpackHalf2x16 &&
            unpackOp.GetSource(0) == packed)
        {
            Operand replacement;

            if (unpackOp.Index == 1)
            {
                replacement = highSource;
            }
            else
            {
                replacement = lowSource;
            }

            unpackOp.TurnIntoCopy(replacement);

            anyReplaced = true;
        }
    }

    return anyReplaced;
}
|
||||
|
||||
/// <summary>
/// Tries to match a ShuffleXor followed by SwizzleAdd sequence, and replaces
/// each matching SwizzleAdd with a Ddx or Ddy instruction.
/// </summary>
/// <remarks>
/// The caller must ensure that the instruction is ShuffleXor before calling this method.
/// </remarks>
/// <param name="operation">The ShuffleXor operation</param>
/// <returns>True if at least one use was replaced by Ddx or Ddy, false otherwise</returns>
public static bool MatchDdxOrDdy(Operation operation)
{
    // It's assumed that "operation.Inst" is ShuffleXor,
    // that should be checked before calling this method.
    Debug.Assert(operation.Inst == Instruction.ShuffleXor);

    bool modified = false;

    Operand src2 = operation.GetSource(1);
    Operand src3 = operation.GetSource(2);

    // The second source must be the constant 1 (treated as DDX below) or 2 (treated as DDY).
    if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
    {
        return false;
    }

    // The third source must be the constant 0x1c03.
    if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
    {
        return false;
    }

    bool isDdy = src2.Value == 2;
    bool isDdx = !isDdy;

    // We can replace any use by a FSWZADD with DDX/DDY, when
    // the following conditions are true:
    // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
    // - The first source operand must be the shuffle output.
    // - The second source operand must be the shuffle first source operand.
    INode[] uses = operation.Dest.UseOps.ToArray();

    foreach (INode use in uses)
    {
        // A single check covers both "is an Operation" and "is a SwizzleAdd".
        if (!(use is Operation useOp) || useOp.Inst != Instruction.SwizzleAdd)
        {
            continue;
        }

        Operand fswzaddSrc1 = useOp.GetSource(0);
        Operand fswzaddSrc2 = useOp.GetSource(1);
        Operand fswzaddSrc3 = useOp.GetSource(2);

        if (fswzaddSrc1 != operation.Dest)
        {
            continue;
        }

        if (fswzaddSrc2 != operation.GetSource(0))
        {
            continue;
        }

        if (fswzaddSrc3.Type != OperandType.Constant)
        {
            continue;
        }

        int mask = fswzaddSrc3.Value;

        if ((isDdx && mask != 0b10011001) ||
            (isDdy && mask != 0b10100101))
        {
            continue;
        }

        useOp.TurnInto(isDdx ? Instruction.Ddx : Instruction.Ddy, fswzaddSrc2);

        modified = true;
    }

    return modified;
}
|
||||
|
||||
/// <summary>
/// Unlinks a node from the block operation list, and removes it from the use lists
/// of the local variables it reads. When a local variable loses its last use, the
/// node that assigns it is processed the same way (its own reads are released), but
/// only the node passed in is removed from the block list here.
/// </summary>
/// <param name="block">Block that owns the node</param>
/// <param name="llNode">Linked list node to remove</param>
private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
{
    block.Operations.Remove(llNode);

    Queue<INode> pending = new Queue<INode>();

    pending.Enqueue(llNode.Value);

    while (pending.Count > 0)
    {
        INode current = pending.Dequeue();

        for (int slot = 0; slot < current.SourcesCount; slot++)
        {
            Operand source = current.GetSource(slot);

            if (source.Type == OperandType.LocalVariable)
            {
                bool wasUse = source.UseOps.Remove(current);

                // The assignment of a local with no uses left has its sources released too.
                if (wasUse && source.UseOps.Count == 0)
                {
                    Debug.Assert(source.AsgOp != null);
                    pending.Enqueue(source.AsgOp);
                }
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Checks whether a node can be dropped: it must have no side effects, all of its
/// destinations must be local variables, and none of those destinations may have uses.
/// </summary>
/// <param name="node">Node to check</param>
/// <returns>True if the node result is unused and the node has no side effects</returns>
private static bool IsUnused(INode node)
{
    if (HasSideEffects(node))
    {
        return false;
    }

    if (!DestIsLocalVar(node))
    {
        return false;
    }

    return DestHasNoUses(node);
}
|
||||
|
||||
/// <summary>
/// Checks whether a node is an atomic operation, an image atomic or a call.
/// Nodes that are not an Operation are reported as having no side effects.
/// </summary>
/// <param name="node">Node to check</param>
/// <returns>True for the instructions listed below, false otherwise</returns>
private static bool HasSideEffects(INode node)
{
    if (!(node is Operation operation))
    {
        return false;
    }

    // Only the bits selected by Instruction.Mask take part in the comparison.
    switch (operation.Inst & Instruction.Mask)
    {
        case Instruction.AtomicAdd:
        case Instruction.AtomicAnd:
        case Instruction.AtomicCompareAndSwap:
        case Instruction.AtomicMaxS32:
        case Instruction.AtomicMaxU32:
        case Instruction.AtomicMinS32:
        case Instruction.AtomicMinU32:
        case Instruction.AtomicOr:
        case Instruction.AtomicSwap:
        case Instruction.AtomicXor:
        case Instruction.Call:
        case Instruction.ImageAtomic:
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
/// <summary>
/// Checks whether a node has at least one destination slot, and every non-null
/// destination is a local variable.
/// </summary>
/// <param name="node">Node to check</param>
/// <returns>False if the node has no destinations or any destination is not a local variable</returns>
private static bool DestIsLocalVar(INode node)
{
    if (node.DestsCount == 0)
    {
        return false;
    }

    int slot = 0;

    while (slot < node.DestsCount)
    {
        Operand candidate = node.GetDest(slot++);

        // Null destination slots are ignored.
        bool isNonLocal = candidate != null && candidate.Type != OperandType.LocalVariable;

        if (isNonLocal)
        {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Checks whether none of the node destinations is used by any other node.
/// </summary>
/// <param name="node">Node to check</param>
/// <returns>True if every non-null destination has an empty use list</returns>
private static bool DestHasNoUses(INode node)
{
    int slot = 0;

    while (slot < node.DestsCount)
    {
        Operand candidate = node.GetDest(slot++);

        // Null destination slots are ignored.
        bool isUsed = candidate != null && candidate.UseOps.Count != 0;

        if (isUsed)
        {
            return false;
        }
    }

    return true;
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
/// <summary>
/// Peephole simplifications that turn an operation with a known constant
/// operand into a plain copy of one of its sources, or of a constant.
/// </summary>
static class Simplification
{
    private const int AllOnes = ~0;

    /// <summary>
    /// Applies the simplification matching the operation instruction, if there is one.
    /// The instruction is compared as is, without masking any bits out of it.
    /// </summary>
    /// <param name="operation">Operation to simplify in place</param>
    public static void RunPass(Operation operation)
    {
        switch (operation.Inst)
        {
            case Instruction.BitwiseAnd:
                TryEliminateBitwiseAnd(operation);
                break;

            case Instruction.BitwiseOr:
                TryEliminateBitwiseOr(operation);
                break;

            case Instruction.ConditionalSelect:
                TryEliminateConditionalSelect(operation);
                break;

            // x + 0 and x ^ 0, on either side.
            case Instruction.Add:
            case Instruction.BitwiseExclusiveOr:
                TryEliminateBinaryOpCommutative(operation, 0);
                break;

            // x * 1, on either side.
            case Instruction.Multiply:
                TryEliminateBinaryOpCommutative(operation, 1);
                break;

            // x / 1.
            case Instruction.Divide:
                TryEliminateBinaryOpY(operation, 1);
                break;

            // x << 0, x >> 0 and x - 0.
            case Instruction.ShiftLeft:
            case Instruction.ShiftRightS32:
            case Instruction.ShiftRightU32:
            case Instruction.Subtract:
                TryEliminateBinaryOpY(operation, 0);
                break;
        }
    }

    /// <summary>
    /// Simplifies a bitwise AND using these identities, tried in this order:
    /// 0xFFFFFFFF &amp; y == y, x &amp; 0xFFFFFFFF == x, then
    /// 0x00000000 &amp; y == 0x00000000 and x &amp; 0x00000000 == 0x00000000.
    /// </summary>
    /// <param name="operation">Bitwise AND operation</param>
    private static void TryEliminateBitwiseAnd(Operation operation)
    {
        Operand lhs = operation.GetSource(0);
        Operand rhs = operation.GetSource(1);

        if (IsConstEqual(lhs, AllOnes))
        {
            operation.TurnIntoCopy(rhs);
            return;
        }

        if (IsConstEqual(rhs, AllOnes))
        {
            operation.TurnIntoCopy(lhs);
            return;
        }

        if (IsConstEqual(lhs, 0) || IsConstEqual(rhs, 0))
        {
            operation.TurnIntoCopy(Const(0));
        }
    }

    /// <summary>
    /// Simplifies a bitwise OR using these identities, tried in this order:
    /// 0x00000000 | y == y, x | 0x00000000 == x, then
    /// 0xFFFFFFFF | y == 0xFFFFFFFF and x | 0xFFFFFFFF == 0xFFFFFFFF.
    /// </summary>
    /// <param name="operation">Bitwise OR operation</param>
    private static void TryEliminateBitwiseOr(Operation operation)
    {
        Operand lhs = operation.GetSource(0);
        Operand rhs = operation.GetSource(1);

        if (IsConstEqual(lhs, 0))
        {
            operation.TurnIntoCopy(rhs);
            return;
        }

        if (IsConstEqual(rhs, 0))
        {
            operation.TurnIntoCopy(lhs);
            return;
        }

        if (IsConstEqual(lhs, AllOnes) || IsConstEqual(rhs, AllOnes))
        {
            operation.TurnIntoCopy(Const(AllOnes));
        }
    }

    /// <summary>
    /// Turns a binary operation into a copy of its first source when the
    /// second source is a constant equal to <paramref name="comparand"/>.
    /// </summary>
    /// <param name="operation">Binary operation</param>
    /// <param name="comparand">Constant value that makes the operation a no-op on the second source</param>
    private static void TryEliminateBinaryOpY(Operation operation, int comparand)
    {
        Operand lhs = operation.GetSource(0);
        Operand rhs = operation.GetSource(1);

        if (!IsConstEqual(rhs, comparand))
        {
            return;
        }

        operation.TurnIntoCopy(lhs);
    }

    /// <summary>
    /// Turns a binary operation into a copy of the other source when either
    /// source is a constant equal to <paramref name="comparand"/>. The first source is checked first.
    /// </summary>
    /// <param name="operation">Binary operation</param>
    /// <param name="comparand">Constant value that makes the operation a no-op</param>
    private static void TryEliminateBinaryOpCommutative(Operation operation, int comparand)
    {
        Operand lhs = operation.GetSource(0);
        Operand rhs = operation.GetSource(1);

        if (IsConstEqual(lhs, comparand))
        {
            operation.TurnIntoCopy(rhs);
            return;
        }

        if (IsConstEqual(rhs, comparand))
        {
            operation.TurnIntoCopy(lhs);
        }
    }

    /// <summary>
    /// Turns a conditional select with a constant condition into a copy of the selected
    /// source: source 1 when the condition is non-zero, source 2 otherwise.
    /// </summary>
    /// <param name="operation">Conditional select operation</param>
    private static void TryEliminateConditionalSelect(Operation operation)
    {
        Operand condition = operation.GetSource(0);

        if (condition.Type != OperandType.Constant)
        {
            return;
        }

        int selectedIndex;

        if (condition.Value != 0)
        {
            selectedIndex = 1;
        }
        else
        {
            selectedIndex = 2;
        }

        operation.TurnIntoCopy(operation.GetSource(selectedIndex));
    }

    /// <summary>
    /// Checks whether an operand is a constant with the given value.
    /// </summary>
    /// <param name="operand">Operand to check</param>
    /// <param name="comparand">Expected constant value</param>
    /// <returns>True if the operand is a constant equal to <paramref name="comparand"/></returns>
    private static bool IsConstEqual(Operand operand, int comparand)
    {
        return operand.Type == OperandType.Constant && operand.Value == comparand;
    }
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
/// <summary>
/// Helpers shared by the optimization passes.
/// </summary>
static class Utils
{
    /// <summary>
    /// Looks for a predecessor that ends with a conditional branch and falls through into the block.
    /// </summary>
    /// <param name="block">Block whose predecessors are searched</param>
    /// <returns>The conditional branch operation of the first such predecessor, or null if there is none</returns>
    private static Operation FindBranchSource(BasicBlock block)
    {
        foreach (BasicBlock candidate in block.Predecessors)
        {
            if (candidate.Operations.Count == 0)
            {
                continue;
            }

            if (!(candidate.GetLastOp() is Operation branchOp))
            {
                continue;
            }

            // Only a predecessor whose Next is this block counts.
            if (IsConditionalBranch(branchOp.Inst) && candidate.Next == block)
            {
                return branchOp;
            }
        }

        return null;
    }

    /// <summary>
    /// Checks whether an instruction is BranchIfFalse or BranchIfTrue.
    /// </summary>
    /// <param name="inst">Instruction to check</param>
    /// <returns>True for the two conditional branch instructions</returns>
    private static bool IsConditionalBranch(Instruction inst)
    {
        switch (inst)
        {
            case Instruction.BranchIfFalse:
            case Instruction.BranchIfTrue:
                return true;

            default:
                return false;
        }
    }

    /// <summary>
    /// Checks whether both blocks are guarded by the same kind of conditional branch
    /// on the same condition operand instance. Only the top-most conditional is checked.
    /// </summary>
    /// <param name="currentBlock">Block being processed</param>
    /// <param name="queryBlock">Block to compare against</param>
    /// <returns>True if both blocks have a branch source with matching instruction and condition</returns>
    private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
    {
        Operation currentBranch = FindBranchSource(currentBlock);
        Operation queryBranch = FindBranchSource(queryBlock);

        if (currentBranch == null || queryBranch == null)
        {
            return false;
        }

        if (currentBranch.Inst != queryBranch.Inst)
        {
            return false;
        }

        // The condition should be the same operand instance.
        return currentBranch.GetSource(0) == queryBranch.GetSource(0);
    }

    /// <summary>
    /// When the operand is assigned by a phi node, picks the phi source whose block is
    /// guarded by the same condition as <paramref name="block"/>, preferring later sources.
    /// </summary>
    /// <param name="source">Operand to resolve</param>
    /// <param name="block">Block where the operand is used</param>
    /// <returns>The matching phi source, or <paramref name="source"/> itself when nothing matches</returns>
    public static Operand FindLastOperation(Operand source, BasicBlock block)
    {
        if (!(source.AsgOp is PhiNode phi))
        {
            return source;
        }

        // This source can have a different value depending on a previous branch.
        // Sources are visited from last to first, so the latest matching one wins.
        int srcIndex = phi.SourcesCount - 1;

        while (srcIndex >= 0)
        {
            if (BlockConditionsMatch(block, phi.GetBlock(srcIndex)))
            {
                return phi.GetSource(srcIndex);
            }

            srcIndex--;
        }

        return source;
    }
}
|
||||
}
|
486
src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs
Normal file
486
src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs
Normal file
|
@ -0,0 +1,486 @@
|
|||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
static class RegisterUsage
|
||||
{
|
||||
private const int RegsCount = 256;
|
||||
private const int RegsMask = RegsCount - 1;
|
||||
|
||||
private const int GprMasks = 4;
|
||||
private const int PredMasks = 1;
|
||||
private const int FlagMasks = 1;
|
||||
private const int TotalMasks = GprMasks + PredMasks + FlagMasks;
|
||||
|
||||
/// <summary>
/// Immutable set of six 64-bit masks: four for general purpose registers,
/// one for predicates and one for flags.
/// </summary>
private readonly struct RegisterMask : IEquatable<RegisterMask>
{
    public long GprMask0 { get; }
    public long GprMask1 { get; }
    public long GprMask2 { get; }
    public long GprMask3 { get; }
    public long PredMask { get; }
    public long FlagMask { get; }

    /// <summary>
    /// Creates a new register mask from its six component masks.
    /// </summary>
    public RegisterMask(long gprMask0, long gprMask1, long gprMask2, long gprMask3, long predMask, long flagMask)
    {
        GprMask0 = gprMask0;
        GprMask1 = gprMask1;
        GprMask2 = gprMask2;
        GprMask3 = gprMask3;
        PredMask = predMask;
        FlagMask = flagMask;
    }

    /// <summary>
    /// Gets one of the component masks by position.
    /// </summary>
    /// <param name="index">0 to 3 for the GPR masks, 4 for the predicate mask, 5 for the flag mask</param>
    /// <returns>The selected mask</returns>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="index"/> is not in the 0 to 5 range</exception>
    public long GetMask(int index)
    {
        return index switch
        {
            0 => GprMask0,
            1 => GprMask1,
            2 => GprMask2,
            3 => GprMask3,
            4 => PredMask,
            5 => FlagMask,
            _ => throw new ArgumentOutOfRangeException(nameof(index))
        };
    }

    /// <summary>
    /// Component-wise bitwise AND.
    /// </summary>
    public static RegisterMask operator &(RegisterMask x, RegisterMask y)
    {
        return new RegisterMask(
            x.GprMask0 & y.GprMask0,
            x.GprMask1 & y.GprMask1,
            x.GprMask2 & y.GprMask2,
            x.GprMask3 & y.GprMask3,
            x.PredMask & y.PredMask,
            x.FlagMask & y.FlagMask);
    }

    /// <summary>
    /// Component-wise bitwise OR.
    /// </summary>
    public static RegisterMask operator |(RegisterMask x, RegisterMask y)
    {
        return new RegisterMask(
            x.GprMask0 | y.GprMask0,
            x.GprMask1 | y.GprMask1,
            x.GprMask2 | y.GprMask2,
            x.GprMask3 | y.GprMask3,
            x.PredMask | y.PredMask,
            x.FlagMask | y.FlagMask);
    }

    /// <summary>
    /// Component-wise bitwise complement.
    /// </summary>
    public static RegisterMask operator ~(RegisterMask x)
    {
        return new RegisterMask(
            ~x.GprMask0,
            ~x.GprMask1,
            ~x.GprMask2,
            ~x.GprMask3,
            ~x.PredMask,
            ~x.FlagMask);
    }

    public static bool operator ==(RegisterMask x, RegisterMask y)
    {
        return x.Equals(y);
    }

    public static bool operator !=(RegisterMask x, RegisterMask y)
    {
        return !x.Equals(y);
    }

    public override bool Equals(object obj)
    {
        return obj is RegisterMask regMask && Equals(regMask);
    }

    /// <summary>
    /// Two masks are equal when all six component masks are equal.
    /// </summary>
    public bool Equals(RegisterMask other)
    {
        return GprMask0 == other.GprMask0 &&
               GprMask1 == other.GprMask1 &&
               GprMask2 == other.GprMask2 &&
               GprMask3 == other.GprMask3 &&
               PredMask == other.PredMask &&
               FlagMask == other.FlagMask;
    }

    public override int GetHashCode()
    {
        return HashCode.Combine(GprMask0, GprMask1, GprMask2, GprMask3, PredMask, FlagMask);
    }
}
|
||||
|
||||
/// <summary>
/// Registers that a function takes as input arguments and produces as output arguments.
/// </summary>
public readonly struct FunctionRegisterUsage
{
    private readonly Register[] _inArguments;
    private readonly Register[] _outArguments;

    /// <summary>
    /// Registers passed into the function.
    /// </summary>
    public Register[] InArguments => _inArguments;

    /// <summary>
    /// Registers passed out of the function.
    /// </summary>
    public Register[] OutArguments => _outArguments;

    /// <summary>
    /// Creates a new function register usage description.
    /// </summary>
    /// <param name="inArguments">Registers passed into the function</param>
    /// <param name="outArguments">Registers passed out of the function</param>
    public FunctionRegisterUsage(Register[] inArguments, Register[] outArguments)
    {
        _inArguments = inArguments;
        _outArguments = outArguments;
    }
}
|
||||
|
||||
/// <summary>
/// Computes which registers a function reads before writing and which it writes,
/// then inserts copies from arguments into registers on the entry block, and copies
/// from registers into arguments before each return.
/// </summary>
/// <param name="cfg">Control flow graph of the function</param>
/// <returns>The registers used as input and output arguments of the function</returns>
public static FunctionRegisterUsage RunPass(ControlFlowGraph cfg)
{
    List<Register> inArguments = new List<Register>();
    List<Register> outArguments = new List<Register>();

    // Compute local register inputs and outputs used inside blocks.
    // An input is a register read before any write to it on the same block.
    RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Length];
    RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Length];

    foreach (BasicBlock block in cfg.Blocks)
    {
        for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
        {
            // Nodes that are not operations are skipped instead of being dereferenced as null.
            // NOTE(review): presumably only Operation nodes exist when this pass runs - confirm.
            if (!(node.Value is Operation operation))
            {
                continue;
            }

            for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
            {
                Operand source = operation.GetSource(srcIndex);

                if (source.Type != OperandType.Register)
                {
                    continue;
                }

                Register register = source.GetRegister();

                localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
            }

            if (operation.Dest != null && operation.Dest.Type == OperandType.Register)
            {
                localOutputs[block.Index] |= GetMask(operation.Dest.GetRegister());
            }
        }
    }

    // Compute global register inputs and outputs used across blocks.
    // "Common" outputs are intersected over the predecessors, while
    // the regular outputs are the union over the predecessors.
    RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Length];

    RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Length];
    RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Length];

    RegisterMask allOutputs = new RegisterMask();
    RegisterMask allCmnOutputs = new RegisterMask(-1L, -1L, -1L, -1L, -1L, -1L);

    bool modified;

    bool firstPass = true;

    // Iterate until none of the per-block masks change anymore.
    do
    {
        modified = false;

        // Compute register outputs.
        for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
        {
            BasicBlock block = cfg.PostOrderBlocks[index];

            if (block.Predecessors.Count != 0)
            {
                BasicBlock predecessor = block.Predecessors[0];

                RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];

                RegisterMask outputs = globalOutputs[predecessor.Index];

                for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
                {
                    predecessor = block.Predecessors[pIndex];

                    cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];

                    outputs |= globalOutputs[predecessor.Index];
                }

                // Registers written by some, but not all predecessors are treated as inputs of the block.
                globalInputs[block.Index] |= outputs & ~cmnOutputs;

                if (!firstPass)
                {
                    cmnOutputs &= globalCmnOutputs[block.Index];
                }

                if (EndsWithReturn(block))
                {
                    allCmnOutputs &= cmnOutputs | localOutputs[block.Index];
                }

                if (Exchange(globalCmnOutputs, block.Index, cmnOutputs))
                {
                    modified = true;
                }

                outputs |= localOutputs[block.Index];

                if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
                {
                    allOutputs |= outputs;
                    modified = true;
                }
            }
            else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index]))
            {
                allOutputs |= localOutputs[block.Index];
                modified = true;
            }
        }

        // Compute register inputs.
        for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
        {
            BasicBlock block = cfg.PostOrderBlocks[index];

            RegisterMask inputs = localInputs[block.Index];

            if (block.Next != null)
            {
                inputs |= globalInputs[block.Next.Index];
            }

            if (block.Branch != null)
            {
                inputs |= globalInputs[block.Branch.Index];
            }

            inputs &= ~globalCmnOutputs[block.Index];

            if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
            {
                modified = true;
            }
        }

        firstPass = false;
    }
    while (modified);

    // Insert load and store context instructions where needed.
    foreach (BasicBlock block in cfg.Blocks)
    {
        // The only block without any predecessor should be the entry block.
        // It always needs a context load as it is the first block to run.
        if (block.Predecessors.Count == 0)
        {
            RegisterMask inputs = globalInputs[block.Index] | (allOutputs & ~allCmnOutputs);

            LoadLocals(block, inputs, inArguments);
        }

        if (EndsWithReturn(block))
        {
            StoreLocals(block, allOutputs, inArguments.Count, outArguments);
        }
    }

    return new FunctionRegisterUsage(inArguments.ToArray(), outArguments.ToArray());
}
|
||||
|
||||
/// <summary>
/// Adds the register arguments of the called function to every call operation on the blocks.
/// </summary>
/// <param name="blocks">Blocks to scan for call operations</param>
/// <param name="frus">Register usage of each function, indexed by the constant function id on the first call source</param>
public static void FixupCalls(BasicBlock[] blocks, FunctionRegisterUsage[] frus)
{
    foreach (BasicBlock block in blocks)
    {
        for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
        {
            // Only call operations are of interest, a node that is not an Operation can't be one.
            if (!(node.Value is Operation operation) || operation.Inst != Instruction.Call)
            {
                continue;
            }

            Operand funcId = operation.GetSource(0);

            Debug.Assert(funcId.Type == OperandType.Constant);

            var fru = frus[funcId.Value];

            Operand[] inRegs = new Operand[fru.InArguments.Length];

            for (int i = 0; i < fru.InArguments.Length; i++)
            {
                inRegs[i] = OperandHelper.Register(fru.InArguments[i]);
            }

            operation.AppendSources(inRegs);

            // The first element is left null, the output registers start at index 1.
            Operand[] outRegs = new Operand[1 + fru.OutArguments.Length];

            for (int i = 0; i < fru.OutArguments.Length; i++)
            {
                outRegs[1 + i] = OperandHelper.Register(fru.OutArguments[i]);
            }

            operation.AppendDests(outRegs);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Checks whether the first node of a block is an operation with the given instruction.
/// </summary>
/// <param name="block">Block to check</param>
/// <param name="inst">Expected instruction</param>
/// <returns>True if the block is not empty and starts with the instruction</returns>
private static bool StartsWith(BasicBlock block, Instruction inst)
{
    if (block.Operations.Count != 0 && block.Operations.First.Value is Operation firstOp)
    {
        return firstOp.Inst == inst;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Checks whether the last node of a block is an operation with the given instruction.
/// </summary>
/// <param name="block">Block to check</param>
/// <param name="inst">Expected instruction</param>
/// <returns>True if the block is not empty and ends with the instruction</returns>
private static bool EndsWith(BasicBlock block, Instruction inst)
{
    if (block.Operations.Count != 0 && block.Operations.Last.Value is Operation lastOp)
    {
        return lastOp.Inst == inst;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Builds a mask with only the bit of the given register set.
/// </summary>
/// <param name="register">Register to build the mask for</param>
/// <returns>A mask with a single bit set for Gpr, Predicate and Flag registers, an empty mask for other types</returns>
private static RegisterMask GetMask(Register register)
{
    // Each 64-bit word holds the bits of 64 general purpose registers.
    Span<long> gprWords = stackalloc long[4];
    long predBits = 0;
    long flagBits = 0;

    RegisterType kind = register.Type;
    int regIndex = register.Index;

    if (kind == RegisterType.Gpr)
    {
        // Upper index bits select the word, the low 6 bits select the bit inside it.
        gprWords[regIndex >> 6] = 1L << (regIndex & 0x3f);
    }
    else if (kind == RegisterType.Predicate)
    {
        predBits = 1L << regIndex;
    }
    else if (kind == RegisterType.Flag)
    {
        flagBits = 1L << regIndex;
    }

    return new RegisterMask(gprWords[0], gprWords[1], gprWords[2], gprWords[3], predBits, flagBits);
}
|
||||
|
||||
/// <summary>
/// Stores a new mask on the array, and reports whether it differs from the mask that was there.
/// </summary>
/// <param name="masks">Array of masks, one per block</param>
/// <param name="blkIndex">Index of the element to replace</param>
/// <param name="value">New mask</param>
/// <returns>True if the stored mask changed</returns>
private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
{
    bool changed = masks[blkIndex] != value;

    masks[blkIndex] = value;

    return changed;
}
|
||||
|
||||
/// <summary>
/// Inserts, at the start of the block, one copy from an argument into a register for each bit set on the mask.
/// Arguments are numbered from 0, in mask bit order.
/// </summary>
/// <param name="block">Block that receives the copies</param>
/// <param name="masks">Registers to load</param>
/// <param name="inArguments">List of input registers, only filled here when it is passed in empty</param>
private static void LoadLocals(BasicBlock block, RegisterMask masks, List<Register> inArguments)
{
    bool recordArguments = inArguments.Count == 0;
    LinkedListNode<INode> lastInserted = null;
    int nextArgument = 0;

    for (int maskIndex = 0; maskIndex < TotalMasks; maskIndex++)
    {
        var regInfo = GetRegTypeAndBaseIndex(maskIndex);
        long pending = masks.GetMask(maskIndex);

        // Consume the set bits from lowest to highest.
        while (pending != 0)
        {
            int lowestBit = BitOperations.TrailingZeroCount(pending);

            pending &= ~(1L << lowestBit);

            Register register = new Register(regInfo.BaseRegIndex + lowestBit, regInfo.RegType);

            if (recordArguments)
            {
                inArguments.Add(register);
            }

            Operand destination = OperandHelper.Register(register);
            Operand argument = OperandHelper.Argument(nextArgument++);

            Operation copyOp = new Operation(Instruction.Copy, destination, argument);

            // The first copy goes to the head of the list, the others follow the previous copy.
            lastInserted = lastInserted == null
                ? block.Operations.AddFirst(copyOp)
                : block.Operations.AddAfter(lastInserted, copyOp);
        }
    }

    Debug.Assert(nextArgument <= inArguments.Count);
}
|
||||
|
||||
/// <summary>
/// Inserts, before the last operation of the block, one copy from a register into an argument for each bit set on the mask.
/// Arguments are numbered starting right after the input arguments, in mask bit order.
/// </summary>
/// <param name="block">Block that receives the copies</param>
/// <param name="masks">Registers to store</param>
/// <param name="inArgumentsCount">Number of input arguments, used as the index of the first output argument</param>
/// <param name="outArguments">List of output registers, only filled here when it is passed in empty</param>
private static void StoreLocals(BasicBlock block, RegisterMask masks, int inArgumentsCount, List<Register> outArguments)
{
    LinkedListNode<INode> lastInserted = null;
    int nextArgument = inArgumentsCount;
    bool recordArguments = outArguments.Count == 0;

    for (int maskIndex = 0; maskIndex < TotalMasks; maskIndex++)
    {
        var regInfo = GetRegTypeAndBaseIndex(maskIndex);
        long pending = masks.GetMask(maskIndex);

        // Consume the set bits from lowest to highest.
        while (pending != 0)
        {
            int lowestBit = BitOperations.TrailingZeroCount(pending);

            pending &= ~(1L << lowestBit);

            Register register = new Register(regInfo.BaseRegIndex + lowestBit, regInfo.RegType);

            if (recordArguments)
            {
                outArguments.Add(register);
            }

            Operand destination = OperandHelper.Argument(nextArgument++);
            Operand source = OperandHelper.Register(register);

            Operation copyOp = new Operation(Instruction.Copy, destination, source);

            // The first copy goes right before the last operation, the others follow the previous copy.
            lastInserted = lastInserted == null
                ? block.Operations.AddBefore(block.Operations.Last, copyOp)
                : block.Operations.AddAfter(lastInserted, copyOp);
        }
    }

    Debug.Assert(nextArgument <= inArgumentsCount + outArguments.Count);
}
|
||||
|
||||
/// <summary>
/// Maps a mask position to the register type it holds, and the index of the register on bit 0 of that mask.
/// </summary>
/// <param name="i">Mask position</param>
/// <returns>Gpr with a base of 64 registers per mask below GprMasks, Predicate at GprMasks, Flag above it</returns>
private static (RegisterType RegType, int BaseRegIndex) GetRegTypeAndBaseIndex(int i)
{
    if (i < GprMasks)
    {
        // Each GPR mask covers as many registers as a long has bits.
        return (RegisterType.Gpr, i * sizeof(long) * 8);
    }

    if (i == GprMasks)
    {
        return (RegisterType.Predicate, 0);
    }

    return (RegisterType.Flag, 0);
}
|
||||
|
||||
/// <summary>
/// Checks whether the last operation of a block is a return.
/// </summary>
/// <param name="block">Block to check</param>
/// <returns>True if the last node is an operation with the Return instruction</returns>
private static bool EndsWithReturn(BasicBlock block)
{
    return block.GetLastOp() is Operation lastOp && lastOp.Inst == Instruction.Return;
}
|
||||
}
|
||||
}
|
768
src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
Normal file
768
src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
Normal file
|
@ -0,0 +1,768 @@
|
|||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
static class Rewriter
|
||||
{
|
||||
/// <summary>
/// Walks every operation of the blocks and rewrites texture samples and global memory
/// accesses. On vertex shaders, it also flags the draw parameters feature as used when needed.
/// </summary>
/// <param name="blocks">Blocks to process</param>
/// <param name="config">Shader configuration, queried for host capabilities and updated with used features</param>
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
{
    bool isVertexShader = config.Stage == ShaderStage.Vertex;
    bool hasConstantBufferDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
    bool supportsSnormBufferTextureFormat = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();

    foreach (BasicBlock block in blocks)
    {
        LinkedListNode<INode> node = block.Operations.First;

        while (node != null)
        {
            if (node.Value is not Operation operation)
            {
                node = node.Next;
                continue;
            }

            if (isVertexShader)
            {
                bool usesDrawParameters = hasConstantBufferDrawParameters
                    ? ReplaceConstantBufferWithDrawParameters(node, operation)
                    : HasConstantBufferDrawParameters(operation);

                if (usesDrawParameters)
                {
                    config.SetUsedFeature(FeatureFlags.DrawParameters);
                }
            }

            // Captured after the draw parameters handling above, and updated below
            // when a rewrite returns a different node to continue from.
            LinkedListNode<INode> nextNode = node.Next;

            if (operation is TextureOperation texOp)
            {
                if (texOp.Inst == Instruction.TextureSample)
                {
                    node = RewriteTextureSample(node, config);

                    bool needsSnormNormalization =
                        texOp.Type == SamplerType.TextureBuffer && !supportsSnormBufferTextureFormat;

                    if (needsSnormNormalization)
                    {
                        node = InsertSnormNormalization(node, config);
                    }
                }

                nextNode = node.Next;
            }
            else if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
            {
                LinkedListNode<INode> rewritten = RewriteGlobalAccess(node, config);
                LinkedListNode<INode> afterRewritten = rewritten?.Next;

                // Keep the previously captured next node when the rewrite gives nothing to continue from.
                if (afterRewritten != null)
                {
                    nextNode = afterRewritten;
                }
            }

            node = nextNode;
        }
    }
}
|
||||
|
||||
private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
|
||||
{
|
||||
Operation operation = (Operation)node.Value;
|
||||
|
||||
bool isAtomic = operation.Inst.IsAtomic();
|
||||
bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
|
||||
bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
|
||||
|
||||
Operation storageOp = null;
|
||||
|
||||
Operand PrependOperation(Instruction inst, params Operand[] sources)
|
||||
{
|
||||
Operand local = Local();
|
||||
|
||||
node.List.AddBefore(node, new Operation(inst, local, sources));
|
||||
|
||||
return local;
|
||||
}
|
||||
|
||||
Operand PrependExistingOperation(Operation operation)
|
||||
{
|
||||
Operand local = Local();
|
||||
|
||||
operation.Dest = local;
|
||||
node.List.AddBefore(node, operation);
|
||||
|
||||
return local;
|
||||
}
|
||||
|
||||
Operand addrLow = operation.GetSource(0);
|
||||
Operand addrHigh = operation.GetSource(1);
|
||||
|
||||
Operand sbBaseAddrLow = Const(0);
|
||||
Operand sbSlot = Const(0);
|
||||
|
||||
Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
|
||||
|
||||
Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
|
||||
{
|
||||
baseAddrLow = Cbuf(0, cbOffset);
|
||||
Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
|
||||
Operand size = Cbuf(0, cbOffset + 2);
|
||||
|
||||
Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
|
||||
Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
|
||||
|
||||
Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
|
||||
|
||||
Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
|
||||
|
||||
Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
|
||||
|
||||
return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
|
||||
}
|
||||
|
||||
int sbUseMask = config.AccessibleStorageBuffersMask;
|
||||
|
||||
while (sbUseMask != 0)
|
||||
{
|
||||
int slot = BitOperations.TrailingZeroCount(sbUseMask);
|
||||
|
||||
sbUseMask &= ~(1 << slot);
|
||||
|
||||
config.SetUsedStorageBuffer(slot, isWrite);
|
||||
|
||||
int cbOffset = GetStorageCbOffset(config.Stage, slot);
|
||||
|
||||
Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
|
||||
|
||||
sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
|
||||
sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
|
||||
}
|
||||
|
||||
if (config.AccessibleStorageBuffersMask != 0)
|
||||
{
|
||||
Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
|
||||
Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
|
||||
|
||||
Operand[] sources = new Operand[operation.SourcesCount];
|
||||
|
||||
sources[0] = sbSlot;
|
||||
|
||||
if (isStg16Or8)
|
||||
{
|
||||
sources[1] = byteOffset;
|
||||
}
|
||||
else
|
||||
{
|
||||
sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
|
||||
}
|
||||
|
||||
for (int index = 2; index < operation.SourcesCount; index++)
|
||||
{
|
||||
sources[index] = operation.GetSource(index);
|
||||
}
|
||||
|
||||
if (isAtomic)
|
||||
{
|
||||
storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
|
||||
}
|
||||
else if (operation.Inst == Instruction.LoadGlobal)
|
||||
{
|
||||
storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
|
||||
}
|
||||
else
|
||||
{
|
||||
Instruction storeInst = operation.Inst switch
|
||||
{
|
||||
Instruction.StoreGlobal16 => Instruction.StoreStorage16,
|
||||
Instruction.StoreGlobal8 => Instruction.StoreStorage8,
|
||||
_ => Instruction.StoreStorage
|
||||
};
|
||||
|
||||
storageOp = new Operation(storeInst, null, sources);
|
||||
}
|
||||
}
|
||||
else if (operation.Dest != null)
|
||||
{
|
||||
storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0));
|
||||
}
|
||||
|
||||
if (operation.Inst == Instruction.LoadGlobal)
|
||||
{
|
||||
int cbeUseMask = config.AccessibleConstantBuffersMask;
|
||||
|
||||
while (cbeUseMask != 0)
|
||||
{
|
||||
int slot = BitOperations.TrailingZeroCount(cbeUseMask);
|
||||
int cbSlot = UbeFirstCbuf + slot;
|
||||
|
||||
cbeUseMask &= ~(1 << slot);
|
||||
|
||||
config.SetUsedConstantBuffer(cbSlot);
|
||||
|
||||
Operand previousResult = PrependExistingOperation(storageOp);
|
||||
|
||||
int cbOffset = GetConstantUbeOffset(slot);
|
||||
|
||||
Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
|
||||
|
||||
Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask);
|
||||
Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst);
|
||||
|
||||
Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2));
|
||||
|
||||
Operand[] sourcesCb = new Operand[operation.SourcesCount];
|
||||
|
||||
sourcesCb[0] = Const(cbSlot);
|
||||
sourcesCb[1] = cbIndex;
|
||||
|
||||
for (int index = 2; index < operation.SourcesCount; index++)
|
||||
{
|
||||
sourcesCb[index] = operation.GetSource(index);
|
||||
}
|
||||
|
||||
Operand ldcResult = PrependOperation(Instruction.LoadConstant, sourcesCb);
|
||||
|
||||
storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult);
|
||||
}
|
||||
}
|
||||
|
||||
for (int index = 0; index < operation.SourcesCount; index++)
|
||||
{
|
||||
operation.SetSource(index, null);
|
||||
}
|
||||
|
||||
LinkedListNode<INode> oldNode = node;
|
||||
LinkedList<INode> oldNodeList = oldNode.List;
|
||||
|
||||
if (storageOp != null)
|
||||
{
|
||||
node = node.List.AddBefore(node, storageOp);
|
||||
}
|
||||
else
|
||||
{
|
||||
node = null;
|
||||
}
|
||||
|
||||
oldNodeList.Remove(oldNode);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/// <summary>
/// Rewrites a texture sample operation that the host can't execute as is, either because it uses
/// non-constant texel offsets the host does not support, or because it samples with non-normalized coordinates.
/// </summary>
/// <param name="node">List node holding the texture operation to be rewritten</param>
/// <param name="config">Shader configuration, used to query the host and to record used features and textures</param>
/// <returns>The unchanged node when no rewrite is required, otherwise the node that replaces it on the list</returns>
private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
{
    TextureOperation texOp = (TextureOperation)node.Value;

    bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
    bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
    bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

    // The host is only queried when the operation actually carries offsets.
    bool needsOffsetRewrite = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();

    // Bindless operations are treated as normalized without querying the texture.
    bool normalizedCoords = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);

    if (normalizedCoords && !needsOffsetRewrite)
    {
        return node;
    }

    bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
    bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
    bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
    bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
    bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;

    bool isArray = (texOp.Type & SamplerType.Array) != 0;
    bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
    bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
    bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;

    // Bindless and indexed operations have one extra source ahead of the coordinates.
    bool hasHandleSource = isBindless || isIndexed;
    int coordsIndex = hasHandleSource ? 1 : 0;

    int coordsCount = texOp.Type.GetDimensions();
    int offsetsCount = hasOffsets ? coordsCount * 4 : (hasOffset ? coordsCount : 0);

    Operand[] offsets = new Operand[offsetsCount];
    Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];

    // Sources for the LOD query: the optional leading source followed by the coordinates.
    Operand[] lodSources = new Operand[coordsIndex + coordsCount];

    for (int i = 0; i < lodSources.Length; i++)
    {
        lodSources[i] = texOp.GetSource(i);
    }

    // Count the sources that come before the offsets on the original operation.
    int leadingCount = lodSources.Length;

    if (isArray)
    {
        leadingCount++;
    }

    if (isShadow)
    {
        leadingCount++;
    }

    if (hasDerivatives)
    {
        leadingCount += coordsCount * 2;
    }

    if (isMultisample || hasLodLevel)
    {
        leadingCount++;
    }

    for (int i = 0; i < leadingCount; i++)
    {
        sources[i] = texOp.GetSource(i);
    }

    int srcIndex = leadingCount;
    int dstIndex = leadingCount;

    bool allOffsetsConstant = true;

    for (int i = 0; i < offsetsCount; i++)
    {
        Operand offsetSource = texOp.GetSource(srcIndex++);

        if (offsetSource.Type != OperandType.Constant)
        {
            allOffsetsConstant = false;
        }

        offsets[i] = offsetSource;
    }

    // Constant offsets never need to be rewritten.
    needsOffsetRewrite &= !allOffsetsConstant;

    if (normalizedCoords && !needsOffsetRewrite)
    {
        return node;
    }

    // Sources that follow the offsets on the original operation.
    if (hasLodBias)
    {
        sources[dstIndex++] = texOp.GetSource(srcIndex++);
    }

    if (isGather && !isShadow)
    {
        sources[dstIndex++] = texOp.GetSource(srcIndex++);
    }

    int componentIndex = texOp.Index;

    // Inserts an integer to FP32 conversion before the current node and returns its result.
    Operand Float(Operand value)
    {
        Operand converted = Local();

        node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, converted, value));

        return converted;
    }

    // Inserts "coord + (float)intOffset / (float)texSize" before the current node and returns the sum.
    Operand AddNormalizedOffset(Operand coord, Operand intOffset, Operand texSize)
    {
        Operand normOffset = Local();

        node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, normOffset, Float(intOffset), Float(texSize)));

        Operand sum = Local();

        node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, sum, coord, normOffset));

        return sum;
    }

    // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
    // Without normalization, the coordinates are expected to be in the [0, W or H] range,
    // and otherwise, they are expected to be in the [0, 1] range.
    // We normalize by dividing the coords by the texture size.
    if (!normalizedCoords && !intCoords)
    {
        config.SetUsedFeature(FeatureFlags.IntegerSampling);

        bool isCube = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube;
        int normCoordsCount = isCube ? 2 : coordsCount;

        for (int index = 0; index < normCoordsCount; index++)
        {
            Operand coordSize = Local();

            Operand[] texSizeSources = hasHandleSource
                ? new Operand[] { sources[0], Const(0) }
                : new Operand[] { Const(0) };

            node.List.AddBefore(node, new TextureOperation(
                Instruction.TextureSize,
                texOp.Type,
                texOp.Format,
                texOp.Flags,
                texOp.CbufSlot,
                texOp.Handle,
                index,
                new[] { coordSize },
                texSizeSources));

            config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

            Operand normalized = Local();

            node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, normalized, sources[coordsIndex + index], Float(coordSize)));

            sources[coordsIndex + index] = normalized;
        }
    }

    Operand[] dests = new Operand[texOp.DestsCount];

    for (int i = 0; i < dests.Length; i++)
    {
        dests[i] = texOp.GetDest(i);
    }

    Operand bindlessHandle = hasHandleSource ? sources[0] : null;

    LinkedListNode<INode> oldNode = node;

    // Technically, non-constant texture offsets are not allowed (according to the spec),
    // however some GPUs do support them.
    // For GPUs where they are not supported, we can replace the instruction with the following:
    // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
    // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
    // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
    // For textureGatherOffset, we split the operation into up to 4 operations, one for each component
    // that is accessed, where each textureGather operation has a different offset for each pixel.
    if (needsOffsetRewrite && isGather && !isShadow)
    {
        config.SetUsedFeature(FeatureFlags.IntegerSampling);

        Operand[] gatherSources = (Operand[])sources.Clone();

        Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount);

        int destIndex = 0;

        for (int compIndex = 0; compIndex < 4; compIndex++)
        {
            if ((texOp.Index & (1 << compIndex)) == 0)
            {
                continue;
            }

            // With per-pixel offsets, each component has its own group of offsets.
            int offsetBase = hasOffsets ? compIndex * coordsCount : 0;

            for (int index = 0; index < coordsCount; index++)
            {
                config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

                gatherSources[coordsIndex + index] = AddNormalizedOffset(
                    sources[coordsIndex + index],
                    offsets[offsetBase + index],
                    texSizes[index]);
            }

            TextureOperation gatherOp = new TextureOperation(
                Instruction.TextureSample,
                texOp.Type,
                texOp.Format,
                texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
                texOp.CbufSlot,
                texOp.Handle,
                1,
                new[] { dests[destIndex++] },
                gatherSources);

            // From now on, new operations are inserted before the operation that was just added.
            node = node.List.AddBefore(node, gatherOp);
        }
    }
    else
    {
        if (needsOffsetRewrite)
        {
            if (intCoords)
            {
                // Integer coordinates take the integer offset directly.
                for (int index = 0; index < coordsCount; index++)
                {
                    Operand sum = Local();

                    node.List.AddBefore(node, new Operation(Instruction.Add, sum, sources[coordsIndex + index], offsets[index]));

                    sources[coordsIndex + index] = sum;
                }
            }
            else
            {
                config.SetUsedFeature(FeatureFlags.IntegerSampling);

                Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount);

                for (int index = 0; index < coordsCount; index++)
                {
                    config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

                    sources[coordsIndex + index] = AddNormalizedOffset(
                        sources[coordsIndex + index],
                        offsets[index],
                        texSizes[index]);
                }
            }
        }

        TextureOperation sampleOp = new TextureOperation(
            Instruction.TextureSample,
            texOp.Type,
            texOp.Format,
            texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
            texOp.CbufSlot,
            texOp.Handle,
            componentIndex,
            dests,
            sources);

        node = node.List.AddBefore(node, sampleOp);
    }

    node.List.Remove(oldNode);

    // Clear the sources of the operation that was removed from the list.
    for (int index = 0; index < texOp.SourcesCount; index++)
    {
        texOp.SetSource(index, null);
    }

    return node;
}
|
||||
|
||||
/// <summary>
/// Inserts, before the specified node, a LOD query followed by one texture size query per coordinate,
/// where each size query takes the queried LOD (converted to integer) as its level.
/// </summary>
/// <param name="node">Node that the new operations are inserted before</param>
/// <param name="texOp">Texture operation whose type, format, flags and binding are reused by the queries</param>
/// <param name="lodSources">Sources of the LOD query</param>
/// <param name="bindlessHandle">Optional first source of every size query, null if there is none</param>
/// <param name="coordsCount">Number of size queries to insert</param>
/// <returns>Destination operands of the inserted size queries, one per coordinate</returns>
private static Operand[] InsertTextureSize(
    LinkedListNode<INode> node,
    TextureOperation texOp,
    Operand[] lodSources,
    Operand bindlessHandle,
    int coordsCount)
{
    // Inserts an FP32 to integer conversion before the node and returns its result.
    Operand ToInt(Operand value)
    {
        Operand converted = Local();

        node.List.AddBefore(node, new Operation(Instruction.ConvertFP32ToS32, converted, value));

        return converted;
    }

    Operand[] sizes = new Operand[coordsCount];

    Operand lod = Local();

    TextureOperation lodOp = new TextureOperation(
        Instruction.Lod,
        texOp.Type,
        texOp.Format,
        texOp.Flags,
        texOp.CbufSlot,
        texOp.Handle,
        0,
        new[] { lod },
        lodSources);

    node.List.AddBefore(node, lodOp);

    for (int coord = 0; coord < coordsCount; coord++)
    {
        Operand size = Local();

        sizes[coord] = size;

        // A separate conversion of the LOD is inserted for every size query.
        Operand intLod = ToInt(lod);

        Operand[] sizeSources = bindlessHandle != null
            ? new Operand[] { bindlessHandle, intLod }
            : new Operand[] { intLod };

        TextureOperation sizeOp = new TextureOperation(
            Instruction.TextureSize,
            texOp.Type,
            texOp.Format,
            texOp.Flags,
            texOp.CbufSlot,
            texOp.Handle,
            coord,
            new[] { size },
            sizeSources);

        node.List.AddBefore(node, sizeOp);
    }

    return sizes;
}
|
||||
|
||||
/// <summary>
/// Appends a conversion to float and a scale by 1 / max after a texture operation that reads a SNORM
/// format, and redirects the existing users of each destination to the scaled value.
/// </summary>
/// <param name="node">List node holding the texture operation</param>
/// <param name="config">Shader configuration, used to query the texture format</param>
/// <returns>The unchanged node when nothing was inserted, otherwise the node of the last inserted operation</returns>
private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
{
    TextureOperation texOp = (TextureOperation)node.Value;

    // The format of a bindless texture can't be queried:
    // the handle is unknown, so the texture could have any format.
    if (texOp.Flags.HasFlag(TextureFlags.Bindless))
    {
        return node;
    }

    int maxPositive = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot) switch
    {
        TextureFormat.R8Snorm or
        TextureFormat.R8G8Snorm or
        TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
        TextureFormat.R16Snorm or
        TextureFormat.R16G16Snorm or
        TextureFormat.R16G16B16A16Snorm => short.MaxValue,
        _ => 0
    };

    // Zero marks a format that is not SNORM, which requires no normalization.
    if (maxPositive == 0)
    {
        return node;
    }

    // Do normalization. We assume SINT formats are being used
    // as replacement for SNORM (which is not supported).
    for (int destIndex = 0; destIndex < texOp.DestsCount; destIndex++)
    {
        Operand texel = texOp.GetDest(destIndex);

        // The users are captured before the conversion below is created
        // (presumably so that the conversion itself is not among the rewritten users).
        INode[] users = texel.UseOps.ToArray();

        Operation toFloat = new Operation(Instruction.ConvertS32ToFP32, Local(), texel);
        Operation scale = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), toFloat.Dest, ConstF(1f / maxPositive));

        node = node.List.AddAfter(node, toFloat);
        node = node.List.AddAfter(node, scale);

        foreach (INode user in users)
        {
            if (user is Operation userOp)
            {
                // Replace all uses of the texture pixel value with the normalized value.
                for (int srcIndex = 0; srcIndex < userOp.SourcesCount; srcIndex++)
                {
                    if (userOp.GetSource(srcIndex) == texel)
                    {
                        userOp.SetSource(srcIndex, scale.Dest);
                    }
                }
            }
        }
    }

    return node;
}
|
||||
|
||||
/// <summary>
/// Replaces sources of the operation that read the base vertex, base instance or draw index
/// words of constant buffer 0 with loads of the matching input variable.
/// </summary>
/// <param name="node">Node that the generated loads are inserted before</param>
/// <param name="operation">Operation whose sources are inspected and replaced</param>
/// <returns>True if at least one source was replaced, false otherwise</returns>
private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
{
    // Inserts a load of the given input variable before the node and returns the loaded value.
    Operand GenerateLoad(IoVariable ioVariable)
    {
        Operand loaded = Local();
        node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, loaded, Const((int)ioVariable)));
        return loaded;
    }

    bool anyReplaced = false;

    for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
    {
        Operand src = operation.GetSource(srcIndex);

        if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
        {
            continue;
        }

        IoVariable replacement;

        switch (src.GetCbufOffset())
        {
            case Constants.NvnBaseVertexByteOffset / 4:
                replacement = IoVariable.BaseVertex;
                break;
            case Constants.NvnBaseInstanceByteOffset / 4:
                replacement = IoVariable.BaseInstance;
                break;
            case Constants.NvnDrawIndexByteOffset / 4:
                replacement = IoVariable.DrawIndex;
                break;
            default:
                // Any other word of the constant buffer is left untouched.
                continue;
        }

        operation.SetSource(srcIndex, GenerateLoad(replacement));
        anyReplaced = true;
    }

    return anyReplaced;
}
|
||||
|
||||
/// <summary>
/// Checks if any source of the operation reads the base vertex, base instance or draw index
/// words of constant buffer 0.
/// </summary>
/// <param name="operation">Operation whose sources are inspected</param>
/// <returns>True if such a source exists, false otherwise</returns>
private static bool HasConstantBufferDrawParameters(Operation operation)
{
    for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
    {
        Operand src = operation.GetSource(srcIndex);

        if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
        {
            continue;
        }

        var cbufOffset = src.GetCbufOffset();

        // The constants are byte offsets, while the operand offset is compared in 4-byte words.
        if (cbufOffset == Constants.NvnBaseVertexByteOffset / 4 ||
            cbufOffset == Constants.NvnBaseInstanceByteOffset / 4 ||
            cbufOffset == Constants.NvnDrawIndexByteOffset / 4)
        {
            return true;
        }
    }

    return false;
}
|
||||
}
|
||||
}
|
944
src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
Normal file
944
src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
Normal file
|
@ -0,0 +1,944 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
class ShaderConfig
|
||||
{
|
||||
// TODO: Non-hardcoded array size.
|
||||
public const int SamplerArraySize = 4;
|
||||
|
||||
private const int ThreadsPerWarp = 32;
|
||||
|
||||
public ShaderStage Stage { get; }
|
||||
|
||||
public bool GpPassthrough { get; }
|
||||
public bool LastInVertexPipeline { get; private set; }
|
||||
|
||||
public bool HasLayerInputAttribute { get; private set; }
|
||||
public int GpLayerInputAttribute { get; private set; }
|
||||
public int ThreadsPerInputPrimitive { get; }
|
||||
|
||||
public OutputTopology OutputTopology { get; }
|
||||
|
||||
public int MaxOutputVertices { get; }
|
||||
|
||||
public int LocalMemorySize { get; }
|
||||
|
||||
public ImapPixelType[] ImapTypes { get; }
|
||||
|
||||
public int OmapTargets { get; }
|
||||
public bool OmapSampleMask { get; }
|
||||
public bool OmapDepth { get; }
|
||||
|
||||
public IGpuAccessor GpuAccessor { get; }
|
||||
|
||||
public TranslationOptions Options { get; }
|
||||
|
||||
public bool TransformFeedbackEnabled { get; }
|
||||
|
||||
private TransformFeedbackOutput[] _transformFeedbackOutputs;
|
||||
|
||||
/// <summary>
/// Key identifying a variable by its IO variable, location and component.
/// Used as the key of the transform feedback definitions dictionary.
/// </summary>
readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
{
    public IoVariable IoVariable { get; }
    public int Location { get; }
    public int Component { get; }

    /// <summary>
    /// Creates a new key.
    /// </summary>
    /// <param name="ioVariable">IO variable</param>
    /// <param name="location">Location of the variable, zero if omitted</param>
    /// <param name="component">Component of the variable, zero if omitted</param>
    public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
    {
        IoVariable = ioVariable;
        Location = location;
        Component = component;
    }

    public override bool Equals(object other) => other is TransformFeedbackVariable variable && Equals(variable);

    public bool Equals(TransformFeedbackVariable other) =>
        IoVariable == other.IoVariable && Location == other.Location && Component == other.Component;

    // Packs the three members into one integer: variable on the low bits, location from bit 8, component from bit 16.
    public override int GetHashCode() => (int)IoVariable | (Location << 8) | (Component << 16);

    public override string ToString() => $"{IoVariable}.{Location}.{Component}";
}
|
||||
|
||||
private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;
|
||||
|
||||
public int Size { get; private set; }
|
||||
|
||||
public byte ClipDistancesWritten { get; private set; }
|
||||
|
||||
public FeatureFlags UsedFeatures { get; private set; }
|
||||
|
||||
public int Cb1DataSize { get; private set; }
|
||||
|
||||
public bool LayerOutputWritten { get; private set; }
|
||||
public int LayerOutputAttribute { get; private set; }
|
||||
|
||||
public bool NextUsesFixedFuncAttributes { get; private set; }
|
||||
public int UsedInputAttributes { get; private set; }
|
||||
public int UsedOutputAttributes { get; private set; }
|
||||
public HashSet<int> UsedInputAttributesPerPatch { get; }
|
||||
public HashSet<int> UsedOutputAttributesPerPatch { get; }
|
||||
public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }
|
||||
public int PassthroughAttributes { get; private set; }
|
||||
private int _nextUsedInputAttributes;
|
||||
private int _thisUsedInputAttributes;
|
||||
private Dictionary<int, int> _perPatchAttributeLocations;
|
||||
|
||||
public UInt128 NextInputAttributesComponents { get; private set; }
|
||||
public UInt128 ThisInputAttributesComponents { get; private set; }
|
||||
|
||||
public int AccessibleStorageBuffersMask { get; private set; }
|
||||
public int AccessibleConstantBuffersMask { get; private set; }
|
||||
|
||||
private int _usedConstantBuffers;
|
||||
private int _usedStorageBuffers;
|
||||
private int _usedStorageBuffersWrite;
|
||||
|
||||
private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);
|
||||
|
||||
// Value type of the _usedTextures and _usedImages dictionaries, stored per TextureInfo key.
private struct TextureMeta
|
||||
{
|
||||
// NOTE(review): presumably tells whether Type is known to be exact - confirm against the code that sets it.
public bool AccurateType;
|
||||
// Sampler type recorded for the texture.
public SamplerType Type;
|
||||
// Usage flags recorded for the texture.
public TextureUsageFlags UsageFlags;
|
||||
}
|
||||
|
||||
private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
|
||||
private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;
|
||||
|
||||
private BufferDescriptor[] _cachedConstantBufferDescriptors;
|
||||
private BufferDescriptor[] _cachedStorageBufferDescriptors;
|
||||
private TextureDescriptor[] _cachedTextureDescriptors;
|
||||
private TextureDescriptor[] _cachedImageDescriptors;
|
||||
|
||||
private int _firstConstantBufferBinding;
|
||||
private int _firstStorageBufferBinding;
|
||||
|
||||
public int FirstConstantBufferBinding => _firstConstantBufferBinding;
|
||||
public int FirstStorageBufferBinding => _firstStorageBufferBinding;
|
||||
|
||||
/// <summary>
/// Creates a shader configuration for the compute stage, with every storage and
/// constant buffer slot of global memory marked as accessible.
/// The other constructors chain to this one and override the stage.
/// </summary>
/// <param name="gpuAccessor">GPU accessor used by the configuration</param>
/// <param name="options">Translation options</param>
public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options)
{
    GpuAccessor = gpuAccessor;
    Options = options;
    Stage = ShaderStage.Compute;

    // One bit per slot, all of them set.
    AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1;
    AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1;

    _transformFeedbackDefinitions = new();

    UsedInputAttributesPerPatch = new();
    UsedOutputAttributesPerPatch = new();

    _usedTextures = new();
    _usedImages = new();
}
|
||||
|
||||
/// <summary>
/// Creates a shader configuration for an explicitly specified stage and output layout.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="outputTopology">Output topology</param>
/// <param name="maxOutputVertices">Maximum number of output vertices</param>
/// <param name="gpuAccessor">GPU accessor used by the configuration</param>
/// <param name="options">Translation options</param>
public ShaderConfig(
    ShaderStage stage,
    OutputTopology outputTopology,
    int maxOutputVertices,
    IGpuAccessor gpuAccessor,
    TranslationOptions options) : this(gpuAccessor, options)
{
    Stage = stage;
    OutputTopology = outputTopology;
    MaxOutputVertices = maxOutputVertices;
    ThreadsPerInputPrimitive = 1;
    TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();

    // The chained constructor enables all constant buffer slots; only compute keeps them.
    if (stage != ShaderStage.Compute)
    {
        AccessibleConstantBuffersMask = 0;
    }
}
|
||||
|
||||
/// <summary>
/// Creates a shader configuration from the information on a shader header.
/// </summary>
/// <param name="header">Shader header that the stage, topology, memory size and maps are read from</param>
/// <param name="gpuAccessor">GPU accessor used by the configuration</param>
/// <param name="options">Translation options</param>
public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options)
{
    Stage = header.Stage;

    // Passthrough only applies to geometry shaders.
    GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;
    LastInVertexPipeline = header.Stage < ShaderStage.Fragment;

    ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
    OutputTopology = header.OutputTopology;
    MaxOutputVertices = header.MaxOutputVertexCount;

    // The CRS size is divided by the warp size before being added to the low and high sizes.
    LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp);

    ImapTypes = header.ImapTypes;
    OmapTargets = header.OmapTargets;
    OmapSampleMask = header.OmapSampleMask;
    OmapDepth = header.OmapDepth;

    TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
}
|
||||
|
||||
/// <summary>
/// Builds the transform feedback output table and the per variable definitions on first use.
/// Does nothing if the table already exists, or if this shader has no transform feedback outputs.
/// </summary>
private void EnsureTransformFeedbackInitialized()
{
    if (_transformFeedbackOutputs != null || !HasTransformFeedbackOutputs())
    {
        return;
    }

    const int OutputsCount = 0xc0;

    TransformFeedbackOutput[] outputs = new TransformFeedbackOutput[OutputsCount];

    // One bit per group of 4 words (one vector) that has at least one output.
    ulong usedVectors = 0UL;

    for (int bufferIndex = 0; bufferIndex < 4; bufferIndex++)
    {
        var locations = GpuAccessor.QueryTransformFeedbackVaryingLocations(bufferIndex);
        var stride = GpuAccessor.QueryTransformFeedbackStride(bufferIndex);

        for (int i = 0; i < locations.Length; i++)
        {
            byte wordOffset = locations[i];

            // Locations outside of the table are ignored.
            if (wordOffset >= OutputsCount)
            {
                continue;
            }

            outputs[wordOffset] = new TransformFeedbackOutput(bufferIndex, i * 4, stride);
            usedVectors |= 1UL << (wordOffset / 4);
        }
    }

    // The table must be stored before the definitions are built,
    // as HasPerLocationInputOrOutputComponent reads it back through GetTransformFeedbackOutputComponents.
    _transformFeedbackOutputs = outputs;

    // Visit the used vectors from the lowest to the highest, clearing the lowest set bit on each step.
    for (; usedVectors != 0; usedVectors &= usedVectors - 1)
    {
        int vecIndex = BitOperations.TrailingZeroCount(usedVectors);

        for (int subIndex = 0; subIndex < 4; subIndex++)
        {
            int wordOffset = vecIndex * 4 + subIndex;

            if (!outputs[wordOffset].Valid)
            {
                continue;
            }

            IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, wordOffset * 4, out int location);

            int component = HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true)
                ? subIndex
                : 0;

            // The first output registered for a given variable wins.
            _transformFeedbackDefinitions.TryAdd(
                new TransformFeedbackVariable(ioVariable, location, component),
                outputs[wordOffset]);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Gets the transform feedback output table, building it first if needed.
/// </summary>
/// <returns>The output table, or null if it was not built</returns>
public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
{
    // Lazily builds the table; leaves the field untouched when there are no outputs.
    EnsureTransformFeedbackInitialized();

    TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

    return outputs;
}
|
||||
|
||||
/// <summary>
/// Looks up the transform feedback output defined for a variable.
/// </summary>
/// <param name="ioVariable">IO variable</param>
/// <param name="location">Location of the variable</param>
/// <param name="component">Component of the variable</param>
/// <param name="transformFeedbackOutput">Output defined for the variable, if any</param>
/// <returns>True if a definition exists for the variable, false otherwise</returns>
public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
{
    EnsureTransformFeedbackInitialized();

    return _transformFeedbackDefinitions.TryGetValue(
        new TransformFeedbackVariable(ioVariable, location, component),
        out transformFeedbackOutput);
}
|
||||
|
||||
/// <summary>
/// Checks if transform feedback is enabled and this shader is either the last
/// of the vertex pipeline or a fragment shader.
/// </summary>
/// <returns>True if the shader has transform feedback outputs, false otherwise</returns>
private bool HasTransformFeedbackOutputs()
{
    if (!TransformFeedbackEnabled)
    {
        return false;
    }

    return LastInVertexPipeline || Stage == ShaderStage.Fragment;
}
|
||||
|
||||
/// <summary>
/// Checks if transform feedback is enabled and applies to the inputs or outputs of this shader.
/// Outputs qualify on the last shader of the vertex pipeline, inputs qualify on fragment shaders.
/// </summary>
/// <param name="isOutput">True to check the outputs, false to check the inputs</param>
/// <returns>True if transform feedback applies, false otherwise</returns>
public bool HasTransformFeedbackOutputs(bool isOutput)
{
    if (!TransformFeedbackEnabled)
    {
        return false;
    }

    return isOutput ? LastInVertexPipeline : Stage == ShaderStage.Fragment;
}
|
||||
|
||||
/// <summary>
/// Checks if an IO variable is declared per location.
/// User defined variables are per location unless indexing is used on the matching direction;
/// of the remaining variables, only the fragment output color is.
/// </summary>
/// <param name="ioVariable">IO variable to check</param>
/// <param name="isOutput">True if the variable is an output, false if it is an input</param>
/// <returns>True if the variable is per location, false otherwise</returns>
public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
{
    if (ioVariable != IoVariable.UserDefined)
    {
        return ioVariable == IoVariable.FragmentOutputColor;
    }

    FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;

    return !UsedFeatures.HasFlag(indexingFlag);
}
|
||||
|
||||
/// <summary>
/// Checks if a component of a user defined variable is handled individually, which is the case
/// when transform feedback applies and the component count reported for it is 1.
/// </summary>
/// <param name="ioVariable">IO variable to check</param>
/// <param name="location">Location of the variable</param>
/// <param name="component">Component of the variable</param>
/// <param name="isOutput">True if the variable is an output, false if it is an input</param>
/// <returns>True if the component is handled individually, false otherwise</returns>
public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput)
{
    // The component count is only queried for user defined variables with transform feedback.
    return ioVariable == IoVariable.UserDefined &&
           HasTransformFeedbackOutputs(isOutput) &&
           GetTransformFeedbackOutputComponents(location, component) == 1;
}
|
||||
|
||||
/// <summary>
/// Gets the transform feedback output stored at a word offset of the output table.
/// </summary>
/// <param name="wordOffset">Index into the output table</param>
/// <returns>The table entry at the specified offset</returns>
public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset)
{
    EnsureTransformFeedbackInitialized();

    TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

    return outputs[wordOffset];
}
|
||||
|
||||
/// <summary>
/// Gets the transform feedback output of a component of a user defined attribute.
/// </summary>
/// <param name="location">Location of the attribute</param>
/// <param name="component">Component of the attribute</param>
/// <returns>The table entry for the specified component</returns>
public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
{
    // Word offset of the first user attribute, plus 4 words per location.
    int wordOffset = (AttributeConsts.UserAttributeBase / 4) + location * 4 + component;

    return GetTransformFeedbackOutput(wordOffset);
}
|
||||
|
||||
/// <summary>
/// Gets the number of components of a user defined attribute that are written contiguously by
/// transform feedback, counted from the first component of the location.
/// </summary>
/// <param name="location">Location of the attribute</param>
/// <param name="component">Component of the attribute being queried</param>
/// <returns>The length of the contiguous run, or 1 if the component is outside of that run</returns>
public int GetTransformFeedbackOutputComponents(int location, int component)
{
    EnsureTransformFeedbackInitialized();

    int firstWord = (AttributeConsts.UserAttributeBase / 4) + location * 4;
    int queriedWord = firstWord + component;
    int runLength = 1;

    while (runLength < 4)
    {
        TransformFeedbackOutput previous = _transformFeedbackOutputs[firstWord + runLength - 1];
        TransformFeedbackOutput current = _transformFeedbackOutputs[firstWord + runLength];

        // The run continues while both entries are valid and exactly 4 bytes apart.
        bool contiguous = previous.Valid && current.Valid && previous.Offset + 4 == current.Offset;

        if (!contiguous)
        {
            break;
        }

        runLength++;
    }

    return firstWord + runLength <= queriedWord ? 1 : runLength;
}
|
||||
|
||||
/// <summary>
/// Gets the type of a fragment output color: a 4 components vector of the
/// element type that the GPU accessor reports for the location.
/// </summary>
/// <param name="location">Location of the fragment output</param>
/// <returns>The aggregate type of the output</returns>
public AggregateType GetFragmentOutputColorType(int location)
{
    AggregateType elementType = GpuAccessor.QueryFragmentOutputType(location).ToAggregateType();

    return AggregateType.Vector4 | elementType;
}
|
||||
|
||||
/// <summary>
/// Gets the type used for a user defined input or output at a given location.
/// </summary>
/// <param name="location">User defined attribute location</param>
/// <param name="isOutput">True for an output, false for an input</param>
/// <returns>
/// An array of FP32 4 component vectors when indexing is used for the given direction,
/// otherwise a 4 component vector. Its element type is queried from the GPU accessor
/// for vertex shader inputs and is FP32 for everything else
/// </returns>
public AggregateType GetUserDefinedType(int location, bool isOutput)
{
    FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;

    if (UsedFeatures.HasFlag(indexingFlag))
    {
        return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
    }

    bool isVertexInput = Stage == ShaderStage.Vertex && !isOutput;

    AggregateType elementType = isVertexInput
        ? GpuAccessor.QueryAttributeType(location).ToAggregateType()
        : AggregateType.FP32;

    return AggregateType.Vector4 | elementType;
}
|
||||
|
||||
/// <summary>
/// Gets the index of the register that holds the fragment depth output.
/// </summary>
/// <returns>The number of bits set on <c>OmapTargets</c>, plus one</returns>
public int GetDepthRegister()
{
    // Each enabled color component takes one register, and the depth register
    // is always two registers after the last color output.
    int colorRegisters = BitOperations.PopCount((uint)OmapTargets);

    return colorRegisters + 1;
}
|
||||
|
||||
/// <summary>
/// Reads a 32-bit word from constant buffer 1 through the GPU accessor,
/// growing <c>Cb1DataSize</c> so that it covers the word that was read.
/// </summary>
/// <param name="offset">Offset passed on to the GPU accessor</param>
/// <returns>The value returned by the GPU accessor</returns>
public uint ConstantBuffer1Read(int offset)
{
    int requiredSize = offset + 4;

    // The tracked size only ever grows.
    if (requiredSize > Cb1DataSize)
    {
        Cb1DataSize = requiredSize;
    }

    return GpuAccessor.ConstantBuffer1Read(offset);
}
|
||||
|
||||
/// <summary>
/// Gets the format that should be declared for an image.
/// </summary>
/// <param name="handle">Texture handle</param>
/// <param name="cbufSlot">Constant buffer slot passed on to the GPU accessor, -1 by default</param>
/// <returns>
/// <see cref="TextureFormat.Unknown"/> when the host supports formatted image loads, otherwise the
/// queried format, falling back to <see cref="TextureFormat.R8G8B8A8Unorm"/> when it is unknown
/// </returns>
public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1)
{
    // With the formatted load extension there is no need to specify a format,
    // the image can be declared without one and the GPU will handle it.
    if (GpuAccessor.QueryHostSupportsImageLoadFormatted())
    {
        return TextureFormat.Unknown;
    }

    TextureFormat queriedFormat = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

    if (queriedFormat != TextureFormat.Unknown)
    {
        return queriedFormat;
    }

    GpuAccessor.Log($"Unknown format for texture {handle}.");

    return TextureFormat.R8G8B8A8Unorm;
}
|
||||
|
||||
/// <summary>
/// Checks if a texture format is accepted for atomic image operations.
/// </summary>
/// <param name="format">Format to check</param>
/// <returns>True for <see cref="TextureFormat.R32Sint"/> and <see cref="TextureFormat.R32Uint"/>, false otherwise</returns>
private static bool FormatSupportsAtomic(TextureFormat format)
{
    switch (format)
    {
        case TextureFormat.R32Sint:
        case TextureFormat.R32Uint:
            return true;
        default:
            return false;
    }
}
|
||||
|
||||
/// <summary>
/// Gets the format that should be declared for an image used by atomic operations.
/// </summary>
/// <param name="handle">Texture handle</param>
/// <param name="cbufSlot">Constant buffer slot passed on to the GPU accessor, -1 by default</param>
/// <returns>The queried format, or <see cref="TextureFormat.R32Sint"/> when it does not support atomics</returns>
public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
{
    // Atomic image instructions do not support GL_EXT_shader_image_load_formatted,
    // so a type must always be specified. R32Sint is the default when none is available.
    TextureFormat queriedFormat = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

    if (FormatSupportsAtomic(queriedFormat))
    {
        return queriedFormat;
    }

    GpuAccessor.Log($"Unsupported format for texture {handle}: {queriedFormat}.");

    return TextureFormat.R32Sint;
}
|
||||
|
||||
/// <summary>
/// Adds a value to the accumulated <c>Size</c>.
/// </summary>
/// <param name="size">Amount to add</param>
public void SizeAdd(int size) => Size += size;
|
||||
|
||||
/// <summary>
/// Merges the usage information collected by another configuration into this one.
/// </summary>
/// <remarks>
/// Masks and flags are combined with a bitwise OR. Textures and images that exist on both
/// configurations have their metadata merged, the remaining ones are copied over.
/// </remarks>
/// <param name="other">Configuration to inherit the usage information from</param>
public void InheritFrom(ShaderConfig other)
{
    ClipDistancesWritten |= other.ClipDistancesWritten;
    UsedFeatures |= other.UsedFeatures;

    UsedInputAttributes |= other.UsedInputAttributes;
    UsedOutputAttributes |= other.UsedOutputAttributes;
    _usedConstantBuffers |= other._usedConstantBuffers;
    _usedStorageBuffers |= other._usedStorageBuffers;
    _usedStorageBuffersWrite |= other._usedStorageBuffersWrite;

    foreach (var texture in other._usedTextures)
    {
        if (_usedTextures.TryGetValue(texture.Key, out var existingMeta))
        {
            _usedTextures[texture.Key] = MergeTextureMeta(texture.Value, existingMeta);
        }
        else
        {
            _usedTextures.Add(texture.Key, texture.Value);
        }
    }

    foreach (var image in other._usedImages)
    {
        if (_usedImages.TryGetValue(image.Key, out var existingMeta))
        {
            _usedImages[image.Key] = MergeTextureMeta(image.Value, existingMeta);
        }
        else
        {
            _usedImages.Add(image.Key, image.Value);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Records the attribute used for the layer output and marks the layer output as written.
/// </summary>
/// <param name="attr">Value stored on <c>LayerOutputAttribute</c></param>
public void SetLayerOutputAttribute(int attr)
{
    (LayerOutputWritten, LayerOutputAttribute) = (true, attr);
}
|
||||
|
||||
/// <summary>
/// Records the input attribute that the geometry shader reads the layer from,
/// and flags that such an attribute exists.
/// </summary>
/// <param name="attr">Value stored on <c>GpLayerInputAttribute</c></param>
public void SetGeometryShaderLayerInputAttribute(int attr)
{
    (HasLayerInputAttribute, GpLayerInputAttribute) = (true, attr);
}
|
||||
|
||||
/// <summary>
/// Sets <c>LastInVertexPipeline</c> to true.
/// </summary>
public void SetLastInVertexPipeline() => LastInVertexPipeline = true;
|
||||
|
||||
/// <summary>
/// Sets the bit for a location on the used input attributes mask.
/// </summary>
/// <param name="index">Attribute location, used as the bit index</param>
public void SetInputUserAttributeFixedFunc(int index) => UsedInputAttributes |= 1 << index;
|
||||
|
||||
/// <summary>
/// Sets the bit for a location on the used output attributes mask.
/// </summary>
/// <param name="index">Attribute location, used as the bit index</param>
public void SetOutputUserAttributeFixedFunc(int index) => UsedOutputAttributes |= 1 << index;
|
||||
|
||||
/// <summary>
/// Marks a component of a user defined input attribute as used.
/// </summary>
/// <param name="index">Attribute location</param>
/// <param name="component">Component index inside the location</param>
public void SetInputUserAttribute(int index, int component)
{
    int locationBit = 1 << index;

    // The component mask has 4 bits per location.
    int componentBitIndex = index * 4 + component;

    UsedInputAttributes |= locationBit;
    _thisUsedInputAttributes |= locationBit;
    ThisInputAttributesComponents |= UInt128.One << componentBitIndex;
}
|
||||
|
||||
/// <summary>
/// Adds an index to the set of used per-patch input attributes.
/// </summary>
/// <param name="index">Per-patch attribute index</param>
public void SetInputUserAttributePerPatch(int index) => UsedInputAttributesPerPatch.Add(index);
|
||||
|
||||
/// <summary>
/// Marks a user defined output attribute as used.
/// </summary>
/// <param name="index">Attribute location, used as the bit index on the used outputs mask</param>
public void SetOutputUserAttribute(int index) => UsedOutputAttributes |= 1 << index;
|
||||
|
||||
/// <summary>
/// Adds an index to the set of used per-patch output attributes.
/// </summary>
/// <param name="index">Per-patch attribute index</param>
public void SetOutputUserAttributePerPatch(int index) => UsedOutputAttributesPerPatch.Add(index);
|
||||
|
||||
/// <summary>
/// Merges into this configuration the information of the stage that comes right after it.
/// </summary>
/// <remarks>
/// Copies the input usage of the next stage, merges it into the outputs of this stage,
/// assigns locations for per-patch attributes (shared by both stages), and clears
/// <c>LastInVertexPipeline</c> when the next stage is able to modify the outputs.
/// </remarks>
/// <param name="config">Configuration of the next stage</param>
public void MergeFromtNextStage(ShaderConfig config)
{
    NextInputAttributesComponents = config.ThisInputAttributesComponents;
    NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch;
    NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr);
    MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch);

    if (UsedOutputAttributesPerPatch.Count != 0)
    {
        // Regular and per-patch input/output locations can't overlap,
        // so per-patch attributes are assigned to locations not used by regular outputs.
        var locationsMap = new Dictionary<int, int>();

        int freeMask = ~UsedOutputAttributes;

        foreach (int attr in UsedOutputAttributesPerPatch)
        {
            // Lowest free location, or 32 when all of them are taken.
            int location = BitOperations.TrailingZeroCount(freeMask);

            if (location == 32)
            {
                config.GpuAccessor.Log($"Not enough free locations for patch input/output 0x{attr:X}.");
                break;
            }

            locationsMap.Add(attr, location);
            freeMask &= ~(1 << location);
        }

        // Both stages must agree on the locations, so the same map instance is used for both.
        _perPatchAttributeLocations = locationsMap;
        config._perPatchAttributeLocations = locationsMap;
    }

    // Geometry shaders using the geometry shader passthrough feature are not considered
    // as being the last, because when this feature is used they can't actually modify any
    // of the outputs, so the stage that comes before is the last one that can do modifications.
    bool nextIsFragment = config.Stage == ShaderStage.Fragment;
    bool nextIsPassthroughGeometry = config.Stage == ShaderStage.Geometry && config.GpPassthrough;

    if (!nextIsFragment && !nextIsPassthroughGeometry)
    {
        LastInVertexPipeline = false;
    }
}
|
||||
|
||||
/// <summary>
/// Merges the attributes used as input by the next stage into the outputs of this stage.
/// </summary>
/// <remarks>
/// With geometry shader passthrough, the inputs of the next stage that are not outputs of
/// this stage are recorded as passthrough attributes instead, and the outputs stay untouched.
/// </remarks>
/// <param name="mask">Mask of the user attributes used as input by the next stage</param>
/// <param name="perPatch">Per-patch attributes used as input by the next stage</param>
public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch)
{
    _nextUsedInputAttributes = mask;

    if (!GpPassthrough)
    {
        UsedOutputAttributes |= mask;
        UsedOutputAttributesPerPatch.UnionWith(perPatch);

        return;
    }

    PassthroughAttributes = mask & ~UsedOutputAttributes;
}
|
||||
|
||||
/// <summary>
/// Gets the location assigned to a per-patch attribute.
/// </summary>
/// <param name="index">Per-patch attribute index</param>
/// <returns>
/// The mapped location, or <paramref name="index"/> itself when there is no map
/// or the map has no entry for it
/// </returns>
public int GetPerPatchAttributeLocation(int index)
{
    if (_perPatchAttributeLocations != null &&
        _perPatchAttributeLocations.TryGetValue(index, out int mappedLocation))
    {
        return mappedLocation;
    }

    return index;
}
|
||||
|
||||
/// <summary>
/// Checks if an output attribute should be considered as used.
/// </summary>
/// <param name="attr">Attribute offset</param>
/// <returns>
/// For user attributes, when the next stage uses fixed function attributes, whether the next
/// stage uses the matching location as input. True in all other cases
/// </returns>
public bool IsUsedOutputAttribute(int attr)
{
    bool isUserAttribute = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;

    // The check for fixed function attributes on the next stage is conservative,
    // returning false if the output is just not used by the next stage is also valid.
    if (!NextUsesFixedFuncAttributes || !isUserAttribute)
    {
        return true;
    }

    // Each location takes 16 bytes.
    int location = (attr - AttributeConsts.UserAttributeBase) >> 4;
    int locationBit = 1 << location;

    return (_nextUsedInputAttributes & locationBit) != 0;
}
|
||||
|
||||
/// <summary>
/// Finds a user attribute location that is not in use.
/// </summary>
/// <param name="isOutput">
/// True to search the inputs used by the next stage, false to search the inputs used by this stage
/// </param>
/// <param name="index">Number of free locations to skip before picking one</param>
/// <returns>The location that was found, or -1 if the mask has no free location at all</returns>
public int GetFreeUserAttribute(bool isOutput, int index)
{
    int occupiedMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes;
    int candidate = -1;

    // NOTE(review): when the mask fills up before index reaches zero, the last free
    // location found is returned rather than -1 -- confirm that this is intended.
    while (occupiedMask != -1)
    {
        candidate = BitOperations.TrailingZeroCount(~occupiedMask);

        if (candidate == 32)
        {
            return -1;
        }

        if (index < 1)
        {
            return candidate;
        }

        // Skip this location and look for the next free one.
        occupiedMask |= 1 << candidate;
        index--;
    }

    return candidate;
}
|
||||
|
||||
/// <summary>
/// Marks all user defined input attributes, and all of their components, as used.
/// </summary>
public void SetAllInputUserAttributes()
{
    // 4 component bits per attribute, on the low bits of the 128-bit mask.
    int unusedBits = 128 - Constants.MaxAttributes * 4;
    UInt128 allComponents = ~UInt128.Zero >> unusedBits;

    UsedInputAttributes |= Constants.AllAttributesMask;
    ThisInputAttributesComponents |= allComponents;
}
|
||||
|
||||
/// <summary>
/// Marks all user defined output attributes as used.
/// </summary>
public void SetAllOutputUserAttributes() => UsedOutputAttributes |= Constants.AllAttributesMask;
|
||||
|
||||
/// <summary>
/// Marks a clip distance as written.
/// </summary>
/// <param name="index">Clip distance index, used as the bit index on the written mask</param>
public void SetClipDistanceWritten(int index) => ClipDistancesWritten |= (byte)(1 << index);
|
||||
|
||||
/// <summary>
/// Adds flags to the set of features used by the shader.
/// </summary>
/// <param name="flags">Flags to be set</param>
public void SetUsedFeature(FeatureFlags flags) => UsedFeatures |= flags;
|
||||
|
||||
/// <summary>
/// Stores the masks of accessible storage and constant buffers.
/// </summary>
/// <param name="sbMask">Value stored on <c>AccessibleStorageBuffersMask</c></param>
/// <param name="ubeMask">Value stored on <c>AccessibleConstantBuffersMask</c></param>
public void SetAccessibleBufferMasks(int sbMask, int ubeMask)
{
    (AccessibleStorageBuffersMask, AccessibleConstantBuffersMask) = (sbMask, ubeMask);
}
|
||||
|
||||
/// <summary>
/// Marks a constant buffer slot as used.
/// </summary>
/// <param name="slot">Constant buffer slot, used as the bit index on the used mask</param>
public void SetUsedConstantBuffer(int slot) => _usedConstantBuffers |= 1 << slot;
|
||||
|
||||
/// <summary>
/// Marks a storage buffer slot as used, and optionally as written.
/// </summary>
/// <param name="slot">Storage buffer slot, used as the bit index on the masks</param>
/// <param name="write">True if the buffer is written to</param>
public void SetUsedStorageBuffer(int slot, bool write)
{
    int slotBit = 1 << slot;

    _usedStorageBuffers |= slotBit;

    if (!write)
    {
        return;
    }

    _usedStorageBuffersWrite |= slotBit;
}
|
||||
|
||||
/// <summary>
/// Records the use of a texture or image by an instruction, and registers it on the GPU accessor.
/// </summary>
/// <param name="inst">Instruction that uses the texture; only the bits of <c>Instruction.Mask</c> are considered</param>
/// <param name="type">Sampler type</param>
/// <param name="format">Image format; only recorded for image instructions</param>
/// <param name="flags">Texture operation flags</param>
/// <param name="cbufSlot">Constant buffer slot of the handle</param>
/// <param name="handle">Texture handle</param>
public void SetUsedTexture(
    Instruction inst,
    SamplerType type,
    TextureFormat format,
    TextureFlags flags,
    int cbufSlot,
    int handle)
{
    inst &= Instruction.Mask;

    bool coherent = flags.HasFlag(TextureFlags.Coherent);

    switch (inst)
    {
        case Instruction.ImageLoad:
        case Instruction.ImageStore:
        case Instruction.ImageAtomic:
            // Images always use integer coordinates; only loads do not write.
            bool isWrite = inst != Instruction.ImageLoad;

            SetUsedTextureOrImage(_usedImages, cbufSlot, handle, type, format, true, isWrite, false, coherent);
            break;

        default:
            // Lod and TextureSize are flagged as not carrying accurate type information.
            bool accurateType = inst != Instruction.Lod && inst != Instruction.TextureSize;
            bool intCoords = flags.HasFlag(TextureFlags.IntCoords) || inst == Instruction.TextureSize;

            SetUsedTextureOrImage(_usedTextures, cbufSlot, handle, type, TextureFormat.Unknown, intCoords, false, accurateType, coherent);
            break;
    }

    GpuAccessor.RegisterTexture(handle, cbufSlot);
}
|
||||
|
||||
/// <summary>
/// Adds a texture or image to a usage dictionary, merging with the existing entry if there is one.
/// </summary>
/// <remarks>
/// Indexed samplers add <c>SamplerArraySize</c> entries, with handles 2 apart from each other.
/// </remarks>
/// <param name="dict">Dictionary that receives the entries</param>
/// <param name="cbufSlot">Constant buffer slot of the handle</param>
/// <param name="handle">Texture handle</param>
/// <param name="type">Sampler type</param>
/// <param name="format">Image format</param>
/// <param name="intCoords">True if the access uses integer coordinates</param>
/// <param name="write">True if the access writes to the image</param>
/// <param name="accurateType">True if <paramref name="type"/> is known to be accurate</param>
/// <param name="coherent">True if the access is coherent</param>
private void SetUsedTextureOrImage(
    Dictionary<TextureInfo, TextureMeta> dict,
    int cbufSlot,
    int handle,
    SamplerType type,
    TextureFormat format,
    bool intCoords,
    bool write,
    bool accurateType,
    bool coherent)
{
    int dimensions = type.GetDimensions();
    bool indexed = type.HasFlag(SamplerType.Indexed);

    TextureUsageFlags usage = TextureUsageFlags.None;

    if (intCoords)
    {
        usage |= TextureUsageFlags.NeedsScaleValue;

        bool scalable = Stage.SupportsRenderScale() && !indexed && !write && dimensions == 2;

        if (!scalable)
        {
            // Resolution scaling cannot be applied to this texture right now.
            // Flag it so that scaling can be blacklisted on related textures when they are bound.
            usage |= TextureUsageFlags.ResScaleUnsupported;
        }
    }

    if (write)
    {
        usage |= TextureUsageFlags.ImageStore;
    }

    if (coherent)
    {
        usage |= TextureUsageFlags.ImageCoherent;
    }

    int layers = indexed ? SamplerArraySize : 1;

    for (int layer = 0; layer < layers; layer++)
    {
        var key = new TextureInfo(cbufSlot, handle + layer * 2, indexed, format);
        var value = new TextureMeta()
        {
            AccurateType = accurateType,
            Type = type,
            UsageFlags = usage
        };

        if (!dict.TryGetValue(key, out var previous))
        {
            dict.Add(key, value);
            continue;
        }

        dict[key] = MergeTextureMeta(value, previous);
    }
}
|
||||
|
||||
/// <summary>
/// Merges the metadata of two uses of the same texture.
/// </summary>
/// <param name="meta">Metadata of the new use; this is the value that is updated and returned</param>
/// <param name="existingMeta">Metadata that was already recorded</param>
/// <returns><paramref name="meta"/> with the usage flags of both, and the existing type if that one is accurate</returns>
private static TextureMeta MergeTextureMeta(TextureMeta meta, TextureMeta existingMeta)
{
    meta.UsageFlags |= existingMeta.UsageFlags;

    if (!existingMeta.AccurateType)
    {
        return meta;
    }

    // The existing entry has accurate type information, so its type takes
    // priority over the type of the new use, which might be inaccurate.
    meta.AccurateType = true;
    meta.Type = existingMeta.Type;

    return meta;
}
|
||||
|
||||
/// <summary>
/// Gets the descriptors of the constant buffers used by the shader.
/// </summary>
/// <remarks>
/// The array is built on the first call and cached. When constant buffer indexing is used,
/// the constant buffers reported by the GPU accessor are included as well.
/// </remarks>
/// <returns>Constant buffer descriptors</returns>
public BufferDescriptor[] GetConstantBufferDescriptors()
{
    if (_cachedConstantBufferDescriptors == null)
    {
        bool usesIndexing = UsedFeatures.HasFlag(FeatureFlags.CbIndexing);
        int usedMask = _usedConstantBuffers;

        if (usesIndexing)
        {
            usedMask |= (int)GpuAccessor.QueryConstantBufferUse();
        }

        _cachedConstantBufferDescriptors = GetBufferDescriptors(
            usedMask,
            0,
            usesIndexing,
            out _firstConstantBufferBinding,
            GpuAccessor.QueryBindingConstantBuffer);
    }

    return _cachedConstantBufferDescriptors;
}
|
||||
|
||||
/// <summary>
/// Gets the descriptors of the storage buffers used by the shader.
/// </summary>
/// <remarks>
/// The array is built on the first call and cached. Storage buffers are always handled as an array.
/// </remarks>
/// <returns>Storage buffer descriptors</returns>
public BufferDescriptor[] GetStorageBufferDescriptors()
{
    if (_cachedStorageBufferDescriptors == null)
    {
        _cachedStorageBufferDescriptors = GetBufferDescriptors(
            _usedStorageBuffers,
            _usedStorageBuffersWrite,
            true,
            out _firstStorageBufferBinding,
            GpuAccessor.QueryBindingStorageBuffer);
    }

    return _cachedStorageBufferDescriptors;
}
|
||||
|
||||
/// <summary>
/// Builds one descriptor for each slot set on a mask, from the lowest slot to the highest.
/// </summary>
/// <param name="usedMask">Mask with one bit set for each used slot</param>
/// <param name="writtenMask">Mask with one bit set for each slot that is written</param>
/// <param name="isArray">True to also query the binding of the unused slots that come before a used one</param>
/// <param name="firstBinding">First binding returned by the callback, or 0 if it was never called</param>
/// <param name="getBindingCallback">Callback that returns the binding of a slot</param>
/// <returns>Buffer descriptors, one for each bit set on <paramref name="usedMask"/></returns>
private static BufferDescriptor[] GetBufferDescriptors(
    int usedMask,
    int writtenMask,
    bool isArray,
    out int firstBinding,
    Func<int, int> getBindingCallback)
{
    var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];

    firstBinding = 0;

    bool firstBindingFound = false;
    int previousSlot = -1;
    int pendingMask = usedMask;

    for (int index = 0; index < descriptors.Length; index++)
    {
        int slot = BitOperations.TrailingZeroCount(pendingMask);
        int slotBit = 1 << slot;

        if (isArray)
        {
            // The array entries in between also consume bindings, even if they are unused.
            for (int gapSlot = previousSlot + 1; gapSlot < slot; gapSlot++)
            {
                int gapBinding = getBindingCallback(gapSlot);

                if (!firstBindingFound)
                {
                    firstBinding = gapBinding;
                    firstBindingFound = true;
                }
            }
        }

        previousSlot = slot;

        descriptors[index] = new BufferDescriptor(getBindingCallback(slot), slot);

        if (!firstBindingFound)
        {
            firstBinding = descriptors[index].Binding;
            firstBindingFound = true;
        }

        if ((writtenMask & slotBit) != 0)
        {
            // Done on the array element so that the stored descriptor is the one that is modified.
            descriptors[index].SetFlag(BufferUsageFlags.Write);
        }

        pendingMask &= ~slotBit;
    }

    return descriptors;
}
|
||||
|
||||
/// <summary>
/// Gets the descriptors of the textures used by the shader.
/// </summary>
/// <returns>Texture descriptors, built on the first call and cached</returns>
public TextureDescriptor[] GetTextureDescriptors()
{
    if (_cachedTextureDescriptors == null)
    {
        _cachedTextureDescriptors = GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture);
    }

    return _cachedTextureDescriptors;
}
|
||||
|
||||
/// <summary>
/// Gets the descriptors of the images used by the shader.
/// </summary>
/// <returns>Image descriptors, built on the first call and cached</returns>
public TextureDescriptor[] GetImageDescriptors()
{
    if (_cachedImageDescriptors == null)
    {
        _cachedImageDescriptors = GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage);
    }

    return _cachedImageDescriptors;
}
|
||||
|
||||
/// <summary>
/// Builds the descriptors for all entries of a texture or image usage dictionary.
/// </summary>
/// <remarks>
/// Entries are ordered by handle, and entries with the same handle are ordered by
/// their indexed flag. The position on that order is what is passed to the binding callback.
/// </remarks>
/// <param name="dict">Texture or image usage dictionary</param>
/// <param name="getBindingCallback">Callback that returns the binding for a position; the flag tells if it is a buffer texture</param>
/// <returns>Descriptors, one for each dictionary entry</returns>
private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary<TextureInfo, TextureMeta> dict, Func<int, bool, int> getBindingCallback)
{
    var descriptors = new TextureDescriptor[dict.Count];

    int i = 0;

    // Same resulting order as sorting by Indexed and then doing a (stable) sort by Handle,
    // but it sorts once and makes the primary and secondary keys explicit.
    foreach (var kv in dict.OrderBy(x => x.Key.Handle).ThenBy(x => x.Key.Indexed))
    {
        var info = kv.Key;
        var meta = kv.Value;

        bool isBuffer = (meta.Type & SamplerType.Mask) == SamplerType.TextureBuffer;
        int binding = getBindingCallback(i, isBuffer);

        descriptors[i] = new TextureDescriptor(binding, meta.Type, info.Format, info.CbufSlot, info.Handle);

        // Done on the array element so that the stored descriptor is the one that is modified.
        descriptors[i].SetFlag(meta.UsageFlags);
        i++;
    }

    return descriptors;
}
|
||||
|
||||
/// <summary>
/// Finds the texture descriptor that matches a texture operation, ignoring the sampler type.
/// </summary>
/// <param name="texOp">Texture operation to find the descriptor for</param>
/// <returns>
/// The first descriptor with the same constant buffer slot, handle and format as the operation,
/// and its index; or a default descriptor and -1 if there is none
/// </returns>
public (TextureDescriptor, int) FindTextureDescriptor(AstTextureOperation texOp)
{
    TextureDescriptor[] candidates = GetTextureDescriptors();

    for (int index = 0; index < candidates.Length; index++)
    {
        var candidate = candidates[index];

        bool matches =
            candidate.CbufSlot == texOp.CbufSlot &&
            candidate.HandleIndex == texOp.Handle &&
            candidate.Format == texOp.Format;

        if (matches)
        {
            return (candidate, index);
        }
    }

    return (default, -1);
}
|
||||
|
||||
/// <summary>
/// Finds the index of the descriptor that matches a texture operation.
/// </summary>
/// <param name="array">Descriptors to search</param>
/// <param name="texOp">Texture operation to find the descriptor for</param>
/// <returns>
/// Index of the first descriptor with the same type, constant buffer slot, handle and format
/// as the operation, or -1 if there is none
/// </returns>
private static int FindDescriptorIndex(TextureDescriptor[] array, AstTextureOperation texOp)
{
    for (int index = 0; index < array.Length; index++)
    {
        var candidate = array[index];

        if (candidate.Type != texOp.Type ||
            candidate.CbufSlot != texOp.CbufSlot ||
            candidate.HandleIndex != texOp.Handle ||
            candidate.Format != texOp.Format)
        {
            continue;
        }

        return index;
    }

    return -1;
}
|
||||
|
||||
/// <summary>
/// Finds the index of the texture descriptor that matches a texture operation.
/// </summary>
/// <param name="texOp">Texture operation to find the descriptor for</param>
/// <returns>Index on the texture descriptors array, or -1 if there is no match</returns>
public int FindTextureDescriptorIndex(AstTextureOperation texOp)
{
    TextureDescriptor[] textures = GetTextureDescriptors();

    return FindDescriptorIndex(textures, texOp);
}
|
||||
|
||||
/// <summary>
/// Finds the index of the image descriptor that matches a texture operation.
/// </summary>
/// <param name="texOp">Texture operation to find the descriptor for</param>
/// <returns>Index on the image descriptors array, or -1 if there is no match</returns>
public int FindImageDescriptorIndex(AstTextureOperation texOp)
{
    TextureDescriptor[] images = GetImageDescriptors();

    return FindDescriptorIndex(images, texOp);
}
|
||||
|
||||
/// <summary>
/// Creates the program information from the data collected on this configuration.
/// </summary>
/// <param name="identification">Result of the shader identification, none by default</param>
/// <returns>Shader program information</returns>
public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
{
    // The descriptors are requested in the same order that they are passed to the constructor.
    BufferDescriptor[] constantBuffers = GetConstantBufferDescriptors();
    BufferDescriptor[] storageBuffers = GetStorageBufferDescriptors();
    TextureDescriptor[] textures = GetTextureDescriptors();
    TextureDescriptor[] images = GetImageDescriptors();

    bool usesInstanceId = UsedFeatures.HasFlag(FeatureFlags.InstanceId);
    bool usesDrawParameters = UsedFeatures.HasFlag(FeatureFlags.DrawParameters);
    bool usesRtLayer = UsedFeatures.HasFlag(FeatureFlags.RtLayer);

    return new ShaderProgramInfo(
        constantBuffers,
        storageBuffers,
        textures,
        images,
        identification,
        GpLayerInputAttribute,
        Stage,
        usesInstanceId,
        usesDrawParameters,
        usesRtLayer,
        ClipDistancesWritten,
        OmapTargets);
}
|
||||
}
|
||||
}
|
158
src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs
Normal file
158
src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs
Normal file
|
@ -0,0 +1,158 @@
|
|||
using Ryujinx.Common.Utilities;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Per component input map type, as encoded on 2 bits of the shader header.
/// </summary>
enum PixelImap
{
    /// <summary>Encoded as 0.</summary>
    Unused = 0,

    /// <summary>Encoded as 1.</summary>
    Constant = 1,

    /// <summary>Encoded as 2.</summary>
    Perspective = 2,

    /// <summary>Encoded as 3.</summary>
    ScreenLinear = 3,
}
|
||||
|
||||
/// <summary>
/// Input map types of the four components (X, Y, Z and W) of one attribute.
/// </summary>
readonly struct ImapPixelType
{
    /// <summary>Type of the X component.</summary>
    public PixelImap X { get; }

    /// <summary>Type of the Y component.</summary>
    public PixelImap Y { get; }

    /// <summary>Type of the Z component.</summary>
    public PixelImap Z { get; }

    /// <summary>Type of the W component.</summary>
    public PixelImap W { get; }

    /// <summary>
    /// Creates a new set of component types.
    /// </summary>
    /// <param name="x">Type of the X component</param>
    /// <param name="y">Type of the Y component</param>
    /// <param name="z">Type of the Z component</param>
    /// <param name="w">Type of the W component</param>
    public ImapPixelType(PixelImap x, PixelImap y, PixelImap z, PixelImap w)
    {
        (X, Y, Z, W) = (x, y, z, w);
    }

    /// <summary>
    /// Gets the type of the first component, in X, Y, Z, W order, that is not unused.
    /// </summary>
    /// <returns>The type of the first used component, or the type of W when X, Y and Z are all unused</returns>
    public PixelImap GetFirstUsedType()
    {
        if (X != PixelImap.Unused)
        {
            return X;
        }

        if (Y != PixelImap.Unused)
        {
            return Y;
        }

        return Z != PixelImap.Unused ? Z : W;
    }
}
|
||||
|
||||
/// <summary>
/// Shader header fields, decoded from the first 0x50 bytes of code
/// that the GPU accessor returns for the shader address.
/// </summary>
class ShaderHeader
{
    // Fields decoded from word 0.
    public int SphType { get; }
    public int Version { get; }

    public ShaderStage Stage { get; }

    public bool MrtEnable { get; }

    public bool KillsPixels { get; }

    public bool DoesGlobalStore { get; }

    public int SassVersion { get; }

    public bool GpPassthrough { get; }

    public bool DoesLoadOrStore { get; }
    public bool DoesFp64 { get; }

    public int StreamOutMask { get; }

    // Fields decoded from word 1.
    public int ShaderLocalMemoryLowSize { get; }

    public int PerPatchAttributeCount { get; }

    // Fields decoded from word 2.
    public int ShaderLocalMemoryHighSize { get; }

    public int ThreadsPerInputPrimitive { get; }

    // Fields decoded from word 3.
    public int ShaderLocalMemoryCrsSize { get; }

    public OutputTopology OutputTopology { get; }

    // Fields decoded from word 4.
    public int MaxOutputVertexCount { get; }

    public int StoreReqStart { get; }
    public int StoreReqEnd { get; }

    // One entry per attribute, decoded from the bytes that start at word 6.
    public ImapPixelType[] ImapTypes { get; }

    // Fields decoded from words 18 and 19.
    public int OmapTargets { get; }
    public bool OmapSampleMask { get; }
    public bool OmapDepth { get; }

    /// <summary>
    /// Reads and decodes the shader header located at a given address.
    /// </summary>
    /// <param name="gpuAccessor">GPU accessor that the header is read from</param>
    /// <param name="address">Address passed to the GPU accessor to get the header</param>
    public ShaderHeader(IGpuAccessor gpuAccessor, ulong address)
    {
        ReadOnlySpan<int> words = MemoryMarshal.Cast<ulong, int>(gpuAccessor.GetCode(address, 0x50));

        int word0 = words[0];
        int word1 = words[1];
        int word2 = words[2];
        int word3 = words[3];
        int word4 = words[4];

        SphType = word0.Extract(0, 5);
        Version = word0.Extract(5, 5);

        ShaderStage stage = (ShaderStage)word0.Extract(10, 4);

        // The compute value is invalid here, it is treated as vertex instead.
        Stage = stage == ShaderStage.Compute ? ShaderStage.Vertex : stage;

        MrtEnable = word0.Extract(14);
        KillsPixels = word0.Extract(15);
        DoesGlobalStore = word0.Extract(16);
        SassVersion = word0.Extract(17, 4);
        GpPassthrough = word0.Extract(24);
        DoesLoadOrStore = word0.Extract(26);
        DoesFp64 = word0.Extract(27);
        StreamOutMask = word0.Extract(28, 4);

        ShaderLocalMemoryLowSize = word1.Extract(0, 24);
        PerPatchAttributeCount = word1.Extract(24, 8);

        ShaderLocalMemoryHighSize = word2.Extract(0, 24);
        ThreadsPerInputPrimitive = word2.Extract(24, 8);

        ShaderLocalMemoryCrsSize = word3.Extract(0, 24);
        OutputTopology = (OutputTopology)word3.Extract(24, 4);

        MaxOutputVertexCount = word4.Extract(0, 12);
        StoreReqStart = word4.Extract(12, 8);
        StoreReqEnd = word4.Extract(24, 8);

        ImapTypes = new ImapPixelType[32];

        for (int attribute = 0; attribute < 32; attribute++)
        {
            // Four attributes per word, one byte each; every byte has
            // 2 bits for each of the X, Y, Z and W components.
            int packedWord = words[6 + (attribute >> 2)];
            int byteShift = (attribute & 3) * 8;
            byte imap = (byte)(packedWord >> byteShift);

            PixelImap x = (PixelImap)((imap >> 0) & 3);
            PixelImap y = (PixelImap)((imap >> 2) & 3);
            PixelImap z = (PixelImap)((imap >> 4) & 3);
            PixelImap w = (PixelImap)((imap >> 6) & 3);

            ImapTypes[attribute] = new ImapPixelType(x, y, z, w);
        }

        int omapTargetsWord = words[18];
        int omapWord = words[19];

        OmapTargets = omapTargetsWord;
        OmapSampleMask = omapWord.Extract(0);
        OmapDepth = omapWord.Extract(1);
    }
}
|
||||
}
|
185
src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
Normal file
185
src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
Normal file
|
@ -0,0 +1,185 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Identifies shaders that match known patterns.
/// </summary>
static class ShaderIdentifier
{
    /// <summary>
    /// Tries to identify a shader.
    /// </summary>
    /// <remarks>
    /// The only pattern that is recognized is a geometry shader with triangles as input,
    /// on a host without geometry shader support, that passes its inputs through and writes
    /// the layer from a user defined input. In that case, the input that has the layer is
    /// also recorded on the configuration.
    /// </remarks>
    /// <param name="functions">Functions of the shader</param>
    /// <param name="config">Shader configuration</param>
    /// <returns>The identification of the shader, or none if no pattern was matched</returns>
    public static ShaderIdentification Identify(Function[] functions, ShaderConfig config)
    {
        if (config.Stage == ShaderStage.Geometry &&
            config.GpuAccessor.QueryPrimitiveTopology() == InputTopology.Triangles &&
            !config.GpuAccessor.QueryHostSupportsGeometryShader() &&
            IsLayerPassthroughGeometryShader(functions, out int layerInputAttr))
        {
            config.SetGeometryShaderLayerInputAttribute(layerInputAttr);

            return ShaderIdentification.GeometryLayerPassthrough;
        }

        return ShaderIdentification.None;
    }

    /// <summary>
    /// Checks if the shader only copies inputs to the matching outputs (or writes constants),
    /// writes the layer from a user defined input, and emits exactly 3 vertices.
    /// </summary>
    /// <param name="functions">Functions of the shader; more than one function is never a match</param>
    /// <param name="layerInputAttr">
    /// Location times 4 plus component of the user defined input that is written to the layer,
    /// or 0 if the layer is not written
    /// </param>
    /// <returns>True if the shader matches the pattern, false otherwise</returns>
    private static bool IsLayerPassthroughGeometryShader(Function[] functions, out int layerInputAttr)
    {
        bool writesLayer = false;
        layerInputAttr = 0;

        if (functions.Length != 1)
        {
            return false;
        }

        int verticesCount = 0;
        int totalVerticesCount = 0;

        foreach (BasicBlock block in functions[0].Blocks)
        {
            // We are not expecting loops or any complex control flow here, so fail in those cases.
            if (block.Branch != null && block.Branch.Index <= block.Index)
            {
                return false;
            }

            foreach (INode node in block.Operations)
            {
                if (node is not Operation operation)
                {
                    continue;
                }

                if (IsResourceWrite(operation.Inst))
                {
                    return false;
                }

                if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output)
                {
                    // The value being stored is the last source.
                    Operand src = operation.GetSource(operation.SourcesCount - 1);
                    Operation srcAttributeAsgOp = null;

                    if (src.Type == OperandType.LocalVariable &&
                        src.AsgOp is Operation asgOp &&
                        asgOp.Inst == Instruction.Load &&
                        asgOp.StorageKind.IsInputOrOutput())
                    {
                        // Values loaded from outputs are not accepted.
                        if (asgOp.StorageKind != StorageKind.Input)
                        {
                            return false;
                        }

                        srcAttributeAsgOp = asgOp;
                    }

                    if (srcAttributeAsgOp != null)
                    {
                        IoVariable dstAttribute = (IoVariable)operation.GetSource(0).Value;
                        IoVariable srcAttribute = (IoVariable)srcAttributeAsgOp.GetSource(0).Value;

                        if (dstAttribute == IoVariable.Layer && srcAttribute == IoVariable.UserDefined)
                        {
                            if (srcAttributeAsgOp.SourcesCount != 4)
                            {
                                return false;
                            }

                            writesLayer = true;
                            layerInputAttr = srcAttributeAsgOp.GetSource(1).Value * 4 + srcAttributeAsgOp.GetSource(3).Value;
                        }
                        else
                        {
                            // Anything else must be a copy from an input to the same output.
                            if (dstAttribute != srcAttribute)
                            {
                                return false;
                            }

                            int inputsCount = operation.SourcesCount - 2;

                            if (dstAttribute == IoVariable.UserDefined)
                            {
                                if (operation.GetSource(1).Value != srcAttributeAsgOp.GetSource(1).Value)
                                {
                                    return false;
                                }

                                inputsCount--;
                            }

                            // The trailing sources of the store and of the load are compared,
                            // from the last to the first, and must be the same constants.
                            for (int i = 0; i < inputsCount; i++)
                            {
                                int dstIndex = operation.SourcesCount - 2 - i;
                                int srcIndex = srcAttributeAsgOp.SourcesCount - 1 - i;

                                if ((dstIndex | srcIndex) < 0)
                                {
                                    return false;
                                }

                                if (operation.GetSource(dstIndex).Type != OperandType.Constant ||
                                    srcAttributeAsgOp.GetSource(srcIndex).Type != OperandType.Constant ||
                                    operation.GetSource(dstIndex).Value != srcAttributeAsgOp.GetSource(srcIndex).Value)
                                {
                                    return false;
                                }
                            }
                        }
                    }
                    else if (src.Type == OperandType.Constant)
                    {
                        // Constants are only accepted if they are 1 for component 3, and 0 for the others.
                        int dstComponent = operation.GetSource(operation.SourcesCount - 2).Value;
                        float expectedValue = dstComponent == 3 ? 1f : 0f;

                        if (src.AsFloat() != expectedValue)
                        {
                            return false;
                        }
                    }
                    else
                    {
                        return false;
                    }
                }
                else if (operation.Inst == Instruction.EmitVertex)
                {
                    verticesCount++;
                }
                else if (operation.Inst == Instruction.EndPrimitive)
                {
                    totalVerticesCount += verticesCount;
                    verticesCount = 0;
                }
            }
        }

        return totalVerticesCount + verticesCount == 3 && writesLayer;
    }

    /// <summary>
    /// Checks if an instruction is an atomic operation, an image store, or a global or storage memory store.
    /// </summary>
    /// <param name="inst">Instruction to check</param>
    /// <returns>True for the instructions listed below, false otherwise</returns>
    private static bool IsResourceWrite(Instruction inst)
    {
        switch (inst)
        {
            case Instruction.AtomicAdd:
            case Instruction.AtomicAnd:
            case Instruction.AtomicCompareAndSwap:
            case Instruction.AtomicMaxS32:
            case Instruction.AtomicMaxU32:
            case Instruction.AtomicMinS32:
            case Instruction.AtomicMinU32:
            case Instruction.AtomicOr:
            case Instruction.AtomicSwap:
            case Instruction.AtomicXor:
            case Instruction.ImageAtomic:
            case Instruction.ImageStore:
            case Instruction.StoreGlobal:
            case Instruction.StoreGlobal16:
            case Instruction.StoreGlobal8:
            case Instruction.StoreStorage:
            case Instruction.StoreStorage16:
            case Instruction.StoreStorage8:
                return true;
        }

        return false;
    }
}
|
||||
}
|
376
src/Ryujinx.Graphics.Shader/Translation/Ssa.cs
Normal file
376
src/Ryujinx.Graphics.Shader/Translation/Ssa.cs
Normal file
|
@ -0,0 +1,376 @@
|
|||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
static class Ssa
|
||||
{
|
||||
private const int GprsAndPredsCount = RegisterConsts.GprsCount + RegisterConsts.PredsCount;
|
||||
|
||||
/// <summary>
/// Register definitions of a block, along with the set of registers that need a phi on it.
/// </summary>
private class DefMap
{
    private readonly Dictionary<Register, Operand> _definitions;

    // One bit per register key, 64 keys per element.
    private readonly long[] _phiBits;

    public DefMap()
    {
        _definitions = new Dictionary<Register, Operand>();
        _phiBits = new long[(RegisterConsts.TotalCount + 63) / 64];
    }

    /// <summary>
    /// Sets the definition of a register, unless it already has one.
    /// </summary>
    /// <param name="reg">Register being defined</param>
    /// <param name="operand">Operand that holds the value of the register</param>
    /// <returns>True if the definition was added, false if the register already had one</returns>
    public bool TryAddOperand(Register reg, Operand operand) => _definitions.TryAdd(reg, operand);

    /// <summary>
    /// Gets the definition of a register.
    /// </summary>
    /// <param name="reg">Register to look up</param>
    /// <param name="operand">Operand that holds the value of the register, if found</param>
    /// <returns>True if the register has a definition, false otherwise</returns>
    public bool TryGetOperand(Register reg, out Operand operand) => _definitions.TryGetValue(reg, out operand);

    /// <summary>
    /// Flags that a register needs a phi.
    /// </summary>
    /// <param name="reg">Register that needs the phi</param>
    /// <returns>True if the flag was set by this call, false if it was already set</returns>
    public bool AddPhi(Register reg)
    {
        int key = GetKeyFromRegister(reg);

        ref long bits = ref _phiBits[key / 64];
        long keyBit = 1L << (key & 63);

        if ((bits & keyBit) != 0)
        {
            return false;
        }

        bits |= keyBit;

        return true;
    }

    /// <summary>
    /// Checks if a register was flagged as needing a phi.
    /// </summary>
    /// <param name="reg">Register to check</param>
    /// <returns>True if the register was flagged, false otherwise</returns>
    public bool HasPhi(Register reg)
    {
        int key = GetKeyFromRegister(reg);
        long keyBit = 1L << (key & 63);

        return (_phiBits[key / 64] & keyBit) != 0;
    }
}
|
||||
|
||||
/// <summary>
/// Map from register keys to operands that also keeps the list of keys
/// that were set, so that it can be enumerated and cleared cheaply.
/// </summary>
private class LocalDefMap
{
    private readonly Operand[] _operands;
    private readonly int[] _usedKeys;

    /// <summary>
    /// Number of keys that were set since the map was created or last cleared.
    /// </summary>
    public int UseCount { get; private set; }

    public LocalDefMap()
    {
        _operands = new Operand[RegisterConsts.TotalCount];
        _usedKeys = new int[RegisterConsts.TotalCount];
    }

    /// <summary>
    /// Gets the operand of a key.
    /// </summary>
    /// <param name="key">Register key</param>
    /// <returns>The operand of the key, or null if it has none</returns>
    public Operand Get(int key) => _operands[key];

    /// <summary>
    /// Sets the operand of a key, replacing the current one if it exists.
    /// </summary>
    /// <param name="key">Register key</param>
    /// <param name="operand">New operand of the key</param>
    public void Add(int key, Operand operand)
    {
        bool isNewKey = _operands[key] == null;

        if (isNewKey)
        {
            // Only the first set of a key is recorded on the list of used keys.
            _usedKeys[UseCount] = key;
            UseCount++;
        }

        _operands[key] = operand;
    }

    /// <summary>
    /// Gets a used key, and its operand, by position on the list of used keys.
    /// </summary>
    /// <param name="index">Position on the list, lower than <see cref="UseCount"/></param>
    /// <param name="key">Key at that position</param>
    /// <returns>The current operand of the key</returns>
    public Operand GetUse(int index, out int key)
    {
        key = _usedKeys[index];

        return _operands[key];
    }

    /// <summary>
    /// Removes the operands of all used keys and empties the list of used keys.
    /// </summary>
    public void Clear()
    {
        int used = UseCount;

        for (int position = 0; position < used; position++)
        {
            _operands[_usedKeys[position]] = null;
        }

        UseCount = 0;
    }
}
|
||||
|
||||
/// <summary>
/// Pairs a block with a local variable operand.
/// </summary>
private readonly struct Definition
{
    /// <summary>Block passed on construction.</summary>
    public BasicBlock Block { get; }

    /// <summary>Local operand passed on construction.</summary>
    public Operand Local { get; }

    /// <summary>
    /// Creates a new definition.
    /// </summary>
    /// <param name="block">Value of <see cref="Block"/></param>
    /// <param name="local">Value of <see cref="Local"/></param>
    public Definition(BasicBlock block, Operand local)
    {
        (Block, Local) = (block, local);
    }
}
|
||||
|
||||
/// <summary>
/// Replaces register operands on the supplied blocks with local operands, in two passes.
/// </summary>
/// <remarks>
/// On the first pass, register sources are replaced with the local defined earlier on the
/// same block (when there is one), and every register destination is replaced with a new local.
/// The final local of each register written on a block is recorded on that block's definition map,
/// and the register is flagged as needing a phi on the blocks reached through dominance frontiers.
/// On the second pass, the register sources that are still left are resolved through
/// <see cref="FindDefinitionForCurr"/>.
/// </remarks>
/// <param name="blocks">Blocks to process; operations are modified in place</param>
public static void Rename(BasicBlock[] blocks)
{
    int blockCount = blocks.Length;

    DefMap[] globalDefs = new DefMap[blockCount];
    LocalDefMap localDefs = new LocalDefMap();

    for (int i = 0; i < blockCount; i++)
    {
        globalDefs[i] = new DefMap();
    }

    Queue<BasicBlock> pendingFrontiers = new Queue<BasicBlock>();

    // Swaps a register operand for the local that replaced it earlier on the current block, if any.
    Operand RenameLocal(Operand operand)
    {
        if (operand == null || operand.Type != OperandType.Register)
        {
            return operand;
        }

        return localDefs.Get(GetKeyFromRegister(operand.GetRegister())) ?? operand;
    }

    // First pass, get all defs and locals uses.
    foreach (BasicBlock block in blocks)
    {
        for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
        {
            if (node.Value is not Operation operation)
            {
                continue;
            }

            for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
            {
                operation.SetSource(srcIndex, RenameLocal(operation.GetSource(srcIndex)));
            }

            for (int dstIndex = 0; dstIndex < operation.DestsCount; dstIndex++)
            {
                Operand dest = operation.GetDest(dstIndex);

                if (dest == null || dest.Type != OperandType.Register)
                {
                    continue;
                }

                Operand local = Local();

                localDefs.Add(GetKeyFromRegister(dest.GetRegister()), local);

                operation.SetDest(dstIndex, local);
            }
        }

        int localUses = localDefs.UseCount;

        for (int useIndex = 0; useIndex < localUses; useIndex++)
        {
            Operand local = localDefs.GetUse(useIndex, out int key);

            Register reg = GetRegisterFromKey(key);

            globalDefs[block.Index].TryAddOperand(reg, local);

            // Flag the register on the dominance frontiers of this block, and keep
            // going through the frontiers of every block that was newly flagged.
            pendingFrontiers.Enqueue(block);

            while (pendingFrontiers.TryDequeue(out BasicBlock pending))
            {
                foreach (BasicBlock frontier in pending.DominanceFrontiers)
                {
                    if (globalDefs[frontier.Index].AddPhi(reg))
                    {
                        pendingFrontiers.Enqueue(frontier);
                    }
                }
            }
        }

        localDefs.Clear();
    }

    // Second pass, rename variables with definitions on different blocks.
    for (int blkIndex = 0; blkIndex < blockCount; blkIndex++)
    {
        BasicBlock block = blocks[blkIndex];

        // Resolves a register operand to its definition, caching the result per block on localDefs.
        Operand RenameGlobal(Operand operand)
        {
            if (operand == null || operand.Type != OperandType.Register)
            {
                return operand;
            }

            Register reg = operand.GetRegister();

            int key = GetKeyFromRegister(reg);

            Operand cached = localDefs.Get(key);

            if (cached != null)
            {
                return cached;
            }

            Operand definition = FindDefinitionForCurr(globalDefs, block, reg);

            localDefs.Add(key, definition);

            return definition;
        }

        LinkedListNode<INode> node = block.Operations.First;

        while (node != null)
        {
            if (node.Value is Operation operation)
            {
                for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
                {
                    operation.SetSource(srcIndex, RenameGlobal(operation.GetSource(srcIndex)));
                }
            }

            node = node.Next;
        }

        // The per block cache is reset between blocks; it is left untouched after the last one.
        bool isLastBlock = blkIndex == blockCount - 1;

        if (!isLastBlock)
        {
            localDefs.Clear();
        }
    }
}
|
||||
|
||||
/// <summary>
/// Finds the operand to be used for a register that is read on a block
/// before being written on that same block.
/// </summary>
/// <param name="globalDefs">Definition maps, indexed by block index</param>
/// <param name="current">Block where the register is read</param>
/// <param name="reg">Register being read</param>
/// <returns>
/// The result of a phi inserted on the block if it is flagged for the register,
/// otherwise the definition found starting at the immediate dominator,
/// or an undefined operand if the block is its own immediate dominator
/// </returns>
private static Operand FindDefinitionForCurr(DefMap[] globalDefs, BasicBlock current, Register reg)
{
    if (globalDefs[current.Index].HasPhi(reg))
    {
        return InsertPhi(globalDefs, current, reg);
    }

    BasicBlock dominator = current.ImmediateDominator;

    return current != dominator
        ? FindDefinition(globalDefs, dominator, reg).Local
        : Undef();
}
|
||||
|
||||
/// <summary>
/// Looks for a definition of a register on a block and then on its chain of immediate dominators.
/// </summary>
/// <param name="globalDefs">Definition maps, indexed by block index</param>
/// <param name="current">Block where the search starts</param>
/// <param name="reg">Register to find the definition for</param>
/// <returns>
/// The first recorded operand or newly inserted phi found along the chain,
/// or an undefined operand paired with <paramref name="current"/> if nothing was found
/// </returns>
private static Definition FindDefinition(DefMap[] globalDefs, BasicBlock current, Register reg)
{
    foreach (BasicBlock candidate in SelfAndImmediateDominators(current))
    {
        DefMap candidateDefs = globalDefs[candidate.Index];

        // An operand already recorded for the register takes priority over a pending phi.
        if (candidateDefs.TryGetOperand(reg, out Operand recorded))
        {
            return new Definition(candidate, recorded);
        }

        if (candidateDefs.HasPhi(reg))
        {
            Operand phiResult = InsertPhi(globalDefs, candidate, reg);

            return new Definition(candidate, phiResult);
        }
    }

    return new Definition(current, Undef());
}
|
||||
|
||||
/// <summary>
/// Enumerates a block followed by its chain of immediate dominators.
/// The enumeration ends after returning a block that is its own immediate dominator.
/// </summary>
/// <param name="block">Block where the enumeration starts</param>
/// <returns>The block itself, then each immediate dominator up the chain</returns>
private static IEnumerable<BasicBlock> SelfAndImmediateDominators(BasicBlock block)
{
    for (BasicBlock cursor = block; ; cursor = cursor.ImmediateDominator)
    {
        // The end of the chain is detected before yielding, so the last block is returned exactly once.
        bool isChainEnd = cursor == cursor.ImmediateDominator;

        yield return cursor;

        if (isChainEnd)
        {
            yield break;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Creates the phi node for a register on a block, and fills it with
/// one source per predecessor of the block.
/// </summary>
/// <param name="globalDefs">Definition maps, indexed by block index</param>
/// <param name="block">Block that receives the phi node</param>
/// <param name="reg">Register that the phi node stands for</param>
/// <returns>The new local operand created as the phi node result</returns>
private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Register reg)
{
    // The block was flagged as needing a Phi for this register, but the node does not
    // exist yet. Create it, attach one block/operand pair per predecessor, and hand
    // back the Phi result as the definition to use.
    Operand phiResult = Local();

    PhiNode phiNode = new PhiNode(phiResult);

    AddPhi(block, phiNode);

    // The result is recorded on the block before the predecessors are searched.
    globalDefs[block.Index].TryAddOperand(reg, phiResult);

    foreach (BasicBlock predecessor in block.Predecessors)
    {
        Definition incoming = FindDefinition(globalDefs, predecessor, reg);

        phiNode.AddSource(incoming.Block, incoming.Local);
    }

    return phiResult;
}
|
||||
|
||||
/// <summary>
/// Adds a phi node to the operation list of a block, after the phi nodes
/// that are already at the start of the list, or as the first node if there are none.
/// </summary>
/// <param name="block">Block that receives the phi node</param>
/// <param name="phi">Phi node to add</param>
private static void AddPhi(BasicBlock block, PhiNode phi)
{
    LinkedListNode<INode> cursor = block.Operations.First;

    // Advance while the node that follows is also a phi node.
    while (cursor?.Next?.Value is PhiNode)
    {
        cursor = cursor.Next;
    }

    bool endsOnPhi = cursor?.Value is PhiNode;

    if (!endsOnPhi)
    {
        block.Operations.AddFirst(phi);

        return;
    }

    block.Operations.AddAfter(cursor, phi);
}
|
||||
|
||||
/// <summary>
/// Converts a register into a flat integer key.
/// </summary>
/// <param name="reg">Register to convert</param>
/// <returns>
/// The register index for general purpose registers, the index offset by
/// RegisterConsts.GprsCount for predicates, and the index offset by
/// GprsAndPredsCount for any other register type
/// </returns>
private static int GetKeyFromRegister(Register reg)
{
    return reg.Type switch
    {
        RegisterType.Gpr => reg.Index,
        RegisterType.Predicate => RegisterConsts.GprsCount + reg.Index,
        _ => GprsAndPredsCount + reg.Index /* RegisterType.Flag */
    };
}
|
||||
|
||||
/// <summary>
/// Converts a flat integer key back into a register.
/// </summary>
/// <param name="key">Key to convert</param>
/// <returns>
/// A general purpose register for keys below RegisterConsts.GprsCount, a predicate
/// register for keys below GprsAndPredsCount, and a flag register for all other keys
/// </returns>
private static Register GetRegisterFromKey(int key)
{
    (int index, RegisterType type) = key < RegisterConsts.GprsCount
        ? (key, RegisterType.Gpr)
        : key < GprsAndPredsCount
            ? (key - RegisterConsts.GprsCount, RegisterType.Predicate)
            : (key - GprsAndPredsCount, RegisterType.Flag); /* key < RegisterConsts.TotalCount */

    return new Register(index, type);
}
|
||||
}
|
||||
}
|
8
src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs
Normal file
8
src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs
Normal file
|
@ -0,0 +1,8 @@
|
|||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Graphics APIs that can be selected as the translation target.
/// </summary>
public enum TargetApi
{
    OpenGL = 0,
    Vulkan = 1
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Shader languages that can be selected as the translation target.
/// </summary>
public enum TargetLanguage
{
    Glsl = 0,
    Spirv = 1,
    Arb = 2
}
|
||||
}
|
14
src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs
Normal file
14
src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs
Normal file
|
@ -0,0 +1,14 @@
|
|||
using System;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Bit flags that can be combined to configure a shader translation.
/// </summary>
[Flags]
public enum TranslationFlags
{
    None = 0,

    VertexA = 0b001,
    Compute = 0b010,
    DebugMode = 0b100
}
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Immutable set of options for a shader translation.
/// </summary>
public readonly struct TranslationOptions
{
    /// <summary>
    /// Language that the shader is translated to.
    /// </summary>
    public TargetLanguage TargetLanguage { get; }

    /// <summary>
    /// Graphics API that the translation targets.
    /// </summary>
    public TargetApi TargetApi { get; }

    /// <summary>
    /// Additional translation flags.
    /// </summary>
    public TranslationFlags Flags { get; }

    /// <summary>
    /// Creates a new set of translation options.
    /// </summary>
    /// <param name="targetLanguage">Language that the shader is translated to</param>
    /// <param name="targetApi">Graphics API that the translation targets</param>
    /// <param name="flags">Additional translation flags</param>
    public TranslationOptions(TargetLanguage targetLanguage, TargetApi targetApi, TranslationFlags flags)
    {
        (TargetLanguage, TargetApi, Flags) = (targetLanguage, targetApi, flags);
    }
}
|
||||
}
|
362
src/Ryujinx.Graphics.Shader/Translation/Translator.cs
Normal file
362
src/Ryujinx.Graphics.Shader/Translation/Translator.cs
Normal file
|
@ -0,0 +1,362 @@
|
|||
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
|
||||
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
|
||||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using Ryujinx.Graphics.Shader.Translation.Optimizations;
|
||||
using System;
|
||||
using System.Linq;
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
public static class Translator
|
||||
{
|
||||
private const int HeaderSize = 0x50;
|
||||
|
||||
/// <summary>
/// Holds the operations of a single function.
/// </summary>
internal readonly struct FunctionCode
{
    /// <summary>
    /// Operations that make up the function.
    /// </summary>
    public Operation[] Code { get; }

    /// <summary>
    /// Creates a new function code holder.
    /// </summary>
    /// <param name="code">Operations that make up the function</param>
    public FunctionCode(Operation[] code) => Code = code;
}
|
||||
|
||||
/// <summary>
/// Decodes the shader at the given address and creates a translator context for it.
/// </summary>
/// <param name="address">Address of the shader</param>
/// <param name="gpuAccessor">GPU accessor used while decoding</param>
/// <param name="options">Translation options</param>
/// <returns>The context created for the decoded shader</returns>
public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
    => DecodeShader(address, gpuAccessor, options);
|
||||
|
||||
/// <summary>
/// Turns emitted function code into a shader program on the configured target language.
/// </summary>
/// <remarks>
/// A control flow graph is built for every function. Register usage is computed for every
/// function except the first one, and all graphs with at least one block go through
/// call fixup, dominance computation, SSA renaming, optimization and rewriting.
/// </remarks>
/// <param name="functions">Code of all functions; the first element gets no in/out arguments</param>
/// <param name="config">Shader configuration</param>
/// <returns>The generated shader program</returns>
/// <exception cref="NotImplementedException">The configured target language has no generator here</exception>
internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config)
{
    int functionCount = functions.Length;

    var cfgs = new ControlFlowGraph[functionCount];
    var frus = new RegisterUsage.FunctionRegisterUsage[functionCount];

    // All the register usage information has to be available before any call is fixed up below.
    for (int i = 0; i < functionCount; i++)
    {
        cfgs[i] = ControlFlowGraph.Create(functions[i].Code);

        if (i != 0)
        {
            frus[i] = RegisterUsage.RunPass(cfgs[i]);
        }
    }

    Function[] funcs = new Function[functionCount];

    for (int i = 0; i < functionCount; i++)
    {
        ControlFlowGraph cfg = cfgs[i];

        bool isFirstFunction = i == 0;

        int inArgumentsCount = isFirstFunction ? 0 : frus[i].InArguments.Length;
        int outArgumentsCount = isFirstFunction ? 0 : frus[i].OutArguments.Length;

        if (cfg.Blocks.Length != 0)
        {
            RegisterUsage.FixupCalls(cfg.Blocks, frus);

            Dominance.FindDominators(cfg);
            Dominance.FindDominanceFrontiers(cfg.Blocks);

            Ssa.Rename(cfg.Blocks);

            Optimizer.RunPass(cfg.Blocks, config);
            Rewriter.RunPass(cfg.Blocks, config);
        }

        funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount);
    }

    var identification = ShaderIdentifier.Identify(funcs, config);

    var sInfo = StructuredProgram.MakeStructuredProgram(funcs, config);

    var info = config.CreateProgramInfo(identification);

    TargetLanguage language = config.Options.TargetLanguage;

    switch (language)
    {
        case TargetLanguage.Glsl:
            return new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config));
        case TargetLanguage.Spirv:
            return new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config));
        default:
            throw new NotImplementedException(language.ToString());
    }
}
|
||||
|
||||
/// <summary>
/// Decodes a shader and creates the translator context for it.
/// </summary>
/// <remarks>
/// When the Compute flag is set, the code is decoded starting at <paramref name="address"/>.
/// Otherwise a shader header is read from that address, and the code is decoded
/// starting <see cref="HeaderSize"/> bytes after it.
/// </remarks>
/// <param name="address">Address of the shader</param>
/// <param name="gpuAccessor">GPU accessor used while decoding</param>
/// <param name="options">Translation options</param>
/// <returns>The context created for the decoded shader</returns>
private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
{
    bool isCompute = options.Flags.HasFlag(TranslationFlags.Compute);

    ShaderConfig config = isCompute
        ? new ShaderConfig(gpuAccessor, options)
        : new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options);

    DecodedProgram program = Decoder.Decode(config, isCompute ? address : address + HeaderSize);

    // Find the highest end address among all the blocks of all the functions.
    ulong maxEndAddress = 0;

    foreach (DecodedFunction function in program)
    {
        foreach (Block block in function.Blocks)
        {
            ulong endAddress = block.EndAddress;

            if (endAddress > maxEndAddress)
            {
                maxEndAddress = endAddress;
            }
        }
    }

    // The header size is added to the size only when a header was read.
    int headerSize = isCompute ? 0 : HeaderSize;

    config.SizeAdd((int)maxEndAddress + headerSize);

    return new TranslatorContext(address, program, config);
}
|
||||
|
||||
/// <summary>
/// Emits the operations for every function with an id on a decoded program.
/// </summary>
/// <param name="program">Decoded program to emit the code for</param>
/// <param name="config">Shader configuration</param>
/// <param name="initializeOutputs">True to emit the outputs initialization at the start of the first function</param>
/// <param name="initializationOperations">
/// Number of operations on the first function right after the outputs initialization, or zero if none was emitted
/// </param>
/// <returns>Code of all functions, indexed by function id</returns>
internal static FunctionCode[] EmitShader(DecodedProgram program, ShaderConfig config, bool initializeOutputs, out int initializationOperations)
{
    initializationOperations = 0;

    FunctionMatch.RunPass(program);

    // Ids are assigned in address order to the functions that are not compiler generated.
    var functionsByAddress = program.OrderBy(x => x.Address).Where(x => !x.IsCompilerGenerated);

    foreach (DecodedFunction function in functionsByAddress)
    {
        program.AddFunctionAndSetId(function);
    }

    int functionCount = program.FunctionsWithIdCount;

    FunctionCode[] functions = new FunctionCode[functionCount];

    for (int id = 0; id < functionCount; id++)
    {
        bool isFirstFunction = id == 0;

        EmitterContext context = new EmitterContext(program, config, !isFirstFunction);

        if (isFirstFunction && initializeOutputs)
        {
            EmitOutputsInitialization(context, config);
            initializationOperations = context.OperationsCount;
        }

        foreach (Block block in program.GetFunctionById(id).Blocks)
        {
            context.CurrBlock = block;

            context.EnterBlock(block.Address);

            EmitOps(context, block);
        }

        functions[id] = new FunctionCode(context.GetOperations());
    }

    return functions;
}
|
||||
|
||||
/// <summary>
/// Emits the stores that give shader outputs an initial value.
/// </summary>
/// <remarks>
/// Nothing is emitted for the compute and fragment stages. For the other stages, this
/// initializes the position (vertex stage only), the attribute components listed on
/// NextInputAttributesComponents that are not passthrough, the per patch attributes
/// listed on NextUsedInputAttributesPerPatch, and, when NextUsesFixedFuncAttributes is set,
/// the free user attributes reserved for fixed function use.
/// </remarks>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="config">Shader configuration</param>
private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config)
{
    // Compute has no output attributes, and fragment is the last stage, so we
    // don't need to initialize outputs on those stages.
    if (config.Stage is ShaderStage.Compute or ShaderStage.Fragment)
    {
        return;
    }

    if (config.Stage == ShaderStage.Vertex)
    {
        InitializePositionOutput(context);
    }

    // Each set bit is one attribute component; 4 consecutive bits make up one attribute.
    for (UInt128 pending = context.Config.NextInputAttributesComponents; pending != UInt128.Zero;)
    {
        int componentIndex = (int)UInt128.TrailingZeroCount(pending);
        int attributeIndex = componentIndex / 4;

        pending &= ~(UInt128.One << componentIndex);

        // We don't need to initialize passthrough attributes.
        bool isPassthrough = (context.Config.PassthroughAttributes & (1 << attributeIndex)) != 0;

        if (!isPassthrough)
        {
            InitializeOutputComponent(context, attributeIndex, componentIndex & 3, perPatch: false);
        }
    }

    var perPatchLocations = context.Config.NextUsedInputAttributesPerPatch;

    if (perPatchLocations != null)
    {
        foreach (int location in perPatchLocations.Order())
        {
            InitializeOutput(context, location, perPatch: true);
        }
    }

    if (!config.NextUsesFixedFuncAttributes)
    {
        return;
    }

    bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();

    int firstFixedAttr = supportsLayerFromVertexOrTess ? 0 : 1;
    int endFixedAttr = firstFixedAttr + 5 + AttributeConsts.TexCoordCount;

    for (int i = firstFixedAttr; i < endFixedAttr; i++)
    {
        int index = config.GetFreeUserAttribute(isOutput: true, i);

        // A negative index stops the search for the remaining fixed function attributes.
        if (index < 0)
        {
            break;
        }

        InitializeOutput(context, index, perPatch: false);

        config.SetOutputUserAttributeFixedFunc(index);
    }
}
|
||||
|
||||
/// <summary>
/// Emits stores that set the position output to (0, 0, 0, 1).
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
private static void InitializePositionOutput(EmitterContext context)
{
    const int ComponentsCount = 4;

    for (int component = 0; component < ComponentsCount; component++)
    {
        // Only the last component is set to one.
        float initialValue = component == 3 ? 1f : 0f;

        context.Store(StorageKind.Output, IoVariable.Position, null, Const(component), ConstF(initialValue));
    }
}
|
||||
|
||||
/// <summary>
/// Emits the initialization of all 4 components of a user defined output.
/// </summary>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="location">Location of the output</param>
/// <param name="perPatch">True if the output is a per patch output</param>
private static void InitializeOutput(EmitterContext context, int location, bool perPatch)
{
    int component = 0;

    while (component < 4)
    {
        InitializeOutputComponent(context, location, component, perPatch);

        component++;
    }
}
|
||||
|
||||
/// <summary>
/// Emits a store that initializes one component of a user defined output.
/// The fourth component (index 3) is set to one, all the others to zero.
/// </summary>
/// <remarks>
/// On the tessellation control stage, for outputs that are not per patch, the invocation id
/// is loaded and passed to the store. When the OaIndexing feature is in use, the output is
/// addressed with a single flat index (location * 4 + component).
/// </remarks>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="location">Location of the output</param>
/// <param name="c">Index of the component to initialize</param>
/// <param name="perPatch">True if the output is a per patch output</param>
private static void InitializeOutputComponent(EmitterContext context, int location, int c, bool perPatch)
{
    StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;

    bool usesInvocationId = context.Config.Stage == ShaderStage.TessellationControl && !perPatch;

    if (!context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing))
    {
        if (usesInvocationId)
        {
            Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);

            context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(c == 3 ? 1f : 0f));
        }
        else
        {
            context.Store(storageKind, IoVariable.UserDefined, null, Const(location), Const(c), ConstF(c == 3 ? 1f : 0f));
        }

        return;
    }

    // Indexed outputs: the location and the component are folded into one flat index.
    Operand indexedInvocationId = usesInvocationId
        ? context.Load(StorageKind.Input, IoVariable.InvocationId)
        : null;

    context.Store(storageKind, IoVariable.UserDefined, indexedInvocationId, Const(location * 4 + c), ConstF(c == 3 ? 1f : 0f));
}
|
||||
|
||||
/// <summary>
/// Emits the operations for all the instructions of a block.
/// </summary>
/// <remarks>
/// Instructions whose predicate is not the PredicateTrueIndex predicate, and that are not
/// treated as unpredicated, are preceded by a conditional branch that skips them.
/// With the DebugMode flag set, a comment node is added before every instruction.
/// </remarks>
/// <param name="context">Emitter context that receives the operations</param>
/// <param name="block">Block with the instructions to emit</param>
private static void EmitOps(EmitterContext context, Block block)
{
    // Adds a comment node describing the instruction, and logs instructions that have no emitter.
    void AddDebugComment(InstOp instOp)
    {
        string instName;

        if (instOp.Emitter == null)
        {
            instName = "???";

            context.Config.GpuAccessor.Log($"Invalid instruction at 0x{instOp.Address:X6} (0x{instOp.RawOpCode:X16}).");
        }
        else
        {
            instName = instOp.Name.ToString();
        }

        context.Add(new CommentNode($"0x{instOp.Address:X6}: 0x{instOp.RawOpCode:X16} {instName}"));
    }

    for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
    {
        InstOp op = block.OpCodes[opIndex];

        if (context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode))
        {
            AddDebugComment(op);
        }

        InstConditional opConditional = new InstConditional(op.RawOpCode);

        bool usesTruePredicate = opConditional.Pred == RegisterConsts.PredicateTrueIndex;
        bool noPred = op.Props.HasFlag(InstProps.NoPred);

        // Nothing is emitted for a predicated instruction that uses the inverted
        // PredicateTrueIndex predicate (presumably it can never execute).
        if (!noPred && usesTruePredicate && opConditional.PredInv)
        {
            continue;
        }

        if (Decoder.IsPopBranch(op.Name))
        {
            // If the instruction is a SYNC or BRK instruction with only one
            // possible target address, then the instruction is basically
            // just a simple branch, we can generate code similar to branch
            // instructions, with the condition check on the branch itself.
            noPred = block.SyncTargets.Count <= 1;
        }
        else if (op.Name == InstName.Bra)
        {
            noPred = true;
        }

        Operand predSkipLbl = null;

        if (!usesTruePredicate && !noPred)
        {
            bool isLastOp = opIndex == block.OpCodes.Count - 1;

            Operand label;

            if (isLastOp && block.HasNext())
            {
                // The last instruction can skip straight to the label of the first successor.
                label = context.GetLabel(block.Successors[0].Address);
            }
            else
            {
                // Otherwise skip to a new label, marked right after the instruction below.
                label = predSkipLbl = Label();
            }

            Operand pred = Register(opConditional.Pred, RegisterType.Predicate);

            if (opConditional.PredInv)
            {
                context.BranchIfTrue(label, pred);
            }
            else
            {
                context.BranchIfFalse(label, pred);
            }
        }

        context.CurrOp = op;

        op.Emitter?.Invoke(context);

        if (predSkipLbl != null)
        {
            context.MarkLabel(predSkipLbl);
        }
    }
}
|
||||
}
|
||||
}
|
255
src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
Normal file
255
src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
Normal file
|
@ -0,0 +1,255 @@
|
|||
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
|
||||
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
|
||||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
using static Ryujinx.Graphics.Shader.Translation.Translator;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
public class TranslatorContext
|
||||
{
|
||||
private readonly DecodedProgram _program;
|
||||
private ShaderConfig _config;
|
||||
|
||||
public ulong Address { get; }
|
||||
|
||||
public ShaderStage Stage => _config.Stage;
|
||||
public int Size => _config.Size;
|
||||
public int Cb1DataSize => _config.Cb1DataSize;
|
||||
public bool LayerOutputWritten => _config.LayerOutputWritten;
|
||||
|
||||
public IGpuAccessor GpuAccessor => _config.GpuAccessor;
|
||||
|
||||
/// <summary>
/// Creates a new translator context.
/// </summary>
/// <param name="address">Address of the shader</param>
/// <param name="program">Decoded shader program</param>
/// <param name="config">Shader configuration</param>
internal TranslatorContext(ulong address, DecodedProgram program, ShaderConfig config)
{
    (Address, _program, _config) = (address, program, config);
}
|
||||
|
||||
/// <summary>
/// Checks if an operation is a load with the user defined IO variable as its first source.
/// </summary>
/// <param name="operation">Operation to check</param>
/// <returns>True if the operation is a load of a user defined variable, false otherwise</returns>
private static bool IsLoadUserDefined(Operation operation)
{
    // TODO: Check if sources count match and all sources are constant.
    if (operation.Inst != Instruction.Load)
    {
        return false;
    }

    IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value;

    return ioVariable == IoVariable.UserDefined;
}
|
||||
|
||||
/// <summary>
/// Checks if an operation is a store with the user defined IO variable as its first source.
/// </summary>
/// <param name="operation">Operation to check</param>
/// <returns>True if the operation is a store to a user defined variable, false otherwise</returns>
private static bool IsStoreUserDefined(Operation operation)
{
    // TODO: Check if sources count match and all sources are constant.
    if (operation.Inst != Instruction.Store)
    {
        return false;
    }

    IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value;

    return ioVariable == IoVariable.UserDefined;
}
|
||||
|
||||
/// <summary>
/// Combines the code of two shaders into a single one, where the main function
/// of shader A runs first and then falls into the main function of shader B.
/// </summary>
/// <remarks>
/// For shader A:
/// - All user attribute stores on shader A are turned into copies to a
/// temporary variable. It's assumed that shader B will consume them.
/// - All return instructions are turned into branch instructions, the
/// branch target being the start of the shader B code.
/// For shader B:
/// - All user attribute loads on shader B are turned into copies from a
/// temporary variable, as long that attribute is written by shader A.
/// </remarks>
/// <param name="a">Functions of shader A; the first element is the main function</param>
/// <param name="b">Functions of shader B; the first element is the main function</param>
/// <param name="aStart">Index of the first operation of the shader A main function to include</param>
/// <returns>
/// The combined main function, followed by the remaining functions of A and then the remaining functions of B
/// </returns>
private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b, int aStart)
{
    Operation[] codeA = a[0].Code;
    Operation[] codeB = b[0].Code;

    FunctionCode[] output = new FunctionCode[a.Length + b.Length - 1];

    // Size the list for the operations actually added below (the code taken from A,
    // the label that starts B, and the code of B), rather than for the function counts.
    int capacity = Math.Max(0, codeA.Length - aStart) + 1 + codeB.Length;

    List<Operation> ops = new List<Operation>(capacity);

    // One temporary per attribute component, created on the first store found on A.
    Operand[] temps = new Operand[AttributeConsts.UserAttributesCount * 4];

    Operand lblB = Label();

    for (int index = aStart; index < codeA.Length; index++)
    {
        Operation operation = codeA[index];

        if (IsStoreUserDefined(operation))
        {
            int tIndex = operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;

            Operand temp = temps[tIndex];

            if (temp == null)
            {
                temp = Local();

                temps[tIndex] = temp;
            }

            // The last source is used as the value being copied into the temporary.
            operation.Dest = temp;
            operation.TurnIntoCopy(operation.GetSource(operation.SourcesCount - 1));
        }

        if (operation.Inst == Instruction.Return)
        {
            ops.Add(new Operation(Instruction.Branch, lblB));
        }
        else
        {
            ops.Add(operation);
        }
    }

    ops.Add(new Operation(Instruction.MarkLabel, lblB));

    for (int index = 0; index < codeB.Length; index++)
    {
        Operation operation = codeB[index];

        if (IsLoadUserDefined(operation))
        {
            int tIndex = operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;

            Operand temp = temps[tIndex];

            // Loads of attributes that A never wrote are left untouched.
            if (temp != null)
            {
                operation.TurnIntoCopy(temp);
            }
        }

        ops.Add(operation);
    }

    output[0] = new FunctionCode(ops.ToArray());

    // The functions other than main are carried over as is, A first and then B.
    Array.Copy(a, 1, output, 1, a.Length - 1);
    Array.Copy(b, 1, output, a.Length, b.Length - 1);

    return output;
}
|
||||
|
||||
/// <summary>
/// Merges information from the configuration of the next stage into the configuration of this shader.
/// </summary>
/// <param name="nextStage">Translator context of the next stage</param>
public void SetNextStage(TranslatorContext nextStage) => _config.MergeFromtNextStage(nextStage._config);
|
||||
|
||||
/// <summary>
/// Forwards the geometry shader layer input attribute to the shader configuration.
/// </summary>
/// <param name="attr">Attribute value to forward</param>
public void SetGeometryShaderLayerInputAttribute(int attr) => _config.SetGeometryShaderLayerInputAttribute(attr);
|
||||
|
||||
/// <summary>
/// Forwards the "last in vertex pipeline" notification to the shader configuration.
/// </summary>
public void SetLastInVertexPipeline() => _config.SetLastInVertexPipeline();
|
||||
|
||||
/// <summary>
/// Translates the shader, optionally combined with another shader that runs before it.
/// </summary>
/// <param name="other">
/// Optional context of a shader whose code is placed before the code of this one.
/// When supplied, the outputs initialization is emitted on that shader rather than on this one
/// </param>
/// <returns>The translated shader program</returns>
public ShaderProgram Translate(TranslatorContext other = null)
{
    bool isStandalone = other == null;

    FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: isStandalone, out _);

    if (!isStandalone)
    {
        other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes, Enumerable.Empty<int>());

        // The operations emitted on the other shader before its own code starts
        // are left out of the combined code.
        FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int otherStart);

        code = Combine(otherCode, code, otherStart);

        _config.InheritFrom(other._config);
    }

    return Translator.Translate(code, _config);
}
|
||||
|
||||
/// <summary>
/// Generates a geometry shader that copies the position and the used output
/// attributes of this shader from each input vertex to an emitted vertex.
/// </summary>
/// <remarks>
/// The output topology and vertex count are picked from the primitive topology reported by
/// the GPU accessor. The attribute component that matches the layer output attribute of this
/// shader is written to the layer output instead of a user defined output.
/// </remarks>
/// <returns>The generated geometry shader program</returns>
/// <exception cref="NotImplementedException">The configured target language has no generator here</exception>
public ShaderProgram GenerateGeometryPassthrough()
{
    int outputAttributesMask = _config.UsedOutputAttributes;
    int layerOutputAttr = _config.LayerOutputAttribute;

    (OutputTopology outputTopology, int maxOutputVertices) = GpuAccessor.QueryPrimitiveTopology() switch
    {
        InputTopology.Points => (OutputTopology.PointList, 1),
        InputTopology.Lines or InputTopology.LinesAdjacency => (OutputTopology.LineStrip, 2),
        _ => (OutputTopology.TriangleStrip, 3)
    };

    ShaderConfig config = new ShaderConfig(ShaderStage.Geometry, outputTopology, maxOutputVertices, GpuAccessor, _config.Options);

    EmitterContext context = new EmitterContext(default, config, false);

    for (int vertex = 0; vertex < maxOutputVertices; vertex++)
    {
        // Go through the set bits of the mask, from the lowest to the highest.
        for (int pendingMask = outputAttributesMask; pendingMask != 0;)
        {
            int attrIndex = BitOperations.TrailingZeroCount(pendingMask);

            pendingMask &= ~(1 << attrIndex);

            for (int component = 0; component < 4; component++)
            {
                // Each attribute spans 16 units from the user attribute base, 4 per component.
                int attr = AttributeConsts.UserAttributeBase + attrIndex * 16 + component * 4;

                Operand value = context.Load(StorageKind.Input, IoVariable.UserDefined, Const(attrIndex), Const(vertex), Const(component));

                if (attr != layerOutputAttr)
                {
                    context.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(attrIndex), Const(component), value);
                    config.SetOutputUserAttribute(attrIndex);
                }
                else
                {
                    context.Store(StorageKind.Output, IoVariable.Layer, null, value);
                }

                config.SetInputUserAttribute(attrIndex, component);
            }
        }

        for (int component = 0; component < 4; component++)
        {
            Operand value = context.Load(StorageKind.Input, IoVariable.Position, Const(vertex), Const(component));

            context.Store(StorageKind.Output, IoVariable.Position, null, Const(component), value);
        }

        context.EmitVertex();
    }

    context.EndPrimitive();

    var operations = context.GetOperations();
    var cfg = ControlFlowGraph.Create(operations);
    var function = new Function(cfg.Blocks, "main", false, 0, 0);

    var sInfo = StructuredProgram.MakeStructuredProgram(new[] { function }, config);

    var info = config.CreateProgramInfo();

    TargetLanguage language = config.Options.TargetLanguage;

    switch (language)
    {
        case TargetLanguage.Glsl:
            return new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config));
        case TargetLanguage.Spirv:
            return new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config));
        default:
            throw new NotImplementedException(language.ToString());
    }
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue