Implement Shader Instructions SUATOM and SURED (#2090)

* Initial Implementation

* Further improvements (no support for float/64-bit types)

* Merge atomic and reduce instructions, add missing format switch

* Fix rebase issues.

* Not used.

* Whoops. Fixed.

* Partial implementation of inc/dec, cleanup and TODOs

* Remove testing path

* Address Feedback
This commit is contained in:
riperiperi 2021-08-31 06:51:57 +01:00 committed by GitHub
parent 416dc8fde4
commit 142cededd4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 510 additions and 18 deletions

View file

@ -277,6 +277,249 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Add(operation);
}
/// <summary>
/// Emits an image atomic operation, without a destination register, for the
/// image reduction opcode currently being processed.
/// </summary>
/// <param name="context">Emitter context that supplies the current opcode and receives the operation</param>
public static void Sured(EmitterContext context)
{
    var op = (OpCodeSured)context.CurrOp;

    SamplerType type = ConvertSamplerType(op.Dimensions);

    if (type == SamplerType.None)
    {
        context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");

        return;
    }

    int raIndex = op.Ra.Index;

    // Yields a copy of the next consecutive register starting at Ra,
    // or a zero constant once the index has moved past the zero register.
    Operand NextRa()
    {
        return raIndex > RegisterConsts.RegisterZeroIndex
            ? Const(0)
            : context.Copy(Register(raIndex++, RegisterType.Gpr));
    }

    var operands = new List<Operand>();

    // Bindless accesses carry the handle register as the first source.
    if (op.IsBindless)
    {
        operands.Add(context.Copy(Register(op.Rc)));
    }

    for (int remaining = type.GetDimensions(); remaining > 0; remaining--)
    {
        operands.Add(NextRa());
    }

    // 1D images are accessed as 2D with a constant zero second coordinate.
    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        operands.Add(Const(0));

        type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
    }

    if ((type & SamplerType.Array) == SamplerType.Array)
    {
        operands.Add(NextRa());

        // The flag is already present at this point; the OR is retained unchanged.
        type |= SamplerType.Array;
    }

    TextureFormat format;

    if (!op.UseType)
    {
        // Without an explicit type, bindless accesses fall back to R32Sint,
        // bound ones use the atomic format reported for the handle.
        format = op.IsBindless
            ? TextureFormat.R32Sint
            : context.Config.GetTextureFormatAtomic(op.HandleOffset);
    }
    else
    {
        if (op.ByteAddress)
        {
            // The X coordinate sits after the handle when bindless.
            // Presumably this turns a byte offset into an element index - TODO confirm.
            int xSlot = op.IsBindless ? 1 : 0;
            Operand shift = Const(GetComponentSizeInBytesLog2(op.Type));

            operands[xSlot] = context.ShiftRightS32(operands[xSlot], shift);
        }

        // TODO: FP and 64-bit formats.
        bool isSdType = op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64;

        format = isSdType
            ? context.Config.GetTextureFormatAtomic(op.HandleOffset)
            : GetTextureFormat(op.Type);
    }

    // The value operand comes from Rb and is always the last source.
    int rbIndex = op.Rb.Index;

    Operand value = rbIndex > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : context.Copy(Register(rbIndex, RegisterType.Gpr));

    operands.Add(value);

    TextureFlags flags = GetAtomicOpFlags(op.AtomicOp);
    int handle = op.HandleOffset;

    if (op.IsBindless)
    {
        flags |= TextureFlags.Bindless;
        handle = 0;
    }

    context.Add(context.CreateTextureOperation(
        Instruction.ImageAtomic,
        type,
        format,
        flags,
        handle,
        0,
        null,
        operands.ToArray()));
}
/// <summary>
/// Emits an image atomic operation, with a destination register, for the
/// image atomic opcode currently being processed. Compare-and-swap adds a
/// second Rb-derived source and uses the CAS flag instead of the atomic op flag.
/// </summary>
/// <param name="context">Emitter context that supplies the current opcode and receives the operation</param>
public static void Suatom(EmitterContext context)
{
    var op = (OpCodeSuatom)context.CurrOp;

    SamplerType type = ConvertSamplerType(op.Dimensions);

    if (type == SamplerType.None)
    {
        context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");

        return;
    }

    int raIndex = op.Ra.Index;
    int rbIndex = op.Rb.Index;
    int rdIndex = op.Rd.Index;

    // Yields a copy of the next consecutive register starting at Ra,
    // or a zero constant once the index has moved past the zero register.
    Operand NextRa()
    {
        return raIndex > RegisterConsts.RegisterZeroIndex
            ? Const(0)
            : context.Copy(Register(raIndex++, RegisterType.Gpr));
    }

    // Same as NextRa, but walking the registers that start at Rb.
    Operand NextRb()
    {
        return rbIndex > RegisterConsts.RegisterZeroIndex
            ? Const(0)
            : context.Copy(Register(rbIndex++, RegisterType.Gpr));
    }

    var operands = new List<Operand>();

    // Bindless accesses carry the handle register as the first source.
    if (op.IsBindless)
    {
        operands.Add(context.Copy(Register(op.Rc)));
    }

    for (int remaining = type.GetDimensions(); remaining > 0; remaining--)
    {
        operands.Add(NextRa());
    }

    // 1D images are accessed as 2D with a constant zero second coordinate.
    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        operands.Add(Const(0));

        type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
    }

    if ((type & SamplerType.Array) == SamplerType.Array)
    {
        operands.Add(NextRa());

        // The flag is already present at this point; the OR is retained unchanged.
        type |= SamplerType.Array;
    }

    TextureFormat format;

    if (!op.UseType)
    {
        // Without an explicit type, bindless accesses fall back to R32Sint,
        // bound ones use the atomic format reported for the handle.
        format = op.IsBindless
            ? TextureFormat.R32Sint
            : context.Config.GetTextureFormatAtomic(op.HandleOffset);
    }
    else
    {
        if (op.ByteAddress)
        {
            // The X coordinate sits after the handle when bindless.
            // Presumably this turns a byte offset into an element index - TODO confirm.
            int xSlot = op.IsBindless ? 1 : 0;
            Operand shift = Const(GetComponentSizeInBytesLog2(op.Type));

            operands[xSlot] = context.ShiftRightS32(operands[xSlot], shift);
        }

        // TODO: FP and 64-bit formats.
        bool isSdType = op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64;

        format = isSdType
            ? context.Config.GetTextureFormatAtomic(op.HandleOffset)
            : GetTextureFormat(op.Type);
    }

    // Compare-and-swap consumes two consecutive Rb registers, other operations one.
    int valueCount = op.CompareAndSwap ? 2 : 1;

    for (int i = 0; i < valueCount; i++)
    {
        operands.Add(NextRb());
    }

    TextureFlags flags;

    if (op.CompareAndSwap)
    {
        flags = TextureFlags.CAS;
    }
    else
    {
        flags = GetAtomicOpFlags(op.AtomicOp);
    }

    int handle = op.HandleOffset;

    if (op.IsBindless)
    {
        flags |= TextureFlags.Bindless;
        handle = 0;
    }

    // Destination is Rd, or a zero constant if the index is past the zero register.
    Operand dest = rdIndex > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : Register(rdIndex, RegisterType.Gpr);

    context.Add(context.CreateTextureOperation(
        Instruction.ImageAtomic,
        type,
        format,
        flags,
        handle,
        0,
        dest,
        operands.ToArray()));
}
public static void Tex(EmitterContext context)
{
EmitTextureSample(context, TextureFlags.None);
@ -1332,6 +1575,55 @@ namespace Ryujinx.Graphics.Shader.Instructions
};
}
/// <summary>
/// Gets the base-2 logarithm of the component size, in bytes, for a reduction type.
/// </summary>
/// <param name="type">Reduction type of the instruction</param>
/// <returns>3 for the U64, S64 and SD64 types, 2 for every other value</returns>
private static int GetComponentSizeInBytesLog2(ReductionType type)
{
    switch (type)
    {
        case ReductionType.U64:
        case ReductionType.S64:
        case ReductionType.SD64:
            return 3;

        // U32, S32, FP32FtzRn, FP16x2FtzRn, SD32 and any unlisted value.
        default:
            return 2;
    }
}
/// <summary>
/// Maps a reduction type to the texture format used for the image access.
/// </summary>
/// <param name="type">Reduction type of the instruction</param>
/// <returns>The matching texture format; R32Uint for U32, SD32 and any unlisted value</returns>
private static TextureFormat GetTextureFormat(ReductionType type)
{
    switch (type)
    {
        case ReductionType.S32:
            return TextureFormat.R32Sint;

        // All 64-bit types share the same two-component unsigned format.
        case ReductionType.U64:
        case ReductionType.S64:
        case ReductionType.SD64:
            return TextureFormat.R32G32Uint;

        case ReductionType.FP32FtzRn:
            return TextureFormat.R32Float;

        case ReductionType.FP16x2FtzRn:
            return TextureFormat.R16G16Float;

        // U32, SD32 and any unlisted value.
        default:
            return TextureFormat.R32Uint;
    }
}
/// <summary>
/// Maps an atomic operation to the texture flag that selects it.
/// </summary>
/// <param name="op">Atomic operation of the instruction</param>
/// <returns>The matching texture flag; the Add flag for Add and any unlisted value</returns>
private static TextureFlags GetAtomicOpFlags(AtomicOp op)
{
    switch (op)
    {
        case AtomicOp.Minimum:
            return TextureFlags.Minimum;
        case AtomicOp.Maximum:
            return TextureFlags.Maximum;
        case AtomicOp.Increment:
            return TextureFlags.Increment;
        case AtomicOp.Decrement:
            return TextureFlags.Decrement;
        case AtomicOp.BitwiseAnd:
            return TextureFlags.BitwiseAnd;
        case AtomicOp.BitwiseOr:
            return TextureFlags.BitwiseOr;
        case AtomicOp.BitwiseExclusiveOr:
            return TextureFlags.BitwiseXor;
        case AtomicOp.Swap:
            return TextureFlags.Swap;

        // AtomicOp.Add and any unlisted value.
        default:
            return TextureFlags.Add;
    }
}
private static SamplerType ConvertSamplerType(ImageDimensions target)
{
return target switch