Implement Shader Instructions SUATOM and SURED (#2090)
* Initial Implementation
* Further improvements (no support for float/64-bit types)
* Merge atomic and reduce instructions, add missing format switch
* Fix rebase issues.
* Not used.
* Whoops. Fixed.
* Partial implementation of inc/dec, cleanup and TODOs
* Remove testing path
* Address Feedback
This commit is contained in:
parent
416dc8fde4
commit
142cededd4
16 changed files with 510 additions and 18 deletions
|
@ -277,6 +277,249 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
|||
context.Add(operation);
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits the image atomic operation for the SURED shader instruction (image reduction).
/// The operation is created without a destination operand.
/// </summary>
/// <param name="context">Emitter context that supplies the current opcode and receives the operation</param>
public static void Sured(EmitterContext context)
{
    OpCodeSured op = (OpCodeSured)context.CurrOp;

    SamplerType type = ConvertSamplerType(op.Dimensions);

    if (type == SamplerType.None)
    {
        context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");

        return;
    }

    // Cursors over the consecutive registers that start at Ra and Rb.
    int raCursor = op.Ra.Index;
    int rbCursor = op.Rb.Index;

    // Copies the register under the cursor and advances the cursor. Once the cursor
    // is above RegisterZeroIndex, a constant zero is produced and the cursor stays put.
    Operand NextSource(ref int cursor)
    {
        return cursor > RegisterConsts.RegisterZeroIndex
            ? Const(0)
            : context.Copy(Register(cursor++, RegisterType.Gpr));
    }

    List<Operand> operands = new List<Operand>();

    if (op.IsBindless)
    {
        operands.Add(context.Copy(Register(op.Rc)));
    }

    // The X coordinate is the first operand after the optional bindless handle.
    int xCoordIndex = operands.Count;

    for (int remaining = type.GetDimensions(); remaining > 0; remaining--)
    {
        operands.Add(NextSource(ref raCursor));
    }

    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        // 1D images are accessed as 2D with a constant zero second coordinate.
        operands.Add(Const(0));

        type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
    }

    if (type.HasFlag(SamplerType.Array))
    {
        // Array index comes from the next Ra register.
        operands.Add(NextSource(ref raCursor));
    }

    TextureFormat format;

    if (!op.UseType)
    {
        format = op.IsBindless
            ? TextureFormat.R32Sint
            : context.Config.GetTextureFormatAtomic(op.HandleOffset);
    }
    else
    {
        if (op.ByteAddress)
        {
            // Divide the byte address by the component size.
            Operand shift = Const(GetComponentSizeInBytesLog2(op.Type));

            operands[xCoordIndex] = context.ShiftRightS32(operands[xCoordIndex], shift);
        }

        // TODO: FP and 64-bit formats.
        bool queryFormat = op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64;

        format = queryFormat
            ? context.Config.GetTextureFormatAtomic(op.HandleOffset)
            : GetTextureFormat(op.Type);
    }

    // The value operand always comes last.
    operands.Add(NextSource(ref rbCursor));

    TextureFlags flags = GetAtomicOpFlags(op.AtomicOp);

    int handle = 0;

    if (op.IsBindless)
    {
        // The handle travels as the first source operand, so the immediate handle stays zero.
        flags |= TextureFlags.Bindless;
    }
    else
    {
        handle = op.HandleOffset;
    }

    context.Add(context.CreateTextureOperation(
        Instruction.ImageAtomic,
        type,
        format,
        flags,
        handle,
        0,
        null,
        operands.ToArray()));
}
|
||||
|
||||
/// <summary>
/// Emits the image atomic operation for the SUATOM shader instruction.
/// The result of the operation is written to the Rd register.
/// </summary>
/// <param name="context">Emitter context that supplies the current opcode and receives the operation</param>
public static void Suatom(EmitterContext context)
{
    OpCodeSuatom op = (OpCodeSuatom)context.CurrOp;

    SamplerType type = ConvertSamplerType(op.Dimensions);

    if (type == SamplerType.None)
    {
        context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");

        return;
    }

    // Cursors over the consecutive registers that start at Ra and Rb.
    int raCursor = op.Ra.Index;
    int rbCursor = op.Rb.Index;

    // Copies the register under the cursor and advances the cursor. Once the cursor
    // is above RegisterZeroIndex, a constant zero is produced and the cursor stays put.
    Operand NextSource(ref int cursor)
    {
        return cursor > RegisterConsts.RegisterZeroIndex
            ? Const(0)
            : context.Copy(Register(cursor++, RegisterType.Gpr));
    }

    List<Operand> operands = new List<Operand>();

    if (op.IsBindless)
    {
        operands.Add(context.Copy(Register(op.Rc)));
    }

    // The X coordinate is the first operand after the optional bindless handle.
    int xCoordIndex = operands.Count;

    for (int remaining = type.GetDimensions(); remaining > 0; remaining--)
    {
        operands.Add(NextSource(ref raCursor));
    }

    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        // 1D images are accessed as 2D with a constant zero second coordinate.
        operands.Add(Const(0));

        type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
    }

    if (type.HasFlag(SamplerType.Array))
    {
        // Array index comes from the next Ra register.
        operands.Add(NextSource(ref raCursor));
    }

    TextureFormat format;

    if (!op.UseType)
    {
        format = op.IsBindless
            ? TextureFormat.R32Sint
            : context.Config.GetTextureFormatAtomic(op.HandleOffset);
    }
    else
    {
        if (op.ByteAddress)
        {
            // Divide the byte address by the component size.
            Operand shift = Const(GetComponentSizeInBytesLog2(op.Type));

            operands[xCoordIndex] = context.ShiftRightS32(operands[xCoordIndex], shift);
        }

        // TODO: FP and 64-bit formats.
        bool queryFormat = op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64;

        format = queryFormat
            ? context.Config.GetTextureFormatAtomic(op.HandleOffset)
            : GetTextureFormat(op.Type);
    }

    // Compare-and-swap takes two consecutive Rb registers, every other operation takes one.
    int valueCount = op.CompareAndSwap ? 2 : 1;

    for (int i = 0; i < valueCount; i++)
    {
        operands.Add(NextSource(ref rbCursor));
    }

    TextureFlags flags = op.CompareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(op.AtomicOp);

    int handle = 0;

    if (op.IsBindless)
    {
        // The handle travels as the first source operand, so the immediate handle stays zero.
        flags |= TextureFlags.Bindless;
    }
    else
    {
        handle = op.HandleOffset;
    }

    // Destination is Rd, or a constant zero when its index is above RegisterZeroIndex.
    int rdIndex = op.Rd.Index;

    Operand dest = rdIndex > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : Register(rdIndex, RegisterType.Gpr);

    context.Add(context.CreateTextureOperation(
        Instruction.ImageAtomic,
        type,
        format,
        flags,
        handle,
        0,
        dest,
        operands.ToArray()));
}
|
||||
|
||||
public static void Tex(EmitterContext context)
|
||||
{
|
||||
EmitTextureSample(context, TextureFlags.None);
|
||||
|
@ -1332,6 +1575,55 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
|||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the base-2 logarithm of the component size, in bytes, for a reduction type.
/// </summary>
/// <param name="type">Reduction type</param>
/// <returns>3 for the 64-bit types (U64, S64, SD64), 2 for every other value</returns>
private static int GetComponentSizeInBytesLog2(ReductionType type)
{
    switch (type)
    {
        case ReductionType.U64:
        case ReductionType.S64:
        case ReductionType.SD64:
            return 3;

        default:
            // U32, S32, FP32FtzRn, FP16x2FtzRn, SD32 and any unlisted value.
            return 2;
    }
}
|
||||
|
||||
/// <summary>
/// Maps a reduction type to the texture format used for the image atomic operation.
/// </summary>
/// <param name="type">Reduction type</param>
/// <returns>The matching texture format, or R32Uint for unlisted values</returns>
private static TextureFormat GetTextureFormat(ReductionType type)
{
    switch (type)
    {
        case ReductionType.S32:
            return TextureFormat.R32Sint;

        case ReductionType.FP32FtzRn:
            return TextureFormat.R32Float;

        case ReductionType.FP16x2FtzRn:
            return TextureFormat.R16G16Float;

        case ReductionType.U64:
        case ReductionType.S64:
        case ReductionType.SD64:
            return TextureFormat.R32G32Uint;

        default:
            // U32, SD32 and any unlisted value.
            return TextureFormat.R32Uint;
    }
}
|
||||
|
||||
/// <summary>
/// Maps an atomic operation to the texture flag that selects it.
/// </summary>
/// <param name="op">Atomic operation</param>
/// <returns>The matching texture flag, or the Add flag for unlisted values</returns>
private static TextureFlags GetAtomicOpFlags(AtomicOp op)
{
    switch (op)
    {
        case AtomicOp.Minimum:
            return TextureFlags.Minimum;

        case AtomicOp.Maximum:
            return TextureFlags.Maximum;

        case AtomicOp.Increment:
            return TextureFlags.Increment;

        case AtomicOp.Decrement:
            return TextureFlags.Decrement;

        case AtomicOp.BitwiseAnd:
            return TextureFlags.BitwiseAnd;

        case AtomicOp.BitwiseOr:
            return TextureFlags.BitwiseOr;

        case AtomicOp.BitwiseExclusiveOr:
            return TextureFlags.BitwiseXor;

        case AtomicOp.Swap:
            return TextureFlags.Swap;

        default:
            // AtomicOp.Add and any unlisted value.
            return TextureFlags.Add;
    }
}
|
||||
|
||||
private static SamplerType ConvertSamplerType(ImageDimensions target)
|
||||
{
|
||||
return target switch
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue