Fix vote and shuffle shader instructions on AMD GPUs (#5540)
* Move shuffle handling out of the backend to a transform pass
* Handle subgroup sizes higher than 32
* Stop using the subgroup size control extension
* Make GenerateShuffleFunction static
* Shader cache version bump
This commit is contained in:
parent
64079c034c
commit
6ed613a6e6
35 changed files with 445 additions and 265 deletions
|
@@ -231,7 +231,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||
var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
|
||||
|
||||
var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source));
|
||||
var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)0);
|
||||
var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);
|
||||
|
||||
return new OperationResult(AggregateType.U32, mask);
|
||||
}
|
||||
|
@@ -1100,117 +1100,40 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||
|
||||
private static OperationResult GenerateShuffle(CodeGenContext context, AstOperation operation)
|
||||
{
|
||||
var x = context.GetFP32(operation.GetSource(0));
|
||||
var value = context.GetFP32(operation.GetSource(0));
|
||||
var index = context.GetU32(operation.GetSource(1));
|
||||
var mask = context.GetU32(operation.GetSource(2));
|
||||
|
||||
var const31 = context.Constant(context.TypeU32(), 31);
|
||||
var const8 = context.Constant(context.TypeU32(), 8);
|
||||
|
||||
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
|
||||
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
|
||||
var notSegMask = context.Not(context.TypeU32(), segMask);
|
||||
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
|
||||
var indexNotSegMask = context.BitwiseAnd(context.TypeU32(), index, notSegMask);
|
||||
|
||||
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
|
||||
|
||||
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
|
||||
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
|
||||
var srcThreadId = context.BitwiseOr(context.TypeU32(), indexNotSegMask, minThreadId);
|
||||
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
|
||||
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
|
||||
var result = context.Select(context.TypeFP32(), valid, value, x);
|
||||
|
||||
var validLocal = (AstOperand)operation.GetSource(3);
|
||||
|
||||
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
|
||||
var result = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
|
||||
|
||||
return new OperationResult(AggregateType.FP32, result);
|
||||
}
|
||||
|
||||
private static OperationResult GenerateShuffleDown(CodeGenContext context, AstOperation operation)
|
||||
{
|
||||
var x = context.GetFP32(operation.GetSource(0));
|
||||
var value = context.GetFP32(operation.GetSource(0));
|
||||
var index = context.GetU32(operation.GetSource(1));
|
||||
var mask = context.GetU32(operation.GetSource(2));
|
||||
|
||||
var const31 = context.Constant(context.TypeU32(), 31);
|
||||
var const8 = context.Constant(context.TypeU32(), 8);
|
||||
|
||||
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
|
||||
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
|
||||
var notSegMask = context.Not(context.TypeU32(), segMask);
|
||||
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
|
||||
|
||||
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
|
||||
|
||||
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
|
||||
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
|
||||
var srcThreadId = context.IAdd(context.TypeU32(), threadId, index);
|
||||
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
|
||||
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
|
||||
var result = context.Select(context.TypeFP32(), valid, value, x);
|
||||
|
||||
var validLocal = (AstOperand)operation.GetSource(3);
|
||||
|
||||
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
|
||||
var result = context.GroupNonUniformShuffleDown(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
|
||||
|
||||
return new OperationResult(AggregateType.FP32, result);
|
||||
}
|
||||
|
||||
private static OperationResult GenerateShuffleUp(CodeGenContext context, AstOperation operation)
|
||||
{
|
||||
var x = context.GetFP32(operation.GetSource(0));
|
||||
var value = context.GetFP32(operation.GetSource(0));
|
||||
var index = context.GetU32(operation.GetSource(1));
|
||||
var mask = context.GetU32(operation.GetSource(2));
|
||||
|
||||
var const31 = context.Constant(context.TypeU32(), 31);
|
||||
var const8 = context.Constant(context.TypeU32(), 8);
|
||||
|
||||
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
|
||||
|
||||
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
|
||||
|
||||
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
|
||||
var srcThreadId = context.ISub(context.TypeU32(), threadId, index);
|
||||
var valid = context.SGreaterThanEqual(context.TypeBool(), srcThreadId, minThreadId);
|
||||
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
|
||||
var result = context.Select(context.TypeFP32(), valid, value, x);
|
||||
|
||||
var validLocal = (AstOperand)operation.GetSource(3);
|
||||
|
||||
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
|
||||
var result = context.GroupNonUniformShuffleUp(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
|
||||
|
||||
return new OperationResult(AggregateType.FP32, result);
|
||||
}
|
||||
|
||||
private static OperationResult GenerateShuffleXor(CodeGenContext context, AstOperation operation)
|
||||
{
|
||||
var x = context.GetFP32(operation.GetSource(0));
|
||||
var value = context.GetFP32(operation.GetSource(0));
|
||||
var index = context.GetU32(operation.GetSource(1));
|
||||
var mask = context.GetU32(operation.GetSource(2));
|
||||
|
||||
var const31 = context.Constant(context.TypeU32(), 31);
|
||||
var const8 = context.Constant(context.TypeU32(), 8);
|
||||
|
||||
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
|
||||
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
|
||||
var notSegMask = context.Not(context.TypeU32(), segMask);
|
||||
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
|
||||
|
||||
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
|
||||
|
||||
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
|
||||
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
|
||||
var srcThreadId = context.BitwiseXor(context.TypeU32(), threadId, index);
|
||||
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
|
||||
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
|
||||
var result = context.Select(context.TypeFP32(), valid, value, x);
|
||||
|
||||
var validLocal = (AstOperand)operation.GetSource(3);
|
||||
|
||||
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
|
||||
var result = context.GroupNonUniformShuffleXor(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
|
||||
|
||||
return new OperationResult(AggregateType.FP32, result);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue