Fix vote and shuffle shader instructions on AMD GPUs (#5540)

* Move shuffle handling out of the backend to a transform pass

* Handle subgroup sizes higher than 32

* Stop using the subgroup size control extension

* Make GenerateShuffleFunction static

* Shader cache version bump
This commit is contained in:
gdkchan 2023-08-16 21:31:07 -03:00 committed by GitHub
parent 64079c034c
commit 6ed613a6e6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 445 additions and 265 deletions

View file

@ -231,7 +231,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source));
var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)0);
var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);
return new OperationResult(AggregateType.U32, mask);
}
@ -1100,117 +1100,40 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
private static OperationResult GenerateShuffle(CodeGenContext context, AstOperation operation)
{
var x = context.GetFP32(operation.GetSource(0));
var value = context.GetFP32(operation.GetSource(0));
var index = context.GetU32(operation.GetSource(1));
var mask = context.GetU32(operation.GetSource(2));
var const31 = context.Constant(context.TypeU32(), 31);
var const8 = context.Constant(context.TypeU32(), 8);
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
var notSegMask = context.Not(context.TypeU32(), segMask);
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
var indexNotSegMask = context.BitwiseAnd(context.TypeU32(), index, notSegMask);
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
var srcThreadId = context.BitwiseOr(context.TypeU32(), indexNotSegMask, minThreadId);
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
var result = context.Select(context.TypeFP32(), valid, value, x);
var validLocal = (AstOperand)operation.GetSource(3);
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
var result = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
return new OperationResult(AggregateType.FP32, result);
}
private static OperationResult GenerateShuffleDown(CodeGenContext context, AstOperation operation)
{
var x = context.GetFP32(operation.GetSource(0));
var value = context.GetFP32(operation.GetSource(0));
var index = context.GetU32(operation.GetSource(1));
var mask = context.GetU32(operation.GetSource(2));
var const31 = context.Constant(context.TypeU32(), 31);
var const8 = context.Constant(context.TypeU32(), 8);
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
var notSegMask = context.Not(context.TypeU32(), segMask);
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
var srcThreadId = context.IAdd(context.TypeU32(), threadId, index);
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
var result = context.Select(context.TypeFP32(), valid, value, x);
var validLocal = (AstOperand)operation.GetSource(3);
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
var result = context.GroupNonUniformShuffleDown(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
return new OperationResult(AggregateType.FP32, result);
}
private static OperationResult GenerateShuffleUp(CodeGenContext context, AstOperation operation)
{
var x = context.GetFP32(operation.GetSource(0));
var value = context.GetFP32(operation.GetSource(0));
var index = context.GetU32(operation.GetSource(1));
var mask = context.GetU32(operation.GetSource(2));
var const31 = context.Constant(context.TypeU32(), 31);
var const8 = context.Constant(context.TypeU32(), 8);
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
var srcThreadId = context.ISub(context.TypeU32(), threadId, index);
var valid = context.SGreaterThanEqual(context.TypeBool(), srcThreadId, minThreadId);
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
var result = context.Select(context.TypeFP32(), valid, value, x);
var validLocal = (AstOperand)operation.GetSource(3);
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
var result = context.GroupNonUniformShuffleUp(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
return new OperationResult(AggregateType.FP32, result);
}
private static OperationResult GenerateShuffleXor(CodeGenContext context, AstOperation operation)
{
var x = context.GetFP32(operation.GetSource(0));
var value = context.GetFP32(operation.GetSource(0));
var index = context.GetU32(operation.GetSource(1));
var mask = context.GetU32(operation.GetSource(2));
var const31 = context.Constant(context.TypeU32(), 31);
var const8 = context.Constant(context.TypeU32(), 8);
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
var notSegMask = context.Not(context.TypeU32(), segMask);
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
var srcThreadId = context.BitwiseXor(context.TypeU32(), threadId, index);
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
var result = context.Select(context.TypeFP32(), valid, value, x);
var validLocal = (AstOperand)operation.GetSource(3);
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
var result = context.GroupNonUniformShuffleXor(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
return new OperationResult(AggregateType.FP32, result);
}