Use shader subgroup extensions if shader ballot is not supported (#2627)

* Use shader subgroup extensions if shader ballot is not supported

* Shader cache version bump + cleanup

* The type is still required on the table
This commit is contained in:
gdkchan 2021-09-19 09:38:39 -03:00 committed by GitHub
parent 7379bc2f39
commit f08a280ade
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 125 additions and 42 deletions

View file

@ -2,10 +2,10 @@ float Helper_Shuffle(float x, uint index, uint mask, out bool valid)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = (index & ~segMask) | minThreadId;
valid = srcThreadId <= maxThreadId;
float v = readInvocationARB(x, srcThreadId);
float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x;
}

View file

@ -2,10 +2,10 @@ float Helper_ShuffleDown(float x, uint index, uint mask, out bool valid)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = gl_SubGroupInvocationARB + index;
uint srcThreadId = $SUBGROUP_INVOCATION$ + index;
valid = srcThreadId <= maxThreadId;
float v = readInvocationARB(x, srcThreadId);
float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x;
}

View file

@ -1,9 +1,9 @@
float Helper_ShuffleUp(float x, uint index, uint mask, out bool valid)
{
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint srcThreadId = gl_SubGroupInvocationARB - index;
uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint srcThreadId = $SUBGROUP_INVOCATION$ - index;
valid = int(srcThreadId) >= int(minThreadId);
float v = readInvocationARB(x, srcThreadId);
float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x;
}

View file

@ -2,10 +2,10 @@ float Helper_ShuffleXor(float x, uint index, uint mask, out bool valid)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = gl_SubGroupInvocationARB ^ index;
uint srcThreadId = $SUBGROUP_INVOCATION$ ^ index;
valid = srcThreadId <= maxThreadId;
float v = readInvocationARB(x, srcThreadId);
float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x;
}

View file

@ -2,6 +2,6 @@ float Helper_SwizzleAdd(float x, float y, int mask)
{
vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0);
vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0);
int lutIdx = mask >> int(gl_SubGroupInvocationARB & 3u) * 2;
int lutIdx = mask >> int($SUBGROUP_INVOCATION$ & 3u) * 2;
return x * xLut[lutIdx] + y * yLut[lutIdx];
}