Implement BFI, BRK, FLO, FSWZADD, PBK, SHFL and TXD shader instructions, misc. fixes

This commit is contained in:
gdk 2019-10-31 00:29:22 -03:00 committed by Thog
parent d786d8d2b9
commit 278a4c317c
38 changed files with 972 additions and 166 deletions

View file

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
static class HelperFunctionNames
{
public static string Shuffle = "Helper_Shuffle";
public static string ShuffleDown = "Helper_ShuffleDown";
public static string ShuffleUp = "Helper_ShuffleUp";
public static string ShuffleXor = "Helper_ShuffleXor";
public static string SwizzleAdd = "Helper_SwizzleAdd";
}
}

View file

@ -0,0 +1,9 @@
float Helper_Shuffle(float x, uint index, uint mask)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = (index & ~segMask) | minThreadId;
return (srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,9 @@
float Helper_ShuffleDown(float x, uint index, uint mask)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = gl_SubGroupInvocationARB + index;
return (srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,8 @@
float Helper_ShuffleUp(float x, uint index, uint mask)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint srcThreadId = gl_SubGroupInvocationARB - index;
return (srcThreadId >= minThreadId) ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,9 @@
float Helper_ShuffleXor(float x, uint index, uint mask)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = gl_SubGroupInvocationARB ^ index;
return (srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,7 @@
float Helper_SwizzleAdd(float x, float y, int mask)
{
vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0);
vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0);
int lutIdx = mask >> int(gl_SubGroupInvocationARB & 3u) * 2;
return x * xLut[lutIdx] + y * yLut[lutIdx];
}