Implement Load/Store Local/Shared and Atomic shared using new instructions (#5241)

* Implement Load/Store Local/Shared and Atomic shared using new instructions

* Remove now unused code

* Fix base offset register overwrite

* Fix missing storage buffer set index when generating GLSL for Vulkan

* Shader cache version bump

* Remove more unused code

* Some PR feedback
This commit is contained in:
gdkchan 2023-06-15 17:31:53 -03:00 committed by GitHub
parent 32d21ddf17
commit f92921a6d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
30 changed files with 475 additions and 567 deletions

View file

@ -10,12 +10,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
private enum MemoryRegion
{
Local,
Shared
}
public static void Atom(EmitterContext context)
{
InstAtom op = context.GetOp<InstAtom>();
@ -51,7 +45,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
_ => AtomSize.U32
};
Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, offset, Const(0), value);
Operand id = Const(context.Config.ResourceManager.SharedMemoryId);
Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, id, offset, value);
context.Copy(GetDest(op.Dest), res);
}
@ -114,14 +109,14 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
InstLdl op = context.GetOp<InstLdl>();
EmitLoad(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
EmitLoad(context, StorageKind.LocalMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
public static void Lds(EmitterContext context)
{
InstLds op = context.GetOp<InstLds>();
EmitLoad(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
EmitLoad(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
public static void Red(EmitterContext context)
@ -144,14 +139,14 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
InstStl op = context.GetOp<InstStl>();
EmitStore(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
EmitStore(context, StorageKind.LocalMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
public static void Sts(EmitterContext context)
{
InstSts op = context.GetOp<InstSts>();
EmitStore(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
EmitStore(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
private static Operand EmitLoadConstant(EmitterContext context, Operand slot, Operand offset)
@ -192,8 +187,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
StorageKind storageKind,
AtomOp op,
AtomSize type,
Operand addrLow,
Operand addrHigh,
Operand e0,
Operand e1,
Operand value)
{
Operand res = Const(0);
@ -203,7 +198,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Add:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicAdd(storageKind, addrLow, addrHigh, value);
res = context.AtomicAdd(storageKind, e0, e1, value);
}
else
{
@ -213,7 +208,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.And:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicAnd(storageKind, addrLow, addrHigh, value);
res = context.AtomicAnd(storageKind, e0, e1, value);
}
else
{
@ -223,7 +218,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Xor:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicXor(storageKind, addrLow, addrHigh, value);
res = context.AtomicXor(storageKind, e0, e1, value);
}
else
{
@ -233,7 +228,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Or:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicOr(storageKind, addrLow, addrHigh, value);
res = context.AtomicOr(storageKind, e0, e1, value);
}
else
{
@ -243,11 +238,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Max:
if (type == AtomSize.S32)
{
res = context.AtomicMaxS32(storageKind, addrLow, addrHigh, value);
res = context.AtomicMaxS32(storageKind, e0, e1, value);
}
else if (type == AtomSize.U32)
{
res = context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
res = context.AtomicMaxU32(storageKind, e0, e1, value);
}
else
{
@ -257,11 +252,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Min:
if (type == AtomSize.S32)
{
res = context.AtomicMinS32(storageKind, addrLow, addrHigh, value);
res = context.AtomicMinS32(storageKind, e0, e1, value);
}
else if (type == AtomSize.U32)
{
res = context.AtomicMinU32(storageKind, addrLow, addrHigh, value);
res = context.AtomicMinU32(storageKind, e0, e1, value);
}
else
{
@ -275,7 +270,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
private static void EmitLoad(
EmitterContext context,
MemoryRegion region,
StorageKind storageKind,
LsSize2 size,
Operand srcA,
int rd,
@ -287,19 +282,19 @@ namespace Ryujinx.Graphics.Shader.Instructions
return;
}
int id = storageKind == StorageKind.LocalMemory
? context.Config.ResourceManager.LocalMemoryId
: context.Config.ResourceManager.SharedMemoryId;
bool isSmallInt = size < LsSize2.B32;
int count = 1;
switch (size)
int count = size switch
{
case LsSize2.B64: count = 2; break;
case LsSize2.B128: count = 4; break;
}
LsSize2.B64 => 2,
LsSize2.B128 => 4,
_ => 1
};
Operand baseOffset = context.IAdd(srcA, Const(offset));
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
Operand bitOffset = GetBitOffset(context, baseOffset);
Operand baseOffset = context.Copy(srcA);
for (int index = 0; index < count; index++)
{
@ -310,14 +305,10 @@ namespace Ryujinx.Graphics.Shader.Instructions
break;
}
Operand elemOffset = context.IAdd(wordOffset, Const(index));
Operand value = null;
switch (region)
{
case MemoryRegion.Local: value = context.LoadLocal(elemOffset); break;
case MemoryRegion.Shared: value = context.LoadShared(elemOffset); break;
}
Operand byteOffset = context.IAdd(baseOffset, Const(offset + index * 4));
Operand wordOffset = context.ShiftRightU32(byteOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
Operand bitOffset = GetBitOffset(context, byteOffset);
Operand value = context.Load(storageKind, id, wordOffset);
if (isSmallInt)
{
@ -360,7 +351,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
private static void EmitStore(
EmitterContext context,
MemoryRegion region,
StorageKind storageKind,
LsSize2 size,
Operand srcA,
int rd,
@ -372,52 +363,54 @@ namespace Ryujinx.Graphics.Shader.Instructions
return;
}
int id = storageKind == StorageKind.LocalMemory
? context.Config.ResourceManager.LocalMemoryId
: context.Config.ResourceManager.SharedMemoryId;
bool isSmallInt = size < LsSize2.B32;
int count = 1;
switch (size)
int count = size switch
{
case LsSize2.B64: count = 2; break;
case LsSize2.B128: count = 4; break;
}
LsSize2.B64 => 2,
LsSize2.B128 => 4,
_ => 1
};
Operand baseOffset = context.IAdd(srcA, Const(offset));
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
Operand bitOffset = GetBitOffset(context, baseOffset);
Operand baseOffset = context.Copy(srcA);
for (int index = 0; index < count; index++)
{
bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
Operand elemOffset = context.IAdd(wordOffset, Const(index));
Operand byteOffset = context.IAdd(baseOffset, Const(offset + index * 4));
Operand wordOffset = context.ShiftRightU32(byteOffset, Const(2));
Operand bitOffset = GetBitOffset(context, byteOffset);
if (isSmallInt && region == MemoryRegion.Local)
if (isSmallInt && storageKind == StorageKind.LocalMemory)
{
Operand word = context.LoadLocal(elemOffset);
Operand word = context.Load(storageKind, id, wordOffset);
value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
}
if (region == MemoryRegion.Local)
if (storageKind == StorageKind.LocalMemory)
{
context.StoreLocal(elemOffset, value);
context.Store(storageKind, id, wordOffset, value);
}
else if (region == MemoryRegion.Shared)
else if (storageKind == StorageKind.SharedMemory)
{
switch (size)
{
case LsSize2.U8:
case LsSize2.S8:
context.StoreShared8(baseOffset, value);
context.Store(StorageKind.SharedMemory8, id, byteOffset, value);
break;
case LsSize2.U16:
case LsSize2.S16:
context.StoreShared16(baseOffset, value);
context.Store(StorageKind.SharedMemory16, id, byteOffset, value);
break;
default:
context.StoreShared(elemOffset, value);
context.Store(storageKind, id, wordOffset, value);
break;
}
}