Implement shader storage buffer operations using new Load/Store instructions (#4993)

* Implement storage buffer operations using new Load/Store instruction

* Extend GenerateMultiTargetStorageOp to also match access with constant offset, and add a log and comments

* Remove now unused code

* Catch more complex cases of global memory usage

* Shader cache version bump

* Extend global access elimination to work with more shared memory cases

* Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers

* Tweak preferencing to catch more cases

* Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel)

* Fix storage buffer bindings

* Simplify some code

* Shader cache version bump

* Fix typo

* Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
This commit is contained in:
gdkchan 2023-06-03 20:12:18 -03:00 committed by GitHub
parent 81c9052847
commit 21c9ac6240
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
42 changed files with 1468 additions and 1259 deletions

View file

@ -336,13 +336,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
int offset,
bool extended)
{
bool isSmallInt = size < LsSize.B32;
int count = GetVectorCount(size);
StorageKind storageKind = GetStorageKind(size);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
(_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
Operand bitOffset = GetBitOffset(context, addrLow);
Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));
for (int index = 0; index < count; index++)
{
@ -353,12 +352,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
break;
}
Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
if (isSmallInt)
{
value = ExtractSmallInt(context, size, bitOffset, value);
}
Operand value = context.Load(storageKind, context.IAdd(srcA, Const(offset + index * 4)), addrHigh);
context.Copy(Register(dest), value);
}
@ -445,10 +439,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
}
int count = GetVectorCount((LsSize)size);
StorageKind storageKind = GetStorageKind((LsSize)size);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
(_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
Operand bitOffset = GetBitOffset(context, addrLow);
Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));
for (int index = 0; index < count; index++)
{
@ -456,23 +451,24 @@ namespace Ryujinx.Graphics.Shader.Instructions
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
Operand addrLowOffset = context.IAdd(srcA, Const(offset + index * 4));
if (size == LsSize2.U8 || size == LsSize2.S8)
{
context.StoreGlobal8(addrLowOffset, addrHigh, value);
}
else if (size == LsSize2.U16 || size == LsSize2.S16)
{
context.StoreGlobal16(addrLowOffset, addrHigh, value);
}
else
{
context.StoreGlobal(addrLowOffset, addrHigh, value);
}
context.Store(storageKind, addrLowOffset, addrHigh, value);
}
}
/// <summary>
/// Picks the global memory storage kind that corresponds to a load/store size.
/// </summary>
/// <param name="size">Size selector of the memory access</param>
/// <returns>
/// The U8, S8, U16 or S16 global memory storage kind when <paramref name="size"/> is the
/// matching small integer size, and plain <see cref="StorageKind.GlobalMemory"/> for every other size
/// </returns>
private static StorageKind GetStorageKind(LsSize size)
{
    switch (size)
    {
        case LsSize.U8:
            return StorageKind.GlobalMemoryU8;

        case LsSize.S8:
            return StorageKind.GlobalMemoryS8;

        case LsSize.U16:
            return StorageKind.GlobalMemoryU16;

        case LsSize.S16:
            return StorageKind.GlobalMemoryS16;

        default:
            // All remaining sizes use the unsized global memory kind.
            return StorageKind.GlobalMemory;
    }
}
private static int GetVectorCount(LsSize size)
{
switch (size)