Implement shader storage buffer operations using new Load/Store instructions (#4993)
* Implement storage buffer operations using new Load/Store instruction
* Extend GenerateMultiTargetStorageOp to also match access with constant offset, and log and comments
* Remove now unused code
* Catch more complex cases of global memory usage
* Shader cache version bump
* Extend global access elimination to work with more shared memory cases
* Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers
* Tweak preferencing to catch more cases
* Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel)
* Fix storage buffer bindings
* Simplify some code
* Shader cache version bump
* Fix typo
* Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
This commit is contained in:
parent
81c9052847
commit
21c9ac6240
42 changed files with 1468 additions and 1259 deletions
|
@ -336,13 +336,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
|||
int offset,
|
||||
bool extended)
|
||||
{
|
||||
bool isSmallInt = size < LsSize.B32;
|
||||
|
||||
int count = GetVectorCount(size);
|
||||
StorageKind storageKind = GetStorageKind(size);
|
||||
|
||||
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
|
||||
(_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
|
||||
|
||||
Operand bitOffset = GetBitOffset(context, addrLow);
|
||||
Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));
|
||||
|
||||
for (int index = 0; index < count; index++)
|
||||
{
|
||||
|
@ -353,12 +352,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
|||
break;
|
||||
}
|
||||
|
||||
Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
|
||||
|
||||
if (isSmallInt)
|
||||
{
|
||||
value = ExtractSmallInt(context, size, bitOffset, value);
|
||||
}
|
||||
Operand value = context.Load(storageKind, context.IAdd(srcA, Const(offset + index * 4)), addrHigh);
|
||||
|
||||
context.Copy(Register(dest), value);
|
||||
}
|
||||
|
@ -445,10 +439,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
|||
}
|
||||
|
||||
int count = GetVectorCount((LsSize)size);
|
||||
StorageKind storageKind = GetStorageKind((LsSize)size);
|
||||
|
||||
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
|
||||
(_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
|
||||
|
||||
Operand bitOffset = GetBitOffset(context, addrLow);
|
||||
Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));
|
||||
|
||||
for (int index = 0; index < count; index++)
|
||||
{
|
||||
|
@ -456,23 +451,24 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
|||
|
||||
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
|
||||
|
||||
Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
|
||||
Operand addrLowOffset = context.IAdd(srcA, Const(offset + index * 4));
|
||||
|
||||
if (size == LsSize2.U8 || size == LsSize2.S8)
|
||||
{
|
||||
context.StoreGlobal8(addrLowOffset, addrHigh, value);
|
||||
}
|
||||
else if (size == LsSize2.U16 || size == LsSize2.S16)
|
||||
{
|
||||
context.StoreGlobal16(addrLowOffset, addrHigh, value);
|
||||
}
|
||||
else
|
||||
{
|
||||
context.StoreGlobal(addrLowOffset, addrHigh, value);
|
||||
}
|
||||
context.Store(storageKind, addrLowOffset, addrHigh, value);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Selects the <see cref="StorageKind"/> that corresponds to a load/store access size.
/// </summary>
/// <param name="size">Access size taken from the decoded instruction.</param>
/// <returns>
/// <c>GlobalMemoryU8</c>, <c>GlobalMemoryS8</c>, <c>GlobalMemoryU16</c> or <c>GlobalMemoryS16</c>
/// for the matching <see cref="LsSize"/> value; <c>GlobalMemory</c> for every other size.
/// </returns>
private static StorageKind GetStorageKind(LsSize size)
|
||||
{
|
||||
return size switch
|
||||
{
|
||||
LsSize.U8 => StorageKind.GlobalMemoryU8,
|
||||
LsSize.S8 => StorageKind.GlobalMemoryS8,
|
||||
LsSize.U16 => StorageKind.GlobalMemoryU16,
|
||||
LsSize.S16 => StorageKind.GlobalMemoryS16,
|
||||
// Every size without a dedicated 8/16-bit arm above uses the plain global memory kind.
_ => StorageKind.GlobalMemory
|
||||
};
|
||||
}
|
||||
|
||||
private static int GetVectorCount(LsSize size)
|
||||
{
|
||||
switch (size)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue