Implement transform feedback emulation for hardware without native support (#5080)

* Implement transform feedback emulation for hardware without native support

* Stop doing some useless buffer updates and account for non-zero base instance

* Reduce redundant updates even more

* Update descriptor init logic to account for ResourceLayout

* Fix transform feedback and storage buffers not being updated in some cases

* Shader cache version bump

* PR feedback

* SetInstancedDrawVertexCount must be always called after UpdateState

* Minor typo
This commit is contained in:
gdkchan 2023-06-10 18:31:38 -03:00 committed by GitHub
parent 0e95a8271a
commit eb0bb36bbf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 375 additions and 68 deletions

View file

@ -10,5 +10,11 @@ namespace Ryujinx.Graphics.Shader
public const int NvnBaseVertexByteOffset = 0x640;
public const int NvnBaseInstanceByteOffset = 0x644;
public const int NvnDrawIndexByteOffset = 0x648;
// Transform Feedback emulation.
public const int TfeInfoBinding = 0;
public const int TfeBufferBaseBinding = 1;
public const int TfeBuffersCount = 4;
}
}

View file

@ -367,6 +367,15 @@ namespace Ryujinx.Graphics.Shader
return true;
}
/// <summary>
/// Queries host GPU transform feedback support.
/// </summary>
/// <returns>True if the GPU and driver supports transform feedback, false otherwise</returns>
bool QueryHostSupportsTransformFeedback()
{
return true;
}
/// <summary>
/// Queries host support for writes to the viewport index from vertex or tessellation shader stages.
/// </summary>

View file

@ -234,6 +234,45 @@ namespace Ryujinx.Graphics.Shader.Translation
public void PrepareForVertexReturn()
{
if (!Config.GpuAccessor.QueryHostSupportsTransformFeedback() && Config.GpuAccessor.QueryTransformFeedbackEnabled())
{
Operand vertexCount = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(1));
for (int tfbIndex = 0; tfbIndex < Constants.TfeBuffersCount; tfbIndex++)
{
var locations = Config.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
var stride = Config.GpuAccessor.QueryTransformFeedbackStride(tfbIndex);
Operand baseOffset = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(0), Const(tfbIndex));
Operand baseVertex = this.Load(StorageKind.Input, IoVariable.BaseVertex);
Operand baseInstance = this.Load(StorageKind.Input, IoVariable.BaseInstance);
Operand vertexIndex = this.Load(StorageKind.Input, IoVariable.VertexIndex);
Operand instanceIndex = this.Load(StorageKind.Input, IoVariable.InstanceIndex);
Operand outputVertexOffset = this.ISubtract(vertexIndex, baseVertex);
Operand outputInstanceOffset = this.ISubtract(instanceIndex, baseInstance);
Operand outputBaseVertex = this.IMultiply(outputInstanceOffset, vertexCount);
Operand vertexOffset = this.IMultiply(this.IAdd(outputBaseVertex, outputVertexOffset), Const(stride / 4));
baseOffset = this.IAdd(baseOffset, vertexOffset);
for (int j = 0; j < locations.Length; j++)
{
byte location = locations[j];
if (location == 0xff)
{
continue;
}
Operand offset = this.IAdd(baseOffset, Const(j));
Operand value = Instructions.AttributeMap.GenerateAttributeLoad(this, null, location * 4, isOutput: true, isPerPatch: false);
this.Store(StorageKind.StorageBuffer, Constants.TfeBufferBaseBinding + tfbIndex, Const(0), offset, value);
}
}
}
if (Config.GpuAccessor.QueryViewportTransformDisable())
{
Operand x = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));

View file

@ -132,6 +132,11 @@ namespace Ryujinx.Graphics.Shader.Translation
_transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>();
TransformFeedbackEnabled =
stage != ShaderStage.Compute &&
gpuAccessor.QueryTransformFeedbackEnabled() &&
gpuAccessor.QueryHostSupportsTransformFeedback();
UsedInputAttributesPerPatch = new HashSet<int>();
UsedOutputAttributesPerPatch = new HashSet<int>();
@ -139,6 +144,31 @@ namespace Ryujinx.Graphics.Shader.Translation
_usedImages = new Dictionary<TextureInfo, TextureMeta>();
ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties());
if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled())
{
StructureType tfeInfoStruct = new StructureType(new StructureField[]
{
new StructureField(AggregateType.Array | AggregateType.U32, "base_offset", 4),
new StructureField(AggregateType.U32, "vertex_count")
});
BufferDefinition tfeInfoBuffer = new BufferDefinition(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", tfeInfoStruct);
Properties.AddStorageBuffer(Constants.TfeInfoBinding, tfeInfoBuffer);
StructureType tfeDataStruct = new StructureType(new StructureField[]
{
new StructureField(AggregateType.Array | AggregateType.U32, "data", 0)
});
for (int i = 0; i < Constants.TfeBuffersCount; i++)
{
int binding = Constants.TfeBufferBaseBinding + i;
BufferDefinition tfeDataBuffer = new BufferDefinition(BufferLayout.Std430, 1, binding, $"tfe_data{i}", tfeDataStruct);
Properties.AddStorageBuffer(binding, tfeDataBuffer);
}
}
}
public ShaderConfig(
@ -151,7 +181,6 @@ namespace Ryujinx.Graphics.Shader.Translation
ThreadsPerInputPrimitive = 1;
OutputTopology = outputTopology;
MaxOutputVertices = maxOutputVertices;
TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
}
public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options)
@ -165,7 +194,6 @@ namespace Ryujinx.Graphics.Shader.Translation
OmapTargets = header.OmapTargets;
OmapSampleMask = header.OmapSampleMask;
OmapDepth = header.OmapDepth;
TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
LastInVertexPipeline = header.Stage < ShaderStage.Fragment;
}