Fix vote and shuffle shader instructions on AMD GPUs (#5540)

* Move shuffle handling out of the backend to a transform pass

* Handle subgroup sizes higher than 32

* Stop using the subgroup size control extension

* Make GenerateShuffleFunction static

* Shader cache version bump
This commit is contained in:
gdkchan 2023-08-16 21:31:07 -03:00 committed by GitHub
parent 64079c034c
commit 6ed613a6e6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 445 additions and 265 deletions

View file

@ -25,7 +25,6 @@ namespace Ryujinx.Graphics.Vulkan
public readonly bool SupportsIndirectParameters;
public readonly bool SupportsFragmentShaderInterlock;
public readonly bool SupportsGeometryShaderPassthrough;
public readonly bool SupportsSubgroupSizeControl;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsShaderInt8;
public readonly bool SupportsShaderStencilExport;
@ -45,9 +44,7 @@ namespace Ryujinx.Graphics.Vulkan
public readonly bool SupportsViewportArray2;
public readonly bool SupportsHostImportedMemory;
public readonly bool SupportsDepthClipControl;
public readonly uint MinSubgroupSize;
public readonly uint MaxSubgroupSize;
public readonly ShaderStageFlags RequiredSubgroupSizeStages;
public readonly uint SubgroupSize;
public readonly SampleCountFlags SupportedSampleCounts;
public readonly PortabilitySubsetFlags PortabilitySubset;
public readonly uint VertexBufferAlignment;
@ -64,7 +61,6 @@ namespace Ryujinx.Graphics.Vulkan
bool supportsIndirectParameters,
bool supportsFragmentShaderInterlock,
bool supportsGeometryShaderPassthrough,
bool supportsSubgroupSizeControl,
bool supportsShaderFloat64,
bool supportsShaderInt8,
bool supportsShaderStencilExport,
@ -84,9 +80,7 @@ namespace Ryujinx.Graphics.Vulkan
bool supportsViewportArray2,
bool supportsHostImportedMemory,
bool supportsDepthClipControl,
uint minSubgroupSize,
uint maxSubgroupSize,
ShaderStageFlags requiredSubgroupSizeStages,
uint subgroupSize,
SampleCountFlags supportedSampleCounts,
PortabilitySubsetFlags portabilitySubset,
uint vertexBufferAlignment,
@ -102,7 +96,6 @@ namespace Ryujinx.Graphics.Vulkan
SupportsIndirectParameters = supportsIndirectParameters;
SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock;
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
SupportsSubgroupSizeControl = supportsSubgroupSizeControl;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsShaderInt8 = supportsShaderInt8;
SupportsShaderStencilExport = supportsShaderStencilExport;
@ -122,9 +115,7 @@ namespace Ryujinx.Graphics.Vulkan
SupportsViewportArray2 = supportsViewportArray2;
SupportsHostImportedMemory = supportsHostImportedMemory;
SupportsDepthClipControl = supportsDepthClipControl;
MinSubgroupSize = minSubgroupSize;
MaxSubgroupSize = maxSubgroupSize;
RequiredSubgroupSizeStages = requiredSubgroupSizeStages;
SubgroupSize = subgroupSize;
SupportedSampleCounts = supportedSampleCounts;
PortabilitySubset = portabilitySubset;
VertexBufferAlignment = vertexBufferAlignment;

View file

@ -352,11 +352,6 @@ namespace Ryujinx.Graphics.Vulkan
return pipeline;
}
if (gd.Capabilities.SupportsSubgroupSizeControl)
{
UpdateStageRequiredSubgroupSizes(gd, 1);
}
var pipelineCreateInfo = new ComputePipelineCreateInfo
{
SType = StructureType.ComputePipelineCreateInfo,
@ -616,11 +611,6 @@ namespace Ryujinx.Graphics.Vulkan
PDynamicStates = dynamicStates,
};
if (gd.Capabilities.SupportsSubgroupSizeControl)
{
UpdateStageRequiredSubgroupSizes(gd, (int)StagesCount);
}
var pipelineCreateInfo = new GraphicsPipelineCreateInfo
{
SType = StructureType.GraphicsPipelineCreateInfo,
@ -659,19 +649,6 @@ namespace Ryujinx.Graphics.Vulkan
return pipeline;
}
/// <summary>
/// Updates the <c>PNext</c> pointer of the first <paramref name="count"/> entries of <c>Stages</c>.
/// A stage is pointed at its matching entry in <c>StageRequiredSubgroupSizes</c> only when the device
/// capabilities list that stage in <c>RequiredSubgroupSizeStages</c> and <c>RequiredSubgroupSize</c>
/// lies within the reported minimum and maximum subgroup sizes; otherwise <c>PNext</c> is set to null.
/// </summary>
/// <param name="gd">Renderer whose <c>Capabilities</c> are consulted</param>
/// <param name="count">Number of leading stage entries to update</param>
private readonly unsafe void UpdateStageRequiredSubgroupSizes(VulkanRenderer gd, int count)
{
    for (int stageIndex = 0; stageIndex < count; stageIndex++)
    {
        // The stage must be one of those the device reports in RequiredSubgroupSizeStages.
        bool stageIsEligible = (gd.Capabilities.RequiredSubgroupSizeStages & Stages[stageIndex].Stage) != 0;

        // The requested size must lie inside the range reported by the device.
        bool sizeIsInRange =
            gd.Capabilities.MinSubgroupSize <= RequiredSubgroupSize &&
            gd.Capabilities.MaxSubgroupSize >= RequiredSubgroupSize;

        if (stageIsEligible && sizeIsInRange)
        {
            Stages[stageIndex].PNext = StageRequiredSubgroupSizes.Pointer + stageIndex;
        }
        else
        {
            Stages[stageIndex].PNext = null;
        }
    }
}
private void UpdateVertexAttributeDescriptions(VulkanRenderer gd)
{
// Vertex attributes exceeding the stride are invalid.

View file

@ -37,7 +37,6 @@ namespace Ryujinx.Graphics.Vulkan
"VK_EXT_shader_stencil_export",
"VK_KHR_shader_float16_int8",
"VK_EXT_shader_subgroup_ballot",
"VK_EXT_subgroup_size_control",
"VK_NV_geometry_shader_passthrough",
"VK_NV_viewport_array2",
"VK_EXT_depth_clip_control",

View file

@ -151,6 +151,14 @@ namespace Ryujinx.Graphics.Vulkan
SType = StructureType.PhysicalDeviceProperties2,
};
PhysicalDeviceSubgroupProperties propertiesSubgroup = new()
{
SType = StructureType.PhysicalDeviceSubgroupProperties,
PNext = properties2.PNext,
};
properties2.PNext = &propertiesSubgroup;
PhysicalDeviceBlendOperationAdvancedPropertiesEXT propertiesBlendOperationAdvanced = new()
{
SType = StructureType.PhysicalDeviceBlendOperationAdvancedPropertiesExt,
@ -164,18 +172,6 @@ namespace Ryujinx.Graphics.Vulkan
properties2.PNext = &propertiesBlendOperationAdvanced;
}
PhysicalDeviceSubgroupSizeControlPropertiesEXT propertiesSubgroupSizeControl = new()
{
SType = StructureType.PhysicalDeviceSubgroupSizeControlPropertiesExt,
};
bool supportsSubgroupSizeControl = _physicalDevice.IsDeviceExtensionPresent("VK_EXT_subgroup_size_control");
if (supportsSubgroupSizeControl)
{
properties2.PNext = &propertiesSubgroupSizeControl;
}
bool supportsTransformFeedback = _physicalDevice.IsDeviceExtensionPresent(ExtTransformFeedback.ExtensionName);
PhysicalDeviceTransformFeedbackPropertiesEXT propertiesTransformFeedback = new()
@ -315,7 +311,6 @@ namespace Ryujinx.Graphics.Vulkan
_physicalDevice.IsDeviceExtensionPresent(KhrDrawIndirectCount.ExtensionName),
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_fragment_shader_interlock"),
_physicalDevice.IsDeviceExtensionPresent("VK_NV_geometry_shader_passthrough"),
supportsSubgroupSizeControl,
features2.Features.ShaderFloat64,
featuresShaderInt8.ShaderInt8,
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_shader_stencil_export"),
@ -335,9 +330,7 @@ namespace Ryujinx.Graphics.Vulkan
_physicalDevice.IsDeviceExtensionPresent("VK_NV_viewport_array2"),
_physicalDevice.IsDeviceExtensionPresent(ExtExternalMemoryHost.ExtensionName),
supportsDepthClipControl && featuresDepthClipControl.DepthClipControl,
propertiesSubgroupSizeControl.MinSubgroupSize,
propertiesSubgroupSizeControl.MaxSubgroupSize,
propertiesSubgroupSizeControl.RequiredSubgroupSizeStages,
propertiesSubgroup.SubgroupSize,
supportedSampleCounts,
portabilityFlags,
vertexBufferAlignment,
@ -623,6 +616,7 @@ namespace Ryujinx.Graphics.Vulkan
maximumImagesPerStage: Constants.MaxImagesPerStage,
maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize,
maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy,
shaderSubgroupSize: (int)Capabilities.SubgroupSize,
storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment,
gatherBiasPrecision: IsIntelWindows || IsAmdWindows ? (int)Capabilities.SubTexelPrecisionBits : 0);
}