Fix vote and shuffle shader instructions on AMD GPUs (#5540)
* Move shuffle handling out of the backend to a transform pass * Handle subgroup sizes higher than 32 * Stop using the subgroup size control extension * Make GenerateShuffleFunction static * Shader cache version bump
This commit is contained in:
parent
64079c034c
commit
6ed613a6e6
35 changed files with 445 additions and 265 deletions
|
@ -25,7 +25,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
public readonly bool SupportsIndirectParameters;
|
||||
public readonly bool SupportsFragmentShaderInterlock;
|
||||
public readonly bool SupportsGeometryShaderPassthrough;
|
||||
public readonly bool SupportsSubgroupSizeControl;
|
||||
public readonly bool SupportsShaderFloat64;
|
||||
public readonly bool SupportsShaderInt8;
|
||||
public readonly bool SupportsShaderStencilExport;
|
||||
|
@ -45,9 +44,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
public readonly bool SupportsViewportArray2;
|
||||
public readonly bool SupportsHostImportedMemory;
|
||||
public readonly bool SupportsDepthClipControl;
|
||||
public readonly uint MinSubgroupSize;
|
||||
public readonly uint MaxSubgroupSize;
|
||||
public readonly ShaderStageFlags RequiredSubgroupSizeStages;
|
||||
public readonly uint SubgroupSize;
|
||||
public readonly SampleCountFlags SupportedSampleCounts;
|
||||
public readonly PortabilitySubsetFlags PortabilitySubset;
|
||||
public readonly uint VertexBufferAlignment;
|
||||
|
@ -64,7 +61,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
bool supportsIndirectParameters,
|
||||
bool supportsFragmentShaderInterlock,
|
||||
bool supportsGeometryShaderPassthrough,
|
||||
bool supportsSubgroupSizeControl,
|
||||
bool supportsShaderFloat64,
|
||||
bool supportsShaderInt8,
|
||||
bool supportsShaderStencilExport,
|
||||
|
@ -84,9 +80,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
bool supportsViewportArray2,
|
||||
bool supportsHostImportedMemory,
|
||||
bool supportsDepthClipControl,
|
||||
uint minSubgroupSize,
|
||||
uint maxSubgroupSize,
|
||||
ShaderStageFlags requiredSubgroupSizeStages,
|
||||
uint subgroupSize,
|
||||
SampleCountFlags supportedSampleCounts,
|
||||
PortabilitySubsetFlags portabilitySubset,
|
||||
uint vertexBufferAlignment,
|
||||
|
@ -102,7 +96,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
SupportsIndirectParameters = supportsIndirectParameters;
|
||||
SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock;
|
||||
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
|
||||
SupportsSubgroupSizeControl = supportsSubgroupSizeControl;
|
||||
SupportsShaderFloat64 = supportsShaderFloat64;
|
||||
SupportsShaderInt8 = supportsShaderInt8;
|
||||
SupportsShaderStencilExport = supportsShaderStencilExport;
|
||||
|
@ -122,9 +115,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
SupportsViewportArray2 = supportsViewportArray2;
|
||||
SupportsHostImportedMemory = supportsHostImportedMemory;
|
||||
SupportsDepthClipControl = supportsDepthClipControl;
|
||||
MinSubgroupSize = minSubgroupSize;
|
||||
MaxSubgroupSize = maxSubgroupSize;
|
||||
RequiredSubgroupSizeStages = requiredSubgroupSizeStages;
|
||||
SubgroupSize = subgroupSize;
|
||||
SupportedSampleCounts = supportedSampleCounts;
|
||||
PortabilitySubset = portabilitySubset;
|
||||
VertexBufferAlignment = vertexBufferAlignment;
|
||||
|
|
|
@ -352,11 +352,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
return pipeline;
|
||||
}
|
||||
|
||||
if (gd.Capabilities.SupportsSubgroupSizeControl)
|
||||
{
|
||||
UpdateStageRequiredSubgroupSizes(gd, 1);
|
||||
}
|
||||
|
||||
var pipelineCreateInfo = new ComputePipelineCreateInfo
|
||||
{
|
||||
SType = StructureType.ComputePipelineCreateInfo,
|
||||
|
@ -616,11 +611,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
PDynamicStates = dynamicStates,
|
||||
};
|
||||
|
||||
if (gd.Capabilities.SupportsSubgroupSizeControl)
|
||||
{
|
||||
UpdateStageRequiredSubgroupSizes(gd, (int)StagesCount);
|
||||
}
|
||||
|
||||
var pipelineCreateInfo = new GraphicsPipelineCreateInfo
|
||||
{
|
||||
SType = StructureType.GraphicsPipelineCreateInfo,
|
||||
|
@ -659,19 +649,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
return pipeline;
|
||||
}
|
||||
|
||||
private readonly unsafe void UpdateStageRequiredSubgroupSizes(VulkanRenderer gd, int count)
|
||||
{
|
||||
for (int index = 0; index < count; index++)
|
||||
{
|
||||
bool canUseExplicitSubgroupSize =
|
||||
(gd.Capabilities.RequiredSubgroupSizeStages & Stages[index].Stage) != 0 &&
|
||||
gd.Capabilities.MinSubgroupSize <= RequiredSubgroupSize &&
|
||||
gd.Capabilities.MaxSubgroupSize >= RequiredSubgroupSize;
|
||||
|
||||
Stages[index].PNext = canUseExplicitSubgroupSize ? StageRequiredSubgroupSizes.Pointer + index : null;
|
||||
}
|
||||
}
|
||||
|
||||
private void UpdateVertexAttributeDescriptions(VulkanRenderer gd)
|
||||
{
|
||||
// Vertex attributes exceeding the stride are invalid.
|
||||
|
|
|
@ -37,7 +37,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
"VK_EXT_shader_stencil_export",
|
||||
"VK_KHR_shader_float16_int8",
|
||||
"VK_EXT_shader_subgroup_ballot",
|
||||
"VK_EXT_subgroup_size_control",
|
||||
"VK_NV_geometry_shader_passthrough",
|
||||
"VK_NV_viewport_array2",
|
||||
"VK_EXT_depth_clip_control",
|
||||
|
|
|
@ -151,6 +151,14 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
SType = StructureType.PhysicalDeviceProperties2,
|
||||
};
|
||||
|
||||
PhysicalDeviceSubgroupProperties propertiesSubgroup = new()
|
||||
{
|
||||
SType = StructureType.PhysicalDeviceSubgroupProperties,
|
||||
PNext = properties2.PNext,
|
||||
};
|
||||
|
||||
properties2.PNext = &propertiesSubgroup;
|
||||
|
||||
PhysicalDeviceBlendOperationAdvancedPropertiesEXT propertiesBlendOperationAdvanced = new()
|
||||
{
|
||||
SType = StructureType.PhysicalDeviceBlendOperationAdvancedPropertiesExt,
|
||||
|
@ -164,18 +172,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
properties2.PNext = &propertiesBlendOperationAdvanced;
|
||||
}
|
||||
|
||||
PhysicalDeviceSubgroupSizeControlPropertiesEXT propertiesSubgroupSizeControl = new()
|
||||
{
|
||||
SType = StructureType.PhysicalDeviceSubgroupSizeControlPropertiesExt,
|
||||
};
|
||||
|
||||
bool supportsSubgroupSizeControl = _physicalDevice.IsDeviceExtensionPresent("VK_EXT_subgroup_size_control");
|
||||
|
||||
if (supportsSubgroupSizeControl)
|
||||
{
|
||||
properties2.PNext = &propertiesSubgroupSizeControl;
|
||||
}
|
||||
|
||||
bool supportsTransformFeedback = _physicalDevice.IsDeviceExtensionPresent(ExtTransformFeedback.ExtensionName);
|
||||
|
||||
PhysicalDeviceTransformFeedbackPropertiesEXT propertiesTransformFeedback = new()
|
||||
|
@ -315,7 +311,6 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
_physicalDevice.IsDeviceExtensionPresent(KhrDrawIndirectCount.ExtensionName),
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_fragment_shader_interlock"),
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_NV_geometry_shader_passthrough"),
|
||||
supportsSubgroupSizeControl,
|
||||
features2.Features.ShaderFloat64,
|
||||
featuresShaderInt8.ShaderInt8,
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_shader_stencil_export"),
|
||||
|
@ -335,9 +330,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
_physicalDevice.IsDeviceExtensionPresent("VK_NV_viewport_array2"),
|
||||
_physicalDevice.IsDeviceExtensionPresent(ExtExternalMemoryHost.ExtensionName),
|
||||
supportsDepthClipControl && featuresDepthClipControl.DepthClipControl,
|
||||
propertiesSubgroupSizeControl.MinSubgroupSize,
|
||||
propertiesSubgroupSizeControl.MaxSubgroupSize,
|
||||
propertiesSubgroupSizeControl.RequiredSubgroupSizeStages,
|
||||
propertiesSubgroup.SubgroupSize,
|
||||
supportedSampleCounts,
|
||||
portabilityFlags,
|
||||
vertexBufferAlignment,
|
||||
|
@ -623,6 +616,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
maximumImagesPerStage: Constants.MaxImagesPerStage,
|
||||
maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize,
|
||||
maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy,
|
||||
shaderSubgroupSize: (int)Capabilities.SubgroupSize,
|
||||
storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment,
|
||||
gatherBiasPrecision: IsIntelWindows || IsAmdWindows ? (int)Capabilities.SubTexelPrecisionBits : 0);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue