vk_graphics_pipeline: Use VK_KHR_push_descriptor when available

~51% faster on Nvidia compared to previous method.
This commit is contained in:
ReinUsesLisp 2021-06-16 21:14:57 -03:00 committed by ameerj
parent ccbd24fe00
commit ca67077ca8
8 changed files with 88 additions and 36 deletions

View file

@ -767,6 +767,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
@ -932,6 +933,16 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
khr_workgroup_memory_explicit_layout = true;
}
}
if (khr_push_descriptor) {
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
push_descriptor.pNext = nullptr;
physical_properties.pNext = &push_descriptor;
physical.GetProperties2KHR(physical_properties);
max_push_descriptors = push_descriptor.maxPushDescriptors;
}
return extensions;
}

View file

@ -154,6 +154,11 @@ public:
return guest_warp_stages & stage;
}
/// Returns the maximum number of push descriptors.
u32 MaxPushDescriptors() const {
return max_push_descriptors;
}
/// Returns true if formatless image load is supported.
bool IsFormatlessImageLoadSupported() const {
return is_formatless_image_load_supported;
@ -194,6 +199,11 @@ public:
return khr_spirv_1_4;
}
/// Returns true if the device supports VK_KHR_push_descriptor.
bool IsKhrPushDescriptorSupported() const {
return khr_push_descriptor;
}
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
return khr_workgroup_memory_explicit_layout;
@ -330,6 +340,7 @@ private:
VkDriverIdKHR driver_id{}; ///< Driver ID.
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 max_push_descriptors{}; ///< Maximum number of push descriptors
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
@ -345,6 +356,7 @@ private:
bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.

View file

@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdFillBuffer);
X(vkCmdPipelineBarrier);
X(vkCmdPushConstants);
X(vkCmdPushDescriptorSetWithTemplateKHR);
X(vkCmdSetBlendConstants);
X(vkCmdSetDepthBias);
X(vkCmdSetDepthBounds);

View file

@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
PFN_vkBindBufferMemory vkBindBufferMemory{};
PFN_vkBindImageMemory vkBindImageMemory{};
PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
PFN_vkCmdBeginQuery vkCmdBeginQuery{};
PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{};
PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{};
PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{};
PFN_vkCmdBindPipeline vkCmdBindPipeline{};
PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{};
PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{};
PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
PFN_vkCmdBlitImage vkCmdBlitImage{};
PFN_vkCmdClearAttachments vkCmdClearAttachments{};
PFN_vkCmdCopyBuffer vkCmdCopyBuffer{};
@ -211,35 +212,35 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdDispatch vkCmdDispatch{};
PFN_vkCmdDraw vkCmdDraw{};
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdEndQuery vkCmdEndQuery{};
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdFillBuffer vkCmdFillBuffer{};
PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{};
PFN_vkCmdPushConstants vkCmdPushConstants{};
PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{};
PFN_vkCmdResolveImage vkCmdResolveImage{};
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{};
PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
PFN_vkCmdSetDepthBias vkCmdSetDepthBias{};
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{};
PFN_vkCmdSetEvent vkCmdSetEvent{};
PFN_vkCmdSetScissor vkCmdSetScissor{};
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
PFN_vkCmdSetViewport vkCmdSetViewport{};
PFN_vkCmdWaitEvents vkCmdWaitEvents{};
PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{};
PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{};
PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{};
PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
PFN_vkCmdSetEvent vkCmdSetEvent{};
PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{};
PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{};
PFN_vkCmdSetScissor vkCmdSetScissor{};
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{};
PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{};
PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{};
PFN_vkCmdResolveImage vkCmdResolveImage{};
PFN_vkCmdSetViewport vkCmdSetViewport{};
PFN_vkCmdWaitEvents vkCmdWaitEvents{};
PFN_vkCreateBuffer vkCreateBuffer{};
PFN_vkCreateBufferView vkCreateBufferView{};
PFN_vkCreateCommandPool vkCreateCommandPool{};
@ -990,6 +991,12 @@ public:
dynamic_offsets.size(), dynamic_offsets.data());
}
void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template,
VkPipelineLayout layout, u32 set,
const void* data) const noexcept {
dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data);
}
void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept {
dld->vkCmdBindPipeline(handle, bind_point, pipeline);
}