shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available

This commit is contained in:
ReinUsesLisp 2021-03-28 19:53:34 -03:00 committed by ameerj
parent 84298ce191
commit e860870dd2
20 changed files with 730 additions and 36 deletions

View file

@ -114,10 +114,12 @@ public:
gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size);
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
const u32 local_memory_size{LocalMemorySize()};
const u32 texture_bound{TextureBoundBuffer()};
file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
.write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
.write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
.write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
.write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest))
@ -132,7 +134,10 @@ public:
file.flush();
if (stage == Shader::Stage::Compute) {
const std::array<u32, 3> workgroup_size{WorkgroupSize()};
file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size));
const u32 shared_memory_size{SharedMemorySize()};
file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
.write(reinterpret_cast<const char*>(&shared_memory_size),
sizeof(shared_memory_size));
} else {
file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
}
@ -278,6 +283,16 @@ public:
return maxwell3d->regs.tex_cb_index;
}
u32 LocalMemorySize() const override {
const u64 size{sph.LocalMemorySize()};
ASSERT(size <= std::numeric_limits<u32>::max());
return static_cast<u32>(size);
}
u32 SharedMemorySize() const override {
throw Shader::LogicError("Requesting shared memory size in graphics stage");
}
std::array<u32, 3> WorkgroupSize() const override {
throw Shader::LogicError("Requesting workgroup size in a graphics stage");
}
@ -313,6 +328,16 @@ public:
return kepler_compute->regs.tex_cb_index;
}
u32 LocalMemorySize() const override {
const auto& qmd{kepler_compute->launch_description};
return qmd.local_pos_alloc;
}
u32 SharedMemorySize() const override {
const auto& qmd{kepler_compute->launch_description};
return qmd.shared_alloc;
}
std::array<u32, 3> WorkgroupSize() const override {
const auto& qmd{kepler_compute->launch_description};
return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
@ -366,6 +391,7 @@ public:
u64 num_texture_types{};
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
.read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
.read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
.read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
.read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
@ -381,7 +407,8 @@ public:
texture_types.emplace(key, type);
}
if (stage == Shader::Stage::Compute) {
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size));
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
} else {
file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
}
@ -402,6 +429,14 @@ public:
return it->second;
}
u32 LocalMemorySize() const override {
return local_memory_size;
}
u32 SharedMemorySize() const override {
return shared_memory_size;
}
u32 TextureBoundBuffer() const override {
return texture_bound;
}
@ -414,6 +449,8 @@ private:
std::unique_ptr<u64[]> code;
std::unordered_map<u64, Shader::TextureType> texture_types;
std::array<u32, 3> workgroup_size{};
u32 local_memory_size{};
u32 shared_memory_size{};
u32 texture_bound{};
u32 read_lowest{};
u32 read_highest{};
@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
const auto& float_control{device.FloatControlProperties()};
const VkDriverIdKHR driver_id{device.GetDriverID()};
base_profile = Shader::Profile{
.supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
.unified_descriptor_binding = true,
.support_vertex_instance_id = false,
.support_float_controls = true,
@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
.support_vote = true,
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
shader = MakeShaderInfo(env, *cpu_shader_addr);
}
const ComputePipelineCacheKey key{
.unique_hash = shader->unique_hash,
.shared_memory_size = qmd.shared_alloc,
.unique_hash{shader->unique_hash},
.shared_memory_size{qmd.shared_alloc},
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
};
const auto [pair, is_new]{compute_cache.try_emplace(key)};

View file

@ -399,6 +399,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
}
VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
if (khr_workgroup_memory_explicit_layout) {
workgroup_layout = {
.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
.pNext = nullptr,
.workgroupMemoryExplicitLayout = VK_TRUE,
.workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
.workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
.workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
};
SetNext(next, workgroup_layout);
}
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@ -662,6 +676,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
}
bool has_khr_shader_float16_int8{};
bool has_khr_workgroup_memory_explicit_layout{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
@ -682,6 +697,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
test(khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
@ -694,6 +710,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
test(has_khr_workgroup_memory_explicit_layout,
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
if (Settings::values.renderer_debug) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
true);
@ -787,6 +805,22 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
ext_extended_dynamic_state = true;
}
}
if (has_khr_workgroup_memory_explicit_layout) {
VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
layout.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
layout.pNext = nullptr;
features.pNext = &layout;
physical.GetFeatures2KHR(features);
if (layout.workgroupMemoryExplicitLayout &&
layout.workgroupMemoryExplicitLayout8BitAccess &&
layout.workgroupMemoryExplicitLayout16BitAccess &&
layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
khr_workgroup_memory_explicit_layout = true;
}
}
return extensions;
}

View file

@ -168,11 +168,21 @@ public:
return nv_viewport_swizzle;
}
/// Returns true if the device supports VK_EXT_scalar_block_layout.
/// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
}
/// Returns true if the device supports VK_KHR_spirv_1_4.
bool IsKhrSpirv1_4Supported() const {
return khr_spirv_1_4;
}
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
return khr_workgroup_memory_explicit_layout;
}
/// Returns true if the device supports VK_EXT_index_type_uint8.
bool IsExtIndexTypeUint8Supported() const {
return ext_index_type_uint8;
@ -296,20 +306,22 @@ private:
bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.