From 04fe3a79b9f1e0fbb21066c61f9b699fe80491a4 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sun, 9 Feb 2025 22:03:20 +0100 Subject: [PATCH] fix: lower UBO max size to account buffer cache offset (#2388) * fix: lower UBO max size to account buffer cache offset * review comments * remove UBO size from spec and always set it to max on shader side --- .../backend/spirv/spirv_emit_context.cpp | 4 ++-- src/shader_recompiler/info.h | 8 ++++---- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/specialization.h | 15 +++++---------- .../renderer_vulkan/vk_compute_pipeline.cpp | 15 ++++++++------- .../renderer_vulkan/vk_compute_pipeline.h | 5 +++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 13 +++++++------ .../renderer_vulkan/vk_graphics_pipeline.h | 3 ++- src/video_core/renderer_vulkan/vk_instance.h | 7 +++++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 11 ++++++++--- .../renderer_vulkan/vk_pipeline_cache.h | 4 ++++ .../renderer_vulkan/vk_pipeline_common.cpp | 6 ++++-- .../renderer_vulkan/vk_pipeline_common.h | 5 ++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 ++-- 14 files changed, 61 insertions(+), 40 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 13d727c72..2ab5ca05d 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -633,8 +633,8 @@ void EmitContext::DefineBuffers() { for (const auto& desc : info.buffers) { const auto sharp = desc.GetSharp(info); - const bool is_storage = desc.IsStorage(sharp); - const u32 array_size = sharp.NumDwords() != 0 ? sharp.NumDwords() : MaxUboDwords; + const bool is_storage = desc.IsStorage(sharp, profile); + const u32 array_size = profile.max_ubo_size >> 2; const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32; const Id data_type = (*data_types)[1]; const Id record_array_type{is_storage ? TypeRuntimeArray(data_type) diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 498752607..b32eb6833 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -17,6 +17,7 @@ #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/type.h" #include "shader_recompiler/params.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/resource.h" @@ -24,8 +25,6 @@ namespace Shader { static constexpr size_t NumUserDataRegs = 16; -static constexpr size_t MaxUboSize = 65536; -static constexpr size_t MaxUboDwords = MaxUboSize >> 2; enum class TextureType : u32 { Color1D, @@ -50,8 +49,9 @@ struct BufferResource { bool is_written{}; bool is_formatted{}; - [[nodiscard]] bool IsStorage(const AmdGpu::Buffer& buffer) const noexcept { - return buffer.GetSize() > MaxUboSize || is_written || is_gds_buffer; + [[nodiscard]] bool IsStorage(const AmdGpu::Buffer& buffer, + const Profile& profile) const noexcept { + return buffer.GetSize() > profile.max_ubo_size || is_written || is_gds_buffer; } [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f359a7dcc..53d940b79 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -30,6 +30,7 @@ struct Profile { bool needs_manual_interpolation{}; bool needs_lds_barriers{}; u64 min_ssbo_alignment{}; + u64 max_ubo_size{}; u32 max_viewport_width{}; u32 max_viewport_height{}; u32 max_shared_memory_size{}; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 4328193b5..9bf9e71e4 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -27,7 +27,6 @@ struct BufferSpecialization { u32 num_format : 4; u32 index_stride : 2; u32 element_size : 2; - u32 size = 0; AmdGpu::CompMapping dst_select{}; AmdGpu::NumberConversion num_conversion{}; @@ -38,8 +37,7 @@ struct BufferSpecialization { (data_format == other.data_format && num_format == other.num_format && dst_select == other.dst_select && num_conversion == other.num_conversion)) && (!swizzle_enable || - (index_stride == other.index_stride && element_size == other.element_size)) && - (size >= other.is_storage || is_storage); + (index_stride == other.index_stride && element_size == other.element_size)); } }; @@ -87,8 +85,8 @@ struct StageSpecialization { boost::container::small_vector samplers; Backend::Bindings start{}; - explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, - const Profile& profile_, Backend::Bindings start_) + StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_, + Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { fetch_shader_data = Gcn::ParseFetchShader(info_); if (info_.stage == Stage::Vertex && fetch_shader_data && @@ -107,9 +105,9 @@ struct StageSpecialization { binding++; } ForEachSharp(binding, buffers, info->buffers, - [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + [profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.stride = sharp.GetStride(); - spec.is_storage = desc.IsStorage(sharp); + spec.is_storage = desc.IsStorage(sharp, profile_); spec.is_formatted = desc.is_formatted; spec.swizzle_enable = sharp.swizzle_enable; if (spec.is_formatted) { @@ -122,9 +120,6 @@ struct StageSpecialization { spec.index_stride = sharp.index_stride; spec.element_size = sharp.element_size; } - if (!spec.is_storage) { - spec.size = sharp.GetSize(); - } }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 0832f65a2..f0346559d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -11,11 +11,12 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_, - DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache, - ComputePipelineKey compute_key_, const Shader::Info& info_, - vk::ShaderModule module) - : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { +ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler, + DescriptorHeap& desc_heap, const Shader::Profile& profile, + vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key_, + const Shader::Info& info_, vk::ShaderModule module) + : Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache, true}, + compute_key{compute_key_} { auto& info = stages[int(Shader::LogicalStage::Compute)]; info = &info_; const auto debug_str = GetDebugString(); @@ -49,8 +50,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler const auto sharp = buffer.GetSharp(*info); bindings.push_back({ .binding = binding++, - .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, + .descriptorType = buffer.IsStorage(sharp, profile) ? vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eCompute, }); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 1c28e461c..79059b509 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -31,8 +31,9 @@ struct ComputePipelineKey { class ComputePipeline : public Pipeline { public: ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, - vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key, - const Shader::Info& info, vk::ShaderModule module); + const Shader::Profile& profile, vk::PipelineCache pipeline_cache, + ComputePipelineKey compute_key, const Shader::Info& info, + vk::ShaderModule module); ~ComputePipeline(); private: diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 588754c00..4eecd1edf 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -25,13 +25,13 @@ namespace Vulkan { using Shader::Backend::SPIRV::AuxShaderType; GraphicsPipeline::GraphicsPipeline( - const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, - const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache, - std::span infos, + const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, + const Shader::Profile& profile, const GraphicsPipelineKey& key_, + vk::PipelineCache pipeline_cache, std::span infos, std::span runtime_infos, std::optional fetch_shader_, std::span modules) - : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_}, + : Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache}, key{key_}, fetch_shader{std::move(fetch_shader_)} { const vk::Device device = instance.GetDevice(); std::ranges::copy(infos, stages.begin()); @@ -369,8 +369,9 @@ void GraphicsPipeline::BuildDescSetLayout() { const auto sharp = buffer.GetSharp(*stage); bindings.push_back({ .binding = binding++, - .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, + .descriptorType = buffer.IsStorage(sharp, profile) + ? vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, .descriptorCount = 1, .stageFlags = gp_stage_flags, }); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8c5cb1f3b..64cc761f4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -75,7 +75,8 @@ struct GraphicsPipelineKey { class GraphicsPipeline : public Pipeline { public: GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, - const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, + const Shader::Profile& profile, const GraphicsPipelineKey& key, + vk::PipelineCache pipeline_cache, std::span stages, std::span runtime_infos, std::optional fetch_shader, diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index c2322df41..682824044 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -209,6 +209,11 @@ public: return properties.limits.minUniformBufferOffsetAlignment; } + /// Returns the maximum size of uniform buffers. + vk::DeviceSize UniformMaxSize() const { + return properties.limits.maxUniformBufferRange; + } + /// Returns the minimum required alignment for storage buffers vk::DeviceSize StorageMinAlignment() const { return properties.limits.minStorageBufferOffsetAlignment; @@ -254,10 +259,12 @@ public: return features.shaderClipDistance; } + /// Returns the maximim viewport width. u32 GetMaxViewportWidth() const { return properties.limits.maxViewportDimensions[0]; } + /// Returns the maximum viewport height. u32 GetMaxViewportHeight() const { return properties.limits.maxViewportDimensions[1]; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4406be439..a936ccf31 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -204,6 +204,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || instance.GetDriverID() == vk::DriverId::eMoltenvk, + // When binding a UBO, we calculate its size considering the offset in the larger buffer + // cache underlying resource. In some cases, it may produce sizes exceeding the system + // maximum allowed UBO range, so we need to reduce the threshold to prevent issues. + .max_ubo_size = instance.UniformMaxSize() - instance.UniformMinAlignment(), .max_viewport_width = instance.GetMaxViewportWidth(), .max_viewport_height = instance.GetMaxViewportHeight(), .max_shared_memory_size = instance.MaxComputeSharedMemorySize(), @@ -222,7 +226,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { } const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); if (is_new) { - it.value() = std::make_unique(instance, scheduler, desc_heap, + it.value() = std::make_unique(instance, scheduler, desc_heap, profile, graphics_key, *pipeline_cache, infos, runtime_infos, fetch_shader, modules); if (Config::collectShadersForDebug()) { @@ -243,8 +247,9 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { } const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); if (is_new) { - it.value() = std::make_unique( - instance, scheduler, desc_heap, *pipeline_cache, compute_key, *infos[0], modules[0]); + it.value() = + std::make_unique(instance, scheduler, desc_heap, profile, + *pipeline_cache, compute_key, *infos[0], modules[0]); if (Config::collectShadersForDebug()) { auto& m = modules[0]; module_related_pipelines[m].emplace_back(compute_key); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b3bccd513..ba3407b48 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -68,6 +68,10 @@ public: static std::string GetShaderName(Shader::Stage stage, u64 hash, std::optional perm = {}); + auto& GetProfile() const { + return profile; + } + private: bool RefreshGraphicsKey(); bool RefreshComputeKey(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 91f53109e..bf43257f8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -14,8 +14,10 @@ namespace Vulkan { Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, - vk::PipelineCache pipeline_cache, bool is_compute_ /*= false*/) - : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, is_compute{is_compute_} {} + const Shader::Profile& profile_, vk::PipelineCache pipeline_cache, + bool is_compute_ /*= false*/) + : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, profile{profile_}, + is_compute{is_compute_} {} Pipeline::~Pipeline() = default; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h index f71631da0..e9e6fed01 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -5,6 +5,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" +#include "shader_recompiler/profile.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/texture_cache.h" @@ -26,7 +27,8 @@ class DescriptorHeap; class Pipeline { public: Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, - vk::PipelineCache pipeline_cache, bool is_compute = false); + const Shader::Profile& profile, vk::PipelineCache pipeline_cache, + bool is_compute = false); virtual ~Pipeline(); vk::Pipeline Handle() const noexcept { @@ -66,6 +68,7 @@ protected: const Instance& instance; Scheduler& scheduler; DescriptorHeap& desc_heap; + const Shader::Profile& profile; vk::UniquePipeline pipeline; vk::UniquePipelineLayout pipeline_layout; vk::UniqueDescriptorSetLayout desc_layout; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6f979a734..8b1d5d8b3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -554,11 +554,10 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding } // Second pass to re-bind buffers that were updated after binding - auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID); for (u32 i = 0; i < buffer_bindings.size(); i++) { const auto& [buffer_id, vsharp] = buffer_bindings[i]; const auto& desc = stage.buffers[i]; - const bool is_storage = desc.IsStorage(vsharp); + const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile()); if (!buffer_id) { if (desc.is_gds_buffer) { const auto* gds_buf = buffer_cache.GetGdsBuffer(); @@ -566,6 +565,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding } else if (instance.IsNullDescriptorSupported()) { buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); } else { + auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID); buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE); } } else {