fix: lower UBO max size to account buffer cache offset (#2388)

* fix: lower UBO max size to account buffer cache offset

* review comments

* remove UBO size from spec and always set it to max on shader side
This commit is contained in:
psucien 2025-02-09 22:03:20 +01:00 committed by GitHub
parent 34a4f6e60e
commit 04fe3a79b9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 61 additions and 40 deletions

View file

@ -633,8 +633,8 @@ void EmitContext::DefineBuffers() {
for (const auto& desc : info.buffers) { for (const auto& desc : info.buffers) {
const auto sharp = desc.GetSharp(info); const auto sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(sharp); const bool is_storage = desc.IsStorage(sharp, profile);
const u32 array_size = sharp.NumDwords() != 0 ? sharp.NumDwords() : MaxUboDwords; const u32 array_size = profile.max_ubo_size >> 2;
const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32; const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
const Id data_type = (*data_types)[1]; const Id data_type = (*data_types)[1];
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type) const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)

View file

@ -17,6 +17,7 @@
#include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h" #include "shader_recompiler/ir/type.h"
#include "shader_recompiler/params.h" #include "shader_recompiler/params.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
@ -24,8 +25,6 @@
namespace Shader { namespace Shader {
static constexpr size_t NumUserDataRegs = 16; static constexpr size_t NumUserDataRegs = 16;
static constexpr size_t MaxUboSize = 65536;
static constexpr size_t MaxUboDwords = MaxUboSize >> 2;
enum class TextureType : u32 { enum class TextureType : u32 {
Color1D, Color1D,
@ -50,8 +49,9 @@ struct BufferResource {
bool is_written{}; bool is_written{};
bool is_formatted{}; bool is_formatted{};
[[nodiscard]] bool IsStorage(const AmdGpu::Buffer& buffer) const noexcept { [[nodiscard]] bool IsStorage(const AmdGpu::Buffer& buffer,
return buffer.GetSize() > MaxUboSize || is_written || is_gds_buffer; const Profile& profile) const noexcept {
return buffer.GetSize() > profile.max_ubo_size || is_written || is_gds_buffer;
} }
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;

View file

@ -30,6 +30,7 @@ struct Profile {
bool needs_manual_interpolation{}; bool needs_manual_interpolation{};
bool needs_lds_barriers{}; bool needs_lds_barriers{};
u64 min_ssbo_alignment{}; u64 min_ssbo_alignment{};
u64 max_ubo_size{};
u32 max_viewport_width{}; u32 max_viewport_width{};
u32 max_viewport_height{}; u32 max_viewport_height{};
u32 max_shared_memory_size{}; u32 max_shared_memory_size{};

View file

@ -27,7 +27,6 @@ struct BufferSpecialization {
u32 num_format : 4; u32 num_format : 4;
u32 index_stride : 2; u32 index_stride : 2;
u32 element_size : 2; u32 element_size : 2;
u32 size = 0;
AmdGpu::CompMapping dst_select{}; AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{}; AmdGpu::NumberConversion num_conversion{};
@ -38,8 +37,7 @@ struct BufferSpecialization {
(data_format == other.data_format && num_format == other.num_format && (data_format == other.data_format && num_format == other.num_format &&
dst_select == other.dst_select && num_conversion == other.num_conversion)) && dst_select == other.dst_select && num_conversion == other.num_conversion)) &&
(!swizzle_enable || (!swizzle_enable ||
(index_stride == other.index_stride && element_size == other.element_size)) && (index_stride == other.index_stride && element_size == other.element_size));
(size >= other.is_storage || is_storage);
} }
}; };
@ -87,8 +85,8 @@ struct StageSpecialization {
boost::container::small_vector<SamplerSpecialization, 16> samplers; boost::container::small_vector<SamplerSpecialization, 16> samplers;
Backend::Bindings start{}; Backend::Bindings start{};
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_,
const Profile& profile_, Backend::Bindings start_) Backend::Bindings start_)
: info{&info_}, runtime_info{runtime_info_}, start{start_} { : info{&info_}, runtime_info{runtime_info_}, start{start_} {
fetch_shader_data = Gcn::ParseFetchShader(info_); fetch_shader_data = Gcn::ParseFetchShader(info_);
if (info_.stage == Stage::Vertex && fetch_shader_data && if (info_.stage == Stage::Vertex && fetch_shader_data &&
@ -107,9 +105,9 @@ struct StageSpecialization {
binding++; binding++;
} }
ForEachSharp(binding, buffers, info->buffers, ForEachSharp(binding, buffers, info->buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { [profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride(); spec.stride = sharp.GetStride();
spec.is_storage = desc.IsStorage(sharp); spec.is_storage = desc.IsStorage(sharp, profile_);
spec.is_formatted = desc.is_formatted; spec.is_formatted = desc.is_formatted;
spec.swizzle_enable = sharp.swizzle_enable; spec.swizzle_enable = sharp.swizzle_enable;
if (spec.is_formatted) { if (spec.is_formatted) {
@ -122,9 +120,6 @@ struct StageSpecialization {
spec.index_stride = sharp.index_stride; spec.index_stride = sharp.index_stride;
spec.element_size = sharp.element_size; spec.element_size = sharp.element_size;
} }
if (!spec.is_storage) {
spec.size = sharp.GetSize();
}
}); });
ForEachSharp(binding, images, info->images, ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) { [](auto& spec, const auto& desc, AmdGpu::Image sharp) {

View file

@ -11,11 +11,12 @@
namespace Vulkan { namespace Vulkan {
ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_, ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache, DescriptorHeap& desc_heap, const Shader::Profile& profile,
ComputePipelineKey compute_key_, const Shader::Info& info_, vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key_,
vk::ShaderModule module) const Shader::Info& info_, vk::ShaderModule module)
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { : Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache, true},
compute_key{compute_key_} {
auto& info = stages[int(Shader::LogicalStage::Compute)]; auto& info = stages[int(Shader::LogicalStage::Compute)];
info = &info_; info = &info_;
const auto debug_str = GetDebugString(); const auto debug_str = GetDebugString();
@ -49,8 +50,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
const auto sharp = buffer.GetSharp(*info); const auto sharp = buffer.GetSharp(*info);
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer .descriptorType = buffer.IsStorage(sharp, profile) ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer, : vk::DescriptorType::eUniformBuffer,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute, .stageFlags = vk::ShaderStageFlagBits::eCompute,
}); });

View file

@ -31,8 +31,9 @@ struct ComputePipelineKey {
class ComputePipeline : public Pipeline { class ComputePipeline : public Pipeline {
public: public:
ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, ComputePipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
vk::PipelineCache pipeline_cache, ComputePipelineKey compute_key, const Shader::Profile& profile, vk::PipelineCache pipeline_cache,
const Shader::Info& info, vk::ShaderModule module); ComputePipelineKey compute_key, const Shader::Info& info,
vk::ShaderModule module);
~ComputePipeline(); ~ComputePipeline();
private: private:

View file

@ -25,13 +25,13 @@ namespace Vulkan {
using Shader::Backend::SPIRV::AuxShaderType; using Shader::Backend::SPIRV::AuxShaderType;
GraphicsPipeline::GraphicsPipeline( GraphicsPipeline::GraphicsPipeline(
const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache, const Shader::Profile& profile, const GraphicsPipelineKey& key_,
std::span<const Shader::Info*, MaxShaderStages> infos, vk::PipelineCache pipeline_cache, std::span<const Shader::Info*, MaxShaderStages> infos,
std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos, std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_, std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
std::span<const vk::ShaderModule> modules) std::span<const vk::ShaderModule> modules)
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_}, : Pipeline{instance, scheduler, desc_heap, profile, pipeline_cache}, key{key_},
fetch_shader{std::move(fetch_shader_)} { fetch_shader{std::move(fetch_shader_)} {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
std::ranges::copy(infos, stages.begin()); std::ranges::copy(infos, stages.begin());
@ -369,8 +369,9 @@ void GraphicsPipeline::BuildDescSetLayout() {
const auto sharp = buffer.GetSharp(*stage); const auto sharp = buffer.GetSharp(*stage);
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,
.descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer .descriptorType = buffer.IsStorage(sharp, profile)
: vk::DescriptorType::eUniformBuffer, ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.descriptorCount = 1, .descriptorCount = 1,
.stageFlags = gp_stage_flags, .stageFlags = gp_stage_flags,
}); });

View file

@ -75,7 +75,8 @@ struct GraphicsPipelineKey {
class GraphicsPipeline : public Pipeline { class GraphicsPipeline : public Pipeline {
public: public:
GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, const Shader::Profile& profile, const GraphicsPipelineKey& key,
vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> stages, std::span<const Shader::Info*, MaxShaderStages> stages,
std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos, std::span<const Shader::RuntimeInfo, MaxShaderStages> runtime_infos,
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader, std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,

View file

@ -209,6 +209,11 @@ public:
return properties.limits.minUniformBufferOffsetAlignment; return properties.limits.minUniformBufferOffsetAlignment;
} }
/// Returns the largest range that can be bound for a uniform buffer
/// (the device's `maxUniformBufferRange` limit).
vk::DeviceSize UniformMaxSize() const {
    const auto& device_limits = properties.limits;
    return device_limits.maxUniformBufferRange;
}
/// Returns the minimum required alignment for storage buffers /// Returns the minimum required alignment for storage buffers
vk::DeviceSize StorageMinAlignment() const { vk::DeviceSize StorageMinAlignment() const {
return properties.limits.minStorageBufferOffsetAlignment; return properties.limits.minStorageBufferOffsetAlignment;
@ -254,10 +259,12 @@ public:
return features.shaderClipDistance; return features.shaderClipDistance;
} }
/// Returns the maximum viewport width.
u32 GetMaxViewportWidth() const { u32 GetMaxViewportWidth() const {
return properties.limits.maxViewportDimensions[0]; return properties.limits.maxViewportDimensions[0];
} }
/// Returns the maximum viewport height.
u32 GetMaxViewportHeight() const { u32 GetMaxViewportHeight() const {
return properties.limits.maxViewportDimensions[1]; return properties.limits.maxViewportDimensions[1];
} }

View file

@ -204,6 +204,10 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
instance.GetDriverID() == vk::DriverId::eMoltenvk, instance.GetDriverID() == vk::DriverId::eMoltenvk,
// When binding a UBO, its size is calculated relative to its offset within the larger
// buffer cache resource that backs it. In some cases this can produce sizes exceeding the
// device's maximum allowed UBO range, so the threshold is lowered to prevent that.
.max_ubo_size = instance.UniformMaxSize() - instance.UniformMinAlignment(),
.max_viewport_width = instance.GetMaxViewportWidth(), .max_viewport_width = instance.GetMaxViewportWidth(),
.max_viewport_height = instance.GetMaxViewportHeight(), .max_viewport_height = instance.GetMaxViewportHeight(),
.max_shared_memory_size = instance.MaxComputeSharedMemorySize(), .max_shared_memory_size = instance.MaxComputeSharedMemorySize(),
@ -222,7 +226,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
} }
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) { if (is_new) {
it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap, it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, desc_heap, profile,
graphics_key, *pipeline_cache, infos, graphics_key, *pipeline_cache, infos,
runtime_infos, fetch_shader, modules); runtime_infos, fetch_shader, modules);
if (Config::collectShadersForDebug()) { if (Config::collectShadersForDebug()) {
@ -243,8 +247,9 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
} }
const auto [it, is_new] = compute_pipelines.try_emplace(compute_key); const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
if (is_new) { if (is_new) {
it.value() = std::make_unique<ComputePipeline>( it.value() =
instance, scheduler, desc_heap, *pipeline_cache, compute_key, *infos[0], modules[0]); std::make_unique<ComputePipeline>(instance, scheduler, desc_heap, profile,
*pipeline_cache, compute_key, *infos[0], modules[0]);
if (Config::collectShadersForDebug()) { if (Config::collectShadersForDebug()) {
auto& m = modules[0]; auto& m = modules[0];
module_related_pipelines[m].emplace_back(compute_key); module_related_pipelines[m].emplace_back(compute_key);

View file

@ -68,6 +68,10 @@ public:
static std::string GetShaderName(Shader::Stage stage, u64 hash, static std::string GetShaderName(Shader::Stage stage, u64 hash,
std::optional<size_t> perm = {}); std::optional<size_t> perm = {});
/// Returns a reference to the stored shader profile.
auto& GetProfile() const {
return profile;
}
private: private:
bool RefreshGraphicsKey(); bool RefreshGraphicsKey();
bool RefreshComputeKey(); bool RefreshComputeKey();

View file

@ -14,8 +14,10 @@
namespace Vulkan { namespace Vulkan {
Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
vk::PipelineCache pipeline_cache, bool is_compute_ /*= false*/) const Shader::Profile& profile_, vk::PipelineCache pipeline_cache,
: instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, is_compute{is_compute_} {} bool is_compute_ /*= false*/)
: instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, profile{profile_},
is_compute{is_compute_} {}
Pipeline::~Pipeline() = default; Pipeline::~Pipeline() = default;

View file

@ -5,6 +5,7 @@
#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/profile.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
@ -26,7 +27,8 @@ class DescriptorHeap;
class Pipeline { class Pipeline {
public: public:
Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
vk::PipelineCache pipeline_cache, bool is_compute = false); const Shader::Profile& profile, vk::PipelineCache pipeline_cache,
bool is_compute = false);
virtual ~Pipeline(); virtual ~Pipeline();
vk::Pipeline Handle() const noexcept { vk::Pipeline Handle() const noexcept {
@ -66,6 +68,7 @@ protected:
const Instance& instance; const Instance& instance;
Scheduler& scheduler; Scheduler& scheduler;
DescriptorHeap& desc_heap; DescriptorHeap& desc_heap;
const Shader::Profile& profile;
vk::UniquePipeline pipeline; vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout; vk::UniqueDescriptorSetLayout desc_layout;

View file

@ -554,11 +554,10 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
} }
// Second pass to re-bind buffers that were updated after binding // Second pass to re-bind buffers that were updated after binding
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
for (u32 i = 0; i < buffer_bindings.size(); i++) { for (u32 i = 0; i < buffer_bindings.size(); i++) {
const auto& [buffer_id, vsharp] = buffer_bindings[i]; const auto& [buffer_id, vsharp] = buffer_bindings[i];
const auto& desc = stage.buffers[i]; const auto& desc = stage.buffers[i];
const bool is_storage = desc.IsStorage(vsharp); const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile());
if (!buffer_id) { if (!buffer_id) {
if (desc.is_gds_buffer) { if (desc.is_gds_buffer) {
const auto* gds_buf = buffer_cache.GetGdsBuffer(); const auto* gds_buf = buffer_cache.GetGdsBuffer();
@ -566,6 +565,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
} else if (instance.IsNullDescriptorSupported()) { } else if (instance.IsNullDescriptorSupported()) {
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
} else { } else {
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE); buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
} }
} else { } else {