mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-11 05:03:14 +00:00
renderer_vulkan: use LDS buffer as SSBO on unsupported shared memory size (#2245)
* renderer_vulkan: use LDS buffer as SSBO on unsupported shared memory size
* shader_recompiler: add `v_trunc_f64` on inst format table
This commit is contained in:
parent
8aea0fc7ee
commit
eed4de1da9
14 changed files with 147 additions and 36 deletions
|
@ -17,7 +17,7 @@
|
|||
|
||||
namespace VideoCore {
|
||||
|
||||
static constexpr size_t GdsBufferSize = 64_KB;
|
||||
static constexpr size_t DataShareBufferSize = 64_KB;
|
||||
static constexpr size_t StagingBufferSize = 1_GB;
|
||||
static constexpr size_t UboStreamBufferSize = 64_MB;
|
||||
|
||||
|
@ -28,9 +28,11 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
|||
texture_cache{texture_cache_}, tracker{tracker_},
|
||||
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
||||
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize},
|
||||
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
|
||||
lds_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, DataShareBufferSize},
|
||||
memory_tracker{&tracker} {
|
||||
Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
|
||||
Vulkan::SetObjectName(instance.GetDevice(), lds_buffer.Handle(), "LDS Buffer");
|
||||
|
||||
// Ensure the first slot is used for the null buffer
|
||||
const auto null_id =
|
||||
|
|
|
@ -68,6 +68,11 @@ public:
|
|||
return &gds_buffer;
|
||||
}
|
||||
|
||||
/// Returns a non-owning, read-only pointer to the LDS buffer member of this object.
/// The returned address is that of the `lds_buffer` member, so it is never null.
|
||||
[[nodiscard]] const Buffer* GetLdsBuffer() const noexcept {
|
||||
return &lds_buffer;
|
||||
}
|
||||
|
||||
/// Retrieves the buffer with the specified id.
|
||||
[[nodiscard]] Buffer& GetBuffer(BufferId id) {
|
||||
return slot_buffers[id];
|
||||
|
@ -154,6 +159,7 @@ private:
|
|||
StreamBuffer staging_buffer;
|
||||
StreamBuffer stream_buffer;
|
||||
Buffer gds_buffer;
|
||||
Buffer lds_buffer;
|
||||
std::shared_mutex mutex;
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
RangeSet gpu_modified_ranges;
|
||||
|
|
|
@ -29,6 +29,14 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
|||
u32 binding{};
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
|
||||
if (info->has_emulated_shared_memory) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
});
|
||||
}
|
||||
if (info->has_readconst) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
|
|
|
@ -180,7 +180,6 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||
info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||
cs_pgm.IsTgidEnabled(2)};
|
||||
info.cs_info.shared_memory_size = cs_pgm.SharedMemSize();
|
||||
info.cs_info.max_shared_memory_size = instance.MaxComputeSharedMemorySize();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -209,6 +208,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
instance.GetDriverID() == vk::DriverId::eMoltenvk,
|
||||
.max_viewport_width = instance.GetMaxViewportWidth(),
|
||||
.max_viewport_height = instance.GetMaxViewportHeight(),
|
||||
.max_shared_memory_size = instance.MaxComputeSharedMemorySize(),
|
||||
};
|
||||
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
|
||||
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
|
||||
|
|
|
@ -175,7 +175,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
|||
const bool is_depth_clear = regs.depth_render_control.depth_clear_enable ||
|
||||
texture_cache.IsMetaCleared(htile_address, slice);
|
||||
const bool is_stencil_clear = regs.depth_render_control.stencil_clear_enable;
|
||||
ASSERT(desc.view_info.range.extent.layers == 1);
|
||||
ASSERT(desc.view_info.range.extent.levels == 1);
|
||||
|
||||
state.width = std::min<u32>(state.width, image.info.size.width);
|
||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||
|
@ -554,6 +554,21 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
}
|
||||
}
|
||||
|
||||
// Bind an SSBO to act as shared memory when a workgroup buffer cannot be used
|
||||
// (e.g. when the compute shared memory is bigger than the GPU's shared memory)
|
||||
if (stage.has_emulated_shared_memory) {
|
||||
const auto* lds_buf = buffer_cache.GetLdsBuffer();
|
||||
buffer_infos.emplace_back(lds_buf->Handle(), 0, lds_buf->SizeBytes());
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
}
|
||||
|
||||
// Bind the flattened user data buffer as a UBO so it's accessible to the shader
|
||||
if (stage.has_readconst) {
|
||||
const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue