mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-04 17:53:17 +00:00
shader_recompiler: Remove special case buffers and add support for aliasing (#2428)
* shader_recompiler: Move shared mem lowering into emitter * IR can be quite verbose during first stages of translation, before ssa and constant prop passes have run that drastically simplify it. This lowering can also be done during emission so why not do it then to save some compilation time * runtime_info: Pack PsColorBuffer into 8 bytes * Drops the size of the total structure by half from 396 to 204 bytes. Also should make comparison of the array a bit faster, since it's a hot path done every draw * emit_spirv_context: Add infrastructure for buffer aliases * Splits out the buffer creation function so it can be reused when defining multiple type aliases * shader_recompiler: Merge srt_flatbuf into buffers list * It's no longer a special case, yay * shader_recompiler: Complete buffer aliasing support * Add a bunch more types into buffers, such as F32 for float reads/writes and 8/16 bit integer types for formatted buffers * shader_recompiler: Remove existing shared memory emulation * The current impl relies on backend side implementation and hooking into every shared memory access. It also doesn't handle atomics. Will be replaced by an IR pass that solves these issues * shader_recompiler: Reintroduce shared memory on ssbo emulation * Now it is performed with an IR pass, and combined with the previous commit cleanup, is fully transparent from the backend, other than requiring workgroup_index be provided as an attribute (computing this on every shared memory access is gonna be too verbose) * clang format * buffer_cache: Reduce buffer sizes * vk_rasterizer: Cleanup resource binding code * Reduce noise in the functions, also remove some arguments which are class members * Fix gcc
This commit is contained in:
parent
290e127a4f
commit
82cacec8eb
36 changed files with 675 additions and 625 deletions
|
@ -197,6 +197,10 @@ struct Liverpool {
|
|||
return settings.lds_dwords.Value() * 128 * 4;
|
||||
}
|
||||
|
||||
/// Returns the total workgroup count, computed as dim_x * dim_y * dim_z.
/// NOTE(review): the multiplication is done in the members' own type; confirm it cannot
/// overflow for the dispatch sizes that reach this helper.
u32 NumWorkgroups() const noexcept {
|
||||
return dim_x * dim_y * dim_z;
|
||||
}
|
||||
|
||||
/// Returns true when bit `i` of settings.tgid_enable is set.
/// NOTE(review): presumably `i` selects the X/Y/Z thread-group id component — confirm with callers.
bool IsTgidEnabled(u32 i) const noexcept {
|
||||
return (settings.tgid_enable.Value() >> i) & 1;
|
||||
}
|
||||
|
|
|
@ -31,6 +31,12 @@ struct Buffer {
|
|||
u32 _padding1 : 6;
|
||||
u32 type : 2; // overlaps with T# type, so should be 0 for buffer
|
||||
|
||||
/// Builds a placeholder descriptor: a value-initialized Buffer with base_address set to 1.
static constexpr Buffer Null() {
|
||||
Buffer buffer{};
|
||||
// NOTE(review): the non-zero address presumably keeps this descriptor distinguishable
// from an all-zero one — confirm against the code that consumes Null().
buffer.base_address = 1;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/// Returns true when the `type` field is 0, which is the value expected for a buffer
/// descriptor (the field overlaps with the T# type).
bool Valid() const {
|
||||
return type == 0u;
|
||||
}
|
||||
|
|
|
@ -183,7 +183,7 @@ enum class NumberFormat : u32 {
|
|||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
enum class CompSwizzle : u32 {
|
||||
enum class CompSwizzle : u8 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
Red = 4,
|
||||
|
@ -193,10 +193,10 @@ enum class CompSwizzle : u32 {
|
|||
};
|
||||
|
||||
enum class NumberConversion : u32 {
|
||||
None,
|
||||
UintToUscaled,
|
||||
SintToSscaled,
|
||||
UnormToUbnorm,
|
||||
None = 0,
|
||||
UintToUscaled = 1,
|
||||
SintToSscaled = 2,
|
||||
UnormToUbnorm = 3,
|
||||
};
|
||||
|
||||
struct CompMapping {
|
||||
|
|
|
@ -168,7 +168,7 @@ public:
|
|||
void Commit();
|
||||
|
||||
/// Maps and commits a memory region with user provided data
|
||||
u64 Copy(VAddr src, size_t size, size_t alignment = 0) {
|
||||
u64 Copy(auto src, size_t size, size_t alignment = 0) {
|
||||
const auto [data, offset] = Map(size, alignment);
|
||||
std::memcpy(data, reinterpret_cast<const void*>(src), size);
|
||||
Commit();
|
||||
|
|
|
@ -5,11 +5,8 @@
|
|||
#include "common/alignment.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
@ -18,8 +15,8 @@
|
|||
namespace VideoCore {
|
||||
|
||||
static constexpr size_t DataShareBufferSize = 64_KB;
|
||||
static constexpr size_t StagingBufferSize = 1_GB;
|
||||
static constexpr size_t UboStreamBufferSize = 64_MB;
|
||||
static constexpr size_t StagingBufferSize = 512_MB;
|
||||
static constexpr size_t UboStreamBufferSize = 128_MB;
|
||||
|
||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
|
||||
|
@ -29,10 +26,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
|||
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
||||
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
|
||||
lds_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, DataShareBufferSize},
|
||||
memory_tracker{&tracker} {
|
||||
Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
|
||||
Vulkan::SetObjectName(instance.GetDevice(), lds_buffer.Handle(), "LDS Buffer");
|
||||
|
||||
// Ensure the first slot is used for the null buffer
|
||||
const auto null_id =
|
||||
|
@ -251,14 +246,6 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
|||
});
|
||||
}
|
||||
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
|
||||
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
|
||||
ASSERT(data.size_bytes() <= StreamThreshold);
|
||||
const u64 offset = stream_buffer.Copy(reinterpret_cast<VAddr>(data.data()), data.size_bytes(),
|
||||
instance.UniformMinAlignment());
|
||||
return {&stream_buffer, offset};
|
||||
}
|
||||
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
|
||||
bool is_texel_buffer, BufferId buffer_id) {
|
||||
// For small uniform buffers that have not been modified by gpu
|
||||
|
|
|
@ -68,9 +68,9 @@ public:
|
|||
return &gds_buffer;
|
||||
}
|
||||
|
||||
/// Returns a pointer to LDS device local buffer.
|
||||
[[nodiscard]] const Buffer* GetLdsBuffer() const noexcept {
|
||||
return &lds_buffer;
|
||||
/// Retrieves the host visible device local stream buffer.
|
||||
[[nodiscard]] StreamBuffer& GetStreamBuffer() noexcept {
|
||||
return stream_buffer;
|
||||
}
|
||||
|
||||
/// Retrieves the buffer with the specified id.
|
||||
|
@ -90,8 +90,6 @@ public:
|
|||
/// Writes a value to GPU buffer.
|
||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
||||
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
|
||||
|
||||
/// Obtains a buffer for the specified region.
|
||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
|
||||
bool is_texel_buffer = false,
|
||||
|
@ -159,7 +157,6 @@ private:
|
|||
StreamBuffer staging_buffer;
|
||||
StreamBuffer stream_buffer;
|
||||
Buffer gds_buffer;
|
||||
Buffer lds_buffer;
|
||||
std::shared_mutex mutex;
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
RangeSet gpu_modified_ranges;
|
||||
|
|
|
@ -3,11 +3,9 @@
|
|||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
|
@ -29,23 +27,6 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
|
|||
|
||||
u32 binding{};
|
||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||
|
||||
if (info->has_emulated_shared_memory) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
});
|
||||
}
|
||||
if (info->has_readconst) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = vk::DescriptorType::eUniformBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
});
|
||||
}
|
||||
for (const auto& buffer : info->buffers) {
|
||||
const auto sharp = buffer.GetSharp(*info);
|
||||
bindings.push_back({
|
||||
|
|
|
@ -7,18 +7,13 @@
|
|||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/io_file.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
|
@ -357,14 +352,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||
if (!stage) {
|
||||
continue;
|
||||
}
|
||||
if (stage->has_readconst) {
|
||||
bindings.push_back({
|
||||
.binding = binding++,
|
||||
.descriptorType = vk::DescriptorType::eUniformBuffer,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = gp_stage_flags,
|
||||
});
|
||||
}
|
||||
for (const auto& buffer : stage->buffers) {
|
||||
const auto sharp = buffer.GetSharp(*stage);
|
||||
bindings.push_back({
|
||||
|
|
|
@ -35,8 +35,7 @@ struct GraphicsPipelineKey {
|
|||
std::array<size_t, MaxShaderStages> stage_hashes;
|
||||
u32 num_color_attachments;
|
||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||
std::array<Shader::FragmentRuntimeInfo::PsColorBuffer, Liverpool::NumColorBuffers>
|
||||
color_buffers;
|
||||
std::array<Shader::PsColorBuffer, Liverpool::NumColorBuffers> color_buffers;
|
||||
vk::Format depth_format;
|
||||
vk::Format stencil_format;
|
||||
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <ranges>
|
||||
#include <span>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "common/debug.h"
|
||||
#include "sdl_window.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
|
@ -206,13 +203,12 @@ std::string Instance::GetDriverVersionName() {
|
|||
}
|
||||
|
||||
bool Instance::CreateDevice() {
|
||||
const vk::StructureChain feature_chain =
|
||||
physical_device
|
||||
.getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVulkan12Features,
|
||||
vk::PhysicalDeviceRobustness2FeaturesEXT,
|
||||
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
|
||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
||||
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
|
||||
const vk::StructureChain feature_chain = physical_device.getFeatures2<
|
||||
vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVulkan11Features,
|
||||
vk::PhysicalDeviceVulkan12Features, vk::PhysicalDeviceRobustness2FeaturesEXT,
|
||||
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
|
||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
||||
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
|
||||
features = feature_chain.get().features;
|
||||
#ifdef __APPLE__
|
||||
portability_features = feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
|
||||
|
@ -319,6 +315,7 @@ bool Instance::CreateDevice() {
|
|||
|
||||
const auto topology_list_restart_features =
|
||||
feature_chain.get<vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>();
|
||||
const auto vk11_features = feature_chain.get<vk::PhysicalDeviceVulkan11Features>();
|
||||
const auto vk12_features = feature_chain.get<vk::PhysicalDeviceVulkan12Features>();
|
||||
vk::StructureChain device_chain = {
|
||||
vk::DeviceCreateInfo{
|
||||
|
@ -351,12 +348,17 @@ bool Instance::CreateDevice() {
|
|||
},
|
||||
},
|
||||
vk::PhysicalDeviceVulkan11Features{
|
||||
.shaderDrawParameters = true,
|
||||
.storageBuffer16BitAccess = vk11_features.storageBuffer16BitAccess,
|
||||
.uniformAndStorageBuffer16BitAccess = vk11_features.uniformAndStorageBuffer16BitAccess,
|
||||
.shaderDrawParameters = vk11_features.shaderDrawParameters,
|
||||
},
|
||||
vk::PhysicalDeviceVulkan12Features{
|
||||
.samplerMirrorClampToEdge = vk12_features.samplerMirrorClampToEdge,
|
||||
.drawIndirectCount = vk12_features.drawIndirectCount,
|
||||
.storageBuffer8BitAccess = vk12_features.storageBuffer8BitAccess,
|
||||
.uniformAndStorageBuffer8BitAccess = vk12_features.uniformAndStorageBuffer8BitAccess,
|
||||
.shaderFloat16 = vk12_features.shaderFloat16,
|
||||
.shaderInt8 = vk12_features.shaderInt8,
|
||||
.scalarBlockLayout = vk12_features.scalarBlockLayout,
|
||||
.uniformBufferStandardLayout = vk12_features.uniformBufferStandardLayout,
|
||||
.separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts,
|
||||
|
|
|
@ -345,12 +345,12 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
|
||||
key.color_formats[remapped_cb] =
|
||||
LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
|
||||
key.color_buffers[remapped_cb] = {
|
||||
key.color_buffers[remapped_cb] = Shader::PsColorBuffer{
|
||||
.num_format = col_buf.GetNumberFmt(),
|
||||
.num_conversion = col_buf.GetNumberConversion(),
|
||||
.swizzle = col_buf.Swizzle(),
|
||||
.export_format = regs.color_export_format.GetFormat(cb),
|
||||
.needs_unorm_fixup = needs_unorm_fixup,
|
||||
.swizzle = col_buf.Swizzle(),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,20 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Creates the initial push data for a bind pass from the current register state.
/// Fills the two instance step rates and the viewport 0 offset/scale values.
/// @param regs Register block to read the step rates and viewport state from.
/// @return A PushData with step0/step1 and xoffset/xscale/yoffset/yscale populated;
///         all other members are value-initialized.
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
|
||||
Shader::PushData push_data{};
|
||||
push_data.step0 = regs.vgt_instance_step_rate_0;
|
||||
push_data.step1 = regs.vgt_instance_step_rate_1;
|
||||
|
||||
// TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
|
||||
// is encountered and implemented in the recompiler.
|
||||
// Only viewport 0 is read. A disabled offset falls back to 0 and a disabled scale to 1.
push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
|
||||
push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
|
||||
push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;
|
||||
push_data.yscale = regs.viewport_control.yscale_enable ? regs.viewports[0].yscale : 1.f;
|
||||
return push_data;
|
||||
}
|
||||
|
||||
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||
AmdGpu::Liverpool* liverpool_)
|
||||
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
|
||||
|
@ -426,95 +440,69 @@ void Rasterizer::Finish() {
|
|||
}
|
||||
|
||||
bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
buffer_infos.clear();
|
||||
buffer_views.clear();
|
||||
image_infos.clear();
|
||||
|
||||
const auto& regs = liverpool->regs;
|
||||
|
||||
if (pipeline->IsCompute()) {
|
||||
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
|
||||
|
||||
// Assume if a shader reads and writes metas at the same time, it is a copy shader.
|
||||
bool meta_read = false;
|
||||
for (const auto& desc : info.buffers) {
|
||||
if (desc.is_gds_buffer) {
|
||||
continue;
|
||||
}
|
||||
if (!desc.is_written) {
|
||||
const VAddr address = desc.GetSharp(info).base_address;
|
||||
meta_read = texture_cache.IsMeta(address);
|
||||
}
|
||||
}
|
||||
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
||||
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
|
||||
// will need its full emulation anyways. For cases of metadata read a warning will be
|
||||
// logged.
|
||||
if (!meta_read) {
|
||||
for (const auto& desc : info.buffers) {
|
||||
const auto sharp = desc.GetSharp(info);
|
||||
const VAddr address = sharp.base_address;
|
||||
if (desc.is_written) {
|
||||
// Assume all slices were updates
|
||||
if (texture_cache.ClearMeta(address)) {
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (texture_cache.IsMeta(address)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Unexpected metadata read by a CS shader (buffer)");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (IsComputeMetaClear(pipeline)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
set_writes.clear();
|
||||
buffer_barriers.clear();
|
||||
buffer_infos.clear();
|
||||
buffer_views.clear();
|
||||
image_infos.clear();
|
||||
|
||||
// Bind resource buffers and textures.
|
||||
Shader::PushData push_data{};
|
||||
Shader::Backend::Bindings binding{};
|
||||
|
||||
Shader::PushData push_data = MakeUserData(liverpool->regs);
|
||||
for (const auto* stage : pipeline->GetStages()) {
|
||||
if (!stage) {
|
||||
continue;
|
||||
}
|
||||
push_data.step0 = regs.vgt_instance_step_rate_0;
|
||||
push_data.step1 = regs.vgt_instance_step_rate_1;
|
||||
|
||||
// TODO(roamic): add support for multiple viewports and geometry shaders when ViewportIndex
|
||||
// is encountered and implemented in the recompiler.
|
||||
if (stage->stage == Shader::Stage::Vertex) {
|
||||
push_data.xoffset =
|
||||
regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
|
||||
push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
|
||||
push_data.yoffset =
|
||||
regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;
|
||||
push_data.yscale = regs.viewport_control.yscale_enable ? regs.viewports[0].yscale : 1.f;
|
||||
}
|
||||
stage->PushUd(binding, push_data);
|
||||
|
||||
BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers);
|
||||
BindTextures(*stage, binding, set_writes);
|
||||
BindBuffers(*stage, binding, push_data);
|
||||
BindTextures(*stage, binding);
|
||||
}
|
||||
|
||||
pipeline->BindResources(set_writes, buffer_barriers, push_data);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Decides whether a compute dispatch only updates tracked metadata and can be skipped.
/// Returns false for non-compute pipelines and for shaders that read metadata through a
/// non-special, non-written buffer. Returns true as soon as texture_cache.ClearMeta()
/// succeeds for the address of a non-special written buffer; otherwise returns false.
/// NOTE(review): ClearMeta presumably mutates the tracked metadata state, so this query
/// is likely not side-effect free — confirm in the texture cache.
bool Rasterizer::IsComputeMetaClear(const Pipeline* pipeline) {
|
||||
if (!pipeline->IsCompute()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
|
||||
|
||||
// Assume if a shader reads and writes metas at the same time, it is a copy shader.
|
||||
for (const auto& desc : info.buffers) {
|
||||
const VAddr address = desc.GetSharp(info).base_address;
|
||||
if (!desc.IsSpecial() && !desc.is_written && texture_cache.IsMeta(address)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
||||
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
|
||||
// will need its full emulation anyways.
|
||||
for (const auto& desc : info.buffers) {
|
||||
const VAddr address = desc.GetSharp(info).base_address;
|
||||
if (!desc.IsSpecial() && desc.is_written && texture_cache.ClearMeta(address)) {
|
||||
// Assume all slices were updated
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding,
|
||||
Shader::PushData& push_data, Pipeline::DescriptorWrites& set_writes,
|
||||
Pipeline::BufferBarriers& buffer_barriers) {
|
||||
Shader::PushData& push_data) {
|
||||
buffer_bindings.clear();
|
||||
|
||||
for (const auto& desc : stage.buffers) {
|
||||
const auto vsharp = desc.GetSharp(stage);
|
||||
if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
|
||||
if (!desc.IsSpecial() && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
|
||||
const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
|
||||
buffer_bindings.emplace_back(buffer_id, vsharp);
|
||||
} else {
|
||||
|
@ -522,47 +510,30 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
}
|
||||
}
|
||||
|
||||
// Bind a SSBO to act as shared memory in case of not being able to use a workgroup buffer
|
||||
// (e.g. when the compute shared memory is bigger than the GPU's shared memory)
|
||||
if (stage.has_emulated_shared_memory) {
|
||||
const auto* lds_buf = buffer_cache.GetLdsBuffer();
|
||||
buffer_infos.emplace_back(lds_buf->Handle(), 0, lds_buf->SizeBytes());
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
|
||||
// Bind the flattened user data buffer as a UBO so it's accessible to the shader
|
||||
if (stage.has_readconst) {
|
||||
const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf);
|
||||
buffer_infos.emplace_back(vk_buffer->Handle(), offset,
|
||||
stage.flattened_ud_buf.size() * sizeof(u32));
|
||||
set_writes.push_back({
|
||||
.dstSet = VK_NULL_HANDLE,
|
||||
.dstBinding = binding.unified++,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eUniformBuffer,
|
||||
.pBufferInfo = &buffer_infos.back(),
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
|
||||
// Second pass to re-bind buffers that were updated after binding
|
||||
for (u32 i = 0; i < buffer_bindings.size(); i++) {
|
||||
const auto& [buffer_id, vsharp] = buffer_bindings[i];
|
||||
const auto& desc = stage.buffers[i];
|
||||
const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile());
|
||||
// Buffer is not from the cache, either a special buffer or unbound.
|
||||
if (!buffer_id) {
|
||||
if (desc.is_gds_buffer) {
|
||||
if (desc.buffer_type == Shader::BufferType::GdsBuffer) {
|
||||
const auto* gds_buf = buffer_cache.GetGdsBuffer();
|
||||
buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
|
||||
} else if (desc.buffer_type == Shader::BufferType::ReadConstUbo) {
|
||||
auto& vk_buffer = buffer_cache.GetStreamBuffer();
|
||||
const u32 ubo_size = stage.flattened_ud_buf.size() * sizeof(u32);
|
||||
const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
|
||||
instance.UniformMinAlignment());
|
||||
buffer_infos.emplace_back(vk_buffer.Handle(), offset, ubo_size);
|
||||
} else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
|
||||
auto& lds_buffer = buffer_cache.GetStreamBuffer();
|
||||
const auto& cs_program = liverpool->GetCsRegs();
|
||||
const auto lds_size = cs_program.SharedMemSize() * cs_program.NumWorkgroups();
|
||||
const auto [data, offset] =
|
||||
lds_buffer.Map(lds_size, instance.StorageMinAlignment());
|
||||
std::memset(data, 0, lds_size);
|
||||
buffer_infos.emplace_back(lds_buffer.Handle(), offset, lds_size);
|
||||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
|
||||
} else {
|
||||
|
@ -605,8 +576,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||
}
|
||||
}
|
||||
|
||||
void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding,
|
||||
Pipeline::DescriptorWrites& set_writes) {
|
||||
void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding) {
|
||||
image_bindings.clear();
|
||||
|
||||
for (const auto& image_desc : stage.images) {
|
||||
|
|
|
@ -81,11 +81,9 @@ private:
|
|||
bool FilterDraw();
|
||||
|
||||
void BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding,
|
||||
Shader::PushData& push_data, Pipeline::DescriptorWrites& set_writes,
|
||||
Pipeline::BufferBarriers& buffer_barriers);
|
||||
Shader::PushData& push_data);
|
||||
|
||||
void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding,
|
||||
Pipeline::DescriptorWrites& set_writes);
|
||||
void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding);
|
||||
|
||||
bool BindResources(const Pipeline* pipeline);
|
||||
void ResetBindings() {
|
||||
|
@ -95,6 +93,8 @@ private:
|
|||
bound_images.clear();
|
||||
}
|
||||
|
||||
bool IsComputeMetaClear(const Pipeline* pipeline);
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue