Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-06-03 01:03:16 +00:00)
video_core: Account for runtime state changes when compiling shaders (#575)
* video_core: Compile shader permutations
* spirv: Only specify storage image format for atomics
* ir: Avoid cube coord patching for storage image
* spirv: Fix default attributes
* data_share: Add more instructions
* video_core: Query storage flag with runtime state
* kernel: Use std::list for semaphore
* video_core: Use texture buffers for untyped format load/store
* buffer_cache: Limit view usage
* vk_pipeline_cache: Fix invalid iterator
* image_view: Reduce log spam when alpha=1 in storage swizzle
* video_core: More features and proper spirv feature detection
* video_core: Attempt no2 for specialization
* spirv: Remove conflict
* vk_shader_cache: Small cleanup
This commit is contained in:
Parent: 790d19e59b
Commit: 66e96dd944
43 changed files with 1058 additions and 976 deletions
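At its core, the change keys compiled shader modules on the pair (guest shader hash, runtime specialization) rather than on the hash alone, so a runtime-state change that invalidates earlier compile-time assumptions yields a new permutation instead of a stale module. A minimal self-contained sketch of that idea, using hypothetical names (the real counterparts are Program, StageSpecialization, and ShaderCache in the new vk_shader_cache.h below):

#include <cstdint>
#include <vector>

// Hypothetical reduction of the permutation cache added by this commit: one guest
// shader maps to several compiled modules, one per observed runtime state.
struct Specialization {
    uint32_t stride;  // captured from the bound buffer descriptor at compile time
    bool is_storage;  // runtime decision: storage vs uniform buffer
    bool operator==(const Specialization&) const = default;
};

struct CompiledModule {
    uint64_t handle; // stand-in for vk::ShaderModule
    Specialization spec;
};

struct ProgramEntry {
    std::vector<CompiledModule> permutations;

    // Return an existing permutation matching the current state, or signal a miss.
    const CompiledModule* Find(const Specialization& now) const {
        for (const auto& m : permutations) {
            if (m.spec == now) {
                return &m;
            }
        }
        return nullptr; // caller recompiles with `now` and appends the result
    }
};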
@@ -167,7 +167,7 @@ struct Liverpool {
     static constexpr auto* GetBinaryInfo(const Shader& sh) {
         const auto* code = sh.template Address<u32*>();
         const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
-        ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
+        // ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
         return bininfo;
     }
@@ -61,6 +61,10 @@ enum class NumberFormat : u32 {
     Ubscaled = 13,
 };
 
+[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
+    return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
+}
+
 [[nodiscard]] std::string_view NameOf(DataFormat fmt);
 [[nodiscard]] std::string_view NameOf(NumberFormat fmt);
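IsInteger later feeds the new specialization records (see TextureBufferSpecialization and ImageSpecialization in vk_shader_cache.h below), since the SPIR-V sampled/texel type must agree with an integer versus float number format. A quick compile-time check of its intent, assuming this header is included (enum member names taken from the NumberFormat enum above):

static_assert(AmdGpu::IsInteger(AmdGpu::NumberFormat::Uint));
static_assert(AmdGpu::IsInteger(AmdGpu::NumberFormat::Sint));
static_assert(!AmdGpu::IsInteger(AmdGpu::NumberFormat::Unorm));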
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/types.h"

@@ -68,6 +69,10 @@ struct Buffer {
         return stride == 0 ? 1U : stride;
     }
 
+    u32 NumDwords() const noexcept {
+        return Common::AlignUp(GetSize(), sizeof(u32)) >> 2;
+    }
+
     u32 GetSize() const noexcept {
         return GetStride() * num_records;
     }
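NumDwords rounds the buffer's byte size up to dword granularity: a descriptor covering 10 bytes gives AlignUp(10, 4) = 12, and 12 >> 2 = 3 dwords. A self-contained check of that arithmetic (plain C++, no project headers):

constexpr unsigned NumDwords(unsigned size_bytes) {
    return ((size_bytes + 3u) & ~3u) >> 2; // AlignUp(size, sizeof(u32)) / 4
}
static_assert(NumDwords(10) == 3);
static_assert(NumDwords(16) == 4);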
@@ -13,13 +13,6 @@
 
 namespace VideoCore {
 
-constexpr vk::BufferUsageFlags AllFlags =
-    vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
-    vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer |
-    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
-    vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
-    vk::BufferUsageFlagBits::eIndirectBuffer;
-
 std::string_view BufferTypeName(MemoryUsage type) {
     switch (type) {
     case MemoryUsage::Upload:
@@ -96,13 +89,13 @@ void UniqueBuffer::Create(const vk::BufferCreateInfo& buffer_ci, MemoryUsage usa
 }
 
 Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_addr_,
-               u64 size_bytes_)
+               vk::BufferUsageFlags flags, u64 size_bytes_)
     : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, usage{usage_},
       buffer{instance->GetDevice(), instance->GetAllocator()} {
     // Create buffer object.
     const vk::BufferCreateInfo buffer_ci = {
         .size = size_bytes,
-        .usage = AllFlags,
+        .usage = flags,
     };
     VmaAllocationInfo alloc_info{};
     buffer.Create(buffer_ci, usage, &alloc_info);
@@ -119,25 +112,33 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_
     is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 }
 
-vk::BufferView Buffer::View(u32 offset, u32 size, AmdGpu::DataFormat dfmt,
+vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
                             AmdGpu::NumberFormat nfmt) {
-    const auto it{std::ranges::find_if(views, [offset, size, dfmt, nfmt](const BufferView& view) {
-        return offset == view.offset && size == view.size && dfmt == view.dfmt && nfmt == view.nfmt;
+    const auto it{std::ranges::find_if(views, [=](const BufferView& view) {
+        return offset == view.offset && size == view.size && is_written == view.is_written &&
+               dfmt == view.dfmt && nfmt == view.nfmt;
     })};
     if (it != views.end()) {
         return it->handle;
     }
+    const vk::BufferUsageFlags2CreateInfoKHR usage_flags = {
+        .usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer
+                            : vk::BufferUsageFlagBits2KHR::eUniformTexelBuffer,
+    };
+    const vk::BufferViewCreateInfo view_ci = {
+        .pNext = &usage_flags,
+        .buffer = buffer.buffer,
+        .format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt),
+        .offset = offset,
+        .range = size,
+    };
     views.push_back({
         .offset = offset,
         .size = size,
+        .is_written = is_written,
         .dfmt = dfmt,
         .nfmt = nfmt,
-        .handle = instance->GetDevice().createBufferView({
-            .buffer = buffer.buffer,
-            .format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt),
-            .offset = offset,
-            .range = size,
-        }),
+        .handle = instance->GetDevice().createBufferView(view_ci),
    });
    return views.back().handle;
}
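Concretely: for the same offset, size, and format, buffer.View(offset, size, true, dfmt, nfmt) and buffer.View(offset, size, false, dfmt, nfmt) now produce two distinct cached views, one declared with storage-texel usage and one with uniform-texel usage. Before this change the first view created would have been reused for both callers, carrying the wrong declared usage for one of them; the is_written flag in both the cache key and the maintenance5 usage chain keeps them in sync.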
@@ -147,7 +148,7 @@ constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
 StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_,
                            MemoryUsage usage, u64 size_bytes)
-    : Buffer{instance, usage, 0, size_bytes}, scheduler{scheduler_} {
+    : Buffer{instance, usage, 0, AllFlags, size_bytes}, scheduler{scheduler_} {
     ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
     ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
     const auto device = instance.GetDevice();
@@ -31,6 +31,15 @@ enum class MemoryUsage {
     Stream, ///< Requests device local host visible buffer, falling back host memory.
 };
 
+constexpr vk::BufferUsageFlags ReadFlags =
+    vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformTexelBuffer |
+    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
+    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer;
+
+constexpr vk::BufferUsageFlags AllFlags = ReadFlags | vk::BufferUsageFlagBits::eTransferDst |
+                                          vk::BufferUsageFlagBits::eStorageTexelBuffer |
+                                          vk::BufferUsageFlagBits::eStorageBuffer;
+
 struct UniqueBuffer {
     explicit UniqueBuffer(vk::Device device, VmaAllocator allocator);
     ~UniqueBuffer();

@@ -65,7 +74,7 @@ struct UniqueBuffer {
 class Buffer {
 public:
     explicit Buffer(const Vulkan::Instance& instance, MemoryUsage usage, VAddr cpu_addr_,
-                    u64 size_bytes_);
+                    vk::BufferUsageFlags flags, u64 size_bytes_);
 
     Buffer& operator=(const Buffer&) = delete;
     Buffer(const Buffer&) = delete;

@@ -73,7 +82,8 @@ public:
     Buffer& operator=(Buffer&&) = default;
     Buffer(Buffer&&) = default;
 
-    vk::BufferView View(u32 offset, u32 size, AmdGpu::DataFormat dfmt, AmdGpu::NumberFormat nfmt);
+    vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
+                        AmdGpu::NumberFormat nfmt);
 
     /// Increases the likeliness of this being a stream buffer
     void IncreaseStreamScore(int score) noexcept {

@@ -121,6 +131,7 @@ public:
     struct BufferView {
         u32 offset;
         u32 size;
+        bool is_written;
         AmdGpu::DataFormat dfmt;
         AmdGpu::NumberFormat nfmt;
         vk::BufferView handle;
@@ -23,7 +23,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
       memory_tracker{&tracker} {
     // Ensure the first slot is used for the null buffer
-    void(slot_buffers.insert(instance, MemoryUsage::DeviceLocal, 0, 1));
+    void(slot_buffers.insert(instance, MemoryUsage::DeviceLocal, 0, ReadFlags, 1));
 }
 
 BufferCache::~BufferCache() = default;

@@ -421,7 +421,7 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
     const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
     const BufferId new_buffer_id =
-        slot_buffers.insert(instance, MemoryUsage::DeviceLocal, overlap.begin, size);
+        slot_buffers.insert(instance, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
     auto& new_buffer = slot_buffers[new_buffer_id];
     const size_t size_bytes = new_buffer.SizeBytes();
     const auto cmdbuf = scheduler.CommandBuffer();

@@ -495,7 +495,8 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size)
     } else {
         // For large one time transfers use a temporary host buffer.
         // RenderDoc can lag quite a bit if the stream buffer is too large.
-        Buffer temp_buffer{instance, MemoryUsage::Upload, 0, total_size_bytes};
+        Buffer temp_buffer{instance, MemoryUsage::Upload, 0, vk::BufferUsageFlagBits::eTransferSrc,
+                           total_size_bytes};
         src_buffer = temp_buffer.Handle();
         u8* const staging = temp_buffer.mapped_data.data();
         for (auto& copy : copies) {
@@ -13,22 +13,31 @@ namespace Vulkan {
 
 ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
                                  vk::PipelineCache pipeline_cache, u64 compute_key_,
-                                 const Program* program)
-    : instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_},
-      info{&program->pgm.info} {
+                                 const Shader::Info& info_, vk::ShaderModule module)
+    : instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{&info_} {
     const vk::PipelineShaderStageCreateInfo shader_ci = {
         .stage = vk::ShaderStageFlagBits::eCompute,
-        .module = program->module,
+        .module = module,
         .pName = "main",
     };
 
     u32 binding{};
     boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
     for (const auto& buffer : info->buffers) {
+        const auto sharp = buffer.GetSharp(*info);
         bindings.push_back({
             .binding = binding++,
-            .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
-                                                : vk::DescriptorType::eUniformBuffer,
+            .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
+                                                      : vk::DescriptorType::eUniformBuffer,
             .descriptorCount = 1,
             .stageFlags = vk::ShaderStageFlagBits::eCompute,
         });
     }
+    for (const auto& tex_buffer : info->texture_buffers) {
+        bindings.push_back({
+            .binding = binding++,
+            .descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
+                                                    : vk::DescriptorType::eUniformTexelBuffer,
+            .descriptorCount = 1,
+            .stageFlags = vk::ShaderStageFlagBits::eCompute,
+        });
+    }
@@ -91,22 +100,24 @@ ComputePipeline::~ComputePipeline() = default;
 bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
                                     VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
+    boost::container::static_vector<vk::BufferView, 8> buffer_views;
     boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
     boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
     Shader::PushData push_data{};
     u32 binding{};
 
-    for (const auto& buffer : info->buffers) {
-        const auto vsharp = buffer.GetVsharp(*info);
+    for (const auto& desc : info->buffers) {
+        const auto vsharp = desc.GetSharp(*info);
+        const bool is_storage = desc.IsStorage(vsharp);
         const VAddr address = vsharp.base_address;
         // Most of the time when a metadata is updated with a shader it gets cleared. It means we
         // can skip the whole dispatch and update the tracked state instead. Also, it is not
         // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will
         // need its full emulation anyways. For cases of metadata read a warning will be logged.
-        if (buffer.is_storage) {
+        if (desc.is_written) {
             if (texture_cache.TouchMeta(address, true)) {
-                LOG_WARNING(Render_Vulkan, "Metadata update skipped");
+                LOG_TRACE(Render_Vulkan, "Metadata update skipped");
                 return false;
             }
         } else {
@@ -115,13 +126,12 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
             }
         }
         const u32 size = vsharp.GetSize();
-        if (buffer.is_written) {
+        if (desc.is_written) {
             texture_cache.InvalidateMemory(address, size, true);
         }
         const u32 alignment =
-            buffer.is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
-        const auto [vk_buffer, offset] =
-            buffer_cache.ObtainBuffer(address, size, buffer.is_written);
+            is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
+        const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(address, size, desc.is_written);
        const u32 offset_aligned = Common::AlignDown(offset, alignment);
        const u32 adjust = offset - offset_aligned;
        if (adjust != 0) {
@@ -134,20 +144,68 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
             .dstBinding = binding++,
             .dstArrayElement = 0,
             .descriptorCount = 1,
-            .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
-                                                : vk::DescriptorType::eUniformBuffer,
+            .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
+                                         : vk::DescriptorType::eUniformBuffer,
             .pBufferInfo = &buffer_infos.back(),
         });
     }
 
+    for (const auto& desc : info->texture_buffers) {
+        const auto vsharp = desc.GetSharp(*info);
+        vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
+        if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
+            const VAddr address = vsharp.base_address;
+            const u32 size = vsharp.GetSize();
+            if (desc.is_written) {
+                if (texture_cache.TouchMeta(address, true)) {
+                    LOG_TRACE(Render_Vulkan, "Metadata update skipped");
+                    return false;
+                }
+            } else {
+                if (texture_cache.IsMeta(address)) {
+                    LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
+                }
+            }
+            if (desc.is_written) {
+                texture_cache.InvalidateMemory(address, size, true);
+            }
+            const u32 alignment = instance.TexelBufferMinAlignment();
+            const auto [vk_buffer, offset] =
+                buffer_cache.ObtainBuffer(address, size, desc.is_written);
+            const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
+            ASSERT_MSG(fmt_stride == vsharp.GetStride(),
+                       "Texel buffer stride must match format stride");
+            const u32 offset_aligned = Common::AlignDown(offset, alignment);
+            const u32 adjust = offset - offset_aligned;
+            if (adjust != 0) {
+                ASSERT(adjust % fmt_stride == 0);
+                push_data.AddOffset(binding, adjust / fmt_stride);
+            }
+            buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
+                                          vsharp.GetDataFmt(), vsharp.GetNumberFmt());
+        }
+        set_writes.push_back({
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = binding++,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
+                                              : vk::DescriptorType::eUniformTexelBuffer,
+            .pTexelBufferView = &buffer_view,
+        });
+    }
+
     for (const auto& image_desc : info->images) {
-        const auto tsharp =
-            info->ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
-        VideoCore::ImageInfo image_info{tsharp};
-        VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
-        const auto& image_view = texture_cache.FindTexture(image_info, view_info);
-        const auto& image = texture_cache.GetImage(image_view.image_id);
-        image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
+        const auto tsharp = image_desc.GetSharp(*info);
+        if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
+            VideoCore::ImageInfo image_info{tsharp};
+            VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
+            const auto& image_view = texture_cache.FindTexture(image_info, view_info);
+            const auto& image = texture_cache.GetImage(image_view.image_id);
+            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
+        } else {
+            image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
+        }
         set_writes.push_back({
             .dstSet = VK_NULL_HANDLE,
             .dstBinding = binding++,
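The adjust logic converts a byte misalignment into an element offset that the shader applies itself. For instance, with minTexelBufferOffsetAlignment = 256, a guest offset of 260, and a 4-byte format: offset_aligned = 256, adjust = 4, and AddOffset records 4 / 4 = 1 element for this binding. Self-contained check of that arithmetic:

constexpr unsigned AlignDown(unsigned v, unsigned a) {
    return v & ~(a - 1); // a must be a power of two
}
static_assert(AlignDown(260, 256) == 256);            // offset_aligned
static_assert(260 - AlignDown(260, 256) == 4);        // adjust, divisible by fmt_stride
static_assert((260 - AlignDown(260, 256)) / 4 == 1);  // element offset pushed to the shader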
@@ -163,7 +221,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
         }
     }
     for (const auto& sampler : info->samplers) {
-        const auto ssharp = sampler.GetSsharp(*info);
+        const auto ssharp = sampler.GetSharp(*info);
         const auto vk_sampler = texture_cache.GetSampler(ssharp);
         image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
         set_writes.push_back({
@@ -3,7 +3,7 @@
 
 #pragma once
 
-#include "shader_recompiler/ir/program.h"
+#include <boost/container/small_vector.hpp>
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/renderer_vulkan/vk_common.h"

@@ -17,18 +17,11 @@ namespace Vulkan {
 class Instance;
 class Scheduler;
 
-struct Program {
-    Shader::IR::Program pgm;
-    std::vector<u32> spv;
-    vk::ShaderModule module;
-    u32 end_binding;
-};
-
 class ComputePipeline {
 public:
     explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
                              vk::PipelineCache pipeline_cache, u64 compute_key,
-                             const Program* program);
+                             const Shader::Info& info, vk::ShaderModule module);
     ~ComputePipeline();
 
     [[nodiscard]] vk::Pipeline Handle() const noexcept {
@@ -19,15 +19,11 @@ namespace Vulkan {
 GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
                                    const GraphicsPipelineKey& key_,
                                    vk::PipelineCache pipeline_cache,
-                                   std::span<const Program*, MaxShaderStages> programs)
+                                   std::span<const Shader::Info*, MaxShaderStages> infos,
+                                   std::span<const vk::ShaderModule> modules)
     : instance{instance_}, scheduler{scheduler_}, key{key_} {
     const vk::Device device = instance.GetDevice();
-    for (u32 i = 0; i < MaxShaderStages; i++) {
-        if (!programs[i]) {
-            continue;
-        }
-        stages[i] = &programs[i]->pgm.info;
-    }
+    std::ranges::copy(infos, stages.begin());
     BuildDescSetLayout();
 
     const vk::PushConstantRange push_constants = {

@@ -194,16 +190,18 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
     auto stage = u32(Shader::Stage::Vertex);
     boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
         shader_stages;
-    shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
-        .stage = vk::ShaderStageFlagBits::eVertex,
-        .module = programs[stage]->module,
-        .pName = "main",
-    });
+    if (infos[stage]) {
+        shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
+            .stage = vk::ShaderStageFlagBits::eVertex,
+            .module = modules[stage],
+            .pName = "main",
+        });
+    }
     stage = u32(Shader::Stage::Fragment);
-    if (programs[stage]) {
+    if (infos[stage]) {
         shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
             .stage = vk::ShaderStageFlagBits::eFragment,
-            .module = programs[stage]->module,
+            .module = modules[stage],
             .pName = "main",
         });
     }

@@ -309,14 +307,24 @@ void GraphicsPipeline::BuildDescSetLayout() {
             continue;
         }
         for (const auto& buffer : stage->buffers) {
+            const auto sharp = buffer.GetSharp(*stage);
             bindings.push_back({
                 .binding = binding++,
-                .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
-                                                    : vk::DescriptorType::eUniformBuffer,
+                .descriptorType = buffer.IsStorage(sharp) ? vk::DescriptorType::eStorageBuffer
+                                                          : vk::DescriptorType::eUniformBuffer,
                 .descriptorCount = 1,
                 .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
             });
         }
+        for (const auto& tex_buffer : stage->texture_buffers) {
+            bindings.push_back({
+                .binding = binding++,
+                .descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
+                                                        : vk::DescriptorType::eUniformTexelBuffer,
+                .descriptorCount = 1,
+                .stageFlags = vk::ShaderStageFlagBits::eCompute,
+            });
+        }
         for (const auto& image : stage->images) {
             bindings.push_back({
                 .binding = binding++,

@@ -347,7 +355,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                                      VideoCore::BufferCache& buffer_cache,
                                      VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
-    boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
+    boost::container::static_vector<vk::BufferView, 8> buffer_views;
+    boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
     boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
     Shader::PushData push_data{};

@@ -362,15 +371,16 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
         push_data.step1 = regs.vgt_instance_step_rate_1;
     }
     for (const auto& buffer : stage->buffers) {
-        const auto vsharp = buffer.GetVsharp(*stage);
+        const auto vsharp = buffer.GetSharp(*stage);
+        const bool is_storage = buffer.IsStorage(vsharp);
         if (vsharp) {
             const VAddr address = vsharp.base_address;
             if (texture_cache.IsMeta(address)) {
                 LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
             }
             const u32 size = vsharp.GetSize();
-            const u32 alignment = buffer.is_storage ? instance.StorageMinAlignment()
-                                                    : instance.UniformMinAlignment();
+            const u32 alignment =
+                is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
             const auto [vk_buffer, offset] =
                 buffer_cache.ObtainBuffer(address, size, buffer.is_written);
             const u32 offset_aligned = Common::AlignDown(offset, alignment);

@@ -388,16 +398,47 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                 .dstBinding = binding++,
                 .dstArrayElement = 0,
                 .descriptorCount = 1,
-                .descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
-                                                    : vk::DescriptorType::eUniformBuffer,
+                .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
+                                             : vk::DescriptorType::eUniformBuffer,
                 .pBufferInfo = &buffer_infos.back(),
             });
         }
 
+    for (const auto& tex_buffer : stage->texture_buffers) {
+        const auto vsharp = tex_buffer.GetSharp(*stage);
+        vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE);
+        if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
+            const VAddr address = vsharp.base_address;
+            const u32 size = vsharp.GetSize();
+            const u32 alignment = instance.TexelBufferMinAlignment();
+            const auto [vk_buffer, offset] =
+                buffer_cache.ObtainBuffer(address, size, tex_buffer.is_written);
+            const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
+            ASSERT_MSG(fmt_stride == vsharp.GetStride(),
+                       "Texel buffer stride must match format stride");
+            const u32 offset_aligned = Common::AlignDown(offset, alignment);
+            const u32 adjust = offset - offset_aligned;
+            if (adjust != 0) {
+                ASSERT(adjust % fmt_stride == 0);
+                push_data.AddOffset(binding, adjust / fmt_stride);
+            }
+            buffer_view = vk_buffer->View(offset, size + adjust, tex_buffer.is_written,
+                                          vsharp.GetDataFmt(), vsharp.GetNumberFmt());
+        }
+        set_writes.push_back({
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = binding++,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
+                                                    : vk::DescriptorType::eUniformTexelBuffer,
+            .pTexelBufferView = &buffer_view,
+        });
+    }
 
     boost::container::static_vector<AmdGpu::Image, 16> tsharps;
     for (const auto& image_desc : stage->images) {
-        const auto tsharp =
-            stage->ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
+        const auto tsharp = image_desc.GetSharp(*stage);
         if (tsharp) {
             tsharps.emplace_back(tsharp);
             VideoCore::ImageInfo image_info{tsharp};

@@ -423,7 +464,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
         }
     }
     for (const auto& sampler : stage->samplers) {
-        auto ssharp = sampler.GetSsharp(*stage);
+        auto ssharp = sampler.GetSharp(*stage);
         if (sampler.disable_aniso) {
             const auto& tsharp = tsharps[sampler.associated_image];
             if (tsharp.base_level == 0 && tsharp.last_level == 0) {

@@ -59,7 +59,8 @@ class GraphicsPipeline {
 public:
     explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
                               const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
-                              std::span<const Program*, MaxShaderStages> programs);
+                              std::span<const Shader::Info*, MaxShaderStages> stages,
+                              std::span<const vk::ShaderModule> modules);
     ~GraphicsPipeline();
 
     void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache,
@@ -178,7 +178,7 @@ bool Instance::CreateDevice() {
         return false;
     }
 
-    boost::container::static_vector<const char*, 20> enabled_extensions;
+    boost::container::static_vector<const char*, 25> enabled_extensions;
     const auto add_extension = [&](std::string_view extension) -> bool {
         const auto result =
             std::find_if(available_extensions.begin(), available_extensions.end(),

@@ -217,6 +217,7 @@ bool Instance::CreateDevice() {
     // with extensions.
     tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
     const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
+    const bool maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
     add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
     add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
     const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);

@@ -277,6 +278,7 @@ bool Instance::CreateDevice() {
         .depthBiasClamp = features.depthBiasClamp,
         .multiViewport = features.multiViewport,
         .samplerAnisotropy = features.samplerAnisotropy,
+        .vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics,
         .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
         .shaderImageGatherExtended = features.shaderImageGatherExtended,
         .shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats,

@@ -299,6 +301,9 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceMaintenance4FeaturesKHR{
             .maintenance4 = true,
         },
+        vk::PhysicalDeviceMaintenance5FeaturesKHR{
+            .maintenance5 = true,
+        },
         vk::PhysicalDeviceDynamicRenderingFeaturesKHR{
             .dynamicRendering = true,
         },

@@ -344,6 +349,9 @@ bool Instance::CreateDevice() {
     if (!maintenance4) {
         device_chain.unlink<vk::PhysicalDeviceMaintenance4FeaturesKHR>();
     }
+    if (!maintenance5) {
+        device_chain.unlink<vk::PhysicalDeviceMaintenance5FeaturesKHR>();
+    }
     if (!custom_border_color) {
         device_chain.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
     }
@@ -192,6 +192,11 @@ public:
         return properties.limits.minStorageBufferOffsetAlignment;
     }
 
+    /// Returns the minimum required alignment for texel buffers
+    vk::DeviceSize TexelBufferMinAlignment() const {
+        return properties.limits.minTexelBufferOffsetAlignment;
+    }
+
     /// Returns the minimum alignment required for accessing host-mapped device memory
     vk::DeviceSize NonCoherentAtomSize() const {
         return properties.limits.nonCoherentAtomSize;
@@ -1,147 +1,59 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include "common/config.h"
-#include "common/io_file.h"
-#include "common/path_util.h"
-#include "shader_recompiler/backend/spirv/emit_spirv.h"
-#include "shader_recompiler/exception.h"
-#include "shader_recompiler/recompiler.h"
 #include "shader_recompiler/runtime_info.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/renderer_vulkan/vk_shader_cache.h"
 
 extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
 
 namespace Vulkan {
 
-using Shader::VsOutput;
-
-[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
-    return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
-}
-
-void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
-    const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
-        if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
-            w != VsOutput::None) {
-            info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
-        }
-    };
-    // VS_OUT_MISC_VEC
-    add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
-               ctl.use_vtx_edge_flag
-                   ? VsOutput::EdgeFlag
-                   : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
-               ctl.use_vtx_kill_flag
-                   ? VsOutput::KillFlag
-                   : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
-               ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
-    // VS_OUT_CCDIST0
-    add_output(ctl.IsClipDistEnabled(0)
-                   ? VsOutput::ClipDist0
-                   : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
-               ctl.IsClipDistEnabled(1)
-                   ? VsOutput::ClipDist1
-                   : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
-               ctl.IsClipDistEnabled(2)
-                   ? VsOutput::ClipDist2
-                   : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
-               ctl.IsClipDistEnabled(3)
-                   ? VsOutput::ClipDist3
-                   : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
-    // VS_OUT_CCDIST1
-    add_output(ctl.IsClipDistEnabled(4)
-                   ? VsOutput::ClipDist4
-                   : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
-               ctl.IsClipDistEnabled(5)
-                   ? VsOutput::ClipDist5
-                   : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
-               ctl.IsClipDistEnabled(6)
-                   ? VsOutput::ClipDist6
-                   : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
-               ctl.IsClipDistEnabled(7)
-                   ? VsOutput::ClipDist7
-                   : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
-}
-
-Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
-                            const AmdGpu::Liverpool::Regs& regs) {
-    Shader::Info info{};
-    info.user_data = user_data;
-    info.stage = stage;
-    switch (stage) {
-    case Shader::Stage::Vertex: {
-        info.num_user_data = regs.vs_program.settings.num_user_regs;
-        info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
-        BuildVsOutputs(info, regs.vs_output_control);
-        break;
-    }
-    case Shader::Stage::Fragment: {
-        info.num_user_data = regs.ps_program.settings.num_user_regs;
-        for (u32 i = 0; i < regs.num_interp; i++) {
-            info.ps_inputs.push_back({
-                .param_index = regs.ps_inputs[i].input_offset.Value(),
-                .is_default = bool(regs.ps_inputs[i].use_default),
-                .is_flat = bool(regs.ps_inputs[i].flat_shade),
-                .default_value = regs.ps_inputs[i].default_value,
-            });
-        }
-        break;
-    }
-    case Shader::Stage::Compute: {
-        const auto& cs_pgm = regs.cs_program;
-        info.num_user_data = cs_pgm.settings.num_user_regs;
-        info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
-                               cs_pgm.num_thread_z.full};
-        info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
-                            cs_pgm.IsTgidEnabled(2)};
-        info.shared_memory_size = cs_pgm.SharedMemSize();
-        break;
-    }
-    default:
-        break;
-    }
-    return info;
-}
-
 PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
                              AmdGpu::Liverpool* liverpool_)
-    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
-      block_pool{512} {
+    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
+      shader_cache{std::make_unique<ShaderCache>(instance, liverpool)} {
     pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
-    profile = Shader::Profile{
-        .supported_spirv = 0x00010600U,
-        .subgroup_size = instance.SubgroupSize(),
-        .support_explicit_workgroup_layout = true,
-    };
 }
 
+PipelineCache::~PipelineCache() = default;
+
 const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
+    const auto& regs = liverpool->regs;
     // Tessellation is unsupported so skip the draw to avoid locking up the driver.
-    if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) {
+    if (regs.primitive_type == Liverpool::PrimitiveType::PatchPrimitive) {
         return nullptr;
     }
+    // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
+    // actual draw hence can skip pipeline creation.
+    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
+        LOG_TRACE(Render_Vulkan, "FCE pass skipped");
+        return nullptr;
+    }
+    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
+        // TODO: check for a valid MRT1 to promote the draw to the resolve pass.
+        LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
+        return nullptr;
+    }
     RefreshGraphicsKey();
     const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
     if (is_new) {
-        it.value() = CreateGraphicsPipeline();
+        it.value() = std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key,
                                                         *pipeline_cache, infos, modules);
     }
     const GraphicsPipeline* pipeline = it->second.get();
     return pipeline;
 }
 
 const ComputePipeline* PipelineCache::GetComputePipeline() {
-    const auto& cs_pgm = liverpool->regs.cs_program;
-    ASSERT(cs_pgm.Address() != nullptr);
-    const auto* bininfo = Liverpool::GetBinaryInfo(cs_pgm);
-    compute_key = bininfo->shader_hash;
+    RefreshComputeKey();
     const auto [it, is_new] = compute_pipelines.try_emplace(compute_key);
     if (is_new) {
-        it.value() = CreateComputePipeline();
+        it.value() = std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache,
+                                                       compute_key, *infos[0], modules[0]);
     }
     const ComputePipeline* pipeline = it->second.get();
     return pipeline;
@@ -229,164 +141,37 @@ void PipelineCache::RefreshGraphicsKey() {
         ++remapped_cb;
     }
 
+    u32 binding{};
     for (u32 i = 0; i < MaxShaderStages; i++) {
         if (!regs.stage_enable.IsStageEnabled(i)) {
             key.stage_hashes[i] = 0;
+            infos[i] = nullptr;
             continue;
         }
         auto* pgm = regs.ProgramForStage(i);
         if (!pgm || !pgm->Address<u32*>()) {
             key.stage_hashes[i] = 0;
+            infos[i] = nullptr;
             continue;
         }
         const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
         if (!bininfo->Valid()) {
             key.stage_hashes[i] = 0;
+            infos[i] = nullptr;
             continue;
         }
-        key.stage_hashes[i] = bininfo->shader_hash;
+        const GuestProgram guest_pgm{pgm, stage};
+        std::tie(infos[i], modules[i], key.stage_hashes[i]) =
+            shader_cache->GetProgram(guest_pgm, binding);
     }
 }
 
-std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
-    const auto& regs = liverpool->regs;
-
-    // There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
-    // actual draw hence can skip pipeline creation.
-    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {
-        LOG_TRACE(Render_Vulkan, "FCE pass skipped");
-        return {};
-    }
-
-    if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::FmaskDecompress) {
-        // TODO: check for a valid MRT1 to promote the draw to the resolve pass.
-        LOG_TRACE(Render_Vulkan, "FMask decompression pass skipped");
-        return {};
-    }
-
-    u32 binding{};
-    for (u32 i = 0; i < MaxShaderStages; i++) {
-        if (!graphics_key.stage_hashes[i]) {
-            programs[i] = nullptr;
-            continue;
-        }
-        auto* pgm = regs.ProgramForStage(i);
-        const auto code = pgm->Code();
-
-        // Dump shader code if requested.
-        const auto stage = Shader::Stage{i};
-        const u64 hash = graphics_key.stage_hashes[i];
-        if (Config::dumpShaders()) {
-            DumpShader(code, hash, stage, "bin");
-        }
-
-        if (stage != Shader::Stage::Fragment && stage != Shader::Stage::Vertex) {
-            LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage);
-            return {};
-        }
-
-        const u64 lookup_hash = HashCombine(hash, binding);
-        auto it = program_cache.find(lookup_hash);
-        if (it != program_cache.end()) {
-            const Program* program = it.value().get();
-            ASSERT(program->pgm.info.stage == stage);
-            programs[i] = program;
-            binding = program->end_binding;
-            continue;
-        }
-
-        // Recompile shader to IR.
-        try {
-            auto program = std::make_unique<Program>();
-            block_pool.ReleaseContents();
-            inst_pool.ReleaseContents();
-
-            LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x}", stage, hash);
-            Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
-            info.pgm_base = pgm->Address<uintptr_t>();
-            info.pgm_hash = hash;
-            program->pgm =
-                Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
-
-            // Compile IR to SPIR-V
-            program->spv = Shader::Backend::SPIRV::EmitSPIRV(profile, program->pgm, binding);
-            if (Config::dumpShaders()) {
-                DumpShader(program->spv, hash, stage, "spv");
-            }
-
-            // Compile module and set name to hash in renderdoc
-            program->end_binding = binding;
-            program->module = CompileSPV(program->spv, instance.GetDevice());
-            const auto name = fmt::format("{}_{:#x}", stage, hash);
-            Vulkan::SetObjectName(instance.GetDevice(), program->module, name);
-
-            // Cache program
-            const auto [it, _] = program_cache.emplace(lookup_hash, std::move(program));
-            programs[i] = it.value().get();
-        } catch (const Shader::Exception& e) {
-            UNREACHABLE_MSG("{}", e.what());
-        }
-    }
-
-    return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
-                                              programs);
-}
-
-std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
-    const auto& cs_pgm = liverpool->regs.cs_program;
-    const auto code = cs_pgm.Code();
-
-    // Dump shader code if requested.
-    if (Config::dumpShaders()) {
-        DumpShader(code, compute_key, Shader::Stage::Compute, "bin");
-    }
-
-    block_pool.ReleaseContents();
-    inst_pool.ReleaseContents();
-
-    // Recompile shader to IR.
-    try {
-        auto program = std::make_unique<Program>();
-        LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
-        Shader::Info info =
-            MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
-        info.pgm_base = cs_pgm.Address<uintptr_t>();
-        info.pgm_hash = compute_key;
-        program->pgm =
-            Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info), profile);
-
-        // Compile IR to SPIR-V
-        u32 binding{};
-        program->spv = Shader::Backend::SPIRV::EmitSPIRV(profile, program->pgm, binding);
-        if (Config::dumpShaders()) {
-            DumpShader(program->spv, compute_key, Shader::Stage::Compute, "spv");
-        }
-
-        // Compile module and set name to hash in renderdoc
-        program->module = CompileSPV(program->spv, instance.GetDevice());
-        const auto name = fmt::format("cs_{:#x}", compute_key);
-        Vulkan::SetObjectName(instance.GetDevice(), program->module, name);
-
-        // Cache program
-        const auto [it, _] = program_cache.emplace(compute_key, std::move(program));
-        return std::make_unique<ComputePipeline>(instance, scheduler, *pipeline_cache, compute_key,
-                                                 it.value().get());
-    } catch (const Shader::Exception& e) {
-        UNREACHABLE_MSG("{}", e.what());
-        return nullptr;
-    }
-}
-
-void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
-                               std::string_view ext) {
-    using namespace Common::FS;
-    const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
-    if (!std::filesystem::exists(dump_dir)) {
-        std::filesystem::create_directories(dump_dir);
-    }
-    const auto filename = fmt::format("{}_{:#018x}.{}", stage, hash, ext);
-    const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
-    file.WriteSpan(code);
+void PipelineCache::RefreshComputeKey() {
+    u32 binding{};
+    const auto* cs_pgm = &liverpool->regs.cs_program;
+    const GuestProgram guest_pgm{cs_pgm, Shader::Stage::Compute};
+    std::tie(infos[0], modules[0], compute_key) = shader_cache->GetProgram(guest_pgm, binding);
 }
 
 } // namespace Vulkan
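Note why the key hash folds in a permutation index: two draws using the same guest shader but different runtime specializations get different values back from ShaderCache::GetProgram, hence different key.stage_hashes entries, hence distinct pipeline cache entries. A runnable illustration using the HashCombine definition from vk_shader_cache.cpp below (the shader hash value itself is hypothetical):

#include <cassert>
#include <cstdint>

// Same mix function as in vk_shader_cache.cpp; 0x9e3779b9 is the golden-ratio
// constant familiar from boost::hash_combine.
constexpr uint64_t HashCombine(uint64_t seed, uint64_t hash) {
    return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

int main() {
    const uint64_t shader_hash = 0xdeadbeef; // hypothetical guest shader hash
    // Permutation 0 and permutation 1 of the same shader produce distinct keys.
    assert(HashCombine(shader_hash, 0) != HashCombine(shader_hash, 1));
}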
@@ -4,9 +4,6 @@
 #pragma once
 
 #include <tsl/robin_map.h>
-#include "shader_recompiler/ir/basic_block.h"
-#include "shader_recompiler/ir/program.h"
-#include "shader_recompiler/profile.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"

@@ -18,6 +15,7 @@ namespace Vulkan {
 
 class Instance;
 class Scheduler;
+class ShaderCache;
 
 class PipelineCache {
     static constexpr size_t MaxShaderStages = 5;

@@ -25,7 +23,7 @@ class PipelineCache {
 public:
     explicit PipelineCache(const Instance& instance, Scheduler& scheduler,
                            AmdGpu::Liverpool* liverpool);
-    ~PipelineCache() = default;
+    ~PipelineCache();
 
     const GraphicsPipeline* GetGraphicsPipeline();

@@ -33,10 +31,7 @@ public:
 
 private:
     void RefreshGraphicsKey();
-    void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, std::string_view ext);
-
-    std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
-    std::unique_ptr<ComputePipeline> CreateComputePipeline();
+    void RefreshComputeKey();
 
 private:
     const Instance& instance;

@@ -44,15 +39,13 @@ private:
     AmdGpu::Liverpool* liverpool;
     vk::UniquePipelineCache pipeline_cache;
     vk::UniquePipelineLayout pipeline_layout;
-    tsl::robin_map<size_t, std::unique_ptr<Program>> program_cache;
+    std::unique_ptr<ShaderCache> shader_cache;
     tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
     tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
-    std::array<const Program*, MaxShaderStages> programs{};
-    Shader::Profile profile{};
+    std::array<const Shader::Info*, MaxShaderStages> infos{};
+    std::array<vk::ShaderModule, MaxShaderStages> modules{};
     GraphicsPipelineKey graphics_key{};
     u64 compute_key{};
-    Common::ObjectPool<Shader::IR::Inst> inst_pool;
-    Common::ObjectPool<Shader::IR::Block> block_pool;
 };
 
 } // namespace Vulkan
src/video_core/renderer_vulkan/vk_shader_cache.cpp (new file, 192 lines)
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_shader_cache.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"

namespace Vulkan {

using Shader::VsOutput;

void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
    const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
        if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
            w != VsOutput::None) {
            info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
        }
    };
    // VS_OUT_MISC_VEC
    add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
               ctl.use_vtx_edge_flag
                   ? VsOutput::EdgeFlag
                   : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
               ctl.use_vtx_kill_flag
                   ? VsOutput::KillFlag
                   : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
               ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
    // VS_OUT_CCDIST0
    add_output(ctl.IsClipDistEnabled(0)
                   ? VsOutput::ClipDist0
                   : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
               ctl.IsClipDistEnabled(1)
                   ? VsOutput::ClipDist1
                   : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
               ctl.IsClipDistEnabled(2)
                   ? VsOutput::ClipDist2
                   : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
               ctl.IsClipDistEnabled(3)
                   ? VsOutput::ClipDist3
                   : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
    // VS_OUT_CCDIST1
    add_output(ctl.IsClipDistEnabled(4)
                   ? VsOutput::ClipDist4
                   : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
               ctl.IsClipDistEnabled(5)
                   ? VsOutput::ClipDist5
                   : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
               ctl.IsClipDistEnabled(6)
                   ? VsOutput::ClipDist6
                   : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
               ctl.IsClipDistEnabled(7)
                   ? VsOutput::ClipDist7
                   : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
}

Shader::Info MakeShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs) {
    Shader::Info info{};
    info.user_data = pgm.user_data;
    info.pgm_base = VAddr(pgm.code.data());
    info.pgm_hash = pgm.hash;
    info.stage = pgm.stage;
    switch (pgm.stage) {
    case Shader::Stage::Vertex: {
        info.num_user_data = regs.vs_program.settings.num_user_regs;
        info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
        BuildVsOutputs(info, regs.vs_output_control);
        break;
    }
    case Shader::Stage::Fragment: {
        info.num_user_data = regs.ps_program.settings.num_user_regs;
        for (u32 i = 0; i < regs.num_interp; i++) {
            info.ps_inputs.push_back({
                .param_index = regs.ps_inputs[i].input_offset.Value(),
                .is_default = bool(regs.ps_inputs[i].use_default),
                .is_flat = bool(regs.ps_inputs[i].flat_shade),
                .default_value = regs.ps_inputs[i].default_value,
            });
        }
        break;
    }
    case Shader::Stage::Compute: {
        const auto& cs_pgm = regs.cs_program;
        info.num_user_data = cs_pgm.settings.num_user_regs;
        info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
                               cs_pgm.num_thread_z.full};
        info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
                            cs_pgm.IsTgidEnabled(2)};
        info.shared_memory_size = cs_pgm.SharedMemSize();
        break;
    }
    default:
        break;
    }
    return info;
}

[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
    return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

ShaderCache::ShaderCache(const Instance& instance_, AmdGpu::Liverpool* liverpool_)
    : instance{instance_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} {
    profile = Shader::Profile{
        .supported_spirv = 0x00010600U,
        .subgroup_size = instance.SubgroupSize(),
        .support_explicit_workgroup_layout = true,
    };
}

vk::ShaderModule ShaderCache::CompileModule(Shader::Info& info, std::span<const u32> code,
                                            size_t perm_idx, u32& binding) {
    LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
             perm_idx != 0 ? "(permutation)" : "");

    if (Config::dumpShaders()) {
        DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
    }

    block_pool.ReleaseContents();
    inst_pool.ReleaseContents();
    const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);

    // Compile IR to SPIR-V
    const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
    if (Config::dumpShaders()) {
        DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
    }

    // Create module and set name to hash in renderdoc
    const auto module = CompileSPV(spv, instance.GetDevice());
    ASSERT(module != VK_NULL_HANDLE);
    const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
    Vulkan::SetObjectName(instance.GetDevice(), module, name);
    return module;
}

Program* ShaderCache::CreateProgram(const GuestProgram& pgm, u32& binding) {
    Program* program = program_pool.Create(MakeShaderInfo(pgm, liverpool->regs));
    u32 start_binding = binding;
    const auto module = CompileModule(program->info, pgm.code, 0, binding);
    program->modules.emplace_back(module, StageSpecialization{program->info, start_binding});
    return program;
}

std::tuple<const Shader::Info*, vk::ShaderModule, u64> ShaderCache::GetProgram(
    const GuestProgram& pgm, u32& binding) {
    auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash);
    if (new_program) {
        auto program = CreateProgram(pgm, binding);
        const auto module = program->modules.back().module;
        it_pgm.value() = program;
        return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0));
    }

    Program* program = it_pgm->second;
    const auto& info = program->info;
    size_t perm_idx = program->modules.size();
    StageSpecialization spec{info, binding};
    vk::ShaderModule module{};

    const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
    if (it == program->modules.end()) {
        auto new_info = MakeShaderInfo(pgm, liverpool->regs);
        module = CompileModule(new_info, pgm.code, perm_idx, binding);
        program->modules.emplace_back(module, std::move(spec));
    } else {
        binding += info.NumBindings();
        module = it->module;
        perm_idx = std::distance(program->modules.begin(), it);
    }
    return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx));
}

void ShaderCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
                             size_t perm_idx, std::string_view ext) {
    using namespace Common::FS;
    const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
    if (!std::filesystem::exists(dump_dir)) {
        std::filesystem::create_directories(dump_dir);
    }
    const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext);
    const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
    file.WriteSpan(code);
}

} // namespace Vulkan
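GetProgram's permutation lookup leans on std::ranges::find with a member projection: the current StageSpecialization is compared against the spec member of each cached Program::Module. A tiny self-contained demonstration of that idiom, with hypothetical Spec/Module stand-ins:

#include <algorithm>
#include <vector>

struct Spec {
    int stride;
    bool operator==(const Spec&) const = default;
};

struct Module {
    int module; // stand-in for vk::ShaderModule
    Spec spec;
};

int main() {
    // Two cached permutations of one guest shader.
    std::vector<Module> modules{{0, {16}}, {1, {4}}};
    const Spec wanted{4}; // specialization built from current runtime state
    // Projection: compare `wanted` against each element's .spec member.
    const auto it = std::ranges::find(modules, wanted, &Module::spec);
    return it != modules.end() ? it->module : -1; // returns 1 (cache hit)
}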
src/video_core/renderer_vulkan/vk_shader_cache.h (new file, 156 lines)
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <bitset>
#include <boost/container/small_vector.hpp>
#include <tsl/robin_map.h>
#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_common.h"

namespace Vulkan {

class Instance;

struct BufferSpecialization {
    u16 stride : 14;
    u16 is_storage : 1;

    auto operator<=>(const BufferSpecialization&) const = default;
};

struct TextureBufferSpecialization {
    bool is_integer;

    auto operator<=>(const TextureBufferSpecialization&) const = default;
};

struct ImageSpecialization {
    AmdGpu::ImageType type;
    bool is_integer;

    auto operator<=>(const ImageSpecialization&) const = default;
};

struct StageSpecialization {
    static constexpr size_t MaxStageResources = 32;

    const Shader::Info* info;
    std::bitset<MaxStageResources> bitset{};
    boost::container::small_vector<BufferSpecialization, 16> buffers;
    boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
    boost::container::small_vector<ImageSpecialization, 8> images;
    u32 start_binding{};

    void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
        for (const auto& desc : desc_list) {
            auto& spec = spec_list.emplace_back();
            const auto sharp = desc.GetSharp(*info);
            if (!sharp) {
                binding++;
                continue;
            }
            bitset.set(binding++);
            func(spec, desc, sharp);
        }
    }

    StageSpecialization(const Shader::Info& info_, u32 start_binding_)
        : info{&info_}, start_binding{start_binding_} {
        u32 binding{};
        ForEachSharp(binding, buffers, info->buffers,
                     [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                         spec.stride = sharp.GetStride();
                         spec.is_storage = desc.IsStorage(sharp);
                     });
        ForEachSharp(binding, tex_buffers, info->texture_buffers,
                     [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                         spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
                     });
        ForEachSharp(binding, images, info->images,
                     [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
                         spec.type = sharp.GetType();
                         spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
                     });
    }

    bool operator==(const StageSpecialization& other) const {
        if (start_binding != other.start_binding) {
            return false;
        }
        u32 binding{};
        for (u32 i = 0; i < buffers.size(); i++) {
            if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
                return false;
            }
        }
        for (u32 i = 0; i < tex_buffers.size(); i++) {
            if (other.bitset[binding++] && tex_buffers[i] != other.tex_buffers[i]) {
                return false;
            }
        }
        for (u32 i = 0; i < images.size(); i++) {
            if (other.bitset[binding++] && images[i] != other.images[i]) {
                return false;
            }
        }
        return true;
    }
};

struct Program {
    struct Module {
        vk::ShaderModule module;
        StageSpecialization spec;
    };

    Shader::Info info;
    boost::container::small_vector<Module, 8> modules;

    explicit Program(const Shader::Info& info_) : info{info_} {}
};

struct GuestProgram {
    Shader::Stage stage;
    std::span<const u32, AmdGpu::Liverpool::NumShaderUserData> user_data;
    std::span<const u32> code;
    u64 hash;

    explicit GuestProgram(const auto* pgm, Shader::Stage stage_)
        : stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} {
        const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm);
        hash = bininfo->shader_hash;
    }
};

class ShaderCache {
public:
    explicit ShaderCache(const Instance& instance, AmdGpu::Liverpool* liverpool);
    ~ShaderCache() = default;

    std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(const GuestProgram& pgm,
                                                                      u32& binding);

private:
    void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
                    std::string_view ext);
    vk::ShaderModule CompileModule(Shader::Info& info, std::span<const u32> code, size_t perm_idx,
                                   u32& binding);
    Program* CreateProgram(const GuestProgram& pgm, u32& binding);

private:
    const Instance& instance;
    AmdGpu::Liverpool* liverpool;
    Shader::Profile profile{};
    tsl::robin_map<size_t, Program*> program_cache;
    Common::ObjectPool<Shader::IR::Inst> inst_pool;
    Common::ObjectPool<Shader::IR::Block> block_pool;
    Common::ObjectPool<Program> program_pool;
};

} // namespace Vulkan
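One subtlety of StageSpecialization::operator== above: an entry whose sharp was invalid at capture time leaves its bitset bit clear, and such entries are skipped during comparison, so a null descriptor in one permutation never forces a recompile over a slot the shader cannot observe. A runnable, self-contained analogue of that bitset-guarded comparison (hypothetical Spec type, strides standing in for the per-resource records):

#include <bitset>
#include <cassert>
#include <vector>

// Analogue of the operator== above: an entry participates in the comparison only
// if the other side marked its binding bit, i.e. its sharp was valid at capture.
struct Spec {
    std::bitset<32> bitset;
    std::vector<int> strides;

    bool operator==(const Spec& other) const {
        for (size_t i = 0; i < strides.size(); i++) {
            if (other.bitset[i] && strides[i] != other.strides[i]) {
                return false;
            }
        }
        return true;
    }
};

int main() {
    Spec a{.bitset = 0b10, .strides = {0, 16}};  // slot 0 had a null sharp
    Spec b{.bitset = 0b10, .strides = {99, 16}}; // differs only in the ignored slot
    assert(a == b); // equal: the unmarked slot is never compared
}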
@@ -50,9 +50,9 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
 }
 
 bool IsIdentityMapping(u32 dst_sel, u32 num_components) {
-    return (num_components == 1 && dst_sel == 0b100) ||
-           (num_components == 2 && dst_sel == 0b101'100) ||
-           (num_components == 3 && dst_sel == 0b110'101'100) ||
+    return (num_components == 1 && dst_sel == 0b001'000'000'100) ||
+           (num_components == 2 && dst_sel == 0b001'000'101'100) ||
+           (num_components == 3 && dst_sel == 0b001'110'101'100) ||
            (num_components == 4 && dst_sel == 0b111'110'101'100);
 }
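A worked decode of the widened one-component mask, assuming the usual GCN dst_sel encoding (0 = zero, 1 = one, 4..7 = R, G, B, A; three bits per destination channel, R in the lowest field): 0b001'000'000'100 selects R from R (4), zero for G (0), zero for B (0), and one for A (1), i.e. the swizzle (R, 0, 0, 1) — exactly the identity expansion of a single-channel image. The old mask 0b100 covered only the R field, so swizzles carrying the zero/one defaults in the upper fields were wrongly rejected as non-identity.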
@@ -187,6 +187,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
     case vk::Format::eR32Uint:
     case vk::Format::eR16G16Sfloat:
     case vk::Format::eR16G16Unorm:
+    case vk::Format::eB10G11R11UfloatPack32:
         return vk::Format::eR32Uint;
     case vk::Format::eBc1RgbaSrgbBlock:
     case vk::Format::eBc1RgbaUnormBlock:

@@ -202,6 +203,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
     case vk::Format::eBc3SrgbBlock:
     case vk::Format::eBc3UnormBlock:
     case vk::Format::eBc5UnormBlock:
+    case vk::Format::eBc5SnormBlock:
     case vk::Format::eBc7SrgbBlock:
     case vk::Format::eBc7UnormBlock:
     case vk::Format::eBc6HUfloatBlock: