shader: Primitive Vulkan integration

parent c67d64365a
commit 85cce78583

43 changed files with 1003 additions and 3036 deletions
src/video_core/CMakeLists.txt
@@ -43,9 +43,6 @@ add_library(video_core STATIC
     engines/maxwell_3d.h
     engines/maxwell_dma.cpp
     engines/maxwell_dma.h
-    engines/shader_bytecode.h
-    engines/shader_header.h
-    engines/shader_type.h
     framebuffer_config.h
     macro/macro.cpp
     macro/macro.h
@@ -123,6 +120,7 @@ add_library(video_core STATIC
     renderer_vulkan/vk_master_semaphore.h
     renderer_vulkan/vk_pipeline_cache.cpp
     renderer_vulkan/vk_pipeline_cache.h
+    renderer_vulkan/vk_pipeline.h
     renderer_vulkan/vk_query_cache.cpp
     renderer_vulkan/vk_query_cache.h
     renderer_vulkan/vk_rasterizer.cpp
@@ -201,7 +199,7 @@
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad xbyak)
+target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak)
 
 if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
     add_dependencies(video_core ffmpeg-build)
@@ -12,7 +12,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/engine_interface.h"
 #include "video_core/engines/engine_upload.h"
-#include "video_core/engines/shader_type.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/texture.h"
 
File diff suppressed because it is too large
src/video_core/engines/shader_header.h (deleted, 158 lines)
@@ -1,158 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <optional>
-
-#include "common/bit_field.h"
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-
-namespace Tegra::Shader {
-
-enum class OutputTopology : u32 {
-    PointList = 1,
-    LineStrip = 6,
-    TriangleStrip = 7,
-};
-
-enum class PixelImap : u8 {
-    Unused = 0,
-    Constant = 1,
-    Perspective = 2,
-    ScreenLinear = 3,
-};
-
-// Documentation in:
-// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
-struct Header {
-    union {
-        BitField<0, 5, u32> sph_type;
-        BitField<5, 5, u32> version;
-        BitField<10, 4, u32> shader_type;
-        BitField<14, 1, u32> mrt_enable;
-        BitField<15, 1, u32> kills_pixels;
-        BitField<16, 1, u32> does_global_store;
-        BitField<17, 4, u32> sass_version;
-        BitField<21, 5, u32> reserved;
-        BitField<26, 1, u32> does_load_or_store;
-        BitField<27, 1, u32> does_fp64;
-        BitField<28, 4, u32> stream_out_mask;
-    } common0;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_low_size;
-        BitField<24, 8, u32> per_patch_attribute_count;
-    } common1;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_high_size;
-        BitField<24, 8, u32> threads_per_input_primitive;
-    } common2;
-
-    union {
-        BitField<0, 24, u32> shader_local_memory_crs_size;
-        BitField<24, 4, OutputTopology> output_topology;
-        BitField<28, 4, u32> reserved;
-    } common3;
-
-    union {
-        BitField<0, 12, u32> max_output_vertices;
-        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
-        BitField<20, 4, u32> reserved;
-        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
-    } common4;
-
-    union {
-        struct {
-            INSERT_PADDING_BYTES_NOINIT(3);  // ImapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1);  // ImapSystemValuesB
-            INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
-            union {
-                BitField<0, 8, u16> clip_distances;
-                BitField<8, 1, u16> point_sprite_s;
-                BitField<9, 1, u16> point_sprite_t;
-                BitField<10, 1, u16> fog_coordinate;
-                BitField<12, 1, u16> tessellation_eval_point_u;
-                BitField<13, 1, u16> tessellation_eval_point_v;
-                BitField<14, 1, u16> instance_id;
-                BitField<15, 1, u16> vertex_id;
-            };
-            INSERT_PADDING_BYTES_NOINIT(5);  // ImapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(1);  // ImapReserved
-            INSERT_PADDING_BYTES_NOINIT(3);  // OmapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1);  // OmapSystemValuesB
-            INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
-            INSERT_PADDING_BYTES_NOINIT(2);  // OmapColor
-            INSERT_PADDING_BYTES_NOINIT(2);  // OmapSystemValuesC
-            INSERT_PADDING_BYTES_NOINIT(5);  // OmapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(1);  // OmapReserved
-        } vtg;
-
-        struct {
-            INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
-            INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
-
-            union {
-                BitField<0, 2, PixelImap> x;
-                BitField<2, 2, PixelImap> y;
-                BitField<4, 2, PixelImap> z;
-                BitField<6, 2, PixelImap> w;
-                u8 raw;
-            } imap_generic_vector[32];
-
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapColor
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapSystemValuesC
-            INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
-            INSERT_PADDING_BYTES_NOINIT(2);  // ImapReserved
-
-            struct {
-                u32 target;
-                union {
-                    BitField<0, 1, u32> sample_mask;
-                    BitField<1, 1, u32> depth;
-                    BitField<2, 30, u32> reserved;
-                };
-            } omap;
-
-            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
-                const u32 bit = render_target * 4 + component;
-                return omap.target & (1 << bit);
-            }
-
-            PixelImap GetPixelImap(u32 attribute) const {
-                const auto get_index = [this, attribute](u32 index) {
-                    return static_cast<PixelImap>(
-                        (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
-                };
-
-                std::optional<PixelImap> result;
-                for (u32 component = 0; component < 4; ++component) {
-                    const PixelImap index = get_index(component);
-                    if (index == PixelImap::Unused) {
-                        continue;
-                    }
-                    if (result && result != index) {
-                        LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
-                    }
-                    result = index;
-                }
-                return result.value_or(PixelImap::Unused);
-            }
-        } ps;
-
-        std::array<u32, 0xF> raw;
-    };
-
-    u64 GetLocalMemorySize() const {
-        return (common1.shader_local_memory_low_size |
-                (common2.shader_local_memory_high_size << 24));
-    }
-};
-static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
-
-} // namespace Tegra::Shader
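The deleted shader program header (SPH) split the shader's local memory size across a 24-bit low field in common1 and a 24-bit high field in common2; GetLocalMemorySize() stitched them back together. A minimal standalone sketch of that decoding, using plain shifts and masks instead of yuzu's BitField (names here are illustrative):

// Standalone sketch (not yuzu code) of the decoding GetLocalMemorySize() did.
#include <cstdint>
#include <iostream>

uint64_t LocalMemorySize(uint32_t common1, uint32_t common2) {
    const uint64_t low = common1 & 0xFFFFFF;  // bits 0-23: shader_local_memory_low_size
    const uint64_t high = common2 & 0xFFFFFF; // bits 0-23: shader_local_memory_high_size
    return low | (high << 24);                // combined local memory size in bytes
}

int main() {
    // Example: low = 0x1000, high = 0x1 -> 0x1001000 bytes total.
    std::cout << std::hex << LocalMemorySize(0x1000, 0x1) << '\n';
}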
src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -4,6 +4,9 @@
 
 #include <vector>
 
+#include <boost/container/small_vector.hpp>
+
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -13,9 +16,142 @@
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
+namespace {
+vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) {
+    boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings;
+    u32 binding{};
+    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        });
+        ++binding;
+    }
+    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
+        bindings.push_back({
+            .binding = binding,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        });
+        ++binding;
+    }
+    return device.GetLogical().CreateDescriptorSetLayout({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .bindingCount = static_cast<u32>(bindings.size()),
+        .pBindings = bindings.data(),
+    });
+}
 
-ComputePipeline::ComputePipeline() = default;
+vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
+    const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout,
+    VkPipelineLayout pipeline_layout) {
+    boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries;
+    size_t offset{};
+    u32 binding{};
+    for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
+        entries.push_back({
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
+    for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
+        entries.push_back({
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = offset,
+            .stride = sizeof(DescriptorUpdateEntry),
+        });
+        ++binding;
+        offset += sizeof(DescriptorUpdateEntry);
+    }
+    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
+        .pDescriptorUpdateEntries = entries.data(),
+        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
+        .descriptorSetLayout = descriptor_set_layout,
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
+        .pipelineLayout = pipeline_layout,
+        .set = 0,
+    });
+}
+} // Anonymous namespace
 
-ComputePipeline::~ComputePipeline() = default;
+ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                 const Shader::Info& info_, vk::ShaderModule spv_module_)
+    : update_descriptor_queue{&update_descriptor_queue_}, info{info_},
+      spv_module(std::move(spv_module_)),
+      descriptor_set_layout(CreateDescriptorSetLayout(device, info)),
+      descriptor_allocator(descriptor_pool, *descriptor_set_layout),
+      pipeline_layout{device.GetLogical().CreatePipelineLayout({
+          .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+          .pNext = nullptr,
+          .flags = 0,
+          .setLayoutCount = 1,
+          .pSetLayouts = descriptor_set_layout.address(),
+          .pushConstantRangeCount = 0,
+          .pPushConstantRanges = nullptr,
+      })},
+      descriptor_update_template{
+          CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)},
+      pipeline{device.GetLogical().CreateComputePipeline({
+          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+          .pNext = nullptr,
+          .flags = 0,
+          .stage{
+              .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+              .pNext = nullptr,
+              .flags = 0,
+              .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+              .module = *spv_module,
+              .pName = "main",
+              .pSpecializationInfo = nullptr,
+          },
+          .layout = *pipeline_layout,
+          .basePipelineHandle = 0,
+          .basePipelineIndex = 0,
+      })} {}
+
+void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
+    u32 enabled_uniforms{};
+    for (const auto& desc : info.constant_buffer_descriptors) {
+        enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index;
+    }
+    buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms);
+
+    buffer_cache.UnbindComputeStorageBuffers();
+    size_t index{};
+    for (const auto& desc : info.storage_buffers_descriptors) {
+        ASSERT(desc.count == 1);
+        buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true);
+        ++index;
+    }
+    buffer_cache.UpdateComputeBuffers();
+    buffer_cache.BindHostComputeBuffers();
+}
+
+VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
+    const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
+    update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
+    return descriptor_set;
+}
 
 } // namespace Vulkan
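The update template built above pairs each binding with an offset and a fixed stride of sizeof(DescriptorUpdateEntry), so updating a set becomes one call over a packed payload instead of per-binding vkUpdateDescriptorSets calls. A sketch of the consuming side in raw Vulkan; the payload being a plain array of VkDescriptorBufferInfo records is an assumption here, since yuzu's DescriptorUpdateEntry is not shown in this diff:

// Sketch (raw Vulkan, not yuzu code): template entry N reads offset N * stride
// from one packed payload, so a single call updates every binding.
#include <vector>
#include <vulkan/vulkan.h>

void PushBufferDescriptors(VkDevice device, VkDescriptorSet set,
                           VkDescriptorUpdateTemplateKHR tmpl,
                           const std::vector<VkDescriptorBufferInfo>& buffers) {
    // The entry point comes from VK_KHR_descriptor_update_template and is
    // fetched with vkGetDeviceProcAddr in a real application.
    const auto fn = reinterpret_cast<PFN_vkUpdateDescriptorSetWithTemplateKHR>(
        vkGetDeviceProcAddr(device, "vkUpdateDescriptorSetWithTemplateKHR"));
    fn(device, set, tmpl, buffers.data());
}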
src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -5,19 +5,52 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_pipeline.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
 class Device;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
-class ComputePipeline {
+class ComputePipeline : public Pipeline {
 public:
-    explicit ComputePipeline();
-    ~ComputePipeline();
+    explicit ComputePipeline() = default;
+    explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
+                             VKUpdateDescriptorQueue& update_descriptor_queue,
+                             const Shader::Info& info, vk::ShaderModule spv_module);
+
+    ComputePipeline& operator=(ComputePipeline&&) noexcept = default;
+    ComputePipeline(ComputePipeline&&) noexcept = default;
+
+    ComputePipeline& operator=(const ComputePipeline&) = delete;
+    ComputePipeline(const ComputePipeline&) = delete;
+
+    void ConfigureBufferCache(BufferCache& buffer_cache);
+
+    [[nodiscard]] VkDescriptorSet UpdateDescriptorSet();
+
+    [[nodiscard]] VkPipeline Handle() const noexcept {
+        return *pipeline;
+    }
+
+    [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept {
+        return *pipeline_layout;
+    }
+
+private:
+    VKUpdateDescriptorQueue* update_descriptor_queue;
+    Shader::Info info;
+
+    vk::ShaderModule spv_module;
+    vk::DescriptorSetLayout descriptor_set_layout;
+    DescriptorAllocator descriptor_allocator;
+    vk::PipelineLayout pipeline_layout;
+    vk::DescriptorUpdateTemplateKHR descriptor_update_template;
+    vk::Pipeline pipeline;
 };
 
 } // namespace Vulkan
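ComputePipeline now owns its vk::ShaderModule, built from the recompiled SPIR-V. For reference, a sketch of what constructing such a module amounts to in raw Vulkan; BuildShaderModule and its error handling are illustrative, not yuzu's BuildShader:

// Sketch (raw Vulkan, not yuzu's wrapper): wrap a vector of SPIR-V words in a
// shader module. Error handling is elided.
#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

VkShaderModule BuildShaderModule(VkDevice device, const std::vector<uint32_t>& spirv) {
    const VkShaderModuleCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = spirv.size() * sizeof(uint32_t), // size in bytes, not words
        .pCode = spirv.data(),
    };
    VkShaderModule module{};
    vkCreateShaderModule(device, &ci, nullptr, &module);
    return module;
}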
src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20;
 DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
                                          VkDescriptorSetLayout layout_)
     : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
-      descriptor_pool{descriptor_pool_}, layout{layout_} {}
-
-DescriptorAllocator::~DescriptorAllocator() = default;
+      descriptor_pool{&descriptor_pool_}, layout{layout_} {}
 
 VkDescriptorSet DescriptorAllocator::Commit() {
     const std::size_t index = CommitResource();

@@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() {
 }
 
 void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
-    descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
+    descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin));
 }
 
 VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -17,8 +17,12 @@ class VKScheduler;
 
 class DescriptorAllocator final : public ResourcePool {
 public:
+    explicit DescriptorAllocator() = default;
     explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
-    ~DescriptorAllocator() override;
+    ~DescriptorAllocator() override = default;
+
+    DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
+    DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
 
     DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
     DescriptorAllocator(const DescriptorAllocator&) = delete;

@@ -29,8 +33,8 @@ protected:
     void Allocate(std::size_t begin, std::size_t end) override;
 
 private:
-    VKDescriptorPool& descriptor_pool;
-    const VkDescriptorSetLayout layout;
+    VKDescriptorPool* descriptor_pool{};
+    VkDescriptorSetLayout layout{};
 
     std::vector<vk::DescriptorSets> descriptors_allocations;
 };
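DescriptorAllocator (and ResourcePool further down) trades reference members for pointers, which is what makes the newly defaulted move operations legal: a reference member implicitly deletes assignment. A minimal, self-contained illustration of why (the types here are stand-ins, not yuzu code):

// Why the members changed from references to pointers.
#include <type_traits>

struct Pool {};

struct ByReference {
    Pool& pool; // reference member: copy/move assignment implicitly deleted
};

struct ByPointer {
    Pool* pool{}; // assignable, and default-constructible as an empty state
};

static_assert(!std::is_move_assignable_v<ByReference>);
static_assert(std::is_move_assignable_v<ByPointer>);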
src/video_core/renderer_vulkan/vk_pipeline.h (new file, 36 lines)
@@ -0,0 +1,36 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+class Pipeline {
+public:
+    /// Add a reference count to the pipeline
+    void AddRef() noexcept {
+        ++ref_count;
+    }
+
+    [[nodiscard]] bool RemoveRef() noexcept {
+        --ref_count;
+        return ref_count == 0;
+    }
+
+    [[nodiscard]] u64 UsageTick() const noexcept {
+        return usage_tick;
+    }
+
+protected:
+    u64 usage_tick{};
+
+private:
+    size_t ref_count{};
+};
+
+} // namespace Vulkan
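Pipeline is a plain intrusive reference count plus a usage tick. A self-contained sketch of the lifetime protocol it supports, mirroring what OnShaderRemoval() does in this commit; the scheduler wait is reduced to a comment, everything else compiles as-is:

#include <cstddef>
#include <cstdint>
#include <iostream>

class Pipeline {
public:
    void AddRef() noexcept {
        ++ref_count;
    }

    [[nodiscard]] bool RemoveRef() noexcept {
        --ref_count;
        return ref_count == 0;
    }

    [[nodiscard]] uint64_t UsageTick() const noexcept {
        return usage_tick;
    }

protected:
    uint64_t usage_tick{};

private:
    size_t ref_count{};
};

int main() {
    Pipeline pipeline;
    pipeline.AddRef(); // first cache key referencing this pipeline
    pipeline.AddRef(); // a second user of the same pipeline
    for (int i = 0; i < 2; ++i) {
        if (pipeline.RemoveRef()) {
            // Last reference gone: wait on UsageTick() before erasing the
            // cache entry, so the GPU is done with the pipeline.
            std::cout << "destroy after tick " << pipeline.UsageTick() << '\n';
        }
    }
}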
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -12,6 +12,8 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "shader_recompiler/environment.h"
+#include "shader_recompiler/recompiler.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
@@ -22,43 +24,105 @@
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
 #include "video_core/shader_notify.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+#pragma optimize("", off)
+
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
 using Tegra::Engines::ShaderType;
 
 namespace {
-size_t StageFromProgram(size_t program) {
-    return program == 0 ? 0 : program - 1;
-}
+class Environment final : public Shader::Environment {
+public:
+    explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
+        : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {}
 
-ShaderType StageFromProgram(Maxwell::ShaderProgram program) {
-    return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program)));
-}
+    ~Environment() override = default;
 
-ShaderType GetShaderType(Maxwell::ShaderProgram program) {
-    switch (program) {
-    case Maxwell::ShaderProgram::VertexB:
-        return ShaderType::Vertex;
-    case Maxwell::ShaderProgram::TesselationControl:
-        return ShaderType::TesselationControl;
-    case Maxwell::ShaderProgram::TesselationEval:
-        return ShaderType::TesselationEval;
-    case Maxwell::ShaderProgram::Geometry:
-        return ShaderType::Geometry;
-    case Maxwell::ShaderProgram::Fragment:
-        return ShaderType::Fragment;
-    default:
-        UNIMPLEMENTED_MSG("program={}", program);
-        return ShaderType::Vertex;
-    }
-}
+    [[nodiscard]] std::optional<u128> Analyze(u32 start_address) {
+        const std::optional<u64> size{TryFindSize(start_address)};
+        if (!size) {
+            return std::nullopt;
+        }
+        cached_lowest = start_address;
+        cached_highest = start_address + static_cast<u32>(*size);
+        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size());
+    }
+
+    [[nodiscard]] size_t ShaderSize() const noexcept {
+        return read_highest - read_lowest + INST_SIZE;
+    }
+
+    [[nodiscard]] u128 ComputeHash() const {
+        const size_t size{ShaderSize()};
+        auto data = std::make_unique<u64[]>(size);
+        gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size);
+        return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size);
+    }
+
+    u64 ReadInstruction(u32 address) override {
+        read_lowest = std::min(read_lowest, address);
+        read_highest = std::max(read_highest, address);
+
+        if (address >= cached_lowest && address < cached_highest) {
+            return code[address / INST_SIZE];
+        }
+        return gpu_memory.Read<u64>(program_base + address);
+    }
+
+    std::array<u32, 3> WorkgroupSize() override {
+        const auto& qmd{kepler_compute.launch_description};
+        return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
+    }
+
+private:
+    static constexpr size_t INST_SIZE = sizeof(u64);
+    static constexpr size_t BLOCK_SIZE = 0x1000;
+    static constexpr size_t MAXIMUM_SIZE = 0x100000;
+
+    static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
+    static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
+
+    std::optional<u64> TryFindSize(u32 start_address) {
+        GPUVAddr guest_addr = program_base + start_address;
+        size_t offset = 0;
+        size_t size = BLOCK_SIZE;
+        while (size <= MAXIMUM_SIZE) {
+            code.resize(size / INST_SIZE);
+            u64* const data = code.data() + offset / INST_SIZE;
+            gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE);
+            for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
+                const u64 inst = data[i / INST_SIZE];
+                if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
+                    return offset + i;
+                }
+            }
+            guest_addr += BLOCK_SIZE;
+            size += BLOCK_SIZE;
+            offset += BLOCK_SIZE;
+        }
+        return std::nullopt;
+    }
+
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+    GPUVAddr program_base;
+
+    u32 read_lowest = 0;
+    u32 read_highest = 0;
+
+    std::vector<u64> code;
+    u32 cached_lowest = std::numeric_limits<u32>::max();
+    u32 cached_highest = 0;
+};
 } // Anonymous namespace
 
 size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
     return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 
-Shader::Shader() = default;
-
-Shader::~Shader() = default;
-
 PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
                              Tegra::Engines::Maxwell3D& maxwell3d_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              Tegra::MemoryManager& gpu_memory_, const Device& device_,
                              VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_)
-    : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
+    : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
       scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
                                  update_descriptor_queue_} {}
 
 PipelineCache::~PipelineCache() = default;
 
-ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
+ComputePipeline* PipelineCache::CurrentComputePipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
-    auto& entry = pair->second;
-    if (!is_cache_miss) {
-        return *entry;
+    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+    const auto& qmd{kepler_compute.launch_description};
+    const GPUVAddr shader_addr{program_base + qmd.program_start};
+    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+    if (!cpu_shader_addr) {
+        return nullptr;
     }
-    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-    throw "Bad";
+    ShaderInfo* const shader{TryGet(*cpu_shader_addr)};
+    if (!shader) {
+        return CreateComputePipelineWithoutShader(*cpu_shader_addr);
+    }
+    const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)};
+    const auto [pair, is_new]{compute_cache.try_emplace(key)};
+    auto& pipeline{pair->second};
+    if (!is_new) {
+        return &pipeline;
+    }
+    pipeline = CreateComputePipeline(shader);
+    shader->compute_users.push_back(key);
+    return &pipeline;
 }
 
-void PipelineCache::OnShaderRemoval(Shader*) {}
+ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
+    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+    const auto& qmd{kepler_compute.launch_description};
+    Environment env{kepler_compute, gpu_memory, program_base};
+    if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) {
+        // TODO: Load from cache
+    }
+    const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)};
+    shader_info->unique_hash = env.ComputeHash();
+    shader_info->size_bytes = env.ShaderSize();
+    return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
+                           BuildShader(device, code)};
+}
+
+ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) {
+    ShaderInfo shader;
+    ComputePipeline pipeline{CreateComputePipeline(&shader)};
+    const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
+    shader.compute_users.push_back(key);
+    pipeline.AddRef();
+
+    const size_t size_bytes{shader.size_bytes};
+    Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);
+    return &compute_cache.emplace(key, std::move(pipeline)).first->second;
+}
+
+ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const {
+    const auto& qmd{kepler_compute.launch_description};
+    return {
+        .unique_hash = unique_hash,
+        .shared_memory_size = qmd.shared_alloc,
+        .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+    };
+}
+
+void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
+    for (const ComputePipelineCacheKey& key : shader->compute_users) {
+        const auto it = compute_cache.find(key);
+        ASSERT(it != compute_cache.end());
+
+        Pipeline& pipeline = it->second;
+        if (pipeline.RemoveRef()) {
+            // Wait for the pipeline to be free of GPU usage before destroying it
+            scheduler.Wait(pipeline.UsageTick());
+            compute_cache.erase(it);
+        }
+    }
+}
 
 } // namespace Vulkan
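Environment::TryFindSize() sizes a guest shader by scanning forward one page at a time until it sees an instruction that branches to itself, which marks the tail of the program. The core of that scan, reduced to a self-contained example; the two sentinel encodings are taken from the diff, while the input data here is fabricated test input:

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

constexpr uint64_t SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
constexpr uint64_t SELF_BRANCH_B = 0xE2400FFFFF07000FULL;

// Scan 64-bit Maxwell instruction words for a self-branch terminator.
std::optional<size_t> FindShaderEnd(const std::vector<uint64_t>& code) {
    for (size_t i = 0; i < code.size(); ++i) {
        if (code[i] == SELF_BRANCH_A || code[i] == SELF_BRANCH_B) {
            return i * sizeof(uint64_t); // byte offset of the terminator
        }
    }
    return std::nullopt; // no terminator within the scanned range
}

int main() {
    const std::vector<uint64_t> code{0x0, 0x1, SELF_BRANCH_A};
    if (const auto end = FindShaderEnd(code)) {
        std::cout << "shader ends at byte offset " << *end << '\n';
    }
}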
src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ComputePipelineCacheKey {
-    GPUVAddr shader;
+    u128 unique_hash;
     u32 shared_memory_size;
     std::array<u32, 3> workgroup_size;
@@ -67,13 +67,13 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
 
 namespace Vulkan {
 
-class Shader {
-public:
-    explicit Shader();
-    ~Shader();
+struct ShaderInfo {
+    u128 unique_hash{};
+    size_t size_bytes{};
+    std::vector<ComputePipelineCacheKey> compute_users;
 };
 
-class PipelineCache final : public VideoCommon::ShaderCache<Shader> {
+class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
 public:
     explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
                            Tegra::Engines::Maxwell3D& maxwell3d,
@@ -83,12 +83,18 @@ public:
                            VKUpdateDescriptorQueue& update_descriptor_queue);
     ~PipelineCache() override;
 
-    ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+    [[nodiscard]] ComputePipeline* CurrentComputePipeline();
 
 protected:
-    void OnShaderRemoval(Shader* shader) final;
+    void OnShaderRemoval(ShaderInfo* shader) override;
 
 private:
+    ComputePipeline CreateComputePipeline(ShaderInfo* shader);
+
+    ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
+
+    ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const;
+
     Tegra::GPU& gpu;
     Tegra::Engines::Maxwell3D& maxwell3d;
     Tegra::Engines::KeplerCompute& kepler_compute;
@@ -99,13 +105,7 @@ private:
     VKDescriptorPool& descriptor_pool;
     VKUpdateDescriptorQueue& update_descriptor_queue;
 
-    std::unique_ptr<Shader> null_shader;
-    std::unique_ptr<Shader> null_kernel;
-
-    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
-
-    std::mutex pipeline_cache;
-    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
+    std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
 };
 
 } // namespace Vulkan
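ComputePipelineCacheKey::operator== compares raw bytes with std::memcmp, which is only sound if the struct has no padding bytes; otherwise the comparison reads indeterminate values. A sketch of that invariant with stand-in field types, encoded as a static_assert:

// Sketch (not yuzu code): byte-wise key comparison requires unique object
// representations, i.e. no padding. Field types stand in for u128/u32.
#include <array>
#include <cstdint>
#include <cstring>
#include <type_traits>

struct Key {
    std::array<uint64_t, 2> unique_hash;    // 16 bytes, u128 stand-in
    uint32_t shared_memory_size;            // 4 bytes
    std::array<uint32_t, 3> workgroup_size; // 12 bytes -> 32 total, no padding
};

static_assert(std::has_unique_object_representations_v<Key>);

inline bool Equal(const Key& a, const Key& b) noexcept {
    return std::memcmp(&a, &b, sizeof(Key)) == 0;
}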
src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -36,6 +36,8 @@
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+#pragma optimize("", off)
+
 namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() {
 }
 
 void RasterizerVulkan::DispatchCompute() {
-    UNREACHABLE_MSG("Not implemented");
+    ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
+    if (!pipeline) {
+        return;
+    }
+    std::scoped_lock lock{buffer_cache.mutex};
+    update_descriptor_queue.Acquire();
+    pipeline->ConfigureBufferCache(buffer_cache);
+    const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()};
+
+    const auto& qmd{kepler_compute.launch_description};
+    const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
+    const VkPipeline pipeline_handle{pipeline->Handle()};
+    const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()};
+    scheduler.Record(
+        [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
+            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0,
+                                      descriptor_set, nullptr);
+            cmdbuf.Dispatch(dim[0], dim[1], dim[2]);
+        });
 }
 
 void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
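DispatchCompute() records the classic compute sequence: bind the pipeline, bind one descriptor set, dispatch the QMD grid. The same sequence in raw Vulkan calls, as a sketch with handles assumed created elsewhere:

// Mirror of the lambda recorded on the scheduler above, using the C API.
#include <vulkan/vulkan.h>

void RecordDispatch(VkCommandBuffer cmdbuf, VkPipeline pipeline,
                    VkPipelineLayout layout, VkDescriptorSet set,
                    uint32_t x, uint32_t y, uint32_t z) {
    vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
    vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, layout,
                            0, 1, &set, 0, nullptr); // set 0, no dynamic offsets
    vkCmdDispatch(cmdbuf, x, y, z); // workgroup counts from the QMD grid
}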
src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,7 +21,6 @@
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
-#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"

@@ -150,8 +149,6 @@ private:
     BlitImageHelper blit_image;
     ASTCDecoderPass astc_decoder_pass;
 
-    GraphicsPipelineCacheKey graphics_key;
-
     TextureCacheRuntime texture_cache_runtime;
     TextureCache texture_cache;
     BufferCacheRuntime buffer_cache_runtime;
src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
 namespace Vulkan {
 
 ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
-    : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
-
-ResourcePool::~ResourcePool() = default;
+    : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
 
 size_t ResourcePool::CommitResource() {
     // Refresh semaphore to query updated results
-    master_semaphore.Refresh();
-    const u64 gpu_tick = master_semaphore.KnownGpuTick();
+    master_semaphore->Refresh();
+    const u64 gpu_tick = master_semaphore->KnownGpuTick();
     const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
         for (size_t iterator = begin; iterator < end; ++iterator) {
             if (gpu_tick >= ticks[iterator]) {
-                ticks[iterator] = master_semaphore.CurrentTick();
+                ticks[iterator] = master_semaphore->CurrentTick();
                 return iterator;
             }
         }
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
             // Both searches failed, the pool is full; handle it.
             const size_t free_resource = ManageOverflow();
 
-            ticks[free_resource] = master_semaphore.CurrentTick();
+            ticks[free_resource] = master_semaphore->CurrentTick();
             found = free_resource;
         }
     }
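CommitResource() treats a pool slot as reusable once the semaphore's known GPU tick has reached the tick stamped on the slot when it was last handed out. The reuse test in isolation, with plain integers standing in for the master semaphore:

#include <cstdint>
#include <optional>
#include <vector>

// A slot is free once the GPU's known tick has passed the slot's stamp.
std::optional<size_t> FindFreeSlot(const std::vector<uint64_t>& ticks,
                                   uint64_t known_gpu_tick) {
    for (size_t i = 0; i < ticks.size(); ++i) {
        if (known_gpu_tick >= ticks[i]) {
            return i; // GPU finished with this slot; safe to reuse
        }
    }
    return std::nullopt; // pool exhausted; the caller grows it
}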
src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
  */
 class ResourcePool {
 public:
+    explicit ResourcePool() = default;
     explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
-    virtual ~ResourcePool();
+
+    virtual ~ResourcePool() = default;
+
+    ResourcePool& operator=(ResourcePool&&) noexcept = default;
+    ResourcePool(ResourcePool&&) noexcept = default;
+
+    ResourcePool& operator=(const ResourcePool&) = default;
+    ResourcePool(const ResourcePool&) = default;
 
 protected:
     size_t CommitResource();
@@ -34,7 +42,7 @@ private:
     /// Allocates a new page of resources.
     void Grow();
 
-    MasterSemaphore& master_semaphore;
+    MasterSemaphore* master_semaphore{};
     size_t grow_step = 0;     ///< Number of new resources created after an overflow
    size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
    std::vector<u64> ticks;   ///< Ticks for each resource