shader: Primitive Vulkan integration

This commit is contained in:
ReinUsesLisp 2021-02-17 00:59:28 -03:00 committed by ameerj
parent c67d64365a
commit 85cce78583
43 changed files with 1003 additions and 3036 deletions

View file

@ -43,9 +43,6 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
engines/shader_bytecode.h
engines/shader_header.h
engines/shader_type.h
framebuffer_config.h
macro/macro.cpp
macro/macro.h
@ -123,6 +120,7 @@ add_library(video_core STATIC
renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_pipeline.h
renderer_vulkan/vk_query_cache.cpp
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp
@ -201,7 +199,7 @@ add_library(video_core STATIC
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad xbyak)
target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak)
if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
add_dependencies(video_core ffmpeg-build)

View file

@ -12,7 +12,6 @@
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/textures/texture.h"

File diff suppressed because it is too large Load diff

View file

@ -1,158 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <optional>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra::Shader {
/// Values stored in the 4-bit `output_topology` field of the shader header (`common3`).
/// Only the three encodings listed here are named; other raw values have no enumerator.
enum class OutputTopology : u32 {
PointList = 1,
LineStrip = 6,
TriangleStrip = 7,
};
/// Two-bit per-component value stored in the `imap_generic_vector` entries of the header's
/// `ps` view. NOTE(review): presumably the interpolation mode of each generic input
/// component (the conflict log message in GetPixelImap suggests so) - confirm against the
/// NVIDIA Shader Program Header documentation.
enum class PixelImap : u8 {
// Components with this value are skipped by GetPixelImap.
Unused = 0,
Constant = 1,
Perspective = 2,
ScreenLinear = 3,
};
// Documentation in:
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
struct Header {
union {
BitField<0, 5, u32> sph_type;
BitField<5, 5, u32> version;
BitField<10, 4, u32> shader_type;
BitField<14, 1, u32> mrt_enable;
BitField<15, 1, u32> kills_pixels;
BitField<16, 1, u32> does_global_store;
BitField<17, 4, u32> sass_version;
BitField<21, 5, u32> reserved;
BitField<26, 1, u32> does_load_or_store;
BitField<27, 1, u32> does_fp64;
BitField<28, 4, u32> stream_out_mask;
} common0;
union {
BitField<0, 24, u32> shader_local_memory_low_size;
BitField<24, 8, u32> per_patch_attribute_count;
} common1;
union {
BitField<0, 24, u32> shader_local_memory_high_size;
BitField<24, 8, u32> threads_per_input_primitive;
} common2;
union {
BitField<0, 24, u32> shader_local_memory_crs_size;
BitField<24, 4, OutputTopology> output_topology;
BitField<28, 4, u32> reserved;
} common3;
union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<20, 4, u32> reserved;
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
} common4;
union {
struct {
INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
union {
BitField<0, 8, u16> clip_distances;
BitField<8, 1, u16> point_sprite_s;
BitField<9, 1, u16> point_sprite_t;
BitField<10, 1, u16> fog_coordinate;
BitField<12, 1, u16> tessellation_eval_point_u;
BitField<13, 1, u16> tessellation_eval_point_v;
BitField<14, 1, u16> instance_id;
BitField<15, 1, u16> vertex_id;
};
INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
} vtg;
struct {
INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
union {
BitField<0, 2, PixelImap> x;
BitField<2, 2, PixelImap> y;
BitField<4, 2, PixelImap> z;
BitField<6, 2, PixelImap> w;
u8 raw;
} imap_generic_vector[32];
INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
struct {
u32 target;
union {
BitField<0, 1, u32> sample_mask;
BitField<1, 1, u32> depth;
BitField<2, 30, u32> reserved;
};
} omap;
bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
const u32 bit = render_target * 4 + component;
return omap.target & (1 << bit);
}
PixelImap GetPixelImap(u32 attribute) const {
const auto get_index = [this, attribute](u32 index) {
return static_cast<PixelImap>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 3);
};
std::optional<PixelImap> result;
for (u32 component = 0; component < 4; ++component) {
const PixelImap index = get_index(component);
if (index == PixelImap::Unused) {
continue;
}
if (result && result != index) {
LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
}
result = index;
}
return result.value_or(PixelImap::Unused);
}
} ps;
std::array<u32, 0xF> raw;
};
u64 GetLocalMemorySize() const {
return (common1.shader_local_memory_low_size |
(common2.shader_local_memory_high_size << 24));
}
};
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
} // namespace Tegra::Shader

View file

@ -4,6 +4,9 @@
#include <vector>
#include <boost/container/small_vector.hpp>
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@ -13,9 +16,142 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
/// Creates the descriptor set layout of a compute shader: one uniform buffer binding per
/// constant buffer descriptor, followed by one storage buffer binding per storage buffer
/// descriptor. Binding indices are assigned sequentially starting at zero.
vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) {
    boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings;

    // Appends a single-descriptor compute binding of the given type at the next free index.
    const auto add_binding = [&bindings](VkDescriptorType type) {
        const u32 next_binding = static_cast<u32>(bindings.size());
        bindings.push_back(VkDescriptorSetLayoutBinding{
            .binding = next_binding,
            .descriptorType = type,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
            .pImmutableSamplers = nullptr,
        });
    };
    for ([[maybe_unused]] const auto& cbuf : info.constant_buffer_descriptors) {
        add_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
    }
    for ([[maybe_unused]] const auto& ssbo : info.storage_buffers_descriptors) {
        add_binding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    }

    const VkDescriptorSetLayoutCreateInfo create_info{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .bindingCount = static_cast<u32>(bindings.size()),
        .pBindings = bindings.data(),
    };
    return device.GetLogical().CreateDescriptorSetLayout(create_info);
}
ComputePipeline::ComputePipeline() = default;
/// Creates the descriptor update template matching the layout built by
/// CreateDescriptorSetLayout: uniform buffer entries first, storage buffer entries after.
/// Entry N targets binding N and reads its data at byte offset
/// N * sizeof(DescriptorUpdateEntry) of the update payload.
vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
    const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout,
    VkPipelineLayout pipeline_layout) {
    boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries;

    // Appends one entry of the given type; binding and payload offset follow the entry index.
    const auto add_entry = [&entries](VkDescriptorType type) {
        const size_t slot = entries.size();
        entries.push_back(VkDescriptorUpdateTemplateEntry{
            .dstBinding = static_cast<u32>(slot),
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = type,
            .offset = slot * sizeof(DescriptorUpdateEntry),
            .stride = sizeof(DescriptorUpdateEntry),
        });
    };
    for ([[maybe_unused]] const auto& cbuf : info.constant_buffer_descriptors) {
        add_entry(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
    }
    for ([[maybe_unused]] const auto& ssbo : info.storage_buffers_descriptors) {
        add_entry(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    }

    const VkDescriptorUpdateTemplateCreateInfo create_info{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
        .pDescriptorUpdateEntries = entries.data(),
        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
        .descriptorSetLayout = descriptor_set_layout,
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
        .pipelineLayout = pipeline_layout,
        .set = 0,
    };
    return device.GetLogical().CreateDescriptorUpdateTemplateKHR(create_info);
}
} // Anonymous namespace
ComputePipeline::~ComputePipeline() = default;
// Builds all Vulkan objects of a compute pipeline from an already compiled SPIR-V module:
// descriptor set layout, descriptor allocator, pipeline layout, descriptor update template and
// the pipeline itself. Takes ownership of `spv_module_`, keeps a copy of `info_` and stores a
// pointer to `update_descriptor_queue_`.
// The initializers below depend on each other (each one uses members initialized before it),
// so their order has to stay in sync with the member declaration order of the class.
ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
const Shader::Info& info_, vk::ShaderModule spv_module_)
: update_descriptor_queue{&update_descriptor_queue_}, info{info_},
spv_module(std::move(spv_module_)),
// Reads the `info` member copied just above, not the `info_` parameter.
descriptor_set_layout(CreateDescriptorSetLayout(device, info)),
descriptor_allocator(descriptor_pool, *descriptor_set_layout),
// The pipeline layout consists of exactly one descriptor set and no push constants.
pipeline_layout{device.GetLogical().CreatePipelineLayout({
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
})},
descriptor_update_template{
CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)},
// Single compute stage using entry point "main" of the SPIR-V module, no specialization.
pipeline{device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *spv_module,
.pName = "main",
.pSpecializationInfo = nullptr,
},
.layout = *pipeline_layout,
.basePipelineHandle = 0,
.basePipelineIndex = 0,
})} {}
/// Tells the buffer cache which compute uniform and storage buffers this shader uses, then
/// updates and binds the host compute buffers.
void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
    // Build a mask with `count` consecutive bits set starting at bit `index` per descriptor.
    u32 uniform_mask = 0;
    for (const auto& cbuf : info.constant_buffer_descriptors) {
        const auto count_bits = (1ULL << cbuf.count) - 1;
        uniform_mask |= count_bits << cbuf.index;
    }
    buffer_cache.SetEnabledComputeUniformBuffers(uniform_mask);

    // Storage buffers are rebound from scratch in descriptor order.
    buffer_cache.UnbindComputeStorageBuffers();
    size_t slot = 0;
    for (const auto& ssbo : info.storage_buffers_descriptors) {
        ASSERT(ssbo.count == 1);
        buffer_cache.BindComputeStorageBuffer(slot, ssbo.cbuf_index, ssbo.cbuf_offset, true);
        slot += 1;
    }

    buffer_cache.UpdateComputeBuffers();
    buffer_cache.BindHostComputeBuffers();
}
/// Commits a descriptor set from the allocator, sends it to the update descriptor queue
/// together with this pipeline's update template, and returns the committed set.
VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
    const VkDescriptorSet committed_set = descriptor_allocator.Commit();
    const auto& update_template = *descriptor_update_template;
    update_descriptor_queue->Send(update_template, committed_set);
    return committed_set;
}
} // namespace Vulkan

View file

@ -5,19 +5,52 @@
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class VKScheduler;
class VKUpdateDescriptorQueue;
class ComputePipeline {
class ComputePipeline : public Pipeline {
public:
explicit ComputePipeline();
~ComputePipeline();
explicit ComputePipeline() = default;
explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const Shader::Info& info, vk::ShaderModule spv_module);
ComputePipeline& operator=(ComputePipeline&&) noexcept = default;
ComputePipeline(ComputePipeline&&) noexcept = default;
ComputePipeline& operator=(const ComputePipeline&) = delete;
ComputePipeline(const ComputePipeline&) = delete;
void ConfigureBufferCache(BufferCache& buffer_cache);
[[nodiscard]] VkDescriptorSet UpdateDescriptorSet();
[[nodiscard]] VkPipeline Handle() const noexcept {
return *pipeline;
}
[[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept {
return *pipeline_layout;
}
private:
VKUpdateDescriptorQueue* update_descriptor_queue;
Shader::Info info;
vk::ShaderModule spv_module;
vk::DescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
vk::PipelineLayout pipeline_layout;
vk::DescriptorUpdateTemplateKHR descriptor_update_template;
vk::Pipeline pipeline;
};
} // namespace Vulkan

View file

@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20;
DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
VkDescriptorSetLayout layout_)
: ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
descriptor_pool{descriptor_pool_}, layout{layout_} {}
DescriptorAllocator::~DescriptorAllocator() = default;
descriptor_pool{&descriptor_pool_}, layout{layout_} {}
VkDescriptorSet DescriptorAllocator::Commit() {
const std::size_t index = CommitResource();
@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() {
}
void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin));
}
VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)

View file

@ -17,8 +17,12 @@ class VKScheduler;
class DescriptorAllocator final : public ResourcePool {
public:
explicit DescriptorAllocator() = default;
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
~DescriptorAllocator() override;
~DescriptorAllocator() override = default;
DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
DescriptorAllocator(const DescriptorAllocator&) = delete;
@ -29,8 +33,8 @@ protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
VKDescriptorPool& descriptor_pool;
const VkDescriptorSetLayout layout;
VKDescriptorPool* descriptor_pool{};
VkDescriptorSetLayout layout{};
std::vector<vk::DescriptorSets> descriptors_allocations;
};

View file

@ -0,0 +1,36 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
/// Base class of cached pipelines: keeps a manual reference count and a usage tick that
/// derived classes can update.
class Pipeline {
public:
    /// Add a reference count to the pipeline
    void AddRef() noexcept {
        ++ref_count;
    }

    /// Removes a reference from the pipeline.
    /// @returns True when no references remain after the call.
    [[nodiscard]] bool RemoveRef() noexcept {
        // Never decrement past zero: the counter is unsigned, so an unbalanced call would wrap
        // around to a huge value and the pipeline would never be reported as unreferenced.
        if (ref_count != 0) {
            --ref_count;
        }
        return ref_count == 0;
    }

    /// Returns the stored usage tick.
    [[nodiscard]] u64 UsageTick() const noexcept {
        return usage_tick;
    }

protected:
    u64 usage_tick{};

private:
    size_t ref_count{};
};
} // namespace Vulkan

View file

@ -12,6 +12,8 @@
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/recompiler.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@ -22,43 +24,105 @@
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_notify.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#pragma optimize("", off)
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
using Tegra::Engines::ShaderType;
namespace {
/// Maps a shader program index to its stage index: programs 0 and 1 both map to stage 0,
/// every later program maps to `program - 1`.
size_t StageFromProgram(size_t program) {
    if (program == 0) {
        return 0;
    }
    return program - 1;
}
class Environment final : public Shader::Environment {
public:
explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
: kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {}
ShaderType StageFromProgram(Maxwell::ShaderProgram program) {
return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program)));
}
~Environment() override = default;
ShaderType GetShaderType(Maxwell::ShaderProgram program) {
switch (program) {
case Maxwell::ShaderProgram::VertexB:
return ShaderType::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return ShaderType::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return ShaderType::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return ShaderType::Geometry;
case Maxwell::ShaderProgram::Fragment:
return ShaderType::Fragment;
default:
UNIMPLEMENTED_MSG("program={}", program);
return ShaderType::Vertex;
/// Searches the end of the shader starting at `start_address`, caches its code and returns
/// a hash of the cached code. Returns std::nullopt when no end could be found.
[[nodiscard]] std::optional<u128> Analyze(u32 start_address) {
    const std::optional<u64> size{TryFindSize(start_address)};
    if (!size) {
        return std::nullopt;
    }
    cached_lowest = start_address;
    cached_highest = start_address + static_cast<u32>(*size);
    // Hash exactly the bytes that belong to the shader. `code.size()` is a count of 64-bit
    // words and covers whole read blocks, so it is not a valid byte length for the hash.
    return Common::CityHash128(reinterpret_cast<const char*>(code.data()),
                               static_cast<size_t>(*size));
}
}
[[nodiscard]] size_t ShaderSize() const noexcept {
return read_highest - read_lowest + INST_SIZE;
}
/// Reads the ShaderSize() bytes starting at the lowest address read so far from GPU memory
/// and returns their hash.
[[nodiscard]] u128 ComputeHash() const {
    const size_t size{ShaderSize()};
    // `size` is a byte count; allocate just enough 64-bit words to hold it instead of `size`
    // words (eight times more memory than needed).
    const size_t num_words{(size + INST_SIZE - 1) / INST_SIZE};
    auto data = std::make_unique<u64[]>(num_words);
    gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size);
    return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size);
}
/// Returns the instruction at `address` (a byte offset relative to the program base) and
/// records the address in the lowest/highest read range. Instructions inside the cached
/// range are served from the local copy, everything else is read from GPU memory.
u64 ReadInstruction(u32 address) override {
    read_lowest = std::min(read_lowest, address);
    read_highest = std::max(read_highest, address);

    if (address >= cached_lowest && address < cached_highest) {
        // The cached code starts at `cached_lowest` (code[0] is the instruction at that
        // address), so the index has to be relative to it, not to the program base.
        return code[(address - cached_lowest) / INST_SIZE];
    }
    return gpu_memory.Read<u64>(program_base + address);
}
/// Returns the block dimensions (x, y, z) of the current compute launch description.
std::array<u32, 3> WorkgroupSize() override {
    const auto& launch{kepler_compute.launch_description};
    return {
        launch.block_dim_x,
        launch.block_dim_y,
        launch.block_dim_z,
    };
}
private:
static constexpr size_t INST_SIZE = sizeof(u64);
static constexpr size_t BLOCK_SIZE = 0x1000;
static constexpr size_t MAXIMUM_SIZE = 0x100000;
static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
/// Reads guest code block by block into `code`, starting at `start_address`, until one of
/// the two self-branch instructions is found. Returns the byte offset of that instruction
/// relative to `start_address`, or std::nullopt when none is found within MAXIMUM_SIZE bytes.
std::optional<u64> TryFindSize(u32 start_address) {
    GPUVAddr block_addr = program_base + start_address;
    // `scanned` is the number of bytes already searched; one more block is read per iteration.
    for (size_t scanned = 0; scanned + BLOCK_SIZE <= MAXIMUM_SIZE; scanned += BLOCK_SIZE) {
        code.resize((scanned + BLOCK_SIZE) / INST_SIZE);
        u64* const block = code.data() + scanned / INST_SIZE;
        gpu_memory.ReadBlock(block_addr, block, BLOCK_SIZE);

        constexpr size_t insts_per_block = BLOCK_SIZE / INST_SIZE;
        for (size_t index = 0; index < insts_per_block; ++index) {
            const u64 inst = block[index];
            const bool is_self_branch = inst == SELF_BRANCH_A || inst == SELF_BRANCH_B;
            if (is_self_branch) {
                return scanned + index * INST_SIZE;
            }
        }
        block_addr += BLOCK_SIZE;
    }
    return std::nullopt;
}
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
GPUVAddr program_base;
u32 read_lowest = 0;
u32 read_highest = 0;
std::vector<u64> code;
u32 cached_lowest = std::numeric_limits<u32>::max();
u32 cached_highest = 0;
};
} // Anonymous namespace
size_t ComputePipelineCacheKey::Hash() const noexcept {
@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
Shader::Shader() = default;
Shader::~Shader() = default;
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
VKUpdateDescriptorQueue& update_descriptor_queue_)
: VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
: VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
update_descriptor_queue_} {}
PipelineCache::~PipelineCache() = default;
ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
ComputePipeline* PipelineCache::CurrentComputePipeline() {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
auto& entry = pair->second;
if (!is_cache_miss) {
return *entry;
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
const auto& qmd{kepler_compute.launch_description};
const GPUVAddr shader_addr{program_base + qmd.program_start};
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
if (!cpu_shader_addr) {
return nullptr;
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
throw "Bad";
ShaderInfo* const shader{TryGet(*cpu_shader_addr)};
if (!shader) {
return CreateComputePipelineWithoutShader(*cpu_shader_addr);
}
const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)};
const auto [pair, is_new]{compute_cache.try_emplace(key)};
auto& pipeline{pair->second};
if (!is_new) {
return &pipeline;
}
pipeline = CreateComputePipeline(shader);
shader->compute_users.push_back(key);
return &pipeline;
}
void PipelineCache::OnShaderRemoval(Shader*) {}
/// Recompiles the compute shader of the current launch description to SPIR-V and builds a
/// pipeline from it. Fills `shader_info` with the hash and size of the code that was read
/// while recompiling.
ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
    const auto& launch{kepler_compute.launch_description};
    const GPUVAddr code_base{kepler_compute.regs.code_loc.Address()};
    Environment env{kepler_compute, gpu_memory, code_base};

    const std::optional<u128> cached_hash{env.Analyze(launch.program_start)};
    if (cached_hash) {
        // TODO: Load from cache
    }

    const auto [info, code]{Shader::RecompileSPIRV(env, launch.program_start)};

    // Hash and size depend on the instructions read by the recompiler, so query them after it.
    shader_info->unique_hash = env.ComputeHash();
    shader_info->size_bytes = env.ShaderSize();

    return ComputePipeline{
        device, descriptor_pool, update_descriptor_queue, info, BuildShader(device, code),
    };
}
/// Builds the compute pipeline for a shader that is not registered in the shader cache yet,
/// registers the shader at `shader_cpu_addr` and returns the cached pipeline for its key.
ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) {
    ShaderInfo shader;
    ComputePipeline pipeline{CreateComputePipeline(&shader)};
    const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
    shader.compute_users.push_back(key);

    // Read the size before the shader is moved into the cache.
    const size_t size_bytes{shader.size_bytes};
    Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);

    // The key may already be cached (same code and launch parameters at another address). In
    // that case the freshly built pipeline is dropped and the cached one is reused. Either way
    // the reference has to be added to the pipeline that lives in the cache, because the newly
    // registered shader lists this key as one of its users.
    const auto it{compute_cache.try_emplace(key, std::move(pipeline)).first};
    it->second.AddRef();
    return &it->second;
}
/// Builds the pipeline cache key for `unique_hash` combined with the shared memory size and
/// block dimensions of the current compute launch description.
ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const {
    const auto& launch{kepler_compute.launch_description};
    const ComputePipelineCacheKey key{
        .unique_hash = unique_hash,
        .shared_memory_size = launch.shared_alloc,
        .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
    };
    return key;
}
/// Called when a shader is removed from the cache: drops the shader's reference on every
/// compute pipeline it uses and destroys the pipelines that are left without references.
void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
    for (const ComputePipelineCacheKey& key : shader->compute_users) {
        const auto it = compute_cache.find(key);
        ASSERT(it != compute_cache.end());
        if (it == compute_cache.end()) {
            // The pipeline is already gone; the end iterator must not be dereferenced or
            // erased even when the assertion above does not stop execution.
            continue;
        }
        Pipeline& pipeline = it->second;
        if (pipeline.RemoveRef()) {
            // Wait for the pipeline to be free of GPU usage before destroying it
            scheduler.Wait(pipeline.UsageTick());
            compute_cache.erase(it);
        }
    }
}
} // namespace Vulkan

View file

@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ComputePipelineCacheKey {
GPUVAddr shader;
u128 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
@ -67,13 +67,13 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan {
class Shader {
public:
explicit Shader();
~Shader();
/// Per-shader entry stored in the shader cache.
struct ShaderInfo {
/// Hash of the shader code; used to build compute pipeline cache keys.
u128 unique_hash{};
/// Size of the shader code in bytes; passed to the cache when the shader is registered.
size_t size_bytes{};
/// Keys of the compute pipelines built from this shader; walked when the shader is removed.
std::vector<ComputePipelineCacheKey> compute_users;
};
class PipelineCache final : public VideoCommon::ShaderCache<Shader> {
class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
public:
explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
Tegra::Engines::Maxwell3D& maxwell3d,
@ -83,12 +83,18 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue);
~PipelineCache() override;
ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
protected:
void OnShaderRemoval(Shader* shader) final;
void OnShaderRemoval(ShaderInfo* shader) override;
private:
ComputePipeline CreateComputePipeline(ShaderInfo* shader);
ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const;
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
@ -99,13 +105,7 @@ private:
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
std::unique_ptr<Shader> null_shader;
std::unique_ptr<Shader> null_kernel;
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
std::mutex pipeline_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
};
} // namespace Vulkan

View file

@ -36,6 +36,8 @@
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#pragma optimize("", off)
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() {
}
void RasterizerVulkan::DispatchCompute() {
UNREACHABLE_MSG("Not implemented");
ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
if (!pipeline) {
return;
}
std::scoped_lock lock{buffer_cache.mutex};
update_descriptor_queue.Acquire();
pipeline->ConfigureBufferCache(buffer_cache);
const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()};
const auto& qmd{kepler_compute.launch_description};
const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
const VkPipeline pipeline_handle{pipeline->Handle()};
const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()};
scheduler.Record(
[pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0,
descriptor_set, nullptr);
cmdbuf.Dispatch(dim[0], dim[1], dim[2]);
});
}
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {

View file

@ -21,7 +21,6 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -150,8 +149,6 @@ private:
BlitImageHelper blit_image;
ASTCDecoderPass astc_decoder_pass;
GraphicsPipelineCacheKey graphics_key;
TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;

View file

@ -10,18 +10,16 @@
namespace Vulkan {
ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
: master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
ResourcePool::~ResourcePool() = default;
: master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
master_semaphore.Refresh();
const u64 gpu_tick = master_semaphore.KnownGpuTick();
master_semaphore->Refresh();
const u64 gpu_tick = master_semaphore->KnownGpuTick();
const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
for (size_t iterator = begin; iterator < end; ++iterator) {
if (gpu_tick >= ticks[iterator]) {
ticks[iterator] = master_semaphore.CurrentTick();
ticks[iterator] = master_semaphore->CurrentTick();
return iterator;
}
}
@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
// Both searches failed, the pool is full; handle it.
const size_t free_resource = ManageOverflow();
ticks[free_resource] = master_semaphore.CurrentTick();
ticks[free_resource] = master_semaphore->CurrentTick();
found = free_resource;
}
}

View file

@ -18,8 +18,16 @@ class MasterSemaphore;
*/
class ResourcePool {
public:
explicit ResourcePool() = default;
explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
virtual ~ResourcePool();
virtual ~ResourcePool() = default;
ResourcePool& operator=(ResourcePool&&) noexcept = default;
ResourcePool(ResourcePool&&) noexcept = default;
ResourcePool& operator=(const ResourcePool&) = default;
ResourcePool(const ResourcePool&) = default;
protected:
size_t CommitResource();
@ -34,7 +42,7 @@ private:
/// Allocates a new page of resources.
void Grow();
MasterSemaphore& master_semaphore;
MasterSemaphore* master_semaphore{};
size_t grow_step = 0; ///< Number of new resources created after an overflow
size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource