Vk Async pipeline compilation

This commit is contained in:
ameerj 2020-07-28 00:08:02 -04:00
parent db96034ea4
commit 6ac97405df
13 changed files with 182 additions and 20 deletions

View file

@ -382,6 +382,8 @@ bool VKDevice::Create() {
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
return true;
}

View file

@ -202,6 +202,10 @@ public:
return reported_extensions;
}
/// Returns the asynchronous shaders setting that was captured when the device was created.
bool UseAsynchronousShaders() const {
return use_asynchronous_shaders;
}
/// Checks if the physical device is suitable.
static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
@ -251,6 +255,7 @@ private:
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool use_asynchronous_shaders{}; ///< Copy of the use_asynchronous_shaders setting read in Create().
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.

View file

@ -29,7 +29,7 @@ void InnerFence::Queue() {
}
ASSERT(!event);
event = device.GetLogical().CreateEvent();
event = device.GetLogical().CreateNewEvent();
ticks = scheduler.Ticks();
scheduler.RequestOutsideRenderPassOperationContext();

View file

@ -84,9 +84,8 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
program)},
renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
key.renderpass_params,
program)} {}
renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)},
pipeline{CreatePipeline(key.renderpass_params, program)}, m_key{key} {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;

View file

@ -54,6 +54,10 @@ public:
return renderpass;
}
/// Returns the cache key this pipeline was constructed with.
/// Const-qualified so it can also be queried through a const pipeline.
const GraphicsPipelineCacheKey& GetCacheKey() const {
    return m_key;
}
private:
vk::DescriptorSetLayout CreateDescriptorSetLayout(
vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@ -82,6 +86,8 @@ private:
VkRenderPass renderpass;
vk::Pipeline pipeline;
/// Key this pipeline was built from. Stored by value (not by reference) so that
/// GetCacheKey() stays valid after the key passed to the constructor goes out of scope.
const GraphicsPipelineCacheKey m_key;
};
} // namespace Vulkan

View file

@ -205,7 +205,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
return last_shaders = shaders;
}
VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
@ -213,11 +214,27 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineC
}
last_graphics_key = key;
if (device.UseAsynchronousShaders()) {
auto work = async_shaders.GetCompletedWork();
for (std::size_t i = 0; i < work.size(); ++i) {
auto& entry = graphics_cache.at(work[i].pipeline->GetCacheKey());
entry = std::move(work[i].pipeline);
}
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
if (is_cache_miss) {
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params,
key.padding, key.shaders, key.fixed_state);
}
return *(last_graphics_pipeline = graphics_cache.at(key).get());
}
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key);
const auto [program, bindings] = DecompileShaders(key.fixed_state);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
@ -312,8 +329,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
}
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& fixed_state = key.fixed_state;
VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
auto& memory_manager = system.GPU().MemoryManager();
const auto& gpu = system.GPU().Maxwell3D();

View file

@ -22,6 +22,7 @@
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
@ -152,16 +153,37 @@ public:
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
VideoCommon::Shader::AsyncShaders& async_shaders);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
/// Returns the device this pipeline cache refers to.
/// Const-qualified: it only hands out a const reference and never mutates the cache.
const VKDevice& GetDevice() const {
    return device;
}
/// Returns the scheduler held by this pipeline cache.
VKScheduler& GetScheduler() {
return scheduler;
}
/// Returns the descriptor pool held by this pipeline cache.
VKDescriptorPool& GetDescriptorPool() {
return descriptor_pool;
}
/// Returns the update descriptor queue held by this pipeline cache.
VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
return update_descriptor_queue;
}
/// Returns the render pass cache held by this pipeline cache.
VKRenderPassCache& GetRenderpassCache() {
return renderpass_cache;
}
protected:
void OnShaderRemoval(Shader* shader) final;
private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
const GraphicsPipelineCacheKey& key);
const FixedPipelineState& fixed_state);
Core::System& system;
const VKDevice& device;
@ -177,6 +199,7 @@ private:
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
std::vector<std::unique_ptr<VKGraphicsPipeline>> duplicates;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache;

View file

@ -400,8 +400,25 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device),
fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
query_cache(system, *this, device, scheduler),
wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
scheduler.SetQueryCache(query_cache);
if (device.UseAsynchronousShaders()) {
// Max worker threads we should allow
constexpr auto MAX_THREADS = 2u;
// Amount of threads we should reserve for other parts of yuzu
constexpr auto RESERVED_THREADS = 6u;
// Get the number of threads we can use (hardware_concurrency() can return zero)
const auto cpu_thread_count =
std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
// Deduce how many "extra" threads we have to use.
const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
// Always allow at least 1 thread regardless of our settings
const auto max_worker_count = std::max(1u, max_threads_unused);
// Don't use more than MAX_THREADS
const auto worker_count = std::min(max_worker_count, MAX_THREADS);
async_shaders.AllocateWorkers(worker_count);
}
}
RasterizerVulkan::~RasterizerVulkan() = default;
@ -439,7 +456,13 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
key.renderpass_params = GetRenderPassParams(texceptions);
key.padding = 0;
auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) {
// Async graphics pipeline was not ready.
system.GPU().TickWork();
return;
}
scheduler.BindGraphicsPipeline(pipeline.GetHandle());
const auto renderpass = pipeline.GetRenderPass();

View file

@ -32,6 +32,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
namespace Core {
class System;
@ -136,6 +137,14 @@ public:
u32 pixel_stride) override;
void SetupDirtyFlags() override;
/// Returns mutable access to the AsyncShaders instance owned by this rasterizer.
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
return async_shaders;
}
/// Returns read-only access to the AsyncShaders instance owned by this rasterizer.
const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
return async_shaders;
}
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
@ -278,6 +287,7 @@ private:
VKMemoryManager& memory_manager;
StateTracker& state_tracker;
VKScheduler& scheduler;
VideoCommon::Shader::AsyncShaders async_shaders;
VKStagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool;

View file

@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
return ShaderModule(object, handle, *dld);
}
Event Device::CreateEvent() const {
Event Device::CreateNewEvent() const {
static constexpr VkEventCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
.pNext = nullptr,

View file

@ -721,7 +721,7 @@ public:
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
Event CreateEvent() const;
Event CreateNewEvent() const;
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;