vk_query_cache: Implement generic query cache on Vulkan

This commit is contained in:
ReinUsesLisp 2020-02-11 18:59:44 -03:00
parent c31382ced5
commit bcd348f238
11 changed files with 327 additions and 20 deletions

View file

@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.depthBiasClamp = true;
features.geometryShader = true;
features.tessellationShader = true;
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
features.shaderStorageImageWriteWithoutFormat = true;
@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
SetNext(next, bit8_storage);
vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
host_query_reset.hostQueryReset = true;
SetNext(next, host_query_reset);
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
if (is_float16_supported) {
float16_int8.shaderFloat16 = true;
@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
};
std::bitset<required_extensions.size()> available_extensions{};
@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
std::make_pair(features.geometryShader, "geometryShader"),
std::make_pair(features.tessellationShader, "tessellationShader"),
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
extensions.reserve(13);
extensions.reserve(14);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");

View file

@ -0,0 +1,122 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
namespace {
constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
return QUERY_TARGETS[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
QueryPool::~QueryPool() = default;
void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
device = &device_;
type = type_;
}
std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
std::size_t index;
do {
index = CommitResource(fence);
} while (usage[index]);
usage[index] = true;
return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)};
}
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
usage.resize(end);
const auto dev = device->GetLogical();
const u32 size = static_cast<u32>(end - begin);
const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
}
void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
const auto it =
std::find_if(std::begin(pools), std::end(pools),
[query_pool = query.first](auto& pool) { return query_pool == *pool; });
ASSERT(it != std::end(pools));
const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKScheduler& scheduler)
: VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
QueryPool>{system, rasterizer},
device{device}, scheduler{scheduler} {
for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
}
}
VKQueryCache::~VKQueryCache() = default;
std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
}
void VKQueryCache::Reserve(VideoCore::QueryType type,
std::pair<vk::QueryPool, std::uint32_t> query) {
query_pools[static_cast<std::size_t>(type)].Reserve(query);
}
HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
VideoCore::QueryType type)
: VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
const auto dev = cache.Device().GetLogical();
cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) {
dev.resetQueryPoolEXT(query.first, query.second, 1, dld);
cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld);
});
}
HostCounter::~HostCounter() {
cache.Reserve(type, query);
}
void HostCounter::EndQuery() {
cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
cmdbuf.endQuery(query.first, query.second, dld);
});
}
u64 HostCounter::BlockingQuery() const {
if (ticks >= cache.Scheduler().Ticks()) {
cache.Scheduler().Flush();
}
const auto dev = cache.Device().GetLogical();
const auto& dld = cache.Device().GetDispatchLoader();
u64 value;
dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
return value;
}
} // namespace Vulkan

View file

@ -0,0 +1,104 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/query_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class CachedQuery;
class HostCounter;
class VKDevice;
class VKQueryCache;
class VKScheduler;
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
class QueryPool final : public VKFencedPool {
public:
explicit QueryPool();
~QueryPool() override;
void Initialize(const VKDevice& device, VideoCore::QueryType type);
std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
static constexpr std::size_t GROW_STEP = 512;
const VKDevice* device = nullptr;
VideoCore::QueryType type = {};
std::vector<UniqueQueryPool> pools;
std::vector<bool> usage;
};
class VKQueryCache final
: public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
QueryPool> {
public:
explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKScheduler& scheduler);
~VKQueryCache();
std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
const VKDevice& Device() const noexcept {
return device;
}
VKScheduler& Scheduler() const noexcept {
return scheduler;
}
private:
const VKDevice& device;
VKScheduler& scheduler;
};
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
public:
explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
VideoCore::QueryType type);
~HostCounter();
void EndQuery();
private:
u64 BlockingQuery() const override;
VKQueryCache& cache;
const VideoCore::QueryType type;
const std::pair<vk::QueryPool, std::uint32_t> query;
const u64 ticks;
};
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
: VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
};
} // namespace Vulkan

View file

@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
staging_pool),
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device) {}
sampler_cache(device), query_cache(system, *this, device, scheduler) {
scheduler.SetQueryCache(query_cache);
}
RasterizerVulkan::~RasterizerVulkan() = default;
@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
FlushWork();
query_cache.UpdateCounters();
const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
query_cache.UpdateCounters();
const auto& gpu = system.GPU().Maxwell3D();
if (!system.GPU().Maxwell3D().ShouldExecute()) {
return;
@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
sampled_views.clear();
image_views.clear();
query_cache.UpdateCounters();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ComputePipelineCacheKey key{
code_addr,
@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
});
}
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
query_cache.ResetCounter(type);
}
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
std::optional<u64> timestamp) {
query_cache.Query(gpu_addr, type, timestamp);
}
void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size);
query_cache.FlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
texture_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {

View file

@ -24,6 +24,7 @@
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
@ -96,7 +97,7 @@ struct ImageView {
vk::ImageLayout* layout = nullptr;
};
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
VKScreenInfo& screen_info, const VKDevice& device,
@ -108,6 +109,8 @@ public:
bool DrawMultiBatch(bool is_indexed) override;
void Clear() override;
void DispatchCompute(GPUVAddr code_addr) override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
@ -247,6 +250,7 @@ private:
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
VKQueryCache query_cache;
std::array<View, Maxwell::NumRenderTargets> color_attachments;
View zeta_attachment;

View file

@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
}
void VKScheduler::AllocateNewContext() {
++ticks;
std::unique_lock lock{mutex};
current_fence = next_fence;
next_fence = &resource_manager.CommitFence();
@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
device.GetDispatchLoader());
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
query_cache->UpdateCounters();
}
}
void VKScheduler::InvalidateState() {
@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
}
void VKScheduler::EndPendingOperations() {
query_cache->DisableStreams();
EndRenderPass();
}

View file

@ -4,6 +4,7 @@
#pragma once
#include <atomic>
#include <condition_variable>
#include <memory>
#include <optional>
@ -18,6 +19,7 @@ namespace Vulkan {
class VKDevice;
class VKFence;
class VKQueryCache;
class VKResourceManager;
class VKFenceView {
@ -67,6 +69,11 @@ public:
/// Binds a pipeline to the current execution context.
void BindGraphicsPipeline(vk::Pipeline pipeline);
/// Assigns the query cache.
void SetQueryCache(VKQueryCache& query_cache_) {
query_cache = &query_cache_;
}
/// Returns true when viewports have been set in the current command buffer.
bool TouchViewports() {
return std::exchange(state.viewports, true);
@ -112,6 +119,11 @@ public:
return current_fence;
}
/// Returns the current command buffer tick.
u64 Ticks() const {
return ticks;
}
private:
class Command {
public:
@ -205,6 +217,8 @@ private:
const VKDevice& device;
VKResourceManager& resource_manager;
VKQueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr;
@ -227,6 +241,7 @@ private:
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;
std::condition_variable cv;
std::atomic<u64> ticks = 0;
bool quit = false;
};