Merge pull request #11225 from FernandoS27/no-laxatives-in-santas-cookies
Y.F.C: Rework the Query Cache.
This commit is contained in:
commit
854457a392
45 changed files with 3571 additions and 384 deletions
|
@ -61,6 +61,9 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
|
|||
if (device.IsExtTransformFeedbackSupported()) {
|
||||
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
|
||||
}
|
||||
if (device.IsExtConditionalRendering()) {
|
||||
flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
|
||||
}
|
||||
const VkBufferCreateInfo buffer_ci = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
|
|
|
@ -12,6 +12,9 @@
|
|||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/astc_decoder_comp_spv.h"
|
||||
#include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h"
|
||||
#include "video_core/host_shaders/queries_prefix_scan_sum_nosubgroups_comp_spv.h"
|
||||
#include "video_core/host_shaders/resolve_conditional_render_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
|
@ -57,6 +60,30 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SE
|
|||
},
|
||||
}};
|
||||
|
||||
constexpr std::array<VkDescriptorSetLayoutBinding, 3> QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS{{
|
||||
{
|
||||
.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
{
|
||||
.binding = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
{
|
||||
.binding = 2,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
}};
|
||||
|
||||
constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
|
||||
.uniform_buffers = 0,
|
||||
.storage_buffers = 2,
|
||||
|
@ -67,6 +94,16 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
|
|||
.score = 2,
|
||||
};
|
||||
|
||||
constexpr DescriptorBankInfo QUERIES_SCAN_BANK_INFO{
|
||||
.uniform_buffers = 0,
|
||||
.storage_buffers = 3,
|
||||
.texture_buffers = 0,
|
||||
.image_buffers = 0,
|
||||
.textures = 0,
|
||||
.images = 0,
|
||||
.score = 3,
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
|
||||
{
|
||||
.binding = ASTC_BINDING_INPUT_BUFFER,
|
||||
|
@ -103,6 +140,15 @@ constexpr VkDescriptorUpdateTemplateEntry INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLAT
|
|||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
};
|
||||
|
||||
constexpr VkDescriptorUpdateTemplateEntry QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE{
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 3,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.offset = 0,
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS>
|
||||
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
|
||||
{
|
||||
|
@ -131,13 +177,21 @@ struct AstcPushConstants {
|
|||
u32 block_height;
|
||||
u32 block_height_mask;
|
||||
};
|
||||
|
||||
struct QueriesPrefixScanPushConstants {
|
||||
u32 min_accumulation_base;
|
||||
u32 max_accumulation_base;
|
||||
u32 accumulation_limit;
|
||||
u32 buffer_offset;
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntry> templates,
|
||||
const DescriptorBankInfo& bank_info,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code,
|
||||
std::optional<u32> optional_subgroup_size)
|
||||
: device{device_} {
|
||||
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
|
@ -178,13 +232,19 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
|
|||
.pCode = code.data(),
|
||||
});
|
||||
device.SaveShader(code);
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.requiredSubgroupSize = optional_subgroup_size ? *optional_subgroup_size : 32U,
|
||||
};
|
||||
bool use_setup_size = device.IsExtSubgroupSizeControlSupported() && optional_subgroup_size;
|
||||
pipeline = device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.pNext = use_setup_size ? &subgroup_size_ci : nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *module,
|
||||
|
@ -302,6 +362,123 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
|||
return {staging.buffer, staging.offset};
|
||||
}
|
||||
|
||||
ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
|
||||
const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
|
||||
: ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
|
||||
INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr,
|
||||
RESOLVE_CONDITIONAL_RENDER_COMP_SPV),
|
||||
scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
|
||||
|
||||
void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
u32 src_offset, bool compare_to_zero) {
|
||||
const size_t compare_size = compare_to_zero ? 8 : 24;
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32));
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr VkMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
|
||||
};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.Dispatch(1, 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
QueriesPrefixScanPass::QueriesPrefixScanPass(
|
||||
const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
|
||||
: ComputePass(
|
||||
device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS,
|
||||
QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO,
|
||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>,
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) &&
|
||||
device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
|
||||
? std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_COMP_SPV)
|
||||
: std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV)),
|
||||
scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
|
||||
|
||||
void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer,
|
||||
VkBuffer src_buffer, size_t number_of_sums,
|
||||
size_t min_accumulation_limit, size_t max_accumulation_limit) {
|
||||
size_t current_runs = number_of_sums;
|
||||
size_t offset = 0;
|
||||
while (current_runs != 0) {
|
||||
static constexpr size_t DISPATCH_SIZE = 2048U;
|
||||
size_t runs_to_do = std::min<size_t>(current_runs, DISPATCH_SIZE);
|
||||
current_runs -= runs_to_do;
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64));
|
||||
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64));
|
||||
compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64));
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
size_t used_offset = offset;
|
||||
offset += runs_to_do;
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([this, descriptor_data, min_accumulation_limit, max_accumulation_limit,
|
||||
runs_to_do, used_offset](vk::CommandBuffer cmdbuf) {
|
||||
static constexpr VkMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
};
|
||||
static constexpr VkMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
||||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT |
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
|
||||
VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
|
||||
VK_ACCESS_UNIFORM_READ_BIT |
|
||||
VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
|
||||
};
|
||||
const QueriesPrefixScanPushConstants uniforms{
|
||||
.min_accumulation_base = static_cast<u32>(min_accumulation_limit),
|
||||
.max_accumulation_base = static_cast<u32>(max_accumulation_limit),
|
||||
.accumulation_limit = static_cast<u32>(runs_to_do - 1),
|
||||
.buffer_offset = static_cast<u32>(used_offset),
|
||||
};
|
||||
const VkDescriptorSet set = descriptor_allocator.Commit();
|
||||
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
|
||||
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
|
||||
cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
|
||||
cmdbuf.Dispatch(1, 1, 1);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0,
|
||||
write_barrier);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
|
||||
|
@ -31,7 +32,8 @@ public:
|
|||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntry> templates,
|
||||
const DescriptorBankInfo& bank_info,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code,
|
||||
std::optional<u32> optional_subgroup_size = std::nullopt);
|
||||
~ComputePass();
|
||||
|
||||
protected:
|
||||
|
@ -82,6 +84,33 @@ private:
|
|||
ComputePassDescriptorQueue& compute_pass_descriptor_queue;
|
||||
};
|
||||
|
||||
class ConditionalRenderingResolvePass final : public ComputePass {
|
||||
public:
|
||||
explicit ConditionalRenderingResolvePass(
|
||||
const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue_);
|
||||
|
||||
void Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, u32 src_offset, bool compare_to_zero);
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue;
|
||||
};
|
||||
|
||||
class QueriesPrefixScanPass final : public ComputePass {
|
||||
public:
|
||||
explicit QueriesPrefixScanPass(const Device& device_, Scheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue_);
|
||||
|
||||
void Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, VkBuffer src_buffer,
|
||||
size_t number_of_sums, size_t min_accumulation_limit, size_t max_accumulation_limit);
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue;
|
||||
};
|
||||
|
||||
class ASTCDecoderPass final : public ComputePass {
|
||||
public:
|
||||
explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "video_core/fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
|
||||
namespace Core {
|
||||
|
@ -20,7 +21,6 @@ class RasterizerInterface;
|
|||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class QueryCache;
|
||||
class Scheduler;
|
||||
|
||||
class InnerFence : public VideoCommon::FenceBase {
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,101 +1,75 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
#include "video_core/query_cache/query_cache_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace VideoCommon {
|
||||
class StreamerInterface;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class CachedQuery;
|
||||
class Device;
|
||||
class HostCounter;
|
||||
class QueryCache;
|
||||
class Scheduler;
|
||||
class StagingBufferPool;
|
||||
|
||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
||||
struct QueryCacheRuntimeImpl;
|
||||
|
||||
class QueryPool final : public ResourcePool {
|
||||
class QueryCacheRuntime {
|
||||
public:
|
||||
explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type);
|
||||
~QueryPool() override;
|
||||
explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
|
||||
Core::Memory::Memory& cpu_memory_,
|
||||
Vulkan::BufferCache& buffer_cache_, const Device& device_,
|
||||
const MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
|
||||
StagingBufferPool& staging_pool_,
|
||||
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
|
||||
DescriptorPool& descriptor_pool);
|
||||
~QueryCacheRuntime();
|
||||
|
||||
std::pair<VkQueryPool, u32> Commit();
|
||||
template <typename SyncValuesType>
|
||||
void SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer = nullptr);
|
||||
|
||||
void Reserve(std::pair<VkQueryPool, u32> query);
|
||||
void Barriers(bool is_prebarrier);
|
||||
|
||||
protected:
|
||||
void Allocate(std::size_t begin, std::size_t end) override;
|
||||
void EndHostConditionalRendering();
|
||||
|
||||
void PauseHostConditionalRendering();
|
||||
|
||||
void ResumeHostConditionalRendering();
|
||||
|
||||
bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty);
|
||||
|
||||
bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1,
|
||||
VideoCommon::LookupData object_2, bool qc_dirty,
|
||||
bool equal_check);
|
||||
|
||||
VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type);
|
||||
|
||||
void Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d);
|
||||
|
||||
template <typename Func>
|
||||
void View3DRegs(Func&& func);
|
||||
|
||||
private:
|
||||
static constexpr std::size_t GROW_STEP = 512;
|
||||
|
||||
const Device& device;
|
||||
const VideoCore::QueryType type;
|
||||
|
||||
std::vector<vk::QueryPool> pools;
|
||||
std::vector<bool> usage;
|
||||
void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal);
|
||||
void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal);
|
||||
friend struct QueryCacheRuntimeImpl;
|
||||
std::unique_ptr<QueryCacheRuntimeImpl> impl;
|
||||
};
|
||||
|
||||
class QueryCache final
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
||||
public:
|
||||
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Core::Memory::Memory& cpu_memory_, const Device& device_,
|
||||
Scheduler& scheduler_);
|
||||
~QueryCache();
|
||||
|
||||
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
|
||||
|
||||
void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
|
||||
|
||||
const Device& GetDevice() const noexcept {
|
||||
return device;
|
||||
}
|
||||
|
||||
Scheduler& GetScheduler() const noexcept {
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
|
||||
struct QueryCacheParams {
|
||||
using RuntimeType = typename Vulkan::QueryCacheRuntime;
|
||||
};
|
||||
|
||||
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
|
||||
public:
|
||||
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
|
||||
VideoCore::QueryType type_);
|
||||
~HostCounter();
|
||||
|
||||
void EndQuery();
|
||||
|
||||
private:
|
||||
u64 BlockingQuery(bool async = false) const override;
|
||||
|
||||
QueryCache& cache;
|
||||
const VideoCore::QueryType type;
|
||||
const std::pair<VkQueryPool, u32> query;
|
||||
const u64 tick;
|
||||
};
|
||||
|
||||
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
|
||||
public:
|
||||
explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
|
||||
: CachedQueryBase{cpu_addr_, host_ptr_} {}
|
||||
};
|
||||
using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
|
@ -170,9 +171,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
|
||||
guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool),
|
||||
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
|
||||
query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler,
|
||||
staging_pool, compute_pass_descriptor_queue, descriptor_pool),
|
||||
query_cache(gpu, *this, cpu_memory_, query_cache_runtime),
|
||||
pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
|
||||
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
|
||||
query_cache{*this, cpu_memory_, device, scheduler},
|
||||
accelerate_dma(buffer_cache, texture_cache, scheduler),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
|
||||
wfi_event(device.GetLogical().CreateEvent()) {
|
||||
|
@ -189,14 +192,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
|||
FlushWork();
|
||||
gpu_memory->FlushCaching();
|
||||
|
||||
#if ANDROID
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// This is problematic on Android, disable on GPU Normal.
|
||||
query_cache.UpdateCounters();
|
||||
}
|
||||
#else
|
||||
query_cache.UpdateCounters();
|
||||
#endif
|
||||
query_cache.NotifySegment(true);
|
||||
|
||||
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
|
||||
if (!pipeline) {
|
||||
|
@ -207,13 +203,12 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
|||
pipeline->SetEngine(maxwell3d, gpu_memory);
|
||||
pipeline->Configure(is_indexed);
|
||||
|
||||
BeginTransformFeedback();
|
||||
|
||||
UpdateDynamicStates();
|
||||
|
||||
HandleTransformFeedback();
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
|
||||
maxwell3d->regs.zpass_pixel_count_enable);
|
||||
draw_func();
|
||||
|
||||
EndTransformFeedback();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
|
||||
|
@ -241,6 +236,14 @@ void RasterizerVulkan::DrawIndirect() {
|
|||
const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
|
||||
const auto& buffer = indirect_buffer.first;
|
||||
const auto& offset = indirect_buffer.second;
|
||||
if (params.is_byte_count) {
|
||||
scheduler.Record([buffer_obj = buffer->Handle(), offset,
|
||||
stride = params.stride](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0,
|
||||
static_cast<u32>(stride));
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (params.include_count) {
|
||||
const auto count = buffer_cache.GetDrawIndirectCount();
|
||||
const auto& draw_buffer = count.first;
|
||||
|
@ -280,20 +283,15 @@ void RasterizerVulkan::DrawTexture() {
|
|||
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
FlushWork();
|
||||
|
||||
#if ANDROID
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// This is problematic on Android, disable on GPU Normal.
|
||||
query_cache.UpdateCounters();
|
||||
}
|
||||
#else
|
||||
query_cache.UpdateCounters();
|
||||
#endif
|
||||
query_cache.NotifySegment(true);
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
|
||||
UpdateDynamicStates();
|
||||
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
|
||||
maxwell3d->regs.zpass_pixel_count_enable);
|
||||
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
|
||||
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
|
||||
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
|
||||
|
@ -316,14 +314,9 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
|||
FlushWork();
|
||||
gpu_memory->FlushCaching();
|
||||
|
||||
#if ANDROID
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// This is problematic on Android, disable on GPU Normal.
|
||||
query_cache.UpdateCounters();
|
||||
}
|
||||
#else
|
||||
query_cache.UpdateCounters();
|
||||
#endif
|
||||
query_cache.NotifySegment(true);
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
|
||||
maxwell3d->regs.zpass_pixel_count_enable);
|
||||
|
||||
auto& regs = maxwell3d->regs;
|
||||
const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
|
||||
|
@ -482,13 +475,13 @@ void RasterizerVulkan::DispatchCompute() {
|
|||
scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
|
||||
}
|
||||
|
||||
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
|
||||
query_cache.ResetCounter(type);
|
||||
void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
|
||||
query_cache.CounterReset(type);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
|
||||
std::optional<u64> timestamp) {
|
||||
query_cache.Query(gpu_addr, type, timestamp);
|
||||
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
|
||||
query_cache.CounterReport(gpu_addr, type, flags, payload, subreport);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
||||
|
@ -669,8 +662,8 @@ void RasterizerVulkan::SignalReference() {
|
|||
fence_manager.SignalReference();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::ReleaseFences() {
|
||||
fence_manager.WaitPendingFences();
|
||||
void RasterizerVulkan::ReleaseFences(bool force) {
|
||||
fence_manager.WaitPendingFences(force);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size,
|
||||
|
@ -694,6 +687,8 @@ void RasterizerVulkan::WaitForIdle() {
|
|||
flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
|
||||
}
|
||||
|
||||
query_cache.NotifyWFI();
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.SetEvent(event, flags);
|
||||
|
@ -737,19 +732,7 @@ void RasterizerVulkan::TickFrame() {
|
|||
|
||||
bool RasterizerVulkan::AccelerateConditionalRendering() {
|
||||
gpu_memory->FlushCaching();
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// TODO(Blinkhawk): Reimplement Host conditional rendering.
|
||||
return false;
|
||||
}
|
||||
// Medium / Low Hack: stub any checks on queries written into the buffer cache.
|
||||
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
|
||||
Maxwell::ReportSemaphore::Compare cmp;
|
||||
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
|
||||
VideoCommon::CacheType::BufferCache |
|
||||
VideoCommon::CacheType::QueryCache)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return query_cache.AccelerateHostConditionalRendering();
|
||||
}
|
||||
|
||||
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
|
@ -795,6 +778,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
if (!image_view) {
|
||||
return false;
|
||||
}
|
||||
query_cache.NotifySegment(false);
|
||||
screen_info.image = image_view->ImageHandle();
|
||||
screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D);
|
||||
screen_info.width = image_view->size.width;
|
||||
|
@ -933,31 +917,18 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::BeginTransformFeedback() {
|
||||
void RasterizerVulkan::HandleTransformFeedback() {
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (regs.transform_feedback_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||
return;
|
||||
}
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
|
||||
scheduler.Record(
|
||||
[](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
||||
}
|
||||
|
||||
void RasterizerVulkan::EndTransformFeedback() {
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (regs.transform_feedback_enabled == 0) {
|
||||
return;
|
||||
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount,
|
||||
regs.transform_feedback_enabled);
|
||||
if (regs.transform_feedback_enabled != 0) {
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
|
||||
}
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
return;
|
||||
}
|
||||
scheduler.Record(
|
||||
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
||||
}
|
||||
|
||||
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
|
|
|
@ -84,8 +84,9 @@ public:
|
|||
void DrawTexture() override;
|
||||
void Clear(u32 layer_count) override;
|
||||
void DispatchCompute() override;
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void ResetCounter(VideoCommon::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
|
||||
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
|
||||
void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
|
||||
void FlushAll() override;
|
||||
|
@ -106,7 +107,7 @@ public:
|
|||
void SyncOperation(std::function<void()>&& func) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void SignalReference() override;
|
||||
void ReleaseFences() override;
|
||||
void ReleaseFences(bool force = true) override;
|
||||
void FlushAndInvalidateRegion(
|
||||
VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
|
||||
void WaitForIdle() override;
|
||||
|
@ -146,9 +147,7 @@ private:
|
|||
|
||||
void UpdateDynamicStates();
|
||||
|
||||
void BeginTransformFeedback();
|
||||
|
||||
void EndTransformFeedback();
|
||||
void HandleTransformFeedback();
|
||||
|
||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
@ -195,8 +194,9 @@ private:
|
|||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
PipelineCache pipeline_cache;
|
||||
QueryCacheRuntime query_cache_runtime;
|
||||
QueryCache query_cache;
|
||||
PipelineCache pipeline_cache;
|
||||
AccelerateDMA accelerate_dma;
|
||||
FenceManager fence_manager;
|
||||
|
||||
|
|
|
@ -243,10 +243,10 @@ void Scheduler::AllocateNewContext() {
|
|||
#if ANDROID
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// This is problematic on Android, disable on GPU Normal.
|
||||
query_cache->UpdateCounters();
|
||||
query_cache->NotifySegment(true);
|
||||
}
|
||||
#else
|
||||
query_cache->UpdateCounters();
|
||||
query_cache->NotifySegment(true);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -261,11 +261,12 @@ void Scheduler::EndPendingOperations() {
|
|||
#if ANDROID
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
// This is problematic on Android, disable on GPU Normal.
|
||||
query_cache->DisableStreams();
|
||||
// query_cache->DisableStreams();
|
||||
}
|
||||
#else
|
||||
query_cache->DisableStreams();
|
||||
// query_cache->DisableStreams();
|
||||
#endif
|
||||
query_cache->NotifySegment(false);
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,11 @@
|
|||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
template <typename Trait>
|
||||
class QueryCacheBase;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class CommandPool;
|
||||
|
@ -24,7 +29,8 @@ class Device;
|
|||
class Framebuffer;
|
||||
class GraphicsPipeline;
|
||||
class StateTracker;
|
||||
class QueryCache;
|
||||
|
||||
struct QueryCacheParams;
|
||||
|
||||
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
||||
/// OpenGL-like operations on Vulkan command buffers.
|
||||
|
@ -63,7 +69,7 @@ public:
|
|||
void InvalidateState();
|
||||
|
||||
/// Assigns the query cache.
|
||||
void SetQueryCache(QueryCache& query_cache_) {
|
||||
void SetQueryCache(VideoCommon::QueryCacheBase<QueryCacheParams>& query_cache_) {
|
||||
query_cache = &query_cache_;
|
||||
}
|
||||
|
||||
|
@ -219,7 +225,7 @@ private:
|
|||
std::unique_ptr<MasterSemaphore> master_semaphore;
|
||||
std::unique_ptr<CommandPool> command_pool;
|
||||
|
||||
QueryCache* query_cache = nullptr;
|
||||
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
|
||||
|
||||
vk::CommandBuffer current_cmdbuf;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue