Merge pull request #3677 from FernandoS27/better-sync

Introduce Predictive Flushing and Improve ASYNC GPU
This commit is contained in:
bunnei 2020-04-22 22:09:38 -04:00 committed by GitHub
commit bf2ddb8fd5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
41 changed files with 1193 additions and 63 deletions

View file

@ -0,0 +1,101 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <thread>
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
/// Builds a fence that carries only a payload value.
/// @param device     Vulkan device; a reference to it is stored for later event creation.
/// @param scheduler  Scheduler; a reference to it is stored for recording and flushing.
/// @param payload    Value forwarded to VideoCommon::FenceBase.
/// @param is_stubbed Forwarded to VideoCommon::FenceBase; stubbed fences skip all Vulkan work.
InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
                       bool is_stubbed)
    : VideoCommon::FenceBase(payload, is_stubbed),
      device(device),
      scheduler(scheduler) {}
/// Builds a fence that carries a GPU virtual address together with a payload value.
/// @param device     Vulkan device; a reference to it is stored for later event creation.
/// @param scheduler  Scheduler; a reference to it is stored for recording and flushing.
/// @param address    GPU virtual address forwarded to VideoCommon::FenceBase.
/// @param payload    Value forwarded to VideoCommon::FenceBase.
/// @param is_stubbed Forwarded to VideoCommon::FenceBase; stubbed fences skip all Vulkan work.
InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
                       u32 payload, bool is_stubbed)
    : VideoCommon::FenceBase(address, payload, is_stubbed),
      device(device),
      scheduler(scheduler) {}
/// Defaulted destructor, defined out of line in this translation unit.
InnerFence::~InnerFence() = default;
void InnerFence::Queue() {
if (is_stubbed) {
return;
}
ASSERT(!event);
event = device.GetLogical().CreateEvent();
ticks = scheduler.Ticks();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
});
}
/// Reports whether this fence has been signaled.
/// @return Always true for a stubbed fence; otherwise the current status of the backing event.
///         Asserts that the event exists (i.e. Queue() has created it).
bool InnerFence::IsSignaled() const {
    if (!is_stubbed) {
        ASSERT(event);
        return IsEventSignalled();
    }
    return true;
}
/// Blocks the calling thread until the backing event reads as set, yielding between polls.
/// Stubbed fences return immediately. Asserts that the event exists.
void InnerFence::Wait() {
    if (is_stubbed) {
        return;
    }
    ASSERT(event);

    // When the scheduler's tick count has not moved past the value stored by Queue(), flush it
    // before polling.
    // NOTE(review): presumably this is what gets the recorded set-event command submitted -
    // confirm against VKScheduler::Flush.
    const bool scheduler_not_advanced = ticks >= scheduler.Ticks();
    if (scheduler_not_advanced) {
        scheduler.Flush();
    }

    for (;;) {
        if (IsEventSignalled()) {
            break;
        }
        std::this_thread::yield();
    }
}
bool InnerFence::IsEventSignalled() const {
switch (const VkResult result = event.GetStatus()) {
case VK_EVENT_SET:
return true;
case VK_EVENT_RESET:
return false;
default:
throw vk::Exception(result);
}
}
/// Constructs the Vulkan fence manager.
/// The system, rasterizer and the three caches are forwarded to GenericFenceManager; references to
/// the device and scheduler are kept so they can be handed to every fence this manager creates.
VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                               const VKDevice& device, VKScheduler& scheduler,
                               VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
                               VKQueryCache& query_cache)
    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
      device(device),
      scheduler(scheduler) {}
/// Creates a payload-only fence bound to this manager's device and scheduler.
/// @param value      Payload stored in the fence.
/// @param is_stubbed Whether the fence should skip all Vulkan work.
/// @return Shared pointer owning the new InnerFence.
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
    Fence new_fence = std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
    return new_fence;
}
/// Creates a fence that carries a GPU virtual address and a payload, bound to this manager's
/// device and scheduler.
/// @param addr       GPU virtual address stored in the fence.
/// @param value      Payload stored in the fence.
/// @param is_stubbed Whether the fence should skip all Vulkan work.
/// @return Shared pointer owning the new InnerFence.
Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
    Fence new_fence = std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
    return new_fence;
}
/// Queues the given fence by delegating to InnerFence::Queue().
void VKFenceManager::QueueFence(Fence& fence) {
    InnerFence& inner = *fence;
    inner.Queue();
}
/// Reports whether the given fence is signaled by delegating to InnerFence::IsSignaled().
bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
    const InnerFence& inner = *fence;
    return inner.IsSignaled();
}
/// Blocks until the given fence is signaled by delegating to InnerFence::Wait().
void VKFenceManager::WaitFence(Fence& fence) {
    InnerFence& inner = *fence;
    inner.Wait();
}
} // namespace Vulkan

View file

@ -0,0 +1,74 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "video_core/fence_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Vulkan {
class VKBufferCache;
class VKDevice;
class VKQueryCache;
class VKScheduler;
class VKTextureCache;
/// Vulkan fence object derived from VideoCommon::FenceBase.
/// It owns a vk::Event and remembers the scheduler tick value read when it was queued.
class InnerFence : public VideoCommon::FenceBase {
public:
/// Constructs a fence that carries only a payload value.
explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
bool is_stubbed);
/// Constructs a fence that carries a GPU virtual address and a payload value.
explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
u32 payload, bool is_stubbed);
~InnerFence();
/// Creates the event and records a command that sets it; no-op for stubbed fences.
void Queue();
/// Returns true for stubbed fences, otherwise whether the event currently reads as set.
bool IsSignaled() const;
/// Blocks until the event reads as set; returns immediately for stubbed fences.
void Wait();
private:
/// Queries the event status; throws vk::Exception on anything other than set/reset.
bool IsEventSignalled() const;
const VKDevice& device;
VKScheduler& scheduler;
/// Event created by Queue(); empty until then.
vk::Event event;
/// Scheduler tick value captured by Queue().
u64 ticks = 0;
};
/// Shared-ownership handle to a Vulkan fence object.
using Fence = std::shared_ptr<InnerFence>;
/// VideoCommon::FenceManager specialized for the Vulkan fence type and the Vulkan texture,
/// buffer and query caches.
using GenericFenceManager =
VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
/// Vulkan implementation of the generic fence manager.
/// It supplies the fence creation, queueing, polling and waiting hooks, each of which
/// forwards to InnerFence.
class VKFenceManager final : public GenericFenceManager {
public:
/// The system, rasterizer and caches are forwarded to GenericFenceManager; the device and
/// scheduler references are kept for creating fences.
explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
VKQueryCache& query_cache);
protected:
/// Creates a payload-only fence.
Fence CreateFence(u32 value, bool is_stubbed) override;
/// Creates a fence carrying a GPU virtual address and a payload.
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
/// Calls Queue() on the fence.
void QueueFence(Fence& fence) override;
/// Returns the result of IsSignaled() on the fence.
bool IsFenceSignaled(Fence& fence) const override;
/// Calls Wait() on the fence.
void WaitFence(Fence& fence) override;
private:
const VKDevice& device;
VKScheduler& scheduler;
};
} // namespace Vulkan

View file

@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
if (!shader) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
std::move(code), stage_offset);
Register(shader);
if (cpu_addr) {
Register(shader);
} else {
null_shader = shader;
}
}
shaders[index] = std::move(shader);
}
@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
if (!shader) {
// No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr);
@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, std::move(code),
kernel_main_offset);
Register(shader);
if (cpu_addr) {
Register(shader);
} else {
null_kernel = shader;
}
}
Specialization specialization;

View file

@ -182,6 +182,9 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache& renderpass_cache;
Shader null_shader{};
Shader null_kernel{};
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
GraphicsPipelineCacheKey last_graphics_key;

View file

@ -17,6 +17,7 @@
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@ -299,7 +300,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
renderpass_cache),
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device), query_cache(system, *this, device, scheduler) {
sampler_cache(device),
fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
query_cache(system, *this, device, scheduler) {
scheduler.SetQueryCache(query_cache);
}
@ -360,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
});
EndTransformFeedback();
system.GPU().TickWork();
}
void RasterizerVulkan::Clear() {
@ -504,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
query_cache.FlushRegion(addr, size);
}
/// Reports whether the given region has to be flushed.
/// The buffer cache is always consulted. The texture cache is consulted first, and only when
/// Settings::IsGPULevelHigh() returns true.
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
    const bool include_textures = Settings::IsGPULevelHigh();
    if (include_textures && texture_cache.MustFlushRegion(addr, size)) {
        return true;
    }
    return buffer_cache.MustFlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
@ -514,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
query_cache.InvalidateRegion(addr, size);
}
/// Notifies the caches that the CPU wrote to the given region.
/// Nothing happens when either the address or the size is zero. Otherwise the texture and buffer
/// caches receive OnCPUWrite, while the pipeline and query caches have the region invalidated.
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
    if (addr != 0 && size != 0) {
        texture_cache.OnCPUWrite(addr, size);
        pipeline_cache.InvalidateRegion(addr, size);
        buffer_cache.OnCPUWrite(addr, size);
        query_cache.InvalidateRegion(addr, size);
    }
}
/// Forwards SyncGuestHost to the texture cache and then to the buffer cache.
void RasterizerVulkan::SyncGuestHost() {
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
}
/// Signals a semaphore located at a GPU virtual address.
/// With an asynchronous GPU the request is handed to the fence manager; otherwise the value is
/// written straight to GPU memory as a u32.
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.SignalSemaphore(addr, value);
        return;
    }
    gpu.MemoryManager().Write<u32>(addr, value);
}
/// Signals a sync point.
/// With an asynchronous GPU the request is handed to the fence manager; otherwise the GPU's
/// IncrementSyncPoint is called directly with the value.
void RasterizerVulkan::SignalSyncPoint(u32 value) {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.SignalSyncPoint(value);
        return;
    }
    gpu.IncrementSyncPoint(value);
}
/// Waits for all pending fences in the fence manager when the GPU runs asynchronously.
/// Does nothing for a synchronous GPU.
void RasterizerVulkan::ReleaseFences() {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.WaitPendingFences();
    }
}
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegion(addr, size);

View file

@ -21,6 +21,7 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
@ -118,7 +119,13 @@ public:
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override;
void TickFrame() override;
@ -261,6 +268,7 @@ private:
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
VKFenceManager fence_manager;
VKQueryCache query_cache;
std::array<View, Maxwell::NumRenderTargets> color_attachments;

View file

@ -63,6 +63,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdSetBlendConstants);
X(vkCmdSetDepthBias);
X(vkCmdSetDepthBounds);
X(vkCmdSetEvent);
X(vkCmdSetScissor);
X(vkCmdSetStencilCompareMask);
X(vkCmdSetStencilReference);
@ -75,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCreateDescriptorPool);
X(vkCreateDescriptorSetLayout);
X(vkCreateDescriptorUpdateTemplateKHR);
X(vkCreateEvent);
X(vkCreateFence);
X(vkCreateFramebuffer);
X(vkCreateGraphicsPipelines);
@ -93,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkDestroyDescriptorPool);
X(vkDestroyDescriptorSetLayout);
X(vkDestroyDescriptorUpdateTemplateKHR);
X(vkDestroyEvent);
X(vkDestroyFence);
X(vkDestroyFramebuffer);
X(vkDestroyImage);
@ -112,6 +115,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkFreeMemory);
X(vkGetBufferMemoryRequirements);
X(vkGetDeviceQueue);
X(vkGetEventStatus);
X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements);
X(vkGetQueryPoolResults);
@ -269,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld)
dld.vkFreeMemory(device, handle, nullptr);
}
/// Destroys a VkEvent through the device dispatch table, passing null allocation callbacks.
void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyEvent(device, handle, nullptr);
}
/// Destroys a VkFence through the device dispatch table, passing null allocation callbacks.
void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
dld.vkDestroyFence(device, handle, nullptr);
}
@ -599,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
return ShaderModule(object, handle, *dld);
}
/// Creates a Vulkan event on this device with no extension chain and no flags.
/// @return Owning Event wrapper bound to this device and its dispatch table.
/// The result of vkCreateEvent is passed through Check().
Event Device::CreateEvent() const {
    VkEventCreateInfo create_info;
    create_info.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
    create_info.pNext = nullptr;
    create_info.flags = 0;

    VkEvent raw_event;
    const VkResult result = dld->vkCreateEvent(handle, &create_info, nullptr, &raw_event);
    Check(result);
    return Event(raw_event, handle, *dld);
}
SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
VkSwapchainKHR object;
Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));

View file

@ -199,6 +199,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
PFN_vkCmdSetEvent vkCmdSetEvent;
PFN_vkCmdSetScissor vkCmdSetScissor;
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
@ -211,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
PFN_vkCreateEvent vkCreateEvent;
PFN_vkCreateFence vkCreateFence;
PFN_vkCreateFramebuffer vkCreateFramebuffer;
PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
@ -229,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
PFN_vkDestroyEvent vkDestroyEvent;
PFN_vkDestroyFence vkDestroyFence;
PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
PFN_vkDestroyImage vkDestroyImage;
@ -248,6 +251,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkFreeMemory vkFreeMemory;
PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
PFN_vkGetDeviceQueue vkGetDeviceQueue;
PFN_vkGetEventStatus vkGetEventStatus;
PFN_vkGetFenceStatus vkGetFenceStatus;
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
@ -279,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
@ -648,6 +653,15 @@ public:
std::vector<VkImage> GetImages() const;
};
/// Wrapper around a VkEvent handle owned by a VkDevice; inherits Handle's constructors.
class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
public:
/// Returns the raw VkResult of vkGetEventStatus for this event; the result is not checked here,
/// so callers must interpret it themselves.
VkResult GetStatus() const noexcept {
return dld->vkGetEventStatus(owner, handle);
}
};
class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
@ -695,6 +709,8 @@ public:
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
Event CreateEvent() const;
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
@ -938,6 +954,10 @@ public:
dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
}
/// Records a vkCmdSetEvent for `event` with the given pipeline stage mask into this command
/// buffer.
void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept {
dld->vkCmdSetEvent(handle, event, stage_flags);
}
void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
const VkDeviceSize* offsets,
const VkDeviceSize* sizes) const noexcept {