Mirror of https://github.com/shadps4-emu/shadPS4.git
Move presentation to separate thread/improve sync (#303)
* video_out: Move presentation to separate thread
* liverpool: Better sync for CPU flips
* driver: Make flip blocking
* videoout: Proper flip rate and vblank management
* config: Add vblank divider option
* clang format
* videoout: added `sceVideoOutWaitVblank`
* clang format
* vk_scheduler: Silly merge conflict
* externals: Add renderdoc API
* clang format
* reuse
* rdoc: manual capture trigger
* clang fmt

Co-authored-by: psucien <168137814+psucien@users.noreply.github.com>
Parent: 361412031c
Commit: 0d6edaa0a0
32 changed files with 1259 additions and 224 deletions
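At a high level, the guest-facing video out driver now queues command buffers on a dedicated GPU thread and synchronizes against it through the new Liverpool interface added in this commit. A minimal sketch of that flow, assuming a liverpool instance and guest command buffers as placeholders (this is not the actual driver code from the commit):

#include <span>
#include "video_core/amdgpu/liverpool.h"

// Sketch only: SubmitFrame and its arguments are placeholders.
void SubmitFrame(AmdGpu::Liverpool& liverpool, std::span<const u32> dcb,
                 std::span<const u32> ccb) {
    liverpool.SubmitGfx(dcb, ccb); // Queues the work and wakes the GPU thread via submit_cv.
    liverpool.SubmitDone();        // Marks the end of this submission batch.
    liverpool.WaitGpuIdle();       // Blocks until num_submits drops back to zero.
}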
@@ -5,8 +5,10 @@
#include "common/debug.h"
#include "common/polyfill_thread.h"
#include "common/thread.h"
#include "core/libraries/videoout/driver.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h"
#include "video_core/renderdoc.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"

namespace AmdGpu {

@@ -32,12 +34,15 @@ void Liverpool::Process(std::stop_token stoken) {
    while (!stoken.stop_requested()) {
        {
            std::unique_lock lk{submit_mutex};
            Common::CondvarWait(submit_cv, lk, stoken, [this] { return num_submits != 0; });
            Common::CondvarWait(submit_cv, lk, stoken,
                                [this] { return num_submits != 0 || submit_done; });
        }
        if (stoken.stop_requested()) {
            break;
        }

        VideoCore::StartCapture();

        int qid = -1;

        while (num_submits) {

@@ -48,11 +53,9 @@ void Liverpool::Process(std::stop_token stoken) {
            Task::Handle task{};
            {
                std::scoped_lock lock{queue.m_access};

                if (queue.submits.empty()) {
                    continue;
                }

                task = queue.submits.front();
            }
            task.resume();

@@ -64,9 +67,20 @@ void Liverpool::Process(std::stop_token stoken) {
                queue.submits.pop();

                --num_submits;
                std::scoped_lock lock2{submit_mutex};
                submit_cv.notify_all();
            }
        }

        if (submit_done) {
            VideoCore::EndCapture();

            if (rasterizer) {
                rasterizer->Flush();
            }
            submit_done = false;
        }

        Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle);
    }
}

@@ -365,8 +379,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
            const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
            ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
            const u32 data_size = (header->type3.count.Value() - 2) * 4;
            u64* address = write_data->Address<u64*>();
            if (!write_data->wr_one_addr.Value()) {
                std::memcpy(write_data->Address<void*>(), write_data->data, data_size);
                std::memcpy(address, write_data->data, data_size);
            } else {
                UNREACHABLE();
            }

@@ -379,6 +394,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
        case PM4ItOpcode::WaitRegMem: {
            const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
            ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
            // Optimization: VO label waits are special because the emulator
            // will write to the label when presentation is finished. So if
            // there are no other submits to yield to we can sleep the thread
            // instead and allow other tasks to run.
            const u64* wait_addr = wait_reg_mem->Address<u64*>();
            if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
                vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
            }
            while (!wait_reg_mem->Test()) {
                TracyFiberLeave;
                co_yield {};

@@ -511,7 +534,7 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {

    auto task = ProcessGraphics(dcb, ccb);
    {
        std::unique_lock lock{queue.m_access};
        std::scoped_lock lock{queue.m_access};
        queue.submits.emplace(task.handle);
    }

@@ -526,7 +549,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {

    const auto& task = ProcessCompute(acb, vqid);
    {
        std::unique_lock lock{queue.m_access};
        std::scoped_lock lock{queue.m_access};
        queue.submits.emplace(task.handle);
    }

@@ -6,6 +6,7 @@
#include <array>
#include <condition_variable>
#include <coroutine>
#include <functional>
#include <mutex>
#include <span>
#include <thread>

@@ -21,6 +22,10 @@ namespace Vulkan {
class Rasterizer;
}

namespace Libraries::VideoOut {
struct VideoOutPort;
}

namespace AmdGpu {

#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))

@@ -991,10 +996,25 @@ public:
    void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
    void SubmitAsc(u32 vqid, std::span<const u32> acb);

    void SubmitDone() noexcept {
        std::scoped_lock lk{submit_mutex};
        submit_done = true;
        submit_cv.notify_one();
    }

    void WaitGpuIdle() noexcept {
        std::unique_lock lk{submit_mutex};
        submit_cv.wait(lk, [this] { return num_submits == 0; });
    }

    bool IsGpuIdle() const {
        return num_submits == 0;
    }

    void SetVoPort(Libraries::VideoOut::VideoOutPort* port) {
        vo_port = port;
    }

    void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
        rasterizer = rasterizer_;
    }

@@ -1059,8 +1079,10 @@ private:
    } cblock{};

    Vulkan::Rasterizer* rasterizer{};
    Libraries::VideoOut::VideoOutPort* vo_port{};
    std::jthread process_thread{};
    std::atomic<u32> num_submits{};
    std::atomic<bool> submit_done{};
    std::mutex submit_mutex;
    std::condition_variable_any submit_cv;
};

@@ -404,8 +404,9 @@ struct PM4CmdWaitRegMem {
    u32 mask;
    u32 poll_interval;

    u32* Address() const {
        return reinterpret_cast<u32*>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
    template <typename T = u32*>
    T Address() const {
        return reinterpret_cast<T>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
    }

    bool Test() const {

@@ -464,8 +465,8 @@ struct PM4CmdWriteData {
    }

    template <typename T>
    T* Address() const {
        return reinterpret_cast<T*>(addr64);
    T Address() const {
        return reinterpret_cast<T>(addr64);
    }
};

@@ -494,8 +495,9 @@ struct PM4CmdEventWriteEos {
        BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS
    };

    u32* Address() const {
        return reinterpret_cast<u32*>(address_lo | u64(address_hi) << 32);
    template <typename T = u32*>
    T Address() const {
        return reinterpret_cast<T>(address_lo | u64(address_hi) << 32);
    }

    u32 DataDWord() const {

src/video_core/renderdoc.cpp (new file, 120 lines)

@@ -0,0 +1,120 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "common/config.h"
#include "video_core/renderdoc.h"

#include <renderdoc_app.h>

#ifdef _WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#endif

#include <filesystem>

namespace VideoCore {

enum class CaptureState {
    Idle,
    Triggered,
    InProgress,
};
static CaptureState capture_state{CaptureState::Idle};

RENDERDOC_API_1_6_0* rdoc_api{};

void LoadRenderDoc() {
#ifdef WIN32
    // Check if we are running under the RenderDoc GUI
    HMODULE mod = GetModuleHandleA("renderdoc.dll");
    if (!mod && Config::isRdocEnabled()) {
        // If enabled in config, try to load the RenderDoc runtime in offline mode
        HKEY h_reg_key;
        LONG result = RegOpenKeyExW(HKEY_LOCAL_MACHINE,
                                    L"SOFTWARE\\Classes\\RenderDoc.RDCCapture.1\\DefaultIcon\\", 0,
                                    KEY_READ, &h_reg_key);
        if (result != ERROR_SUCCESS) {
            return;
        }
        std::array<wchar_t, MAX_PATH> key_str{};
        DWORD str_sz_out{key_str.size()};
        result = RegQueryValueExW(h_reg_key, L"", 0, NULL, (LPBYTE)key_str.data(), &str_sz_out);
        if (result != ERROR_SUCCESS) {
            return;
        }

        std::filesystem::path path{key_str.cbegin(), key_str.cend()};
        path = path.parent_path().append("renderdoc.dll");
        const auto path_to_lib = path.generic_string();
        mod = LoadLibraryA(path_to_lib.c_str());
    }

    if (mod) {
        const auto RENDERDOC_GetAPI =
            reinterpret_cast<pRENDERDOC_GetAPI>(GetProcAddress(mod, "RENDERDOC_GetAPI"));
        const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api);
        ASSERT(ret == 1);
    }
#else
#ifdef ANDROID
    static constexpr const char RENDERDOC_LIB[] = "libVkLayer_GLES_RenderDoc.so";
#else
    static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so";
#endif
    if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) {
        const auto RENDERDOC_GetAPI =
            reinterpret_cast<pRENDERDOC_GetAPI>(dlsym(mod, "RENDERDOC_GetAPI"));
        const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api);
        ASSERT(ret == 1);
    }
#endif
    if (rdoc_api) {
        // Disable the default capture keys, since they are meant to trigger
        // present-to-present capturing, which is not what we want
        rdoc_api->SetCaptureKeys(nullptr, 0);

        // Also remove the RenderDoc crash handler
        rdoc_api->UnloadCrashHandler();
    }
}

void StartCapture() {
    if (!rdoc_api) {
        return;
    }

    if (capture_state == CaptureState::Triggered) {
        rdoc_api->StartFrameCapture(nullptr, nullptr);
        capture_state = CaptureState::InProgress;
    }
}

void EndCapture() {
    if (!rdoc_api) {
        return;
    }

    if (capture_state == CaptureState::InProgress) {
        rdoc_api->EndFrameCapture(nullptr, nullptr);
        capture_state = CaptureState::Idle;
    }
}

void TriggerCapture() {
    if (capture_state == CaptureState::Idle) {
        capture_state = CaptureState::Triggered;
    }
}

void SetOutputDir(const std::string& path, const std::string& prefix) {
    if (!rdoc_api) {
        return;
    }
    rdoc_api->SetCaptureFilePathTemplate((path + '\\' + prefix).c_str());
}

} // namespace VideoCore

src/video_core/renderdoc.h (new file, 25 lines)

@@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <string>

namespace VideoCore {

/// Loads the RenderDoc dynamic library module.
void LoadRenderDoc();

/// Begins a capture if a RenderDoc instance is attached.
void StartCapture();

/// Ends the current RenderDoc capture.
void EndCapture();

/// Triggers the capturing process.
void TriggerCapture();

/// Sets the output directory for captures.
void SetOutputDir(const std::string& path, const std::string& prefix);

} // namespace VideoCore
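The capture hooks above are driven from the GPU thread changes in liverpool.cpp: StartCapture() is called before a submission batch is processed and EndCapture() once it completes, while TriggerCapture() arms the next capture manually. A rough usage sketch under those assumptions (the surrounding function names are hypothetical):

#include "video_core/renderdoc.h"

// Sketch only: OnCaptureHotkey and ProcessSubmissionBatch are hypothetical callers.
void OnCaptureHotkey() {
    VideoCore::TriggerCapture(); // Arms a capture for the next submission batch.
}

void ProcessSubmissionBatch() {
    VideoCore::StartCapture(); // No-op unless a capture has been triggered.
    // ... replay the queued PM4 command buffers ...
    VideoCore::EndCapture();   // Closes the capture once the batch is done.
}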

@@ -63,44 +63,30 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for
    };
}

RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool)
    : window{window_}, instance{window, Config::getGpuId(), Config::vkValidationEnabled()},
      scheduler{instance}, swapchain{instance, window}, texture_cache{instance, scheduler} {
    rasterizer = std::make_unique<Rasterizer>(instance, scheduler, texture_cache, liverpool);
RendererVulkan::RendererVulkan(Frontend::WindowSDL& window_, AmdGpu::Liverpool* liverpool_)
    : window{window_}, liverpool{liverpool_},
      instance{window, Config::getGpuId(), Config::vkValidationEnabled()}, draw_scheduler{instance},
      present_scheduler{instance}, flip_scheduler{instance}, swapchain{instance, window},
      texture_cache{instance, draw_scheduler} {
    rasterizer = std::make_unique<Rasterizer>(instance, draw_scheduler, texture_cache, liverpool);
    const u32 num_images = swapchain.GetImageCount();
    const vk::Device device = instance.GetDevice();

    const vk::CommandPoolCreateInfo pool_info = {
        .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer |
                 vk::CommandPoolCreateFlagBits::eTransient,
        .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
    };
    command_pool = device.createCommandPoolUnique(pool_info);

    const vk::CommandBufferAllocateInfo alloc_info = {
        .commandPool = *command_pool,
        .level = vk::CommandBufferLevel::ePrimary,
        .commandBufferCount = num_images,
    };

    const auto cmdbuffers = device.allocateCommandBuffers(alloc_info);
    // Create presentation frames.
    present_frames.resize(num_images);
    for (u32 i = 0; i < num_images; i++) {
        Frame& frame = present_frames[i];
        frame.cmdbuf = cmdbuffers[i];
        frame.render_ready = device.createSemaphore({});
        frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled});
        free_queue.push(&frame);
    }
}

RendererVulkan::~RendererVulkan() {
    scheduler.Finish();
    draw_scheduler.Finish();
    const vk::Device device = instance.GetDevice();
    for (auto& frame : present_frames) {
        vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation);
        device.destroyImageView(frame.image_view);
        device.destroySemaphore(frame.render_ready);
        device.destroyFence(frame.present_done);
    }
}

@@ -184,7 +170,7 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
        info.pitch = splash->GetImageInfo().width;
        info.guest_address = VAddr(splash->GetImageData().data());
        info.guest_size_bytes = splash->GetImageData().size();
        splash_img.emplace(instance, scheduler, info);
        splash_img.emplace(instance, present_scheduler, info);
        texture_cache.RefreshImage(*splash_img);
    }
    frame = PrepareFrameInternal(*splash_img);

@@ -193,12 +179,18 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) {
    return true;
}

Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image, bool is_eop) {
    // Request a free presentation frame.
    Frame* frame = GetRenderFrame();

    // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead);
    // EOP flips are triggered from GPU thread so use the drawing scheduler to record
    // commands. Otherwise we are dealing with a CPU flip which could have arrived
    // from any guest thread. Use a separate scheduler for that.
    auto& scheduler = is_eop ? draw_scheduler : flip_scheduler;
    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();

    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead, cmdbuf);

    const std::array pre_barrier{
        vk::ImageMemoryBarrier{

@@ -218,12 +210,11 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
            },
        },
    };

    const auto cmdbuf = scheduler.CommandBuffer();
    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
                           {}, {}, pre_barrier);

    // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
    cmdbuf.blitImage(
        image.image, image.layout, frame->image, vk::ImageLayout::eTransferDstOptimal,
        MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height),

@@ -245,13 +236,15 @@ Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
            .layerCount = VK_REMAINING_ARRAY_LAYERS,
        },
    };

    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
                           vk::PipelineStageFlagBits::eAllCommands,
                           vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);

    // Flush pending vulkan operations.
    scheduler.Flush(frame->render_ready);
    // Flush frame creation commands.
    frame->ready_semaphore = scheduler.GetMasterSemaphore()->Handle();
    frame->ready_tick = scheduler.CurrentTick();
    SubmitInfo info{};
    scheduler.Flush(info);
    return frame;
}

@@ -260,11 +253,8 @@ void RendererVulkan::Present(Frame* frame) {

    const vk::Image swapchain_image = swapchain.Image();

    const vk::CommandBufferBeginInfo begin_info = {
        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
    };
    const vk::CommandBuffer cmdbuf = frame->cmdbuf;
    cmdbuf.begin(begin_info);
    auto& scheduler = present_scheduler;
    const auto cmdbuf = scheduler.CommandBuffer();
    {
        auto* profiler_ctx = instance.GetProfilerContext();
        TracyVkNamedZoneC(profiler_ctx, renderer_gpu_zone, cmdbuf, "Host frame",

@@ -339,35 +329,17 @@ void RendererVulkan::Present(Frame* frame) {
            TracyVkCollect(profiler_ctx, cmdbuf);
        }
    }
    cmdbuf.end();

    static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
        vk::PipelineStageFlagBits::eAllGraphics,
    };

    const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();
    const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
    const std::array wait_semaphores = {image_acquired, frame->render_ready};

    vk::SubmitInfo submit_info = {
        .waitSemaphoreCount = static_cast<u32>(wait_semaphores.size()),
        .pWaitSemaphores = wait_semaphores.data(),
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1u,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = 1,
        .pSignalSemaphores = &present_ready,
    };

    std::scoped_lock submit_lock{scheduler.submit_mutex};
    try {
        instance.GetGraphicsQueue().submit(submit_info, frame->present_done);
    } catch (vk::DeviceLostError& err) {
        LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what());
        UNREACHABLE();
    }
    // Flush vulkan commands.
    SubmitInfo info{};
    info.AddWait(swapchain.GetImageAcquiredSemaphore());
    info.AddWait(frame->ready_semaphore, frame->ready_tick);
    info.AddSignal(swapchain.GetPresentReadySemaphore());
    info.AddSignal(frame->present_done);
    scheduler.Flush(info);

    // Present to swapchain.
    std::scoped_lock submit_lock{Scheduler::submit_mutex};
    swapchain.Present();

    // Free the frame for reuse

@@ -26,9 +26,15 @@ struct Frame {
    VmaAllocation allocation;
    vk::Image image;
    vk::ImageView image_view;
    vk::Semaphore render_ready;
    vk::Fence present_done;
    vk::CommandBuffer cmdbuf;
    vk::Semaphore ready_semaphore;
    u64 ready_tick;
};

enum SchedulerType {
    Draw,
    Present,
    CpuFlip,
};

class Rasterizer;

@@ -39,16 +45,16 @@ public:
    ~RendererVulkan();

    Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
                        VAddr cpu_address) {
                        VAddr cpu_address, bool is_eop) {
        const auto info = VideoCore::ImageInfo{attribute, cpu_address};
        const auto image_id = texture_cache.FindImage(info, cpu_address);
        auto& image = texture_cache.GetImage(image_id);
        return PrepareFrameInternal(image);
        return PrepareFrameInternal(image, is_eop);
    }

    Frame* PrepareBlankFrame() {
        auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
        return PrepareFrameInternal(image);
        return PrepareFrameInternal(image, true);
    }

    VideoCore::Image& RegisterVideoOutSurface(

@@ -60,9 +66,9 @@ public:
    }

    bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
        return std::find_if(vo_buffers_addr.cbegin(), vo_buffers_addr.cend(), [&](VAddr vo_buffer) {
        return std::ranges::find_if(vo_buffers_addr, [&](VAddr vo_buffer) {
            return vo_buffer == color_buffer.Address();
        }) != vo_buffers_addr.cend();
        }) != vo_buffers_addr.end();
    }

    bool ShowSplash(Frame* frame = nullptr);

@@ -70,13 +76,16 @@ public:
    void RecreateFrame(Frame* frame, u32 width, u32 height);

private:
    Frame* PrepareFrameInternal(VideoCore::Image& image);
    Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true);
    Frame* GetRenderFrame();

private:
    Frontend::WindowSDL& window;
    AmdGpu::Liverpool* liverpool;
    Instance instance;
    Scheduler scheduler;
    Scheduler draw_scheduler;
    Scheduler present_scheduler;
    Scheduler flip_scheduler;
    Swapchain swapchain;
    std::unique_ptr<Rasterizer> rasterizer;
    VideoCore::TextureCache texture_cache;

@@ -2,8 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <limits>
#include <mutex>
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"

@@ -60,46 +58,4 @@ void MasterSemaphore::Wait(u64 tick) {
    Refresh();
}

void MasterSemaphore::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
                                 u64 signal_value) {
    cmdbuf.end();

    const u32 num_signal_semaphores = signal ? 2U : 1U;
    const std::array signal_values{signal_value, u64(0)};
    const std::array signal_semaphores{Handle(), signal};

    const u32 num_wait_semaphores = wait ? 2U : 1U;
    const std::array wait_values{signal_value - 1, u64(1)};
    const std::array wait_semaphores{Handle(), wait};

    static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
        vk::PipelineStageFlagBits::eAllCommands,
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
    };

    const vk::TimelineSemaphoreSubmitInfo timeline_si = {
        .waitSemaphoreValueCount = num_wait_semaphores,
        .pWaitSemaphoreValues = wait_values.data(),
        .signalSemaphoreValueCount = num_signal_semaphores,
        .pSignalSemaphoreValues = signal_values.data(),
    };

    const vk::SubmitInfo submit_info = {
        .pNext = &timeline_si,
        .waitSemaphoreCount = num_wait_semaphores,
        .pWaitSemaphores = wait_semaphores.data(),
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1u,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = num_signal_semaphores,
        .pSignalSemaphores = signal_semaphores.data(),
    };

    try {
        instance.GetGraphicsQueue().submit(submit_info);
    } catch (vk::DeviceLostError& err) {
        UNREACHABLE_MSG("Device lost during submit: {}", err.what());
    }
}

} // namespace Vulkan

@@ -46,10 +46,6 @@ public:
    /// Waits for a tick to be hit on the GPU
    void Wait(u64 tick);

    /// Submits the provided command buffer for execution
    void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
                    u64 signal_value);

protected:
    const Instance& instance;
    vk::UniqueSemaphore semaphore; ///< Timeline semaphore.

@@ -96,6 +96,13 @@ void Rasterizer::DispatchDirect() {
    cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
}

u64 Rasterizer::Flush() {
    const u64 current_tick = scheduler.CurrentTick();
    SubmitInfo info{};
    scheduler.Flush(info);
    return current_tick;
}

void Rasterizer::BeginRendering() {
    const auto& regs = liverpool->regs;
    RenderState state;

@@ -36,6 +36,8 @@ public:
    void ScopeMarkerBegin(const std::string& str);
    void ScopeMarkerEnd();

    u64 Flush();

private:
    u32 SetupIndexBuffer(bool& is_indexed, u32 index_offset);
    void MapMemory(VAddr addr, size_t size);

@@ -2,12 +2,15 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <mutex>
#include "common/assert.h"
#include "common/debug.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"

namespace Vulkan {

std::mutex Scheduler::submit_mutex;

Scheduler::Scheduler(const Instance& instance)
    : instance{instance}, master_semaphore{instance}, command_pool{instance, &master_semaphore} {
    profiler_scope = reinterpret_cast<tracy::VkCtxScope*>(std::malloc(sizeof(tracy::VkCtxScope)));

@@ -50,22 +53,24 @@ void Scheduler::EndRendering() {
    current_cmdbuf.endRendering();
}

void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
    // When flushing, we only send data to the worker thread; no waiting is necessary.
    SubmitExecution(signal, wait);
void Scheduler::Flush(SubmitInfo& info) {
    // When flushing, we only send data to the driver; no waiting is necessary.
    SubmitExecution(info);
}

void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
void Scheduler::Finish() {
    // When finishing, we need to wait for the submission to have executed on the device.
    const u64 presubmit_tick = CurrentTick();
    SubmitExecution(signal, wait);
    SubmitInfo info{};
    SubmitExecution(info);
    Wait(presubmit_tick);
}

void Scheduler::Wait(u64 tick) {
    if (tick >= master_semaphore.CurrentTick()) {
        // Make sure we are not waiting for the current tick without signalling
        Flush();
        SubmitInfo info{};
        Flush(info);
    }
    master_semaphore.Wait(tick);
}

@@ -86,7 +91,7 @@ void Scheduler::AllocateWorkerCommandBuffers() {
    }
}

void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
void Scheduler::SubmitExecution(SubmitInfo& info) {
    std::scoped_lock lk{submit_mutex};
    const u64 signal_value = master_semaphore.NextTick();

@@ -97,7 +102,40 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
    }

    EndRendering();
    master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
    current_cmdbuf.end();

    const vk::Semaphore timeline = master_semaphore.Handle();
    info.AddSignal(timeline, signal_value);

    static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
        vk::PipelineStageFlagBits::eAllCommands,
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
    };

    const vk::TimelineSemaphoreSubmitInfo timeline_si = {
        .waitSemaphoreValueCount = static_cast<u32>(info.wait_ticks.size()),
        .pWaitSemaphoreValues = info.wait_ticks.data(),
        .signalSemaphoreValueCount = static_cast<u32>(info.signal_ticks.size()),
        .pSignalSemaphoreValues = info.signal_ticks.data(),
    };

    const vk::SubmitInfo submit_info = {
        .pNext = &timeline_si,
        .waitSemaphoreCount = static_cast<u32>(info.wait_semas.size()),
        .pWaitSemaphores = info.wait_semas.data(),
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1U,
        .pCommandBuffers = &current_cmdbuf,
        .signalSemaphoreCount = static_cast<u32>(info.signal_semas.size()),
        .pSignalSemaphores = info.signal_semas.data(),
    };

    try {
        instance.GetGraphicsQueue().submit(submit_info, info.fence);
    } catch (vk::DeviceLostError& err) {
        UNREACHABLE_MSG("Device lost during submit: {}", err.what());
    }

    master_semaphore.Refresh();
    AllocateWorkerCommandBuffers();

@@ -26,16 +26,39 @@ struct RenderState {
    }
};

struct SubmitInfo {
    boost::container::static_vector<vk::Semaphore, 3> wait_semas;
    boost::container::static_vector<u64, 3> wait_ticks;
    boost::container::static_vector<vk::Semaphore, 3> signal_semas;
    boost::container::static_vector<u64, 3> signal_ticks;
    vk::Fence fence;

    void AddWait(vk::Semaphore semaphore, u64 tick = 1) {
        wait_semas.emplace_back(semaphore);
        wait_ticks.emplace_back(tick);
    }

    void AddSignal(vk::Semaphore semaphore, u64 tick = 1) {
        signal_semas.emplace_back(semaphore);
        signal_ticks.emplace_back(tick);
    }

    void AddSignal(vk::Fence fence) {
        this->fence = fence;
    }
};

class Scheduler {
public:
    explicit Scheduler(const Instance& instance);
    ~Scheduler();

    /// Sends the current execution context to the GPU.
    void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
    /// Sends the current execution context to the GPU
    /// and increments the scheduler timeline semaphore.
    void Flush(SubmitInfo& info);

    /// Sends the current execution context to the GPU and waits for it to complete.
    void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
    void Finish();

    /// Waits for the given tick to trigger on the GPU.
    void Wait(u64 tick);

@@ -76,12 +99,12 @@ public:
        pending_ops.emplace(func, CurrentTick());
    }

    std::mutex submit_mutex;
    static std::mutex submit_mutex;

private:
    void AllocateWorkerCommandBuffers();

    void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
    void SubmitExecution(SubmitInfo& info);

private:
    const Instance& instance;

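For context, the SubmitInfo introduced in this header is what callers now fill in before Scheduler::Flush(), replacing the old signal/wait semaphore parameters; Present() in the renderer changes above is the main user. A condensed sketch of that pattern (names follow the diff; the frame and swapchain objects are stand-ins):

// Sketch only, mirroring the Present() changes shown earlier in this diff.
Vulkan::SubmitInfo info{};
info.AddWait(swapchain.GetImageAcquiredSemaphore());     // Binary semaphore wait (tick defaults to 1).
info.AddWait(frame->ready_semaphore, frame->ready_tick); // Timeline wait on the draw scheduler's tick.
info.AddSignal(swapchain.GetPresentReadySemaphore());
info.AddSignal(frame->present_done);                     // vk::Fence overload.
present_scheduler.Flush(info); // Also signals the scheduler's own timeline semaphore.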
@@ -55,7 +55,7 @@ void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
        .pQueueFamilyIndices = queue_family_indices.data(),
        .preTransform = transform,
        .compositeAlpha = composite_alpha,
        .presentMode = vk::PresentModeKHR::eFifo,
        .presentMode = vk::PresentModeKHR::eMailbox,
        .clipped = true,
        .oldSwapchain = nullptr,
    };

@@ -231,7 +231,7 @@ static constexpr vk::BufferUsageFlags StagingFlags = vk::BufferUsageFlagBits::eT

TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
    : instance{instance}, scheduler{scheduler},
      staging{instance, scheduler, StagingFlags, 128_MB, Vulkan::BufferType::Upload} {
      staging{instance, scheduler, StagingFlags, 256_MB, Vulkan::BufferType::Upload} {

    static const std::array detiler_shaders{
        HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,