Move presentation to separate thread/improve sync (#303)

* video_out: Move presentation to separate thread

* liverpool: Better sync for CPU flips

* driver: Make flip blocking

* videoout: Proper flip rate and vblank management

* config: Add vblank divider option

* clang format

* videoout: added `sceVideoOutWaitVblank`

* clang format

* vk_scheduler: Silly merge conflict

* externals: Add renderdoc API

* clang format

* reuse

* rdoc: manual capture trigger

* clang fmt

---------

Co-authored-by: psucien <168137814+psucien@users.noreply.github.com>
This commit is contained in:
TheTurtle 2024-07-28 16:54:09 +03:00 committed by GitHub
parent 361412031c
commit 0d6edaa0a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
32 changed files with 1259 additions and 224 deletions

View file

@ -20,13 +20,12 @@
extern Frontend::WindowSDL* g_window;
std::unique_ptr<Vulkan::RendererVulkan> renderer;
std::unique_ptr<AmdGpu::Liverpool> liverpool;
namespace Libraries::GnmDriver {
using namespace AmdGpu;
static std::unique_ptr<AmdGpu::Liverpool> liverpool;
enum GnmEventIdents : u64 {
Compute0RelMem = 0x00,
Compute1RelMem = 0x01,
@ -2131,6 +2130,7 @@ int PS4_SYSV_ABI sceGnmSubmitDone() {
if (!liverpool->IsGpuIdle()) {
submission_lock = true;
}
liverpool->SubmitDone();
send_init_packet = true;
++frames_submitted;
return ORBIS_OK;

View file

@ -78,9 +78,7 @@ bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) {
std::scoped_lock lock{m_mutex};
for (auto& event : m_events) {
ASSERT_MSG(event.event.filter == filter,
"Event to trigger doesn't match to queue events");
if (event.event.ident == ident) {
if ((event.event.ident == ident) && (event.event.filter == filter)) {
event.Trigger(trigger_data);
has_found = true;
}

View file

@ -43,8 +43,8 @@ namespace Libraries {
void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
LOG_INFO(Lib_Kernel, "Initializing HLE libraries");
Libraries::Kernel::LibKernel_Register(sym);
Libraries::VideoOut::RegisterLib(sym);
Libraries::GnmDriver::RegisterlibSceGnmDriver(sym);
Libraries::VideoOut::RegisterLib(sym);
if (!Config::isLleLibc()) {
Libraries::LibC::libcSymbolsRegister(sym);
}

View file

@ -3,14 +3,16 @@
#include <pthread.h>
#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/thread.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/kernel/time_management.h"
#include "core/libraries/videoout/driver.h"
#include "core/platform.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
extern std::unique_ptr<AmdGpu::Liverpool> liverpool;
namespace Libraries::VideoOut {
@ -41,20 +43,18 @@ VideoOutDriver::VideoOutDriver(u32 width, u32 height) {
main_port.resolution.fullHeight = height;
main_port.resolution.paneWidth = width;
main_port.resolution.paneHeight = height;
present_thread = std::jthread([&](std::stop_token token) { PresentThread(token); });
}
VideoOutDriver::~VideoOutDriver() = default;
int VideoOutDriver::Open(const ServiceThreadParams* params) {
std::scoped_lock lock{mutex};
if (main_port.is_open) {
return ORBIS_VIDEO_OUT_ERROR_RESOURCE_BUSY;
}
int handle = 1;
main_port.is_open = true;
return handle;
liverpool->SetVoPort(&main_port);
return 1;
}
void VideoOutDriver::Close(s32 handle) {
@ -158,31 +158,22 @@ int VideoOutDriver::UnregisterBuffers(VideoOutPort* port, s32 attributeIndex) {
return ORBIS_OK;
}
void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
Request req;
{
std::unique_lock lock{mutex};
submit_cond.wait_for(lock, timeout, [&] { return !requests.empty(); });
if (requests.empty()) {
renderer->ShowSplash();
return;
}
// Retrieve the request.
req = requests.front();
requests.pop();
std::chrono::microseconds VideoOutDriver::Flip(const Request& req) {
if (!req) {
return std::chrono::microseconds{0};
}
const auto start = std::chrono::high_resolution_clock::now();
// Whatever the game is rendering show splash if it is active
if (!renderer->ShowSplash(req.frame)) {
// Present the frame.
renderer->Present(req.frame);
}
std::scoped_lock lock{mutex};
// Update flip status.
auto& flip_status = req.port->flip_status;
auto* port = req.port;
auto& flip_status = port->flip_status;
flip_status.count++;
flip_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
flip_status.tsc = Libraries::Kernel::sceKernelReadTsc();
@ -192,7 +183,7 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
flip_status.flipPendingNum = static_cast<int>(requests.size());
// Trigger flip events for the port.
for (auto& event : req.port->flip_events) {
for (auto& event : port->flip_events) {
if (event != nullptr) {
event->TriggerEvent(SCE_VIDEO_OUT_EVENT_FLIP, Kernel::SceKernelEvent::Filter::VideoOut,
reinterpret_cast<void*>(req.flip_arg));
@ -201,21 +192,23 @@ void VideoOutDriver::Flip(std::chrono::microseconds timeout) {
// Reset flip label
if (req.index != -1) {
req.port->buffer_labels[req.index] = 0;
port->buffer_labels[req.index] = 0;
port->SignalVoLabel();
}
const auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast<std::chrono::microseconds>(end - start);
}
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
bool is_eop /*= false*/) {
std::scoped_lock lock{mutex};
Vulkan::Frame* frame;
if (index == -1) {
frame = renderer->PrepareBlankFrame();
} else {
const auto& buffer = port->buffer_slots[index];
const auto& group = port->groups[buffer.group_index];
frame = renderer->PrepareFrame(group, buffer.address_left);
frame = renderer->PrepareFrame(group, buffer.address_left, is_eop);
}
if (index != -1 && requests.size() >= port->NumRegisteredBuffers()) {
@ -223,6 +216,7 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
return false;
}
std::scoped_lock lock{mutex};
requests.push({
.frame = frame,
.port = port,
@ -234,24 +228,53 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
port->flip_status.flipPendingNum = static_cast<int>(requests.size());
port->flip_status.gcQueueNum = 0;
submit_cond.notify_one();
return true;
}
void VideoOutDriver::Vblank() {
std::scoped_lock lock{mutex};
void VideoOutDriver::PresentThread(std::stop_token token) {
static constexpr std::chrono::milliseconds VblankPeriod{16};
Common::SetCurrentThreadName("PresentThread");
auto& vblank_status = main_port.vblank_status;
vblank_status.count++;
vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc();
const auto receive_request = [this] -> Request {
std::scoped_lock lk{mutex};
if (!requests.empty()) {
const auto request = requests.front();
requests.pop();
return request;
}
return {};
};
// Trigger flip events for the port.
for (auto& event : main_port.vblank_events) {
if (event != nullptr) {
event->TriggerEvent(SCE_VIDEO_OUT_EVENT_VBLANK,
Kernel::SceKernelEvent::Filter::VideoOut, nullptr);
auto vblank_period = VblankPeriod / Config::vblankDiv();
auto delay = std::chrono::microseconds{0};
while (!token.stop_requested()) {
// Sleep for most of the vblank duration.
std::this_thread::sleep_for(vblank_period - delay);
// Check if it's time to take a request.
auto& vblank_status = main_port.vblank_status;
if (vblank_status.count % (main_port.flip_rate + 1) == 0) {
const auto request = receive_request();
delay = Flip(request);
FRAME_END;
}
{
// Needs lock here as can be concurrently read by `sceVideoOutGetVblankStatus`
std::unique_lock lock{main_port.vo_mutex};
vblank_status.count++;
vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime();
vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc();
main_port.vblank_cv.notify_all();
}
// Trigger flip events for the port.
for (auto& event : main_port.vblank_events) {
if (event != nullptr) {
event->TriggerEvent(SCE_VIDEO_OUT_EVENT_VBLANK,
Kernel::SceKernelEvent::Filter::VideoOut, nullptr);
}
}
}
}

View file

@ -3,10 +3,13 @@
#pragma once
#include "common/debug.h"
#include "common/polyfill_thread.h"
#include "core/libraries/videoout/video_out.h"
#include <condition_variable>
#include <mutex>
#include <queue>
#include "core/libraries/videoout/video_out.h"
namespace Vulkan {
struct Frame;
@ -25,6 +28,9 @@ struct VideoOutPort {
SceVideoOutVblankStatus vblank_status;
std::vector<Kernel::SceKernelEqueue> flip_events;
std::vector<Kernel::SceKernelEqueue> vblank_events;
std::mutex vo_mutex;
std::condition_variable vo_cv;
std::condition_variable vblank_cv;
int flip_rate = 0;
s32 FindFreeGroup() const {
@ -35,6 +41,22 @@ struct VideoOutPort {
return index;
}
bool IsVoLabel(const u64* address) const {
const u64* start = &buffer_labels[0];
const u64* end = &buffer_labels[MaxDisplayBuffers - 1];
return address >= start && address <= end;
}
void WaitVoLabel(auto&& pred) {
std::unique_lock lk{vo_mutex};
vo_cv.wait(lk, pred);
}
void SignalVoLabel() {
std::scoped_lock lk{vo_mutex};
vo_cv.notify_one();
}
[[nodiscard]] int NumRegisteredBuffers() const {
return std::count_if(buffer_slots.cbegin(), buffer_slots.cend(),
[](auto& buffer) { return buffer.group_index != -1; });
@ -63,11 +85,8 @@ public:
const BufferAttribute* attribute);
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);
void Flip(std::chrono::microseconds timeout);
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
void Vblank();
private:
struct Request {
Vulkan::Frame* frame;
@ -76,14 +95,19 @@ private:
s64 flip_arg;
u64 submit_tsc;
bool eop;
operator bool() const noexcept {
return frame != nullptr;
}
};
std::chrono::microseconds Flip(const Request& req);
void PresentThread(std::stop_token token);
std::mutex mutex;
VideoOutPort main_port{};
std::condition_variable_any submit_cond;
std::condition_variable done_cond;
std::jthread present_thread;
std::queue<Request> requests;
bool is_neo{};
};
} // namespace Libraries::VideoOut

View file

@ -183,6 +183,7 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus*
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
}
std::unique_lock lock{port->vo_mutex};
*status = port->vblank_status;
return ORBIS_OK;
}
@ -229,14 +230,6 @@ s32 PS4_SYSV_ABI sceVideoOutUnregisterBuffers(s32 handle, s32 attributeIndex) {
return driver->UnregisterBuffers(port, attributeIndex);
}
void Flip(std::chrono::microseconds micros) {
return driver->Flip(micros);
}
void Vblank() {
return driver->Vblank();
}
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr) {
auto* port = driver->GetPort(handle);
ASSERT(port);
@ -266,6 +259,18 @@ s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo(
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle) {
auto* port = driver->GetPort(handle);
if (!port) {
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
}
std::unique_lock lock{port->vo_mutex};
const auto prev_counter = port->vblank_status.count;
port->vblank_cv.wait(lock, [&]() { return prev_counter != port->vblank_status.count; });
return ORBIS_OK;
}
void RegisterLib(Core::Loader::SymbolsResolver* sym) {
driver = std::make_unique<VideoOutDriver>(Config::getScreenWidth(), Config::getScreenHeight());
@ -294,6 +299,7 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) {
sceVideoOutGetVblankStatus);
LIB_FUNCTION("kGVLc3htQE8", "libSceVideoOut", 1, "libSceVideoOut", 0, 0,
sceVideoOutGetDeviceCapabilityInfo);
LIB_FUNCTION("j6RaAUlaLv0", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutWaitVblank);
// openOrbis appears to have libSceVideoOut_v1 module libSceVideoOut_v1.1
LIB_FUNCTION("Up36PTk687E", "libSceVideoOut", 1, "libSceVideoOut", 1, 1, sceVideoOutOpen);

View file

@ -92,11 +92,12 @@ void PS4_SYSV_ABI sceVideoOutSetBufferAttribute(BufferAttribute* attribute, Pixe
u32 tilingMode, u32 aspectRatio, u32 width,
u32 height, u32 pitchInPixel);
s32 PS4_SYSV_ABI sceVideoOutAddFlipEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata);
s32 PS4_SYSV_ABI sceVideoOutAddVBlankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata);
s32 PS4_SYSV_ABI sceVideoOutAddVblankEvent(Kernel::SceKernelEqueue eq, s32 handle, void* udata);
s32 PS4_SYSV_ABI sceVideoOutRegisterBuffers(s32 handle, s32 startIndex, void* const* addresses,
s32 bufferNum, const BufferAttribute* attribute);
s32 PS4_SYSV_ABI sceVideoOutSetFlipRate(s32 handle, s32 rate);
s32 PS4_SYSV_ABI sceVideoOutIsFlipPending(s32 handle);
s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle);
s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode, s64 flipArg);
s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status);
s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status);
@ -104,9 +105,6 @@ s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 i
const void* param);
s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle);
void Flip(std::chrono::microseconds micros);
void Vblank();
// Internal system functions
void sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_addr);
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, u32 arg, void** unk);