Merge pull request #5208 from bunnei/service-threads

Service threads
This commit is contained in:
bunnei 2020-12-30 22:06:05 -08:00 committed by GitHub
commit 25d607f5f6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
67 changed files with 770 additions and 1001 deletions

View file

@ -48,6 +48,7 @@ add_library(video_core STATIC
engines/shader_bytecode.h
engines/shader_header.h
engines/shader_type.h
framebuffer_config.h
macro/macro.cpp
macro/macro.h
macro/macro_hle.cpp
@ -59,10 +60,6 @@ add_library(video_core STATIC
fence_manager.h
gpu.cpp
gpu.h
gpu_asynch.cpp
gpu_asynch.h
gpu_synch.cpp
gpu_synch.h
gpu_thread.cpp
gpu_thread.h
guest_driver.cpp

View file

@ -0,0 +1,31 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
namespace Tegra {
/**
* Struct describing framebuffer configuration
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
A8B8G8R8_UNORM = 1,
RGB565_UNORM = 4,
B8G8R8A8_UNORM = 5,
};
VAddr address{};
u32 offset{};
u32 width{};
u32 height{};
u32 stride{};
PixelFormat pixel_format{};
using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags{};
Common::Rectangle<int> crop_rect;
};
} // namespace Tegra

View file

@ -10,6 +10,7 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
#include "core/hardware_interrupt_manager.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/fermi_2d.h"
@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
gpu_thread{system_, is_async_} {}
GPU::~GPU() = default;
@ -198,10 +200,6 @@ void GPU::SyncGuestHost() {
renderer->Rasterizer().SyncGuestHost();
}
void GPU::OnCommandListEnd() {
renderer->Rasterizer().ReleaseFences();
}
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
@ -461,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() {
}
}
void GPU::Start() {
gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}
void GPU::ObtainContext() {
cpu_context->MakeCurrent();
}
void GPU::ReleaseContext() {
cpu_context->DoneCurrent();
}
void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
// This condition fires when a video stream ends, clear all intermediary data
if (entries[0].raw == 0xDEADB33F) {
cdma_pusher.reset();
return;
}
if (!cdma_pusher) {
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
}
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));
cdma_pusher->Push(std::move(entries));
cdma_pusher->DispatchCalls();
}
void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer);
}
void GPU::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPU::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
auto& interrupt_manager = system.InterruptManager();
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
void GPU::WaitIdle() const {
gpu_thread.WaitIdle();
}
void GPU::OnCommandListEnd() {
if (is_async) {
// This command only applies to asynchronous GPU mode
gpu_thread.OnCommandListEnd();
}
}
} // namespace Tegra

View file

@ -15,6 +15,8 @@
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/cdma_pusher.h"
#include "video_core/dma_pusher.h"
#include "video_core/framebuffer_config.h"
#include "video_core/gpu_thread.h"
using CacheAddr = std::uintptr_t;
[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
@ -101,28 +103,6 @@ enum class DepthFormat : u32 {
struct CommandListHeader;
class DebugContext;
/**
* Struct describing framebuffer configuration
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
A8B8G8R8_UNORM = 1,
RGB565_UNORM = 4,
B8G8R8A8_UNORM = 5,
};
VAddr address;
u32 offset;
u32 width;
u32 height;
u32 stride;
PixelFormat pixel_format;
using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags;
Common::Rectangle<int> crop_rect;
};
namespace Engines {
class Fermi2D;
class Maxwell3D;
@ -141,7 +121,7 @@ enum class EngineID {
class MemoryManager;
class GPU {
class GPU final {
public:
struct MethodCall {
u32 method{};
@ -159,7 +139,7 @@ public:
};
explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
virtual ~GPU();
~GPU();
/// Binds a renderer to the GPU.
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@ -176,7 +156,7 @@ public:
/// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost();
/// Signal the ending of command list.
virtual void OnCommandListEnd();
void OnCommandListEnd();
/// Request a host GPU memory flush from the CPU.
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
@ -240,7 +220,7 @@ public:
}
// Waits for the GPU to finish working
virtual void WaitIdle() const = 0;
void WaitIdle() const;
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value);
@ -330,34 +310,34 @@ public:
/// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
virtual void Start() = 0;
void Start();
/// Obtain the CPU Context
virtual void ObtainContext() = 0;
void ObtainContext();
/// Release the CPU Context
virtual void ReleaseContext() = 0;
void ReleaseContext();
/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
void PushGPUEntries(Tegra::CommandList&& entries);
/// Push GPU command buffer entries to be processed
virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0;
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
/// Swap buffers (render frame)
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
void FlushRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
void InvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
void FlushAndInvalidateRegion(VAddr addr, u64 size);
protected:
virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
private:
void ProcessBindMethod(const MethodCall& method_call);
@ -427,6 +407,9 @@ private:
std::mutex flush_request_mutex;
const bool is_async;
VideoCommon::GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
};
#define ASSERT_REG_POSITION(field_name, position) \

View file

@ -1,86 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/hardware_interrupt_manager.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"
namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_)
: GPU{system_, true, use_nvdec_}, gpu_thread{system_} {}
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::Start() {
gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}
void GPUAsynch::ObtainContext() {
cpu_context->MakeCurrent();
}
void GPUAsynch::ReleaseContext() {
cpu_context->DoneCurrent();
}
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
// This condition fires when a video stream ends, clear all intermediary data
if (entries[0].raw == 0xDEADB33F) {
cdma_pusher.reset();
return;
}
if (!cdma_pusher) {
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
}
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));
cdma_pusher->Push(std::move(entries));
cdma_pusher->DispatchCalls();
}
void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer);
}
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
auto& interrupt_manager = system.InterruptManager();
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
void GPUAsynch::WaitIdle() const {
gpu_thread.WaitIdle();
}
void GPUAsynch::OnCommandListEnd() {
gpu_thread.OnCommandListEnd();
}
} // namespace VideoCommon

View file

@ -1,47 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
namespace Core::Frontend {
class GraphicsContext;
}
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch final : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system_, bool use_nvdec_);
~GPUAsynch() override;
void Start() override;
void ObtainContext() override;
void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override;
void OnCommandListEnd() override;
protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
private:
GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
};
} // namespace VideoCommon

View file

@ -1,61 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
namespace VideoCommon {
GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_) : GPU{system_, false, use_nvdec_} {}
GPUSynch::~GPUSynch() = default;
void GPUSynch::Start() {}
void GPUSynch::ObtainContext() {
renderer->Context().MakeCurrent();
}
void GPUSynch::ReleaseContext() {
renderer->Context().DoneCurrent();
}
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
dma_pusher->DispatchCalls();
}
void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
// This condition fires when a video stream ends, clears all intermediary data
if (entries[0].raw == 0xDEADB33F) {
cdma_pusher.reset();
return;
}
if (!cdma_pusher) {
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
}
cdma_pusher->Push(std::move(entries));
cdma_pusher->DispatchCalls();
}
void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
renderer->SwapBuffers(framebuffer);
}
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushRegion(addr, size);
}
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().InvalidateRegion(addr, size);
}
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
}
} // namespace VideoCommon

View file

@ -1,41 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/gpu.h"
namespace Core::Frontend {
class GraphicsContext;
}
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch final : public Tegra::GPU {
public:
explicit GPUSynch(Core::System& system_, bool use_nvdec_);
~GPUSynch() override;
void Start() override;
void ObtainContext() override;
void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override {}
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
[[maybe_unused]] u32 value) const override {}
};
} // namespace VideoCommon

View file

@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
@ -21,6 +22,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
SynchState& state, Tegra::CDmaPusher& cdma_pusher) {
std::string name = "yuzu:GPU";
MicroProfileOnThreadCreate(name.c_str());
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
Common::SetCurrentThreadName(name.c_str());
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
system.RegisterHostThread();
@ -65,7 +68,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
}
}
ThreadManager::ThreadManager(Core::System& system_) : system{system_} {}
ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
: system{system_}, is_async{is_async_} {}
ThreadManager::~ThreadManager() {
if (!thread.joinable()) {
@ -97,19 +101,30 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
}
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
if (!Settings::IsGPULevelHigh()) {
if (!is_async) {
// Always flush with synchronous GPU mode
PushCommand(FlushRegionCommand(addr, size));
return;
}
if (!Settings::IsGPULevelExtreme()) {
return;
}
if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
// Asynchronous GPU mode
switch (Settings::values.gpu_accuracy.GetValue()) {
case Settings::GPUAccuracy::Normal:
PushCommand(FlushRegionCommand(addr, size));
break;
case Settings::GPUAccuracy::High:
// TODO(bunnei): Is this right? Preserving existing behavior for now
break;
case Settings::GPUAccuracy::Extreme: {
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
PushCommand(GPUTickCommand());
while (fence > gpu.CurrentFlushRequestFence()) {
}
break;
}
default:
UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue());
}
}
@ -123,7 +138,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
}
void ThreadManager::WaitIdle() const {
while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
system.IsPoweredOn()) {
}
}
@ -134,6 +150,12 @@ void ThreadManager::OnCommandListEnd() {
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
if (!is_async) {
// In synchronous GPU mode, block the caller until the command has executed
WaitIdle();
}
return fence;
}

View file

@ -10,8 +10,9 @@
#include <optional>
#include <thread>
#include <variant>
#include "common/threadsafe_queue.h"
#include "video_core/gpu.h"
#include "video_core/framebuffer_config.h"
namespace Tegra {
struct FramebufferConfig;
@ -25,6 +26,10 @@ class GraphicsContext;
class System;
} // namespace Core
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that processing has ended
@ -112,7 +117,7 @@ struct SynchState final {
/// Class used to manage the GPU thread
class ThreadManager final {
public:
explicit ThreadManager(Core::System& system_);
explicit ThreadManager(Core::System& system_, bool is_async_);
~ThreadManager();
/// Creates and starts the GPU thread.
@ -150,6 +155,7 @@ private:
Core::System& system;
std::thread thread;
std::thread::id thread_id;
const bool is_async;
};
} // namespace VideoCommon::GPUThread

View file

@ -7,8 +7,6 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -39,13 +37,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
std::unique_ptr<Tegra::GPU> gpu;
const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec);
} else {
gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec);
}
std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);
auto context = emu_window.CreateSharedContext();
const auto scope = context->Acquire();