Merge pull request #2912 from FernandoS27/async-fixes
General fixes to Async GPU
This commit is contained in:
commit
ef9b31783d
16 changed files with 67 additions and 52 deletions
|
@ -3,6 +3,7 @@
|
|||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/memory.h"
|
||||
|
@ -17,6 +18,8 @@
|
|||
|
||||
namespace Tegra {
|
||||
|
||||
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
|
||||
|
||||
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
|
||||
: system{system}, renderer{renderer}, is_async{is_async} {
|
||||
auto& rasterizer{renderer.Rasterizer()};
|
||||
|
@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
|
|||
return *dma_pusher;
|
||||
}
|
||||
|
||||
void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
|
||||
// Synced GPU, is always in sync
|
||||
if (!is_async) {
|
||||
return;
|
||||
}
|
||||
MICROPROFILE_SCOPE(GPU_wait);
|
||||
while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
|
||||
}
|
||||
}
|
||||
|
||||
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
|
||||
syncpoints[syncpoint_id]++;
|
||||
std::lock_guard lock{sync_mutex};
|
||||
|
|
|
@ -177,6 +177,12 @@ public:
|
|||
/// Returns a reference to the GPU DMA pusher.
|
||||
Tegra::DmaPusher& DmaPusher();
|
||||
|
||||
// Waits for the GPU to finish working
|
||||
virtual void WaitIdle() const = 0;
|
||||
|
||||
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
|
||||
void WaitFence(u32 syncpoint_id, u32 value) const;
|
||||
|
||||
void IncrementSyncPoint(u32 syncpoint_id);
|
||||
|
||||
u32 GetSyncpointValue(u32 syncpoint_id) const;
|
||||
|
|
|
@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con
|
|||
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
|
||||
}
|
||||
|
||||
void GPUAsynch::WaitIdle() const {
|
||||
gpu_thread.WaitIdle();
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
@ -25,6 +25,7 @@ public:
|
|||
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void WaitIdle() const override;
|
||||
|
||||
protected:
|
||||
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
|
||||
|
|
|
@ -24,6 +24,7 @@ public:
|
|||
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void WaitIdle() const override {}
|
||||
|
||||
protected:
|
||||
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
|
||||
|
|
|
@ -5,8 +5,6 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/core_timing_util.h"
|
||||
#include "core/frontend/scope_acquire_window_context.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
|
|||
|
||||
void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
|
||||
thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
|
||||
synchronization_event = system.CoreTiming().RegisterEvent(
|
||||
"GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
|
||||
}
|
||||
|
||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||
const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
|
||||
const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
|
||||
system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
|
||||
PushCommand(SubmitListCommand(std::move(entries)));
|
||||
}
|
||||
|
||||
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
|
@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
|||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void ThreadManager::WaitIdle() const {
|
||||
while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
|
||||
}
|
||||
}
|
||||
|
||||
u64 ThreadManager::PushCommand(CommandData&& command_data) {
|
||||
const u64 fence{++state.last_fence};
|
||||
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
|
||||
return fence;
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
|
||||
void SynchState::WaitForSynchronization(u64 fence) {
|
||||
while (signaled_fence.load() < fence)
|
||||
;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::GPUThread
|
||||
|
|
|
@ -21,9 +21,6 @@ class DmaPusher;
|
|||
|
||||
namespace Core {
|
||||
class System;
|
||||
namespace Timing {
|
||||
struct EventType;
|
||||
} // namespace Timing
|
||||
} // namespace Core
|
||||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
@ -89,8 +86,6 @@ struct CommandDataContainer {
|
|||
struct SynchState final {
|
||||
std::atomic_bool is_running{true};
|
||||
|
||||
void WaitForSynchronization(u64 fence);
|
||||
|
||||
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
|
||||
CommandQueue queue;
|
||||
u64 last_fence{};
|
||||
|
@ -121,6 +116,9 @@ public:
|
|||
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
|
||||
|
||||
// Wait until the gpu thread is idle.
|
||||
void WaitIdle() const;
|
||||
|
||||
private:
|
||||
/// Pushes a command to be executed by the GPU thread
|
||||
u64 PushCommand(CommandData&& command_data);
|
||||
|
@ -128,7 +126,6 @@ private:
|
|||
private:
|
||||
SynchState state;
|
||||
Core::System& system;
|
||||
Core::Timing::EventType* synchronization_event{};
|
||||
std::thread thread;
|
||||
std::thread::id thread_id;
|
||||
};
|
||||
|
|
|
@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
|
|||
}
|
||||
|
||||
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
|
||||
std::lock_guard lock{pages_mutex};
|
||||
const u64 page_start{addr >> Memory::PAGE_BITS};
|
||||
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <cstddef>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
@ -230,6 +231,8 @@ private:
|
|||
|
||||
using CachedPageMap = boost::icl::interval_map<u64, int>;
|
||||
CachedPageMap cached_pages;
|
||||
|
||||
std::mutex pages_mutex;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
|
|||
RendererOpenGL::~RendererOpenGL() = default;
|
||||
|
||||
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
system.GetPerfStats().EndSystemFrame();
|
||||
|
||||
// Maintain the rasterizer's state as a priority
|
||||
OpenGLState prev_state = OpenGLState::GetCurState();
|
||||
state.AllDirty();
|
||||
|
@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
|||
|
||||
render_window.PollEvents();
|
||||
|
||||
system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
|
||||
system.GetPerfStats().BeginSystemFrame();
|
||||
|
||||
// Restore the rasterizer state
|
||||
prev_state.AllDirty();
|
||||
prev_state.Apply();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue