video_core: CPU flip relay (#415)

* video_core: CPU flip is now propagated via the GPU thread
* tentative fix for CPU flips racing
* libraries: videoout: better flip status handling
2025-07-02 23:26:20 +00:00 · 2024-08-14 11:36:11 +02:00 · 2024-08-14 11:36:11 +02:00 · 27cb218584
commit 27cb218584
parent ad3b6c793c
8 changed files with 98 additions and 33 deletions
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@ -35,7 +35,7 @@ void Liverpool::Process(std::stop_token stoken) {
        {
            std::unique_lock lk{submit_mutex};
            Common::CondvarWait(submit_cv, lk, stoken,
-                                [this] { return num_submits != 0 || submit_done; });
+                                [this] { return num_commands || num_submits || submit_done; });
        }
        if (stoken.stop_requested()) {
            break;
@ -45,7 +45,23 @@ void Liverpool::Process(std::stop_token stoken) {

        int qid = -1;

-        while (num_submits) {
+        while (num_submits || num_commands) {
+
+            // Process incoming commands with high priority: all pending commands are
+            // executed before the next queue submission is looked at.
+            while (num_commands) {
+
+                Common::UniqueFunction<void> callback{};
+                {
+                    std::unique_lock lk{submit_mutex};
+                    // std::queue::pop() removes the *front* element, so the callback has to be
+                    // taken from front() as well. Taking it from back() would run commands out
+                    // of order, drop the front command without executing it and leave a
+                    // moved-from entry in the queue whenever more than one command is pending.
+                    callback = std::move(command_queue.front());
+                    command_queue.pop();
+                }
+
+                // Invoke outside of the lock so SendCommand() callers are not blocked meanwhile.
+                callback();
+
+                --num_commands;
+            }
+
            qid = (qid + 1) % NumTotalQueues;

            auto& queue = mapped_queues[qid];
@ -219,7 +235,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
            // In the case of HW, render target memory has alignment as color block operates on
            // tiles. There is no information of actual resource extents stored in CB context
            // regs, so any deduction of it from slices/pitch will lead to a larger surface created.
-            // The same applies to the depth targets. Fortunatelly, the guest always sends
+            // The same applies to the depth targets. Fortunately, the guest always sends
            // a trailing NOP packet right after the context regs setup, so we can use the heuristic
            // below and extract the hint to determine actual resource dims.

--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@ -11,10 +11,12 @@
 #include <span>
 #include <thread>
 #include <queue>
+
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/polyfill_thread.h"
 #include "common/types.h"
+#include "common/unique_function.h"
 #include "video_core/amdgpu/pixel_format.h"
 #include "video_core/amdgpu/resource.h"

@ -1054,6 +1056,13 @@ public:
        rasterizer = rasterizer_;
    }

+    void SendCommand(Common::UniqueFunction<void>&& func) { // Queues func and signals submit_cv.
+        std::scoped_lock lk{submit_mutex}; // Held for the queue update, the counter bump and the notify.
+        command_queue.emplace(std::move(func));
+        ++num_commands; // Pending-command count; Liverpool::Process loops on it.
+        submit_cv.notify_one();
+    }
+
 private:
    struct Task {
        struct promise_type {
@ -1122,9 +1131,11 @@ private:
    Libraries::VideoOut::VideoOutPort* vo_port{};
    std::jthread process_thread{};
    std::atomic<u32> num_submits{};
+    std::atomic<u32> num_commands{};
    std::atomic<bool> submit_done{};
    std::mutex submit_mutex;
    std::condition_variable_any submit_cv;
+    std::queue<Common::UniqueFunction<void>> command_queue{};
 };

 static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@ -48,13 +48,14 @@ public:
                        VAddr cpu_address, bool is_eop) {
        const auto info = VideoCore::ImageInfo{attribute, cpu_address};
        const auto image_id = texture_cache.FindImage(info);
+        texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler);
        auto& image = texture_cache.GetImage(image_id);
        return PrepareFrameInternal(image, is_eop);
    }

-    Frame* PrepareBlankFrame() {
+    Frame* PrepareBlankFrame(bool is_eop) {
        // Builds the frame from the texture cache image stored under NULL_IMAGE_ID.
        auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
-        return PrepareFrameInternal(image, true);
+        return PrepareFrameInternal(image, is_eop); // is_eop is passed through unchanged.
    }

    VideoCore::Image& RegisterVideoOutSurface(
@ -75,6 +76,11 @@ public:
    void Present(Frame* frame);
    void RecreateFrame(Frame* frame, u32 width, u32 height);

+    void FlushDraw() { // Flushes draw_scheduler with a default-constructed SubmitInfo.
+        SubmitInfo info{};
+        draw_scheduler.Flush(info);
+    }
+
 private:
    Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true);
    Frame* GetRenderFrame();
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@ -223,7 +223,7 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
    return RegisterImageView(image_id, view_info);
 }

-void TextureCache::RefreshImage(Image& image) {
+void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
    // Mark image as validated.
    image.flags &= ~ImageFlagBits::CpuModified;

@ -269,8 +269,10 @@ void TextureCache::RefreshImage(Image& image) {
        return;
    }

-    scheduler.EndRendering();
-    const auto cmdbuf = scheduler.CommandBuffer();
+    auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler;
+    sched_ptr->EndRendering();
+
+    const auto cmdbuf = sched_ptr->CommandBuffer();
    image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);

    const VAddr image_addr = image.info.guest_address;
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@ -59,17 +59,17 @@ public:
                                             const ImageViewInfo& view_info);

    /// Updates image contents if it was modified by CPU.
    /// The optional custom_scheduler is forwarded to RefreshImage as-is.
-    void UpdateImage(ImageId image_id) {
+    void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
        Image& image = slot_images[image_id];
        if (False(image.flags & ImageFlagBits::CpuModified)) {
            // CpuModified is not set: skip the refresh.
            return;
        }
-        RefreshImage(image);
+        RefreshImage(image, custom_scheduler);
        TrackImage(image, image_id);
    }

    /// Reuploads image contents.
    /// When custom_scheduler is null, the implementation falls back to `scheduler`.
-    void RefreshImage(Image& image);
+    void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr);

    /// Retrieves the sampler that matches the provided S# descriptor.
    [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);