texture_cache: Fix linear image uploads

* Also fixed build for clang-cl with libc
2025-05-30 23:33:17 +00:00 · 2024-04-29 15:16:42 +03:00 · 2024-04-29 15:16:42 +03:00 · 25c04ad42f
commit 25c04ad42f
parent 7d96308759
13 changed files with 511 additions and 363 deletions
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@ -166,37 +166,34 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr
    Frame* frame = GetRenderFrame();

    // Post-processing (Anti-aliasing, FSR etc) goes here. For now just blit to the frame image.
-    scheduler.Record([frame, vk_image = vk::Image(image.image),
-                      size = image.info.size](vk::CommandBuffer cmdbuf) {
-        const vk::ImageMemoryBarrier pre_barrier{
-            .srcAccessMask = vk::AccessFlagBits::eTransferRead,
-            .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
-            .oldLayout = vk::ImageLayout::eUndefined,
-            .newLayout = vk::ImageLayout::eGeneral,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .image = frame->image,
-            .subresourceRange{
-                .aspectMask = vk::ImageAspectFlagBits::eColor,
-                .baseMipLevel = 0,
-                .levelCount = 1,
-                .baseArrayLayer = 0,
-                .layerCount = VK_REMAINING_ARRAY_LAYERS,
-            },
-        };
+    const vk::ImageMemoryBarrier pre_barrier{
+        .srcAccessMask = vk::AccessFlagBits::eTransferRead,
+        .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
+        .oldLayout = vk::ImageLayout::eUndefined,
+        .newLayout = vk::ImageLayout::eGeneral,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = frame->image,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .baseMipLevel = 0,
+            .levelCount = 1,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };

-        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
-                               vk::PipelineStageFlagBits::eTransfer,
-                               vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
-        cmdbuf.blitImage(vk_image, vk::ImageLayout::eGeneral, frame->image,
-                         vk::ImageLayout::eGeneral,
-                         MakeImageBlit(size.width, size.height, frame->width, frame->height),
-                         vk::Filter::eLinear);
-    });
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
+                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
+                           {}, {}, pre_barrier);
+    cmdbuf.blitImage(
+        image.image, vk::ImageLayout::eGeneral, frame->image, vk::ImageLayout::eGeneral,
+        MakeImageBlit(image.info.size.width, image.info.size.height, frame->width, frame->height),
+        vk::Filter::eLinear);

    // Flush pending vulkan operations.
    scheduler.Flush(frame->render_ready);
-    scheduler.WaitWorker();
    return frame;
 }

--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@ -129,7 +129,8 @@ bool Instance::CreateDevice() {
    shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
    external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
    tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
-    add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+    custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+    index_type_uint8 = add_extension(VK_KHR_INDEX_TYPE_UINT8_EXTENSION_NAME);

    const auto family_properties = physical_device.getQueueFamilyProperties();
    if (family_properties.empty()) {
@ -176,16 +177,9 @@ bool Instance::CreateDevice() {
                .shaderClipDistance = features.shaderClipDistance,
            },
        },
-        vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR{
+        vk::PhysicalDeviceVulkan12Features{
            .timelineSemaphore = true,
        },
-        vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{
-            .extendedDynamicState = true,
-        },
-        vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{
-            .extendedDynamicState2 = true,
-            .extendedDynamicState2LogicOp = true,
-        },
        vk::PhysicalDeviceCustomBorderColorFeaturesEXT{
            .customBorderColors = true,
            .customBorderColorWithoutFormat = true,
@ -195,6 +189,10 @@ bool Instance::CreateDevice() {
        },
    };

+    if (!index_type_uint8) {
+        device_chain.unlink<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
+    }
+
    try {
        device = physical_device.createDeviceUnique(device_chain.get());
    } catch (vk::ExtensionNotPresentError& err) {
--- a/src/video_core/renderer_vulkan/vk_platform.cpp
+++ b/src/video_core/renderer_vulkan/vk_platform.cpp
@ -4,7 +4,7 @@
 // Include the vulkan platform specific header
 #if defined(ANDROID)
 #define VK_USE_PLATFORM_ANDROID_KHR
-#elif defined(WIN32)
+#elif defined(_WIN64)
 #define VK_USE_PLATFORM_WIN32_KHR
 #elif defined(__APPLE__)
 #define VK_USE_PLATFORM_METAL_EXT
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@ -2,35 +2,14 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include <mutex>
-#include <utility>
-#include "common/thread.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"

 namespace Vulkan {

-void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
-    auto command = first;
-    while (command != nullptr) {
-        auto next = command->GetNext();
-        command->Execute(cmdbuf);
-        command->~Command();
-        command = next;
-    }
-    submit = false;
-    command_offset = 0;
-    first = nullptr;
-    last = nullptr;
-}
-
 Scheduler::Scheduler(const Instance& instance)
-    : master_semaphore{instance}, command_pool{instance, &master_semaphore}, use_worker_thread{
-                                                                                 true} {
+    : master_semaphore{instance}, command_pool{instance, &master_semaphore} {
    AllocateWorkerCommandBuffers();
-    if (use_worker_thread) {
-        AcquireNewChunk();
-        worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
-    }
 }

 Scheduler::~Scheduler() = default;
@ -47,24 +26,6 @@ void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
    Wait(presubmit_tick);
 }

-void Scheduler::WaitWorker() {
-    if (!use_worker_thread) {
-        return;
-    }
-
-    DispatchWork();
-
-    // Ensure the queue is drained.
-    {
-        std::unique_lock ql{queue_mutex};
-        event_cv.wait(ql, [this] { return work_queue.empty(); });
-    }
-
-    // Now wait for execution to finish.
-    // This needs to be done in the same order as WorkerThread.
-    std::scoped_lock el{execution_mutex};
-}
-
 void Scheduler::Wait(u64 tick) {
    if (tick >= master_semaphore.CurrentTick()) {
        // Make sure we are not waiting for the current tick without signalling
@ -73,73 +34,6 @@ void Scheduler::Wait(u64 tick) {
    master_semaphore.Wait(tick);
 }

-void Scheduler::DispatchWork() {
-    if (!use_worker_thread || chunk->Empty()) {
-        return;
-    }
-
-    {
-        std::scoped_lock ql{queue_mutex};
-        work_queue.push(std::move(chunk));
-    }
-
-    event_cv.notify_all();
-    AcquireNewChunk();
-}
-
-void Scheduler::WorkerThread(std::stop_token stop_token) {
-    Common::SetCurrentThreadName("VulkanWorker");
-
-    const auto TryPopQueue{[this](auto& work) -> bool {
-        if (work_queue.empty()) {
-            return false;
-        }
-
-        work = std::move(work_queue.front());
-        work_queue.pop();
-        event_cv.notify_all();
-        return true;
-    }};
-
-    while (!stop_token.stop_requested()) {
-        std::unique_ptr<CommandChunk> work;
-
-        {
-            std::unique_lock lk{queue_mutex};
-
-            // Wait for work.
-            event_cv.wait(lk, stop_token, [&] { return TryPopQueue(work); });
-
-            // If we've been asked to stop, we're done.
-            if (stop_token.stop_requested()) {
-                return;
-            }
-
-            // Exchange lock ownership so that we take the execution lock before
-            // the queue lock goes out of scope. This allows us to force execution
-            // to complete in the next step.
-            std::exchange(lk, std::unique_lock{execution_mutex});
-
-            // Perform the work, tracking whether the chunk was a submission
-            // before executing.
-            const bool has_submit = work->HasSubmit();
-            work->ExecuteAll(current_cmdbuf);
-
-            // If the chunk was a submission, reallocate the command buffer.
-            if (has_submit) {
-                AllocateWorkerCommandBuffers();
-            }
-        }
-
-        {
-            std::scoped_lock rl{reserve_mutex};
-
-            // Recycle the chunk back to the reserve.
-            chunk_reserve.emplace_back(std::move(work));
-        }
-    }
-}
-
 void Scheduler::AllocateWorkerCommandBuffers() {
    const vk::CommandBufferBeginInfo begin_info = {
        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
@ -152,30 +46,10 @@ void Scheduler::AllocateWorkerCommandBuffers() {
 void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
    const u64 signal_value = master_semaphore.NextTick();

-    Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
-        std::scoped_lock lock{submit_mutex};
-        master_semaphore.SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value);
-    });
-
+    std::scoped_lock lk{submit_mutex};
+    master_semaphore.SubmitWork(current_cmdbuf, wait_semaphore, signal_semaphore, signal_value);
    master_semaphore.Refresh();
-
-    if (!use_worker_thread) {
-        AllocateWorkerCommandBuffers();
-    } else {
-        chunk->MarkSubmit();
-        DispatchWork();
-    }
-}
-
-void Scheduler::AcquireNewChunk() {
-    std::scoped_lock lock{reserve_mutex};
-    if (chunk_reserve.empty()) {
-        chunk = std::make_unique<CommandChunk>();
-        return;
-    }
-
-    chunk = std::move(chunk_reserve.back());
-    chunk_reserve.pop_back();
+    AllocateWorkerCommandBuffers();
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@ -4,13 +4,6 @@
 #pragma once

 #include <condition_variable>
-#include <functional>
-#include <memory>
-#include <thread>
-#include <utility>
-#include <queue>
-
-#include "common/alignment.h"
 #include "common/types.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_resource_pool.h"
@ -19,8 +12,6 @@ namespace Vulkan {

 class Instance;

-/// The scheduler abstracts command buffer and fence management with an interface that's able to do
-/// OpenGL-like operations on Vulkan command buffers.
 class Scheduler {
 public:
    explicit Scheduler(const Instance& instance);
@ -32,34 +23,12 @@ public:
    /// Sends the current execution context to the GPU and waits for it to complete.
    void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);

-    /// Waits for the worker thread to finish executing everything. After this function returns it's
-    /// safe to touch worker resources.
-    void WaitWorker();
-
    /// Waits for the given tick to trigger on the GPU.
    void Wait(u64 tick);

-    /// Sends currently recorded work to the worker thread.
-    void DispatchWork();
-
-    /// Records the command to the current chunk.
-    template <typename T>
-    void Record(T&& command) {
-        if (chunk->Record(command)) {
-            return;
-        }
-        DispatchWork();
-        (void)chunk->Record(command);
-    }
-
-    /// Registers a callback to perform on queue submission.
-    void RegisterOnSubmit(std::function<void()>&& func) {
-        on_submit = std::move(func);
-    }
-
-    /// Registers a callback to perform on queue submission.
-    void RegisterOnDispatch(std::function<void()>&& func) {
-        on_dispatch = std::move(func);
+    /// Returns the current command buffer.
+    vk::CommandBuffer CommandBuffer() const {
+        return current_cmdbuf;
    }

    /// Returns the current command buffer tick.
@ -80,113 +49,15 @@ public:
    std::mutex submit_mutex;

 private:
-    class Command {
-    public:
-        virtual ~Command() = default;
-
-        virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
-
-        Command* GetNext() const {
-            return next;
-        }
-
-        void SetNext(Command* next_) {
-            next = next_;
-        }
-
-    private:
-        Command* next = nullptr;
-    };
-
-    template <typename T>
-    class TypedCommand final : public Command {
-    public:
-        explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
-        ~TypedCommand() override = default;
-
-        TypedCommand(TypedCommand&&) = delete;
-        TypedCommand& operator=(TypedCommand&&) = delete;
-
-        void Execute(vk::CommandBuffer cmdbuf) const override {
-            command(cmdbuf);
-        }
-
-    private:
-        T command;
-    };
-
-    class CommandChunk final {
-    public:
-        void ExecuteAll(vk::CommandBuffer cmdbuf);
-
-        template <typename T>
-        bool Record(T& command) {
-            using FuncType = TypedCommand<T>;
-            static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
-
-            recorded_counts++;
-            command_offset = Common::alignUp(command_offset, alignof(FuncType));
-            if (command_offset > sizeof(data) - sizeof(FuncType)) {
-                return false;
-            }
-            Command* const current_last = last;
-            last = new (data.data() + command_offset) FuncType(std::move(command));
-
-            if (current_last) {
-                current_last->SetNext(last);
-            } else {
-                first = last;
-            }
-            command_offset += sizeof(FuncType);
-            return true;
-        }
-
-        void MarkSubmit() {
-            submit = true;
-        }
-
-        bool Empty() const {
-            return recorded_counts == 0;
-        }
-
-        bool HasSubmit() const {
-            return submit;
-        }
-
-    private:
-        Command* first = nullptr;
-        Command* last = nullptr;
-
-        std::size_t recorded_counts = 0;
-        std::size_t command_offset = 0;
-        bool submit = false;
-        alignas(std::max_align_t) std::array<u8, 0x8000> data{};
-    };
-
-private:
-    void WorkerThread(std::stop_token stop_token);
-
    void AllocateWorkerCommandBuffers();

    void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);

-    void AcquireNewChunk();
-
 private:
    MasterSemaphore master_semaphore;
    CommandPool command_pool;
-    std::unique_ptr<CommandChunk> chunk;
-    std::queue<std::unique_ptr<CommandChunk>> work_queue;
-    std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
    vk::CommandBuffer current_cmdbuf;
-    std::function<void()> on_submit;
-    std::function<void()> on_dispatch;
-    std::mutex execution_mutex;
-    std::mutex reserve_mutex;
-    std::mutex queue_mutex;
    std::condition_variable_any event_cv;
-    std::jthread worker_thread;
-    bool use_worker_thread;
 };

 } // namespace Vulkan
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@ -51,6 +51,18 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
    size.width = attrib.width;
    size.height = attrib.height;
    pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) >> 7;
+    const bool is_32bpp = pixel_format == vk::Format::eB8G8R8A8Srgb ||
+                          pixel_format == vk::Format::eA8B8G8R8SrgbPack32;
+    ASSERT(is_32bpp);
+    if (!is_tiled) {
+        guest_size_bytes = pitch * size.height * 4;
+        return;
+    }
+    if (Config::isNeoMode()) {
+        guest_size_bytes = pitch * 128 * ((size.height + 127) & (~127)) * 4;
+    } else {
+        guest_size_bytes = pitch * 128 * ((size.height + 63) & (~63)) * 4;
+    }
 }

 UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_)
@ -83,8 +95,9 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {

 Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
             const ImageInfo& info_, VAddr cpu_addr)
-    : instance{&instance_}, scheduler{&scheduler_}, info{info_},
-      image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr} {
+    : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(),
+                                                                       instance->GetAllocator()},
+      cpu_addr{cpu_addr}, cpu_addr_end{cpu_addr + info.guest_size_bytes} {
    vk::ImageCreateFlags flags{};
    if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
        info.size.width == info.size.height) {
@ -111,39 +124,27 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,

    image.Create(image_ci);

-    const vk::Image handle = image;
-    scheduler->Record([handle](vk::CommandBuffer cmdbuf) {
-        const vk::ImageMemoryBarrier init_barrier = {
-            .srcAccessMask = vk::AccessFlagBits::eNone,
-            .dstAccessMask = vk::AccessFlagBits::eNone,
-            .oldLayout = vk::ImageLayout::eUndefined,
-            .newLayout = vk::ImageLayout::eGeneral,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .image = handle,
-            .subresourceRange{
-                .aspectMask = vk::ImageAspectFlagBits::eColor,
-                .baseMipLevel = 0,
-                .levelCount = VK_REMAINING_MIP_LEVELS,
-                .baseArrayLayer = 0,
-                .layerCount = VK_REMAINING_ARRAY_LAYERS,
-            },
-        };
-        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe,
-                               vk::PipelineStageFlagBits::eTopOfPipe,
-                               vk::DependencyFlagBits::eByRegion, {}, {}, init_barrier);
-    });
+    const vk::ImageMemoryBarrier init_barrier = {
+        .srcAccessMask = vk::AccessFlagBits::eNone,
+        .dstAccessMask = vk::AccessFlagBits::eNone,
+        .oldLayout = vk::ImageLayout::eUndefined,
+        .newLayout = vk::ImageLayout::eGeneral,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = image,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .baseMipLevel = 0,
+            .levelCount = VK_REMAINING_MIP_LEVELS,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };

-    const bool is_32bpp = info.pixel_format == vk::Format::eB8G8R8A8Srgb ||
-                          info.pixel_format == vk::Format::eA8B8G8R8SrgbPack32;
-    ASSERT(info.is_tiled && is_32bpp);
-
-    if (Config::isNeoMode()) {
-        guest_size_bytes = info.pitch * 128 * ((info.size.height + 127) & (~127)) * 4;
-    } else {
-        guest_size_bytes = info.pitch * 128 * ((info.size.height + 63) & (~63)) * 4;
-    }
-    cpu_addr_end = cpu_addr + guest_size_bytes;
+    const auto cmdbuf = scheduler->CommandBuffer();
+    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe,
+                           vk::PipelineStageFlagBits::eTopOfPipe, vk::DependencyFlagBits::eByRegion,
+                           {}, {}, init_barrier);
 }

 Image::~Image() = default;
--- a/src/video_core/texture_cache/image.h
+++ b/src/video_core/texture_cache/image.h
@ -38,7 +38,8 @@ struct ImageInfo {
    vk::ImageType type = vk::ImageType::e1D;
    SubresourceExtent resources;
    Extent3D size{1, 1, 1};
-    u32 pitch;
+    u32 pitch = 0;
+    u32 guest_size_bytes = 0;
 };

 struct Handle {
@ -105,12 +106,9 @@ struct Image {
    ImageInfo info;
    UniqueImage image;
    vk::ImageAspectFlags aspect_mask;
-    u32 guest_size_bytes = 0;
-    size_t channel = 0;
    ImageFlagBits flags = ImageFlagBits::CpuModified;
    VAddr cpu_addr = 0;
    VAddr cpu_addr_end = 0;
-    u64 modification_tick = 0;
 };

 } // namespace VideoCore
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@ -132,10 +132,15 @@ void TextureCache::RefreshImage(Image& image) {
    image.flags &= ~ImageFlagBits::CpuModified;

    // Upload data to the staging buffer.
-    const auto [data, offset, _] = staging.Map(image.guest_size_bytes, 0);
-    ConvertTileToLinear(data, reinterpret_cast<const u8*>(image.cpu_addr), image.info.size.width,
-                        image.info.size.height, Config::isNeoMode());
-    staging.Commit(image.guest_size_bytes);
+    const auto [data, offset, _] = staging.Map(image.info.guest_size_bytes, 0);
+    const u8* image_data = reinterpret_cast<const u8*>(image.cpu_addr);
+    if (image.info.is_tiled) {
+        ConvertTileToLinear(data, image_data, image.info.size.width, image.info.size.height,
+                            Config::isNeoMode());
+    } else {
+        std::memcpy(data, image_data, image.info.guest_size_bytes);
+    }
+    staging.Commit(image.info.guest_size_bytes);

    // Copy to the image.
    const vk::BufferImageCopy image_copy = {
@ -152,11 +157,43 @@ void TextureCache::RefreshImage(Image& image) {
        .imageExtent = {image.info.size.width, image.info.size.height, 1},
    };

-    const vk::Buffer src_buffer = staging.Handle();
-    const vk::Image dst_image = image.image;
-    scheduler.Record([src_buffer, dst_image, image_copy](vk::CommandBuffer cmdbuf) {
-        cmdbuf.copyBufferToImage(src_buffer, dst_image, vk::ImageLayout::eGeneral, image_copy);
-    });
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const vk::ImageSubresourceRange range = {
+        .aspectMask = vk::ImageAspectFlagBits::eColor,
+        .baseMipLevel = 0,
+        .levelCount = 1,
+        .baseArrayLayer = 0,
+        .layerCount = VK_REMAINING_ARRAY_LAYERS,
+    };
+    const vk::ImageMemoryBarrier read_barrier = {
+        .srcAccessMask = vk::AccessFlagBits::eShaderRead,
+        .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
+        .oldLayout = vk::ImageLayout::eGeneral,
+        .newLayout = vk::ImageLayout::eTransferDstOptimal,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = image.image,
+        .subresourceRange = range,
+    };
+    const vk::ImageMemoryBarrier write_barrier = {
+        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
+        .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead,
+        .oldLayout = vk::ImageLayout::eTransferDstOptimal,
+        .newLayout = vk::ImageLayout::eGeneral,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = image.image,
+        .subresourceRange = range,
+    };
+
+    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllGraphics,
+                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
+                           {}, {}, read_barrier);
+    cmdbuf.copyBufferToImage(staging.Handle(), image.image, vk::ImageLayout::eTransferDstOptimal,
+                             image_copy);
+    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
+                           vk::PipelineStageFlagBits::eAllGraphics,
+                           vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier);
 }

 void TextureCache::RegisterImage(ImageId image_id) {
@ -164,7 +201,7 @@ void TextureCache::RegisterImage(ImageId image_id) {
    ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
               "Trying to register an already registered image");
    image.flags |= ImageFlagBits::Registered;
-    ForEachPage(image.cpu_addr, image.guest_size_bytes,
+    ForEachPage(image.cpu_addr, image.info.guest_size_bytes,
                [this, image_id](u64 page) { page_table[page].push_back(image_id); });
 }

@ -173,7 +210,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
    ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
               "Trying to unregister an already registered image");
    image.flags &= ~ImageFlagBits::Registered;
-    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+    ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
        const auto page_it = page_table.find(page);
        if (page_it == page_table.end()) {
            ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits);
@ -195,7 +232,7 @@ void TextureCache::TrackImage(Image& image, ImageId image_id) {
        return;
    }
    image.flags |= ImageFlagBits::Tracked;
-    UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+    UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1);
 }

 void TextureCache::UntrackImage(Image& image, ImageId image_id) {
@ -203,7 +240,7 @@ void TextureCache::UntrackImage(Image& image, ImageId image_id) {
        return;
    }
    image.flags &= ~ImageFlagBits::Tracked;
-    UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+    UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1);
 }

 void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {