texture_cache: Implement color<->depth copies (#3079)

* texture_cache: Implement color to depth copies and vice versa

* ir_passes: Adjust shared memory barrier pass to cover more cases

* texture_cache: Remove unused code

* Address review comment
TheTurtle 2025-06-11 11:34:37 +03:00 committed by GitHub
parent fc4fd0107d
commit dedf6de2ac
7 changed files with 157 additions and 54 deletions
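
Background note (not part of the commit itself): vkCmdCopyImage cannot copy directly between a depth/stencil image and a color image, since Vulkan only allows such copies when the depth/stencil formats match exactly. The new CopyImageWithBuffer path therefore round-trips the texels through an intermediate buffer: copy the source aspect into the buffer, barrier the buffer between the two transfers, then copy the buffer into the destination aspect. The sketch below is a minimal, self-contained illustration of that pattern, assuming vulkan-hpp built with VULKAN_HPP_NO_CONSTRUCTORS and source/destination formats of equal texel size (for example D32_SFLOAT and R32_SFLOAT); the function and parameter names are illustrative, not taken from the repository.

// Minimal sketch (not from the commit): the image -> buffer -> image pattern used for
// depth<->color copies. All handles are assumed to be created, bound to memory, and already
// transitioned to the listed layouts by the caller; every name here is illustrative only.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <vulkan/vulkan.hpp>

void CopyDepthToColorViaBuffer(vk::CommandBuffer cmdbuf, vk::Image depth_src, vk::Image color_dst,
                               vk::Buffer staging, vk::Extent3D extent) {
    vk::BufferImageCopy region{
        .bufferOffset = 0,
        .bufferRowLength = 0,
        .bufferImageHeight = 0,
        .imageSubresource =
            {
                .aspectMask = vk::ImageAspectFlagBits::eDepth,
                .mipLevel = 0,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
        .imageExtent = extent,
    };
    // Read the raw texels of the depth aspect into the intermediate buffer.
    cmdbuf.copyImageToBuffer(depth_src, vk::ImageLayout::eTransferSrcOptimal, staging, region);
    // Make the transfer write visible to the following transfer read of the same buffer.
    const vk::BufferMemoryBarrier2 barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
        .buffer = staging,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &barrier,
    });
    // Write the same bytes into the color aspect of the destination (texel sizes must match).
    region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor;
    cmdbuf.copyBufferToImage(staging, color_dst, vk::ImageLayout::eTransferDstOptimal, region);
}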

@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <unordered_set>
 #include "shader_recompiler/ir/breadth_first_search.h"
 #include "shader_recompiler/ir/ir_emitter.h"
 #include "shader_recompiler/ir/program.h"
@@ -51,11 +52,14 @@ static void EmitBarrierInBlock(IR::Block* block) {
     }
 }
 
+using NodeSet = std::unordered_set<const IR::Block*>;
+
 // Inserts a barrier after divergent conditional blocks to avoid undefined
 // behavior when some threads write and others read from shared memory.
-static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
+static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data,
+                                    NodeSet& divergence_end, u32& divergence_depth) {
     const IR::U1 cond = data.if_node.cond;
-    const auto insert_barrier =
+    const auto is_divergent_cond =
         IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
             if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
                 inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
@@ -63,12 +67,16 @@ static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
            }
            return std::nullopt;
        });
-    if (insert_barrier) {
+    if (is_divergent_cond) {
+        if (divergence_depth == 0) {
            IR::Block* const merge = data.if_node.merge;
            auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
            IR::IREmitter ir{*merge, insert_point};
            ir.Barrier();
        }
+        ++divergence_depth;
+        divergence_end.emplace(data.if_node.merge);
     }
 }
 
 static constexpr u32 GcnSubgroupSize = 64;
@@ -89,19 +97,22 @@ void SharedMemoryBarrierPass(IR::Program& program, const RuntimeInfo& runtime_in
        return;
    }
    using Type = IR::AbstractSyntaxNode::Type;
-    u32 branch_depth{};
+    u32 divergence_depth{};
+    NodeSet divergence_end;
    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
        if (node.type == Type::EndIf) {
-            --branch_depth;
+            if (divergence_end.contains(node.data.end_if.merge)) {
+                --divergence_depth;
+            }
            continue;
        }
        // Check if branch depth is zero, we don't want to insert barrier in potentially divergent
        // code.
-        if (node.type == Type::If && branch_depth++ == 0) {
-            EmitBarrierInMergeBlock(node.data);
+        if (node.type == Type::If) {
+            EmitBarrierInMergeBlock(node.data, divergence_end, divergence_depth);
            continue;
        }
-        if (node.type == Type::Block && branch_depth == 0) {
+        if (node.type == Type::Block && divergence_depth == 0) {
            EmitBarrierInBlock(node.data.block);
        }
    }
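
For readers unfamiliar with the pass, the following standalone C++ sketch (not emulator code; every name in it is made up) mirrors the bookkeeping introduced above: only conditionals that depend on LocalInvocationId raise the divergence counter, and only their recorded merge blocks lower it again. A divergent branch nested inside a uniform one therefore still gets a barrier at its merge point, which the old per-If branch_depth counter would have suppressed.

// Standalone illustration of the divergence-depth bookkeeping; assumes C++20.
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

struct Node {
    enum class Type { If, EndIf, Block } type;
    int merge_id;        // merge block id shared by an If/EndIf pair (-1 for plain blocks)
    bool divergent_cond; // condition depends on LocalInvocationId
};

int main() {
    // Structure: If(uniform) { If(divergent) { Block } EndIf } EndIf Block
    const std::vector<Node> syntax_list{
        {Node::Type::If, 0, false},    {Node::Type::If, 1, true},
        {Node::Type::Block, -1, false}, {Node::Type::EndIf, 1, false},
        {Node::Type::EndIf, 0, false},  {Node::Type::Block, -1, false},
    };
    std::unordered_set<int> divergence_end;
    std::uint32_t divergence_depth = 0;
    for (const Node& node : syntax_list) {
        switch (node.type) {
        case Node::Type::EndIf:
            if (divergence_end.contains(node.merge_id)) {
                --divergence_depth; // leaving a divergent region
            }
            break;
        case Node::Type::If:
            if (node.divergent_cond) {
                if (divergence_depth == 0) {
                    std::cout << "insert barrier at merge block " << node.merge_id << '\n';
                }
                ++divergence_depth;
                divergence_end.insert(node.merge_id);
            }
            break;
        case Node::Type::Block:
            if (divergence_depth == 0) {
                std::cout << "scan block for shared memory accesses needing barriers\n";
            }
            break;
        }
    }
}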

@@ -23,6 +23,7 @@ static constexpr size_t DataShareBufferSize = 64_KB;
 static constexpr size_t StagingBufferSize = 512_MB;
 static constexpr size_t UboStreamBufferSize = 128_MB;
 static constexpr size_t DownloadBufferSize = 128_MB;
+static constexpr size_t DeviceBufferSize = 16_MB;
 static constexpr size_t MaxPageFaults = 1024;
 
 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
@@ -32,7 +33,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
-      download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize),
+      download_buffer{instance, scheduler, MemoryUsage::Download, DownloadBufferSize},
+      device_buffer{instance, scheduler, MemoryUsage::DeviceLocal, DeviceBufferSize},
       gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
       bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
                            0, AllFlags, BDA_PAGETABLE_SIZE},
@@ -348,7 +350,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
     return {&buffer, buffer.Offset(device_addr)};
 }
 
-std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) {
+std::pair<Buffer*, u32> BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 size) {
     // Check if any buffer contains the full requested range.
     const u64 page = gpu_addr >> CACHING_PAGEBITS;
     const BufferId buffer_id = page_table[page].buffer_id;
@@ -361,10 +363,10 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
     }
     // If no buffer contains the full requested range but some buffer within was GPU-modified,
     // fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications.
-    // This is only done if the request prefers to use GPU memory, otherwise we can skip it.
-    if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
+    if (memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
        return ObtainBuffer(gpu_addr, size, false, false);
    }
     // In all other cases, just do a CPU copy to the staging buffer.
     const auto [data, offset] = staging_buffer.Map(size, 16);
     memory->CopySparseMemory(gpu_addr, data, size);

@@ -80,11 +80,6 @@ public:
         return &gds_buffer;
     }
 
-    /// Retrieves the host visible device local stream buffer.
-    [[nodiscard]] StreamBuffer& GetStreamBuffer() noexcept {
-        return stream_buffer;
-    }
-
     /// Retrieves the device local DBA page table buffer.
     [[nodiscard]] Buffer* GetBdaPageTableBuffer() noexcept {
         return &bda_pagetable_buffer;
@@ -100,6 +95,20 @@
         return slot_buffers[id];
     }
 
+    /// Retrieves a utility buffer optimized for specified memory usage.
+    StreamBuffer& GetUtilityBuffer(MemoryUsage usage) noexcept {
+        switch (usage) {
+        case MemoryUsage::Stream:
+            return stream_buffer;
+        case MemoryUsage::Download:
+            return download_buffer;
+        case MemoryUsage::Upload:
+            return staging_buffer;
+        case MemoryUsage::DeviceLocal:
+            return device_buffer;
+        }
+    }
+
     /// Invalidates any buffer in the logical page range.
     void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
@@ -121,8 +130,7 @@ public:
                                               BufferId buffer_id = {});
 
     /// Attempts to obtain a buffer without modifying the cache contents.
-    [[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size,
-                                                           bool prefer_gpu);
+    [[nodiscard]] std::pair<Buffer*, u32> ObtainBufferForImage(VAddr gpu_addr, u32 size);
 
     /// Return true when a region is registered on the cache
     [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
@@ -193,6 +201,7 @@ private:
     StreamBuffer staging_buffer;
     StreamBuffer stream_buffer;
     StreamBuffer download_buffer;
+    StreamBuffer device_buffer;
     Buffer gds_buffer;
     Buffer bda_pagetable_buffer;
     Buffer fault_buffer;

@@ -549,7 +549,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
            const auto* gds_buf = buffer_cache.GetGdsBuffer();
            buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
        } else if (desc.buffer_type == Shader::BufferType::Flatbuf) {
-            auto& vk_buffer = buffer_cache.GetStreamBuffer();
+            auto& vk_buffer = buffer_cache.GetUtilityBuffer(VideoCore::MemoryUsage::Stream);
            const u32 ubo_size = stage.flattened_ud_buf.size() * sizeof(u32);
            const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
                                              instance.UniformMinAlignment());
@@ -561,7 +561,7 @@
            const auto* fault_buffer = buffer_cache.GetFaultBuffer();
            buffer_infos.emplace_back(fault_buffer->Handle(), 0, fault_buffer->SizeBytes());
        } else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
-            auto& lds_buffer = buffer_cache.GetStreamBuffer();
+            auto& lds_buffer = buffer_cache.GetUtilityBuffer(VideoCore::MemoryUsage::Stream);
            const auto& cs_program = liverpool->GetCsRegs();
            const auto lds_size = cs_program.SharedMemSize() * cs_program.NumWorkgroups();
            const auto [data, offset] =

@@ -312,43 +312,121 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
             vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }
 
-void Image::CopyImage(const Image& image) {
+void Image::CopyImage(const Image& src_image) {
     scheduler->EndRendering();
     Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
 
     auto cmdbuf = scheduler->CommandBuffer();
 
+    const auto& src_info = src_image.info;
     boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
-    const u32 num_mips = std::min(image.info.resources.levels, info.resources.levels);
+    const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels);
     for (u32 m = 0; m < num_mips; ++m) {
-        const auto mip_w = std::max(image.info.size.width >> m, 1u);
-        const auto mip_h = std::max(image.info.size.height >> m, 1u);
-        const auto mip_d = std::max(image.info.size.depth >> m, 1u);
+        const auto mip_w = std::max(src_info.size.width >> m, 1u);
+        const auto mip_h = std::max(src_info.size.height >> m, 1u);
+        const auto mip_d = std::max(src_info.size.depth >> m, 1u);
         image_copy.emplace_back(vk::ImageCopy{
             .srcSubresource{
-                .aspectMask = image.aspect_mask,
+                .aspectMask = src_image.aspect_mask,
                 .mipLevel = m,
                 .baseArrayLayer = 0,
-                .layerCount = image.info.resources.layers,
+                .layerCount = src_info.resources.layers,
             },
             .dstSubresource{
-                .aspectMask = image.aspect_mask,
+                .aspectMask = src_image.aspect_mask,
                 .mipLevel = m,
                 .baseArrayLayer = 0,
-                .layerCount = image.info.resources.layers,
+                .layerCount = src_info.resources.layers,
             },
             .extent = {mip_w, mip_h, mip_d},
         });
     }
 
-    cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
+    cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
                      image_copy);
 
     Transit(vk::ImageLayout::eGeneral,
             vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
 }
 
-void Image::CopyMip(const Image& image, u32 mip, u32 slice) {
+void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) {
+    const auto& src_info = src_image.info;
+
+    vk::BufferImageCopy buffer_image_copy = {
+        .bufferOffset = offset,
+        .bufferRowLength = 0,
+        .bufferImageHeight = 0,
+        .imageSubresource =
+            {
+                .aspectMask = src_info.IsDepthStencil() ? vk::ImageAspectFlagBits::eDepth
+                                                        : vk::ImageAspectFlagBits::eColor,
+                .mipLevel = 0,
+                .baseArrayLayer = 0,
+                .layerCount = 1,
+            },
+        .imageOffset =
+            {
+                .x = 0,
+                .y = 0,
+                .z = 0,
+            },
+        .imageExtent =
+            {
+                .width = src_info.size.width,
+                .height = src_info.size.height,
+                .depth = src_info.size.depth,
+            },
+    };
+
+    const vk::BufferMemoryBarrier2 pre_copy_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferRead,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .buffer = buffer,
+        .offset = offset,
+        .size = VK_WHOLE_SIZE,
+    };
+
+    const vk::BufferMemoryBarrier2 post_copy_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
+        .buffer = buffer,
+        .offset = offset,
+        .size = VK_WHOLE_SIZE,
+    };
+
+    scheduler->EndRendering();
+    src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
+    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
+
+    auto cmdbuf = scheduler->CommandBuffer();
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_copy_barrier,
+    });
+    cmdbuf.copyImageToBuffer(src_image.image, vk::ImageLayout::eTransferSrcOptimal, buffer,
+                             buffer_image_copy);
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_copy_barrier,
+    });
+
+    buffer_image_copy.imageSubresource.aspectMask =
+        info.IsDepthStencil() ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eColor;
+    cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
+                             buffer_image_copy);
+}
+
+void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) {
     scheduler->EndRendering();
     Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
@@ -358,26 +436,27 @@ void Image::CopyMip(const Image& image, u32 mip, u32 slice) {
     const auto mip_h = std::max(info.size.height >> mip, 1u);
     const auto mip_d = std::max(info.size.depth >> mip, 1u);
 
-    ASSERT(mip_w == image.info.size.width);
-    ASSERT(mip_h == image.info.size.height);
+    const auto& src_info = src_image.info;
+    ASSERT(mip_w == src_info.size.width);
+    ASSERT(mip_h == src_info.size.height);
 
-    const u32 num_layers = std::min(image.info.resources.layers, info.resources.layers);
+    const u32 num_layers = std::min(src_info.resources.layers, info.resources.layers);
     const vk::ImageCopy image_copy{
         .srcSubresource{
-            .aspectMask = image.aspect_mask,
+            .aspectMask = src_image.aspect_mask,
             .mipLevel = 0,
             .baseArrayLayer = 0,
             .layerCount = num_layers,
         },
         .dstSubresource{
-            .aspectMask = image.aspect_mask,
+            .aspectMask = src_image.aspect_mask,
             .mipLevel = mip,
             .baseArrayLayer = slice,
             .layerCount = num_layers,
         },
         .extent = {mip_w, mip_h, mip_d},
     };
 
-    cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
+    cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
                      image_copy);
 
     Transit(vk::ImageLayout::eGeneral,

@@ -104,7 +104,8 @@ struct Image {
                  std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
     void Upload(vk::Buffer buffer, u64 offset);
 
-    void CopyImage(const Image& image);
+    void CopyImage(const Image& src_image);
+    void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset);
     void CopyMip(const Image& src_image, u32 mip, u32 slice);
 
     bool IsTracked() {

@@ -8,7 +8,6 @@
 #include "common/debug.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
-#include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/texture_cache/host_compatibility.h"
@@ -126,7 +125,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
 
 ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, BindingType binding,
                                           ImageId cache_image_id) {
-    const auto& cache_image = slot_images[cache_image_id];
+    auto& cache_image = slot_images[cache_image_id];
 
     if (!cache_image.info.IsDepthStencil() && !requested_info.IsDepthStencil()) {
         return {};
@@ -169,18 +168,21 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
     }
 
     if (recreate) {
-        auto new_info{requested_info};
-        new_info.resources = std::max(requested_info.resources, cache_image.info.resources);
+        auto new_info = requested_info;
+        new_info.resources = std::min(requested_info.resources, cache_image.info.resources);
+        new_info.UpdateSize();
         const auto new_image_id = slot_images.insert(instance, scheduler, new_info);
         RegisterImage(new_image_id);
 
         // Inherit image usage
-        auto& new_image = GetImage(new_image_id);
+        auto& new_image = slot_images[new_image_id];
         new_image.usage = cache_image.usage;
+        new_image.flags &= ~ImageFlagBits::Dirty;
 
-        // TODO: perform a depth copy here
+        // Perform depth<->color copy using the intermediate copy buffer.
+        const auto& copy_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::DeviceLocal);
+        new_image.CopyImageWithBuffer(cache_image, copy_buffer.Handle(), 0);
 
+        // Free the cache image.
         FreeImage(cache_image_id);
 
         return new_image_id;
     }
@@ -584,12 +586,11 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
     const VAddr image_addr = image.info.guest_address;
     const size_t image_size = image.info.guest_size;
 
-    const auto [vk_buffer, buf_offset] =
-        buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
+    const auto [vk_buffer, buf_offset] = buffer_cache.ObtainBufferForImage(image_addr, image_size);
+
     const auto cmdbuf = sched_ptr->CommandBuffer();
-    // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
-    // hazard
+
+    // The obtained buffer may be GPU modified so we need to emit a barrier to prevent RAW hazard
     if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
                                              vk::PipelineStageFlagBits2::eTransfer)) {
         cmdbuf.pipelineBarrier2(vk::DependencyInfo{