mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-25 11:56:18 +00:00
texture_cache: Implement color<->depth copies (#3079)
* texture_cache: Implement color to depth copies and vice versa
* ir_passes: Adjust shared memory barrier pass to cover more cases
* texture_cache: Remove unused code
* review comment
This commit is contained in:
parent
fc4fd0107d
commit
dedf6de2ac
7 changed files with 157 additions and 54 deletions
|
@ -1,6 +1,7 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <unordered_set>
|
||||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
|
@ -51,11 +52,14 @@ static void EmitBarrierInBlock(IR::Block* block) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using NodeSet = std::unordered_set<const IR::Block*>;
|
||||||
|
|
||||||
// Inserts a barrier after divergent conditional blocks to avoid undefined
|
// Inserts a barrier after divergent conditional blocks to avoid undefined
|
||||||
// behavior when some threads write and others read from shared memory.
|
// behavior when some threads write and others read from shared memory.
|
||||||
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
|
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data,
|
||||||
|
NodeSet& divergence_end, u32& divergence_depth) {
|
||||||
const IR::U1 cond = data.if_node.cond;
|
const IR::U1 cond = data.if_node.cond;
|
||||||
const auto insert_barrier =
|
const auto is_divergent_cond =
|
||||||
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
|
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
|
||||||
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
|
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
|
||||||
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
|
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
|
||||||
|
@ -63,11 +67,15 @@ static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
|
||||||
}
|
}
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
});
|
});
|
||||||
if (insert_barrier) {
|
if (is_divergent_cond) {
|
||||||
IR::Block* const merge = data.if_node.merge;
|
if (divergence_depth == 0) {
|
||||||
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
|
IR::Block* const merge = data.if_node.merge;
|
||||||
IR::IREmitter ir{*merge, insert_point};
|
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
|
||||||
ir.Barrier();
|
IR::IREmitter ir{*merge, insert_point};
|
||||||
|
ir.Barrier();
|
||||||
|
}
|
||||||
|
++divergence_depth;
|
||||||
|
divergence_end.emplace(data.if_node.merge);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,19 +97,22 @@ void SharedMemoryBarrierPass(IR::Program& program, const RuntimeInfo& runtime_in
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
using Type = IR::AbstractSyntaxNode::Type;
|
using Type = IR::AbstractSyntaxNode::Type;
|
||||||
u32 branch_depth{};
|
u32 divergence_depth{};
|
||||||
|
NodeSet divergence_end;
|
||||||
for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
|
for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
|
||||||
if (node.type == Type::EndIf) {
|
if (node.type == Type::EndIf) {
|
||||||
--branch_depth;
|
if (divergence_end.contains(node.data.end_if.merge)) {
|
||||||
|
--divergence_depth;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Check if branch depth is zero, we don't want to insert barrier in potentially divergent
|
// Check if branch depth is zero, we don't want to insert barrier in potentially divergent
|
||||||
// code.
|
// code.
|
||||||
if (node.type == Type::If && branch_depth++ == 0) {
|
if (node.type == Type::If) {
|
||||||
EmitBarrierInMergeBlock(node.data);
|
EmitBarrierInMergeBlock(node.data, divergence_end, divergence_depth);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (node.type == Type::Block && branch_depth == 0) {
|
if (node.type == Type::Block && divergence_depth == 0) {
|
||||||
EmitBarrierInBlock(node.data.block);
|
EmitBarrierInBlock(node.data.block);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ static constexpr size_t DataShareBufferSize = 64_KB;
|
||||||
static constexpr size_t StagingBufferSize = 512_MB;
|
static constexpr size_t StagingBufferSize = 512_MB;
|
||||||
static constexpr size_t UboStreamBufferSize = 128_MB;
|
static constexpr size_t UboStreamBufferSize = 128_MB;
|
||||||
static constexpr size_t DownloadBufferSize = 128_MB;
|
static constexpr size_t DownloadBufferSize = 128_MB;
|
||||||
|
static constexpr size_t DeviceBufferSize = 16_MB;
|
||||||
static constexpr size_t MaxPageFaults = 1024;
|
static constexpr size_t MaxPageFaults = 1024;
|
||||||
|
|
||||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
|
@ -32,7 +33,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||||
memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
|
memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, tracker{tracker_},
|
||||||
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
||||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
||||||
download_buffer(instance, scheduler, MemoryUsage::Download, DownloadBufferSize),
|
download_buffer{instance, scheduler, MemoryUsage::Download, DownloadBufferSize},
|
||||||
|
device_buffer{instance, scheduler, MemoryUsage::DeviceLocal, DeviceBufferSize},
|
||||||
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
|
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
|
||||||
bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
|
bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
|
||||||
0, AllFlags, BDA_PAGETABLE_SIZE},
|
0, AllFlags, BDA_PAGETABLE_SIZE},
|
||||||
|
@ -348,7 +350,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
|
||||||
return {&buffer, buffer.Offset(device_addr)};
|
return {&buffer, buffer.Offset(device_addr)};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) {
|
std::pair<Buffer*, u32> BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 size) {
|
||||||
// Check if any buffer contains the full requested range.
|
// Check if any buffer contains the full requested range.
|
||||||
const u64 page = gpu_addr >> CACHING_PAGEBITS;
|
const u64 page = gpu_addr >> CACHING_PAGEBITS;
|
||||||
const BufferId buffer_id = page_table[page].buffer_id;
|
const BufferId buffer_id = page_table[page].buffer_id;
|
||||||
|
@ -361,10 +363,10 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
|
||||||
}
|
}
|
||||||
// If no buffer contains the full requested range but some buffer within was GPU-modified,
|
// If no buffer contains the full requested range but some buffer within was GPU-modified,
|
||||||
// fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications.
|
// fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications.
|
||||||
// This is only done if the request prefers to use GPU memory, otherwise we can skip it.
|
if (memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
|
||||||
if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
|
|
||||||
return ObtainBuffer(gpu_addr, size, false, false);
|
return ObtainBuffer(gpu_addr, size, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// In all other cases, just do a CPU copy to the staging buffer.
|
// In all other cases, just do a CPU copy to the staging buffer.
|
||||||
const auto [data, offset] = staging_buffer.Map(size, 16);
|
const auto [data, offset] = staging_buffer.Map(size, 16);
|
||||||
memory->CopySparseMemory(gpu_addr, data, size);
|
memory->CopySparseMemory(gpu_addr, data, size);
|
||||||
|
|
|
@ -80,11 +80,6 @@ public:
|
||||||
return &gds_buffer;
|
return &gds_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retrieves the host visible device local stream buffer.
|
|
||||||
[[nodiscard]] StreamBuffer& GetStreamBuffer() noexcept {
|
|
||||||
return stream_buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retrieves the device local BDA page table buffer.
|
/// Retrieves the device local BDA page table buffer.
|
||||||
[[nodiscard]] Buffer* GetBdaPageTableBuffer() noexcept {
|
[[nodiscard]] Buffer* GetBdaPageTableBuffer() noexcept {
|
||||||
return &bda_pagetable_buffer;
|
return &bda_pagetable_buffer;
|
||||||
|
@ -100,6 +95,20 @@ public:
|
||||||
return slot_buffers[id];
|
return slot_buffers[id];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retrieves a utility buffer optimized for specified memory usage.
|
||||||
|
StreamBuffer& GetUtilityBuffer(MemoryUsage usage) noexcept {
|
||||||
|
switch (usage) {
|
||||||
|
case MemoryUsage::Stream:
|
||||||
|
return stream_buffer;
|
||||||
|
case MemoryUsage::Download:
|
||||||
|
return download_buffer;
|
||||||
|
case MemoryUsage::Upload:
|
||||||
|
return staging_buffer;
|
||||||
|
case MemoryUsage::DeviceLocal:
|
||||||
|
return device_buffer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Invalidates any buffer in the logical page range.
|
/// Invalidates any buffer in the logical page range.
|
||||||
void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
|
void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
|
||||||
|
|
||||||
|
@ -121,8 +130,7 @@ public:
|
||||||
BufferId buffer_id = {});
|
BufferId buffer_id = {});
|
||||||
|
|
||||||
/// Attempts to obtain a buffer without modifying the cache contents.
|
/// Attempts to obtain a buffer without modifying the cache contents.
|
||||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size,
|
[[nodiscard]] std::pair<Buffer*, u32> ObtainBufferForImage(VAddr gpu_addr, u32 size);
|
||||||
bool prefer_gpu);
|
|
||||||
|
|
||||||
/// Return true when a region is registered on the cache
|
/// Return true when a region is registered on the cache
|
||||||
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
|
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
|
||||||
|
@ -193,6 +201,7 @@ private:
|
||||||
StreamBuffer staging_buffer;
|
StreamBuffer staging_buffer;
|
||||||
StreamBuffer stream_buffer;
|
StreamBuffer stream_buffer;
|
||||||
StreamBuffer download_buffer;
|
StreamBuffer download_buffer;
|
||||||
|
StreamBuffer device_buffer;
|
||||||
Buffer gds_buffer;
|
Buffer gds_buffer;
|
||||||
Buffer bda_pagetable_buffer;
|
Buffer bda_pagetable_buffer;
|
||||||
Buffer fault_buffer;
|
Buffer fault_buffer;
|
||||||
|
|
|
@ -549,7 +549,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||||
const auto* gds_buf = buffer_cache.GetGdsBuffer();
|
const auto* gds_buf = buffer_cache.GetGdsBuffer();
|
||||||
buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
|
buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
|
||||||
} else if (desc.buffer_type == Shader::BufferType::Flatbuf) {
|
} else if (desc.buffer_type == Shader::BufferType::Flatbuf) {
|
||||||
auto& vk_buffer = buffer_cache.GetStreamBuffer();
|
auto& vk_buffer = buffer_cache.GetUtilityBuffer(VideoCore::MemoryUsage::Stream);
|
||||||
const u32 ubo_size = stage.flattened_ud_buf.size() * sizeof(u32);
|
const u32 ubo_size = stage.flattened_ud_buf.size() * sizeof(u32);
|
||||||
const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
|
const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
|
||||||
instance.UniformMinAlignment());
|
instance.UniformMinAlignment());
|
||||||
|
@ -561,7 +561,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||||
const auto* fault_buffer = buffer_cache.GetFaultBuffer();
|
const auto* fault_buffer = buffer_cache.GetFaultBuffer();
|
||||||
buffer_infos.emplace_back(fault_buffer->Handle(), 0, fault_buffer->SizeBytes());
|
buffer_infos.emplace_back(fault_buffer->Handle(), 0, fault_buffer->SizeBytes());
|
||||||
} else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
|
} else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
|
||||||
auto& lds_buffer = buffer_cache.GetStreamBuffer();
|
auto& lds_buffer = buffer_cache.GetUtilityBuffer(VideoCore::MemoryUsage::Stream);
|
||||||
const auto& cs_program = liverpool->GetCsRegs();
|
const auto& cs_program = liverpool->GetCsRegs();
|
||||||
const auto lds_size = cs_program.SharedMemSize() * cs_program.NumWorkgroups();
|
const auto lds_size = cs_program.SharedMemSize() * cs_program.NumWorkgroups();
|
||||||
const auto [data, offset] =
|
const auto [data, offset] =
|
||||||
|
|
|
@ -312,43 +312,121 @@ void Image::Upload(vk::Buffer buffer, u64 offset) {
|
||||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Image::CopyImage(const Image& image) {
|
void Image::CopyImage(const Image& src_image) {
|
||||||
scheduler->EndRendering();
|
scheduler->EndRendering();
|
||||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||||
|
|
||||||
auto cmdbuf = scheduler->CommandBuffer();
|
auto cmdbuf = scheduler->CommandBuffer();
|
||||||
|
const auto& src_info = src_image.info;
|
||||||
|
|
||||||
boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
|
boost::container::small_vector<vk::ImageCopy, 14> image_copy{};
|
||||||
const u32 num_mips = std::min(image.info.resources.levels, info.resources.levels);
|
const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels);
|
||||||
for (u32 m = 0; m < num_mips; ++m) {
|
for (u32 m = 0; m < num_mips; ++m) {
|
||||||
const auto mip_w = std::max(image.info.size.width >> m, 1u);
|
const auto mip_w = std::max(src_info.size.width >> m, 1u);
|
||||||
const auto mip_h = std::max(image.info.size.height >> m, 1u);
|
const auto mip_h = std::max(src_info.size.height >> m, 1u);
|
||||||
const auto mip_d = std::max(image.info.size.depth >> m, 1u);
|
const auto mip_d = std::max(src_info.size.depth >> m, 1u);
|
||||||
|
|
||||||
image_copy.emplace_back(vk::ImageCopy{
|
image_copy.emplace_back(vk::ImageCopy{
|
||||||
.srcSubresource{
|
.srcSubresource{
|
||||||
.aspectMask = image.aspect_mask,
|
.aspectMask = src_image.aspect_mask,
|
||||||
.mipLevel = m,
|
.mipLevel = m,
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = image.info.resources.layers,
|
.layerCount = src_info.resources.layers,
|
||||||
},
|
},
|
||||||
.dstSubresource{
|
.dstSubresource{
|
||||||
.aspectMask = image.aspect_mask,
|
.aspectMask = src_image.aspect_mask,
|
||||||
.mipLevel = m,
|
.mipLevel = m,
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = image.info.resources.layers,
|
.layerCount = src_info.resources.layers,
|
||||||
},
|
},
|
||||||
.extent = {mip_w, mip_h, mip_d},
|
.extent = {mip_w, mip_h, mip_d},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
|
cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
|
||||||
image_copy);
|
image_copy);
|
||||||
|
|
||||||
Transit(vk::ImageLayout::eGeneral,
|
Transit(vk::ImageLayout::eGeneral,
|
||||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Image::CopyMip(const Image& image, u32 mip, u32 slice) {
|
/// Copies mip level 0, array layer 0 of `src_image` into this image by bouncing the data through
/// `buffer`: the source is first copied into the buffer at `offset`, then the buffer is copied
/// into this image. The aspect (depth or color) is chosen independently for each side, and the
/// copied extent is the size of the source image.
void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) {
    const auto& source = src_image.info;

    // Depth-stencil images are copied through their depth aspect, everything else through color.
    const auto aspect_of = [](const auto& image_info) -> vk::ImageAspectFlags {
        if (image_info.IsDepthStencil()) {
            return vk::ImageAspectFlagBits::eDepth;
        }
        return vk::ImageAspectFlagBits::eColor;
    };

    // Region used for the image -> buffer half of the copy.
    const vk::BufferImageCopy readback_region = {
        .bufferOffset = offset,
        .bufferRowLength = 0,
        .bufferImageHeight = 0,
        .imageSubresource =
            {
                .aspectMask = aspect_of(source),
                .mipLevel = 0,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
        .imageOffset = {.x = 0, .y = 0, .z = 0},
        .imageExtent =
            {
                .width = source.size.width,
                .height = source.size.height,
                .depth = source.size.depth,
            },
    };

    // Builds a transfer-to-transfer barrier on the intermediate buffer, starting at `offset`.
    const auto make_transfer_barrier = [&](vk::AccessFlags2 src_access,
                                           vk::AccessFlags2 dst_access) {
        return vk::BufferMemoryBarrier2{
            .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
            .srcAccessMask = src_access,
            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
            .dstAccessMask = dst_access,
            .buffer = buffer,
            .offset = offset,
            .size = VK_WHOLE_SIZE,
        };
    };
    // Earlier transfer reads of the buffer must finish before it is overwritten...
    const vk::BufferMemoryBarrier2 before_readback = make_transfer_barrier(
        vk::AccessFlagBits2::eTransferRead, vk::AccessFlagBits2::eTransferWrite);
    // ...and the write into the buffer must be visible before it is read back out.
    const vk::BufferMemoryBarrier2 after_readback = make_transfer_barrier(
        vk::AccessFlagBits2::eTransferWrite, vk::AccessFlagBits2::eTransferRead);

    scheduler->EndRendering();
    src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
    Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});

    auto cmdbuf = scheduler->CommandBuffer();
    const auto record_barrier = [&cmdbuf](const vk::BufferMemoryBarrier2& barrier) {
        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
            .bufferMemoryBarrierCount = 1,
            .pBufferMemoryBarriers = &barrier,
        });
    };

    // Source image -> intermediate buffer.
    record_barrier(before_readback);
    cmdbuf.copyImageToBuffer(src_image.image, vk::ImageLayout::eTransferSrcOptimal, buffer,
                             readback_region);
    record_barrier(after_readback);

    // Intermediate buffer -> this image: same region, but with this image's own aspect.
    vk::BufferImageCopy upload_region = readback_region;
    upload_region.imageSubresource.aspectMask = aspect_of(info);
    cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, upload_region);
}
|
||||||
|
|
||||||
|
void Image::CopyMip(const Image& src_image, u32 mip, u32 slice) {
|
||||||
scheduler->EndRendering();
|
scheduler->EndRendering();
|
||||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||||
|
|
||||||
|
@ -358,26 +436,27 @@ void Image::CopyMip(const Image& image, u32 mip, u32 slice) {
|
||||||
const auto mip_h = std::max(info.size.height >> mip, 1u);
|
const auto mip_h = std::max(info.size.height >> mip, 1u);
|
||||||
const auto mip_d = std::max(info.size.depth >> mip, 1u);
|
const auto mip_d = std::max(info.size.depth >> mip, 1u);
|
||||||
|
|
||||||
ASSERT(mip_w == image.info.size.width);
|
const auto& src_info = src_image.info;
|
||||||
ASSERT(mip_h == image.info.size.height);
|
ASSERT(mip_w == src_info.size.width);
|
||||||
|
ASSERT(mip_h == src_info.size.height);
|
||||||
|
|
||||||
const u32 num_layers = std::min(image.info.resources.layers, info.resources.layers);
|
const u32 num_layers = std::min(src_info.resources.layers, info.resources.layers);
|
||||||
const vk::ImageCopy image_copy{
|
const vk::ImageCopy image_copy{
|
||||||
.srcSubresource{
|
.srcSubresource{
|
||||||
.aspectMask = image.aspect_mask,
|
.aspectMask = src_image.aspect_mask,
|
||||||
.mipLevel = 0,
|
.mipLevel = 0,
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = num_layers,
|
.layerCount = num_layers,
|
||||||
},
|
},
|
||||||
.dstSubresource{
|
.dstSubresource{
|
||||||
.aspectMask = image.aspect_mask,
|
.aspectMask = src_image.aspect_mask,
|
||||||
.mipLevel = mip,
|
.mipLevel = mip,
|
||||||
.baseArrayLayer = slice,
|
.baseArrayLayer = slice,
|
||||||
.layerCount = num_layers,
|
.layerCount = num_layers,
|
||||||
},
|
},
|
||||||
.extent = {mip_w, mip_h, mip_d},
|
.extent = {mip_w, mip_h, mip_d},
|
||||||
};
|
};
|
||||||
cmdbuf.copyImage(image.image, image.last_state.layout, this->image, this->last_state.layout,
|
cmdbuf.copyImage(src_image.image, src_image.last_state.layout, image, last_state.layout,
|
||||||
image_copy);
|
image_copy);
|
||||||
|
|
||||||
Transit(vk::ImageLayout::eGeneral,
|
Transit(vk::ImageLayout::eGeneral,
|
||||||
|
|
|
@ -104,7 +104,8 @@ struct Image {
|
||||||
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
|
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
|
||||||
void Upload(vk::Buffer buffer, u64 offset);
|
void Upload(vk::Buffer buffer, u64 offset);
|
||||||
|
|
||||||
void CopyImage(const Image& image);
|
void CopyImage(const Image& src_image);
|
||||||
|
void CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset);
|
||||||
void CopyMip(const Image& src_image, u32 mip, u32 slice);
|
void CopyMip(const Image& src_image, u32 mip, u32 slice);
|
||||||
|
|
||||||
bool IsTracked() {
|
bool IsTracked() {
|
||||||
|
|
|
@ -8,7 +8,6 @@
|
||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
#include "video_core/buffer_cache/buffer_cache.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
#include "video_core/page_manager.h"
|
#include "video_core/page_manager.h"
|
||||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/texture_cache/host_compatibility.h"
|
#include "video_core/texture_cache/host_compatibility.h"
|
||||||
|
@ -126,7 +125,7 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||||
|
|
||||||
ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, BindingType binding,
|
ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, BindingType binding,
|
||||||
ImageId cache_image_id) {
|
ImageId cache_image_id) {
|
||||||
const auto& cache_image = slot_images[cache_image_id];
|
auto& cache_image = slot_images[cache_image_id];
|
||||||
|
|
||||||
if (!cache_image.info.IsDepthStencil() && !requested_info.IsDepthStencil()) {
|
if (!cache_image.info.IsDepthStencil() && !requested_info.IsDepthStencil()) {
|
||||||
return {};
|
return {};
|
||||||
|
@ -169,18 +168,21 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
|
||||||
}
|
}
|
||||||
|
|
||||||
if (recreate) {
|
if (recreate) {
|
||||||
auto new_info{requested_info};
|
auto new_info = requested_info;
|
||||||
new_info.resources = std::max(requested_info.resources, cache_image.info.resources);
|
new_info.resources = std::min(requested_info.resources, cache_image.info.resources);
|
||||||
new_info.UpdateSize();
|
|
||||||
const auto new_image_id = slot_images.insert(instance, scheduler, new_info);
|
const auto new_image_id = slot_images.insert(instance, scheduler, new_info);
|
||||||
RegisterImage(new_image_id);
|
RegisterImage(new_image_id);
|
||||||
|
|
||||||
// Inherit image usage
|
// Inherit image usage
|
||||||
auto& new_image = GetImage(new_image_id);
|
auto& new_image = slot_images[new_image_id];
|
||||||
new_image.usage = cache_image.usage;
|
new_image.usage = cache_image.usage;
|
||||||
|
new_image.flags &= ~ImageFlagBits::Dirty;
|
||||||
|
|
||||||
// TODO: perform a depth copy here
|
// Perform depth<->color copy using the intermediate copy buffer.
|
||||||
|
const auto& copy_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::DeviceLocal);
|
||||||
|
new_image.CopyImageWithBuffer(cache_image, copy_buffer.Handle(), 0);
|
||||||
|
|
||||||
|
// Free the cache image.
|
||||||
FreeImage(cache_image_id);
|
FreeImage(cache_image_id);
|
||||||
return new_image_id;
|
return new_image_id;
|
||||||
}
|
}
|
||||||
|
@ -584,12 +586,11 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||||
|
|
||||||
const VAddr image_addr = image.info.guest_address;
|
const VAddr image_addr = image.info.guest_address;
|
||||||
const size_t image_size = image.info.guest_size;
|
const size_t image_size = image.info.guest_size;
|
||||||
const auto [vk_buffer, buf_offset] =
|
const auto [vk_buffer, buf_offset] = buffer_cache.ObtainBufferForImage(image_addr, image_size);
|
||||||
buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
|
|
||||||
|
|
||||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||||
// The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
|
|
||||||
// hazard
|
// The obtained buffer may be GPU modified so we need to emit a barrier to prevent RAW hazard
|
||||||
if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||||
vk::PipelineStageFlagBits2::eTransfer)) {
|
vk::PipelineStageFlagBits2::eTransfer)) {
|
||||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue