renderer_vulkan: Cleanup and improve barriers in caches (#1865)

* texture_cache: Stricter barriers on image upload

* buffer_cache: Stricter barrier for vkCmdUpdateBuffer

* vk_rasterizer: Also barrier normal buffers and apply the barrier to all stages

* texture_cache: Minor barrier cleanup

* Batch image and buffer barriers in a single command

* clang-format
This commit is contained in:
TheTurtle 2025-01-02 20:43:56 +02:00 committed by GitHub
parent f7a8e2409c
commit 77d2172441
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 190 additions and 76 deletions

View file

@ -259,7 +259,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
const BufferId buffer_id = FindBuffer(address, num_bytes);
return &slot_buffers[buffer_id];
}();
const vk::BufferMemoryBarrier2 buf_barrier = {
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer->Handle(),
.offset = buffer->Offset(address),
.size = num_bytes,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
@ -271,9 +280,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &buf_barrier,
.pBufferMemoryBarriers = &pre_barrier,
});
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
}
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
@ -465,21 +479,48 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
};
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
static constexpr vk::MemoryBarrier READ_BARRIER{
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
const std::array pre_barriers = {
vk::BufferMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
.buffer = overlap.Handle(),
.offset = 0,
.size = overlap.SizeBytes(),
},
};
static constexpr vk::MemoryBarrier WRITE_BARRIER{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
const std::array post_barriers = {
vk::BufferMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.buffer = overlap.Handle(),
.offset = 0,
.size = overlap.SizeBytes(),
},
vk::BufferMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = new_buffer.Handle(),
.offset = dst_base_offset,
.size = overlap.SizeBytes(),
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
READ_BARRIER, {}, {});
cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = pre_barriers.data(),
});
cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
.pBufferMemoryBarriers = post_barriers.data(),
});
DeleteBuffer(overlap_id);
}
@ -583,21 +624,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
}
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
static constexpr vk::MemoryBarrier READ_BARRIER{
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = 0,
.size = buffer.SizeBytes(),
};
static constexpr vk::MemoryBarrier WRITE_BARRIER{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = buffer.Handle(),
.offset = 0,
.size = buffer.SizeBytes(),
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
READ_BARRIER, {}, {});
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
});
cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
}
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
@ -647,10 +702,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
}
if (!copies.empty()) {
scheduler.EndRendering();
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = max_offset - size,
.size = size,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = buffer.Handle(),
.offset = max_offset - size,
.size = size,
};
auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal,
vk::AccessFlagBits2::eTransferRead,
vk::PipelineStageFlagBits2::eTransfer, {});
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
.pImageMemoryBarriers = barriers.data(),
});
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(),
copies);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
}
return true;
}