Single pass on mapped ranges (no barrier batching)

Lander Gallastegi 2025-07-07 21:59:03 +02:00
parent 5d2598c505
commit 17815ad439

@@ -997,53 +997,52 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
 void BufferCache::SynchronizeBuffersForDma() {
     RENDERER_TRACE;
-    LOG_WARNING(Render_Vulkan, "SYNC RANGES FOR DMA");
     boost::container::small_vector<Buffer*, 64> buffers;
-    boost::container::small_vector<vk::BufferMemoryBarrier2, 64> barriers;
     boost::container::small_vector<vk::BufferCopy, 4> copies;
     const auto& mapped_ranges = rasterizer.GetMappedRanges();
+    bool barrier_recorded = false;
     memory_tracker->Lock();
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
     mapped_ranges.ForEach([&](VAddr device_addr, u64 size) {
         ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
-            if (memory_tracker->IsRegionCpuModified<false>(device_addr, size)) {
-                barriers.push_back(vk::BufferMemoryBarrier2{
-                    .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-                    .srcAccessMask =
-                        vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
-                        vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
-                    .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-                    .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
-                    .buffer = buffer.Handle(),
-                    .offset = 0,
-                    .size = buffer.SizeBytes(),
-                });
-                buffers.push_back(&buffer);
-            }
-        });
-    });
-    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
-        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = static_cast<u32>(barriers.size()),
-        .pBufferMemoryBarriers = barriers.data(),
-    });
-    for (auto* buffer : buffers) {
-        memory_tracker->ForEachUploadRange<true, false>(
-            buffer->CpuAddr(), buffer->SizeBytes(), false,
-            [&](u64 device_addr_out, u64 range_size) {
-                const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
-                copies.push_back(vk::BufferCopy{
-                    .srcOffset = offset,
-                    .dstOffset = device_addr_out - buffer->CpuAddr(),
-                    .size = range_size,
-                });
-            });
-        cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies);
-        copies.clear();
-    }
-    MemoryBarrier();
+            memory_tracker->ForEachUploadRange<true, false>(
+                buffer.CpuAddr(), buffer.SizeBytes(), false,
+                [&](u64 device_addr_out, u64 range_size) {
+                    if (!barrier_recorded) {
+                        barrier_recorded = true;
+                        const vk::BufferMemoryBarrier2 barrier = {
+                            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+                            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead |
+                                             vk::AccessFlagBits2::eMemoryWrite |
+                                             vk::AccessFlagBits2::eTransferRead |
+                                             vk::AccessFlagBits2::eTransferWrite,
+                            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+                            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+                            .buffer = buffer.Handle(),
+                            .offset = 0,
+                            .size = buffer.SizeBytes(),
+                        };
+                        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+                            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+                            .bufferMemoryBarrierCount = 1,
+                            .pBufferMemoryBarriers = &barrier,
+                        });
+                    }
+                    const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
+                    copies.push_back(vk::BufferCopy{
+                        .srcOffset = offset,
+                        .dstOffset = device_addr_out - buffer.CpuAddr(),
+                        .size = range_size,
+                    });
+                });
+            cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.Handle(), copies);
+            copies.clear();
+            barrier_recorded = false;
+        });
+    });
     memory_tracker->PerformDeferredProtections<Type::CPU, false, false>();
+    MemoryBarrier();
     memory_tracker->Unlock();
 }
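For context, a minimal stand-alone sketch of the control-flow change in this diff: instead of collecting one vk::BufferMemoryBarrier2 per dirty buffer and issuing them in a single batched pipelineBarrier2 before a second upload loop, the function now makes one pass over the mapped ranges and records each buffer's barrier lazily, when the first dirty range for that buffer is found. The names in the sketch (FakeBuffer, record_barrier, copy_ranges) are invented stand-ins, not shadPS4 or Vulkan API.

// Hypothetical sketch of "single pass, lazy per-buffer barrier"; stand-in
// types and functions only, no Vulkan involved.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

struct FakeBuffer {
    std::uint64_t base = 0;
    std::uint64_t size = 0;
    // Dirty CPU ranges pending upload, as (offset, size) pairs.
    std::vector<std::pair<std::uint64_t, std::uint64_t>> dirty;
};

// Stand-in for recording a per-buffer GPU memory barrier.
void record_barrier(const FakeBuffer& buf) {
    std::printf("barrier for buffer at 0x%llx\n",
                static_cast<unsigned long long>(buf.base));
}

// Stand-in for recording the staging-to-buffer copy commands.
void copy_ranges(const FakeBuffer& buf, std::size_t count) {
    std::printf("copy %zu range(s) into buffer at 0x%llx\n", count,
                static_cast<unsigned long long>(buf.base));
}

// Single pass: walk each buffer's dirty ranges once, record the buffer's
// barrier only when its first dirty range is found, then record that
// buffer's copies immediately. No vector of barriers is accumulated and no
// separate upload loop runs afterwards.
void synchronize_single_pass(std::vector<FakeBuffer>& buffers) {
    bool barrier_recorded = false;
    for (FakeBuffer& buf : buffers) {
        std::size_t copies = 0;
        for (const auto& [offset, range_size] : buf.dirty) {
            if (!barrier_recorded) {
                barrier_recorded = true;
                record_barrier(buf); // one barrier per dirty buffer, issued inline
            }
            // Stand-in for the staging-buffer copy + vk::BufferCopy push.
            std::printf("  stage 0x%llx bytes at offset 0x%llx\n",
                        static_cast<unsigned long long>(range_size),
                        static_cast<unsigned long long>(offset));
            ++copies;
        }
        if (copies != 0) {
            copy_ranges(buf, copies);
        }
        buf.dirty.clear();
        barrier_recorded = false; // reset before the next buffer
    }
}

int main() {
    std::vector<FakeBuffer> buffers{
        {0x1000, 0x100, {{0x0, 0x20}, {0x40, 0x10}}},
        {0x2000, 0x200, {}}, // clean buffer: no barrier, no copy recorded
    };
    synchronize_single_pass(buffers);
}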