Deferring mode

This commit is contained in:
Lander Gallastegi 2025-07-07 01:07:38 +02:00
parent 54907409c7
commit 5a242586ad
4 changed files with 56 additions and 48 deletions

View file

@ -817,7 +817,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
bool is_texel_buffer) { bool is_texel_buffer) {
boost::container::small_vector<vk::BufferCopy, 4> copies; boost::container::small_vector<vk::BufferCopy, 4> copies;
VAddr buffer_start = buffer.CpuAddr(); VAddr buffer_start = buffer.CpuAddr();
memory_tracker->ForEachUploadRange<true>( memory_tracker->ForEachUploadRange(
device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) { device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) {
const u64 offset = staging_buffer.Copy(device_addr_out, range_size); const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
copies.push_back(vk::BufferCopy{ copies.push_back(vk::BufferCopy{
@ -997,6 +997,7 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
void BufferCache::SynchronizeBuffersForDma() { void BufferCache::SynchronizeBuffersForDma() {
RENDERER_TRACE; RENDERER_TRACE;
LOG_WARNING(Render_Vulkan, "SYNC RANGES FOR DMA");
boost::container::small_vector<Buffer*, 64> buffers; boost::container::small_vector<Buffer*, 64> buffers;
boost::container::small_vector<vk::BufferMemoryBarrier2, 64> barriers; boost::container::small_vector<vk::BufferMemoryBarrier2, 64> barriers;
boost::container::small_vector<vk::BufferCopy, 4> copies; boost::container::small_vector<vk::BufferCopy, 4> copies;
@ -1028,7 +1029,7 @@ void BufferCache::SynchronizeBuffersForDma() {
.pBufferMemoryBarriers = barriers.data(), .pBufferMemoryBarriers = barriers.data(),
}); });
for (auto* buffer : buffers) { for (auto* buffer : buffers) {
memory_tracker->ForEachUploadRange<false, false>( memory_tracker->ForEachUploadRange<true, false>(
buffer->CpuAddr(), buffer->SizeBytes(), false, buffer->CpuAddr(), buffer->SizeBytes(), false,
[&](u64 device_addr_out, u64 range_size) { [&](u64 device_addr_out, u64 range_size) {
const u64 offset = staging_buffer.Copy(device_addr_out, range_size); const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
@ -1041,8 +1042,8 @@ void BufferCache::SynchronizeBuffersForDma() {
cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies); cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies);
copies.clear(); copies.clear();
} }
memory_tracker->UnmarkAllRegionsAsCpuModified<false>();
MemoryBarrier(); MemoryBarrier();
memory_tracker->PerformDeferredProtections<Type::CPU, false, false>();
memory_tracker->Unlock(); memory_tracker->Unlock();
} }

View file

@ -45,38 +45,29 @@ public:
} }
/// Mark region as CPU modified, notifying the device_tracker about this change /// Mark region as CPU modified, notifying the device_tracker about this change
template <bool locking = true> template <bool defer_protect = false, bool locking = true>
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IterateRegions<false, locking>(dirty_cpu_addr, query_size, IterateRegions<false, locking>(dirty_cpu_addr, query_size,
[](RegionManager* manager, u64 offset, size_t size) { [](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock}; std::scoped_lock lk{manager->lock};
manager->template ChangeRegionState<Type::CPU, true>( manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
manager->GetCpuAddr() + offset, size); manager->GetCpuAddr() + offset, size);
}); });
} }
/// Unmark all regions as CPU modified, notifying the device_tracker about this change
template <bool locking = true>
void UnmarkAllRegionsAsCpuModified() noexcept {
ForEachRegion<locking>([](RegionManager* manager) {
std::scoped_lock lk{manager->lock};
manager->template ChangeAllRegionState<Type::CPU, false>();
});
}
/// Unmark region as modified from the host GPU /// Unmark region as modified from the host GPU
template <bool locking = true> template <bool defer_protect = true, bool locking = false>
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IterateRegions<false, locking>(dirty_cpu_addr, query_size, IterateRegions<false, locking>(dirty_cpu_addr, query_size,
[](RegionManager* manager, u64 offset, size_t size) { [](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock}; std::scoped_lock lk{manager->lock};
manager->template ChangeRegionState<Type::GPU, false>( manager->template ChangeRegionState<Type::GPU, false, defer_protect>(
manager->GetCpuAddr() + offset, size); manager->GetCpuAddr() + offset, size);
}); });
} }
/// Removes all protection from a page and ensures GPU data has been flushed if requested /// Removes all protection from a page and ensures GPU data has been flushed if requested
template <bool locking = true> template <bool defer_protect = false, bool locking = true>
void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept { void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
IterateRegions<false, locking>( IterateRegions<false, locking>(
cpu_addr, size, cpu_addr, size,
@ -90,7 +81,7 @@ public:
if (try_flush && manager->template IsRegionModified<Type::GPU>(offset, size)) { if (try_flush && manager->template IsRegionModified<Type::GPU>(offset, size)) {
return true; return true;
} }
manager->template ChangeRegionState<Type::CPU, true>( manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
manager->GetCpuAddr() + offset, size); manager->GetCpuAddr() + offset, size);
return false; return false;
}(); }();
@ -101,32 +92,43 @@ public:
} }
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <bool clear, bool locking = true> template <bool defer_protect = false, bool locking = true>
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) { void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
IterateRegions<true, locking>( IterateRegions<true, locking>(
query_cpu_range, query_size, query_cpu_range, query_size,
[&func, is_written](RegionManager* manager, u64 offset, size_t size) { [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock}; std::scoped_lock lk{manager->lock};
manager->template ForEachModifiedRange<Type::CPU, clear>( manager->template ForEachModifiedRange<Type::CPU, true, defer_protect>(
manager->GetCpuAddr() + offset, size, func); manager->GetCpuAddr() + offset, size, func);
if (is_written && clear) { if (is_written) {
manager->template ChangeRegionState<Type::GPU, true>( manager->template ChangeRegionState<Type::GPU, true, defer_protect>(
manager->GetCpuAddr() + offset, size); manager->GetCpuAddr() + offset, size);
} }
}); });
} }
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <bool clear, bool locking = true> template <bool clear, bool defer_protect = false, bool locking = true>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) { void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
IterateRegions<false, locking>(query_cpu_range, query_size, IterateRegions<false, locking>(query_cpu_range, query_size,
[&func](RegionManager* manager, u64 offset, size_t size) { [&func](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock}; std::scoped_lock lk{manager->lock};
manager->template ForEachModifiedRange<Type::GPU, clear>( manager->template ForEachModifiedRange<Type::GPU, clear, defer_protect>(
manager->GetCpuAddr() + offset, size, func); manager->GetCpuAddr() + offset, size, func);
}); });
} }
/// Notifies deferred protection changes to the tracker.
template <Type type, bool enable, bool locking = true>
void PerformDeferredProtections() {
ForEachRegion<locking>([&](RegionManager* manager) {
std::scoped_lock lk{manager->lock};
manager->template PerformDeferredProtections<type, enable>();
});
}
/// Notifies all deferred protection changes to the tracker.
/// Lock the memory tracker. /// Lock the memory tracker.
void Lock() { void Lock() {
global_lock.lock(); global_lock.lock();

View file

@ -4,6 +4,7 @@
#pragma once #pragma once
#include "common/bit_array.h" #include "common/bit_array.h"
#include "common/enum.h"
#include "common/types.h" #include "common/types.h"
namespace VideoCore { namespace VideoCore {
@ -17,9 +18,12 @@ constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL;
constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE; constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE;
enum class Type { enum class Type {
CPU, None = 0,
GPU, CPU = 1 << 0,
GPU = 1 << 1,
}; };
DECLARE_ENUM_FLAG_OPERATORS(Type)
using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>; using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>;

View file

@ -70,13 +70,27 @@ public:
} }
} }
template <Type type, bool enable>
void PerformDeferredProtections() {
bool was_deferred = True(deferred_protection & type);
if (!was_deferred) {
return;
}
deferred_protection &= ~type;
if constexpr (type == Type::CPU) {
UpdateProtection<!enable, false>();
} else if constexpr (type == Type::GPU) {
UpdateProtection<enable, true>();
}
}
/** /**
* Change the state of a range of pages * Change the state of a range of pages
* *
* @param dirty_addr Base address to mark or unmark as modified * @param dirty_addr Base address to mark or unmark as modified
* @param size Size in bytes to mark or unmark as modified * @param size Size in bytes to mark or unmark as modified
*/ */
template <Type type, bool enable> template <Type type, bool enable, bool defer_protect>
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
RENDERER_TRACE; RENDERER_TRACE;
const size_t offset = dirty_addr - cpu_addr; const size_t offset = dirty_addr - cpu_addr;
@ -93,7 +107,9 @@ public:
} else { } else {
bits.UnsetRange(start_page, end_page); bits.UnsetRange(start_page, end_page);
} }
if constexpr (type == Type::CPU) { if constexpr (defer_protect) {
deferred_protection |= type;
} else if constexpr (type == Type::CPU) {
UpdateProtection<!enable, false>(); UpdateProtection<!enable, false>();
} else if (Config::readbacks()) { } else if (Config::readbacks()) {
UpdateProtection<enable, true>(); UpdateProtection<enable, true>();
@ -108,7 +124,7 @@ public:
* @param size Size in bytes of the CPU range to loop over * @param size Size in bytes of the CPU range to loop over
* @param func Function to call for each turned off region * @param func Function to call for each turned off region
*/ */
template <Type type, bool clear> template <Type type, bool clear, bool defer_protect>
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) { void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) {
RENDERER_TRACE; RENDERER_TRACE;
const size_t offset = query_cpu_range - cpu_addr; const size_t offset = query_cpu_range - cpu_addr;
@ -124,7 +140,9 @@ public:
if constexpr (clear) { if constexpr (clear) {
bits.UnsetRange(start_page, end_page); bits.UnsetRange(start_page, end_page);
if constexpr (type == Type::CPU) { if constexpr (defer_protect) {
deferred_protection |= type;
} else if constexpr (type == Type::CPU) {
UpdateProtection<true, false>(); UpdateProtection<true, false>();
} else if (Config::readbacks()) { } else if (Config::readbacks()) {
UpdateProtection<false, true>(); UpdateProtection<false, true>();
@ -136,24 +154,6 @@ public:
} }
} }
/**
* Changes state of all pages in the region
*/
template <Type type, bool enable>
void ChangeAllRegionState() noexcept {
RENDERER_TRACE;
if constexpr (enable) {
GetRegionBits<type>().Fill();
} else {
GetRegionBits<type>().Clear();
}
if constexpr (type == Type::CPU) {
UpdateProtection<!enable, false>();
} else if (Config::readbacks()) {
UpdateProtection<enable, true>();
}
}
/** /**
* Returns true when a region has been modified * Returns true when a region has been modified
* *
@ -204,6 +204,7 @@ private:
PageManager* tracker; PageManager* tracker;
VAddr cpu_addr = 0; VAddr cpu_addr = 0;
Type deferred_protection = Type::None;
RegionBits cpu; RegionBits cpu;
RegionBits gpu; RegionBits gpu;
RegionBits writeable; RegionBits writeable;