commit fd0e496a8c
Lander Gallastegi, 2025-07-07 23:06:42 +01:00, committed by GitHub
10 changed files with 238 additions and 105 deletions

CMakeLists.txt

@@ -921,7 +921,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
     src/video_core/buffer_cache/buffer_cache.cpp
     src/video_core/buffer_cache/buffer_cache.h
     src/video_core/buffer_cache/memory_tracker.h
-    src/video_core/buffer_cache/range_set.h
     src/video_core/buffer_cache/region_definitions.h
     src/video_core/buffer_cache/region_manager.h
     src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -980,6 +979,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
     src/video_core/page_manager.cpp
     src/video_core/page_manager.h
     src/video_core/multi_level_page_table.h
+    src/video_core/range_set.h
     src/video_core/renderdoc.cpp
     src/video_core/renderdoc.h
 )

src/video_core/buffer_cache/buffer_cache.cpp

@@ -29,9 +29,9 @@ static constexpr size_t DeviceBufferSize = 128_MB;
 static constexpr size_t MaxPageFaults = 1024;
 
 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
-                         AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
-                         PageManager& tracker)
-    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
+                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
+                         TextureCache& texture_cache_, PageManager& tracker)
+    : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
@@ -154,9 +154,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
     memory_tracker->ForEachDownloadRange<false>(
         device_addr, size, [&](u64 device_addr_out, u64 range_size) {
             const VAddr buffer_addr = buffer.CpuAddr();
-            const auto add_download = [&](VAddr start, VAddr end) {
+            const auto add_download = [&](VAddr start, u64 new_size) {
                 const u64 new_offset = start - buffer_addr;
-                const u64 new_size = end - start;
                 copies.push_back(vk::BufferCopy{
                     .srcOffset = new_offset,
                     .dstOffset = total_size_bytes,
@@ -996,6 +995,57 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
         });
 }
 
+void BufferCache::SynchronizeBuffersForDma() {
+    RENDERER_TRACE;
+    boost::container::small_vector<Buffer*, 64> buffers;
+    boost::container::small_vector<vk::BufferCopy, 4> copies;
+    const auto& mapped_ranges = rasterizer.GetMappedRanges();
+    bool barrier_recorded = false;
+    memory_tracker->Lock();
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    mapped_ranges.ForEach([&](VAddr device_addr, u64 size) {
+        ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+            memory_tracker->ForEachUploadRange<true, false>(
+                buffer.CpuAddr(), buffer.SizeBytes(), false,
+                [&](u64 device_addr_out, u64 range_size) {
+                    if (!barrier_recorded) {
+                        barrier_recorded = true;
+                        const vk::BufferMemoryBarrier2 barrier = {
+                            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+                            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead |
+                                             vk::AccessFlagBits2::eMemoryWrite |
+                                             vk::AccessFlagBits2::eTransferRead |
+                                             vk::AccessFlagBits2::eTransferWrite,
+                            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+                            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+                            .buffer = buffer.Handle(),
+                            .offset = 0,
+                            .size = buffer.SizeBytes(),
+                        };
+                        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+                            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+                            .bufferMemoryBarrierCount = 1,
+                            .pBufferMemoryBarriers = &barrier,
+                        });
+                    }
+                    const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
+                    copies.push_back(vk::BufferCopy{
+                        .srcOffset = offset,
+                        .dstOffset = device_addr_out - buffer.CpuAddr(),
+                        .size = range_size,
+                    });
+                });
+            cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.Handle(), copies);
+            copies.clear();
+            barrier_recorded = false;
+        });
+    });
+    memory_tracker->PerformDeferredProtections<Type::CPU, false, false>();
+    MemoryBarrier();
+    memory_tracker->Unlock();
+}
+
 void BufferCache::MemoryBarrier() {
     // Vulkan doesn't know which buffer we access in a shader if we use
     // BufferDeviceAddress. We need a full memory barrier.
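For orientation, the sequence the new SynchronizeBuffersForDma pass performs can be condensed into the following non-verbatim sketch (only names introduced in this diff):

    memory_tracker->Lock();          // take the tracker's global lock for the whole pass
    scheduler.EndRendering();        // copies must not happen inside a render pass
    // For every mapped range, upload the CPU-dirty parts of each overlapping buffer,
    // emitting one transfer barrier per destination buffer before its copies.
    // Protection changes are only recorded (defer_protect = true, locking = false).
    memory_tracker->PerformDeferredProtections<Type::CPU, false, false>();
    MemoryBarrier();                 // global barrier for buffer-device-address access
    memory_tracker->Unlock();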

src/video_core/buffer_cache/buffer_cache.h

@@ -5,12 +5,11 @@
 #include <shared_mutex>
 #include <boost/container/small_vector.hpp>
-#include "common/div_ceil.h"
 #include "common/slot_vector.h"
 #include "common/types.h"
 #include "video_core/buffer_cache/buffer.h"
-#include "video_core/buffer_cache/range_set.h"
 #include "video_core/multi_level_page_table.h"
+#include "video_core/range_set.h"
 
 namespace AmdGpu {
 struct Liverpool;
@@ -22,7 +21,8 @@ class MemoryManager;
 
 namespace Vulkan {
 class GraphicsPipeline;
-}
+class Rasterizer;
+} // namespace Vulkan
 
 namespace VideoCore {
@@ -71,8 +71,8 @@ public:
 public:
     explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
-                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
-                         PageManager& tracker);
+                         Vulkan::Rasterizer& rasterizer, AmdGpu::Liverpool* liverpool,
+                         TextureCache& texture_cache, PageManager& tracker);
     ~BufferCache();
 
     /// Returns a pointer to GDS device local buffer.
@@ -156,8 +156,8 @@ public:
     /// Synchronizes all buffers in the specified range.
     void SynchronizeBuffersInRange(VAddr device_addr, u64 size);
 
-    /// Synchronizes all buffers neede for DMA.
-    void SynchronizeDmaBuffers();
+    /// Synchronizes all buffers for DMA.
+    void SynchronizeBuffersForDma();
 
     /// Record memory barrier. Used for buffers when accessed via BDA.
     void MemoryBarrier();
@@ -204,6 +204,7 @@ private:
     const Vulkan::Instance& instance;
     Vulkan::Scheduler& scheduler;
+    Vulkan::Rasterizer& rasterizer;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     TextureCache& texture_cache;

src/video_core/buffer_cache/memory_tracker.h

@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <deque>
+#include <shared_mutex>
 #include <type_traits>
 #include <vector>
 #include "common/debug.h"
@@ -24,8 +25,9 @@ public:
     ~MemoryTracker() = default;
 
     /// Returns true if a region has been modified from the CPU
+    template <bool locking = true>
     bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<true>(
+        return IterateRegions<true, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::CPU>(offset, size);
@@ -33,8 +35,9 @@ public:
     }
 
     /// Returns true if a region has been modified from the GPU
+    template <bool locking = true>
     bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<false>(
+        return IterateRegions<false, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::GPU>(offset, size);
@@ -42,28 +45,31 @@ public:
     }
 
     /// Mark region as CPU modified, notifying the device_tracker about this change
+    template <bool defer_protect = false, bool locking = true>
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
-        IteratePages<false>(dirty_cpu_addr, query_size,
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
                             [](RegionManager* manager, u64 offset, size_t size) {
                                 std::scoped_lock lk{manager->lock};
-                                manager->template ChangeRegionState<Type::CPU, true>(
+                                manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
                                     manager->GetCpuAddr() + offset, size);
                             });
     }
 
     /// Unmark region as modified from the host GPU
+    template <bool defer_protect = false, bool locking = true>
     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
-        IteratePages<false>(dirty_cpu_addr, query_size,
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
                             [](RegionManager* manager, u64 offset, size_t size) {
                                 std::scoped_lock lk{manager->lock};
-                                manager->template ChangeRegionState<Type::GPU, false>(
+                                manager->template ChangeRegionState<Type::GPU, false, defer_protect>(
                                     manager->GetCpuAddr() + offset, size);
                             });
     }
 
     /// Removes all protection from a page and ensures GPU data has been flushed if requested
+    template <bool defer_protect = false, bool locking = true>
     void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
-        IteratePages<false>(
+        IterateRegions<false, locking>(
             cpu_addr, size,
             [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
                 const bool should_flush = [&] {
@@ -75,7 +81,7 @@ public:
                     if (try_flush && manager->template IsRegionModified<Type::GPU>(offset, size)) {
                         return true;
                     }
-                    manager->template ChangeRegionState<Type::CPU, true>(
+                    manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
                         manager->GetCpuAddr() + offset, size);
                     return false;
                 }();
@@ -86,28 +92,51 @@ public:
     }
 
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
-    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
-        IteratePages<true>(query_cpu_range, query_size,
-                           [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
-                               std::scoped_lock lk{manager->lock};
-                               manager->template ForEachModifiedRange<Type::CPU, true>(
-                                   manager->GetCpuAddr() + offset, size, func);
-                               if (is_written) {
-                                   manager->template ChangeRegionState<Type::GPU, true>(
-                                       manager->GetCpuAddr() + offset, size);
-                               }
-                           });
+    template <bool defer_protect = false, bool locking = true>
+    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
+        IterateRegions<true, locking>(
+            query_cpu_range, query_size,
+            [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
+                std::scoped_lock lk{manager->lock};
+                manager->template ForEachModifiedRange<Type::CPU, true, defer_protect>(
+                    manager->GetCpuAddr() + offset, size, func);
+                if (is_written) {
+                    manager->template ChangeRegionState<Type::GPU, true, defer_protect>(
+                        manager->GetCpuAddr() + offset, size);
+                }
+            });
     }
 
     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
-    template <bool clear>
+    template <bool clear, bool defer_protect = false, bool locking = true>
     void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
-        IteratePages<false>(query_cpu_range, query_size,
+        IterateRegions<false, locking>(query_cpu_range, query_size,
                             [&func](RegionManager* manager, u64 offset, size_t size) {
                                 std::scoped_lock lk{manager->lock};
-                                manager->template ForEachModifiedRange<Type::GPU, clear>(
+                                manager->template ForEachModifiedRange<Type::GPU, clear, defer_protect>(
                                     manager->GetCpuAddr() + offset, size, func);
                             });
     }
+
+    /// Notifies deferred protection changes to the tracker.
+    template <Type type, bool enable, bool locking = true>
+    void PerformDeferredProtections() {
+        ForEachRegion<locking>([&](RegionManager* manager) {
+            std::scoped_lock lk{manager->lock};
+            manager->template PerformDeferredProtections<type, enable>();
+        });
+    }
+
+    /// Lock the memory tracker.
+    void Lock() {
+        global_lock.lock();
+    }
+
+    /// Unlock the memory tracker.
+    void Unlock() {
+        global_lock.unlock();
+    }
 
 private:
@@ -118,42 +147,75 @@ private:
      * @param func Callback for each word manager.
      * @return
      */
-    template <bool create_region_on_fail, typename Func>
-    bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
-        RENDERER_TRACE;
-        using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
-        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
-        std::size_t remaining_size{size};
-        std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS};
-        u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK};
-        while (remaining_size > 0) {
-            const std::size_t copy_amount{
-                std::min<std::size_t>(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)};
-            auto* manager{top_tier[page_index]};
-            if (manager) {
-                if constexpr (BOOL_BREAK) {
-                    if (func(manager, page_offset, copy_amount)) {
-                        return true;
-                    }
-                } else {
-                    func(manager, page_offset, copy_amount);
-                }
-            } else if constexpr (create_region_on_fail) {
-                CreateRegion(page_index);
-                manager = top_tier[page_index];
-                if constexpr (BOOL_BREAK) {
-                    if (func(manager, page_offset, copy_amount)) {
-                        return true;
-                    }
-                } else {
-                    func(manager, page_offset, copy_amount);
-                }
-            }
-            page_index++;
-            page_offset = 0;
-            remaining_size -= copy_amount;
-        }
-        return false;
-    }
+    template <bool create_region_on_fail, bool locking, typename Func>
+    bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) {
+        RENDERER_TRACE;
+        const auto run = [&]() {
+            using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
+            static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+            std::size_t remaining_size{size};
+            std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS};
+            u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK};
+            while (remaining_size > 0) {
+                const std::size_t copy_amount{
+                    std::min<std::size_t>(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)};
+                auto* manager{top_tier[page_index]};
+                if (manager) {
+                    if constexpr (BOOL_BREAK) {
+                        if (func(manager, page_offset, copy_amount)) {
+                            return true;
+                        }
+                    } else {
+                        func(manager, page_offset, copy_amount);
+                    }
+                } else if constexpr (create_region_on_fail) {
+                    CreateRegion(page_index);
+                    manager = top_tier[page_index];
+                    if constexpr (BOOL_BREAK) {
+                        if (func(manager, page_offset, copy_amount)) {
+                            return true;
+                        }
+                    } else {
+                        func(manager, page_offset, copy_amount);
+                    }
+                }
+                page_index++;
+                page_offset = 0;
+                remaining_size -= copy_amount;
+            }
+            return false;
+        };
+        if constexpr (locking) {
+            std::shared_lock lock{global_lock};
+            return run();
+        } else {
+            return run();
+        }
+    }
+
+    /**
+     * @brief Iterate through all regions in the memory tracker.
+     * @param func Callback for each region manager.
+     * @return
+     */
+    template <bool locking, typename Func>
+    void ForEachRegion(Func&& func) {
+        RENDERER_TRACE;
+        const auto run = [&]() {
+            for (auto& pool : manager_pool) {
+                for (auto& manager : pool) {
+                    if (manager.GetCpuAddr() != 0) {
+                        func(&manager);
+                    }
+                }
+            }
+        };
+        if constexpr (locking) {
+            std::shared_lock lock{global_lock};
+            run();
+        } else {
+            run();
+        }
+    }
 
     void CreateRegion(std::size_t page_index) {
@@ -177,6 +239,7 @@ private:
     std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
     std::vector<RegionManager*> free_managers;
     std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
+    std::shared_mutex global_lock;
 };
 
 } // namespace VideoCore
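Taken together, the new locking template parameter, the global lock, and the deferred-protection entry points are intended to be driven the way BufferCache::SynchronizeBuffersForDma does above. A minimal sketch of that pattern (all names are the ones introduced in this diff):

    memory_tracker->Lock();                          // hold global_lock exclusively for the whole pass
    memory_tracker->ForEachUploadRange<true, false>( // defer_protect = true, locking = false (lock already held)
        addr, size, /*is_written=*/false, [&](u64 range_addr, u64 range_size) {
            // record a staging copy for this CPU-dirty range
        });
    memory_tracker->PerformDeferredProtections<Type::CPU, false, false>(); // apply postponed protection updates
    memory_tracker->Unlock();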

src/video_core/buffer_cache/region_definitions.h

@@ -4,6 +4,7 @@
 #pragma once
 
 #include "common/bit_array.h"
+#include "common/enum.h"
 #include "common/types.h"
 
 namespace VideoCore {
@@ -17,9 +18,12 @@ constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL;
 constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE;
 
 enum class Type {
-    CPU,
-    GPU,
+    None = 0,
+    CPU = 1 << 0,
+    GPU = 1 << 1,
 };
 
+DECLARE_ENUM_FLAG_OPERATORS(Type)
+
 using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>;
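Type changes from a plain enumerator to a flag set so a region can remember which protection updates were deferred (possibly CPU and GPU at once); DECLARE_ENUM_FLAG_OPERATORS supplies the bitwise operators used in region_manager.h below. A small sketch, assuming the True() helper also comes from common/enum.h as the new include suggests:

    Type deferred_protection = Type::None;
    deferred_protection |= Type::CPU;            // record a postponed CPU protection change
    if (True(deferred_protection & Type::CPU)) { // later: was anything deferred for CPU?
        deferred_protection &= ~Type::CPU;       // consume the flag once the protection is applied
    }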

src/video_core/buffer_cache/region_manager.h

@@ -70,13 +70,27 @@ public:
         }
     }
 
+    template <Type type, bool enable>
+    void PerformDeferredProtections() {
+        bool was_deferred = True(deferred_protection & type);
+        if (!was_deferred) {
+            return;
+        }
+        deferred_protection &= ~type;
+        if constexpr (type == Type::CPU) {
+            UpdateProtection<!enable, false>();
+        } else if constexpr (type == Type::GPU) {
+            UpdateProtection<enable, true>();
+        }
+    }
+
     /**
      * Change the state of a range of pages
      *
      * @param dirty_addr Base address to mark or unmark as modified
      * @param size Size in bytes to mark or unmark as modified
      */
-    template <Type type, bool enable>
+    template <Type type, bool enable, bool defer_protect>
     void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
         RENDERER_TRACE;
         const size_t offset = dirty_addr - cpu_addr;
@@ -93,7 +107,9 @@ public:
         } else {
             bits.UnsetRange(start_page, end_page);
         }
-        if constexpr (type == Type::CPU) {
+        if constexpr (defer_protect) {
+            deferred_protection |= type;
+        } else if constexpr (type == Type::CPU) {
             UpdateProtection<!enable, false>();
         } else if (Config::readbacks()) {
             UpdateProtection<enable, true>();
@@ -108,7 +124,7 @@ public:
      * @param size Size in bytes of the CPU range to loop over
      * @param func Function to call for each turned off region
      */
-    template <Type type, bool clear>
+    template <Type type, bool clear, bool defer_protect>
     void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) {
         RENDERER_TRACE;
         const size_t offset = query_cpu_range - cpu_addr;
@@ -124,7 +140,9 @@ public:
         if constexpr (clear) {
             bits.UnsetRange(start_page, end_page);
-            if constexpr (type == Type::CPU) {
+            if constexpr (defer_protect) {
+                deferred_protection |= type;
+            } else if constexpr (type == Type::CPU) {
                 UpdateProtection<true, false>();
             } else if (Config::readbacks()) {
                 UpdateProtection<false, true>();
@@ -186,6 +204,7 @@ private:
     PageManager* tracker;
     VAddr cpu_addr = 0;
+    Type deferred_protection = Type::None;
     RegionBits cpu;
     RegionBits gpu;
     RegionBits writeable;

src/video_core/page_manager.cpp

@@ -4,6 +4,7 @@
 #include <boost/container/small_vector.hpp>
 #include "common/assert.h"
 #include "common/debug.h"
+#include "common/div_ceil.h"
 #include "common/range_lock.h"
 #include "common/signal_context.h"
 #include "core/memory.h"

src/video_core/range_set.h

@@ -66,7 +66,7 @@ struct RangeSet {
         for (const auto& set : m_ranges_set) {
             const VAddr inter_addr_end = set.upper();
             const VAddr inter_addr = set.lower();
-            func(inter_addr, inter_addr_end);
+            func(inter_addr, inter_addr_end - inter_addr);
         }
     }
@@ -92,7 +92,7 @@ struct RangeSet {
             if (inter_addr < start_address) {
                 inter_addr = start_address;
             }
-            func(inter_addr, inter_addr_end);
+            func(inter_addr, inter_addr_end - inter_addr);
         }
     }
@@ -170,7 +170,7 @@ public:
         for (const auto& [interval, value] : m_ranges_map) {
             const VAddr inter_addr_end = interval.upper();
             const VAddr inter_addr = interval.lower();
-            func(inter_addr, inter_addr_end, value);
+            func(inter_addr, inter_addr_end - inter_addr, value);
         }
     }
@@ -196,7 +196,7 @@ public:
             if (inter_addr < start_address) {
                 inter_addr = start_address;
             }
-            func(inter_addr, inter_addr_end, it->second);
+            func(inter_addr, inter_addr_end - inter_addr, it->second);
         }
     }
@@ -274,7 +274,7 @@ public:
         for (const auto& [interval, value] : m_ranges_map) {
             const VAddr inter_addr_end = interval.upper();
             const VAddr inter_addr = interval.lower();
-            func(inter_addr, inter_addr_end, value);
+            func(inter_addr, inter_addr_end - inter_addr, value);
         }
     }
@@ -300,7 +300,7 @@ public:
            if (inter_addr < start_address) {
                inter_addr = start_address;
            }
-           func(inter_addr, inter_addr_end, it->second);
+           func(inter_addr, inter_addr_end - inter_addr, it->second);
        }
    }
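With this change, every iteration callback in range_set.h receives (address, size) instead of (address, end_address), matching how the buffer cache and rasterizer consume the ranges. A minimal caller sketch under that assumption:

    VideoCore::RangeSet ranges;
    ranges.Add(0x10000, 0x4000);        // 0x4000 bytes starting at 0x10000
    ranges.ForEach([](VAddr addr, u64 size) {
        // the second argument is now a byte count; callers no longer compute `end - addr` themselves
    });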

src/video_core/renderer_vulkan/vk_rasterizer.cpp

@@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
 Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
                        AmdGpu::Liverpool* liverpool_)
     : instance{instance_}, scheduler{scheduler_}, page_manager{this},
-      buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
+      buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
       texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
     if (!Config::nullGpu()) {
@@ -484,12 +484,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         // We only use fault buffer for DMA right now.
         {
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-            for (auto& range : mapped_ranges) {
-                buffer_cache.SynchronizeBuffersInRange(range.lower(),
-                                                       range.upper() - range.lower());
-            }
+            buffer_cache.SynchronizeBuffersForDma();
         }
-        buffer_cache.MemoryBarrier();
     }
 
     fault_process_pending |= uses_dma;
@@ -987,16 +983,14 @@ bool Rasterizer::IsMapped(VAddr addr, u64 size) {
         // There is no memory, so not mapped.
         return false;
     }
-    const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
-
     Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-    return boost::icl::contains(mapped_ranges, range);
+    return mapped_ranges.Contains(addr, size);
 }
 
 void Rasterizer::MapMemory(VAddr addr, u64 size) {
     {
         std::scoped_lock lock{mapped_ranges_mutex};
-        mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+        mapped_ranges.Add(addr, size);
     }
     page_manager.OnGpuMap(addr, size);
 }
@@ -1007,7 +1001,7 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
     page_manager.OnGpuUnmap(addr, size);
     {
         std::scoped_lock lock{mapped_ranges_mutex};
-        mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+        mapped_ranges.Subtract(addr, size);
     }
 }

src/video_core/renderer_vulkan/vk_rasterizer.h

@@ -3,11 +3,11 @@
 #pragma once
 
-#include <shared_mutex>
 #include "common/recursive_lock.h"
 #include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
+#include "video_core/range_set.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/texture_cache/texture_cache.h"
@@ -43,6 +43,10 @@ public:
         return texture_cache;
     }
 
+    [[nodiscard]] const VideoCore::RangeSet& GetMappedRanges() const noexcept {
+        return mapped_ranges;
+    }
+
     void Draw(bool is_indexed, u32 index_offset = 0);
     void DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size, u32 max_count,
                       VAddr count_address);
@@ -76,11 +80,8 @@ public:
     template <typename Func>
     void ForEachMappedRangeInRange(VAddr addr, u64 size, Func&& func) {
-        const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
-        Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-        for (const auto& mapped_range : (mapped_ranges & range)) {
-            func(mapped_range);
-        }
+        Common::RecursiveSharedLock lk(mapped_ranges_mutex);
+        mapped_ranges.ForEachInRange(addr, size, std::forward<Func>(func));
     }
 
 private:
@@ -122,7 +123,7 @@ private:
     VideoCore::TextureCache texture_cache;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
-    boost::icl::interval_set<VAddr> mapped_ranges;
+    VideoCore::RangeSet mapped_ranges;
     Common::SharedFirstMutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;