Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-07-12 04:35:56 +00:00)
Merge 17815ad439 into ddede4a52d
This commit is contained in commit fd0e496a8c
10 changed files with 238 additions and 105 deletions
@@ -921,7 +921,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
                src/video_core/buffer_cache/buffer_cache.cpp
                src/video_core/buffer_cache/buffer_cache.h
                src/video_core/buffer_cache/memory_tracker.h
-               src/video_core/buffer_cache/range_set.h
                src/video_core/buffer_cache/region_definitions.h
                src/video_core/buffer_cache/region_manager.h
                src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -980,6 +979,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
                src/video_core/page_manager.cpp
                src/video_core/page_manager.h
                src/video_core/multi_level_page_table.h
+               src/video_core/range_set.h
                src/video_core/renderdoc.cpp
                src/video_core/renderdoc.h
 )

@@ -29,9 +29,9 @@ static constexpr size_t DeviceBufferSize = 128_MB;
 static constexpr size_t MaxPageFaults = 1024;
 
 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
-                         AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
-                         PageManager& tracker)
-    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
+                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
+                         TextureCache& texture_cache_, PageManager& tracker)
+    : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
@@ -154,9 +154,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
     memory_tracker->ForEachDownloadRange<false>(
         device_addr, size, [&](u64 device_addr_out, u64 range_size) {
             const VAddr buffer_addr = buffer.CpuAddr();
-            const auto add_download = [&](VAddr start, VAddr end) {
+            const auto add_download = [&](VAddr start, u64 new_size) {
                 const u64 new_offset = start - buffer_addr;
-                const u64 new_size = end - start;
                 copies.push_back(vk::BufferCopy{
                     .srcOffset = new_offset,
                     .dstOffset = total_size_bytes,
@@ -996,6 +995,57 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
     });
 }
 
+void BufferCache::SynchronizeBuffersForDma() {
+    RENDERER_TRACE;
+    boost::container::small_vector<Buffer*, 64> buffers;
+    boost::container::small_vector<vk::BufferCopy, 4> copies;
+    const auto& mapped_ranges = rasterizer.GetMappedRanges();
+    bool barrier_recorded = false;
+    memory_tracker->Lock();
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    mapped_ranges.ForEach([&](VAddr device_addr, u64 size) {
+        ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+            memory_tracker->ForEachUploadRange<true, false>(
+                buffer.CpuAddr(), buffer.SizeBytes(), false,
+                [&](u64 device_addr_out, u64 range_size) {
+                    if (!barrier_recorded) {
+                        barrier_recorded = true;
+                        const vk::BufferMemoryBarrier2 barrier = {
+                            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+                            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead |
+                                             vk::AccessFlagBits2::eMemoryWrite |
+                                             vk::AccessFlagBits2::eTransferRead |
+                                             vk::AccessFlagBits2::eTransferWrite,
+                            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+                            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+                            .buffer = buffer.Handle(),
+                            .offset = 0,
+                            .size = buffer.SizeBytes(),
+                        };
+                        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+                            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+                            .bufferMemoryBarrierCount = 1,
+                            .pBufferMemoryBarriers = &barrier,
+                        });
+                    }
+                    const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
+                    copies.push_back(vk::BufferCopy{
+                        .srcOffset = offset,
+                        .dstOffset = device_addr_out - buffer.CpuAddr(),
+                        .size = range_size,
+                    });
+                });
+            cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.Handle(), copies);
+            copies.clear();
+            barrier_recorded = false;
+        });
+    });
+    memory_tracker->PerformDeferredProtections<Type::CPU, false, false>();
+    MemoryBarrier();
+    memory_tracker->Unlock();
+}
+
 void BufferCache::MemoryBarrier() {
     // Vulkan doesn't know which buffer we access in a shader if we use
     // BufferDeviceAddress. We need a full memory barrier.

@@ -5,12 +5,11 @@
 
-#include <shared_mutex>
 #include <boost/container/small_vector.hpp>
 #include "common/div_ceil.h"
 #include "common/slot_vector.h"
 #include "common/types.h"
 #include "video_core/buffer_cache/buffer.h"
-#include "video_core/buffer_cache/range_set.h"
 #include "video_core/multi_level_page_table.h"
+#include "video_core/range_set.h"
 
 namespace AmdGpu {
 struct Liverpool;
@@ -22,7 +21,8 @@ class MemoryManager;
 
 namespace Vulkan {
 class GraphicsPipeline;
-}
+class Rasterizer;
+} // namespace Vulkan
 
 namespace VideoCore {
 
@@ -71,8 +71,8 @@ public:
 
 public:
     explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
-                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
-                         PageManager& tracker);
+                         Vulkan::Rasterizer& rasterizer, AmdGpu::Liverpool* liverpool,
+                         TextureCache& texture_cache, PageManager& tracker);
     ~BufferCache();
 
     /// Returns a pointer to GDS device local buffer.
@@ -156,8 +156,8 @@ public:
     /// Synchronizes all buffers in the specified range.
     void SynchronizeBuffersInRange(VAddr device_addr, u64 size);
 
-    /// Synchronizes all buffers neede for DMA.
-    void SynchronizeDmaBuffers();
+    /// Synchronizes all buffers for DMA.
+    void SynchronizeBuffersForDma();
 
     /// Record memory barrier. Used for buffers when accessed via BDA.
     void MemoryBarrier();
@@ -204,6 +204,7 @@ private:
 
     const Vulkan::Instance& instance;
     Vulkan::Scheduler& scheduler;
+    Vulkan::Rasterizer& rasterizer;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     TextureCache& texture_cache;

@@ -5,6 +5,7 @@
 
 #include <algorithm>
 #include <deque>
+#include <shared_mutex>
 #include <type_traits>
 #include <vector>
 #include "common/debug.h"
@@ -24,8 +25,9 @@ public:
     ~MemoryTracker() = default;
 
     /// Returns true if a region has been modified from the CPU
+    template <bool locking = true>
     bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<true>(
+        return IterateRegions<true, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::CPU>(offset, size);
@@ -33,8 +35,9 @@ public:
     }
 
     /// Returns true if a region has been modified from the GPU
+    template <bool locking = true>
     bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<false>(
+        return IterateRegions<false, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::GPU>(offset, size);
@@ -42,28 +45,31 @@ public:
     }
 
     /// Mark region as CPU modified, notifying the device_tracker about this change
+    template <bool defer_protect = false, bool locking = true>
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
-        IteratePages<false>(dirty_cpu_addr, query_size,
-                            [](RegionManager* manager, u64 offset, size_t size) {
-                                std::scoped_lock lk{manager->lock};
-                                manager->template ChangeRegionState<Type::CPU, true>(
-                                    manager->GetCpuAddr() + offset, size);
-                            });
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
+                                       [](RegionManager* manager, u64 offset, size_t size) {
+                                           std::scoped_lock lk{manager->lock};
+                                           manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
+                                               manager->GetCpuAddr() + offset, size);
+                                       });
     }
 
     /// Unmark region as modified from the host GPU
+    template <bool defer_protect = false, bool locking = true>
     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
-        IteratePages<false>(dirty_cpu_addr, query_size,
-                            [](RegionManager* manager, u64 offset, size_t size) {
-                                std::scoped_lock lk{manager->lock};
-                                manager->template ChangeRegionState<Type::GPU, false>(
-                                    manager->GetCpuAddr() + offset, size);
-                            });
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
+                                       [](RegionManager* manager, u64 offset, size_t size) {
+                                           std::scoped_lock lk{manager->lock};
+                                           manager->template ChangeRegionState<Type::GPU, false, defer_protect>(
+                                               manager->GetCpuAddr() + offset, size);
+                                       });
     }
 
     /// Removes all protection from a page and ensures GPU data has been flushed if requested
+    template <bool defer_protect = false, bool locking = true>
     void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
-        IteratePages<false>(
+        IterateRegions<false, locking>(
             cpu_addr, size,
             [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
                 const bool should_flush = [&] {
@@ -75,7 +81,7 @@ public:
                     if (try_flush && manager->template IsRegionModified<Type::GPU>(offset, size)) {
                         return true;
                     }
-                    manager->template ChangeRegionState<Type::CPU, true>(
+                    manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
                         manager->GetCpuAddr() + offset, size);
                     return false;
                 }();
@@ -86,28 +92,51 @@ public:
     }
 
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
+    template <bool defer_protect = false, bool locking = true>
     void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
-        IteratePages<true>(query_cpu_range, query_size,
-                           [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
-                               std::scoped_lock lk{manager->lock};
-                               manager->template ForEachModifiedRange<Type::CPU, true>(
-                                   manager->GetCpuAddr() + offset, size, func);
-                               if (is_written) {
-                                   manager->template ChangeRegionState<Type::GPU, true>(
-                                       manager->GetCpuAddr() + offset, size);
-                               }
-                           });
+        IterateRegions<true, locking>(
+            query_cpu_range, query_size,
+            [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
+                std::scoped_lock lk{manager->lock};
+                manager->template ForEachModifiedRange<Type::CPU, true, defer_protect>(
+                    manager->GetCpuAddr() + offset, size, func);
+                if (is_written) {
+                    manager->template ChangeRegionState<Type::GPU, true, defer_protect>(
+                        manager->GetCpuAddr() + offset, size);
+                }
+            });
     }
 
     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
-    template <bool clear>
+    template <bool clear, bool defer_protect = false, bool locking = true>
     void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
-        IteratePages<false>(query_cpu_range, query_size,
-                            [&func](RegionManager* manager, u64 offset, size_t size) {
-                                std::scoped_lock lk{manager->lock};
-                                manager->template ForEachModifiedRange<Type::GPU, clear>(
-                                    manager->GetCpuAddr() + offset, size, func);
-                            });
+        IterateRegions<false, locking>(query_cpu_range, query_size,
+                                       [&func](RegionManager* manager, u64 offset, size_t size) {
+                                           std::scoped_lock lk{manager->lock};
+                                           manager->template ForEachModifiedRange<Type::GPU, clear, defer_protect>(
+                                               manager->GetCpuAddr() + offset, size, func);
+                                       });
     }
 
+    /// Notifies deferred protection changes to the tracker.
+    template <Type type, bool enable, bool locking = true>
+    void PerformDeferredProtections() {
+        ForEachRegion<locking>([&](RegionManager* manager) {
+            std::scoped_lock lk{manager->lock};
+            manager->template PerformDeferredProtections<type, enable>();
+        });
+    }
+
+    /// Notifies all deferred protection changes to the tracker.
+
+    /// Lock the memory tracker.
+    void Lock() {
+        global_lock.lock();
+    }
+
+    /// Unlock the memory tracker.
+    void Unlock() {
+        global_lock.unlock();
+    }
+
 private:
@@ -118,42 +147,75 @@ private:
     * @param func Callback for each word manager.
     * @return
     */
-    template <bool create_region_on_fail, typename Func>
-    bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
+    template <bool create_region_on_fail, bool locking, typename Func>
+    bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) {
         RENDERER_TRACE;
-        using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
-        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
-        std::size_t remaining_size{size};
-        std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS};
-        u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK};
-        while (remaining_size > 0) {
-            const std::size_t copy_amount{
-                std::min<std::size_t>(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)};
-            auto* manager{top_tier[page_index]};
-            if (manager) {
-                if constexpr (BOOL_BREAK) {
-                    if (func(manager, page_offset, copy_amount)) {
-                        return true;
-                    }
-                } else {
-                    func(manager, page_offset, copy_amount);
-                }
-            } else if constexpr (create_region_on_fail) {
-                CreateRegion(page_index);
-                manager = top_tier[page_index];
-                if constexpr (BOOL_BREAK) {
-                    if (func(manager, page_offset, copy_amount)) {
-                        return true;
-                    }
-                } else {
-                    func(manager, page_offset, copy_amount);
-                }
-            }
-            page_index++;
-            page_offset = 0;
-            remaining_size -= copy_amount;
-        }
-        return false;
-    }
+        const auto run = [&]() {
+            using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
+            static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+            std::size_t remaining_size{size};
+            std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS};
+            u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK};
+            while (remaining_size > 0) {
+                const std::size_t copy_amount{
+                    std::min<std::size_t>(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)};
+                auto* manager{top_tier[page_index]};
+                if (manager) {
+                    if constexpr (BOOL_BREAK) {
+                        if (func(manager, page_offset, copy_amount)) {
+                            return true;
+                        }
+                    } else {
+                        func(manager, page_offset, copy_amount);
+                    }
+                } else if constexpr (create_region_on_fail) {
+                    CreateRegion(page_index);
+                    manager = top_tier[page_index];
+                    if constexpr (BOOL_BREAK) {
+                        if (func(manager, page_offset, copy_amount)) {
+                            return true;
+                        }
+                    } else {
+                        func(manager, page_offset, copy_amount);
+                    }
+                }
+                page_index++;
+                page_offset = 0;
+                remaining_size -= copy_amount;
+            }
+            return false;
+        };
+        if constexpr (locking) {
+            std::shared_lock lock{global_lock};
+            return run();
+        } else {
+            return run();
+        }
+    }
+
+    /**
+     * @brief Iterate through all regions in the memory tracker.
+     * @param func Callback for each region manager.
+     * @return
+     */
+    template <bool locking, typename Func>
+    void ForEachRegion(Func&& func) {
+        RENDERER_TRACE;
+        const auto run = [&]() {
+            for (auto& pool : manager_pool) {
+                for (auto& manager : pool) {
+                    if (manager.GetCpuAddr() != 0) {
+                        func(&manager);
+                    }
+                }
+            }
+        };
+        if constexpr (locking) {
+            std::shared_lock lock{global_lock};
+            run();
+        } else {
+            run();
+        }
+    }
 
     void CreateRegion(std::size_t page_index) {
@@ -177,6 +239,7 @@ private:
     std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
     std::vector<RegionManager*> free_managers;
    std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
+    std::shared_mutex global_lock;
 };
 
 } // namespace VideoCore

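The locking template parameter added throughout MemoryTracker lets a caller that already holds the tracker's new global lock (as BufferCache::SynchronizeBuffersForDma does above) skip the per-call shared lock. A minimal, self-contained sketch of that compile-time toggle follows; the class and member names are illustrative, not the PR's exact code.

#include <shared_mutex>
#include <vector>

class TrackerSketch {
public:
    void Lock() { global_lock.lock(); }
    void Unlock() { global_lock.unlock(); }

    // locking = false is only safe while the caller holds Lock()/Unlock().
    template <bool locking = true, typename Func>
    void ForEachRegion(Func&& func) {
        const auto run = [&] {
            for (int& region : regions) {
                func(region);
            }
        };
        if constexpr (locking) {
            std::shared_lock lock{global_lock}; // normal callers take a reader lock
            run();
        } else {
            run(); // exclusive phase: the lock is already held by the caller
        }
    }

private:
    std::shared_mutex global_lock;
    std::vector<int> regions{1, 2, 3};
};

// Usage mirroring the DMA path: t.Lock(); t.ForEachRegion<false>(...); t.Unlock();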
@@ -4,6 +4,7 @@
 #pragma once
 
 #include "common/bit_array.h"
+#include "common/enum.h"
 #include "common/types.h"
 
 namespace VideoCore {
@@ -17,9 +18,12 @@ constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL;
 constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE;
 
 enum class Type {
-    CPU,
-    GPU,
+    None = 0,
+    CPU = 1 << 0,
+    GPU = 1 << 1,
 };
+DECLARE_ENUM_FLAG_OPERATORS(Type)
+
 
 using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>;

@@ -70,13 +70,27 @@ public:
         }
     }
 
+    template <Type type, bool enable>
+    void PerformDeferredProtections() {
+        bool was_deferred = True(deferred_protection & type);
+        if (!was_deferred) {
+            return;
+        }
+        deferred_protection &= ~type;
+        if constexpr (type == Type::CPU) {
+            UpdateProtection<!enable, false>();
+        } else if constexpr (type == Type::GPU) {
+            UpdateProtection<enable, true>();
+        }
+    }
+
     /**
      * Change the state of a range of pages
      *
      * @param dirty_addr Base address to mark or unmark as modified
      * @param size Size in bytes to mark or unmark as modified
      */
-    template <Type type, bool enable>
+    template <Type type, bool enable, bool defer_protect>
     void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
         RENDERER_TRACE;
         const size_t offset = dirty_addr - cpu_addr;
@@ -93,7 +107,9 @@ public:
         } else {
             bits.UnsetRange(start_page, end_page);
         }
-        if constexpr (type == Type::CPU) {
+        if constexpr (defer_protect) {
+            deferred_protection |= type;
+        } else if constexpr (type == Type::CPU) {
             UpdateProtection<!enable, false>();
         } else if (Config::readbacks()) {
             UpdateProtection<enable, true>();
@@ -108,7 +124,7 @@ public:
     * @param size Size in bytes of the CPU range to loop over
     * @param func Function to call for each turned off region
     */
-    template <Type type, bool clear>
+    template <Type type, bool clear, bool defer_protect>
     void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) {
         RENDERER_TRACE;
         const size_t offset = query_cpu_range - cpu_addr;
@@ -124,7 +140,9 @@ public:
 
         if constexpr (clear) {
             bits.UnsetRange(start_page, end_page);
-            if constexpr (type == Type::CPU) {
+            if constexpr (defer_protect) {
+                deferred_protection |= type;
+            } else if constexpr (type == Type::CPU) {
                 UpdateProtection<true, false>();
             } else if (Config::readbacks()) {
                 UpdateProtection<false, true>();
@@ -186,6 +204,7 @@ private:
 
     PageManager* tracker;
     VAddr cpu_addr = 0;
+    Type deferred_protection = Type::None;
     RegionBits cpu;
     RegionBits gpu;
     RegionBits writeable;

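Type becomes a flag enumeration so that RegionManager can record pending CPU and GPU protection changes independently in the new deferred_protection member and test them with expressions like True(deferred_protection & type). Below is a self-contained stand-in for the operators that DECLARE_ENUM_FLAG_OPERATORS and the True() helper provide in shadPS4's common headers; it is illustrative only, not the project's macro.

#include <cstdint>

enum class Type : std::uint32_t {
    None = 0,
    CPU = 1 << 0,
    GPU = 1 << 1,
};

constexpr Type operator|(Type a, Type b) {
    return static_cast<Type>(static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b));
}
constexpr Type operator&(Type a, Type b) {
    return static_cast<Type>(static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b));
}
constexpr Type operator~(Type a) {
    return static_cast<Type>(~static_cast<std::uint32_t>(a));
}
constexpr Type& operator|=(Type& a, Type b) { return a = a | b; }
constexpr Type& operator&=(Type& a, Type b) { return a = a & b; }
// Stand-in for the True() flag test used in PerformDeferredProtections above.
constexpr bool True(Type a) {
    return static_cast<std::uint32_t>(a) != 0;
}

// Deferring both kinds of protection, then clearing one, as PerformDeferredProtections does:
static_assert(True((Type::CPU | Type::GPU) & Type::GPU));
static_assert(!True(((Type::CPU | Type::GPU) & ~Type::CPU) & Type::CPU));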
@@ -4,6 +4,7 @@
#include <boost/container/small_vector.hpp>
#include "common/assert.h"
#include "common/debug.h"
#include "common/div_ceil.h"
#include "common/range_lock.h"
#include "common/signal_context.h"
#include "core/memory.h"

@@ -66,7 +66,7 @@ struct RangeSet {
         for (const auto& set : m_ranges_set) {
             const VAddr inter_addr_end = set.upper();
             const VAddr inter_addr = set.lower();
-            func(inter_addr, inter_addr_end);
+            func(inter_addr, inter_addr_end - inter_addr);
         }
     }
 
@@ -92,7 +92,7 @@ struct RangeSet {
             if (inter_addr < start_address) {
                 inter_addr = start_address;
             }
-            func(inter_addr, inter_addr_end);
+            func(inter_addr, inter_addr_end - inter_addr);
         }
     }
 
@@ -170,7 +170,7 @@ public:
         for (const auto& [interval, value] : m_ranges_map) {
             const VAddr inter_addr_end = interval.upper();
             const VAddr inter_addr = interval.lower();
-            func(inter_addr, inter_addr_end, value);
+            func(inter_addr, inter_addr_end - inter_addr, value);
         }
     }
 
@@ -196,7 +196,7 @@ public:
             if (inter_addr < start_address) {
                 inter_addr = start_address;
             }
-            func(inter_addr, inter_addr_end, it->second);
+            func(inter_addr, inter_addr_end - inter_addr, it->second);
         }
     }
 
@@ -274,7 +274,7 @@ public:
         for (const auto& [interval, value] : m_ranges_map) {
             const VAddr inter_addr_end = interval.upper();
             const VAddr inter_addr = interval.lower();
-            func(inter_addr, inter_addr_end, value);
+            func(inter_addr, inter_addr_end - inter_addr, value);
         }
     }
 
@@ -300,7 +300,7 @@ public:
             if (inter_addr < start_address) {
                 inter_addr = start_address;
             }
-            func(inter_addr, inter_addr_end, it->second);
+            func(inter_addr, inter_addr_end - inter_addr, it->second);
         }
     }

@@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
 Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
                        AmdGpu::Liverpool* liverpool_)
     : instance{instance_}, scheduler{scheduler_}, page_manager{this},
-      buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
+      buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
       texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
     if (!Config::nullGpu()) {
@@ -484,12 +484,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         // We only use fault buffer for DMA right now.
         {
-            Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-            for (auto& range : mapped_ranges) {
-                buffer_cache.SynchronizeBuffersInRange(range.lower(),
-                                                       range.upper() - range.lower());
-            }
+            buffer_cache.SynchronizeBuffersForDma();
         }
         buffer_cache.MemoryBarrier();
     }
 
     fault_process_pending |= uses_dma;
@@ -987,16 +983,14 @@ bool Rasterizer::IsMapped(VAddr addr, u64 size) {
         // There is no memory, so not mapped.
         return false;
     }
-    const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
-
     Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-    return boost::icl::contains(mapped_ranges, range);
+    return mapped_ranges.Contains(addr, size);
 }
 
 void Rasterizer::MapMemory(VAddr addr, u64 size) {
     {
         std::scoped_lock lock{mapped_ranges_mutex};
-        mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+        mapped_ranges.Add(addr, size);
     }
     page_manager.OnGpuMap(addr, size);
 }
@@ -1007,7 +1001,7 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
     page_manager.OnGpuUnmap(addr, size);
     {
         std::scoped_lock lock{mapped_ranges_mutex};
-        mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+        mapped_ranges.Subtract(addr, size);
     }
 }
 
@@ -3,11 +3,11 @@
 
 #pragma once
 
-#include <shared_mutex>
 #include "common/recursive_lock.h"
 #include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
+#include "video_core/range_set.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/texture_cache/texture_cache.h"
 
@@ -43,6 +43,10 @@ public:
         return texture_cache;
     }
 
+    [[nodiscard]] const VideoCore::RangeSet& GetMappedRanges() const noexcept {
+        return mapped_ranges;
+    }
+
     void Draw(bool is_indexed, u32 index_offset = 0);
     void DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size, u32 max_count,
                       VAddr count_address);
@@ -76,11 +80,8 @@ public:
 
     template <typename Func>
     void ForEachMappedRangeInRange(VAddr addr, u64 size, Func&& func) {
-        const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
-        Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-        for (const auto& mapped_range : (mapped_ranges & range)) {
-            func(mapped_range);
-        }
+        Common::RecursiveSharedLock lk(mapped_ranges_mutex);
+        mapped_ranges.ForEachInRange(addr, size, std::forward<Func>(func));
     }
 
 private:
@@ -122,7 +123,7 @@ private:
     VideoCore::TextureCache texture_cache;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
-    boost::icl::interval_set<VAddr> mapped_ranges;
+    VideoCore::RangeSet mapped_ranges;
     Common::SharedFirstMutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;

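mapped_ranges changes from a raw boost::icl::interval_set to the new VideoCore::RangeSet, whose Add/Subtract/Contains/ForEachInRange calls appear in the rasterizer hunks above and whose callbacks now receive (address, size) rather than (address, end). A minimal sketch of such a wrapper over boost::icl follows, assuming illustrative names rather than the PR's actual src/video_core/range_set.h.

#include <cstdint>
#include <boost/icl/interval_set.hpp>

using VAddr = std::uint64_t;

struct RangeSetSketch {
    using IntervalSet = boost::icl::interval_set<VAddr>;
    using Interval = IntervalSet::interval_type;

    void Add(VAddr addr, std::uint64_t size) {
        ranges += Interval::right_open(addr, addr + size);
    }
    void Subtract(VAddr addr, std::uint64_t size) {
        ranges -= Interval::right_open(addr, addr + size);
    }
    bool Contains(VAddr addr, std::uint64_t size) const {
        return boost::icl::contains(ranges, Interval::right_open(addr, addr + size));
    }
    // Callback receives (start, size), matching the convention this commit switches to.
    template <typename Func>
    void ForEachInRange(VAddr addr, std::uint64_t size, Func&& func) const {
        const auto query = Interval::right_open(addr, addr + size);
        for (const auto& range : ranges & query) {
            func(range.lower(), range.upper() - range.lower());
        }
    }

    IntervalSet ranges;
};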