Mirror of https://github.com/shadps4-emu/shadPS4.git
Merge 17815ad439 into ddede4a52d
commit fd0e496a8c
10 changed files with 238 additions and 105 deletions
@@ -921,7 +921,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
     src/video_core/buffer_cache/buffer_cache.cpp
     src/video_core/buffer_cache/buffer_cache.h
     src/video_core/buffer_cache/memory_tracker.h
-    src/video_core/buffer_cache/range_set.h
     src/video_core/buffer_cache/region_definitions.h
     src/video_core/buffer_cache/region_manager.h
     src/video_core/renderer_vulkan/liverpool_to_vk.cpp

@@ -980,6 +979,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
     src/video_core/page_manager.cpp
     src/video_core/page_manager.h
     src/video_core/multi_level_page_table.h
+    src/video_core/range_set.h
     src/video_core/renderdoc.cpp
     src/video_core/renderdoc.h
 )
@@ -29,9 +29,9 @@ static constexpr size_t DeviceBufferSize = 128_MB;
 static constexpr size_t MaxPageFaults = 1024;
 
 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
-                         AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
-                         PageManager& tracker)
-    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
+                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
+                         TextureCache& texture_cache_, PageManager& tracker)
+    : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},

@@ -154,9 +154,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
     memory_tracker->ForEachDownloadRange<false>(
         device_addr, size, [&](u64 device_addr_out, u64 range_size) {
             const VAddr buffer_addr = buffer.CpuAddr();
-            const auto add_download = [&](VAddr start, VAddr end) {
+            const auto add_download = [&](VAddr start, u64 new_size) {
                 const u64 new_offset = start - buffer_addr;
-                const u64 new_size = end - start;
                 copies.push_back(vk::BufferCopy{
                     .srcOffset = new_offset,
                     .dstOffset = total_size_bytes,
@@ -996,6 +995,57 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
     });
 }
 
+void BufferCache::SynchronizeBuffersForDma() {
+    RENDERER_TRACE;
+    boost::container::small_vector<Buffer*, 64> buffers;
+    boost::container::small_vector<vk::BufferCopy, 4> copies;
+    const auto& mapped_ranges = rasterizer.GetMappedRanges();
+    bool barrier_recorded = false;
+    memory_tracker->Lock();
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    mapped_ranges.ForEach([&](VAddr device_addr, u64 size) {
+        ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+            memory_tracker->ForEachUploadRange<true, false>(
+                buffer.CpuAddr(), buffer.SizeBytes(), false,
+                [&](u64 device_addr_out, u64 range_size) {
+                    if (!barrier_recorded) {
+                        barrier_recorded = true;
+                        const vk::BufferMemoryBarrier2 barrier = {
+                            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+                            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead |
+                                             vk::AccessFlagBits2::eMemoryWrite |
+                                             vk::AccessFlagBits2::eTransferRead |
+                                             vk::AccessFlagBits2::eTransferWrite,
+                            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+                            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+                            .buffer = buffer.Handle(),
+                            .offset = 0,
+                            .size = buffer.SizeBytes(),
+                        };
+                        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+                            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+                            .bufferMemoryBarrierCount = 1,
+                            .pBufferMemoryBarriers = &barrier,
+                        });
+                    }
+                    const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
+                    copies.push_back(vk::BufferCopy{
+                        .srcOffset = offset,
+                        .dstOffset = device_addr_out - buffer.CpuAddr(),
+                        .size = range_size,
+                    });
+                });
+            cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.Handle(), copies);
+            copies.clear();
+            barrier_recorded = false;
+        });
+    });
+    memory_tracker->PerformDeferredProtections<Type::CPU, false, false>();
+    MemoryBarrier();
+    memory_tracker->Unlock();
+}
+
 void BufferCache::MemoryBarrier() {
     // Vulkan doesn't know which buffer we access in a shader if we use
     // BufferDeviceAddress. We need a full memory barrier.
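For context on the hunk above: SynchronizeBuffersForDma records at most one barrier per buffer. The first dirty range triggers a full-buffer vk::BufferMemoryBarrier2; every subsequent range only appends a staging copy, and all copies are flushed in a single copyBuffer call. Below is a minimal standalone sketch of that record-once-then-batch idiom, assuming vulkan.hpp compiled with VULKAN_HPP_NO_STRUCT_CONSTRUCTORS (so designated initializers compile) and a device with synchronization2 enabled; the function name and parameters are illustrative, not from this PR.

// Sketch: one barrier and one batched copy per destination buffer.
#include <vector>
#include <vulkan/vulkan.hpp>

void FlushDirtyRanges(vk::CommandBuffer cmdbuf, vk::Buffer staging, vk::Buffer dest,
                      vk::DeviceSize dest_size, const std::vector<vk::BufferCopy>& copies) {
    if (copies.empty()) {
        return; // no dirty ranges: record neither the barrier nor the copy
    }
    // One barrier per destination buffer, not per range: all prior GPU reads
    // and writes must complete before the transfer writes land.
    const vk::BufferMemoryBarrier2 barrier = {
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .buffer = dest,
        .offset = 0,
        .size = dest_size,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &barrier,
    });
    // All dirty ranges for this buffer go out in a single copy command.
    cmdbuf.copyBuffer(staging, dest, copies);
}

Batching keeps the command stream short: one barrier and one vkCmdCopyBuffer per buffer rather than one per dirty page.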
@@ -5,12 +5,11 @@
 
 #include <shared_mutex>
 #include <boost/container/small_vector.hpp>
-#include "common/div_ceil.h"
 #include "common/slot_vector.h"
 #include "common/types.h"
 #include "video_core/buffer_cache/buffer.h"
-#include "video_core/buffer_cache/range_set.h"
 #include "video_core/multi_level_page_table.h"
+#include "video_core/range_set.h"
 
 namespace AmdGpu {
 struct Liverpool;

@@ -22,7 +21,8 @@ class MemoryManager;
 
 namespace Vulkan {
 class GraphicsPipeline;
-}
+class Rasterizer;
+} // namespace Vulkan
 
 namespace VideoCore {
@@ -71,8 +71,8 @@ public:
 
 public:
     explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
-                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
-                         PageManager& tracker);
+                         Vulkan::Rasterizer& rasterizer, AmdGpu::Liverpool* liverpool,
+                         TextureCache& texture_cache, PageManager& tracker);
     ~BufferCache();
 
     /// Returns a pointer to GDS device local buffer.

@@ -156,8 +156,8 @@ public:
     /// Synchronizes all buffers in the specified range.
     void SynchronizeBuffersInRange(VAddr device_addr, u64 size);
 
-    /// Synchronizes all buffers neede for DMA.
-    void SynchronizeDmaBuffers();
+    /// Synchronizes all buffers for DMA.
+    void SynchronizeBuffersForDma();
 
     /// Record memory barrier. Used for buffers when accessed via BDA.
     void MemoryBarrier();

@@ -204,6 +204,7 @@ private:
 
     const Vulkan::Instance& instance;
     Vulkan::Scheduler& scheduler;
+    Vulkan::Rasterizer& rasterizer;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
    TextureCache& texture_cache;
@@ -5,6 +5,7 @@
 
 #include <algorithm>
 #include <deque>
+#include <shared_mutex>
 #include <type_traits>
 #include <vector>
 #include "common/debug.h"
@@ -24,8 +25,9 @@ public:
     ~MemoryTracker() = default;
 
     /// Returns true if a region has been modified from the CPU
+    template <bool locking = true>
     bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<true>(
+        return IterateRegions<true, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::CPU>(offset, size);

@@ -33,8 +35,9 @@ public:
     }
 
     /// Returns true if a region has been modified from the GPU
+    template <bool locking = true>
     bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<false>(
+        return IterateRegions<false, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::GPU>(offset, size);

@@ -42,28 +45,31 @@ public:
     }
 
     /// Mark region as CPU modified, notifying the device_tracker about this change
+    template <bool defer_protect = false, bool locking = true>
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
-        IteratePages<false>(dirty_cpu_addr, query_size,
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
                             [](RegionManager* manager, u64 offset, size_t size) {
                                 std::scoped_lock lk{manager->lock};
-                                manager->template ChangeRegionState<Type::CPU, true>(
+                                manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
                                     manager->GetCpuAddr() + offset, size);
                             });
     }
 
     /// Unmark region as modified from the host GPU
+    template <bool defer_protect = false, bool locking = true>
     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
-        IteratePages<false>(dirty_cpu_addr, query_size,
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
                             [](RegionManager* manager, u64 offset, size_t size) {
                                 std::scoped_lock lk{manager->lock};
-                                manager->template ChangeRegionState<Type::GPU, false>(
+                                manager->template ChangeRegionState<Type::GPU, false, defer_protect>(
                                     manager->GetCpuAddr() + offset, size);
                             });
     }
 
     /// Removes all protection from a page and ensures GPU data has been flushed if requested
+    template <bool defer_protect = false, bool locking = true>
     void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
-        IteratePages<false>(
+        IterateRegions<false, locking>(
             cpu_addr, size,
             [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
                 const bool should_flush = [&] {

@@ -75,7 +81,7 @@ public:
                     if (try_flush && manager->template IsRegionModified<Type::GPU>(offset, size)) {
                         return true;
                     }
-                    manager->template ChangeRegionState<Type::CPU, true>(
+                    manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
                         manager->GetCpuAddr() + offset, size);
                     return false;
                 }();

@@ -86,30 +92,53 @@ public:
     }
 
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
+    template <bool defer_protect = false, bool locking = true>
     void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
-        IteratePages<true>(query_cpu_range, query_size,
+        IterateRegions<true, locking>(
+            query_cpu_range, query_size,
             [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
-                manager->template ForEachModifiedRange<Type::CPU, true>(
+                manager->template ForEachModifiedRange<Type::CPU, true, defer_protect>(
                     manager->GetCpuAddr() + offset, size, func);
                 if (is_written) {
-                    manager->template ChangeRegionState<Type::GPU, true>(
+                    manager->template ChangeRegionState<Type::GPU, true, defer_protect>(
                         manager->GetCpuAddr() + offset, size);
                 }
             });
     }
 
     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
-    template <bool clear>
+    template <bool clear, bool defer_protect = false, bool locking = true>
     void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
-        IteratePages<false>(query_cpu_range, query_size,
+        IterateRegions<false, locking>(query_cpu_range, query_size,
                             [&func](RegionManager* manager, u64 offset, size_t size) {
                                 std::scoped_lock lk{manager->lock};
-                                manager->template ForEachModifiedRange<Type::GPU, clear>(
+                                manager->template ForEachModifiedRange<Type::GPU, clear, defer_protect>(
                                     manager->GetCpuAddr() + offset, size, func);
                             });
     }
 
+    /// Notifies deferred protection changes to the tracker.
+    template <Type type, bool enable, bool locking = true>
+    void PerformDeferredProtections() {
+        ForEachRegion<locking>([&](RegionManager* manager) {
+            std::scoped_lock lk{manager->lock};
+            manager->template PerformDeferredProtections<type, enable>();
+        });
+    }
+
+    /// Lock the memory tracker.
+    void Lock() {
+        global_lock.lock();
+    }
+
+    /// Unlock the memory tracker.
+    void Unlock() {
+        global_lock.unlock();
+    }
+
 private:
     /**
      * @brief IteratePages Iterates L2 word manager page table.

@@ -118,9 +147,10 @@ private:
      * @param func Callback for each word manager.
      * @return
      */
-    template <bool create_region_on_fail, typename Func>
-    bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
+    template <bool create_region_on_fail, bool locking, typename Func>
+    bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) {
         RENDERER_TRACE;
+        const auto run = [&]() {
         using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
         static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
         std::size_t remaining_size{size};

@@ -154,6 +184,38 @@ private:
             remaining_size -= copy_amount;
         }
         return false;
+        };
+        if constexpr (locking) {
+            std::shared_lock lock{global_lock};
+            return run();
+        } else {
+            return run();
+        }
+    }
+
+    /**
+     * @brief Iterates through all regions in the memory tracker.
+     * @param func Callback for each region manager.
+     * @return
+     */
+    template <bool locking, typename Func>
+    void ForEachRegion(Func&& func) {
+        RENDERER_TRACE;
+        const auto run = [&]() {
+            for (auto& pool : manager_pool) {
+                for (auto& manager : pool) {
+                    if (manager.GetCpuAddr() != 0) {
+                        func(&manager);
+                    }
+                }
+            }
+        };
+        if constexpr (locking) {
+            std::shared_lock lock{global_lock};
+            run();
+        } else {
+            run();
+        }
     }
 
     void CreateRegion(std::size_t page_index) {

@@ -177,6 +239,7 @@ private:
     std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
     std::vector<RegionManager*> free_managers;
     std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
+    std::shared_mutex global_lock;
 };
 
 } // namespace VideoCore
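The tracker's concurrency model is now two-level: each RegionManager keeps its own per-region lock, while the new global_lock std::shared_mutex is taken shared by ordinary per-range calls (locking = true) and exclusively by the DMA path, which calls Lock() once up front and then passes locking = false so the per-call shared acquisition is skipped. A self-contained sketch of that if constexpr toggle follows; the Tracker/Visit names are hypothetical, not from this PR.

// Sketch of the compile-time locking toggle. std::shared_mutex is not
// recursive, so a caller that already holds it exclusively must opt out
// of the shared acquisition with locking = false to avoid self-deadlock.
#include <iostream>
#include <shared_mutex>

class Tracker {
public:
    void Lock() { global_lock.lock(); }
    void Unlock() { global_lock.unlock(); }

    template <bool locking = true>
    void Visit() {
        const auto run = [&] {
            std::cout << "visiting regions\n"; // stand-in for the real iteration
        };
        if constexpr (locking) {
            std::shared_lock lock{global_lock}; // shared: many readers may iterate
            run();
        } else {
            run(); // caller guarantees it already holds global_lock
        }
    }

private:
    std::shared_mutex global_lock;
};

int main() {
    Tracker tracker;
    tracker.Visit();        // normal path: takes the shared lock itself
    tracker.Lock();         // batch path: exclusive lock once...
    tracker.Visit<false>(); // ...then lock-free calls inside the critical section
    tracker.Unlock();
}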
@@ -4,6 +4,7 @@
 #pragma once
 
 #include "common/bit_array.h"
+#include "common/enum.h"
 #include "common/types.h"
 
 namespace VideoCore {

@@ -17,9 +18,12 @@ constexpr u64 TRACKER_HIGHER_PAGE_MASK = TRACKER_HIGHER_PAGE_SIZE - 1ULL;
 constexpr u64 NUM_PAGES_PER_REGION = TRACKER_HIGHER_PAGE_SIZE / TRACKER_BYTES_PER_PAGE;
 
 enum class Type {
-    CPU,
-    GPU,
+    None = 0,
+    CPU = 1 << 0,
+    GPU = 1 << 1,
 };
+DECLARE_ENUM_FLAG_OPERATORS(Type)
 
 using RegionBits = Common::BitArray<NUM_PAGES_PER_REGION>;
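Making Type a bit-flag enum (with an added None state) lets RegionManager fold both kinds of pending protection change into the single deferred_protection field introduced below. DECLARE_ENUM_FLAG_OPERATORS and the True() helper come from the project's common/enum.h; roughly, the machinery the new code relies on looks like the following standalone approximation (not the actual header).

// Standalone approximation of the flag-enum operators the tracker code uses.
#include <type_traits>

enum class Type {
    None = 0,
    CPU = 1 << 0,
    GPU = 1 << 1,
};

constexpr Type operator|(Type a, Type b) {
    using U = std::underlying_type_t<Type>;
    return static_cast<Type>(static_cast<U>(a) | static_cast<U>(b));
}
constexpr Type operator&(Type a, Type b) {
    using U = std::underlying_type_t<Type>;
    return static_cast<Type>(static_cast<U>(a) & static_cast<U>(b));
}
constexpr Type operator~(Type a) {
    using U = std::underlying_type_t<Type>;
    return static_cast<Type>(~static_cast<U>(a));
}
constexpr Type& operator|=(Type& a, Type b) { return a = a | b; }
constexpr Type& operator&=(Type& a, Type b) { return a = a & b; }

// True() reads a masked flag as a boolean.
constexpr bool True(Type a) {
    return static_cast<std::underlying_type_t<Type>>(a) != 0;
}

static_assert(True((Type::CPU | Type::GPU) & Type::CPU));
static_assert(!True(Type::None & Type::CPU));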
@@ -70,13 +70,27 @@ public:
         }
     }
 
+    template <Type type, bool enable>
+    void PerformDeferredProtections() {
+        bool was_deferred = True(deferred_protection & type);
+        if (!was_deferred) {
+            return;
+        }
+        deferred_protection &= ~type;
+        if constexpr (type == Type::CPU) {
+            UpdateProtection<!enable, false>();
+        } else if constexpr (type == Type::GPU) {
+            UpdateProtection<enable, true>();
+        }
+    }
+
     /**
      * Change the state of a range of pages
      *
      * @param dirty_addr Base address to mark or unmark as modified
      * @param size Size in bytes to mark or unmark as modified
      */
-    template <Type type, bool enable>
+    template <Type type, bool enable, bool defer_protect>
     void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
         RENDERER_TRACE;
         const size_t offset = dirty_addr - cpu_addr;

@@ -93,7 +107,9 @@ public:
         } else {
             bits.UnsetRange(start_page, end_page);
         }
-        if constexpr (type == Type::CPU) {
+        if constexpr (defer_protect) {
+            deferred_protection |= type;
+        } else if constexpr (type == Type::CPU) {
             UpdateProtection<!enable, false>();
         } else if (Config::readbacks()) {
             UpdateProtection<enable, true>();

@@ -108,7 +124,7 @@ public:
      * @param size Size in bytes of the CPU range to loop over
      * @param func Function to call for each turned off region
      */
-    template <Type type, bool clear>
+    template <Type type, bool clear, bool defer_protect>
     void ForEachModifiedRange(VAddr query_cpu_range, s64 size, auto&& func) {
         RENDERER_TRACE;
         const size_t offset = query_cpu_range - cpu_addr;

@@ -124,7 +140,9 @@ public:
 
         if constexpr (clear) {
             bits.UnsetRange(start_page, end_page);
-            if constexpr (type == Type::CPU) {
+            if constexpr (defer_protect) {
+                deferred_protection |= type;
+            } else if constexpr (type == Type::CPU) {
                 UpdateProtection<true, false>();
             } else if (Config::readbacks()) {
                 UpdateProtection<false, true>();

@@ -186,6 +204,7 @@ private:
 
     PageManager* tracker;
     VAddr cpu_addr = 0;
+    Type deferred_protection = Type::None;
     RegionBits cpu;
     RegionBits gpu;
     RegionBits writeable;
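The net effect of defer_protect: dirty-bit bookkeeping still happens inline under the region lock, but the expensive page-protection change (UpdateProtection, ultimately an mprotect-style syscall) is postponed and replayed once per region by PerformDeferredProtections. A toy model of the defer/replay flow, with hypothetical names:

// Toy model: many deferred marks collapse into one protection change.
#include <cstdio>

enum class Kind : unsigned { None = 0, CPU = 1, GPU = 2 };

struct Region {
    unsigned pending = 0; // bitmask of deferred protection kinds

    template <bool defer_protect>
    void MarkCpuDirty() {
        // ...dirty-bit bookkeeping would happen here, under the region lock...
        if constexpr (defer_protect) {
            pending |= static_cast<unsigned>(Kind::CPU); // remember, don't syscall
        } else {
            UpdateProtection(Kind::CPU); // immediate path, as before this change
        }
    }

    void PerformDeferredProtections(Kind kind) {
        const auto bit = static_cast<unsigned>(kind);
        if ((pending & bit) == 0) {
            return; // nothing was deferred for this kind
        }
        pending &= ~bit;
        UpdateProtection(kind); // one replay instead of one call per range
    }

    void UpdateProtection(Kind kind) {
        std::printf("protection change for kind %u\n", static_cast<unsigned>(kind));
    }
};

int main() {
    Region region;
    for (int i = 0; i < 3; ++i) {
        region.MarkCpuDirty<true>(); // three dirty ranges, zero syscalls
    }
    region.PerformDeferredProtections(Kind::CPU); // exactly one protection change
}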
@@ -4,6 +4,7 @@
 #include <boost/container/small_vector.hpp>
 #include "common/assert.h"
 #include "common/debug.h"
+#include "common/div_ceil.h"
 #include "common/range_lock.h"
 #include "common/signal_context.h"
 #include "core/memory.h"
@ -66,7 +66,7 @@ struct RangeSet {
|
||||||
for (const auto& set : m_ranges_set) {
|
for (const auto& set : m_ranges_set) {
|
||||||
const VAddr inter_addr_end = set.upper();
|
const VAddr inter_addr_end = set.upper();
|
||||||
const VAddr inter_addr = set.lower();
|
const VAddr inter_addr = set.lower();
|
||||||
func(inter_addr, inter_addr_end);
|
func(inter_addr, inter_addr_end - inter_addr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ struct RangeSet {
|
||||||
if (inter_addr < start_address) {
|
if (inter_addr < start_address) {
|
||||||
inter_addr = start_address;
|
inter_addr = start_address;
|
||||||
}
|
}
|
||||||
func(inter_addr, inter_addr_end);
|
func(inter_addr, inter_addr_end - inter_addr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,7 +170,7 @@ public:
|
||||||
for (const auto& [interval, value] : m_ranges_map) {
|
for (const auto& [interval, value] : m_ranges_map) {
|
||||||
const VAddr inter_addr_end = interval.upper();
|
const VAddr inter_addr_end = interval.upper();
|
||||||
const VAddr inter_addr = interval.lower();
|
const VAddr inter_addr = interval.lower();
|
||||||
func(inter_addr, inter_addr_end, value);
|
func(inter_addr, inter_addr_end - inter_addr, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,7 +196,7 @@ public:
|
||||||
if (inter_addr < start_address) {
|
if (inter_addr < start_address) {
|
||||||
inter_addr = start_address;
|
inter_addr = start_address;
|
||||||
}
|
}
|
||||||
func(inter_addr, inter_addr_end, it->second);
|
func(inter_addr, inter_addr_end - inter_addr, it->second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -274,7 +274,7 @@ public:
|
||||||
for (const auto& [interval, value] : m_ranges_map) {
|
for (const auto& [interval, value] : m_ranges_map) {
|
||||||
const VAddr inter_addr_end = interval.upper();
|
const VAddr inter_addr_end = interval.upper();
|
||||||
const VAddr inter_addr = interval.lower();
|
const VAddr inter_addr = interval.lower();
|
||||||
func(inter_addr, inter_addr_end, value);
|
func(inter_addr, inter_addr_end - inter_addr, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -300,7 +300,7 @@ public:
|
||||||
if (inter_addr < start_address) {
|
if (inter_addr < start_address) {
|
||||||
inter_addr = start_address;
|
inter_addr = start_address;
|
||||||
}
|
}
|
||||||
func(inter_addr, inter_addr_end, it->second);
|
func(inter_addr, inter_addr_end - inter_addr, it->second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
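All the hunks above make the same signature change: RangeSet callbacks now receive (start, size) instead of (start, end), since every caller immediately computed end - start anyway. A small sketch of that convention over half-open intervals, using boost::icl::interval_set as the pre-change rasterizer code did:

// Sketch of the (start, size) callback convention over half-open intervals.
#include <cstdint>
#include <cstdio>
#include <boost/icl/interval_set.hpp>

using VAddr = std::uint64_t;

template <typename Func>
void ForEach(const boost::icl::interval_set<VAddr>& ranges, Func&& func) {
    for (const auto& interval : ranges) {
        const VAddr start = interval.lower();
        const VAddr end = interval.upper(); // half-open: one past the last byte
        func(start, end - start);           // callers get (address, size) directly
    }
}

int main() {
    boost::icl::interval_set<VAddr> ranges;
    ranges += boost::icl::interval<VAddr>::right_open(0x1000, 0x3000);
    ranges += boost::icl::interval<VAddr>::right_open(0x8000, 0x9000);
    ForEach(ranges, [](VAddr addr, std::uint64_t size) {
        std::printf("range at 0x%llx, %llu bytes\n",
                    static_cast<unsigned long long>(addr),
                    static_cast<unsigned long long>(size));
    });
}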
@@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
 Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
                        AmdGpu::Liverpool* liverpool_)
     : instance{instance_}, scheduler{scheduler_}, page_manager{this},
-      buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
+      buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
       texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
     if (!Config::nullGpu()) {

@@ -484,13 +484,9 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         // We only use fault buffer for DMA right now.
         {
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-            for (auto& range : mapped_ranges) {
-                buffer_cache.SynchronizeBuffersInRange(range.lower(),
-                                                       range.upper() - range.lower());
-            }
+            buffer_cache.SynchronizeBuffersForDma();
         }
-        buffer_cache.MemoryBarrier();
     }
 
     fault_process_pending |= uses_dma;

@@ -987,16 +983,14 @@ bool Rasterizer::IsMapped(VAddr addr, u64 size) {
         // There is no memory, so not mapped.
         return false;
     }
-    const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
-
     Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-    return boost::icl::contains(mapped_ranges, range);
+    return mapped_ranges.Contains(addr, size);
 }
 
 void Rasterizer::MapMemory(VAddr addr, u64 size) {
     {
         std::scoped_lock lock{mapped_ranges_mutex};
-        mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+        mapped_ranges.Add(addr, size);
     }
     page_manager.OnGpuMap(addr, size);
 }

@@ -1007,7 +1001,7 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
     page_manager.OnGpuUnmap(addr, size);
     {
         std::scoped_lock lock{mapped_ranges_mutex};
-        mapped_ranges -= decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
+        mapped_ranges.Subtract(addr, size);
     }
 }
@@ -3,11 +3,11 @@
 
 #pragma once
 
-#include <shared_mutex>
 #include "common/recursive_lock.h"
 #include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
+#include "video_core/range_set.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/texture_cache/texture_cache.h"

@@ -43,6 +43,10 @@ public:
         return texture_cache;
     }
 
+    [[nodiscard]] const VideoCore::RangeSet& GetMappedRanges() const noexcept {
+        return mapped_ranges;
+    }
+
     void Draw(bool is_indexed, u32 index_offset = 0);
     void DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size, u32 max_count,
                       VAddr count_address);

@@ -76,11 +80,8 @@ public:
 
     template <typename Func>
     void ForEachMappedRangeInRange(VAddr addr, u64 size, Func&& func) {
-        const auto range = decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
-        Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-        for (const auto& mapped_range : (mapped_ranges & range)) {
-            func(mapped_range);
-        }
+        Common::RecursiveSharedLock lk(mapped_ranges_mutex);
+        mapped_ranges.ForEachInRange(addr, size, std::forward<Func>(func));
     }
 
 private:

@@ -122,7 +123,7 @@ private:
     VideoCore::TextureCache texture_cache;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
-    boost::icl::interval_set<VAddr> mapped_ranges;
+    VideoCore::RangeSet mapped_ranges;
     Common::SharedFirstMutex mapped_ranges_mutex;
     PipelineCache pipeline_cache;