Initial implementation (untested)

This commit is contained in:
Lander Gallastegi 2025-07-06 02:34:51 +02:00
parent 47c43df544
commit 54907409c7
6 changed files with 145 additions and 49 deletions

View file

@@ -29,9 +29,9 @@ static constexpr size_t DeviceBufferSize = 128_MB;
static constexpr size_t MaxPageFaults = 1024;
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
PageManager& tracker)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
TextureCache& texture_cache_, PageManager& tracker)
: instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
@@ -817,7 +817,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
bool is_texel_buffer) {
boost::container::small_vector<vk::BufferCopy, 4> copies;
VAddr buffer_start = buffer.CpuAddr();
memory_tracker->ForEachUploadRange(
memory_tracker->ForEachUploadRange<true>(
device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) {
const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
copies.push_back(vk::BufferCopy{
@@ -996,7 +996,54 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
}
void BufferCache::SynchronizeBuffersForDma() {
RENDERER_TRACE;
boost::container::small_vector<Buffer*, 64> buffers;
boost::container::small_vector<vk::BufferMemoryBarrier2, 64> barriers;
boost::container::small_vector<vk::BufferCopy, 4> copies;
const auto& mapped_ranges = rasterizer.GetMappedRanges();
memory_tracker->Lock();
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
mapped_ranges.ForEach([&](VAddr device_addr, u64 size) {
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
if (memory_tracker->IsRegionCpuModified<false>(device_addr, size)) {
barriers.push_back(vk::BufferMemoryBarrier2{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask =
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = 0,
.size = buffer.SizeBytes(),
});
buffers.push_back(&buffer);
}
});
});
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = static_cast<u32>(barriers.size()),
.pBufferMemoryBarriers = barriers.data(),
});
for (auto* buffer : buffers) {
memory_tracker->ForEachUploadRange<false, false>(
buffer->CpuAddr(), buffer->SizeBytes(), false,
[&](u64 device_addr_out, u64 range_size) {
const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
copies.push_back(vk::BufferCopy{
.srcOffset = offset,
.dstOffset = device_addr_out - buffer->CpuAddr(),
.size = range_size,
});
});
cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies);
copies.clear();
}
memory_tracker->UnmarkAllRegionsAsCpuModified<false>();
MemoryBarrier();
memory_tracker->Unlock();
}
void BufferCache::MemoryBarrier() {
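
The new SynchronizeBuffersForDma path takes the tracker lock once, walks every mapped range, gathers the buffers that still have CPU-modified pages, records one pipelineBarrier2 covering all of them, re-uploads the dirty ranges through the staging buffer without clearing any per-range state, and only then drops all CPU-dirty bits in a single UnmarkAllRegionsAsCpuModified call. The standalone sketch below models only that control flow; DirtyTracker, GpuBuffer and Range are hypothetical stand-ins for MemoryTracker, Buffer and the mapped-range set, and the barrier/copy recording is reduced to prints.

#include <cstdint>
#include <iostream>
#include <mutex>
#include <vector>

struct Range {
    std::uint64_t addr;
    std::uint64_t size;
};

struct GpuBuffer {
    std::uint64_t cpu_addr;
    std::uint64_t size;
};

struct DirtyTracker {
    std::mutex lock;
    std::vector<Range> cpu_dirty; // ranges still modified by the CPU

    bool AnyDirtyIn(std::uint64_t addr, std::uint64_t size) const {
        for (const Range& r : cpu_dirty) {
            if (r.addr < addr + size && addr < r.addr + r.size) {
                return true;
            }
        }
        return false;
    }
};

int main() {
    DirtyTracker tracker;
    tracker.cpu_dirty = {{0x1000, 0x100}, {0x9000, 0x40}};
    std::vector<GpuBuffer> mapped = {{0x0, 0x2000}, {0x8000, 0x2000}, {0x20000, 0x1000}};

    // One lock for the whole pass, mirroring memory_tracker->Lock()/Unlock().
    std::scoped_lock lk{tracker.lock};

    // Pass 1: collect every buffer that still has CPU-dirty pages
    // (the real code records one vk::BufferMemoryBarrier2 per collected buffer here).
    std::vector<const GpuBuffer*> to_upload;
    for (const GpuBuffer& buffer : mapped) {
        if (tracker.AnyDirtyIn(buffer.cpu_addr, buffer.size)) {
            to_upload.push_back(&buffer);
        }
    }

    // Pass 2: re-upload the dirty ranges of each collected buffer without clearing
    // any dirty state yet (stands in for the staging_buffer.Copy + copyBuffer loop).
    for (const GpuBuffer* buffer : to_upload) {
        for (const Range& r : tracker.cpu_dirty) {
            if (r.addr >= buffer->cpu_addr && r.addr + r.size <= buffer->cpu_addr + buffer->size) {
                std::cout << "copy 0x" << std::hex << r.size << " bytes to buffer offset 0x"
                          << (r.addr - buffer->cpu_addr) << std::dec << '\n';
            }
        }
    }

    // Pass 3: drop all CPU-dirty state in one shot (UnmarkAllRegionsAsCpuModified).
    tracker.cpu_dirty.clear();
}

Deferring the clear to the very end is what lets the upload loop run without consuming dirty bits per range, and holding the tracker lock once avoids re-locking each region manager for every visited buffer.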

View file

@@ -21,7 +21,8 @@ class MemoryManager;
namespace Vulkan {
class GraphicsPipeline;
}
class Rasterizer;
} // namespace Vulkan
namespace VideoCore {
@@ -70,8 +71,8 @@ public:
public:
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
PageManager& tracker);
Vulkan::Rasterizer& rasterizer, AmdGpu::Liverpool* liverpool,
TextureCache& texture_cache, PageManager& tracker);
~BufferCache();
/// Returns a pointer to GDS device local buffer.
@@ -203,6 +204,7 @@ private:
const Vulkan::Instance& instance;
Vulkan::Scheduler& scheduler;
Vulkan::Rasterizer& rasterizer;
AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
TextureCache& texture_cache;
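
buffer_cache.h only forward-declares Vulkan::Rasterizer and stores a reference, since vk_rasterizer.h itself includes buffer_cache.h and a direct include would be circular. Below is a single-file sketch of that pattern with hypothetical BufferCacheSketch/Rasterizer stand-ins; a reference member compiles against an incomplete type, and only the code that actually uses it needs the full definition.

// Single-file sketch: a reference member only needs the class to be declared,
// not defined, so the header can avoid including vk_rasterizer.h.
#include <iostream>

namespace Vulkan {
class Rasterizer; // forward declaration, as in buffer_cache.h
}

class BufferCacheSketch {
public:
    explicit BufferCacheSketch(Vulkan::Rasterizer& rasterizer_) : rasterizer{rasterizer_} {}

    Vulkan::Rasterizer& rasterizer; // binding a reference does not require a complete type
};

// The full definition is only needed where members are actually used (the .cpp side).
namespace Vulkan {
class Rasterizer {
public:
    void Hello() const {
        std::cout << "rasterizer reachable from the buffer cache\n";
    }
};
} // namespace Vulkan

int main() {
    Vulkan::Rasterizer rasterizer;
    BufferCacheSketch cache{rasterizer};
    cache.rasterizer.Hello();
}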

View file

@@ -27,7 +27,7 @@ public:
/// Returns true if a region has been modified from the CPU
template <bool locking = true>
bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<true, locking>(
return IterateRegions<true, locking>(
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock};
return manager->template IsRegionModified<Type::CPU>(offset, size);
@@ -37,7 +37,7 @@ public:
/// Returns true if a region has been modified from the GPU
template <bool locking = true>
bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<false, locking>(
return IterateRegions<false, locking>(
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock};
return manager->template IsRegionModified<Type::GPU>(offset, size);
@@ -47,7 +47,7 @@ public:
/// Mark region as CPU modified, notifying the device_tracker about this change
template <bool locking = true>
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<false, locking>(dirty_cpu_addr, query_size,
IterateRegions<false, locking>(dirty_cpu_addr, query_size,
[](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock};
manager->template ChangeRegionState<Type::CPU, true>(
@@ -55,10 +55,19 @@ public:
});
}
/// Unmark all regions as CPU modified, notifying the device_tracker about this change
template <bool locking = true>
void UnmarkAllRegionsAsCpuModified() noexcept {
ForEachRegion<locking>([](RegionManager* manager) {
std::scoped_lock lk{manager->lock};
manager->template ChangeAllRegionState<Type::CPU, false>();
});
}
/// Unmark region as modified from the host GPU
template <bool locking = true>
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<false, locking>(dirty_cpu_addr, query_size,
IterateRegions<false, locking>(dirty_cpu_addr, query_size,
[](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock};
manager->template ChangeRegionState<Type::GPU, false>(
@@ -69,7 +78,7 @@ public:
/// Removes all protection from a page and ensures GPU data has been flushed if requested
template <bool locking = true>
void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
IteratePages<false, locking>(
IterateRegions<false, locking>(
cpu_addr, size,
[try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
const bool should_flush = [&] {
@@ -92,14 +101,15 @@ public:
}
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <bool locking = true>
template <bool clear, bool locking = true>
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
IteratePages<true, locking>(query_cpu_range, query_size,
IterateRegions<true, locking>(
query_cpu_range, query_size,
[&func, is_written](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock};
manager->template ForEachModifiedRange<Type::CPU, true>(
manager->template ForEachModifiedRange<Type::CPU, clear>(
manager->GetCpuAddr() + offset, size, func);
if (is_written) {
if (is_written && clear) {
manager->template ChangeRegionState<Type::GPU, true>(
manager->GetCpuAddr() + offset, size);
}
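
ForEachUploadRange now carries a compile-time clear flag: the regular SynchronizeBuffer path instantiates ForEachUploadRange<true> and consumes the CPU-dirty bits as it visits them, while the DMA path uses ForEachUploadRange<false, false> because it already holds the tracker lock and clears everything afterwards with UnmarkAllRegionsAsCpuModified. A rough sketch of such a flag on a dirty-range walker, assuming a plain std::bitset page mask instead of the real RegionManager word tracking:

#include <bitset>
#include <cstddef>
#include <iostream>

constexpr std::size_t NumPages = 64;

// Visits each contiguous run of dirty pages; consumes the bits only when clear is true.
template <bool clear, typename Func>
void ForEachDirtyRange(std::bitset<NumPages>& dirty, Func&& func) {
    std::size_t page = 0;
    while (page < NumPages) {
        if (!dirty.test(page)) {
            ++page;
            continue;
        }
        const std::size_t begin = page;
        while (page < NumPages && dirty.test(page)) {
            if constexpr (clear) {
                dirty.reset(page); // upload path: consume the bit as it is visited
            }
            ++page;
        }
        func(begin, page - begin); // contiguous dirty run [begin, begin + count)
    }
}

int main() {
    std::bitset<NumPages> dirty;
    dirty.set(3);
    dirty.set(4);
    dirty.set(10);

    // DMA-style pass: observe the ranges without consuming them.
    ForEachDirtyRange<false>(dirty, [](std::size_t page, std::size_t count) {
        std::cout << "upload pages " << page << "+" << count << '\n';
    });
    std::cout << "still dirty: " << dirty.count() << " pages\n"; // 3

    // Regular pass: clears as it goes, like ForEachUploadRange<true>.
    ForEachDirtyRange<true>(dirty, [](std::size_t, std::size_t) {});
    std::cout << "still dirty: " << dirty.count() << " pages\n"; // 0
}
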
@@ -109,7 +119,7 @@ public:
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
template <bool clear, bool locking = true>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
IteratePages<false, locking>(query_cpu_range, query_size,
IterateRegions<false, locking>(query_cpu_range, query_size,
[&func](RegionManager* manager, u64 offset, size_t size) {
std::scoped_lock lk{manager->lock};
manager->template ForEachModifiedRange<Type::GPU, clear>(
@@ -127,7 +137,6 @@ public:
global_lock.unlock();
}
private:
/**
* @brief IteratePages Iterates L2 word manager page table.
@@ -137,7 +146,7 @@
* @return
*/
template <bool create_region_on_fail, bool locking, typename Func>
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) {
RENDERER_TRACE;
if constexpr (locking) {
std::shared_lock lock{global_lock};
@@ -177,6 +186,26 @@ private:
return false;
}
/**
* @brief Iterate through all regions in the memory tracker.
* @param func Callback for each region manager.
* @return
*/
template <bool locking, typename Func>
void ForEachRegion(Func&& func) {
RENDERER_TRACE;
if constexpr (locking) {
std::shared_lock lock{global_lock};
}
for (auto& pool : manager_pool) {
for (auto& manager : pool) {
if (manager.GetCpuAddr() != 0) {
func(&manager);
}
}
}
}
void CreateRegion(std::size_t page_index) {
const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS;
if (free_managers.empty()) {
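
Unlike IterateRegions, the new ForEachRegion helper does not translate an address range at all; it simply visits every live manager in the pool, which is exactly what UnmarkAllRegionsAsCpuModified needs. A small sketch of that walk, assuming a deque-of-arrays pool where a zero CPU address marks an unused slot (RegionStub and the pool shape are illustrative, not the real types):

#include <array>
#include <cstdint>
#include <deque>
#include <iostream>

struct RegionStub {
    std::uintptr_t cpu_addr = 0; // 0 means the slot has never been assigned
    void ClearCpuDirty() {
        std::cout << "clear region at 0x" << std::hex << cpu_addr << std::dec << '\n';
    }
};

using Pool = std::array<RegionStub, 4>;

// Visit every live region, skipping free/unassigned slots.
template <typename Func>
void ForEachRegion(std::deque<Pool>& manager_pool, Func&& func) {
    for (Pool& pool : manager_pool) {
        for (RegionStub& manager : pool) {
            if (manager.cpu_addr != 0) {
                func(manager);
            }
        }
    }
}

int main() {
    std::deque<Pool> manager_pool(1);
    manager_pool[0][0].cpu_addr = 0x10000;
    manager_pool[0][2].cpu_addr = 0x30000;

    // Equivalent of UnmarkAllRegionsAsCpuModified: touch each live region exactly once.
    ForEachRegion(manager_pool, [](RegionStub& region) { region.ClearCpuDirty(); });
}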

View file

@@ -136,6 +136,24 @@ public:
}
}
/**
* Changes the state of all pages in the region
*/
template <Type type, bool enable>
void ChangeAllRegionState() noexcept {
RENDERER_TRACE;
if constexpr (enable) {
GetRegionBits<type>().Fill();
} else {
GetRegionBits<type>().Clear();
}
if constexpr (type == Type::CPU) {
UpdateProtection<!enable, false>();
} else if (Config::readbacks()) {
UpdateProtection<enable, true>();
}
}
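
ChangeAllRegionState replaces the ranged per-word update with a whole-bitmask Fill or Clear followed by one protection update for the region. The sketch below contrasts the two variants for the CPU-modified case only, assuming a std::bitset page mask and a stubbed protection call (the real UpdateProtection goes through the page manager, and the GPU case additionally checks Config::readbacks()):

#include <bitset>
#include <cstddef>
#include <iostream>

constexpr std::size_t PagesPerRegion = 32;

struct RegionSketch {
    std::bitset<PagesPerRegion> cpu_modified;

    // Stub: the real UpdateProtection talks to the page manager.
    void UpdateProtection(bool write_protect) {
        std::cout << (write_protect ? "protect" : "unprotect") << " region pages\n";
    }

    // Ranged change: touches only [page, page + count), like ChangeRegionState.
    template <bool enable>
    void ChangeRegionState(std::size_t page, std::size_t count) {
        for (std::size_t i = page; i < page + count; ++i) {
            cpu_modified.set(i, enable);
        }
        UpdateProtection(!enable); // dirty pages are unprotected, clean pages re-protected
    }

    // Whole-region change: a single Fill()/Clear() instead of a per-page loop.
    template <bool enable>
    void ChangeAllRegionState() {
        if constexpr (enable) {
            cpu_modified.set();   // Fill()
        } else {
            cpu_modified.reset(); // Clear()
        }
        UpdateProtection(!enable);
    }
};

int main() {
    RegionSketch region;
    region.ChangeRegionState<true>(4, 8);  // mark a sub-range as CPU-modified
    region.ChangeAllRegionState<false>();  // drop all CPU-modified state in one shot
    std::cout << "dirty pages: " << region.cpu_modified.count() << '\n'; // 0
}
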
/**
* Returns true when a region has been modified
*

View file

@@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
if (!Config::nullGpu()) {
@@ -475,12 +475,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
// We only use fault buffer for DMA right now.
{
Common::RecursiveSharedLock lock{mapped_ranges_mutex};
mapped_ranges.ForEach(
[&](const VAddr addr, u64 size) {
buffer_cache.SynchronizeBuffersInRange(addr, size);
});
buffer_cache.SynchronizeBuffersForDma();
}
buffer_cache.MemoryBarrier();
}
fault_process_pending |= uses_dma;

View file

@@ -7,9 +7,9 @@
#include "common/shared_first_mutex.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h"
#include "video_core/range_set.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/range_set.h"
namespace AmdGpu {
struct Liverpool;
@@ -43,6 +43,10 @@ public:
return texture_cache;
}
[[nodiscard]] const VideoCore::RangeSet& GetMappedRanges() const noexcept {
return mapped_ranges;
}
void Draw(bool is_indexed, u32 index_offset = 0);
void DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size, u32 max_count,
VAddr count_address);
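
The new GetMappedRanges accessor is what lets the buffer cache drive SynchronizeBuffersForDma from the rasterizer's view of guest memory: enumerate every mapped range first, then ask the memory tracker which of them are still CPU-dirty. A minimal stand-in for such a range set with a ForEach visitor, assuming an ordered map keyed by start address (the real VideoCore::RangeSet is more elaborate and merges adjacent ranges):

#include <cstdint>
#include <iostream>
#include <map>

class RangeSetSketch {
public:
    void Map(std::uint64_t addr, std::uint64_t size) {
        ranges_[addr] = size;
    }
    void Unmap(std::uint64_t addr) {
        ranges_.erase(addr);
    }

    // Visit every mapped range in address order.
    template <typename Func>
    void ForEach(Func&& func) const {
        for (const auto& [addr, size] : ranges_) {
            func(addr, size);
        }
    }

private:
    std::map<std::uint64_t, std::uint64_t> ranges_; // start address -> size
};

int main() {
    RangeSetSketch mapped_ranges;
    mapped_ranges.Map(0x10000, 0x4000);
    mapped_ranges.Map(0x80000, 0x1000);

    // First step of SynchronizeBuffersForDma: visit every mapped range.
    mapped_ranges.ForEach([](std::uint64_t addr, std::uint64_t size) {
        std::cout << "mapped 0x" << std::hex << addr << " +0x" << size << std::dec << '\n';
    });
}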