mirror of https://github.com/shadps4-emu/shadPS4.git

Initial implementation (untested)

commit 54907409c7 (parent 47c43df544)
6 changed files with 145 additions and 49 deletions
src/video_core/buffer_cache/buffer_cache.cpp:

@@ -29,9 +29,9 @@ static constexpr size_t DeviceBufferSize = 128_MB;
 static constexpr size_t MaxPageFaults = 1024;

 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
-                         AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
-                         PageManager& tracker)
-    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
+                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
+                         TextureCache& texture_cache_, PageManager& tracker)
+    : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
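Note: the constructor gains a Vulkan::Rasterizer reference so the cache can pull the rasterizer's mapped memory ranges itself during DMA synchronization (see SynchronizeBuffersForDma below). A compilable sketch of that dependency direction, using simplified stand-in types rather than the project's real RangeSet/Rasterizer/BufferCache:

```cpp
#include <cstdint>
#include <functional>
#include <utility>
#include <vector>

// Stand-in for VideoCore::RangeSet: a set of (address, size) mappings.
struct RangeSet {
    std::vector<std::pair<std::uintptr_t, std::uint64_t>> ranges;
    void ForEach(const std::function<void(std::uintptr_t, std::uint64_t)>& fn) const {
        for (const auto& [addr, size] : ranges) {
            fn(addr, size);
        }
    }
};

struct Rasterizer {
    RangeSet mapped_ranges;
    const RangeSet& GetMappedRanges() const noexcept { return mapped_ranges; }
};

class BufferCache {
public:
    explicit BufferCache(Rasterizer& rasterizer_) : rasterizer{rasterizer_} {}

    // The cache asks the rasterizer for every mapped range on demand,
    // instead of the rasterizer pushing each range into the cache.
    void SynchronizeBuffersForDma() {
        rasterizer.GetMappedRanges().ForEach([](std::uintptr_t addr, std::uint64_t size) {
            // ... synchronize buffers overlapping [addr, addr + size) ...
        });
    }

private:
    Rasterizer& rasterizer; // non-owning; the rasterizer outlives the cache
};
```

Passing *this in the Rasterizer initializer list (see the rasterizer hunk below) is safe as long as BufferCache only stores the reference during construction, which the hunk above does.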
@@ -817,7 +817,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
                                     bool is_texel_buffer) {
     boost::container::small_vector<vk::BufferCopy, 4> copies;
     VAddr buffer_start = buffer.CpuAddr();
-    memory_tracker->ForEachUploadRange(
+    memory_tracker->ForEachUploadRange<true>(
         device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) {
             const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
             copies.push_back(vk::BufferCopy{
@@ -996,7 +996,54 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
 }

+void BufferCache::SynchronizeBuffersForDma() {
+    RENDERER_TRACE;
+    boost::container::small_vector<Buffer*, 64> buffers;
+    boost::container::small_vector<vk::BufferMemoryBarrier2, 64> barriers;
+    boost::container::small_vector<vk::BufferCopy, 4> copies;
+    const auto& mapped_ranges = rasterizer.GetMappedRanges();
+    memory_tracker->Lock();
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    mapped_ranges.ForEach([&](VAddr device_addr, u64 size) {
+        ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+            if (memory_tracker->IsRegionCpuModified<false>(device_addr, size)) {
+                barriers.push_back(vk::BufferMemoryBarrier2{
+                    .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+                    .srcAccessMask =
+                        vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
+                        vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
+                    .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+                    .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+                    .buffer = buffer.Handle(),
+                    .offset = 0,
+                    .size = buffer.SizeBytes(),
+                });
+                buffers.push_back(&buffer);
+            }
+        });
+    });
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = static_cast<u32>(barriers.size()),
+        .pBufferMemoryBarriers = barriers.data(),
+    });
+    for (auto* buffer : buffers) {
+        memory_tracker->ForEachUploadRange<false, false>(
+            buffer->CpuAddr(), buffer->SizeBytes(), false,
+            [&](u64 device_addr_out, u64 range_size) {
+                const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
+                copies.push_back(vk::BufferCopy{
+                    .srcOffset = offset,
+                    .dstOffset = device_addr_out - buffer->CpuAddr(),
+                    .size = range_size,
+                });
+            });
+        cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies);
+        copies.clear();
+    }
+    memory_tracker->UnmarkAllRegionsAsCpuModified<false>();
+    MemoryBarrier();
+    memory_tracker->Unlock();
+}
+
 void BufferCache::MemoryBarrier() {
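SynchronizeBuffersForDma() takes the tracker's global lock once (memory_tracker->Lock()) and then uses the locking = false variants (IsRegionCpuModified<false>, ForEachUploadRange<false, false>, UnmarkAllRegionsAsCpuModified<false>), so the dirty state stays consistent across the barrier pass, the copy pass, and the final bulk clear without re-acquiring the lock per call. A minimal sketch of that lock-once pattern; Tracker and its members here are assumptions for illustration, not the project's MemoryTracker:

```cpp
#include <shared_mutex>

class Tracker {
public:
    // Batch callers bracket a sequence of queries with Lock()/Unlock() ...
    void Lock() { global_lock.lock(); }
    void Unlock() { global_lock.unlock(); }

    template <bool locking = true>
    bool IsDirty() {
        if constexpr (locking) {
            // Normal call sites lock for themselves.
            std::shared_lock lock{global_lock};
            return dirty;
        } else {
            // ... while <false> call sites rely on the caller's Lock(),
            // avoiding recursive acquisition and per-call overhead.
            return dirty;
        }
    }

private:
    std::shared_mutex global_lock;
    bool dirty = false;
};

// Usage mirroring the DMA path:
//   tracker.Lock();
//   bool d1 = tracker.IsDirty<false>();
//   bool d2 = tracker.IsDirty<false>();
//   tracker.Unlock();
```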
src/video_core/buffer_cache/buffer_cache.h:

@@ -21,7 +21,8 @@ class MemoryManager;

 namespace Vulkan {
 class GraphicsPipeline;
-}
+class Rasterizer;
+} // namespace Vulkan

 namespace VideoCore {

@@ -70,8 +71,8 @@ public:

 public:
     explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
-                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
-                         PageManager& tracker);
+                         Vulkan::Rasterizer& rasterizer, AmdGpu::Liverpool* liverpool,
+                         TextureCache& texture_cache, PageManager& tracker);
     ~BufferCache();

     /// Returns a pointer to GDS device local buffer.
@@ -203,6 +204,7 @@ private:

     const Vulkan::Instance& instance;
     Vulkan::Scheduler& scheduler;
+    Vulkan::Rasterizer& rasterizer;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     TextureCache& texture_cache;
src/video_core/buffer_cache/memory_tracker.h:

@@ -27,7 +27,7 @@ public:
     /// Returns true if a region has been modified from the CPU
     template <bool locking = true>
     bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<true, locking>(
+        return IterateRegions<true, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::CPU>(offset, size);
@@ -37,7 +37,7 @@ public:
     /// Returns true if a region has been modified from the GPU
     template <bool locking = true>
     bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<false, locking>(
+        return IterateRegions<false, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::GPU>(offset, size);
@@ -47,7 +47,7 @@ public:
     /// Mark region as CPU modified, notifying the device_tracker about this change
     template <bool locking = true>
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
-        IteratePages<false, locking>(dirty_cpu_addr, query_size,
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
                                      [](RegionManager* manager, u64 offset, size_t size) {
                                          std::scoped_lock lk{manager->lock};
                                          manager->template ChangeRegionState<Type::CPU, true>(
@@ -55,10 +55,19 @@ public:
                                      });
     }

+    /// Unmark all regions as CPU modified, notifying the device_tracker about this change
+    template <bool locking = true>
+    void UnmarkAllRegionsAsCpuModified() noexcept {
+        ForEachRegion<locking>([](RegionManager* manager) {
+            std::scoped_lock lk{manager->lock};
+            manager->template ChangeAllRegionState<Type::CPU, false>();
+        });
+    }
+
     /// Unmark region as modified from the host GPU
     template <bool locking = true>
     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
-        IteratePages<false, locking>(dirty_cpu_addr, query_size,
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
                                      [](RegionManager* manager, u64 offset, size_t size) {
                                          std::scoped_lock lk{manager->lock};
                                          manager->template ChangeRegionState<Type::GPU, false>(
@@ -69,7 +78,7 @@ public:
     /// Removes all protection from a page and ensures GPU data has been flushed if requested
     template <bool locking = true>
     void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
-        IteratePages<false, locking>(
+        IterateRegions<false, locking>(
             cpu_addr, size,
             [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
                 const bool should_flush = [&] {
@@ -92,14 +101,15 @@ public:
     }

     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
-    template <bool locking = true>
+    template <bool clear, bool locking = true>
     void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
-        IteratePages<true, locking>(query_cpu_range, query_size,
+        IterateRegions<true, locking>(
+            query_cpu_range, query_size,
             [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
-                manager->template ForEachModifiedRange<Type::CPU, true>(
+                manager->template ForEachModifiedRange<Type::CPU, clear>(
                     manager->GetCpuAddr() + offset, size, func);
-                if (is_written) {
+                if (is_written && clear) {
                     manager->template ChangeRegionState<Type::GPU, true>(
                         manager->GetCpuAddr() + offset, size);
                 }
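The new leading `clear` template parameter decides whether visiting an upload range also consumes its dirty bits. SynchronizeBuffer (above) keeps the old behavior with ForEachUploadRange<true>, while the DMA path calls ForEachUploadRange<false, false> to peek at every modified range and then drops all dirty bits at once via UnmarkAllRegionsAsCpuModified. A small self-contained model of that knob (DirtyPages is my illustration, not the project's RegionManager):

```cpp
#include <bitset>
#include <cstddef>
#include <functional>

class DirtyPages {
public:
    void MarkDirty(std::size_t page) { dirty.set(page); }

    template <bool clear>
    void ForEachDirty(const std::function<void(std::size_t)>& func) {
        for (std::size_t page = 0; page < dirty.size(); ++page) {
            if (!dirty.test(page)) {
                continue;
            }
            func(page);
            if constexpr (clear) {
                dirty.reset(page); // consume the bit only when asked to
            }
        }
    }

    // Bulk variant, analogous to UnmarkAllRegionsAsCpuModified above.
    void ClearAll() noexcept { dirty.reset(); }

private:
    std::bitset<64> dirty;
};
```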
@@ -109,7 +119,7 @@ public:
     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
     template <bool clear, bool locking = true>
     void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
-        IteratePages<false, locking>(query_cpu_range, query_size,
+        IterateRegions<false, locking>(query_cpu_range, query_size,
                                      [&func](RegionManager* manager, u64 offset, size_t size) {
                                          std::scoped_lock lk{manager->lock};
                                          manager->template ForEachModifiedRange<Type::GPU, clear>(
@@ -127,7 +137,6 @@ public:
         global_lock.unlock();
     }

-
 private:
     /**
      * @brief IteratePages Iterates L2 word manager page table.
@@ -137,7 +146,7 @@ private:
      * @return
      */
     template <bool create_region_on_fail, bool locking, typename Func>
-    bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
+    bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) {
         RENDERER_TRACE;
         if constexpr (locking) {
             std::shared_lock lock{global_lock};
@@ -177,6 +186,26 @@ private:
         return false;
     }

+    /**
+     * @brief Iterate through all regions in the memory tracker.
+     * @param func Callback for each region manager.
+     */
+    template <bool locking, typename Func>
+    void ForEachRegion(Func&& func) {
+        RENDERER_TRACE;
+        if constexpr (locking) {
+            std::shared_lock lock{global_lock};
+        }
+        for (auto& pool : manager_pool) {
+            for (auto& manager : pool) {
+                if (manager.GetCpuAddr() != 0) {
+                    func(&manager);
+                }
+            }
+        }
+    }
+
     void CreateRegion(std::size_t page_index) {
         const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS;
         if (free_managers.empty()) {
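ForEachRegion (added above) walks every allocated RegionManager rather than resolving a specific address range; a manager whose GetCpuAddr() is 0 appears to be an unbound pool slot and is skipped. A generic sketch of that pool walk, with Manager and the pool layout assumed for illustration:

```cpp
#include <array>
#include <cstdint>
#include <deque>

struct Manager {
    std::uintptr_t cpu_addr = 0; // 0 => slot not bound to any region yet
    std::uintptr_t GetCpuAddr() const noexcept { return cpu_addr; }
};

// Pools allocate managers in fixed-size blocks; iteration must skip
// slots that were never handed out.
template <typename Func>
void ForEachRegion(std::deque<std::array<Manager, 32>>& manager_pool, Func&& func) {
    for (auto& pool : manager_pool) {
        for (auto& manager : pool) {
            if (manager.GetCpuAddr() != 0) {
                func(&manager);
            }
        }
    }
}
```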
src/video_core/buffer_cache/region_manager.h:

@@ -136,6 +136,24 @@ public:
         }
     }

+    /**
+     * Changes the state of all pages in the region
+     */
+    template <Type type, bool enable>
+    void ChangeAllRegionState() noexcept {
+        RENDERER_TRACE;
+        if constexpr (enable) {
+            GetRegionBits<type>().Fill();
+        } else {
+            GetRegionBits<type>().Clear();
+        }
+        if constexpr (type == Type::CPU) {
+            UpdateProtection<!enable, false>();
+        } else if (Config::readbacks()) {
+            UpdateProtection<enable, true>();
+        }
+    }
+
     /**
      * Returns true when a region has been modified
      *
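ChangeAllRegionState is the bulk counterpart of ChangeRegionState: one Fill()/Clear() over the whole page bitset plus a single protection update, instead of per-range bookkeeping. The sketch below models the CPU-side effect under my reading of the hunk: unmarking all pages as CPU-modified (enable = false) re-arms write protection so the next CPU write faults and marks its page again (Region is a stand-in, not the project's RegionManager):

```cpp
#include <bitset>
#include <cstddef>

enum class Type { CPU, GPU };

template <std::size_t NumPages>
class Region {
public:
    template <Type type, bool enable>
    void ChangeAllRegionState() noexcept {
        if constexpr (enable) {
            Bits<type>().set();   // Fill(): every page marked modified
        } else {
            Bits<type>().reset(); // Clear(): every page marked clean
        }
        if constexpr (type == Type::CPU) {
            // Clean pages must be write-protected so a later CPU write
            // faults and re-marks them; fully dirty pages need no trap.
            write_protected = !enable;
        }
    }

private:
    template <Type type>
    std::bitset<NumPages>& Bits() noexcept {
        return type == Type::CPU ? cpu_dirty : gpu_dirty;
    }

    std::bitset<NumPages> cpu_dirty;
    std::bitset<NumPages> gpu_dirty;
    bool write_protected = true; // pages start clean and protected
};
```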
src/video_core/renderer_vulkan/vk_rasterizer.cpp:

@@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
 Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
                        AmdGpu::Liverpool* liverpool_)
     : instance{instance_}, scheduler{scheduler_}, page_manager{this},
-      buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
+      buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
       texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
     if (!Config::nullGpu()) {
@@ -475,12 +475,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         // We only use fault buffer for DMA right now.
         {
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-            mapped_ranges.ForEach(
-                [&](const VAddr addr, u64 size) {
-                    buffer_cache.SynchronizeBuffersInRange(addr, size);
-                });
+            buffer_cache.SynchronizeBuffersForDma();
         }
-        buffer_cache.MemoryBarrier();
     }

     fault_process_pending |= uses_dma;
src/video_core/renderer_vulkan/vk_rasterizer.h:

@@ -7,9 +7,9 @@
 #include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
+#include "video_core/range_set.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/texture_cache/texture_cache.h"
-#include "video_core/range_set.h"

 namespace AmdGpu {
 struct Liverpool;
@@ -43,6 +43,10 @@ public:
         return texture_cache;
     }

+    [[nodiscard]] const VideoCore::RangeSet& GetMappedRanges() const noexcept {
+        return mapped_ranges;
+    }
+
     void Draw(bool is_indexed, u32 index_offset = 0);
     void DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size, u32 max_count,
                       VAddr count_address);