Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-07-12 04:35:56 +00:00)

commit 54907409c7 (parent 47c43df544)

    Initial implementation (untested)

6 changed files with 145 additions and 49 deletions
src/video_core/buffer_cache/buffer_cache.cpp

@@ -29,9 +29,9 @@ static constexpr size_t DeviceBufferSize = 128_MB;
 static constexpr size_t MaxPageFaults = 1024;
 
 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
-                         AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
-                         PageManager& tracker)
-    : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
+                         Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
+                         TextureCache& texture_cache_, PageManager& tracker)
+    : instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
@@ -817,7 +817,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
                                     bool is_texel_buffer) {
     boost::container::small_vector<vk::BufferCopy, 4> copies;
     VAddr buffer_start = buffer.CpuAddr();
-    memory_tracker->ForEachUploadRange(
+    memory_tracker->ForEachUploadRange<true>(
         device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) {
             const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
             copies.push_back(vk::BufferCopy{
@@ -996,7 +996,54 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
 }
 
 void BufferCache::SynchronizeBuffersForDma() {
+    RENDERER_TRACE;
+    boost::container::small_vector<Buffer*, 64> buffers;
+    boost::container::small_vector<vk::BufferMemoryBarrier2, 64> barriers;
+    boost::container::small_vector<vk::BufferCopy, 4> copies;
+    const auto& mapped_ranges = rasterizer.GetMappedRanges();
+    memory_tracker->Lock();
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    mapped_ranges.ForEach([&](VAddr device_addr, u64 size) {
+        ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+            if (memory_tracker->IsRegionCpuModified<false>(device_addr, size)) {
+                barriers.push_back(vk::BufferMemoryBarrier2{
+                    .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+                    .srcAccessMask =
+                        vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
+                        vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
+                    .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
+                    .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
+                    .buffer = buffer.Handle(),
+                    .offset = 0,
+                    .size = buffer.SizeBytes(),
+                });
+                buffers.push_back(&buffer);
+            }
+        });
+    });
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = static_cast<u32>(barriers.size()),
+        .pBufferMemoryBarriers = barriers.data(),
+    });
+    for (auto* buffer : buffers) {
+        memory_tracker->ForEachUploadRange<false, false>(
+            buffer->CpuAddr(), buffer->SizeBytes(), false,
+            [&](u64 device_addr_out, u64 range_size) {
+                const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
+                copies.push_back(vk::BufferCopy{
+                    .srcOffset = offset,
+                    .dstOffset = device_addr_out - buffer->CpuAddr(),
+                    .size = range_size,
+                });
+            });
+        cmdbuf.copyBuffer(staging_buffer.Handle(), buffer->Handle(), copies);
+        copies.clear();
+    }
+    memory_tracker->UnmarkAllRegionsAsCpuModified<false>();
+    MemoryBarrier();
+    memory_tracker->Unlock();
 }
 
 void BufferCache::MemoryBarrier() {
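Note on the new DMA path: SynchronizeBuffersForDma() holds the memory tracker's global lock for the entire batch, which is why it uses the non-locking <false> variants of IsRegionCpuModified, ForEachUploadRange, and UnmarkAllRegionsAsCpuModified, and it records all barriers in a single pipelineBarrier2 before any copy so that every pending GPU access is ordered against the transfer writes. A minimal sketch of the recording discipline, distilled from the hunk above (CollectBarriersForCpuModifiedBuffers, RecordStagedCopies, and dependency_info are hypothetical placeholders, not code from the commit):

    // Batch pattern: take the tracker lock once, record one barrier batch,
    // then record all staged uploads, then clear dirty state in bulk.
    memory_tracker->Lock();                   // non-locking <false> helpers are safe below
    CollectBarriersForCpuModifiedBuffers();   // hypothetical helper: gather barriers/buffers
    cmdbuf.pipelineBarrier2(dependency_info); // one barrier covering every touched buffer
    RecordStagedCopies();                     // hypothetical helper: copyBuffer per buffer
    memory_tracker->UnmarkAllRegionsAsCpuModified<false>();
    memory_tracker->Unlock();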
src/video_core/buffer_cache/buffer_cache.h

@@ -21,7 +21,8 @@ class MemoryManager;
 
 namespace Vulkan {
 class GraphicsPipeline;
-}
+class Rasterizer;
+} // namespace Vulkan
 
 namespace VideoCore {
@@ -70,8 +71,8 @@ public:
 
 public:
     explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
-                         AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
-                         PageManager& tracker);
+                         Vulkan::Rasterizer& rasterizer, AmdGpu::Liverpool* liverpool,
+                         TextureCache& texture_cache, PageManager& tracker);
     ~BufferCache();
 
     /// Returns a pointer to GDS device local buffer.
@@ -203,6 +204,7 @@ private:
 
     const Vulkan::Instance& instance;
     Vulkan::Scheduler& scheduler;
+    Vulkan::Rasterizer& rasterizer;
     AmdGpu::Liverpool* liverpool;
     Core::MemoryManager* memory;
     TextureCache& texture_cache;
src/video_core/buffer_cache/memory_tracker.h

@@ -27,7 +27,7 @@ public:
     /// Returns true if a region has been modified from the CPU
     template <bool locking = true>
     bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<true, locking>(
+        return IterateRegions<true, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::CPU>(offset, size);
@@ -37,7 +37,7 @@ public:
     /// Returns true if a region has been modified from the GPU
    template <bool locking = true>
     bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
-        return IteratePages<false, locking>(
+        return IterateRegions<false, locking>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::GPU>(offset, size);
@@ -47,7 +47,7 @@ public:
     /// Mark region as CPU modified, notifying the device_tracker about this change
     template <bool locking = true>
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
-        IteratePages<false, locking>(dirty_cpu_addr, query_size,
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
                                      [](RegionManager* manager, u64 offset, size_t size) {
                                          std::scoped_lock lk{manager->lock};
                                          manager->template ChangeRegionState<Type::CPU, true>(
@@ -55,10 +55,19 @@ public:
                                      });
     }
 
+    /// Unmark all regions as CPU modified, notifying the device_tracker about this change
+    template <bool locking = true>
+    void UnmarkAllRegionsAsCpuModified() noexcept {
+        ForEachRegion<locking>([](RegionManager* manager) {
+            std::scoped_lock lk{manager->lock};
+            manager->template ChangeAllRegionState<Type::CPU, false>();
+        });
+    }
+
     /// Unmark region as modified from the host GPU
     template <bool locking = true>
     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
-        IteratePages<false, locking>(dirty_cpu_addr, query_size,
+        IterateRegions<false, locking>(dirty_cpu_addr, query_size,
                                      [](RegionManager* manager, u64 offset, size_t size) {
                                          std::scoped_lock lk{manager->lock};
                                          manager->template ChangeRegionState<Type::GPU, false>(
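With locking = false, UnmarkAllRegionsAsCpuModified assumes the caller already holds the tracker's global lock. A usage sketch matching the call in SynchronizeBuffersForDma() above:

    memory_tracker->Lock();                                 // guards the whole batch
    // ... enumerate upload ranges via ForEachUploadRange<false, false> ...
    memory_tracker->UnmarkAllRegionsAsCpuModified<false>(); // bulk clear, lock already held
    memory_tracker->Unlock();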
@ -69,7 +78,7 @@ public:
|
||||||
/// Removes all protection from a page and ensures GPU data has been flushed if requested
|
/// Removes all protection from a page and ensures GPU data has been flushed if requested
|
||||||
template <bool locking = true>
|
template <bool locking = true>
|
||||||
void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
|
void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
|
||||||
IteratePages<false, locking>(
|
IterateRegions<false, locking>(
|
||||||
cpu_addr, size,
|
cpu_addr, size,
|
||||||
[try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
|
[try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
|
||||||
const bool should_flush = [&] {
|
const bool should_flush = [&] {
|
||||||
|
@@ -92,14 +101,15 @@ public:
     }
 
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
-    template <bool locking = true>
+    template <bool clear, bool locking = true>
     void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
-        IteratePages<true, locking>(query_cpu_range, query_size,
+        IterateRegions<true, locking>(
+            query_cpu_range, query_size,
             [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
                 std::scoped_lock lk{manager->lock};
-                manager->template ForEachModifiedRange<Type::CPU, true>(
+                manager->template ForEachModifiedRange<Type::CPU, clear>(
                     manager->GetCpuAddr() + offset, size, func);
-                if (is_written) {
+                if (is_written && clear) {
                     manager->template ChangeRegionState<Type::GPU, true>(
                         manager->GetCpuAddr() + offset, size);
                 }
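The new clear template parameter separates enumeration from state clearing: <true> preserves the old behavior of unmarking each range as it is visited, while the DMA path passes <false, false> to read the modified ranges without clearing them (the bulk UnmarkAllRegionsAsCpuModified handles that afterwards) and without re-taking the global lock. The two instantiations introduced by this commit (upload_fn stands in for the copy-recording lambdas):

    // SynchronizeBuffer: clear per range while uploading (previous behavior).
    memory_tracker->ForEachUploadRange<true>(device_addr, size, is_written, upload_fn);

    // SynchronizeBuffersForDma: enumerate only; no per-range clear, no locking.
    memory_tracker->ForEachUploadRange<false, false>(addr, size, false, upload_fn);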
@@ -109,7 +119,7 @@ public:
     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
     template <bool clear, bool locking = true>
     void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
-        IteratePages<false, locking>(query_cpu_range, query_size,
+        IterateRegions<false, locking>(query_cpu_range, query_size,
                                      [&func](RegionManager* manager, u64 offset, size_t size) {
                                          std::scoped_lock lk{manager->lock};
                                          manager->template ForEachModifiedRange<Type::GPU, clear>(
@@ -127,7 +137,6 @@ public:
         global_lock.unlock();
     }
 
-
 private:
     /**
      * @brief IteratePages Iterates L2 word manager page table.

@@ -137,7 +146,7 @@ private:
      * @return
      */
     template <bool create_region_on_fail, bool locking, typename Func>
-    bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
+    bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) {
         RENDERER_TRACE;
         if constexpr (locking) {
             std::shared_lock lock{global_lock};
@@ -177,6 +186,26 @@ private:
         return false;
     }
 
+    /**
+     * @brief Iterate through all regions in the memory tracker.
+     * @param func Callback for each region manager.
+     * @return
+     */
+    template <bool locking, typename Func>
+    void ForEachRegion(Func&& func) {
+        RENDERER_TRACE;
+        if constexpr (locking) {
+            std::shared_lock lock{global_lock};
+        }
+        for (auto& pool : manager_pool) {
+            for (auto& manager : pool) {
+                if (manager.GetCpuAddr() != 0) {
+                    func(&manager);
+                }
+            }
+        }
+    }
+
     void CreateRegion(std::size_t page_index) {
         const VAddr base_cpu_addr = page_index << TRACKER_HIGHER_PAGE_BITS;
         if (free_managers.empty()) {
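ForEachRegion gives the tracker a whole-address-space walk: every allocated RegionManager (a non-zero GetCpuAddr() marks a slot as in use) is visited, which is what lets UnmarkAllRegionsAsCpuModified operate without knowing which ranges are mapped. One caveat worth flagging: the std::shared_lock declared inside the if constexpr block is destroyed at that block's closing brace, so as committed it does not actually hold global_lock across the loop. A sketch of the presumably intended scoping (a hypothetical rewrite, not part of the commit):

    template <bool locking, typename Func>
    void ForEachRegion(Func&& func) {
        RENDERER_TRACE;
        // Defer-construct so the lock can outlive the conditional and
        // cover the whole walk when locking is requested.
        std::shared_lock lock{global_lock, std::defer_lock};
        if constexpr (locking) {
            lock.lock();
        }
        for (auto& pool : manager_pool) {
            for (auto& manager : pool) {
                if (manager.GetCpuAddr() != 0) {
                    func(&manager);
                }
            }
        }
    }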
src/video_core/buffer_cache/region_manager.h

@@ -136,6 +136,24 @@ public:
         }
     }
 
+    /**
+     * Changes the state of all pages in the region
+     */
+    template <Type type, bool enable>
+    void ChangeAllRegionState() noexcept {
+        RENDERER_TRACE;
+        if constexpr (enable) {
+            GetRegionBits<type>().Fill();
+        } else {
+            GetRegionBits<type>().Clear();
+        }
+        if constexpr (type == Type::CPU) {
+            UpdateProtection<!enable, false>();
+        } else if (Config::readbacks()) {
+            UpdateProtection<enable, true>();
+        }
+    }
+
     /**
      * Returns true when a region has been modified
      *
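ChangeAllRegionState is the bulk counterpart of the per-range ChangeRegionState: Fill()/Clear() flips every page bit at once, and the protection update mirrors the per-range logic (CPU write protection follows the inverse of the dirty state; GPU readback protection is only touched when Config::readbacks() is enabled). The tracker's bulk unmark instantiates it as:

    // Used by UnmarkAllRegionsAsCpuModified: clears every CPU dirty bit,
    // then re-applies write protection (UpdateProtection<!enable, false>
    // with enable == false, i.e. UpdateProtection<true, false>).
    manager->template ChangeAllRegionState<Type::CPU, false>();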
src/video_core/renderer_vulkan/vk_rasterizer.cpp

@@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
 Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
                        AmdGpu::Liverpool* liverpool_)
     : instance{instance_}, scheduler{scheduler_}, page_manager{this},
-      buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
+      buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
       texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
       memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
     if (!Config::nullGpu()) {
@@ -475,12 +475,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         // We only use fault buffer for DMA right now.
         {
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
-            mapped_ranges.ForEach(
-                [&](const VAddr addr, u64 size) {
-                    buffer_cache.SynchronizeBuffersInRange(addr, size);
-                });
+            buffer_cache.SynchronizeBuffersForDma();
         }
-        buffer_cache.MemoryBarrier();
     }
 
     fault_process_pending |= uses_dma;
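At the call site, the per-range synchronization loop collapses into a single call. The explicit buffer_cache.MemoryBarrier() disappears here as well because SynchronizeBuffersForDma() now issues it itself just before releasing the tracker lock (see the buffer_cache.cpp hunk above).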
src/video_core/renderer_vulkan/vk_rasterizer.h

@@ -7,9 +7,9 @@
 #include "common/shared_first_mutex.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/page_manager.h"
+#include "video_core/range_set.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/texture_cache/texture_cache.h"
-#include "video_core/range_set.h"
 
 namespace AmdGpu {
 struct Liverpool;

@@ -43,6 +43,10 @@ public:
         return texture_cache;
     }
 
+    [[nodiscard]] const VideoCore::RangeSet& GetMappedRanges() const noexcept {
+        return mapped_ranges;
+    }
+
     void Draw(bool is_indexed, u32 index_offset = 0);
     void DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size, u32 max_count,
                       VAddr count_address);