mirror of https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-11 20:25:55 +00:00
buffer_cache: Fix various thread races on data upload and invalidation (#3186)

* buffer_cache: Fix various thread races on data upload and invalidation

* memory_tracker: Improve locking more on invalidation

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
This commit is contained in:
parent df22c4225e
commit b22da77f9a
5 changed files with 84 additions and 84 deletions
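The core of the fix is visible in the memory_tracker.h hunks further down: the GPU-modified check and the CPU state change now happen under one region lock, so a concurrent GPU thread can no longer mark a page as GPU-modified between the two steps and have its data silently dropped. A minimal standalone sketch of that pattern follows; Region, InvalidateRegion, and the boolean flags here are illustrative stand-ins, not the emulator's actual classes:

#include <functional>
#include <mutex>

// Simplified model of one tracked region. In the real code this role is
// played by RegionManager, whose lock is now taken by MemoryTracker around
// compound operations instead of by each RegionManager method individually.
struct Region {
    std::mutex lock;
    bool gpu_modified = false;
    bool cpu_modified = false;
};

// Pre-commit shape of the race: the GPU-modified check and the CPU state
// change were two independently locked steps, so a GPU thread could set
// gpu_modified in between and that data would never be flushed.
// Fixed shape: both steps share one critical section; only the flush
// callback runs outside the lock.
void InvalidateRegion(Region& region, bool try_flush,
                      const std::function<void()>& on_flush) {
    const bool should_flush = [&] {
        std::scoped_lock lk{region.lock};
        if (try_flush && region.gpu_modified) {
            return true; // the flush path performs the CPU state change
        }
        region.cpu_modified = true;
        return false;
    }();
    if (should_flush) {
        on_flush(); // e.g. read back GPU data, then mark CPU-modified
    }
}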
src/video_core/buffer_cache/buffer_cache.cpp

@@ -2,8 +2,8 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include <algorithm>
+#include <mutex>
 #include "common/alignment.h"
 #include "common/config.h"
 #include "common/debug.h"
 #include "common/scope_exit.h"
 #include "common/types.h"
@@ -136,20 +136,19 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
     if (!IsRegionRegistered(device_addr, size)) {
         return;
     }
-    if (Config::readbacks() && memory_tracker->IsRegionGpuModified(device_addr, size)) {
-        ReadMemory(device_addr, size);
-    }
-    memory_tracker->MarkRegionAsCpuModified(device_addr, size);
+    memory_tracker->InvalidateRegion(
+        device_addr, size, Config::readbacks(),
+        [this, device_addr, size] { ReadMemory(device_addr, size, true); });
 }
 
-void BufferCache::ReadMemory(VAddr device_addr, u64 size) {
-    liverpool->SendCommand<true>([this, device_addr, size] {
+void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) {
+    liverpool->SendCommand<true>([this, device_addr, size, is_write] {
         Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)];
-        DownloadBufferMemory(buffer, device_addr, size);
+        DownloadBufferMemory(buffer, device_addr, size, is_write);
     });
 }
 
-void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
+void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) {
     boost::container::small_vector<vk::BufferCopy, 1> copies;
     u64 total_size_bytes = 0;
     memory_tracker->ForEachDownloadRange<false>(
@@ -192,6 +191,9 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
                            copy.size);
     }
     memory_tracker->UnmarkRegionAsGpuModified(device_addr, size);
+    if (is_write) {
+        memory_tracker->MarkRegionAsCpuModified(device_addr, size);
+    }
 }
 
 void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
@@ -359,7 +361,7 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds,
         // Avoid using ObtainBuffer here as that might give us the stream buffer.
         const BufferId buffer_id = FindBuffer(src, num_bytes);
         auto& buffer = slot_buffers[buffer_id];
-        SynchronizeBuffer(buffer, src, num_bytes, false);
+        SynchronizeBuffer(buffer, src, num_bytes, false, false);
         return buffer;
     }();
     auto& dst_buffer = [&] -> const Buffer& {
@@ -441,9 +443,8 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
         buffer_id = FindBuffer(device_addr, size);
     }
     Buffer& buffer = slot_buffers[buffer_id];
-    SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
+    SynchronizeBuffer(buffer, device_addr, size, is_written, is_texel_buffer);
     if (is_written) {
-        memory_tracker->MarkRegionAsGpuModified(device_addr, size);
         gpu_modified_ranges.Add(device_addr, size);
     }
     return {&buffer, buffer.Offset(device_addr)};
@@ -454,7 +455,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 si
     const BufferId buffer_id = page_table[gpu_addr >> CACHING_PAGEBITS].buffer_id;
     if (buffer_id) {
         if (Buffer& buffer = slot_buffers[buffer_id]; buffer.IsInBounds(gpu_addr, size)) {
-            SynchronizeBuffer(buffer, gpu_addr, size, false);
+            SynchronizeBuffer(buffer, gpu_addr, size, false, false);
             return {&buffer, buffer.Offset(gpu_addr)};
         }
     }
@@ -813,56 +814,27 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
     }
 }
 
-void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
+void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written,
                                     bool is_texel_buffer) {
     boost::container::small_vector<vk::BufferCopy, 4> copies;
-    u64 total_size_bytes = 0;
     VAddr buffer_start = buffer.CpuAddr();
-    memory_tracker->ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
-        copies.push_back(vk::BufferCopy{
-            .srcOffset = total_size_bytes,
-            .dstOffset = device_addr_out - buffer_start,
-            .size = range_size,
+    memory_tracker->ForEachUploadRange(
+        device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) {
+            const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
+            copies.push_back(vk::BufferCopy{
+                .srcOffset = offset,
+                .dstOffset = device_addr_out - buffer_start,
+                .size = range_size,
+            });
         });
-        total_size_bytes += range_size;
-    });
     SCOPE_EXIT {
         if (is_texel_buffer) {
            SynchronizeBufferFromImage(buffer, device_addr, size);
        }
    };
-    if (total_size_bytes == 0) {
+    if (copies.empty()) {
         return;
     }
-    vk::Buffer src_buffer = staging_buffer.Handle();
-    if (total_size_bytes < StagingBufferSize) {
-        const auto [staging, offset] = staging_buffer.Map(total_size_bytes);
-        for (auto& copy : copies) {
-            u8* const src_pointer = staging + copy.srcOffset;
-            const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset;
-            std::memcpy(src_pointer, std::bit_cast<const u8*>(device_addr), copy.size);
-            // Apply the staging offset
-            copy.srcOffset += offset;
-        }
-        staging_buffer.Commit();
-    } else {
-        // For large one time transfers use a temporary host buffer.
-        // RenderDoc can lag quite a bit if the stream buffer is too large.
-        Buffer temp_buffer{instance,
-                           scheduler,
-                           MemoryUsage::Upload,
-                           0,
-                           vk::BufferUsageFlagBits::eTransferSrc,
-                           total_size_bytes};
-        src_buffer = temp_buffer.Handle();
-        u8* const staging = temp_buffer.mapped_data.data();
-        for (auto& copy : copies) {
-            u8* const src_pointer = staging + copy.srcOffset;
-            const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset;
-            std::memcpy(src_pointer, std::bit_cast<const u8*>(device_addr), copy.size);
-        }
-        scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {});
-    }
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
     const vk::BufferMemoryBarrier2 pre_barrier = {
@@ -889,7 +861,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
         .bufferMemoryBarrierCount = 1,
         .pBufferMemoryBarriers = &pre_barrier,
     });
-    cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
+    cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.buffer, copies);
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
         .bufferMemoryBarrierCount = 1,
@@ -1020,7 +992,7 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
         VAddr start = std::max(buffer.CpuAddr(), device_addr);
         VAddr end = std::min(buffer.CpuAddr() + buffer.SizeBytes(), device_addr_end);
         u32 size = static_cast<u32>(end - start);
-        SynchronizeBuffer(buffer, start, size, false);
+        SynchronizeBuffer(buffer, start, size, false, false);
     });
 }
src/video_core/buffer_cache/buffer_cache.h

@@ -113,7 +113,7 @@
     void InvalidateMemory(VAddr device_addr, u64 size);
 
     /// Waits on pending downloads in the logical page range.
-    void ReadMemory(VAddr device_addr, u64 size);
+    void ReadMemory(VAddr device_addr, u64 size, bool is_write = false);
 
     /// Binds host vertex buffers for the current draw.
     void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
@@ -176,7 +176,7 @@
         return !buffer_id || slot_buffers[buffer_id].is_deleted;
     }
 
-    void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
+    void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write);
 
     [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
 
@@ -191,7 +191,8 @@
     template <bool insert>
     void ChangeRegister(BufferId buffer_id);
 
-    void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_texel_buffer);
+    void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written,
+                           bool is_texel_buffer);
 
     bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
src/video_core/buffer_cache/memory_tracker.h

@@ -27,6 +27,7 @@
     bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
         return IteratePages<true>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
+                std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::CPU>(offset, size);
             });
     }
@@ -35,6 +36,7 @@
     bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
         return IteratePages<false>(
             query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
+                std::scoped_lock lk{manager->lock};
                 return manager->template IsRegionModified<Type::GPU>(offset, size);
             });
     }
@@ -43,34 +45,57 @@
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
         IteratePages<false>(dirty_cpu_addr, query_size,
                             [](RegionManager* manager, u64 offset, size_t size) {
+                                std::scoped_lock lk{manager->lock};
                                 manager->template ChangeRegionState<Type::CPU, true>(
                                     manager->GetCpuAddr() + offset, size);
                             });
     }
 
     /// Mark region as modified from the host GPU
     void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
         IteratePages<false>(dirty_cpu_addr, query_size,
                             [](RegionManager* manager, u64 offset, size_t size) {
                                 manager->template ChangeRegionState<Type::GPU, true>(
                                     manager->GetCpuAddr() + offset, size);
                             });
     }
 
     /// Unmark region as modified from the host GPU
     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
         IteratePages<false>(dirty_cpu_addr, query_size,
                             [](RegionManager* manager, u64 offset, size_t size) {
+                                std::scoped_lock lk{manager->lock};
                                 manager->template ChangeRegionState<Type::GPU, false>(
                                     manager->GetCpuAddr() + offset, size);
                             });
     }
 
+    /// Removes all protection from a page and ensures GPU data has been flushed if requested
+    void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
+        IteratePages<false>(
+            cpu_addr, size,
+            [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
+                const bool should_flush = [&] {
+                    // Perform both the GPU modification check and CPU state change with the lock
+                    // in case we are racing with GPU thread trying to mark the page as GPU
+                    // modified. If we need to flush the flush function is going to perform CPU
+                    // state change.
+                    std::scoped_lock lk{manager->lock};
+                    if (try_flush && manager->template IsRegionModified<Type::GPU>(offset, size)) {
+                        return true;
+                    }
+                    manager->template ChangeRegionState<Type::CPU, true>(
+                        manager->GetCpuAddr() + offset, size);
+                    return false;
+                }();
+                if (should_flush) {
+                    on_flush();
+                }
+            });
+    }
+
     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
-    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
+    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
         IteratePages<true>(query_cpu_range, query_size,
-                           [&func](RegionManager* manager, u64 offset, size_t size) {
+                           [&func, is_written](RegionManager* manager, u64 offset, size_t size) {
+                               std::scoped_lock lk{manager->lock};
                                manager->template ForEachModifiedRange<Type::CPU, true>(
                                    manager->GetCpuAddr() + offset, size, func);
+                               if (is_written) {
+                                   manager->template ChangeRegionState<Type::GPU, true>(
+                                       manager->GetCpuAddr() + offset, size);
+                               }
                            });
     }
@@ -79,6 +104,7 @@
     void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
         IteratePages<false>(query_cpu_range, query_size,
                            [&func](RegionManager* manager, u64 offset, size_t size) {
+                               std::scoped_lock lk{manager->lock};
                                manager->template ForEachModifiedRange<Type::GPU, clear>(
                                    manager->GetCpuAddr() + offset, size, func);
                            });
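Note how this file's upload hunk pairs with the buffer_cache.cpp change above: the staging copy (staging_buffer.Copy) now runs inside the ForEachUploadRange callback, i.e. while manager->lock is held, so the data snapshot always matches the dirty bits that were just cleared, and a written buffer is flagged GPU-modified in the same critical section instead of by a separate MarkRegionAsGpuModified call. A rough standalone model of that discipline, with hypothetical Range/Region types rather than the emulator's API:

#include <cstdint>
#include <functional>
#include <mutex>
#include <vector>

struct Range { uint64_t addr, size; };

struct Region {
    std::mutex lock;
    std::vector<Range> cpu_dirty; // stand-in for the CPU dirty-page bitset
    bool gpu_modified = false;
};

// Harvest CPU-dirty ranges, clear them, and (for written buffers) set the
// GPU-modified state, all in one critical section. The callback runs under
// the lock, so the caller's data snapshot cannot race with CPU writes that
// would re-dirty the pages after the bits were cleared.
void ForEachUploadRange(Region& region, bool is_written,
                        const std::function<void(Range)>& snapshot) {
    std::scoped_lock lk{region.lock};
    for (const Range& r : region.cpu_dirty) {
        snapshot(r); // e.g. copy guest memory into the staging buffer
    }
    region.cpu_dirty.clear();
    if (is_written) {
        region.gpu_modified = true; // replaces the separate, racy call
    }
}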
src/video_core/buffer_cache/region_manager.h

@@ -3,9 +3,9 @@
 
 #pragma once
 
-#include <mutex>
+#include "common/config.h"
 #include "common/div_ceil.h"
 #include "common/logging/log.h"
 
 #ifdef __linux__
 #include "common/adaptive_mutex.h"
@@ -19,6 +19,12 @@
 
 namespace VideoCore {
 
+#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+using LockType = Common::AdaptiveMutex;
+#else
+using LockType = Common::SpinLock;
+#endif
+
 /**
  * Allows tracking CPU and GPU modification of pages in a contigious 16MB virtual address region.
  * Information is stored in bitsets for spacial locality and fast update of single pages.
@@ -80,7 +86,6 @@
         if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
             return;
         }
-        std::scoped_lock lk{lock};
 
         RegionBits& bits = GetRegionBits<type>();
         if constexpr (enable) {
@@ -113,15 +118,10 @@
         if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
             return;
         }
-        std::scoped_lock lk{lock};
 
         RegionBits& bits = GetRegionBits<type>();
         RegionBits mask(bits, start_page, end_page);
 
-        for (const auto& [start, end] : mask) {
-            func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE);
-        }
-
         if constexpr (clear) {
             bits.UnsetRange(start_page, end_page);
             if constexpr (type == Type::CPU) {
@@ -130,6 +130,10 @@
                 UpdateProtection<false, true>();
             }
         }
+
+        for (const auto& [start, end] : mask) {
+            func(cpu_addr + start * TRACKER_BYTES_PER_PAGE, (end - start) * TRACKER_BYTES_PER_PAGE);
+        }
     }
 
     /**
@@ -147,13 +151,14 @@
         if (start_page >= NUM_PAGES_PER_REGION || end_page <= start_page) {
             return false;
         }
-        std::scoped_lock lk{lock};
 
         const RegionBits& bits = GetRegionBits<type>();
         RegionBits test(bits, start_page, end_page);
         return test.Any();
     }
 
+    LockType lock;
+
 private:
     /**
      * Notify tracker about changes in the CPU tracking state of a word in the buffer
@@ -179,11 +184,6 @@
         tracker->UpdatePageWatchersForRegion<track, is_read>(cpu_addr, mask);
     }
 
-#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
-    Common::AdaptiveMutex lock;
-#else
-    Common::SpinLock lock;
-#endif
     PageManager* tracker;
     VAddr cpu_addr = 0;
     RegionBits cpu;
src/video_core/page_manager.cpp

@@ -201,16 +201,17 @@ struct PageManager::Impl {
         RENDERER_TRACE;
         auto* memory = Core::Memory::Instance();
         auto& impl = memory->GetAddressSpace();
-        // ASSERT(perms != Core::MemoryPermission::Write);
+        ASSERT_MSG(perms != Core::MemoryPermission::Write,
+                   "Attempted to protect region as write-only which is not a valid permission");
         impl.Protect(address, size, perms);
     }
 
     static bool GuestFaultSignalHandler(void* context, void* fault_address) {
         const auto addr = reinterpret_cast<VAddr>(fault_address);
         if (Common::IsWriteError(context)) {
-            return rasterizer->InvalidateMemory(addr, 1);
+            return rasterizer->InvalidateMemory(addr, 8);
         } else {
-            return rasterizer->ReadMemory(addr, 1);
+            return rasterizer->ReadMemory(addr, 8);
         }
         return false;
     }
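The last hunk widens the span passed out of the fault handler from 1 byte to 8, presumably so that a multi-byte store landing near a tracked-page boundary invalidates (or reads back) both pages it touches; the commit message does not state the motivation explicitly. A small sketch of why the width matters, with an illustrative page size rather than the tracker's real granularity:

#include <cstdint>

// Illustrative page size; the real tracker granularity differs.
constexpr uint64_t kPageSize = 4096;

struct PageSpan { uint64_t first, last; };

// Pages touched by a write of `width` bytes at `addr`. With width = 1, a
// store crossing a page boundary would only cover the first page; width = 8
// covers the widest scalar store the fault could represent.
constexpr PageSpan TouchedPages(uint64_t addr, uint64_t width) {
    return {addr / kPageSize, (addr + width - 1) / kPageSize};
}

static_assert(TouchedPages(4095, 1).last == 0); // misses the second page
static_assert(TouchedPages(4095, 8).last == 1); // covers the straddle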