mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-25 20:06:17 +00:00
video_core: Page manager/region manager optimization (#3070)
* Bit array test * Some corrections * Fix AVX path on SetRange * Finish bitArray * Batched protect progress * Inclusion fix * Last logic fixes for BitArray * Page manager: batch protect, masked ranges * Page manager bitarray * clang-format * Fix out of bounds read * clang * clang * Lock during callbacks * Rename untracked to writeable * Construct and mask in one step * Sync on region mutex for thw whole protection This is a temporary workarround until a fix is found for the page manager having issues when multiple threads update the same page at the same time. * Bring back the gpu masking until properly handled * Sync page manager protections * clang-format * Rename and fixups * I fucked up clang-formatting one more time... * kek
This commit is contained in:
parent
e214ca6884
commit
be12305f65
10 changed files with 781 additions and 361 deletions
|
@ -48,19 +48,15 @@ struct PageManager::Impl {
|
|||
u8 AddDelta() {
|
||||
if constexpr (delta == 1) {
|
||||
return ++num_watchers;
|
||||
} else {
|
||||
} else if constexpr (delta == -1) {
|
||||
ASSERT_MSG(num_watchers > 0, "Not enough watchers");
|
||||
return --num_watchers;
|
||||
} else {
|
||||
return num_watchers;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct UpdateProtectRange {
|
||||
VAddr addr;
|
||||
u64 size;
|
||||
Core::MemoryPermission perms;
|
||||
};
|
||||
|
||||
static constexpr size_t ADDRESS_BITS = 40;
|
||||
static constexpr size_t NUM_ADDRESS_PAGES = 1ULL << (40 - PAGE_BITS);
|
||||
inline static Vulkan::Rasterizer* rasterizer;
|
||||
|
@ -190,66 +186,122 @@ struct PageManager::Impl {
|
|||
}
|
||||
|
||||
#endif
|
||||
template <s32 delta>
|
||||
template <bool track>
|
||||
void UpdatePageWatchers(VAddr addr, u64 size) {
|
||||
RENDERER_TRACE;
|
||||
boost::container::small_vector<UpdateProtectRange, 16> update_ranges;
|
||||
{
|
||||
std::scoped_lock lk(lock);
|
||||
|
||||
size_t page = addr >> PAGE_BITS;
|
||||
auto perms = cached_pages[page].Perm();
|
||||
u64 range_begin = 0;
|
||||
u64 range_bytes = 0;
|
||||
size_t page = addr >> PAGE_BITS;
|
||||
auto perms = cached_pages[page].Perm();
|
||||
u64 range_begin = 0;
|
||||
u64 range_bytes = 0;
|
||||
|
||||
const auto release_pending = [&] {
|
||||
if (range_bytes > 0) {
|
||||
RENDERER_TRACE;
|
||||
// Add pending (un)protect action
|
||||
update_ranges.push_back({range_begin << PAGE_BITS, range_bytes, perms});
|
||||
range_bytes = 0;
|
||||
}
|
||||
};
|
||||
const auto release_pending = [&] {
|
||||
if (range_bytes > 0) {
|
||||
RENDERER_TRACE;
|
||||
// Perform pending (un)protect action
|
||||
Protect(range_begin << PAGE_BITS, range_bytes, perms);
|
||||
range_bytes = 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Iterate requested pages
|
||||
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
|
||||
const u64 aligned_addr = page << PAGE_BITS;
|
||||
const u64 aligned_end = page_end << PAGE_BITS;
|
||||
ASSERT_MSG(rasterizer->IsMapped(aligned_addr, aligned_end - aligned_addr),
|
||||
"Attempted to track non-GPU memory at address {:#x}, size {:#x}.",
|
||||
aligned_addr, aligned_end - aligned_addr);
|
||||
std::scoped_lock lk(lock);
|
||||
|
||||
for (; page != page_end; ++page) {
|
||||
PageState& state = cached_pages[page];
|
||||
// Iterate requested pages
|
||||
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
|
||||
const u64 aligned_addr = page << PAGE_BITS;
|
||||
const u64 aligned_end = page_end << PAGE_BITS;
|
||||
ASSERT_MSG(rasterizer->IsMapped(aligned_addr, aligned_end - aligned_addr),
|
||||
"Attempted to track non-GPU memory at address {:#x}, size {:#x}.", aligned_addr,
|
||||
aligned_end - aligned_addr);
|
||||
|
||||
// Apply the change to the page state
|
||||
const u8 new_count = state.AddDelta<delta>();
|
||||
for (; page != page_end; ++page) {
|
||||
PageState& state = cached_pages[page];
|
||||
|
||||
// Apply the change to the page state
|
||||
const u8 new_count = state.AddDelta<track ? 1 : -1>();
|
||||
|
||||
if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
|
||||
// If the protection changed add pending (un)protect action
|
||||
if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
|
||||
release_pending();
|
||||
perms = new_perms;
|
||||
}
|
||||
|
||||
// If the page must be (un)protected, add it to the pending range
|
||||
if ((new_count == 0 && delta < 0) || (new_count == 1 && delta > 0)) {
|
||||
if (range_bytes == 0) {
|
||||
range_begin = page;
|
||||
}
|
||||
range_bytes += PAGE_SIZE;
|
||||
} else {
|
||||
release_pending();
|
||||
}
|
||||
release_pending();
|
||||
perms = new_perms;
|
||||
} else if (range_bytes != 0) {
|
||||
// If the protection did not change, extend the current range
|
||||
range_bytes += PAGE_SIZE;
|
||||
}
|
||||
|
||||
// Add pending (un)protect action
|
||||
release_pending();
|
||||
// Only start a new range if the page must be (un)protected
|
||||
if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) {
|
||||
range_begin = page;
|
||||
range_bytes = PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
// Flush deferred protects
|
||||
for (const auto& range : update_ranges) {
|
||||
Protect(range.addr, range.size, range.perms);
|
||||
// Add pending (un)protect action
|
||||
release_pending();
|
||||
}
|
||||
|
||||
template <bool track>
|
||||
void UpdatePageWatchersForRegion(VAddr base_addr, RegionBits& mask) {
|
||||
RENDERER_TRACE;
|
||||
auto start_range = mask.FirstRange();
|
||||
auto end_range = mask.LastRange();
|
||||
|
||||
if (start_range.second == end_range.second) {
|
||||
// Optimization: if all pages are contiguous, use the regular UpdatePageWatchers
|
||||
const VAddr start_addr = base_addr + (start_range.first << PAGE_BITS);
|
||||
const u64 size = (start_range.second - start_range.first) << PAGE_BITS;
|
||||
|
||||
UpdatePageWatchers<track>(start_addr, size);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t base_page = (base_addr >> PAGE_BITS);
|
||||
auto perms = cached_pages[base_page + start_range.first].Perm();
|
||||
u64 range_begin = 0;
|
||||
u64 range_bytes = 0;
|
||||
|
||||
const auto release_pending = [&] {
|
||||
if (range_bytes > 0) {
|
||||
RENDERER_TRACE;
|
||||
// Perform pending (un)protect action
|
||||
Protect((range_begin << PAGE_BITS), range_bytes, perms);
|
||||
range_bytes = 0;
|
||||
}
|
||||
};
|
||||
|
||||
std::scoped_lock lk(lock);
|
||||
|
||||
// Iterate pages
|
||||
for (size_t page = start_range.first; page < end_range.second; ++page) {
|
||||
PageState& state = cached_pages[base_page + page];
|
||||
const bool update = mask.Get(page);
|
||||
|
||||
// Apply the change to the page state
|
||||
const u8 new_count = update ? state.AddDelta<track ? 1 : -1>() : state.AddDelta<0>();
|
||||
|
||||
if (auto new_perms = state.Perm(); new_perms != perms) [[unlikely]] {
|
||||
// If the protection changed add pending (un)protect action
|
||||
release_pending();
|
||||
perms = new_perms;
|
||||
} else if (range_bytes != 0) {
|
||||
// If the protection did not change, extend the current range
|
||||
range_bytes += PAGE_SIZE;
|
||||
}
|
||||
|
||||
// If the page is not being updated, skip it
|
||||
if (!update) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only start a new range if the page must be (un)protected
|
||||
if (range_bytes == 0 && ((new_count == 0 && !track) || (new_count == 1 && track))) {
|
||||
range_begin = base_page + page;
|
||||
range_bytes = PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
// Add pending (un)protect action
|
||||
release_pending();
|
||||
}
|
||||
|
||||
std::array<PageState, NUM_ADDRESS_PAGES> cached_pages{};
|
||||
|
@ -273,12 +325,21 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
|
|||
impl->OnUnmap(address, size);
|
||||
}
|
||||
|
||||
template <s32 delta>
|
||||
template <bool track>
|
||||
void PageManager::UpdatePageWatchers(VAddr addr, u64 size) const {
|
||||
impl->UpdatePageWatchers<delta>(addr, size);
|
||||
impl->UpdatePageWatchers<track>(addr, size);
|
||||
}
|
||||
|
||||
template void PageManager::UpdatePageWatchers<1>(VAddr addr, u64 size) const;
|
||||
template void PageManager::UpdatePageWatchers<-1>(VAddr addr, u64 size) const;
|
||||
template <bool track>
|
||||
void PageManager::UpdatePageWatchersForRegion(VAddr base_addr, RegionBits& mask) const {
|
||||
impl->UpdatePageWatchersForRegion<track>(base_addr, mask);
|
||||
}
|
||||
|
||||
template void PageManager::UpdatePageWatchers<true>(VAddr addr, u64 size) const;
|
||||
template void PageManager::UpdatePageWatchers<false>(VAddr addr, u64 size) const;
|
||||
template void PageManager::UpdatePageWatchersForRegion<true>(VAddr base_addr,
|
||||
RegionBits& mask) const;
|
||||
template void PageManager::UpdatePageWatchersForRegion<false>(VAddr base_addr,
|
||||
RegionBits& mask) const;
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue