mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-10 20:53:15 +00:00
video_core: Implement guest buffer manager (#373)
* video_core: Introduce buffer cache * video_core: Use multi level page table for caches * renderer_vulkan: Remove unused stream buffer * fix build * oops forgot optimize off
This commit is contained in:
parent
159be2c7f4
commit
381ba8c7a5
55 changed files with 2697 additions and 1039 deletions
|
@ -260,7 +260,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
|||
case AmdGpu::TilingMode::Display_MacroTiled:
|
||||
case AmdGpu::TilingMode::Texture_MacroTiled:
|
||||
case AmdGpu::TilingMode::Depth_MacroTiled: {
|
||||
ASSERT(!props.is_cube && !props.is_block);
|
||||
// ASSERT(!props.is_cube && !props.is_block);
|
||||
ASSERT(num_samples == 1);
|
||||
std::tie(mip_info.pitch, mip_info.size) =
|
||||
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);
|
||||
|
|
|
@ -61,23 +61,24 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
|
|||
return format;
|
||||
}
|
||||
|
||||
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept
|
||||
: is_storage{is_storage} {
|
||||
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept
|
||||
: is_storage{is_storage_} {
|
||||
type = ConvertImageViewType(image.GetType());
|
||||
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
|
||||
range.base.level = image.base_level;
|
||||
range.base.layer = image.base_array;
|
||||
range.extent.levels = image.last_level + 1;
|
||||
range.extent.layers = image.last_array + 1;
|
||||
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
|
||||
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
|
||||
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
|
||||
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
|
||||
if (!is_storage) {
|
||||
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
|
||||
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
|
||||
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
|
||||
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
|
||||
}
|
||||
// Check for unfortunate case of storage images being swizzled
|
||||
const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt());
|
||||
const u32 dst_sel = image.DstSelect();
|
||||
if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) {
|
||||
mapping = vk::ComponentMapping{};
|
||||
if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) {
|
||||
format = new_format;
|
||||
return;
|
||||
|
|
|
@ -3,103 +3,22 @@
|
|||
|
||||
#include <xxhash.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "core/virtual_memory.h"
|
||||
#include "video_core/page_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/texture_cache/tile_manager.h"
|
||||
|
||||
#ifndef _WIN64
|
||||
#include <signal.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#define PAGE_NOACCESS PROT_NONE
|
||||
#define PAGE_READWRITE (PROT_READ | PROT_WRITE)
|
||||
#define PAGE_READONLY PROT_READ
|
||||
#else
|
||||
#include <windows.h>
|
||||
|
||||
void mprotect(void* addr, size_t len, int prot) {
|
||||
DWORD old_prot{};
|
||||
BOOL result = VirtualProtect(addr, len, prot, &old_prot);
|
||||
ASSERT_MSG(result != 0, "Region protection failed");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
static TextureCache* g_texture_cache = nullptr;
|
||||
|
||||
#ifndef _WIN64
|
||||
void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) {
|
||||
ucontext_t* ctx = reinterpret_cast<ucontext_t*>(raw_context);
|
||||
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
|
||||
|
||||
#ifdef __APPLE__
|
||||
const u32 err = ctx->uc_mcontext->__es.__err;
|
||||
#else
|
||||
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
|
||||
#endif
|
||||
|
||||
if (err & 0x2) {
|
||||
g_texture_cache->OnCpuWrite(address);
|
||||
} else {
|
||||
// Read not supported!
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
#else
|
||||
LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
|
||||
const u32 ec = pExp->ExceptionRecord->ExceptionCode;
|
||||
if (ec == EXCEPTION_ACCESS_VIOLATION) {
|
||||
const auto info = pExp->ExceptionRecord->ExceptionInformation;
|
||||
if (info[0] == 1) { // Write violation
|
||||
g_texture_cache->OnCpuWrite(info[1]);
|
||||
return EXCEPTION_CONTINUE_EXECUTION;
|
||||
} /* else {
|
||||
UNREACHABLE();
|
||||
}*/
|
||||
}
|
||||
return EXCEPTION_CONTINUE_SEARCH; // pass further
|
||||
}
|
||||
#endif
|
||||
|
||||
static constexpr u64 StreamBufferSize = 512_MB;
|
||||
static constexpr u64 PageShift = 12;
|
||||
|
||||
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
|
||||
: instance{instance_}, scheduler{scheduler_},
|
||||
staging{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, StreamBufferSize,
|
||||
Vulkan::BufferType::Upload},
|
||||
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
BufferCache& buffer_cache_, PageManager& tracker_)
|
||||
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
|
||||
staging{instance, scheduler, MemoryUsage::Upload, StreamBufferSize},
|
||||
tile_manager{instance, scheduler} {
|
||||
|
||||
#ifndef _WIN64
|
||||
#ifdef __APPLE__
|
||||
// Read-only memory write results in SIGBUS on Apple.
|
||||
static constexpr int SignalType = SIGBUS;
|
||||
#else
|
||||
static constexpr int SignalType = SIGSEGV;
|
||||
#endif
|
||||
|
||||
sigset_t signal_mask;
|
||||
sigemptyset(&signal_mask);
|
||||
sigaddset(&signal_mask, SignalType);
|
||||
|
||||
using HandlerType = decltype(sigaction::sa_sigaction);
|
||||
|
||||
struct sigaction guest_access_fault {};
|
||||
guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
|
||||
guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
|
||||
guest_access_fault.sa_mask = signal_mask;
|
||||
sigaction(SignalType, &guest_access_fault, nullptr);
|
||||
#else
|
||||
veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler);
|
||||
ASSERT_MSG(veh_handle, "Failed to register an exception handler");
|
||||
#endif
|
||||
g_texture_cache = this;
|
||||
|
||||
ImageInfo info;
|
||||
info.pixel_format = vk::Format::eR8G8B8A8Unorm;
|
||||
info.type = vk::ImageType::e2D;
|
||||
|
@ -110,15 +29,11 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
|
|||
void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id));
|
||||
}
|
||||
|
||||
TextureCache::~TextureCache() {
|
||||
#if _WIN64
|
||||
RemoveVectoredExceptionHandler(veh_handle);
|
||||
#endif
|
||||
}
|
||||
TextureCache::~TextureCache() = default;
|
||||
|
||||
void TextureCache::OnCpuWrite(VAddr address) {
|
||||
std::unique_lock lock{m_page_table};
|
||||
ForEachImageInRegion(address, 1 << PageShift, [&](ImageId image_id, Image& image) {
|
||||
void TextureCache::InvalidateMemory(VAddr address, size_t size) {
|
||||
std::unique_lock lock{mutex};
|
||||
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
|
||||
// Ensure image is reuploaded when accessed again.
|
||||
image.flags |= ImageFlagBits::CpuModified;
|
||||
// Untrack image, so the range is unprotected and the guest can write freely.
|
||||
|
@ -126,8 +41,28 @@ void TextureCache::OnCpuWrite(VAddr address) {
|
|||
});
|
||||
}
|
||||
|
||||
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||
std::scoped_lock lk{mutex};
|
||||
|
||||
boost::container::small_vector<ImageId, 16> deleted_images;
|
||||
ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||
for (const ImageId id : deleted_images) {
|
||||
Image& image = slot_images[id];
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, id);
|
||||
}
|
||||
// TODO: Download image data back to host.
|
||||
UnregisterImage(id);
|
||||
DeleteImage(id);
|
||||
}
|
||||
}
|
||||
|
||||
ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
|
||||
std::unique_lock lock{m_page_table};
|
||||
if (info.guest_address == 0) [[unlikely]] {
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
|
||||
std::unique_lock lock{mutex};
|
||||
boost::container::small_vector<ImageId, 2> image_ids;
|
||||
ForEachImageInRegion(
|
||||
info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
|
||||
|
@ -183,10 +118,6 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
|
|||
}
|
||||
|
||||
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
|
||||
if (info.guest_address == 0) [[unlikely]] {
|
||||
return slot_image_views[NULL_IMAGE_VIEW_ID];
|
||||
}
|
||||
|
||||
const ImageId image_id = FindImage(info);
|
||||
Image& image = slot_images[image_id];
|
||||
auto& usage = image.info.usage;
|
||||
|
@ -310,10 +241,7 @@ void TextureCache::RefreshImage(Image& image) {
|
|||
buffer = *upload_buffer;
|
||||
} else {
|
||||
// Upload data to the staging buffer.
|
||||
const auto [data, offset_, _] = staging.Map(image.info.guest_size_bytes, 16);
|
||||
std::memcpy(data, (void*)image.info.guest_address, image.info.guest_size_bytes);
|
||||
staging.Commit(image.info.guest_size_bytes);
|
||||
offset = offset_;
|
||||
offset = staging.Copy(image.info.guest_address, image.info.guest_size_bytes, 16);
|
||||
}
|
||||
|
||||
const auto& num_layers = image.info.resources.layers;
|
||||
|
@ -344,9 +272,6 @@ void TextureCache::RefreshImage(Image& image) {
|
|||
}
|
||||
|
||||
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
|
||||
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead);
|
||||
}
|
||||
|
||||
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
|
||||
|
@ -362,8 +287,6 @@ void TextureCache::RegisterImage(ImageId image_id) {
|
|||
image.flags |= ImageFlagBits::Registered;
|
||||
ForEachPage(image.cpu_addr, image.info.guest_size_bytes,
|
||||
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
||||
|
||||
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
|
||||
}
|
||||
|
||||
void TextureCache::UnregisterImage(ImageId image_id) {
|
||||
|
@ -373,11 +296,11 @@ void TextureCache::UnregisterImage(ImageId image_id) {
|
|||
image.flags &= ~ImageFlagBits::Registered;
|
||||
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
|
||||
const auto page_it = page_table.find(page);
|
||||
if (page_it == page_table.end()) {
|
||||
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift);
|
||||
if (page_it == nullptr) {
|
||||
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
|
||||
return;
|
||||
}
|
||||
auto& image_ids = page_it.value();
|
||||
auto& image_ids = *page_it;
|
||||
const auto vector_it = std::ranges::find(image_ids, image_id);
|
||||
if (vector_it == image_ids.end()) {
|
||||
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);
|
||||
|
@ -393,7 +316,7 @@ void TextureCache::TrackImage(Image& image, ImageId image_id) {
|
|||
return;
|
||||
}
|
||||
image.flags |= ImageFlagBits::Tracked;
|
||||
UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1);
|
||||
tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1);
|
||||
}
|
||||
|
||||
void TextureCache::UntrackImage(Image& image, ImageId image_id) {
|
||||
|
@ -401,40 +324,34 @@ void TextureCache::UntrackImage(Image& image, ImageId image_id) {
|
|||
return;
|
||||
}
|
||||
image.flags &= ~ImageFlagBits::Tracked;
|
||||
UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1);
|
||||
tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1);
|
||||
}
|
||||
|
||||
void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
||||
std::scoped_lock lk{mutex};
|
||||
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
|
||||
const u64 page_start = addr >> PageShift;
|
||||
const u64 page_end = page_start + num_pages;
|
||||
void TextureCache::DeleteImage(ImageId image_id) {
|
||||
Image& image = slot_images[image_id];
|
||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
|
||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
|
||||
|
||||
const auto pages_interval =
|
||||
decltype(cached_pages)::interval_type::right_open(page_start, page_end);
|
||||
if (delta > 0) {
|
||||
cached_pages.add({pages_interval, delta});
|
||||
// Remove any registered meta areas.
|
||||
const auto& meta_info = image.info.meta_info;
|
||||
if (meta_info.cmask_addr) {
|
||||
surface_metas.erase(meta_info.cmask_addr);
|
||||
}
|
||||
if (meta_info.fmask_addr) {
|
||||
surface_metas.erase(meta_info.fmask_addr);
|
||||
}
|
||||
if (meta_info.htile_addr) {
|
||||
surface_metas.erase(meta_info.htile_addr);
|
||||
}
|
||||
|
||||
const auto& range = cached_pages.equal_range(pages_interval);
|
||||
for (const auto& [range, count] : boost::make_iterator_range(range)) {
|
||||
const auto interval = range & pages_interval;
|
||||
const VAddr interval_start_addr = boost::icl::first(interval) << PageShift;
|
||||
const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift;
|
||||
const u32 interval_size = interval_end_addr - interval_start_addr;
|
||||
void* addr = reinterpret_cast<void*>(interval_start_addr);
|
||||
if (delta > 0 && count == delta) {
|
||||
mprotect(addr, interval_size, PAGE_READONLY);
|
||||
} else if (delta < 0 && count == -delta) {
|
||||
mprotect(addr, interval_size, PAGE_READWRITE);
|
||||
} else {
|
||||
ASSERT(count >= 0);
|
||||
// Reclaim image and any image views it references.
|
||||
scheduler.DeferOperation([this, image_id] {
|
||||
Image& image = slot_images[image_id];
|
||||
for (const ImageViewId image_view_id : image.image_view_ids) {
|
||||
slot_image_views.erase(image_view_id);
|
||||
}
|
||||
}
|
||||
|
||||
if (delta < 0) {
|
||||
cached_pages.add({pages_interval, delta});
|
||||
}
|
||||
slot_images.erase(image_id);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -4,12 +4,11 @@
|
|||
#pragma once
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <tsl/robin_map.h>
|
||||
|
||||
#include "common/slot_vector.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/multi_level_page_table.h"
|
||||
#include "video_core/texture_cache/image.h"
|
||||
#include "video_core/texture_cache/image_view.h"
|
||||
#include "video_core/texture_cache/sampler.h"
|
||||
|
@ -21,31 +20,28 @@ struct BufferAttributeGroup;
|
|||
|
||||
namespace VideoCore {
|
||||
|
||||
class BufferCache;
|
||||
class PageManager;
|
||||
|
||||
class TextureCache {
|
||||
// This is the page shift for adding images into the hash map. It isn't related to
|
||||
// the page size of the guest or the host and is chosen for convenience. A number too
|
||||
// small will increase the number of hash map lookups per image, while too large will
|
||||
// increase the number of images per page.
|
||||
static constexpr u64 PageBits = 20;
|
||||
static constexpr u64 PageMask = (1ULL << PageBits) - 1;
|
||||
|
||||
struct MetaDataInfo {
|
||||
enum class Type {
|
||||
CMask,
|
||||
FMask,
|
||||
HTile,
|
||||
};
|
||||
|
||||
Type type;
|
||||
bool is_cleared;
|
||||
struct Traits {
|
||||
using Entry = boost::container::small_vector<ImageId, 16>;
|
||||
static constexpr size_t AddressSpaceBits = 39;
|
||||
static constexpr size_t FirstLevelBits = 9;
|
||||
static constexpr size_t PageBits = 22;
|
||||
};
|
||||
using PageTable = MultiLevelPageTable<Traits>;
|
||||
|
||||
public:
|
||||
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
||||
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||
BufferCache& buffer_cache, PageManager& tracker);
|
||||
~TextureCache();
|
||||
|
||||
/// Invalidates any image in the logical page range.
|
||||
void OnCpuWrite(VAddr address);
|
||||
void InvalidateMemory(VAddr address, size_t size);
|
||||
|
||||
/// Evicts any images that overlap the unmapped range.
|
||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||
|
||||
/// Retrieves the image handle of the image with the provided attributes.
|
||||
[[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true);
|
||||
|
@ -101,8 +97,8 @@ private:
|
|||
template <typename Func>
|
||||
static void ForEachPage(PAddr addr, size_t size, Func&& func) {
|
||||
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
||||
const u64 page_end = (addr + size - 1) >> PageBits;
|
||||
for (u64 page = addr >> PageBits; page <= page_end; ++page) {
|
||||
const u64 page_end = (addr + size - 1) >> Traits::PageBits;
|
||||
for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
|
||||
if constexpr (RETURNS_BOOL) {
|
||||
if (func(page)) {
|
||||
break;
|
||||
|
@ -120,14 +116,14 @@ private:
|
|||
boost::container::small_vector<ImageId, 32> images;
|
||||
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
|
||||
const auto it = page_table.find(page);
|
||||
if (it == page_table.end()) {
|
||||
if (it == nullptr) {
|
||||
if constexpr (BOOL_BREAK) {
|
||||
return false;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (const ImageId image_id : it->second) {
|
||||
for (const ImageId image_id : *it) {
|
||||
Image& image = slot_images[image_id];
|
||||
if (image.flags & ImageFlagBits::Picked) {
|
||||
continue;
|
||||
|
@ -166,25 +162,32 @@ private:
|
|||
/// Stop tracking CPU reads and writes for image
|
||||
void UntrackImage(Image& image, ImageId image_id);
|
||||
|
||||
/// Increase/decrease the number of surface in pages touching the specified region
|
||||
void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta);
|
||||
/// Removes the image and any views/surface metas that reference it.
|
||||
void DeleteImage(ImageId image_id);
|
||||
|
||||
private:
|
||||
const Vulkan::Instance& instance;
|
||||
Vulkan::Scheduler& scheduler;
|
||||
Vulkan::StreamBuffer staging;
|
||||
BufferCache& buffer_cache;
|
||||
PageManager& tracker;
|
||||
StreamBuffer staging;
|
||||
TileManager tile_manager;
|
||||
Common::SlotVector<Image> slot_images;
|
||||
Common::SlotVector<ImageView> slot_image_views;
|
||||
tsl::robin_map<u64, Sampler> samplers;
|
||||
tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
|
||||
boost::icl::interval_map<VAddr, s32> cached_pages;
|
||||
tsl::robin_map<VAddr, MetaDataInfo> surface_metas;
|
||||
PageTable page_table;
|
||||
std::mutex mutex;
|
||||
#ifdef _WIN64
|
||||
void* veh_handle{};
|
||||
#endif
|
||||
std::mutex m_page_table;
|
||||
|
||||
struct MetaDataInfo {
|
||||
enum class Type {
|
||||
CMask,
|
||||
FMask,
|
||||
HTile,
|
||||
};
|
||||
Type type;
|
||||
bool is_cleared;
|
||||
};
|
||||
tsl::robin_map<VAddr, MetaDataInfo> surface_metas;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -183,10 +183,12 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
|||
case vk::Format::eB8G8R8A8Srgb:
|
||||
case vk::Format::eB8G8R8A8Unorm:
|
||||
case vk::Format::eR8G8B8A8Unorm:
|
||||
case vk::Format::eR8G8B8A8Uint:
|
||||
case vk::Format::eR32Sfloat:
|
||||
case vk::Format::eR32Uint:
|
||||
case vk::Format::eR16G16Sfloat:
|
||||
return vk::Format::eR32Uint;
|
||||
case vk::Format::eBc1RgbaSrgbBlock:
|
||||
case vk::Format::eBc1RgbaUnormBlock:
|
||||
case vk::Format::eBc4UnormBlock:
|
||||
case vk::Format::eR32G32Sfloat:
|
||||
|
@ -200,11 +202,20 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
|
|||
case vk::Format::eBc5UnormBlock:
|
||||
case vk::Format::eBc7SrgbBlock:
|
||||
case vk::Format::eBc7UnormBlock:
|
||||
case vk::Format::eBc6HUfloatBlock:
|
||||
case vk::Format::eR32G32B32A32Sfloat:
|
||||
return vk::Format::eR32G32B32A32Uint;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
|
||||
|
||||
// Log missing formats only once to avoid spamming the log.
|
||||
static constexpr size_t MaxFormatIndex = 256;
|
||||
static std::array<bool, MaxFormatIndex> logged_formats{};
|
||||
if (const u32 index = u32(format); !logged_formats[index]) {
|
||||
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
|
||||
logged_formats[index] = true;
|
||||
}
|
||||
return format;
|
||||
}
|
||||
|
||||
|
@ -236,8 +247,11 @@ struct DetilerParams {
|
|||
u32 sizes[14];
|
||||
};
|
||||
|
||||
static constexpr size_t StreamBufferSize = 128_MB;
|
||||
|
||||
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
||||
: instance{instance}, scheduler{scheduler} {
|
||||
: instance{instance}, scheduler{scheduler},
|
||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, StreamBufferSize} {
|
||||
static const std::array detiler_shaders{
|
||||
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
|
||||
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP,
|
||||
|
@ -336,8 +350,7 @@ TileManager::ScratchBuffer TileManager::AllocBuffer(u32 size, bool is_storage /*
|
|||
.flags = !is_storage ? VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
|
||||
: static_cast<VmaAllocationCreateFlags>(0),
|
||||
.usage = is_large_buffer ? VMA_MEMORY_USAGE_AUTO_PREFER_HOST
|
||||
: VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
|
||||
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
|
||||
.requiredFlags = !is_storage ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
|
||||
: static_cast<VkMemoryPropertyFlags>(0),
|
||||
};
|
||||
|
@ -373,37 +386,46 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
|
|||
|
||||
const auto* detiler = GetDetiler(image);
|
||||
if (!detiler) {
|
||||
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
|
||||
vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
|
||||
if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled) {
|
||||
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
|
||||
vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Prepare input buffer
|
||||
auto in_buffer = AllocBuffer(image.info.guest_size_bytes);
|
||||
Upload(in_buffer, reinterpret_cast<const void*>(image.info.guest_address),
|
||||
image.info.guest_size_bytes);
|
||||
const u32 image_size = image.info.guest_size_bytes;
|
||||
const auto [in_buffer, in_offset] = [&] -> std::pair<vk::Buffer, u32> {
|
||||
// Use stream buffer for smaller textures.
|
||||
if (image_size <= StreamBufferSize) {
|
||||
u32 offset = stream_buffer.Copy(image.info.guest_address, image_size);
|
||||
return {stream_buffer.Handle(), offset};
|
||||
}
|
||||
// Request temporary host buffer for larger sizes.
|
||||
auto in_buffer = AllocBuffer(image_size);
|
||||
const auto addr = reinterpret_cast<const void*>(image.info.guest_address);
|
||||
Upload(in_buffer, addr, image_size);
|
||||
scheduler.DeferOperation([=, this]() { FreeBuffer(in_buffer); });
|
||||
return {in_buffer.first, 0};
|
||||
}();
|
||||
|
||||
// Prepare output buffer
|
||||
auto out_buffer = AllocBuffer(image.info.guest_size_bytes, true);
|
||||
|
||||
scheduler.DeferOperation([=, this]() {
|
||||
FreeBuffer(in_buffer);
|
||||
FreeBuffer(out_buffer);
|
||||
});
|
||||
auto out_buffer = AllocBuffer(image_size, true);
|
||||
scheduler.DeferOperation([=, this]() { FreeBuffer(out_buffer); });
|
||||
|
||||
auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *detiler->pl);
|
||||
|
||||
const vk::DescriptorBufferInfo input_buffer_info{
|
||||
.buffer = in_buffer.first,
|
||||
.offset = 0,
|
||||
.range = image.info.guest_size_bytes,
|
||||
.buffer = in_buffer,
|
||||
.offset = in_offset,
|
||||
.range = image_size,
|
||||
};
|
||||
|
||||
const vk::DescriptorBufferInfo output_buffer_info{
|
||||
.buffer = out_buffer.first,
|
||||
.offset = 0,
|
||||
.range = image.info.guest_size_bytes,
|
||||
.range = image_size,
|
||||
};
|
||||
|
||||
std::vector<vk::WriteDescriptorSet> set_writes{
|
||||
|
@ -442,16 +464,16 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
|
|||
cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params),
|
||||
&params);
|
||||
|
||||
ASSERT((image.info.guest_size_bytes % 64) == 0);
|
||||
ASSERT((image_size % 64) == 0);
|
||||
const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u);
|
||||
const auto num_tiles = image.info.guest_size_bytes / (64 * (bpp / 8));
|
||||
const auto num_tiles = image_size / (64 * (bpp / 8));
|
||||
cmdbuf.dispatch(num_tiles, 1, 1);
|
||||
|
||||
const vk::BufferMemoryBarrier post_barrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.buffer = out_buffer.first,
|
||||
.size = image.info.guest_size_bytes,
|
||||
.size = image_size,
|
||||
};
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/buffer_cache/buffer.h"
|
||||
#include "video_core/texture_cache/image.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
@ -34,7 +34,7 @@ struct DetilerContext {
|
|||
|
||||
class TileManager {
|
||||
public:
|
||||
using ScratchBuffer = std::pair<VkBuffer, VmaAllocation>;
|
||||
using ScratchBuffer = std::pair<vk::Buffer, VmaAllocation>;
|
||||
|
||||
TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
|
||||
~TileManager();
|
||||
|
@ -51,6 +51,7 @@ private:
|
|||
private:
|
||||
const Vulkan::Instance& instance;
|
||||
Vulkan::Scheduler& scheduler;
|
||||
StreamBuffer stream_buffer;
|
||||
std::array<DetilerContext, DetilerType::Max> detilers;
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue