video_core: Implement guest buffer manager (#373)

* video_core: Introduce buffer cache

* video_core: Use multi level page table for caches

* renderer_vulkan: Remove unused stream buffer

* fix build

* oops forgot optimize off
This commit is contained in:
TheTurtle 2024-08-08 15:02:10 +03:00 committed by GitHub
parent 159be2c7f4
commit 381ba8c7a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 2697 additions and 1039 deletions

View file

@ -260,7 +260,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
case AmdGpu::TilingMode::Display_MacroTiled:
case AmdGpu::TilingMode::Texture_MacroTiled:
case AmdGpu::TilingMode::Depth_MacroTiled: {
ASSERT(!props.is_cube && !props.is_block);
// ASSERT(!props.is_cube && !props.is_block);
ASSERT(num_samples == 1);
std::tie(mip_info.pitch, mip_info.size) =
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);

View file

@ -61,23 +61,24 @@ vk::Format TrySwizzleFormat(vk::Format format, u32 dst_sel) {
return format;
}
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept
: is_storage{is_storage} {
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage_) noexcept
: is_storage{is_storage_} {
type = ConvertImageViewType(image.GetType());
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
range.base.level = image.base_level;
range.base.layer = image.base_array;
range.extent.levels = image.last_level + 1;
range.extent.layers = image.last_array + 1;
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
if (!is_storage) {
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
}
// Check for unfortunate case of storage images being swizzled
const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt());
const u32 dst_sel = image.DstSelect();
if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) {
mapping = vk::ComponentMapping{};
if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) {
format = new_format;
return;

View file

@ -3,103 +3,22 @@
#include <xxhash.h>
#include "common/assert.h"
#include "common/config.h"
#include "core/virtual_memory.h"
#include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/tile_manager.h"
#ifndef _WIN64
#include <signal.h>
#include <sys/mman.h>
#define PAGE_NOACCESS PROT_NONE
#define PAGE_READWRITE (PROT_READ | PROT_WRITE)
#define PAGE_READONLY PROT_READ
#else
#include <windows.h>
void mprotect(void* addr, size_t len, int prot) {
DWORD old_prot{};
BOOL result = VirtualProtect(addr, len, prot, &old_prot);
ASSERT_MSG(result != 0, "Region protection failed");
}
#endif
namespace VideoCore {
static TextureCache* g_texture_cache = nullptr;
#ifndef _WIN64
void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) {
ucontext_t* ctx = reinterpret_cast<ucontext_t*>(raw_context);
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
#ifdef __APPLE__
const u32 err = ctx->uc_mcontext->__es.__err;
#else
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
#endif
if (err & 0x2) {
g_texture_cache->OnCpuWrite(address);
} else {
// Read not supported!
UNREACHABLE();
}
}
#else
/// Windows vectored exception handler for faults on guest memory.
/// Only access violations whose first information word equals 1 (write violation) are
/// handled: the second information word is forwarded to the texture cache as the
/// faulting address and execution resumes. Everything else is passed on to the next
/// handler.
/// @param pExp Exception record and context supplied by the OS.
/// @return EXCEPTION_CONTINUE_EXECUTION when the write was handled, otherwise
///         EXCEPTION_CONTINUE_SEARCH.
LONG WINAPI GuestFaultSignalHandler(EXCEPTION_POINTERS* pExp) noexcept {
    const auto* record = pExp->ExceptionRecord;
    const u32 code = record->ExceptionCode;
    if (code != EXCEPTION_ACCESS_VIOLATION) {
        return EXCEPTION_CONTINUE_SEARCH; // pass further
    }

    const auto* params = record->ExceptionInformation;
    const bool is_write_violation = params[0] == 1;
    if (!is_write_violation) {
        // Non-write violations are not treated as fatal here (an UNREACHABLE() for this
        // case was left disabled); they are simply passed further.
        return EXCEPTION_CONTINUE_SEARCH;
    }

    g_texture_cache->OnCpuWrite(params[1]);
    return EXCEPTION_CONTINUE_EXECUTION;
}
#endif
static constexpr u64 StreamBufferSize = 512_MB;
static constexpr u64 PageShift = 12;
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
: instance{instance_}, scheduler{scheduler_},
staging{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, StreamBufferSize,
Vulkan::BufferType::Upload},
TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
BufferCache& buffer_cache_, PageManager& tracker_)
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
staging{instance, scheduler, MemoryUsage::Upload, StreamBufferSize},
tile_manager{instance, scheduler} {
#ifndef _WIN64
#ifdef __APPLE__
// Read-only memory write results in SIGBUS on Apple.
static constexpr int SignalType = SIGBUS;
#else
static constexpr int SignalType = SIGSEGV;
#endif
sigset_t signal_mask;
sigemptyset(&signal_mask);
sigaddset(&signal_mask, SignalType);
using HandlerType = decltype(sigaction::sa_sigaction);
struct sigaction guest_access_fault {};
guest_access_fault.sa_flags = SA_SIGINFO | SA_ONSTACK;
guest_access_fault.sa_sigaction = &GuestFaultSignalHandler;
guest_access_fault.sa_mask = signal_mask;
sigaction(SignalType, &guest_access_fault, nullptr);
#else
veh_handle = AddVectoredExceptionHandler(0, GuestFaultSignalHandler);
ASSERT_MSG(veh_handle, "Failed to register an exception handler");
#endif
g_texture_cache = this;
ImageInfo info;
info.pixel_format = vk::Format::eR8G8B8A8Unorm;
info.type = vk::ImageType::e2D;
@ -110,15 +29,11 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id));
}
TextureCache::~TextureCache() {
#if _WIN64
RemoveVectoredExceptionHandler(veh_handle);
#endif
}
TextureCache::~TextureCache() = default;
void TextureCache::OnCpuWrite(VAddr address) {
std::unique_lock lock{m_page_table};
ForEachImageInRegion(address, 1 << PageShift, [&](ImageId image_id, Image& image) {
void TextureCache::InvalidateMemory(VAddr address, size_t size) {
std::unique_lock lock{mutex};
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely.
@ -126,8 +41,28 @@ void TextureCache::OnCpuWrite(VAddr address) {
});
}
/// Evicts every image that overlaps the unmapped guest range.
/// Holds `mutex` for the whole operation. Each overlapping image is untracked (if it
/// is currently tracked), unregistered and then deleted.
/// @param cpu_addr Start of the unmapped range.
/// @param size     Length of the unmapped range in bytes.
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
    std::scoped_lock guard{mutex};

    // Gather the ids up front and evict afterwards; presumably this keeps the region
    // walk from observing the registrations being removed below.
    boost::container::small_vector<ImageId, 16> evicted_ids;
    ForEachImageInRegion(cpu_addr, size, [&](ImageId image_id, Image&) {
        evicted_ids.push_back(image_id);
    });

    for (const ImageId image_id : evicted_ids) {
        Image& evicted = slot_images[image_id];
        const bool is_tracked = True(evicted.flags & ImageFlagBits::Tracked);
        if (is_tracked) {
            UntrackImage(evicted, image_id);
        }
        // TODO: Download image data back to host.
        UnregisterImage(image_id);
        DeleteImage(image_id);
    }
}
ImageId TextureCache::FindImage(const ImageInfo& info, bool refresh_on_create) {
std::unique_lock lock{m_page_table};
if (info.guest_address == 0) [[unlikely]] {
return NULL_IMAGE_VIEW_ID;
}
std::unique_lock lock{mutex};
boost::container::small_vector<ImageId, 2> image_ids;
ForEachImageInRegion(
info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
@ -183,10 +118,6 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
}
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
if (info.guest_address == 0) [[unlikely]] {
return slot_image_views[NULL_IMAGE_VIEW_ID];
}
const ImageId image_id = FindImage(info);
Image& image = slot_images[image_id];
auto& usage = image.info.usage;
@ -310,10 +241,7 @@ void TextureCache::RefreshImage(Image& image) {
buffer = *upload_buffer;
} else {
// Upload data to the staging buffer.
const auto [data, offset_, _] = staging.Map(image.info.guest_size_bytes, 16);
std::memcpy(data, (void*)image.info.guest_address, image.info.guest_size_bytes);
staging.Commit(image.info.guest_size_bytes);
offset = offset_;
offset = staging.Copy(image.info.guest_address, image.info.guest_size_bytes, 16);
}
const auto& num_layers = image.info.resources.layers;
@ -344,9 +272,6 @@ void TextureCache::RefreshImage(Image& image) {
}
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
image.Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead);
}
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
@ -362,8 +287,6 @@ void TextureCache::RegisterImage(ImageId image_id) {
image.flags |= ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.info.guest_size_bytes,
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
image.Transit(vk::ImageLayout::eGeneral, vk::AccessFlagBits::eNone);
}
void TextureCache::UnregisterImage(ImageId image_id) {
@ -373,11 +296,11 @@ void TextureCache::UnregisterImage(ImageId image_id) {
image.flags &= ~ImageFlagBits::Registered;
ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
const auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift);
if (page_it == nullptr) {
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
return;
}
auto& image_ids = page_it.value();
auto& image_ids = *page_it;
const auto vector_it = std::ranges::find(image_ids, image_id);
if (vector_it == image_ids.end()) {
ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift);
@ -393,7 +316,7 @@ void TextureCache::TrackImage(Image& image, ImageId image_id) {
return;
}
image.flags |= ImageFlagBits::Tracked;
UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1);
tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1);
}
void TextureCache::UntrackImage(Image& image, ImageId image_id) {
@ -401,40 +324,34 @@ void TextureCache::UntrackImage(Image& image, ImageId image_id) {
return;
}
image.flags &= ~ImageFlagBits::Tracked;
UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1);
tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1);
}
void TextureCache::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
std::scoped_lock lk{mutex};
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
const u64 page_start = addr >> PageShift;
const u64 page_end = page_start + num_pages;
void TextureCache::DeleteImage(ImageId image_id) {
Image& image = slot_images[image_id];
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
const auto pages_interval =
decltype(cached_pages)::interval_type::right_open(page_start, page_end);
if (delta > 0) {
cached_pages.add({pages_interval, delta});
// Remove any registered meta areas.
const auto& meta_info = image.info.meta_info;
if (meta_info.cmask_addr) {
surface_metas.erase(meta_info.cmask_addr);
}
if (meta_info.fmask_addr) {
surface_metas.erase(meta_info.fmask_addr);
}
if (meta_info.htile_addr) {
surface_metas.erase(meta_info.htile_addr);
}
const auto& range = cached_pages.equal_range(pages_interval);
for (const auto& [range, count] : boost::make_iterator_range(range)) {
const auto interval = range & pages_interval;
const VAddr interval_start_addr = boost::icl::first(interval) << PageShift;
const VAddr interval_end_addr = boost::icl::last_next(interval) << PageShift;
const u32 interval_size = interval_end_addr - interval_start_addr;
void* addr = reinterpret_cast<void*>(interval_start_addr);
if (delta > 0 && count == delta) {
mprotect(addr, interval_size, PAGE_READONLY);
} else if (delta < 0 && count == -delta) {
mprotect(addr, interval_size, PAGE_READWRITE);
} else {
ASSERT(count >= 0);
// Reclaim image and any image views it references.
scheduler.DeferOperation([this, image_id] {
Image& image = slot_images[image_id];
for (const ImageViewId image_view_id : image.image_view_ids) {
slot_image_views.erase(image_view_id);
}
}
if (delta < 0) {
cached_pages.add({pages_interval, delta});
}
slot_images.erase(image_id);
});
}
} // namespace VideoCore

View file

@ -4,12 +4,11 @@
#pragma once
#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "common/slot_vector.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/multi_level_page_table.h"
#include "video_core/texture_cache/image.h"
#include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/sampler.h"
@ -21,31 +20,28 @@ struct BufferAttributeGroup;
namespace VideoCore {
class BufferCache;
class PageManager;
class TextureCache {
// This is the page shift for adding images into the hash map. It isn't related to
// the page size of the guest or the host and is chosen for convenience. A number too
// small will increase the number of hash map lookups per image, while too large will
// increase the number of images per page.
static constexpr u64 PageBits = 20;
static constexpr u64 PageMask = (1ULL << PageBits) - 1;
struct MetaDataInfo {
enum class Type {
CMask,
FMask,
HTile,
};
Type type;
bool is_cleared;
struct Traits {
using Entry = boost::container::small_vector<ImageId, 16>;
static constexpr size_t AddressSpaceBits = 39;
static constexpr size_t FirstLevelBits = 9;
static constexpr size_t PageBits = 22;
};
using PageTable = MultiLevelPageTable<Traits>;
public:
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
BufferCache& buffer_cache, PageManager& tracker);
~TextureCache();
/// Invalidates any image in the logical page range.
void OnCpuWrite(VAddr address);
void InvalidateMemory(VAddr address, size_t size);
/// Evicts any images that overlap the unmapped range.
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Retrieves the image handle of the image with the provided attributes.
[[nodiscard]] ImageId FindImage(const ImageInfo& info, bool refresh_on_create = true);
@ -101,8 +97,8 @@ private:
template <typename Func>
static void ForEachPage(PAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> PageBits;
for (u64 page = addr >> PageBits; page <= page_end; ++page) {
const u64 page_end = (addr + size - 1) >> Traits::PageBits;
for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
@ -120,14 +116,14 @@ private:
boost::container::small_vector<ImageId, 32> images;
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
if (it == nullptr) {
if constexpr (BOOL_BREAK) {
return false;
} else {
return;
}
}
for (const ImageId image_id : it->second) {
for (const ImageId image_id : *it) {
Image& image = slot_images[image_id];
if (image.flags & ImageFlagBits::Picked) {
continue;
@ -166,25 +162,32 @@ private:
/// Stop tracking CPU reads and writes for image
void UntrackImage(Image& image, ImageId image_id);
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta);
/// Removes the image and any views/surface metas that reference it.
void DeleteImage(ImageId image_id);
private:
const Vulkan::Instance& instance;
Vulkan::Scheduler& scheduler;
Vulkan::StreamBuffer staging;
BufferCache& buffer_cache;
PageManager& tracker;
StreamBuffer staging;
TileManager tile_manager;
Common::SlotVector<Image> slot_images;
Common::SlotVector<ImageView> slot_image_views;
tsl::robin_map<u64, Sampler> samplers;
tsl::robin_pg_map<u64, std::vector<ImageId>> page_table;
boost::icl::interval_map<VAddr, s32> cached_pages;
tsl::robin_map<VAddr, MetaDataInfo> surface_metas;
PageTable page_table;
std::mutex mutex;
#ifdef _WIN64
void* veh_handle{};
#endif
std::mutex m_page_table;
/// Bookkeeping record for a surface meta area. Records are stored in `surface_metas`,
/// keyed by address.
struct MetaDataInfo {
    /// Kind of meta surface the record describes.
    enum class Type {
        CMask,
        FMask,
        HTile,
    };
    // Both members are value-initialized so that a default-constructed record never
    // carries indeterminate values (type == CMask, is_cleared == false).
    Type type{};
    // NOTE(review): what "cleared" means is decided by the users of surface_metas,
    // which are not visible here — confirm before relying on the default.
    bool is_cleared{};
};
tsl::robin_map<VAddr, MetaDataInfo> surface_metas;
};
} // namespace VideoCore

View file

@ -183,10 +183,12 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eB8G8R8A8Srgb:
case vk::Format::eB8G8R8A8Unorm:
case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eR8G8B8A8Uint:
case vk::Format::eR32Sfloat:
case vk::Format::eR32Uint:
case vk::Format::eR16G16Sfloat:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaSrgbBlock:
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc4UnormBlock:
case vk::Format::eR32G32Sfloat:
@ -200,11 +202,20 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock:
case vk::Format::eBc6HUfloatBlock:
case vk::Format::eR32G32B32A32Sfloat:
return vk::Format::eR32G32B32A32Uint;
default:
break;
}
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
// Log missing formats only once to avoid spamming the log.
static constexpr size_t MaxFormatIndex = 256;
static std::array<bool, MaxFormatIndex> logged_formats{};
if (const u32 index = u32(format); !logged_formats[index]) {
LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
logged_formats[index] = true;
}
return format;
}
@ -236,8 +247,11 @@ struct DetilerParams {
u32 sizes[14];
};
static constexpr size_t StreamBufferSize = 128_MB;
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
: instance{instance}, scheduler{scheduler},
stream_buffer{instance, scheduler, MemoryUsage::Stream, StreamBufferSize} {
static const std::array detiler_shaders{
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP,
@ -336,8 +350,7 @@ TileManager::ScratchBuffer TileManager::AllocBuffer(u32 size, bool is_storage /*
.flags = !is_storage ? VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
: static_cast<VmaAllocationCreateFlags>(0),
.usage = is_large_buffer ? VMA_MEMORY_USAGE_AUTO_PREFER_HOST
: VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = !is_storage ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
: static_cast<VkMemoryPropertyFlags>(0),
};
@ -373,37 +386,46 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
const auto* detiler = GetDetiler(image);
if (!detiler) {
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled) {
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
}
return std::nullopt;
}
// Prepare input buffer
auto in_buffer = AllocBuffer(image.info.guest_size_bytes);
Upload(in_buffer, reinterpret_cast<const void*>(image.info.guest_address),
image.info.guest_size_bytes);
const u32 image_size = image.info.guest_size_bytes;
const auto [in_buffer, in_offset] = [&] -> std::pair<vk::Buffer, u32> {
// Use stream buffer for smaller textures.
if (image_size <= StreamBufferSize) {
u32 offset = stream_buffer.Copy(image.info.guest_address, image_size);
return {stream_buffer.Handle(), offset};
}
// Request temporary host buffer for larger sizes.
auto in_buffer = AllocBuffer(image_size);
const auto addr = reinterpret_cast<const void*>(image.info.guest_address);
Upload(in_buffer, addr, image_size);
scheduler.DeferOperation([=, this]() { FreeBuffer(in_buffer); });
return {in_buffer.first, 0};
}();
// Prepare output buffer
auto out_buffer = AllocBuffer(image.info.guest_size_bytes, true);
scheduler.DeferOperation([=, this]() {
FreeBuffer(in_buffer);
FreeBuffer(out_buffer);
});
auto out_buffer = AllocBuffer(image_size, true);
scheduler.DeferOperation([=, this]() { FreeBuffer(out_buffer); });
auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *detiler->pl);
const vk::DescriptorBufferInfo input_buffer_info{
.buffer = in_buffer.first,
.offset = 0,
.range = image.info.guest_size_bytes,
.buffer = in_buffer,
.offset = in_offset,
.range = image_size,
};
const vk::DescriptorBufferInfo output_buffer_info{
.buffer = out_buffer.first,
.offset = 0,
.range = image.info.guest_size_bytes,
.range = image_size,
};
std::vector<vk::WriteDescriptorSet> set_writes{
@ -442,16 +464,16 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
cmdbuf.pushConstants(*detiler->pl_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(params),
&params);
ASSERT((image.info.guest_size_bytes % 64) == 0);
ASSERT((image_size % 64) == 0);
const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u);
const auto num_tiles = image.info.guest_size_bytes / (64 * (bpp / 8));
const auto num_tiles = image_size / (64 * (bpp / 8));
cmdbuf.dispatch(num_tiles, 1, 1);
const vk::BufferMemoryBarrier post_barrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.buffer = out_buffer.first,
.size = image.info.guest_size_bytes,
.size = image_size,
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,

View file

@ -4,7 +4,7 @@
#pragma once
#include "common/types.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/buffer_cache/buffer.h"
#include "video_core/texture_cache/image.h"
namespace VideoCore {
@ -34,7 +34,7 @@ struct DetilerContext {
class TileManager {
public:
using ScratchBuffer = std::pair<VkBuffer, VmaAllocation>;
using ScratchBuffer = std::pair<vk::Buffer, VmaAllocation>;
TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
~TileManager();
@ -51,6 +51,7 @@ private:
private:
const Vulkan::Instance& instance;
Vulkan::Scheduler& scheduler;
StreamBuffer stream_buffer;
std::array<DetilerContext, DetilerType::Max> detilers;
};