video_core: Improve handling of image buffer aliases (#757)

* texture_cache: Use invalidate threshold

* It's possible for shaders to bind huge buffers and only write to the lower portion of them. This is a problem when the upper parts of the buffer overlap with render targets. If an image is very far from the buffer base, it is unlikely the shader will write to it, so skip invalidation for it
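A minimal standalone sketch of the heuristic (hypothetical helper; the real check lives in TextureCache::InvalidateMemory in the diff below, with MaxInvalidateDist defined as 12_MB):

    #include <cstddef>
    #include <cstdint>

    // 12 MB threshold, matching the MaxInvalidateDist constant added in this change.
    constexpr std::size_t MaxInvalidateDist = 12ull << 20;

    // Returns true when an image overlapping a written buffer should be marked
    // CPU-modified: only images whose base lies close to the written address
    // are invalidated; far-away overlaps are assumed untouched by the shader.
    bool ShouldInvalidate(std::uintptr_t image_cpu_addr, std::uintptr_t write_addr) {
        const std::size_t dist = image_cpu_addr > write_addr ? image_cpu_addr - write_addr
                                                             : write_addr - image_cpu_addr;
        return dist < MaxInvalidateDist;
    }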

* video_core: Allow using texture cache to validate texture buffers

* texture_cache: Use buffer cache in all cases for data source

* Allows correctly handling compute-written micro-tiled textures

* texture_cache: Fix depth pitch

* kernel: Remove missed code

* clang format

* video_core: Adjust depth format

* buffer_cache: Do not cache buffer views

* thread_management: Do not call createMutex on unlock

* temp: Revert this when the PR is done

* buffer_cache: Don't skip CPU uploads with image sync

* Sometimes an image does not fully overlap with a region
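The fix hinges on a standard interval-overlap test; a minimal sketch of what an Overlaps check like the one added to ForEachImageInRegion below could look like (hypothetical standalone form, assuming half-open address ranges):

    #include <cstddef>
    #include <cstdint>

    // Half-open interval test: [image_addr, image_addr + image_size) vs.
    // [addr, addr + size). Non-overlapping images are skipped instead of
    // being invalidated wholesale.
    bool Overlaps(std::uintptr_t image_addr, std::size_t image_size,
                  std::uintptr_t addr, std::size_t size) {
        return image_addr < addr + size && addr < image_addr + image_size;
    }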

* fix build

* video_core: Improve invalidate heuristic

* small fixes

* video_core: Hopefully fix some vertex explosions
TheTurtle 2024-09-05 17:25:45 +03:00 committed by GitHub
parent 4e0dc91040
commit b08baaeb13
18 changed files with 248 additions and 191 deletions


@@ -7,7 +7,7 @@
 #pragma once
 #include <unordered_map>
-#include <vulkan/vulkan.h>
+#include "video_core/renderer_vulkan/vk_common.h"
 namespace VideoCore {
 /**
@@ -383,9 +383,10 @@ static const std::unordered_map<VkFormat, FORMAT_COMPATIBILITY_CLASS> vkFormatCl
  * @url
  * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility
  */
-static bool IsVulkanFormatCompatible(VkFormat lhs, VkFormat rhs) {
-    if (lhs == rhs)
+static bool IsVulkanFormatCompatible(vk::Format lhs, vk::Format rhs) {
+    if (lhs == rhs) {
         return true;
-    return vkFormatClassTable.at(lhs) == vkFormatClassTable.at(rhs);
+    }
+    return vkFormatClassTable.at(VkFormat(lhs)) == vkFormatClassTable.at(VkFormat(rhs));
 }
 } // namespace VideoCore
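For illustration (not part of the diff; assumes the class table mirrors the compatibility classes from the linked spec):

    // Two 32-bit single-channel formats share a compatibility class, so a
    // cached image may be reused across them.
    assert(IsVulkanFormatCompatible(vk::Format::eR32Sfloat, vk::Format::eR32Uint));
    // A 32-bit and a 64-bit format fall into different classes.
    assert(!IsVulkanFormatCompatible(vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat));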


@@ -166,8 +166,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
     image.Create(image_ci);
-    Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}",
-                          info.guest_address, info.guest_size_bytes);
+    Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {:#x}:{:#x}",
+                          info.size.width, info.size.height, info.size.depth, info.guest_address,
+                          info.guest_size_bytes);
 }
 void Image::Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits> dst_mask,


@@ -187,7 +187,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
     size.width = hint.Valid() ? hint.width : buffer.Pitch();
     size.height = hint.Valid() ? hint.height : buffer.Height();
     size.depth = 1;
-    pitch = size.width;
+    pitch = buffer.Pitch();
     resources.layers = num_slices;
     meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
     usage.depth_target = true;
@@ -207,7 +207,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n
     if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) {
         if (pixel_format == vk::Format::eR32Sfloat) {
             pixel_format = vk::Format::eD32SfloatS8Uint;
-        } else if (pixel_format == vk::Format::eR16Sfloat) {
+        } else if (pixel_format == vk::Format::eR16Unorm) {
             pixel_format = vk::Format::eD16UnormS8Uint;
         } else {
             UNREACHABLE();


@@ -123,7 +123,8 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
     // When sampling D32 texture from shader, the T# specifies R32 Float format so adjust it.
     vk::Format format = info.format;
     vk::ImageAspectFlags aspect = image.aspect_mask;
-    if (image.aspect_mask & vk::ImageAspectFlagBits::eDepth && format == vk::Format::eR32Sfloat) {
+    if (image.aspect_mask & vk::ImageAspectFlagBits::eDepth &&
+        (format == vk::Format::eR32Sfloat || format == vk::Format::eD32Sfloat)) {
         format = image.info.pixel_format;
         aspect = vk::ImageAspectFlagBits::eDepth;
     }


@@ -38,13 +38,14 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
 TextureCache::~TextureCache() = default;
 
 void TextureCache::InvalidateMemory(VAddr address, size_t size) {
-    std::unique_lock lock{mutex};
+    std::scoped_lock lock{mutex};
     ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
-        if (!image.Overlaps(address, size)) {
-            return;
+        const size_t image_dist =
+            image.cpu_addr > address ? image.cpu_addr - address : address - image.cpu_addr;
+        if (image_dist < MaxInvalidateDist) {
+            // Ensure image is reuploaded when accessed again.
+            image.flags |= ImageFlagBits::CpuModified;
         }
-        // Ensure image is reuploaded when accessed again.
-        image.flags |= ImageFlagBits::CpuModified;
         // Untrack image, so the range is unprotected and the guest can write freely.
         UntrackImage(image_id);
     });
@@ -144,17 +145,12 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_
             FreeImage(cache_image_id);
         }
 
-        if (tex_cache_image.info.IsSliceOf(image_info)) {
-            UNREACHABLE();
-        }
     }
 
     return merged_image_id;
 }
 
 ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
     const auto new_image_id = slot_images.insert(instance, scheduler, info);
     RegisterImage(new_image_id);
@@ -171,50 +167,37 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
     return new_image_id;
 }
 
-ImageId TextureCache::FindImage(const ImageInfo& info) {
+ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
     if (info.guest_address == 0) [[unlikely]] {
         return NULL_IMAGE_VIEW_ID;
     }
 
-    std::unique_lock lock{mutex};
+    std::scoped_lock lock{mutex};
     boost::container::small_vector<ImageId, 8> image_ids;
-    ForEachImageInRegion(
-        info.guest_address, info.guest_size_bytes, [&](ImageId image_id, Image& image) {
-            // Ignore images scheduled for deletion
-            if (True(image.flags & ImageFlagBits::Deleted)) {
-                return;
-            }
-            // Check if image is fully outside of the region
-            const auto in_image_cpu_addr = info.guest_address;
-            const auto in_image_cpu_addr_end = info.guest_address + info.guest_size_bytes;
-            if (in_image_cpu_addr_end <= image.cpu_addr) {
-                return;
-            }
-            if (in_image_cpu_addr >= image.cpu_addr_end) {
-                return;
-            }
-            image_ids.push_back(image_id);
-        });
+    ForEachImageInRegion(info.guest_address, info.guest_size_bytes,
+                         [&](ImageId image_id, Image& image) { image_ids.push_back(image_id); });
 
     ImageId image_id{};
 
     // Check for a perfect match first
     for (const auto& cache_id : image_ids) {
         auto& cache_image = slot_images[cache_id];
-        if (cache_image.info.guest_address == info.guest_address &&
-            cache_image.info.guest_size_bytes == info.guest_size_bytes &&
-            cache_image.info.size == info.size) {
-            ASSERT(cache_image.info.type == info.type);
-            if (IsVulkanFormatCompatible((VkFormat)info.pixel_format,
-                                         (VkFormat)cache_image.info.pixel_format)) {
-                image_id = cache_id;
-            }
-            break;
+        if (cache_image.info.guest_address != info.guest_address) {
+            continue;
         }
+        if (False(flags & FindFlags::RelaxSize) &&
+            cache_image.info.guest_size_bytes != info.guest_size_bytes) {
+            continue;
+        }
+        if (False(flags & FindFlags::RelaxDim) && cache_image.info.size != info.size) {
+            continue;
+        }
+        if (False(flags & FindFlags::RelaxFmt) &&
+            !IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) {
+            continue;
+        }
+        ASSERT(cache_image.info.type == info.type);
+        image_id = cache_id;
     }
 
     // Try to resolve overlaps (if any)
@@ -225,13 +208,18 @@ ImageId TextureCache::FindImage(const ImageInfo& info) {
         }
     }
 
+    if (True(flags & FindFlags::NoCreate) && !image_id) {
+        return {};
+    }
+
     // Create and register a new image
     if (!image_id) {
         image_id = slot_images.insert(instance, scheduler, info);
         RegisterImage(image_id);
     }
 
-    slot_images[image_id].tick_accessed_last = scheduler.CurrentTick();
+    Image& image = slot_images[image_id];
+    image.tick_accessed_last = scheduler.CurrentTick();
 
     return image_id;
 }
@@ -259,8 +247,11 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
 ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
     const ImageId image_id = FindImage(info);
-    UpdateImage(image_id);
     Image& image = slot_images[image_id];
+    if (view_info.is_storage) {
+        image.flags |= ImageFlagBits::GpuModified;
+    }
+    UpdateImage(image_id);
     auto& usage = image.info.usage;
 
     if (view_info.is_storage) {
@@ -354,6 +345,10 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
 }
 
 void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
+    if (False(image.flags & ImageFlagBits::CpuModified)) {
+        return;
+    }
+
     // Mark image as validated.
     image.flags &= ~ImageFlagBits::CpuModified;
@@ -407,27 +402,20 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
     const VAddr image_addr = image.info.guest_address;
     const size_t image_size = image.info.guest_size_bytes;
 
-    vk::Buffer buffer{};
-    u32 offset{};
-    if (auto upload_buffer = tile_manager.TryDetile(image); upload_buffer) {
-        buffer = *upload_buffer;
-    } else {
-        const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size);
-        buffer = vk_buffer->Handle();
-        offset = buf_offset;
-        // The obtained buffer may be written by a shader so we need to emit a barrier to prevent
-        // RAW hazard
-        if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
-                                                 vk::PipelineStageFlagBits2::eTransfer)) {
-            auto dependencies = vk::DependencyInfo{
-                .bufferMemoryBarrierCount = 1,
-                .pBufferMemoryBarriers = &barrier.value(),
-            };
-            cmdbuf.pipelineBarrier2(dependencies);
-        }
-    }
+    const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size);
+    // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
+    // hazard
+    if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
+                                             vk::PipelineStageFlagBits2::eTransfer)) {
+        const auto dependencies = vk::DependencyInfo{
+            .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+            .bufferMemoryBarrierCount = 1,
+            .pBufferMemoryBarriers = &barrier.value(),
+        };
+        cmdbuf.pipelineBarrier2(dependencies);
+    }
 
+    const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image);
     for (auto& copy : image_copy) {
         copy.bufferOffset += offset;
     }


@@ -23,6 +23,16 @@ namespace VideoCore {
 class BufferCache;
 class PageManager;
 
+enum class FindFlags {
+    NoCreate = 1 << 0,  ///< Do not create an image if searching for one fails.
+    RelaxDim = 1 << 1,  ///< Do not check the dimensions of the image, only the address.
+    RelaxSize = 1 << 2, ///< Do not check that the size matches exactly.
+    RelaxFmt = 1 << 3,  ///< Do not check that the format is compatible.
+};
+DECLARE_ENUM_FLAG_OPERATORS(FindFlags)
+
+static constexpr u32 MaxInvalidateDist = 12_MB;
+
 class TextureCache {
     struct Traits {
         using Entry = boost::container::small_vector<ImageId, 16>;
@@ -44,7 +54,7 @@ public:
     void UnmapMemory(VAddr cpu_addr, size_t size);
 
     /// Retrieves the image handle of the image with the provided attributes.
-    [[nodiscard]] ImageId FindImage(const ImageInfo& info);
+    [[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {});
 
     /// Retrieves an image view with the properties of the specified image descriptor.
     [[nodiscard]] ImageView& FindTexture(const ImageInfo& image_info,
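As a usage illustration (hypothetical call site, not from this diff): since DECLARE_ENUM_FLAG_OPERATORS(FindFlags) generates the bitwise operators, relaxations can be combined:

    // Look up an existing image at this address without creating one,
    // tolerating mismatched size and dimensions.
    const ImageId image_id =
        FindImage(info, FindFlags::NoCreate | FindFlags::RelaxSize | FindFlags::RelaxDim);
    if (!image_id) {
        // No cached image matched; the caller falls back to another data source.
    }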
@@ -61,11 +71,8 @@ public:
     /// Updates image contents if it was modified by CPU.
     void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
         Image& image = slot_images[image_id];
-        if (False(image.flags & ImageFlagBits::CpuModified)) {
-            return;
-        }
-        RefreshImage(image, custom_scheduler);
         TrackImage(image_id);
+        RefreshImage(image, custom_scheduler);
     }
 
     [[nodiscard]] ImageId ResolveOverlap(const ImageInfo& info, ImageId cache_img_id,
@@ -109,31 +116,12 @@ public:
         return false;
     }
 
 private:
-    ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
-
-    /// Iterate over all page indices in a range
-    template <typename Func>
-    static void ForEachPage(PAddr addr, size_t size, Func&& func) {
-        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
-        const u64 page_end = (addr + size - 1) >> Traits::PageBits;
-        for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
-            if constexpr (RETURNS_BOOL) {
-                if (func(page)) {
-                    break;
-                }
-            } else {
-                func(page);
-            }
-        }
-    }
-
     template <typename Func>
     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
         using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
         static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
         boost::container::small_vector<ImageId, 32> images;
-        ForEachPage(cpu_addr, size, [this, &images, func](u64 page) {
+        ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
             const auto it = page_table.find(page);
             if (it == nullptr) {
                 if constexpr (BOOL_BREAK) {
@@ -147,6 +135,9 @@ private:
             if (image.flags & ImageFlagBits::Picked) {
                 continue;
             }
+            if (!image.Overlaps(cpu_addr, size)) {
+                continue;
+            }
             image.flags |= ImageFlagBits::Picked;
             images.push_back(image_id);
             if constexpr (BOOL_BREAK) {
@@ -166,6 +157,26 @@ private:
         }
     }
 
 private:
+    /// Iterate over all page indices in a range
+    template <typename Func>
+    static void ForEachPage(PAddr addr, size_t size, Func&& func) {
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
+        const u64 page_end = (addr + size - 1) >> Traits::PageBits;
+        for (u64 page = addr >> Traits::PageBits; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) {
+                if (func(page)) {
+                    break;
+                }
+            } else {
+                func(page);
+            }
+        }
+    }
+
+    /// Registers an image view for provided image
+    ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
+
     /// Create an image from the given parameters
     [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);


@@ -377,30 +377,23 @@ void TileManager::FreeBuffer(ScratchBuffer buffer) {
     vmaDestroyBuffer(instance.GetAllocator(), buffer.first, buffer.second);
 }
 
-std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
+std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset,
+                                                  Image& image) {
     if (!image.info.props.is_tiled) {
-        return std::nullopt;
+        return {in_buffer, in_offset};
     }
 
     const auto* detiler = GetDetiler(image);
     if (!detiler) {
-        if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled) {
+        if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled &&
+            image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled) {
             LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
                       vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
         }
-        return std::nullopt;
+        return {in_buffer, in_offset};
    }
 
-    // Prepare input buffer
     const u32 image_size = image.info.guest_size_bytes;
-    const auto [in_buffer, in_offset] = [&] -> std::pair<vk::Buffer, u32> {
-        // Request temporary host buffer for larger sizes.
-        auto in_buffer = AllocBuffer(image_size);
-        const auto addr = reinterpret_cast<const void*>(image.info.guest_address);
-        Upload(in_buffer, addr, image_size);
-        scheduler.DeferOperation([=, this]() { FreeBuffer(in_buffer); });
-        return {in_buffer.first, 0};
-    }();
 
     // Prepare output buffer
     auto out_buffer = AllocBuffer(image_size, true);
@@ -471,7 +464,7 @@ std::optional<vk::Buffer> TileManager::TryDetile(Image& image) {
                          vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
                          {}, post_barrier, {});
 
-    return {out_buffer.first};
+    return {out_buffer.first, 0};
 }
 
 } // namespace VideoCore
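A condensed sketch of the new contract (hypothetical standalone form; the real signature is in the header diff below): TryDetile no longer allocates and uploads its own input staging buffer; it either passes the caller's buffer/offset through untouched or substitutes its output buffer at offset 0:

    #include <cstdint>
    #include <utility>

    // Minimal stand-ins so the sketch is self-contained.
    struct Buffer { std::uint64_t handle; };
    struct ImageProps { bool is_tiled; };

    // Pass-through contract of the reworked TryDetile: the caller always
    // sources data from the buffer cache; detiling only swaps in a new buffer.
    std::pair<Buffer, std::uint32_t> TryDetileSketch(Buffer in_buffer, std::uint32_t in_offset,
                                                     const ImageProps& props, Buffer out_buffer) {
        if (!props.is_tiled) {
            // Linear image: the buffer-cache data is usable as-is.
            return {in_buffer, in_offset};
        }
        // A detile pass would read in_buffer at in_offset and write out_buffer;
        // the detiled data always starts at offset 0.
        return {out_buffer, 0};
    }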


@@ -39,7 +39,7 @@ public:
     TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
     ~TileManager();
 
-    std::optional<vk::Buffer> TryDetile(Image& image);
+    std::pair<vk::Buffer, u32> TryDetile(vk::Buffer in_buffer, u32 in_offset, Image& image);
 
     ScratchBuffer AllocBuffer(u32 size, bool is_storage = false);
     void Upload(ScratchBuffer buffer, const void* data, size_t size);