video_core: Separate dirty flags and better gpu invalidation (#1034)

This commit is contained in:
TheTurtle 2024-09-23 18:03:42 +03:00 committed by GitHub
parent 10d29cc007
commit cd7268a70e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 28 additions and 31 deletions

View file

@ -17,7 +17,7 @@ namespace VideoCore {
static constexpr size_t NumVertexBuffers = 32; static constexpr size_t NumVertexBuffers = 32;
static constexpr size_t GdsBufferSize = 64_KB; static constexpr size_t GdsBufferSize = 64_KB;
static constexpr size_t StagingBufferSize = 1_GB; static constexpr size_t StagingBufferSize = 1_GB;
static constexpr size_t UboStreamBufferSize = 128_MB; static constexpr size_t UboStreamBufferSize = 64_MB;
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,

View file

@ -199,7 +199,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
buffer_barriers.emplace_back(*barrier); buffer_barriers.emplace_back(*barrier);
} }
if (desc.is_written) { if (desc.is_written) {
texture_cache.MarkWritten(address, size); texture_cache.InvalidateMemoryFromGPU(address, size);
} }
} }
set_writes.push_back({ set_writes.push_back({

View file

@ -431,7 +431,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
buffer_barriers.emplace_back(*barrier); buffer_barriers.emplace_back(*barrier);
} }
if (desc.is_written) { if (desc.is_written) {
texture_cache.MarkWritten(address, size); texture_cache.InvalidateMemoryFromGPU(address, size);
} }
} }
set_writes.push_back({ set_writes.push_back({

View file

@ -5,13 +5,9 @@
#include "common/enum.h" #include "common/enum.h"
#include "common/types.h" #include "common/types.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/image_view.h"
#include "video_core/texture_cache/types.h"
#include <optional> #include <optional>
@ -26,7 +22,9 @@ VK_DEFINE_HANDLE(VmaAllocator)
namespace VideoCore { namespace VideoCore {
enum ImageFlagBits : u32 { enum ImageFlagBits : u32 {
CpuModified = 1 << 2, ///< Contents have been modified from the CPU CpuDirty = 1 << 1, ///< Contents have been modified from the CPU
GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache)
Dirty = CpuDirty | GpuDirty,
GpuModified = 1 << 3, ///< Contents have been modified from the GPU GpuModified = 1 << 3, ///< Contents have been modified from the GPU
Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU
Registered = 1 << 6, ///< True when the image is registered Registered = 1 << 6, ///< True when the image is registered
@ -108,7 +106,7 @@ struct Image {
ImageInfo info; ImageInfo info;
UniqueImage image; UniqueImage image;
vk::ImageAspectFlags aspect_mask = vk::ImageAspectFlagBits::eColor; vk::ImageAspectFlags aspect_mask = vk::ImageAspectFlagBits::eColor;
ImageFlagBits flags = ImageFlagBits::CpuModified; ImageFlagBits flags = ImageFlagBits::Dirty;
VAddr cpu_addr = 0; VAddr cpu_addr = 0;
VAddr cpu_addr_end = 0; VAddr cpu_addr_end = 0;
std::vector<ImageViewInfo> image_view_infos; std::vector<ImageViewInfo> image_view_infos;

View file

@ -47,24 +47,23 @@ void TextureCache::InvalidateMemory(VAddr address, size_t size) {
std::scoped_lock lock{mutex}; std::scoped_lock lock{mutex};
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
// Ensure image is reuploaded when accessed again. // Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified; image.flags |= ImageFlagBits::CpuDirty;
// Untrack image, so the range is unprotected and the guest can write freely. // Untrack image, so the range is unprotected and the guest can write freely.
UntrackImage(image_id); UntrackImage(image_id);
}); });
} }
void TextureCache::MarkWritten(VAddr address, size_t max_size) { void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) {
static constexpr FindFlags find_flags = std::scoped_lock lock{mutex};
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; ForEachImageInRegion(address, max_size, [&](ImageId image_id, Image& image) {
ImageInfo info{}; // Only consider images that match base address.
info.guest_address = address; // TODO: Maybe also consider subresources
info.guest_size_bytes = max_size; if (image.info.guest_address != address) {
const ImageId image_id = FindImage(info, find_flags); return;
if (!image_id) { }
return; // Ensure image is reuploaded when accessed again.
} image.flags |= ImageFlagBits::GpuDirty;
// Ensure image is copied when accessed again. });
slot_images[image_id].flags |= ImageFlagBits::CpuModified;
} }
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
@ -189,7 +188,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
FreeImage(image_id); FreeImage(image_id);
TrackImage(new_image_id); TrackImage(new_image_id);
new_image.flags &= ~ImageFlagBits::CpuModified; new_image.flags &= ~ImageFlagBits::Dirty;
return new_image_id; return new_image_id;
} }
@ -325,7 +324,7 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
const ImageId image_id = FindImage(image_info); const ImageId image_id = FindImage(image_info);
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified; image.flags |= ImageFlagBits::GpuModified;
image.flags &= ~ImageFlagBits::CpuModified; image.flags &= ~ImageFlagBits::Dirty;
image.aspect_mask = vk::ImageAspectFlagBits::eDepth; image.aspect_mask = vk::ImageAspectFlagBits::eDepth;
const bool has_stencil = image_info.usage.stencil; const bool has_stencil = image_info.usage.stencil;
@ -362,11 +361,9 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
} }
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) { void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
if (False(image.flags & ImageFlagBits::CpuModified)) { if (False(image.flags & ImageFlagBits::Dirty)) {
return; return;
} }
// Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified;
const auto& num_layers = image.info.resources.layers; const auto& num_layers = image.info.resources.layers;
const auto& num_mips = image.info.resources.levels; const auto& num_mips = image.info.resources.levels;
@ -380,9 +377,10 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
// Protect GPU modified resources from accidental reuploads. // Protect GPU modified resources from accidental CPU reuploads.
if (True(image.flags & ImageFlagBits::GpuModified) && const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
!buffer_cache.IsRegionGpuModified(image.info.guest_address + mip_ofs, mip_size)) { const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty);
if (is_gpu_modified && !is_gpu_dirty) {
const u8* addr = std::bit_cast<u8*>(image.info.guest_address); const u8* addr = std::bit_cast<u8*>(image.info.guest_address);
const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size); const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size);
if (image.mip_hashes[m] == hash) { if (image.mip_hashes[m] == hash) {
@ -438,6 +436,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
} }
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
image.flags &= ~ImageFlagBits::Dirty;
} }
vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) { vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {

View file

@ -51,7 +51,7 @@ public:
void InvalidateMemory(VAddr address, size_t size); void InvalidateMemory(VAddr address, size_t size);
/// Marks an image as dirty if it exists at the provided address. /// Marks an image as dirty if it exists at the provided address.
void MarkWritten(VAddr address, size_t max_size); void InvalidateMemoryFromGPU(VAddr address, size_t max_size);
/// Evicts any images that overlap the unmapped range. /// Evicts any images that overlap the unmapped range.
void UnmapMemory(VAddr cpu_addr, size_t size); void UnmapMemory(VAddr cpu_addr, size_t size);