shader_recompiler: Implement data share append and consume operations (#814)

* shader_recompiler: Add more format swap modes

* texture_cache: Handle stencil texture reads

* emulator: Support loading font library

* readme: Add thanks section

* shader_recompiler: Constant buffers as integers

* shader_recompiler: Typed buffers as integers

* shader_recompiler: Separate thread bit scalars

* We can assume the guest shader never mixes them with normal SGPRs. This helps avoid errors where SSA could view an SGPR write as dominating a thread-bit read, due to how control flow is structurized, even though it's not possible in the actual control flow.

* shader_recompiler: Implement data append/consume operations

* clang format

* buffer_cache: Simplify invalidation scheme

* video_core: Remove some invalidation remnants

* adjust
This commit is contained in:
TheTurtle 2024-09-07 00:14:51 +03:00 committed by GitHub
parent 649527a235
commit 13743b27fc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 512 additions and 272 deletions

View file

@ -32,7 +32,6 @@ enum ImageFlagBits : u32 {
Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
Deleted = 1 << 9, ///< Indicates that images was marked for deletion once frame is done
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)

View file

@ -205,7 +205,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n
pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
// Override format if image is forced to be a depth target
if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) {
if (pixel_format == vk::Format::eR32Sfloat) {
if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) {
pixel_format = vk::Format::eD32SfloatS8Uint;
} else if (pixel_format == vk::Format::eR16Unorm) {
pixel_format = vk::Format::eD16UnormS8Uint;

View file

@ -128,6 +128,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
format = image.info.pixel_format;
aspect = vk::ImageAspectFlagBits::eDepth;
}
if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) {
format = image.info.pixel_format;
aspect = vk::ImageAspectFlagBits::eStencil;
}
const vk::ImageViewCreateInfo image_view_ci = {
.pNext = usage_override ? &usage_ci : nullptr,

View file

@ -40,17 +40,27 @@ TextureCache::~TextureCache() = default;
/// Invalidates any image in the logical page range: every image visited by
/// ForEachImageInRegion for (address, size) is flagged as CPU-modified and then untracked.
/// The cache mutex is held for the whole walk.
void TextureCache::InvalidateMemory(VAddr address, size_t size) {
std::scoped_lock lock{mutex};
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
// Absolute distance between the image's cpu_addr and the start of the invalidated range.
const size_t image_dist =
image.cpu_addr > address ? image.cpu_addr - address : address - image.cpu_addr;
// NOTE(review): as the text stands, CpuModified is also set unconditionally right after this
// branch, which makes the distance check redundant. This looks like the removed and added
// sides of a diff hunk shown together -- confirm against the actual file before relying on it.
if (image_dist < MaxInvalidateDist) {
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
}
// Ensure image is reuploaded when accessed again.
image.flags |= ImageFlagBits::CpuModified;
// Untrack image, so the range is unprotected and the guest can write freely.
UntrackImage(image_id);
});
}
/// Marks an image as dirty if one is found for the provided address.
/// @param address  Guest address used as the lookup key.
/// @param max_size Byte size passed to the lookup as the guest size.
void TextureCache::MarkWritten(VAddr address, size_t max_size) {
    // Pure lookup: never create an image, and relax the dimension, format and size checks.
    static constexpr FindFlags lookup_flags =
        FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;

    // Only the address and size are filled in; everything else stays value-initialized.
    ImageInfo probe{};
    probe.guest_address = address;
    probe.guest_size_bytes = max_size;

    if (const ImageId found = FindImage(probe, lookup_flags); found) {
        // Ensure image is copied when accessed again.
        slot_images[found].flags |= ImageFlagBits::CpuModified;
    }
}
void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
std::scoped_lock lk{mutex};
@ -199,10 +209,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
!IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) {
continue;
}
ASSERT(cache_image.info.type == info.type);
ASSERT(cache_image.info.type == info.type || True(flags & FindFlags::RelaxFmt));
image_id = cache_id;
}
if (True(flags & FindFlags::NoCreate) && !image_id) {
return {};
}
// Try to resolve overlaps (if any)
if (!image_id) {
for (const auto& cache_id : image_ids) {
@ -211,10 +225,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) {
}
}
if (True(flags & FindFlags::NoCreate) && !image_id) {
return {};
}
// Create and register a new image
if (!image_id) {
image_id = slot_images.insert(instance, scheduler, info);
@ -251,9 +261,6 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) {
const ImageId image_id = FindImage(info);
Image& image = slot_images[image_id];
if (view_info.is_storage) {
image.flags |= ImageFlagBits::GpuModified;
}
UpdateImage(image_id);
auto& usage = image.info.usage;
@ -351,7 +358,6 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
if (False(image.flags & ImageFlagBits::CpuModified)) {
return;
}
// Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified;
@ -485,8 +491,6 @@ void TextureCache::DeleteImage(ImageId image_id) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
image.flags |= ImageFlagBits::Deleted;
// Remove any registered meta areas.
const auto& meta_info = image.info.meta_info;
if (meta_info.cmask_addr) {

View file

@ -50,6 +50,9 @@ public:
/// Invalidates any image in the logical page range.
void InvalidateMemory(VAddr address, size_t size);
/// Marks an image as dirty if it exists at the provided address.
void MarkWritten(VAddr address, size_t max_size);
/// Evicts any images that overlap the unmapped range.
void UnmapMemory(VAddr cpu_addr, size_t size);