texture_cache: Async download of GPU modified linear images (#3204)

* texture_cache: Async download of GPU modified linear images

* liverpool: Back to less submits

* texture_cache: Don't download depth images

* config: Add option for linear image readback
This commit is contained in:
TheTurtle 2025-07-07 16:23:20 +03:00 committed by GitHub
parent d6163a6edb
commit 7fedbd52e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 106 additions and 16 deletions

View file

@ -65,6 +65,7 @@ static u32 screenHeight = 720;
static bool isNullGpu = false; static bool isNullGpu = false;
static bool shouldCopyGPUBuffers = false; static bool shouldCopyGPUBuffers = false;
static bool readbacksEnabled = false; static bool readbacksEnabled = false;
static bool readbackLinearImagesEnabled = false;
static bool directMemoryAccessEnabled = false; static bool directMemoryAccessEnabled = false;
static bool shouldDumpShaders = false; static bool shouldDumpShaders = false;
static bool shouldPatchShaders = false; static bool shouldPatchShaders = false;
@ -103,7 +104,7 @@ u32 m_language = 1; // english
static std::string trophyKey = ""; static std::string trophyKey = "";
// Expected number of items in the config file // Expected number of items in the config file
static constexpr u64 total_entries = 51; static constexpr u64 total_entries = 52;
bool allowHDR() { bool allowHDR() {
return isHDRAllowed; return isHDRAllowed;
@ -262,6 +263,10 @@ bool readbacks() {
return readbacksEnabled; return readbacksEnabled;
} }
/// Returns whether readback of linear images is enabled.
/// The flag is loaded from the "readbackLinearImages" key of the GPU config section
/// and is false by default.
bool readbackLinearImages() {
return readbackLinearImagesEnabled;
}
bool directMemoryAccess() { bool directMemoryAccess() {
return directMemoryAccessEnabled; return directMemoryAccessEnabled;
} }
@ -631,6 +636,8 @@ void load(const std::filesystem::path& path) {
isNullGpu = toml::find_or<bool>(gpu, "nullGpu", isNullGpu); isNullGpu = toml::find_or<bool>(gpu, "nullGpu", isNullGpu);
shouldCopyGPUBuffers = toml::find_or<bool>(gpu, "copyGPUBuffers", shouldCopyGPUBuffers); shouldCopyGPUBuffers = toml::find_or<bool>(gpu, "copyGPUBuffers", shouldCopyGPUBuffers);
readbacksEnabled = toml::find_or<bool>(gpu, "readbacks", readbacksEnabled); readbacksEnabled = toml::find_or<bool>(gpu, "readbacks", readbacksEnabled);
readbackLinearImagesEnabled =
toml::find_or<bool>(gpu, "readbackLinearImages", readbackLinearImagesEnabled);
directMemoryAccessEnabled = directMemoryAccessEnabled =
toml::find_or<bool>(gpu, "directMemoryAccess", directMemoryAccessEnabled); toml::find_or<bool>(gpu, "directMemoryAccess", directMemoryAccessEnabled);
shouldDumpShaders = toml::find_or<bool>(gpu, "dumpShaders", shouldDumpShaders); shouldDumpShaders = toml::find_or<bool>(gpu, "dumpShaders", shouldDumpShaders);
@ -802,6 +809,7 @@ void save(const std::filesystem::path& path) {
data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["nullGpu"] = isNullGpu;
data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers; data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
data["GPU"]["readbacks"] = readbacksEnabled; data["GPU"]["readbacks"] = readbacksEnabled;
data["GPU"]["readbackLinearImages"] = readbackLinearImagesEnabled;
data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled; data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled;
data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["dumpShaders"] = shouldDumpShaders;
data["GPU"]["patchShaders"] = shouldPatchShaders; data["GPU"]["patchShaders"] = shouldPatchShaders;
@ -902,6 +910,7 @@ void setDefaultValues() {
isNullGpu = false; isNullGpu = false;
shouldCopyGPUBuffers = false; shouldCopyGPUBuffers = false;
readbacksEnabled = false; readbacksEnabled = false;
readbackLinearImagesEnabled = false;
directMemoryAccessEnabled = false; directMemoryAccessEnabled = false;
shouldDumpShaders = false; shouldDumpShaders = false;
shouldPatchShaders = false; shouldPatchShaders = false;

View file

@ -47,6 +47,7 @@ bool copyGPUCmdBuffers();
void setCopyGPUCmdBuffers(bool enable); void setCopyGPUCmdBuffers(bool enable);
bool readbacks(); bool readbacks();
void setReadbacks(bool enable); void setReadbacks(bool enable);
bool readbackLinearImages();
bool directMemoryAccess(); bool directMemoryAccess();
void setDirectMemoryAccess(bool enable); void setDirectMemoryAccess(bool enable);
bool dumpShaders(); bool dumpShaders();

View file

@ -135,9 +135,8 @@ void Liverpool::Process(std::stop_token stoken) {
if (submit_done) { if (submit_done) {
VideoCore::EndCapture(); VideoCore::EndCapture();
if (rasterizer) { if (rasterizer) {
rasterizer->ProcessFaults(); rasterizer->EndCommandList();
rasterizer->Flush(); rasterizer->Flush();
} }
submit_done = false; submit_done = false;

View file

@ -112,7 +112,7 @@ public:
/// Invalidates any buffer in the logical page range. /// Invalidates any buffer in the logical page range.
void InvalidateMemory(VAddr device_addr, u64 size); void InvalidateMemory(VAddr device_addr, u64 size);
/// Waits on pending downloads in the logical page range. /// Flushes any GPU modified buffer in the logical page range back to CPU memory.
void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); void ReadMemory(VAddr device_addr, u64 size, bool is_write = false);
/// Binds host vertex buffers for the current draw. /// Binds host vertex buffers for the current draw.

View file

@ -272,6 +272,8 @@ void Rasterizer::EliminateFastClear() {
void Rasterizer::Draw(bool is_indexed, u32 index_offset) { void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
RENDERER_TRACE; RENDERER_TRACE;
scheduler.PopPendingOperations();
if (!FilterDraw()) { if (!FilterDraw()) {
return; return;
} }
@ -317,6 +319,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
u32 max_count, VAddr count_address) { u32 max_count, VAddr count_address) {
RENDERER_TRACE; RENDERER_TRACE;
scheduler.PopPendingOperations();
if (!FilterDraw()) { if (!FilterDraw()) {
return; return;
} }
@ -380,6 +384,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
void Rasterizer::DispatchDirect() { void Rasterizer::DispatchDirect() {
RENDERER_TRACE; RENDERER_TRACE;
scheduler.PopPendingOperations();
const auto& cs_program = liverpool->GetCsRegs(); const auto& cs_program = liverpool->GetCsRegs();
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
if (!pipeline) { if (!pipeline) {
@ -407,6 +413,8 @@ void Rasterizer::DispatchDirect() {
void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
RENDERER_TRACE; RENDERER_TRACE;
scheduler.PopPendingOperations();
const auto& cs_program = liverpool->GetCsRegs(); const auto& cs_program = liverpool->GetCsRegs();
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
if (!pipeline) { if (!pipeline) {
@ -439,11 +447,12 @@ void Rasterizer::Finish() {
scheduler.Finish(); scheduler.Finish();
} }
void Rasterizer::ProcessFaults() { void Rasterizer::EndCommandList() {
if (fault_process_pending) { if (fault_process_pending) {
fault_process_pending = false; fault_process_pending = false;
buffer_cache.ProcessFaultBuffer(); buffer_cache.ProcessFaultBuffer();
} }
texture_cache.ProcessDownloadImages();
} }
bool Rasterizer::BindResources(const Pipeline* pipeline) { bool Rasterizer::BindResources(const Pipeline* pipeline) {
@ -649,8 +658,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
if (instance.IsNullDescriptorSupported()) { if (instance.IsNullDescriptorSupported()) {
image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
} else { } else {
auto& null_image_view = auto& null_image_view = texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc);
texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info);
image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view, image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view,
vk::ImageLayout::eGeneral); vk::ImageLayout::eGeneral);
} }
@ -664,7 +672,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
bound_images.emplace_back(image_id); bound_images.emplace_back(image_id);
auto& image = texture_cache.GetImage(image_id); auto& image = texture_cache.GetImage(image_id);
auto& image_view = texture_cache.FindTexture(image_id, desc.view_info); auto& image_view = texture_cache.FindTexture(image_id, desc);
if (image.binding.force_general || image.binding.is_target) { if (image.binding.force_general || image.binding.is_target) {
image.Transit(vk::ImageLayout::eGeneral, image.Transit(vk::ImageLayout::eGeneral,

View file

@ -68,7 +68,7 @@ public:
void CpSync(); void CpSync();
u64 Flush(); u64 Flush();
void Finish(); void Finish();
void ProcessFaults(); void EndCommandList();
PipelineCache& GetPipelineCache() { PipelineCache& GetPipelineCache() {
return pipeline_cache; return pipeline_cache;

View file

@ -101,6 +101,14 @@ void Scheduler::Wait(u64 tick) {
} }
} }
/// Runs queued operations, in order, for as long as the operation at the head of the
/// queue has a tick that the master semaphore reports as free. Stops at the first
/// operation that is not ready yet.
void Scheduler::PopPendingOperations() {
    // Refresh the semaphore before querying it so the IsFree checks below see its latest state.
    master_semaphore.Refresh();
    while (true) {
        const bool head_is_ready =
            !pending_ops.empty() && master_semaphore.IsFree(pending_ops.front().gpu_tick);
        if (!head_is_ready) {
            break;
        }
        // Invoke the callback first and only then drop the entry from the queue.
        pending_ops.front().callback();
        pending_ops.pop();
    }
}
void Scheduler::AllocateWorkerCommandBuffers() { void Scheduler::AllocateWorkerCommandBuffers() {
const vk::CommandBufferBeginInfo begin_info = { const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
@ -175,10 +183,7 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
AllocateWorkerCommandBuffers(); AllocateWorkerCommandBuffers();
// Apply pending operations // Apply pending operations
while (!pending_ops.empty() && IsFree(pending_ops.front().gpu_tick)) { PopPendingOperations();
pending_ops.front().callback();
pending_ops.pop();
}
} }
void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) { void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) {

View file

@ -317,6 +317,9 @@ public:
/// Waits for the given tick to trigger on the GPU. /// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick); void Wait(u64 tick);
/// Attempts to execute operations whose tick the GPU has caught up with.
void PopPendingOperations();
/// Starts a new rendering scope with provided state. /// Starts a new rendering scope with provided state.
void BeginRendering(const RenderState& new_state); void BeginRendering(const RenderState& new_state);

View file

@ -5,7 +5,9 @@
#include <xxhash.h> #include <xxhash.h>
#include "common/assert.h" #include "common/assert.h"
#include "common/config.h"
#include "common/debug.h" #include "common/debug.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/page_manager.h" #include "video_core/page_manager.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
@ -58,6 +60,50 @@ ImageId TextureCache::GetNullImage(const vk::Format format) {
return null_id; return null_id;
} }
void TextureCache::ProcessDownloadImages() {
for (const ImageId image_id : download_images) {
DownloadImageMemory(image_id);
}
download_images.clear();
}
/// Records a copy of the image into the download utility buffer and defers a write of the
/// copied bytes to the image's guest address. Does nothing when the image is not flagged
/// as GpuModified.
void TextureCache::DownloadImageMemory(ImageId image_id) {
    Image& image = slot_images[image_id];
    if (False(image.flags & ImageFlagBits::GpuModified)) {
        // Image is not flagged as modified by the GPU, so nothing is copied back.
        return;
    }
    const auto& info = image.info;

    // Size of the copy: pitch * height * layers * bytes per element.
    auto& staging_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::Download);
    const auto bytes_per_element = info.num_bits / 8;
    const u32 download_size =
        info.pitch * info.size.height * info.resources.layers * bytes_per_element;
    ASSERT(download_size <= info.guest_size);

    // Reserve space in the download buffer for the copy destination.
    const auto [staging, staging_offset] = staging_buffer.Map(download_size);
    staging_buffer.Commit();

    // Only mip 0 is copied, covering every layer of the image.
    const auto aspect = info.IsDepthStencil() ? vk::ImageAspectFlagBits::eDepth
                                              : vk::ImageAspectFlagBits::eColor;
    const vk::ImageSubresourceLayers subresource = {
        .aspectMask = aspect,
        .mipLevel = 0,
        .baseArrayLayer = 0,
        .layerCount = info.resources.layers,
    };
    const vk::BufferImageCopy copy_region = {
        .bufferOffset = staging_offset,
        .bufferRowLength = info.pitch,
        .bufferImageHeight = info.size.height,
        .imageSubresource = subresource,
        .imageOffset = {0, 0, 0},
        .imageExtent = {info.size.width, info.size.height, 1},
    };

    // End the current rendering scope before recording the transfer.
    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();
    image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
    cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal,
                             staging_buffer.Handle(), copy_region);

    // The write to guest memory is deferred through the scheduler rather than done here.
    // NOTE(review): presumably the callback runs once the GPU has finished this copy - confirm
    // against Scheduler::DeferOperation.
    scheduler.DeferOperation([device_addr = info.guest_address, staging, download_size] {
        auto* memory = Core::Memory::Instance();
        memory->TryWriteBacking(std::bit_cast<u8*>(device_addr), staging, download_size);
    });
}
void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) {
if (image.hash == 0) { if (image.hash == 0) {
// Initialize hash // Initialize hash
@ -437,16 +483,27 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
return slot_image_views[view_id]; return slot_image_views[view_id];
} }
ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) { ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
if (desc.type == BindingType::Storage) {
image.flags |= ImageFlagBits::GpuModified;
if (Config::readbackLinearImages() &&
image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear) {
download_images.emplace(image_id);
}
}
UpdateImage(image_id); UpdateImage(image_id);
return RegisterImageView(image_id, view_info); return RegisterImageView(image_id, desc.view_info);
} }
ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
const ImageId image_id = FindImage(desc); const ImageId image_id = FindImage(desc);
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified; image.flags |= ImageFlagBits::GpuModified;
if (Config::readbackLinearImages() &&
image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear) {
download_images.emplace(image_id);
}
image.usage.render_target = 1u; image.usage.render_target = 1u;
UpdateImage(image_id); UpdateImage(image_id);

View file

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <unordered_set>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
@ -105,11 +106,14 @@ public:
/// Evicts any images that overlap the unmapped range. /// Evicts any images that overlap the unmapped range.
void UnmapMemory(VAddr cpu_addr, size_t size); void UnmapMemory(VAddr cpu_addr, size_t size);
/// Schedules a copy of pending images for download back to CPU memory.
void ProcessDownloadImages();
/// Retrieves the image handle of the image with the provided attributes. /// Retrieves the image handle of the image with the provided attributes.
[[nodiscard]] ImageId FindImage(BaseDesc& desc, FindFlags flags = {}); [[nodiscard]] ImageId FindImage(BaseDesc& desc, FindFlags flags = {});
/// Retrieves an image view with the properties of the specified image id. /// Retrieves an image view with the properties of the specified image id.
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info); [[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc);
/// Retrieves the render target with specified properties /// Retrieves the render target with specified properties
[[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc); [[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc);
@ -252,6 +256,9 @@ private:
/// Gets or creates a null image for a particular format. /// Gets or creates a null image for a particular format.
ImageId GetNullImage(vk::Format format); ImageId GetNullImage(vk::Format format);
/// Copies image memory back to CPU.
void DownloadImageMemory(ImageId image_id);
/// Create an image from the given parameters /// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr); [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);
@ -293,6 +300,7 @@ private:
Common::SlotVector<ImageView> slot_image_views; Common::SlotVector<ImageView> slot_image_views;
tsl::robin_map<u64, Sampler> samplers; tsl::robin_map<u64, Sampler> samplers;
tsl::robin_map<vk::Format, ImageId> null_images; tsl::robin_map<vk::Format, ImageId> null_images;
std::unordered_set<ImageId> download_images;
PageTable page_table; PageTable page_table;
std::mutex mutex; std::mutex mutex;