From d1f5a7e8fbfc247888bc914cc615b75927013bc7 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Sun, 6 Jul 2025 15:03:59 -0500 Subject: [PATCH 1/7] libkernel mprotect export (#3199) --- src/core/libraries/kernel/memory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 114a096ca..8153b7610 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -711,6 +711,7 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) { sceKernelConfiguredFlexibleMemorySize); LIB_FUNCTION("vSMAm3cxYTY", "libkernel", 1, "libkernel", 1, 1, sceKernelMprotect); + LIB_FUNCTION("YQOfxL4QfeU", "libkernel", 1, "libkernel", 1, 1, posix_mprotect); LIB_FUNCTION("YQOfxL4QfeU", "libScePosix", 1, "libkernel", 1, 1, posix_mprotect); LIB_FUNCTION("9bfdLIyuwCY", "libkernel", 1, "libkernel", 1, 1, sceKernelMtypeprotect); From 5eef2fd28ad6c8b38e7dfc1157af1045d86d68a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Mon, 7 Jul 2025 11:26:27 +0200 Subject: [PATCH 2/7] mmap executable memory (#3201) --- src/core/address_space.cpp | 12 ++++++++++-- src/core/libraries/kernel/memory.cpp | 3 ++- src/core/memory.cpp | 9 ++++++--- src/core/memory.h | 1 + 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 2e29f70ee..846bb5eb4 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -358,9 +358,17 @@ enum PosixPageProtection { [[nodiscard]] constexpr PosixPageProtection ToPosixProt(Core::MemoryProt prot) { if (True(prot & Core::MemoryProt::CpuReadWrite) || True(prot & Core::MemoryProt::GpuReadWrite)) { - return PAGE_READWRITE; + if (True(prot & Core::MemoryProt::CpuExec)) { + return PAGE_EXECUTE_READWRITE; + } else { + return PAGE_READWRITE; + } } else if (True(prot & Core::MemoryProt::CpuRead) || True(prot & Core::MemoryProt::GpuRead)) { - return PAGE_READONLY; + if (True(prot & Core::MemoryProt::CpuExec)) { + return PAGE_EXECUTE_READ; + } else { + return PAGE_READONLY; + } } else { return PAGE_NOACCESS; } diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 8153b7610..e0c359f2c 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -573,11 +573,12 @@ void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, s32 prot, s32 flags, s32 fd, auto* memory = Core::Memory::Instance(); const auto mem_prot = static_cast(prot); const auto mem_flags = static_cast(flags); + const auto is_exec = True(mem_prot & Core::MemoryProt::CpuExec); s32 result = ORBIS_OK; if (fd == -1) { result = memory->MapMemory(&addr_out, std::bit_cast(addr), len, mem_prot, mem_flags, - Core::VMAType::Flexible); + Core::VMAType::Flexible, "anon", is_exec); } else { result = memory->MapFile(&addr_out, std::bit_cast(addr), len, mem_prot, mem_flags, fd, phys_addr); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index e7ecf8d80..3d9bf58a7 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -631,6 +631,9 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea vma_base, u64 size if (True(prot & MemoryProt::CpuReadWrite)) { perms |= Core::MemoryPermission::ReadWrite; } + if (True(prot & MemoryProt::CpuExec)) { + perms |= Core::MemoryPermission::Execute; + } if (True(prot & MemoryProt::GpuRead)) { perms |= Core::MemoryPermission::Read; } @@ -650,9 +653,9 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) { std::scoped_lock lk{mutex}; // Validate protection flags - constexpr static MemoryProt valid_flags = MemoryProt::NoAccess | MemoryProt::CpuRead | - MemoryProt::CpuReadWrite | MemoryProt::GpuRead | - MemoryProt::GpuWrite | MemoryProt::GpuReadWrite; + constexpr static MemoryProt valid_flags = + MemoryProt::NoAccess | MemoryProt::CpuRead | MemoryProt::CpuReadWrite | + MemoryProt::CpuExec | MemoryProt::GpuRead | MemoryProt::GpuWrite | MemoryProt::GpuReadWrite; MemoryProt invalid_flags = prot & ~valid_flags; if (invalid_flags != MemoryProt::NoAccess) { diff --git a/src/core/memory.h b/src/core/memory.h index c800ef763..285d7dbed 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -31,6 +31,7 @@ enum class MemoryProt : u32 { NoAccess = 0, CpuRead = 1, CpuReadWrite = 2, + CpuExec = 4, GpuRead = 16, GpuWrite = 32, GpuReadWrite = 48, From 146e81a56a2e83420521be270a6dafec5e6d3b8d Mon Sep 17 00:00:00 2001 From: Paris Oplopoios Date: Mon, 7 Jul 2025 12:44:06 +0300 Subject: [PATCH 3/7] Fix V_ADDC_U32 carry-out edge cases (#3200) * Fix V_ADDC_U32 carry-out edge cases * Use IAddCarry instead --- .../frontend/translate/vector_alu.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 54f1088f2..5a80855d3 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -623,12 +623,15 @@ void Translator::V_ADDC_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 carry{GetCarryIn(inst)}; - const IR::U32 result{ir.IAdd(ir.IAdd(src0, src1), carry)}; - SetDst(inst.dst[0], result); + const IR::Value tmp1{ir.IAddCary(src0, src1)}; + const IR::U32 result1{ir.CompositeExtract(tmp1, 0)}; + const IR::U32 carry_out1{ir.CompositeExtract(tmp1, 1)}; + const IR::Value tmp2{ir.IAddCary(result1, carry)}; + const IR::U32 result2{ir.CompositeExtract(tmp2, 0)}; + const IR::U32 carry_out2{ir.CompositeExtract(tmp2, 1)}; + SetDst(inst.dst[0], result2); - const IR::U1 less_src0{ir.ILessThan(result, src0, false)}; - const IR::U1 less_src1{ir.ILessThan(result, src1, false)}; - const IR::U1 did_overflow{ir.LogicalOr(less_src0, less_src1)}; + const IR::U1 did_overflow{ir.INotEqual(ir.BitwiseOr(carry_out1, carry_out2), ir.Imm32(0))}; SetCarryOut(inst, did_overflow); } From 70eef0de903aa3e8fe0458c83c03c4cf846e8581 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Mon, 7 Jul 2025 03:03:19 -0700 Subject: [PATCH 4/7] texture_cache: Change depth resolve new image back to max of resources. (#3205) --- src/video_core/texture_cache/texture_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index a50601af6..aa6563a84 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -169,7 +169,7 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi if (recreate) { auto new_info = requested_info; - new_info.resources = std::min(requested_info.resources, cache_image.info.resources); + new_info.resources = std::max(requested_info.resources, cache_image.info.resources); const auto new_image_id = slot_images.insert(instance, scheduler, new_info); RegisterImage(new_image_id); From 4eaa992affc1811a99c8ac8a629ee7b2d3b785c0 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios Date: Mon, 7 Jul 2025 13:29:11 +0300 Subject: [PATCH 5/7] Rename 'AddCary' to 'AddCarry' (#3206) --- src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | 2 +- src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp | 2 +- src/shader_recompiler/frontend/translate/vector_alu.cpp | 4 ++-- src/shader_recompiler/ir/ir_emitter.cpp | 4 ++-- src/shader_recompiler/ir/ir_emitter.h | 2 +- src/shader_recompiler/ir/opcodes.inc | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 1ac2266bd..6e146c5f6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -353,7 +353,7 @@ Id EmitFPIsInf32(EmitContext& ctx, Id value); Id EmitFPIsInf64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitIAdd64(EmitContext& ctx, Id a, Id b); -Id EmitIAddCary32(EmitContext& ctx, Id a, Id b); +Id EmitIAddCarry32(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitSMulHi(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index ddc1e7574..01652c1cf 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -60,7 +60,7 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { return ctx.OpIAdd(ctx.U64, a, b); } -Id EmitIAddCary32(EmitContext& ctx, Id a, Id b) { +Id EmitIAddCarry32(EmitContext& ctx, Id a, Id b) { return ctx.OpIAddCarry(ctx.full_result_u32x2, a, b); } diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 5a80855d3..74c7ec601 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -623,10 +623,10 @@ void Translator::V_ADDC_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 carry{GetCarryIn(inst)}; - const IR::Value tmp1{ir.IAddCary(src0, src1)}; + const IR::Value tmp1{ir.IAddCarry(src0, src1)}; const IR::U32 result1{ir.CompositeExtract(tmp1, 0)}; const IR::U32 carry_out1{ir.CompositeExtract(tmp1, 1)}; - const IR::Value tmp2{ir.IAddCary(result1, carry)}; + const IR::Value tmp2{ir.IAddCarry(result1, carry)}; const IR::U32 result2{ir.CompositeExtract(tmp2, 0)}; const IR::U32 carry_out2{ir.CompositeExtract(tmp2, 1)}; SetDst(inst.dst[0], result2); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 3d64cc5da..2334777ed 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1424,13 +1424,13 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { } } -Value IREmitter::IAddCary(const U32& a, const U32& b) { +Value IREmitter::IAddCarry(const U32& a, const U32& b) { if (a.Type() != b.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { case Type::U32: - return Inst(Opcode::IAddCary32, a, b); + return Inst(Opcode::IAddCarry32, a, b); default: ThrowInvalidType(a.Type()); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 119e3752e..1c5a8f545 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -254,7 +254,7 @@ public: [[nodiscard]] F32F64 FPMedTri(const F32F64& a, const F32F64& b, const F32F64& c); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); - [[nodiscard]] Value IAddCary(const U32& a, const U32& b); + [[nodiscard]] Value IAddCarry(const U32& a, const U32& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMulHi(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 008f44659..680159132 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -328,7 +328,7 @@ OPCODE(FPCmpClass32, U1, F32, // Integer operations OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(IAddCary32, U32x2, U32, U32, ) +OPCODE(IAddCarry32, U32x2, U32, U32, ) OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) From d6163a6edbdbd6cc96ada7eb523e56a8aab03bc8 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Mon, 7 Jul 2025 13:37:08 +0300 Subject: [PATCH 6/7] uber fix --- src/shader_recompiler/ir/opcodes.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 680159132..553e63f3e 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -328,7 +328,7 @@ OPCODE(FPCmpClass32, U1, F32, // Integer operations OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(IAddCarry32, U32x2, U32, U32, ) +OPCODE(IAddCarry32, U32x2, U32, U32, ) OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) From 7fedbd52e0629c037be631068f609e77a2b27615 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Mon, 7 Jul 2025 16:23:20 +0300 Subject: [PATCH 7/7] texture_cache: Async download of GPU modified linear images (#3204) * texture_cache: Async download of GPU modified linear images * liverpool: Back to less submits * texture_cache: Don't download depth images * config: Add option for linear image readback --- src/common/config.cpp | 11 +++- src/common/config.h | 1 + src/video_core/amdgpu/liverpool.cpp | 3 +- src/video_core/buffer_cache/buffer_cache.h | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 16 +++-- .../renderer_vulkan/vk_rasterizer.h | 2 +- .../renderer_vulkan/vk_scheduler.cpp | 13 ++-- src/video_core/renderer_vulkan/vk_scheduler.h | 3 + .../texture_cache/texture_cache.cpp | 61 ++++++++++++++++++- src/video_core/texture_cache/texture_cache.h | 10 ++- 10 files changed, 106 insertions(+), 16 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index d3a5fa6a1..010fecf95 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -65,6 +65,7 @@ static u32 screenHeight = 720; static bool isNullGpu = false; static bool shouldCopyGPUBuffers = false; static bool readbacksEnabled = false; +static bool readbackLinearImagesEnabled = false; static bool directMemoryAccessEnabled = false; static bool shouldDumpShaders = false; static bool shouldPatchShaders = false; @@ -103,7 +104,7 @@ u32 m_language = 1; // english static std::string trophyKey = ""; // Expected number of items in the config file -static constexpr u64 total_entries = 51; +static constexpr u64 total_entries = 52; bool allowHDR() { return isHDRAllowed; @@ -262,6 +263,10 @@ bool readbacks() { return readbacksEnabled; } +bool readbackLinearImages() { + return readbackLinearImagesEnabled; +} + bool directMemoryAccess() { return directMemoryAccessEnabled; } @@ -631,6 +636,8 @@ void load(const std::filesystem::path& path) { isNullGpu = toml::find_or(gpu, "nullGpu", isNullGpu); shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", shouldCopyGPUBuffers); readbacksEnabled = toml::find_or(gpu, "readbacks", readbacksEnabled); + readbackLinearImagesEnabled = + toml::find_or(gpu, "readbackLinearImages", readbackLinearImagesEnabled); directMemoryAccessEnabled = toml::find_or(gpu, "directMemoryAccess", directMemoryAccessEnabled); shouldDumpShaders = toml::find_or(gpu, "dumpShaders", shouldDumpShaders); @@ -802,6 +809,7 @@ void save(const std::filesystem::path& path) { data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers; data["GPU"]["readbacks"] = readbacksEnabled; + data["GPU"]["readbackLinearImages"] = readbackLinearImagesEnabled; data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled; data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["patchShaders"] = shouldPatchShaders; @@ -902,6 +910,7 @@ void setDefaultValues() { isNullGpu = false; shouldCopyGPUBuffers = false; readbacksEnabled = false; + readbackLinearImagesEnabled = false; directMemoryAccessEnabled = false; shouldDumpShaders = false; shouldPatchShaders = false; diff --git a/src/common/config.h b/src/common/config.h index 931fa68e2..2ed08198a 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -47,6 +47,7 @@ bool copyGPUCmdBuffers(); void setCopyGPUCmdBuffers(bool enable); bool readbacks(); void setReadbacks(bool enable); +bool readbackLinearImages(); bool directMemoryAccess(); void setDirectMemoryAccess(bool enable); bool dumpShaders(); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 9b8c28b66..e264de74a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -135,9 +135,8 @@ void Liverpool::Process(std::stop_token stoken) { if (submit_done) { VideoCore::EndCapture(); - if (rasterizer) { - rasterizer->ProcessFaults(); + rasterizer->EndCommandList(); rasterizer->Flush(); } submit_done = false; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 900a27aee..354d01431 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -112,7 +112,7 @@ public: /// Invalidates any buffer in the logical page range. void InvalidateMemory(VAddr device_addr, u64 size); - /// Waits on pending downloads in the logical page range. + /// Flushes any GPU modified buffer in the logical page range back to CPU memory. void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); /// Binds host vertex buffers for the current draw. diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e4e026485..cca193831 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -272,6 +272,8 @@ void Rasterizer::EliminateFastClear() { void Rasterizer::Draw(bool is_indexed, u32 index_offset) { RENDERER_TRACE; + scheduler.PopPendingOperations(); + if (!FilterDraw()) { return; } @@ -317,6 +319,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 u32 max_count, VAddr count_address) { RENDERER_TRACE; + scheduler.PopPendingOperations(); + if (!FilterDraw()) { return; } @@ -380,6 +384,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 void Rasterizer::DispatchDirect() { RENDERER_TRACE; + scheduler.PopPendingOperations(); + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { @@ -407,6 +413,8 @@ void Rasterizer::DispatchDirect() { void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { RENDERER_TRACE; + scheduler.PopPendingOperations(); + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { @@ -439,11 +447,12 @@ void Rasterizer::Finish() { scheduler.Finish(); } -void Rasterizer::ProcessFaults() { +void Rasterizer::EndCommandList() { if (fault_process_pending) { fault_process_pending = false; buffer_cache.ProcessFaultBuffer(); } + texture_cache.ProcessDownloadImages(); } bool Rasterizer::BindResources(const Pipeline* pipeline) { @@ -649,8 +658,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { - auto& null_image_view = - texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info); + auto& null_image_view = texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc); image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view, vk::ImageLayout::eGeneral); } @@ -664,7 +672,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin bound_images.emplace_back(image_id); auto& image = texture_cache.GetImage(image_id); - auto& image_view = texture_cache.FindTexture(image_id, desc.view_info); + auto& image_view = texture_cache.FindTexture(image_id, desc); if (image.binding.force_general || image.binding.is_target) { image.Transit(vk::ImageLayout::eGeneral, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4a978746c..1e1680258 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -68,7 +68,7 @@ public: void CpSync(); u64 Flush(); void Finish(); - void ProcessFaults(); + void EndCommandList(); PipelineCache& GetPipelineCache() { return pipeline_cache; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index e75a69924..4c4e17fe4 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -101,6 +101,14 @@ void Scheduler::Wait(u64 tick) { } } +void Scheduler::PopPendingOperations() { + master_semaphore.Refresh(); + while (!pending_ops.empty() && master_semaphore.IsFree(pending_ops.front().gpu_tick)) { + pending_ops.front().callback(); + pending_ops.pop(); + } +} + void Scheduler::AllocateWorkerCommandBuffers() { const vk::CommandBufferBeginInfo begin_info = { .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, @@ -175,10 +183,7 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { AllocateWorkerCommandBuffers(); // Apply pending operations - while (!pending_ops.empty() && IsFree(pending_ops.front().gpu_tick)) { - pending_ops.front().callback(); - pending_ops.pop(); - } + PopPendingOperations(); } void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 8ddf00f6a..36fd9c055 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -317,6 +317,9 @@ public: /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick); + /// Attempts to execute operations whose tick the GPU has caught up with. + void PopPendingOperations(); + /// Starts a new rendering scope with provided state. void BeginRendering(const RenderState& new_state); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index aa6563a84..723b95892 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -5,7 +5,9 @@ #include #include "common/assert.h" +#include "common/config.h" #include "common/debug.h" +#include "core/memory.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -58,6 +60,50 @@ ImageId TextureCache::GetNullImage(const vk::Format format) { return null_id; } +void TextureCache::ProcessDownloadImages() { + for (const ImageId image_id : download_images) { + DownloadImageMemory(image_id); + } + download_images.clear(); +} + +void TextureCache::DownloadImageMemory(ImageId image_id) { + Image& image = slot_images[image_id]; + if (False(image.flags & ImageFlagBits::GpuModified)) { + return; + } + auto& download_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::Download); + const u32 download_size = image.info.pitch * image.info.size.height * + image.info.resources.layers * (image.info.num_bits / 8); + ASSERT(download_size <= image.info.guest_size); + const auto [download, offset] = download_buffer.Map(download_size); + download_buffer.Commit(); + const vk::BufferImageCopy image_download = { + .bufferOffset = offset, + .bufferRowLength = image.info.pitch, + .bufferImageHeight = image.info.size.height, + .imageSubresource = + { + .aspectMask = image.info.IsDepthStencil() ? vk::ImageAspectFlagBits::eDepth + : vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = image.info.resources.layers, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {image.info.size.width, image.info.size.height, 1}, + }; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, + download_buffer.Handle(), image_download); + scheduler.DeferOperation([device_addr = image.info.guest_address, download, download_size] { + auto* memory = Core::Memory::Instance(); + memory->TryWriteBacking(std::bit_cast(device_addr), download, download_size); + }); +} + void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { if (image.hash == 0) { // Initialize hash @@ -437,16 +483,27 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo return slot_image_views[view_id]; } -ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) { +ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) { Image& image = slot_images[image_id]; + if (desc.type == BindingType::Storage) { + image.flags |= ImageFlagBits::GpuModified; + if (Config::readbackLinearImages() && + image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear) { + download_images.emplace(image_id); + } + } UpdateImage(image_id); - return RegisterImageView(image_id, view_info); + return RegisterImageView(image_id, desc.view_info); } ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { const ImageId image_id = FindImage(desc); Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; + if (Config::readbackLinearImages() && + image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear) { + download_images.emplace(image_id); + } image.usage.render_target = 1u; UpdateImage(image_id); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 87228b84f..ff8ffb61c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include @@ -105,11 +106,14 @@ public: /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size); + /// Schedules a copy of pending images for download back to CPU memory. + void ProcessDownloadImages(); + /// Retrieves the image handle of the image with the provided attributes. [[nodiscard]] ImageId FindImage(BaseDesc& desc, FindFlags flags = {}); /// Retrieves an image view with the properties of the specified image id. - [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info); + [[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc); /// Retrieves the render target with specified properties [[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc); @@ -252,6 +256,9 @@ private: /// Gets or creates a null image for a particular format. ImageId GetNullImage(vk::Format format); + /// Copies image memory back to CPU. + void DownloadImageMemory(ImageId image_id); + /// Create an image from the given parameters [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr); @@ -293,6 +300,7 @@ private: Common::SlotVector slot_image_views; tsl::robin_map samplers; tsl::robin_map null_images; + std::unordered_set download_images; PageTable page_table; std::mutex mutex;