diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12ff0b53a..09fddb3d7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -952,6 +952,10 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
     src/video_core/renderer_vulkan/host_passes/fsr_pass.h
     src/video_core/renderer_vulkan/host_passes/pp_pass.cpp
     src/video_core/renderer_vulkan/host_passes/pp_pass.h
+    src/video_core/texture_cache/blit_helper.cpp
+    src/video_core/texture_cache/blit_helper.h
+    src/video_core/texture_cache/host_compatibility.cpp
+    src/video_core/texture_cache/host_compatibility.h
     src/video_core/texture_cache/image.cpp
     src/video_core/texture_cache/image.h
     src/video_core/texture_cache/image_info.cpp
@@ -965,8 +969,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
     src/video_core/texture_cache/tile_manager.cpp
     src/video_core/texture_cache/tile_manager.h
     src/video_core/texture_cache/types.h
-    src/video_core/texture_cache/host_compatibility.cpp
-    src/video_core/texture_cache/host_compatibility.h
     src/video_core/page_manager.cpp
     src/video_core/page_manager.h
     src/video_core/multi_level_page_table.h
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 47290e7e8..79f47a6a0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -19,8 +19,7 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
     const Id shift_id{ctx.ConstU32(2U)};
     const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
-    const Id pointer{
-        ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
+    const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u32, ctx.shared_memory_u32, index)};
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
         return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
@@ -32,8 +31,7 @@ Id SharedAtomicU32IncDec(EmitContext& ctx, Id offset,
     const Id shift_id{ctx.ConstU32(2U)};
     const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
-    const Id pointer{
-        ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
+    const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u32, ctx.shared_memory_u32, index)};
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
         return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
@@ -45,8 +43,7 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
     const Id shift_id{ctx.ConstU32(3U)};
     const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
-    const Id pointer{
-        ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+    const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index)};
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
         return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
index c59406499..731ccd55a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -14,8 +14,7 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
     return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer =
-            ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
+        const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u16, ctx.shared_memory_u16, index);
         return ctx.OpLoad(ctx.U16, pointer);
     });
 }
@@ -26,8 +25,7 @@ Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
     return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer =
-            ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+        const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u32, ctx.shared_memory_u32, index);
         return ctx.OpLoad(ctx.U32[1], pointer);
     });
 }
@@ -38,8 +36,7 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
     return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer{
-            ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+        const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index);
         return ctx.OpLoad(ctx.U64, pointer);
     });
 }
@@ -50,8 +47,7 @@ void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
     AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer =
-            ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
+        const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u16, ctx.shared_memory_u16, index);
         ctx.OpStore(pointer, value);
         return Id{0};
     });
@@ -63,8 +59,7 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
     AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer =
-            ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+        const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u32, ctx.shared_memory_u32, index);
         ctx.OpStore(pointer, value);
         return Id{0};
     });
@@ -76,8 +71,7 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
     AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer{
-            ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+        const Id pointer = ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index);
         ctx.OpStore(pointer, value);
         return Id{0};
     });
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 0a8f78f72..567c059ae 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -972,7 +972,12 @@ void EmitContext::DefineImagesAndSamplers() {
         const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
         Decorate(id, spv::Decoration::Binding, binding.unified++);
         Decorate(id, spv::Decoration::DescriptorSet, 0U);
-        Name(id, fmt::format("{}_{}{}", stage, "samp", samp_desc.sharp_idx));
+        auto sharp_desc = std::holds_alternative<u32>(samp_desc.sampler)
+                              ? fmt::format("sgpr:{}", std::get<u32>(samp_desc.sampler))
+                              : fmt::format("inline:{:#x}:{:#x}",
+                                            std::get<AmdGpu::Sampler>(samp_desc.sampler).raw0,
+                                            std::get<AmdGpu::Sampler>(samp_desc.sampler).raw1);
+        Name(id, fmt::format("{}_{}{}", stage, "samp", sharp_desc));
         samplers.push_back(id);
         interfaces.push_back(id);
     }
@@ -995,19 +1000,26 @@ void EmitContext::DefineSharedMemory() {
         const u32 num_elements{Common::DivCeil(shared_memory_size, element_size)};
         const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
-        Decorate(array_type, spv::Decoration::ArrayStride, element_size);
-        const Id struct_type{TypeStruct(array_type)};
-        MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);
+        const auto mem_type = [&] {
+            if (num_types > 1) {
+                const Id struct_type{TypeStruct(array_type)};
+                Decorate(struct_type, spv::Decoration::Block);
+                MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);
+                return struct_type;
+            } else {
+                return array_type;
+            }
+        }();
-        const Id pointer = TypePointer(spv::StorageClass::Workgroup, struct_type);
+        const Id pointer = TypePointer(spv::StorageClass::Workgroup, mem_type);
         const Id element_pointer = TypePointer(spv::StorageClass::Workgroup, element_type);
         const Id variable = AddGlobalVariable(pointer, spv::StorageClass::Workgroup);
         Name(variable, name);
         interfaces.push_back(variable);
 
         if (num_types > 1) {
-            Decorate(struct_type, spv::Decoration::Block);
+            Decorate(array_type, spv::Decoration::ArrayStride, element_size);
             Decorate(variable, spv::Decoration::Aliased);
         }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 93c4ed265..1eb7d05c6 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -203,6 +203,14 @@ public:
         return final_result;
     }
 
+    Id EmitSharedMemoryAccess(const Id result_type, const Id shared_mem, const Id index) {
+        if (std::popcount(static_cast<u32>(info.shared_types)) > 1) {
+            return OpAccessChain(result_type, shared_mem, u32_zero_value, index);
+        }
+        // Workgroup layout struct omitted.
+        return OpAccessChain(result_type, shared_mem, index);
+    }
+
     Info& info;
     const RuntimeInfo& runtime_info;
     const Profile& profile;
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 54e8b8ee8..3451358b6 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -531,8 +531,10 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
     // Load first dword of T# and S#. We will use them as the handle that will guide resource
     // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
     // binding index.
-    const IR::Value handle =
-        ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
+    const IR::Value handle = ir.GetScalarReg(tsharp_reg);
+    const IR::Value inline_sampler =
+        ir.CompositeConstruct(ir.GetScalarReg(sampler_reg), ir.GetScalarReg(sampler_reg + 1),
+                              ir.GetScalarReg(sampler_reg + 2), ir.GetScalarReg(sampler_reg + 3));
 
     // Determine how many address registers need to be passed.
     // The image type is unknown, so add all 4 possible base registers and resolve later.
@@ -568,7 +570,8 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
     const IR::Value address4 = get_addr_reg(12);
 
     // Issue the placeholder IR instruction.
-    IR::Value texel = ir.ImageSampleRaw(handle, address1, address2, address3, address4, info);
+    IR::Value texel =
+        ir.ImageSampleRaw(handle, address1, address2, address3, address4, inline_sampler, info);
     if (info.is_depth && !gather) {
         // For non-gather depth sampling, only return a single value.
         texel = ir.CompositeExtract(texel, 0);
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index f25111350..f9b932c1d 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include
+#include <variant>
 #include
 #include
 #include
@@ -91,11 +92,15 @@ struct ImageResource {
 using ImageResourceList = boost::container::small_vector;
 
 struct SamplerResource {
-    u32 sharp_idx;
-    AmdGpu::Sampler inline_sampler{};
+    std::variant<u32, AmdGpu::Sampler> sampler;
     u32 associated_image : 4;
     u32 disable_aniso : 1;
 
+    SamplerResource(u32 sharp_idx, u32 associated_image_, bool disable_aniso_)
+        : sampler{sharp_idx}, associated_image{associated_image_}, disable_aniso{disable_aniso_} {}
+    SamplerResource(AmdGpu::Sampler sampler_)
+        : sampler{sampler_}, associated_image{0}, disable_aniso(0) {}
+
     constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
 };
 using SamplerResourceList = boost::container::small_vector;
@@ -318,7 +323,9 @@ constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept
 }
 
 constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
-    return inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
+    return std::holds_alternative<AmdGpu::Sampler>(sampler)
+               ? std::get<AmdGpu::Sampler>(sampler)
+               : info.ReadUdSharp<AmdGpu::Sampler>(std::get<u32>(sampler));
 }
 
 constexpr AmdGpu::Image FMaskResource::GetSharp(const Info& info) const noexcept {
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 3d7cf71dc..82712c441 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -1964,9 +1964,9 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
 
 Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2,
                                 const Value& address3, const Value& address4,
-                                TextureInstInfo info) {
-    return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3,
-                address4);
+                                const Value& inline_sampler, TextureInstInfo info) {
+    return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3, address4,
+                inline_sampler);
 }
 
 Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 215a35ee9..982c2dee4 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -349,7 +349,8 @@ public:
 
     [[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1,
                                        const Value& address2, const Value& address3,
-                                       const Value& address4, TextureInstInfo info);
+                                       const Value& address4, const Value& inline_sampler,
+                                       TextureInstInfo info);
 
     [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
                                                const F32& bias, const Value& offset,
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 1621d2acf..0380cb0e6 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -412,7 +412,7 @@ OPCODE(ConvertU8U32, U8, U32,
 OPCODE(ConvertU32U8, U32, U8, )
 
 // Image operations
-OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, )
+OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, Opaque, )
 OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, )
 OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
 OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index ba96d1034..a209f7126 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -168,7 +168,7 @@ public:
     u32 Add(const SamplerResource& desc) {
         const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
-            return desc.sharp_idx == existing.sharp_idx;
+            return desc.sampler == existing.sampler;
         })};
         return index;
     }
@@ -351,8 +351,7 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
 void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
     const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
         const auto opcode = inst->GetOpcode();
-        if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
-            opcode == IR::Opcode::ReadConst ||               // IMAGE_LOAD (image only)
+        if (opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
             opcode == IR::Opcode::GetUserData) {
             return inst;
         }
@@ -360,9 +359,7 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
     };
     const auto result = IR::BreadthFirstSearch(&inst, pred);
     ASSERT_MSG(result, "Unable to find image sharp source");
-    const IR::Inst* producer = result.value();
-    const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
-    const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
+    const IR::Inst* tsharp_handle = result.value();
 
     // Read image sharp.
     const auto tsharp = TrackSharp(tsharp_handle, info);
@@ -427,29 +424,32 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
 
     if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
         // Read sampler sharp.
-        const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
-            ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
-            const IR::Value& handle = producer->Arg(1);
+        const auto sampler_binding = [&] -> u32 {
+            const auto sampler = inst.Arg(5).InstRecursive();
+            ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4);
+            const auto handle = sampler->Arg(0);
             // Inline sampler resource.
             if (handle.IsImmediate()) {
-                LOG_WARNING(Render_Vulkan, "Inline sampler detected");
-                const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
-                const auto binding = descriptors.Add(SamplerResource{
-                    .sharp_idx = std::numeric_limits<u32>::max(),
-                    .inline_sampler = inline_sampler,
-                });
-                return {binding, inline_sampler};
+                LOG_DEBUG(Render_Vulkan, "Inline sampler detected");
+                const auto [s1, s2, s3, s4] =
+                    std::tuple{sampler->Arg(0), sampler->Arg(1), sampler->Arg(2), sampler->Arg(3)};
+                ASSERT(s1.IsImmediate() && s2.IsImmediate() && s3.IsImmediate() &&
+                       s4.IsImmediate());
+                const auto inline_sampler = AmdGpu::Sampler{
+                    .raw0 = u64(s2.U32()) << 32 | u64(s1.U32()),
+                    .raw1 = u64(s4.U32()) << 32 | u64(s3.U32()),
+                };
+                const auto binding = descriptors.Add(SamplerResource{inline_sampler});
+                return binding;
+            } else {
+                // Normal sampler resource.
+                const auto ssharp_handle = handle.InstRecursive();
+                const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
+                const auto ssharp = TrackSharp(ssharp_ud, info);
+                const auto binding =
+                    descriptors.Add(SamplerResource{ssharp, image_binding, disable_aniso});
+                return binding;
             }
-            // Normal sampler resource.
-            const auto ssharp_handle = handle.InstRecursive();
-            const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
-            const auto ssharp = TrackSharp(ssharp_ud, info);
-            const auto binding = descriptors.Add(SamplerResource{
-                .sharp_idx = ssharp,
-                .associated_image = image_binding,
-                .disable_aniso = disable_aniso,
-            });
-            return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
         }();
         // Patch image and sampler handle.
         inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index d52afe738..e88147eb5 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -11,6 +11,7 @@ set(SHADER_FILES
     detilers/micro_32bpp.comp
     detilers/micro_64bpp.comp
     detilers/micro_8bpp.comp
+    color_to_ms_depth.frag
     fault_buffer_process.comp
     fs_tri.vert
     fsr.comp
diff --git a/src/video_core/host_shaders/color_to_ms_depth.frag b/src/video_core/host_shaders/color_to_ms_depth.frag
new file mode 100644
index 000000000..e477fc942
--- /dev/null
+++ b/src/video_core/host_shaders/color_to_ms_depth.frag
@@ -0,0 +1,15 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 450 core
+#extension GL_EXT_samplerless_texture_functions : require
+
+layout (binding = 0, set = 0) uniform texture2D color;
+
+layout (location = 0) in vec2 uv;
+
+void main()
+{
+    ivec2 coord = ivec2(uv * vec2(textureSize(color, 0).xy));
+    gl_FragDepth = texelFetch(color, coord, 0)[gl_SampleID];
+}
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 63c0a38d6..fb489ec78 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -355,6 +355,7 @@ bool Instance::CreateDevice() {
             .independentBlend = features.independentBlend,
             .geometryShader = features.geometryShader,
             .tessellationShader = features.tessellationShader,
+            .sampleRateShading = features.sampleRateShading,
            .dualSrcBlend = features.dualSrcBlend,
             .logicOp = features.logicOp,
             .multiDrawIndirect = features.multiDrawIndirect,
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index c30fc6e0e..8ddf00f6a 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -328,6 +328,7 @@ public:
         return render_state;
     }
 
+    /// Returns the current pipeline dynamic state tracking.
     DynamicState& GetDynamicState() {
         return dynamic_state;
     }
diff --git a/src/video_core/texture_cache/blit_helper.cpp b/src/video_core/texture_cache/blit_helper.cpp
new file mode 100644
index 000000000..1ad41be00
--- /dev/null
+++ b/src/video_core/texture_cache/blit_helper.cpp
@@ -0,0 +1,256 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/texture_cache/blit_helper.h"
+#include "video_core/texture_cache/image.h"
+
+#include "video_core/host_shaders/color_to_ms_depth_frag.h"
+#include "video_core/host_shaders/fs_tri_vert.h"
+
+namespace VideoCore {
+
+static vk::SampleCountFlagBits ToSampleCount(u32 num_samples) {
+    switch (num_samples) {
+    case 1:
+        return vk::SampleCountFlagBits::e1;
+    case 2:
+        return vk::SampleCountFlagBits::e2;
+    case 4:
+        return vk::SampleCountFlagBits::e4;
+    case 8:
+        return vk::SampleCountFlagBits::e8;
+    case 16:
+        return vk::SampleCountFlagBits::e16;
+    default:
+        UNREACHABLE_MSG("Unknown samples count = {}", num_samples);
+    }
+}
+
+BlitHelper::BlitHelper(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_)
+    : instance{instance_}, scheduler{scheduler_} {
+    CreateShaders();
+    CreatePipelineLayouts();
+}
+
+BlitHelper::~BlitHelper() = default;
+
+void BlitHelper::BlitColorToMsDepth(Image& source, Image& dest) {
+    source.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {});
+    dest.Transit(vk::ImageLayout::eDepthAttachmentOptimal,
+                 vk::AccessFlagBits2::eDepthStencilAttachmentWrite, {});
+
+    const vk::ImageViewUsageCreateInfo color_usage_ci{.usage = vk::ImageUsageFlagBits::eSampled};
+    const vk::ImageViewCreateInfo color_view_ci = {
+        .pNext = &color_usage_ci,
+        .image = source.image,
+        .viewType = vk::ImageViewType::e2D,
+        .format = source.info.pixel_format,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eColor,
+            .baseMipLevel = 0U,
+            .levelCount = 1U,
+            .baseArrayLayer = 0U,
+            .layerCount = 1U,
+        },
+    };
+    const auto [color_view_result, color_view] =
+        instance.GetDevice().createImageView(color_view_ci);
+    ASSERT_MSG(color_view_result == vk::Result::eSuccess, "Failed to create image view: {}",
+               vk::to_string(color_view_result));
+    const vk::ImageViewUsageCreateInfo depth_usage_ci{
+        .usage = vk::ImageUsageFlagBits::eDepthStencilAttachment};
+    const vk::ImageViewCreateInfo depth_view_ci = {
+        .pNext = &depth_usage_ci,
+        .image = dest.image,
+        .viewType = vk::ImageViewType::e2D,
+        .format = dest.info.pixel_format,
+        .subresourceRange{
+            .aspectMask = vk::ImageAspectFlagBits::eDepth,
+            .baseMipLevel = 0U,
+            .levelCount = 1U,
+            .baseArrayLayer = 0U,
+            .layerCount = 1U,
+        },
+    };
+    const auto [depth_view_result, depth_view] =
+        instance.GetDevice().createImageView(depth_view_ci);
+    ASSERT_MSG(depth_view_result == vk::Result::eSuccess, "Failed to create image view: {}",
+               vk::to_string(depth_view_result));
+    scheduler.DeferOperation([device = instance.GetDevice(), color_view, depth_view] {
+        device.destroyImageView(color_view);
+        device.destroyImageView(depth_view);
+    });
+
+    Vulkan::RenderState state{};
+    state.has_depth = true;
+    state.width = dest.info.size.width;
+    state.height = dest.info.size.height;
+    state.depth_attachment = vk::RenderingAttachmentInfo{
+        .imageView = depth_view,
+        .imageLayout = vk::ImageLayout::eDepthAttachmentOptimal,
+        .loadOp = vk::AttachmentLoadOp::eDontCare,
+        .storeOp = vk::AttachmentStoreOp::eStore,
+        .clearValue = vk::ClearValue{.depthStencil = {.depth = 0.f}},
+    };
+    scheduler.BeginRendering(state);
+
+    const auto cmdbuf = scheduler.CommandBuffer();
+    const vk::DescriptorImageInfo image_info = {
+        .sampler = VK_NULL_HANDLE,
+        .imageView = color_view,
+        .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
+    };
+    const vk::WriteDescriptorSet texture_write = {
+        .dstSet = VK_NULL_HANDLE,
+        .dstBinding = 0U,
+        .dstArrayElement = 0U,
+        .descriptorCount = 1U,
+        .descriptorType = vk::DescriptorType::eSampledImage,
+        .pImageInfo = &image_info,
+    };
+    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *single_texture_pl_layout, 0U,
+                                texture_write);
+
+    const DepthPipelineKey key{dest.info.num_samples, dest.info.pixel_format};
+    const vk::Pipeline depth_pipeline = GetDepthToMsPipeline(key);
+    cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_pipeline);
+
+    const vk::Viewport viewport = {
+        .x = 0,
+        .y = 0,
+        .width = float(state.width),
+        .height = float(state.height),
+        .minDepth = 0.f,
+        .maxDepth = 1.f,
+    };
+    cmdbuf.setViewport(0, viewport);
+
+    const vk::Rect2D scissor = {
+        .offset = {0, 0},
+        .extent = {state.width, state.height},
+    };
+    cmdbuf.setScissor(0, scissor);
+
+    cmdbuf.draw(3, 1, 0, 0);
+
+    scheduler.GetDynamicState().Invalidate();
+}
+
+vk::Pipeline BlitHelper::GetDepthToMsPipeline(const DepthPipelineKey& key) {
+    auto it = std::ranges::find(color_to_ms_depth_pl, key, &DepthPipeline::first);
+    if (it != color_to_ms_depth_pl.end()) {
+        return *it->second;
+    }
+    CreateColorToMSDepthPipeline(key);
+    return *color_to_ms_depth_pl.back().second;
+}
+
+void BlitHelper::CreateShaders() {
+    fs_tri_vertex = Vulkan::Compile(HostShaders::FS_TRI_VERT, vk::ShaderStageFlagBits::eVertex,
+                                    instance.GetDevice());
+    color_to_ms_depth_frag =
+        Vulkan::Compile(HostShaders::COLOR_TO_MS_DEPTH_FRAG, vk::ShaderStageFlagBits::eFragment,
+                        instance.GetDevice());
+}
+
+void BlitHelper::CreatePipelineLayouts() {
+    const vk::DescriptorSetLayoutBinding texture_binding = {
+        .binding = 0,
+        .descriptorType = vk::DescriptorType::eSampledImage,
+        .descriptorCount = 1,
+        .stageFlags = vk::ShaderStageFlagBits::eFragment,
+    };
+    const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
+        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
+        .bindingCount = 1U,
+        .pBindings = &texture_binding,
+    };
+    auto [desc_layout_result, desc_layout] =
+        instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
+    single_texture_descriptor_set_layout = std::move(desc_layout);
+    const vk::DescriptorSetLayout set_layout = *single_texture_descriptor_set_layout;
+    const vk::PipelineLayoutCreateInfo layout_info = {
+        .setLayoutCount = 1U,
+        .pSetLayouts = &set_layout,
+        .pushConstantRangeCount = 0U,
+        .pPushConstantRanges = nullptr,
+    };
+    auto [layout_result, pipeline_layout] =
+        instance.GetDevice().createPipelineLayoutUnique(layout_info);
+    ASSERT_MSG(layout_result == vk::Result::eSuccess,
+               "Failed to create graphics pipeline layout: {}", vk::to_string(layout_result));
+    Vulkan::SetObjectName(instance.GetDevice(), *pipeline_layout, "Single texture pipeline layout");
+    single_texture_pl_layout = std::move(pipeline_layout);
+}
+
+void BlitHelper::CreateColorToMSDepthPipeline(const DepthPipelineKey& key) {
+    const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
+        .topology = vk::PrimitiveTopology::eTriangleList,
+    };
+    const vk::PipelineMultisampleStateCreateInfo multisampling = {
+        .rasterizationSamples = ToSampleCount(key.num_samples),
+    };
+    const vk::PipelineDepthStencilStateCreateInfo depth_state = {
+        .depthTestEnable = true,
+        .depthWriteEnable = true,
+        .depthCompareOp = vk::CompareOp::eAlways,
+    };
+    const std::array dynamic_states = {vk::DynamicState::eViewportWithCount,
+                                       vk::DynamicState::eScissorWithCount};
+    const vk::PipelineDynamicStateCreateInfo dynamic_info = {
+        .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
+        .pDynamicStates = dynamic_states.data(),
+    };
+
+    std::array<vk::PipelineShaderStageCreateInfo, 2> shader_stages;
+    shader_stages[0] = {
+        .stage = vk::ShaderStageFlagBits::eVertex,
+        .module = fs_tri_vertex,
+        .pName = "main",
+    };
+    shader_stages[1] = {
+        .stage = vk::ShaderStageFlagBits::eFragment,
+        .module = color_to_ms_depth_frag,
+        .pName = "main",
+    };
+
+    const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
+        .colorAttachmentCount = 0U,
+        .pColorAttachmentFormats = nullptr,
+        .depthAttachmentFormat = key.depth_format,
+        .stencilAttachmentFormat = vk::Format::eUndefined,
+    };
+
+    const vk::PipelineColorBlendStateCreateInfo color_blending{};
+    const vk::PipelineViewportStateCreateInfo viewport_info{};
+    const vk::PipelineVertexInputStateCreateInfo vertex_input_info{};
+    const vk::PipelineRasterizationStateCreateInfo raster_state{.lineWidth = 1.f};
+
+    const vk::GraphicsPipelineCreateInfo pipeline_info = {
+        .pNext = &pipeline_rendering_ci,
+        .stageCount = static_cast<u32>(shader_stages.size()),
+        .pStages = shader_stages.data(),
+        .pVertexInputState = &vertex_input_info,
+        .pInputAssemblyState = &input_assembly,
+        .pViewportState = &viewport_info,
+        .pRasterizationState = &raster_state,
+        .pMultisampleState = &multisampling,
+        .pDepthStencilState = &depth_state,
+        .pColorBlendState = &color_blending,
+        .pDynamicState = &dynamic_info,
+        .layout = *single_texture_pl_layout,
+    };
+
+    auto [pipeline_result, pipeline] =
+        instance.GetDevice().createGraphicsPipelineUnique(VK_NULL_HANDLE, pipeline_info);
+    ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
+               vk::to_string(pipeline_result));
+    Vulkan::SetObjectName(instance.GetDevice(), *pipeline, "Color to MS Depth {}", key.num_samples);
+
+    color_to_ms_depth_pl.emplace_back(key, std::move(pipeline));
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/texture_cache/blit_helper.h b/src/video_core/texture_cache/blit_helper.h
new file mode 100644
index 000000000..8c506bd0b
--- /dev/null
+++ b/src/video_core/texture_cache/blit_helper.h
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+
+#include "common/types.h"
+#include "video_core/renderer_vulkan/vk_common.h"
+
+namespace Vulkan {
+class Instance;
+class Scheduler;
+} // namespace Vulkan
+
+namespace VideoCore {
+
+class Image;
+class ImageView;
+
+class BlitHelper {
+    static constexpr size_t MaxMsPipelines = 6;
+
+public:
+    explicit BlitHelper(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler);
+    ~BlitHelper();
+
+    void BlitColorToMsDepth(Image& source, Image& dest);
+
+private:
+    void CreateShaders();
+    void CreatePipelineLayouts();
+
+    struct DepthPipelineKey {
+        u32 num_samples;
+        vk::Format depth_format;
+
+        auto operator<=>(const DepthPipelineKey&) const noexcept = default;
+    };
+    vk::Pipeline GetDepthToMsPipeline(const DepthPipelineKey& key);
+    void CreateColorToMSDepthPipeline(const DepthPipelineKey& key);
+
+private:
+    const Vulkan::Instance& instance;
+    Vulkan::Scheduler& scheduler;
+    vk::UniqueDescriptorSetLayout single_texture_descriptor_set_layout;
+    vk::UniquePipelineLayout single_texture_pl_layout;
+    vk::ShaderModule fs_tri_vertex;
+    vk::ShaderModule color_to_ms_depth_frag;
+
+    using DepthPipeline = std::pair<DepthPipelineKey, vk::UniquePipeline>;
+    std::vector<DepthPipeline> color_to_ms_depth_pl{};
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
index 47718a095..dbd7f7cbb 100644
--- a/src/video_core/texture_cache/image_info.h
+++ b/src/video_core/texture_cache/image_info.h
@@ -47,6 +47,7 @@ struct ImageInfo {
         VAddr cmask_addr;
         VAddr fmask_addr;
         VAddr htile_addr;
+        u32 htile_clear_mask{u32(-1)};
     } meta_info{};
 
     struct {
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index a47e858ab..a1ff5db8a 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -22,7 +22,7 @@ static constexpr u64 NumFramesBeforeRemoval = 32;
 TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                            BufferCache& buffer_cache_, PageManager& tracker_)
     : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
-      tile_manager{instance, scheduler} {
+      blit_helper{instance, scheduler}, tile_manager{instance, scheduler} {
     // Create basic null image at fixed image ID.
     const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm);
     ASSERT(null_id.index == NULL_IMAGE_ID.index);
@@ -177,10 +177,20 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi
     auto& new_image = slot_images[new_image_id];
     new_image.usage = cache_image.usage;
     new_image.flags &= ~ImageFlagBits::Dirty;
+    // When creating a depth buffer through overlap resolution don't clear it on first use.
+    new_image.info.meta_info.htile_clear_mask = 0;
 
-    // Perform depth<->color copy using the intermediate copy buffer.
-    const auto& copy_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::DeviceLocal);
-    new_image.CopyImageWithBuffer(cache_image, copy_buffer.Handle(), 0);
+    if (cache_image.info.num_samples == 1 && new_info.num_samples == 1) {
+        // Perform depth<->color copy using the intermediate copy buffer.
+        const auto& copy_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::DeviceLocal);
+        new_image.CopyImageWithBuffer(cache_image, copy_buffer.Handle(), 0);
+    } else if (cache_image.info.num_samples == 1 && new_info.IsDepthStencil() &&
+               new_info.num_samples > 1) {
+        // Perform a rendering pass to transfer the channels of source as samples in dest.
+        blit_helper.BlitColorToMsDepth(cache_image, new_image);
+    } else {
+        LOG_WARNING(Render_Vulkan, "Unimplemented depth overlap copy");
+    }
 
     // Free the cache image.
     FreeImage(cache_image_id);
@@ -202,7 +212,8 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag
     if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
         if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
-            image_info.num_bits != tex_cache_image.info.num_bits) {
+            image_info.num_bits * image_info.num_samples !=
+                tex_cache_image.info.num_bits * tex_cache_image.info.num_samples) {
             // Very likely this kind of overlap is caused by allocation from a pool.
             if (safe_to_delete) {
                 FreeImage(cache_image_id);
             }
             return {image_id, -1, -1};
@@ -470,8 +481,10 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
     // Register meta data for this depth buffer
     if (!(image.flags & ImageFlagBits::MetaRegistered)) {
         if (desc.info.meta_info.htile_addr) {
-            surface_metas.emplace(desc.info.meta_info.htile_addr,
-                                  MetaDataInfo{.type = MetaDataInfo::Type::HTile});
+            surface_metas.emplace(
+                desc.info.meta_info.htile_addr,
+                MetaDataInfo{.type = MetaDataInfo::Type::HTile,
+                             .clear_mask = image.info.meta_info.htile_clear_mask});
             image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
             image.flags |= ImageFlagBits::MetaRegistered;
         }
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index ccfeb36b2..87228b84f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -9,6 +9,7 @@
 #include "common/slot_vector.h"
 #include "video_core/amdgpu/resource.h"
 #include "video_core/multi_level_page_table.h"
+#include "video_core/texture_cache/blit_helper.h"
 #include "video_core/texture_cache/image.h"
 #include "video_core/texture_cache/image_view.h"
 #include "video_core/texture_cache/sampler.h"
@@ -286,6 +287,7 @@ private:
     Vulkan::Scheduler& scheduler;
     BufferCache& buffer_cache;
     PageManager& tracker;
+    BlitHelper blit_helper;
     TileManager tile_manager;
     Common::SlotVector<Image> slot_images;
     Common::SlotVector<ImageView> slot_image_views;
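
Reviewer note (not part of the patch): a minimal sketch of how the reworked SamplerResource is resolved, mirroring SamplerResource::GetSharp() in info.h above. It assumes the variant alternatives u32 and AmdGpu::Sampler declared in this diff; the free-function name and namespace qualifiers are illustrative only.

    // Illustrative only: resolve either an inline S# or a user-data sharp index.
    AmdGpu::Sampler ResolveSamplerSharp(const Shader::Info& info, const Shader::SamplerResource& res) {
        if (std::holds_alternative<AmdGpu::Sampler>(res.sampler)) {
            // Inline sampler: assembled by the resource tracking pass from four immediate dwords.
            return std::get<AmdGpu::Sampler>(res.sampler);
        }
        // Otherwise the variant holds the S# user-data index, read through Info.
        return info.ReadUdSharp<AmdGpu::Sampler>(std::get<u32>(res.sampler));
    }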