From 8caca4df32c05a11af8351590dcfa0fa5266eb11 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 14 Dec 2024 02:03:42 -0800 Subject: [PATCH] shader_recompiler: Support VK_AMD_shader_image_load_store_lod for IMAGE_STORE_MIP (#1770) * shader_recompiler: Support VK_AMD_shader_image_load_store_lod for IMAGE_STORE_MIP * emit_spirv: Fix missing extension declaration. --- .../backend/spirv/emit_spirv.cpp | 4 ++++ .../backend/spirv/emit_spirv_image.cpp | 17 ++++++++++++----- .../backend/spirv/emit_spirv_instructions.h | 8 ++++---- .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_memory.cpp | 11 ++++++++--- src/shader_recompiler/ir/ir_emitter.cpp | 17 +++++++++-------- src/shader_recompiler/ir/ir_emitter.h | 10 ++++++---- src/shader_recompiler/ir/opcodes.inc | 6 +++--- .../ir/passes/resource_tracking_pass.cpp | 8 +++++--- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_vulkan/vk_instance.cpp | 1 + src/video_core/renderer_vulkan/vk_instance.h | 6 ++++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 13 files changed, 61 insertions(+), 31 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 23800fc49..ab9d6afae 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -222,6 +222,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct ctx.AddCapability(spv::Capability::StorageImageExtendedFormats); ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); + if (profile.supports_image_load_store_lod) { + ctx.AddExtension("SPV_AMD_shader_image_load_store_lod"); + ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD); + } } if (info.has_texel_buffers) { ctx.AddCapability(spv::Capability::SampledBuffer); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 736410dcd..8da9280d0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -168,8 +168,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texels) : texels; } -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, - Id lod, Id ms) { +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset, Id ms) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id result_type = texture.data_types->Get(4); @@ -236,15 +236,22 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], sample) : sample; } -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod) { UNREACHABLE_MSG("SPIR-V Instruction"); } -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color) { +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id color_type = texture.data_types->Get(4); - ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color)); + ImageOperands operands; + if (ctx.profile.supports_image_load_store_lod) { + operands.Add(spv::ImageOperandsMask::Lod, lod); + } else if (lod.value != 0) { + LOG_WARNING(Render, "Image write with LOD not supported by driver"); + } + ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask, + operands.operands); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 4ff53670e..057b0d692 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -392,14 +392,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset); Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const IR::Value& offset, - Id lod, Id ms); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, + const IR::Value& offset, Id ms); Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips); Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx, Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp); -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id lod); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id color); Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2f320a6c7..198cea276 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -276,7 +276,7 @@ public: // Image Memory // MIMG void IMAGE_LOAD(bool has_mip, const GcnInst& inst); - void IMAGE_STORE(const GcnInst& inst); + void IMAGE_STORE(bool has_mip, const GcnInst& inst); void IMAGE_GET_RESINFO(const GcnInst& inst); void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst); void IMAGE_SAMPLE(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 74b9c905d..eadd1c4db 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -98,7 +98,9 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { // Buffer store operations case Opcode::IMAGE_STORE: - return IMAGE_STORE(inst); + return IMAGE_STORE(false, inst); + case Opcode::IMAGE_STORE_MIP: + return IMAGE_STORE(true, inst); // Image misc operations case Opcode::IMAGE_GET_RESINFO: @@ -423,7 +425,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) { } } -void Translator::IMAGE_STORE(const GcnInst& inst) { +void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) { const auto& mimg = inst.control.mimg; IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg data_reg{inst.dst[0].code}; @@ -434,6 +436,9 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1), ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3)); + IR::TextureInstInfo info{}; + info.has_lod.Assign(has_mip); + boost::container::static_vector comps; for (u32 i = 0; i < 4; i++) { if (((mimg.dmask >> i) & 1) == 0) { @@ -443,7 +448,7 @@ void Translator::IMAGE_STORE(const GcnInst& inst) { comps.push_back(ir.GetVectorReg(data_reg++)); } const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); - ir.ImageWrite(handle, body, value, {}); + ir.ImageWrite(handle, body, {}, value, info); } void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 29b406699..3ebc82e64 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1599,9 +1599,9 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref); } -Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, - const U32& lod, const U32& multisampling, TextureInstInfo info) { - return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, offset, lod, multisampling); +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const U32& lod, + const Value& offset, const U32& multisampling, TextureInstInfo info) { + return Inst(Opcode::ImageFetch, Flags{info}, handle, coords, lod, offset, multisampling); } Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod, @@ -1625,13 +1625,14 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords, offset, lod_clamp); } -Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { - return Inst(Opcode::ImageRead, Flags{info}, handle, coords); +Value IREmitter::ImageRead(const Value& handle, const Value& coords, const U32& lod, + TextureInstInfo info) { + return Inst(Opcode::ImageRead, Flags{info}, handle, coords, lod); } -void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, - TextureInstInfo info) { - Inst(Opcode::ImageWrite, Flags{info}, handle, coords, color); +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& lod, + const Value& color, TextureInstInfo info) { + Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, color); } // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f77e22b82..068aba14d 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -314,14 +314,16 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, const F32& dref, TextureInstInfo info); - [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, - const U32& lod, const U32& multisampling, TextureInstInfo info); + [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const U32& lod, + const Value& offset, const U32& multisampling, + TextureInstInfo info); [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, const Value& derivatives_dx, const Value& derivatives_dy, const Value& offset, const F32& lod_clamp, TextureInstInfo info); - [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); - void ImageWrite(const Value& handle, const Value& coords, const Value& color, + [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, const U32& lod, + TextureInstInfo info); + void ImageWrite(const Value& handle, const Value& coords, const U32& lod, const Value& color, TextureInstInfo info); void EmitVertex(); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 8f40ed985..477275824 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -334,12 +334,12 @@ OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaq OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, ) OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, U32, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, ) OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageRead, U32x4, Opaque, Opaque, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, U32, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32, U32x4, ) // Image atomic operations OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 398579ad4..f436db07a 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -771,14 +771,16 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip inst.SetArg(1, coords); if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2))); + inst.SetArg(3, SwizzleVector(ir, image, inst.Arg(3))); } if (inst_info.has_lod) { - ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch); + ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch || + inst.GetOpcode() == IR::Opcode::ImageRead || + inst.GetOpcode() == IR::Opcode::ImageWrite); ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa && image.GetType() != AmdGpu::ImageType::Color2DMsaaArray); - inst.SetArg(3, arg); + inst.SetArg(2, arg); } else if (image.GetType() == AmdGpu::ImageType::Color2DMsaa || image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) { inst.SetArg(4, arg); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 96c458d44..c00e37f9c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -23,6 +23,7 @@ struct Profile { bool support_fp32_denorm_flush{}; bool support_explicit_workgroup_layout{}; bool support_legacy_vertex_attributes{}; + bool supports_image_load_store_lod{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 81784eb60..2f9695055 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -267,6 +267,7 @@ bool Instance::CreateDevice() { list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); + image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 81303c9cc..2b4bd612f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -158,6 +158,11 @@ public: return legacy_vertex_attributes; } + /// Returns true when VK_AMD_shader_image_load_store_lod is supported. + bool IsImageLoadStoreLodSupported() const { + return image_load_store_lod; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -327,6 +332,7 @@ private: bool maintenance5{}; bool list_restart{}; bool legacy_vertex_attributes{}; + bool image_load_store_lod{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0fa77e19b..ff27b742f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -172,6 +172,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_explicit_workgroup_layout = true, .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), + .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, };