diff --git a/externals/sirit b/externals/sirit
index 427a42c9e..09a1416ab 160000
--- a/externals/sirit
+++ b/externals/sirit
@@ -1 +1 @@
-Subproject commit 427a42c9ed99b38204d9107bc3dc14e92458acf1
+Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 936f82cd6..ff38bb5d8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -270,6 +270,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
     if (info.has_image_query) {
         ctx.AddCapability(spv::Capability::ImageQuery);
     }
+    if (info.uses_atomic_float_min_max) {
+        ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
+        ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT);
+    }
     if (info.uses_lane_id) {
         ctx.AddCapability(spv::Capability::GroupNonUniform);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 211899714..c3799fb4b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -75,6 +75,14 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
 }
+
+Id ImageAtomicF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
+                  Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+    const auto& texture = ctx.images[handle & 0xFFFF];
+    const Id pointer{ctx.OpImageTexelPointer(ctx.image_f32, texture.id, coords, ctx.ConstU32(0U))};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return (ctx.*atomic_func)(ctx.F32[1], pointer, scope, semantics, value);
+}
 } // Anonymous namespace
 
 Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
@@ -187,6 +195,40 @@ Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
     return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax);
 }
 
+Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
+    if (ctx.profile.supports_image_fp32_atomic_min_max) {
+        return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMax);
+    }
+
+    const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
+    const auto sign_bit_set =
+        ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
+
+    const auto result = ctx.OpSelect(
+        ctx.F32[1], sign_bit_set,
+        EmitBitCastF32U32(ctx, EmitImageAtomicUMin32(ctx, inst, handle, coords, u32_value)),
+        EmitBitCastF32U32(ctx, EmitImageAtomicSMax32(ctx, inst, handle, coords, u32_value)));
+
+    return result;
+}
+
+Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
+    if (ctx.profile.supports_image_fp32_atomic_min_max) {
+        return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMin);
+    }
+
+    const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
+    const auto sign_bit_set =
+        ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
+
+    const auto result = ctx.OpSelect(
+        ctx.F32[1], sign_bit_set,
+        EmitBitCastF32U32(ctx, EmitImageAtomicUMax32(ctx, inst, handle, coords, u32_value)),
+        EmitBitCastF32U32(ctx, EmitImageAtomicSMin32(ctx, inst, handle, coords, u32_value)));
+
+    return result;
+}
+
 Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
     // TODO: This is not yet implemented
     throw NotImplementedException("SPIR-V Instruction");
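When the device reports shaderImageFloat32AtomicMinMax, the backend advertises SPV_EXT_shader_atomic_float_min_max with the AtomicFloat32MinMaxEXT capability and emits OpAtomicFMax/OpAtomicFMin through a float image texel pointer. Without it, EmitImageAtomicFMax32 and EmitImageAtomicFMin32 fall back to the existing integer image atomics and pick the unsigned or signed variant from the operand's sign bit. The snippet below is a host-side sketch of the IEEE-754 ordering fact the selected fallback path relies on; it is not part of the patch, it ignores NaNs, and the helper name is made up for illustration.

```cpp
// Ordering argument behind the integer-atomic fallback (illustrative only):
// for non-NaN binary32 values, non-negative floats compare like their bit
// patterns read as signed integers, while negative floats compare in the
// reverse order of their unsigned bit patterns, so an unsigned min over bit
// patterns yields the float max when the incoming value is negative.
#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>

static float FMaxViaIntegerCompare(float stored, float value) {
    const uint32_t stored_bits = std::bit_cast<uint32_t>(stored);
    const uint32_t value_bits = std::bit_cast<uint32_t>(value);
    const bool sign_bit_set = (value_bits >> 31) != 0;
    // Mirrors the OpSelect in EmitImageAtomicFMax32: UMin when the value is
    // negative, SMax when it is non-negative, both over the raw bit pattern.
    const uint32_t result_bits =
        sign_bit_set
            ? std::min(stored_bits, value_bits)
            : static_cast<uint32_t>(std::max(static_cast<int32_t>(stored_bits),
                                             static_cast<int32_t>(value_bits)));
    return std::bit_cast<float>(result_bits);
}

int main() {
    assert(FMaxViaIntegerCompare(1.5f, 2.25f) == 2.25f);  // both non-negative
    assert(FMaxViaIntegerCompare(-3.0f, -0.5f) == -0.5f); // both negative
    assert(FMaxViaIntegerCompare(-3.0f, 2.25f) == 2.25f); // mixed signs
}
```

EmitImageAtomicFMin32 is the mirror image of this, swapping the UMax and SMin atomics in the select.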
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 079f1005d..269f372d5 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -482,6 +482,8 @@ Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
 Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
 Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
 Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
+Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
+Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
 Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
 Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
 Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 8433251ff..2640030df 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -869,6 +869,7 @@ void EmitContext::DefineImagesAndSamplers() {
     }
     if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
         image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
+        image_f32 = TypePointer(spv::StorageClass::Image, F32[1]);
     }
     if (info.samplers.empty()) {
         return;
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 784748658..38d55e0e4 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -207,6 +207,7 @@ public:
     Id invocation_id{};
     Id subgroup_local_invocation_id{};
     Id image_u32{};
+    Id image_f32{};
 
     Id shared_memory_u8{};
     Id shared_memory_u16{};
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index ed7788d8c..cfc01c58f 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -115,8 +115,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
         return IMAGE_ATOMIC(AtomicOp::Smin, inst);
     case Opcode::IMAGE_ATOMIC_UMIN:
         return IMAGE_ATOMIC(AtomicOp::Umin, inst);
+    case Opcode::IMAGE_ATOMIC_FMIN:
+        return IMAGE_ATOMIC(AtomicOp::Fmin, inst);
     case Opcode::IMAGE_ATOMIC_SMAX:
         return IMAGE_ATOMIC(AtomicOp::Smax, inst);
+    case Opcode::IMAGE_ATOMIC_FMAX:
+        return IMAGE_ATOMIC(AtomicOp::Fmax, inst);
     case Opcode::IMAGE_ATOMIC_UMAX:
         return IMAGE_ATOMIC(AtomicOp::Umax, inst);
     case Opcode::IMAGE_ATOMIC_AND:
@@ -466,6 +470,10 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
         return ir.ImageAtomicIMax(handle, body, value, true, info);
     case AtomicOp::Umax:
         return ir.ImageAtomicUMax(handle, body, value, info);
+    case AtomicOp::Fmax:
+        return ir.ImageAtomicFMax(handle, body, value, info);
+    case AtomicOp::Fmin:
+        return ir.ImageAtomicFMin(handle, body, value, info);
     case AtomicOp::And:
         return ir.ImageAtomicAnd(handle, body, value, info);
     case AtomicOp::Or:
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 8dcf9c5c4..784f8b4d2 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -196,6 +196,7 @@ struct Info {
     bool has_discard{};
     bool has_image_gather{};
     bool has_image_query{};
+    bool uses_atomic_float_min_max{};
     bool uses_lane_id{};
     bool uses_group_quad{};
     bool uses_group_ballot{};
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index e1ebf2206..01d945178 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -1870,6 +1870,16 @@ Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const
     return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
 }
 
+Value IREmitter::ImageAtomicFMax(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicFMax32, Flags{info}, handle, coords, value);
+}
+
+Value IREmitter::ImageAtomicFMin(const Value& handle, const Value& coords, const Value& value,
+                                 TextureInstInfo info) {
+    return Inst(Opcode::ImageAtomicFMin32, Flags{info}, handle, coords, value);
+}
+
 Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
                                  bool is_signed, TextureInstInfo info) {
     return is_signed ? ImageAtomicSMax(handle, coords, value, info)
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index d978b3b4f..8f8a12736 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -321,6 +321,10 @@ public:
                                         const Value& value, TextureInstInfo info);
     [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
                                         const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicFMax(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
+    [[nodiscard]] Value ImageAtomicFMin(const Value& handle, const Value& coords,
+                                        const Value& value, TextureInstInfo info);
     [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
                                         const Value& value, bool is_signed, TextureInstInfo info);
     [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 6f186808c..ab6dbfde9 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -420,6 +420,8 @@ OPCODE(ImageAtomicSMin32, U32, Opaq
 OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
 OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
 OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
+OPCODE(ImageAtomicFMax32, F32, Opaque, Opaque, F32, )
+OPCODE(ImageAtomicFMin32, F32, Opaque, Opaque, F32, )
 OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
 OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
 OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index d739b2da5..f53a0f4d4 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -71,6 +71,10 @@ void Visit(Info& info, const IR::Inst& inst) {
     case IR::Opcode::ImageQueryLod:
         info.has_image_query = true;
         break;
+    case IR::Opcode::ImageAtomicFMax32:
+    case IR::Opcode::ImageAtomicFMin32:
+        info.uses_atomic_float_min_max = true;
+        break;
     case IR::Opcode::LaneId:
         info.uses_lane_id = true;
         break;
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 9aac6230a..853e4854d 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -29,6 +29,7 @@ struct Profile {
     bool supports_native_cube_calc{};
     bool supports_trinary_minmax{};
     bool supports_robust_buffer_access{};
+    bool supports_image_fp32_atomic_min_max{};
     bool has_broken_spirv_clamp{};
     bool lower_left_origin_mode{};
     bool needs_manual_interpolation{};
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 072807124..99f225d79 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -210,7 +210,8 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceRobustness2FeaturesEXT,
         vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
         vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
-        vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
+        vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
+        vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
     features = feature_chain.get().features;
 
     const vk::StructureChain properties_chain = physical_device.getProperties2<
@@ -272,6 +273,13 @@ bool Instance::CreateDevice() {
     image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
     amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
     amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME);
+    shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME);
+    if (shader_atomic_float2) {
+        shader_atomic_float2_features =
+            feature_chain.get<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
+        LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
+                 shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
+    }
     const bool calibrated_timestamps =
         TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
 
@@ -401,6 +409,10 @@ bool Instance::CreateDevice() {
         vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
            .legacyVertexAttributes = true,
        },
+        vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
+            .shaderImageFloat32AtomicMinMax =
+                shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
+        },
 #ifdef __APPLE__
         portability_features,
 #endif
@@ -430,6 +442,9 @@ bool Instance::CreateDevice() {
     if (!legacy_vertex_attributes) {
         device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
     }
+    if (!shader_atomic_float2) {
+        device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
+    }
 
     auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
     if (device_result != vk::Result::eSuccess) {
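On the Vulkan side the patch follows the usual vulkan-hpp structure-chain pattern: query vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT through getFeatures2, enable VK_EXT_shader_atomic_float2 when present, chain the feature struct into the device-create info with only shaderImageFloat32AtomicMinMax requested, and unlink it again when the extension is missing. Below is a condensed, self-contained sketch of that pattern; the function name, queue setup, and error handling are illustrative (the emulator uses its add_extension helper and a ResultValue-style createDeviceUnique), and it assumes a vulkan-hpp build where designated initializers compile, matching the style of the lines above.

```cpp
// Illustrative sketch of the query/enable/unlink pattern used above.
// Assumes vulkan-hpp with VULKAN_HPP_NO_STRUCT_CONSTRUCTORS (for the
// designated initializers) and default exception-based error handling.
#include <vulkan/vulkan.hpp>

vk::UniqueDevice CreateDeviceWithImageFloatMinMax(vk::PhysicalDevice physical_device,
                                                  uint32_t queue_family) {
    // 1. Pull the feature struct out of a getFeatures2 structure chain.
    const auto feature_chain =
        physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
                                     vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
    const auto atomic_float2 =
        feature_chain.get<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();

    const float priority = 1.0f;
    const vk::DeviceQueueCreateInfo queue_info{
        .queueFamilyIndex = queue_family,
        .queueCount = 1,
        .pQueuePriorities = &priority,
    };
    const char* extensions[] = {VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME};

    // 2. Chain the feature struct into the device-create info, requesting only
    //    the one capability the shader recompiler cares about.
    vk::StructureChain<vk::DeviceCreateInfo, vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>
        device_chain{
            vk::DeviceCreateInfo{
                .queueCreateInfoCount = 1,
                .pQueueCreateInfos = &queue_info,
                .enabledExtensionCount = 1,
                .ppEnabledExtensionNames = extensions,
            },
            vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
                .shaderImageFloat32AtomicMinMax = atomic_float2.shaderImageFloat32AtomicMinMax,
            },
        };

    // 3. Drop the struct (and the extension) when the feature is unavailable,
    //    mirroring the unlink() calls above.
    if (!atomic_float2.shaderImageFloat32AtomicMinMax) {
        device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
        device_chain.get<vk::DeviceCreateInfo>().enabledExtensionCount = 0;
    }
    return physical_device.createDeviceUnique(device_chain.get<vk::DeviceCreateInfo>());
}
```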
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index bf9af1f24..573473869 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -165,6 +165,12 @@ public:
         return amd_shader_trinary_minmax;
     }
 
+    /// Returns true when the shaderImageFloat32AtomicMinMax feature of
+    /// VK_EXT_shader_atomic_float2 is supported.
+    bool IsShaderAtomicFloatImage32MinMaxSupported() const {
+        return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax;
+    }
+
     /// Returns true when geometry shaders are supported by the device
     bool IsGeometryStageSupported() const {
         return features.geometryShader;
@@ -336,6 +342,7 @@ private:
     vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
     vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
     vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
+    vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
     vk::DriverIdKHR driver_id;
     vk::UniqueDebugUtilsMessengerEXT debug_callback{};
     std::string vendor_name;
@@ -360,6 +367,7 @@ private:
     bool image_load_store_lod{};
    bool amd_gcn_shader{};
    bool amd_shader_trinary_minmax{};
+    bool shader_atomic_float2{};
    bool portability_subset{};
 };
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 0b991cda0..0a0c81d4c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -206,6 +206,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
         .supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
         .supports_robust_buffer_access = instance_.IsRobustBufferAccess2Supported(),
+        .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
         .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
                                       instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
         .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||