diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 93fb81df4..02f290140 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -271,7 +271,8 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
     if (info.has_image_query) {
         ctx.AddCapability(spv::Capability::ImageQuery);
     }
-    if (info.uses_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) {
+    if ((info.uses_image_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) ||
+        (info.uses_buffer_atomic_float_min_max && profile.supports_buffer_fp32_atomic_min_max)) {
         ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
         ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 79f47a6a0..97e455ff8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -50,9 +50,17 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
     });
 }
 
+template <bool is_float = false>
 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
+    const auto type = [&] {
+        if constexpr (is_float) {
+            return ctx.F32[1];
+        } else {
+            return ctx.U32[1];
+        }
+    }();
     if (Sirit::ValidId(buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
     }
@@ -60,8 +68,8 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
     const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
-        return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
+    return AccessBoundsCheck<32, 1, is_float>(ctx, index, buffer.size_dwords, [&] {
+        return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
     });
 }
 
@@ -196,6 +204,24 @@ Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
 }
 
+Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
+        return BufferAtomicU32<true>(ctx, inst, handle, address, value,
+                                     &Sirit::Module::OpAtomicFMin);
+    }
+
+    const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
+    const auto sign_bit_set =
+        ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
+
+    const auto result = ctx.OpSelect(
+        ctx.F32[1], sign_bit_set,
+        EmitBitCastF32U32(ctx, EmitBufferAtomicUMax32(ctx, inst, handle, address, u32_value)),
+        EmitBitCastF32U32(ctx, EmitBufferAtomicSMin32(ctx, inst, handle, address, u32_value)));
+
+    return result;
+}
+
 Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax);
 }
@@ -204,6 +230,24 @@ Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
 }
 
+Id EmitBufferAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
+        return BufferAtomicU32<true>(ctx, inst, handle, address, value,
+                                     &Sirit::Module::OpAtomicFMax);
+    }
+
+    const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
+    const auto sign_bit_set =
+        ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
+
+    const auto result = ctx.OpSelect(
+        ctx.F32[1], sign_bit_set,
+        EmitBitCastF32U32(ctx, EmitBufferAtomicUMin32(ctx, inst, handle, address, u32_value)),
+        EmitBitCastF32U32(ctx, EmitBufferAtomicSMax32(ctx, inst, handle, address, u32_value)));
+
+    return result;
+}
+
 Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     return BufferAtomicU32IncDec(ctx, inst, handle, address, &Sirit::Module::OpAtomicIIncrement);
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index daf1b973e..12d4fa671 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -92,8 +92,10 @@ Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
 Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
 Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 3451358b6..a102ebf99 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -90,6 +90,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
         return BUFFER_ATOMIC(AtomicOp::Inc, inst);
     case Opcode::BUFFER_ATOMIC_DEC:
         return BUFFER_ATOMIC(AtomicOp::Dec, inst);
+    case Opcode::BUFFER_ATOMIC_FMIN:
+        return BUFFER_ATOMIC(AtomicOp::Fmin, inst);
+    case Opcode::BUFFER_ATOMIC_FMAX:
+        return BUFFER_ATOMIC(AtomicOp::Fmax, inst);
 
     // MIMG
     // Image load operations
@@ -357,6 +361,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
         return ir.BufferAtomicInc(handle, address, buffer_info);
     case AtomicOp::Dec:
        return ir.BufferAtomicDec(handle, address, buffer_info);
+    case AtomicOp::Fmin:
+        return ir.BufferAtomicFMin(handle, address, vdata_val, buffer_info);
+    case AtomicOp::Fmax:
+        return ir.BufferAtomicFMax(handle, address, vdata_val, buffer_info);
     default:
         UNREACHABLE();
     }
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index f9b932c1d..6c931da31 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -215,7 +215,8 @@ struct Info {
     bool has_image_query{};
     bool has_perspective_interp{};
     bool has_linear_interp{};
-    bool uses_atomic_float_min_max{};
+    bool uses_buffer_atomic_float_min_max{};
+    bool uses_image_atomic_float_min_max{};
     bool uses_lane_id{};
     bool uses_group_quad{};
     bool uses_group_ballot{};
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 82712c441..ab6535af2 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -504,12 +504,22 @@ Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, con
                      : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
 }
 
+Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, const Value& value,
+                                  BufferInstInfo info) {
+    return Inst(Opcode::BufferAtomicFMin32, Flags{info}, handle, address, value);
+}
+
 Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value,
                                   bool is_signed, BufferInstInfo info) {
     return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value)
                      : Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
 }
 
+Value IREmitter::BufferAtomicFMax(const Value& handle, const Value& address, const Value& value,
+                                  BufferInstInfo info) {
+    return Inst(Opcode::BufferAtomicFMax32, Flags{info}, handle, address, value);
+}
+
 Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, BufferInstInfo info) {
     return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address);
 }
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 982c2dee4..9e2f79978 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -140,8 +140,12 @@ public:
                                          const Value& value, BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
                                          const Value& value, bool is_signed, BufferInstInfo info);
+    [[nodiscard]] Value BufferAtomicFMin(const Value& handle, const Value& address,
+                                         const Value& value, BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
                                          const Value& value, bool is_signed, BufferInstInfo info);
+    [[nodiscard]] Value BufferAtomicFMax(const Value& handle, const Value& address,
+                                         const Value& value, BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
                                         BufferInstInfo info);
     [[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,
diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp
index c2311afea..1ea5c0967 100644
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@@ -71,8 +71,10 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::BufferAtomicISub32:
     case Opcode::BufferAtomicSMin32:
     case Opcode::BufferAtomicUMin32:
+    case Opcode::BufferAtomicFMin32:
     case Opcode::BufferAtomicSMax32:
     case Opcode::BufferAtomicUMax32:
+    case Opcode::BufferAtomicFMax32:
     case Opcode::BufferAtomicInc32:
     case Opcode::BufferAtomicDec32:
     case Opcode::BufferAtomicAnd32:
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 0380cb0e6..179a01945 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -125,8 +125,10 @@ OPCODE(BufferAtomicIAdd64, U64, Opaq
 OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
 OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicFMax32, U32, Opaque, Opaque, F32 )
 OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, )
 OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, )
 OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index a209f7126..e278d10f8 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -21,8 +21,10 @@ bool IsBufferAtomic(const IR::Inst& inst) {
     case IR::Opcode::BufferAtomicISub32:
     case IR::Opcode::BufferAtomicSMin32:
     case IR::Opcode::BufferAtomicUMin32:
+    case IR::Opcode::BufferAtomicFMin32:
     case IR::Opcode::BufferAtomicSMax32:
     case IR::Opcode::BufferAtomicUMax32:
+    case IR::Opcode::BufferAtomicFMax32:
     case IR::Opcode::BufferAtomicInc32:
     case IR::Opcode::BufferAtomicDec32:
     case IR::Opcode::BufferAtomicAnd32:
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 4cd16d18f..b3b4ac36a 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -92,7 +92,11 @@ void Visit(Info& info, const IR::Inst& inst) {
         break;
     case IR::Opcode::ImageAtomicFMax32:
     case IR::Opcode::ImageAtomicFMin32:
-        info.uses_atomic_float_min_max = true;
+        info.uses_image_atomic_float_min_max = true;
+        break;
+    case IR::Opcode::BufferAtomicFMax32:
+    case IR::Opcode::BufferAtomicFMin32:
+        info.uses_buffer_atomic_float_min_max = true;
         break;
     case IR::Opcode::LaneId:
         info.uses_lane_id = true;
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 7d313180f..bcdf86962 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -28,6 +28,7 @@ struct Profile {
     bool supports_native_cube_calc{};
     bool supports_trinary_minmax{};
     bool supports_robust_buffer_access{};
+    bool supports_buffer_fp32_atomic_min_max{};
     bool supports_image_fp32_atomic_min_max{};
     bool supports_workgroup_explicit_memory_layout{};
     bool has_broken_spirv_clamp{};
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index fb489ec78..61ddd3f05 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -281,6 +281,8 @@ bool Instance::CreateDevice() {
     if (shader_atomic_float2) {
         shader_atomic_float2_features =
             feature_chain.get<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
+        LOG_INFO(Render_Vulkan, "- shaderBufferFloat32AtomicMinMax: {}",
+                 shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax);
         LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
                  shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
     }
@@ -433,6 +435,8 @@ bool Instance::CreateDevice() {
             .legacyVertexAttributes = true,
         },
         vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
+            .shaderBufferFloat32AtomicMinMax =
+                shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
             .shaderImageFloat32AtomicMinMax =
                 shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
         },
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index fb13a696a..991bfb031 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -165,6 +165,13 @@ public:
         return amd_shader_trinary_minmax;
     }
 
+    /// Returns true when the shaderBufferFloat32AtomicMinMax feature of
+    /// VK_EXT_shader_atomic_float2 is supported.
+    bool IsShaderAtomicFloatBuffer32MinMaxSupported() const {
+        return shader_atomic_float2 &&
+               shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax;
+    }
+
     /// Returns true when the shaderImageFloat32AtomicMinMax feature of
     /// VK_EXT_shader_atomic_float2 is supported.
     bool IsShaderAtomicFloatImage32MinMaxSupported() const {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 6b1d7e66c..1d8ac4823 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -216,6 +216,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
         // TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
         .supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
+        .supports_buffer_fp32_atomic_min_max =
+            instance_.IsShaderAtomicFloatBuffer32MinMaxSupported(),
        .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
         .supports_workgroup_explicit_memory_layout =
             instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
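
Note on the fallback path (illustration only, not part of the patch): when shaderBufferFloat32AtomicMinMax is unavailable, EmitBufferAtomicFMin32/FMax32 above emulate the float atomic with integer atomics on the raw IEEE-754 bit pattern, selecting the operation by the operand's sign bit. For non-negative operands, signed comparison of the bit patterns matches float ordering, so SMin yields the float minimum; for negative operands, a larger unsigned pattern is a more negative float, so UMax yields it (FMax uses the mirrored pair: SMax / UMin). The host-side C++ sketch below demonstrates that ordering trick under the same assumptions as the shader path (binary32 values, NaN and -0.0 ignored); EmulatedFMin and the test harness are illustrative names, not code from the repository.

// Standalone sketch of the sign-bit select used by the non-atomic-float fallback.
// Assumes IEEE-754 binary32; NaN and -0.0 edge cases are ignored, as in the shader path.
#include <algorithm>
#include <bit>
#include <cmath>
#include <cstdint>
#include <cstdio>

static float EmulatedFMin(float old_value, float value) {
    const uint32_t old_bits = std::bit_cast<uint32_t>(old_value);
    const uint32_t new_bits = std::bit_cast<uint32_t>(value);
    const bool sign_bit_set = (new_bits >> 31) != 0;
    const uint32_t result_bits =
        sign_bit_set
            ? std::max(old_bits, new_bits) // shader path: atomic UMax on the bit pattern
            : static_cast<uint32_t>(std::min(static_cast<int32_t>(old_bits),
                                             static_cast<int32_t>(new_bits))); // atomic SMin
    return std::bit_cast<float>(result_bits);
}

int main() {
    const float cases[][2] = {{1.5f, 2.0f}, {-1.5f, 2.0f}, {-1.5f, -3.0f}, {0.25f, -0.5f}};
    for (const auto& c : cases) {
        std::printf("min(%g, %g) -> emulated %g, reference %g\n", c[0], c[1],
                    EmulatedFMin(c[0], c[1]), std::fmin(c[0], c[1]));
    }
    return 0;
}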