From 1757dfaf5a8d7d59601013da80c72e8de7d538a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?=
Date: Mon, 30 Jun 2025 01:16:47 +0200
Subject: [PATCH] buffer_atomic_imax_x2 (#3130)

* buffer_atomic_imax_x2

* Define Int64Atomics SPIR-V capability
---
 .../backend/spirv/emit_spirv.cpp               | 13 +++++++++++++
 .../backend/spirv/emit_spirv_atomic.cpp        |  8 ++++++++
 .../backend/spirv/emit_spirv_instructions.h    |  2 ++
 .../frontend/translate/translate.h             |  1 +
 .../frontend/translate/vector_memory.cpp       | 17 ++++++++++++++++-
 src/shader_recompiler/info.h                   |  2 ++
 src/shader_recompiler/ir/ir_emitter.cpp        | 12 ++++++++++--
 src/shader_recompiler/ir/microinstruction.cpp  |  2 ++
 src/shader_recompiler/ir/opcodes.inc           |  2 ++
 .../ir/passes/resource_tracking_pass.cpp       |  2 ++
 .../ir/passes/shader_info_collection_pass.cpp  |  9 ++++++++-
 src/shader_recompiler/profile.h                |  2 ++
 src/video_core/renderer_vulkan/vk_instance.cpp |  4 +++-
 src/video_core/renderer_vulkan/vk_instance.h   | 11 +++++++++++
 .../renderer_vulkan/vk_pipeline_cache.cpp      |  2 ++
 15 files changed, 84 insertions(+), 5 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index b5b18eed1..c4c310586 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -310,6 +310,19 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
         ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
         ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
     }
+    if (info.uses_buffer_int64_atomics || info.uses_shared_int64_atomics) {
+        if (info.uses_buffer_int64_atomics) {
+            ASSERT_MSG(ctx.profile.supports_buffer_int64_atomics,
+                       "Shader requires support for atomic Int64 buffer operations that your "
+                       "Vulkan instance does not advertise");
+        }
+        if (info.uses_shared_int64_atomics) {
+            ASSERT_MSG(ctx.profile.supports_shared_int64_atomics,
+                       "Shader requires support for atomic Int64 shared memory operations that "
+                       "your Vulkan instance does not advertise");
+        }
+        ctx.AddCapability(spv::Capability::Int64Atomics);
+    }
 }
 
 void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 3c833b87d..85e93f3fb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -226,10 +226,18 @@ Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax);
 }
 
+Id EmitBufferAtomicSMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax);
+}
+
 Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
 }
 
+Id EmitBufferAtomicUMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
+}
+
 Id EmitBufferAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
         return BufferAtomicU32(ctx, inst, handle, address, value,
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 12d4fa671..15a8fd99b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -94,7 +94,9 @@ Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicSMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicUMax64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
 Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
 Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 086b325aa..ece334bcd 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -291,6 +291,7 @@ public:
     void BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed, const GcnInst& inst);
     void BUFFER_STORE(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed,
                       const GcnInst& inst);
+    template <typename T = IR::U32>
     void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
 
     // Image Memory
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index a102ebf99..8dcf70a07 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -78,8 +78,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
         return BUFFER_ATOMIC(AtomicOp::Umin, inst);
     case Opcode::BUFFER_ATOMIC_SMAX:
         return BUFFER_ATOMIC(AtomicOp::Smax, inst);
+    case Opcode::BUFFER_ATOMIC_SMAX_X2:
+        return BUFFER_ATOMIC<IR::U64>(AtomicOp::Smax, inst);
     case Opcode::BUFFER_ATOMIC_UMAX:
         return BUFFER_ATOMIC(AtomicOp::Umax, inst);
+    case Opcode::BUFFER_ATOMIC_UMAX_X2:
+        return BUFFER_ATOMIC<IR::U64>(AtomicOp::Umax, inst);
     case Opcode::BUFFER_ATOMIC_AND:
         return BUFFER_ATOMIC(AtomicOp::And, inst);
     case Opcode::BUFFER_ATOMIC_OR:
@@ -304,6 +308,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_inst_typed, bool is_buffer
     }
 }
 
+template <typename T>
 void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
     const auto& mubuf = inst.control.mubuf;
     const IR::VectorReg vaddr{inst.src[0].code};
@@ -328,7 +333,17 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
     buffer_info.globally_coherent.Assign(mubuf.glc);
     buffer_info.system_coherent.Assign(mubuf.slc);
 
-    IR::Value vdata_val = ir.GetVectorReg(vdata);
+    IR::Value vdata_val = [&] {
+        if constexpr (std::is_same_v<T, IR::U32>) {
+            return ir.GetVectorReg(vdata);
+        } else if constexpr (std::is_same_v<T, IR::U64>) {
+            return ir.PackUint2x32(
+                ir.CompositeConstruct(ir.GetVectorReg(vdata),
+                                      ir.GetVectorReg(vdata + 1)));
+        } else {
+            static_assert(false, "buffer_atomic: type not supported");
+        }
+    }();
     const IR::Value handle =
         ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
                              ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index eb56f28f6..5d159275b 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -226,6 +226,8 @@ struct Info {
     bool uses_fp64{};
     bool uses_pack_10_11_11{};
    bool uses_unpack_10_11_11{};
+    bool uses_buffer_int64_atomics{};
+    bool uses_shared_int64_atomics{};
     bool stores_tess_level_outer{};
     bool stores_tess_level_inner{};
     bool translation_failed{};
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index ab6535af2..2497864c0 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -511,8 +511,16 @@ Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, con
 
 Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value,
                                   bool is_signed, BufferInstInfo info) {
-    return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value)
-                     : Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
+    switch (value.Type()) {
+    case Type::U32:
+        return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value)
+                         : Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
+    case Type::U64:
+        return is_signed ? Inst(Opcode::BufferAtomicSMax64, Flags{info}, handle, address, value)
+                         : Inst(Opcode::BufferAtomicUMax64, Flags{info}, handle, address, value);
+    default:
+        ThrowInvalidType(value.Type());
+    }
 }
 
 Value IREmitter::BufferAtomicFMax(const Value& handle, const Value& address, const Value& value,
diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp
index 1ea5c0967..8d46a0071 100644
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@@ -73,7 +73,9 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::BufferAtomicUMin32:
     case Opcode::BufferAtomicFMin32:
     case Opcode::BufferAtomicSMax32:
+    case Opcode::BufferAtomicSMax64:
     case Opcode::BufferAtomicUMax32:
+    case Opcode::BufferAtomicUMax64:
     case Opcode::BufferAtomicFMax32:
     case Opcode::BufferAtomicInc32:
     case Opcode::BufferAtomicDec32:
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 179a01945..7fc514de9 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -127,7 +127,9 @@ OPCODE(BufferAtomicSMin32, U32, Opaq
 OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
 OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
 OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicSMax64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
+OPCODE(BufferAtomicUMax64, U64, Opaque, Opaque, U64 )
 OPCODE(BufferAtomicFMax32, U32, Opaque, Opaque, F32 )
 OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, )
 OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, )
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 40282cfcb..ffb785584 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -23,7 +23,9 @@ bool IsBufferAtomic(const IR::Inst& inst) {
     case IR::Opcode::BufferAtomicUMin32:
     case IR::Opcode::BufferAtomicFMin32:
     case IR::Opcode::BufferAtomicSMax32:
+    case IR::Opcode::BufferAtomicSMax64:
     case IR::Opcode::BufferAtomicUMax32:
+    case IR::Opcode::BufferAtomicUMax64:
     case IR::Opcode::BufferAtomicFMax32:
     case IR::Opcode::BufferAtomicInc32:
     case IR::Opcode::BufferAtomicDec32:
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 797d8bb4a..59668870b 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -53,9 +53,11 @@ void Visit(Info& info, const IR::Inst& inst) {
     case IR::Opcode::SharedAtomicXor32:
         info.shared_types |= IR::Type::U32;
         break;
+    case IR::Opcode::SharedAtomicIAdd64:
+        info.uses_shared_int64_atomics = true;
+        [[fallthrough]];
     case IR::Opcode::LoadSharedU64:
     case IR::Opcode::WriteSharedU64:
-    case IR::Opcode::SharedAtomicIAdd64:
         info.shared_types |= IR::Type::U64;
         break;
     case IR::Opcode::ConvertF16F32:
@@ -98,6 +100,11 @@ void Visit(Info& info, const IR::Inst& inst) {
     case IR::Opcode::BufferAtomicFMin32:
         info.uses_buffer_atomic_float_min_max = true;
         break;
+    case IR::Opcode::BufferAtomicIAdd64:
+    case IR::Opcode::BufferAtomicSMax64:
+    case IR::Opcode::BufferAtomicUMax64:
+        info.uses_buffer_int64_atomics = true;
+        break;
     case IR::Opcode::LaneId:
         info.uses_lane_id = true;
         break;
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index d7eb307b6..ad36a2e13 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -30,6 +30,8 @@ struct Profile {
     bool supports_robust_buffer_access{};
     bool supports_buffer_fp32_atomic_min_max{};
     bool supports_image_fp32_atomic_min_max{};
+    bool supports_buffer_int64_atomics{};
+    bool supports_shared_int64_atomics{};
     bool supports_workgroup_explicit_memory_layout{};
     bool has_broken_spirv_clamp{};
     bool lower_left_origin_mode{};
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 61ddd3f05..237fa202d 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -341,7 +341,7 @@ bool Instance::CreateDevice() {
     const auto topology_list_restart_features =
         feature_chain.get<vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>();
     const auto vk11_features = feature_chain.get<vk::PhysicalDeviceVulkan11Features>();
-    const auto vk12_features = feature_chain.get<vk::PhysicalDeviceVulkan12Features>();
+    vk12_features = feature_chain.get<vk::PhysicalDeviceVulkan12Features>();
     const auto vk13_features = feature_chain.get<vk::PhysicalDeviceVulkan13Features>();
     vk::StructureChain device_chain = {
         vk::DeviceCreateInfo{
@@ -387,6 +387,8 @@ bool Instance::CreateDevice() {
             .drawIndirectCount = vk12_features.drawIndirectCount,
            .storageBuffer8BitAccess = vk12_features.storageBuffer8BitAccess,
             .uniformAndStorageBuffer8BitAccess = vk12_features.uniformAndStorageBuffer8BitAccess,
+            .shaderBufferInt64Atomics = vk12_features.shaderBufferInt64Atomics,
+            .shaderSharedInt64Atomics = vk12_features.shaderSharedInt64Atomics,
             .shaderFloat16 = vk12_features.shaderFloat16,
             .shaderInt8 = vk12_features.shaderInt8,
             .scalarBlockLayout = vk12_features.scalarBlockLayout,
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index 991bfb031..c9e354186 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -178,6 +178,16 @@ public:
         return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax;
     }
 
+    /// Returns true if 64-bit integer atomic operations can be used on buffers
+    bool IsBufferInt64AtomicsSupported() const {
+        return vk12_features.shaderBufferInt64Atomics;
+    }
+
+    /// Returns true if 64-bit integer atomic operations can be used on shared memory
+    bool IsSharedInt64AtomicsSupported() const {
+        return vk12_features.shaderSharedInt64Atomics;
+    }
+
     /// Returns true when VK_KHR_workgroup_memory_explicit_layout is supported.
     bool IsWorkgroupMemoryExplicitLayoutSupported() const {
         return workgroup_memory_explicit_layout &&
@@ -358,6 +368,7 @@ private:
     vk::PhysicalDeviceVulkan12Properties vk12_props;
     vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
     vk::PhysicalDeviceFeatures features;
+    vk::PhysicalDeviceVulkan12Features vk12_features;
     vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
     vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
     vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 831995339..7dd468f9a 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -219,6 +219,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .supports_buffer_fp32_atomic_min_max =
             instance_.IsShaderAtomicFloatBuffer32MinMaxSupported(),
         .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
+        .supports_buffer_int64_atomics = instance_.IsBufferInt64AtomicsSupported(),
+        .supports_shared_int64_atomics = instance_.IsSharedInt64AtomicsSupported(),
         .supports_workgroup_explicit_memory_layout =
             instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
         .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
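
For context on the _X2 translation above: a 64-bit atomic operand is assembled from two consecutive VGPRs, low dword first, which is what ir.PackUint2x32(ir.CompositeConstruct(...)) expresses in the IR. A minimal plain-C++ illustration of that bit layout (hypothetical helper, not part of the patch):

#include <cstdint>

// Combines two 32-bit dwords into one 64-bit value with element 0 in the low
// bits, matching GLSL packUint2x32 semantics and the VGPR pairing of the
// BUFFER_ATOMIC_*_X2 instructions.
constexpr uint64_t PackUint2x32(uint32_t lo, uint32_t hi) {
    return (static_cast<uint64_t>(hi) << 32) | lo;
}

static_assert(PackUint2x32(0xDEADBEEFu, 0x00000001u) == 0x1DEADBEEFull);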
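The two new Profile flags mirror the Vulkan 1.2 feature bits that gate the SPIR-V Int64Atomics capability. A minimal standalone sketch of querying those bits with Vulkan-Hpp, assuming a valid vk::PhysicalDevice (illustration only; the patch itself reads them from the Instance feature chain):

#include <utility>
#include <vulkan/vulkan.hpp>

// Returns {buffer, shared} support: shaderBufferInt64Atomics gates 64-bit
// atomics on storage buffers, shaderSharedInt64Atomics gates them on
// workgroup (shared) memory.
std::pair<bool, bool> QueryInt64AtomicSupport(vk::PhysicalDevice physical_device) {
    vk::PhysicalDeviceVulkan12Features vk12_features{};
    vk::PhysicalDeviceFeatures2 features2{};
    features2.pNext = &vk12_features;          // chain the Vulkan 1.2 feature struct
    physical_device.getFeatures2(&features2);  // fills the whole pNext chain
    return {vk12_features.shaderBufferInt64Atomics == VK_TRUE,
            vk12_features.shaderSharedInt64Atomics == VK_TRUE};
}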