Implement buffer atomic fmin/fmax instructions (#3123)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions

This commit is contained in:
Marcin Mikołajczyk 2025-06-20 02:37:29 +02:00 committed by GitHub
parent 612f340292
commit 423254692a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 99 additions and 5 deletions

View file

@ -271,7 +271,8 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (info.has_image_query) { if (info.has_image_query) {
ctx.AddCapability(spv::Capability::ImageQuery); ctx.AddCapability(spv::Capability::ImageQuery);
} }
if (info.uses_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) { if ((info.uses_image_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) ||
(info.uses_buffer_atomic_float_min_max && profile.supports_buffer_fp32_atomic_min_max)) {
ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max"); ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT); ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT);
} }

View file

@ -50,9 +50,17 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
}); });
} }
template <bool is_float = false>
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle]; const auto& buffer = ctx.buffers[handle];
const auto type = [&] {
if constexpr (is_float) {
return ctx.F32[1];
} else {
return ctx.U32[1];
}
}();
if (Sirit::ValidId(buffer.offset)) { if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
} }
@ -60,8 +68,8 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32]; const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index); const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] { return AccessBoundsCheck<32, 1, is_float>(ctx, index, buffer.size_dwords, [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value); return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
}); });
} }
@ -196,6 +204,24 @@ Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
} }
/// Emits a 32-bit floating-point atomic minimum on a buffer element.
///
/// When the profile reports buffer fp32 atomic min/max support, the native OpAtomicFMin is used.
/// Otherwise the operation is emulated with integer atomics on the raw bit pattern:
///  - negative operand (sign bit set): unsigned max of the bits,
///  - non-negative operand: signed min of the bits.
/// Both integer atomics are always emitted (no control flow is generated), so the one that must
/// not take effect is given its identity operand and leaves memory unchanged.
///
/// NOTE(review): both paths yield a float-typed id; confirm this matches the result type the
/// opcode table declares for this instruction.
Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
        return BufferAtomicU32<true>(ctx, inst, handle, address, value,
                                     &Sirit::Module::OpAtomicFMin);
    }

    const Id value_bits = ctx.OpBitcast(ctx.U32[1], value);
    const Id sign_bit =
        ctx.OpBitFieldUExtract(ctx.U32[1], value_bits, ctx.ConstU32(31u), ctx.ConstU32(1u));
    // OpSelect requires a boolean condition, not a 32-bit integer.
    const Id is_negative = ctx.OpINotEqual(ctx.U1[1], sign_bit, ctx.u32_zero_value);

    // Identity operands: umax(x, 0) == x and smin(x, INT32_MAX) == x.
    const Id umax_operand =
        ctx.OpSelect(ctx.U32[1], is_negative, value_bits, ctx.u32_zero_value);
    const Id smin_operand =
        ctx.OpSelect(ctx.U32[1], is_negative, ctx.ConstU32(0x7FFFFFFFu), value_bits);

    const Id umax_result = EmitBufferAtomicUMax32(ctx, inst, handle, address, umax_operand);
    const Id smin_result = EmitBufferAtomicSMin32(ctx, inst, handle, address, smin_operand);

    // Return the previous value reported by the atomic that actually carried the operand.
    return ctx.OpSelect(ctx.F32[1], is_negative, EmitBitCastF32U32(ctx, umax_result),
                        EmitBitCastF32U32(ctx, smin_result));
}
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax);
} }
@ -204,6 +230,24 @@ Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax); return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
} }
/// Emits a 32-bit floating-point atomic maximum on a buffer element.
///
/// When the profile reports buffer fp32 atomic min/max support, the native OpAtomicFMax is used.
/// Otherwise the operation is emulated with integer atomics on the raw bit pattern:
///  - negative operand (sign bit set): unsigned min of the bits,
///  - non-negative operand: signed max of the bits.
/// Both integer atomics are always emitted (no control flow is generated), so the one that must
/// not take effect is given its identity operand and leaves memory unchanged.
///
/// NOTE(review): both paths yield a float-typed id; confirm this matches the result type the
/// opcode table declares for this instruction.
Id EmitBufferAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
    if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
        return BufferAtomicU32<true>(ctx, inst, handle, address, value,
                                     &Sirit::Module::OpAtomicFMax);
    }

    const Id value_bits = ctx.OpBitcast(ctx.U32[1], value);
    const Id sign_bit =
        ctx.OpBitFieldUExtract(ctx.U32[1], value_bits, ctx.ConstU32(31u), ctx.ConstU32(1u));
    // OpSelect requires a boolean condition, not a 32-bit integer.
    const Id is_negative = ctx.OpINotEqual(ctx.U1[1], sign_bit, ctx.u32_zero_value);

    // Identity operands: umin(x, UINT32_MAX) == x and smax(x, INT32_MIN) == x.
    const Id umin_operand =
        ctx.OpSelect(ctx.U32[1], is_negative, value_bits, ctx.ConstU32(0xFFFFFFFFu));
    const Id smax_operand =
        ctx.OpSelect(ctx.U32[1], is_negative, ctx.ConstU32(0x80000000u), value_bits);

    const Id umin_result = EmitBufferAtomicUMin32(ctx, inst, handle, address, umin_operand);
    const Id smax_result = EmitBufferAtomicSMax32(ctx, inst, handle, address, smax_operand);

    // Return the previous value reported by the atomic that actually carried the operand.
    return ctx.OpSelect(ctx.F32[1], is_negative, EmitBitCastF32U32(ctx, umin_result),
                        EmitBitCastF32U32(ctx, smax_result));
}
Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return BufferAtomicU32IncDec(ctx, inst, handle, address, &Sirit::Module::OpAtomicIIncrement); return BufferAtomicU32IncDec(ctx, inst, handle, address, &Sirit::Module::OpAtomicIIncrement);
} }

View file

@ -92,8 +92,10 @@ Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);

View file

@ -90,6 +90,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_ATOMIC(AtomicOp::Inc, inst); return BUFFER_ATOMIC(AtomicOp::Inc, inst);
case Opcode::BUFFER_ATOMIC_DEC: case Opcode::BUFFER_ATOMIC_DEC:
return BUFFER_ATOMIC(AtomicOp::Dec, inst); return BUFFER_ATOMIC(AtomicOp::Dec, inst);
case Opcode::BUFFER_ATOMIC_FMIN:
return BUFFER_ATOMIC(AtomicOp::Fmin, inst);
case Opcode::BUFFER_ATOMIC_FMAX:
return BUFFER_ATOMIC(AtomicOp::Fmax, inst);
// MIMG // MIMG
// Image load operations // Image load operations
@ -357,6 +361,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
return ir.BufferAtomicInc(handle, address, buffer_info); return ir.BufferAtomicInc(handle, address, buffer_info);
case AtomicOp::Dec: case AtomicOp::Dec:
return ir.BufferAtomicDec(handle, address, buffer_info); return ir.BufferAtomicDec(handle, address, buffer_info);
case AtomicOp::Fmin:
return ir.BufferAtomicFMin(handle, address, vdata_val, buffer_info);
case AtomicOp::Fmax:
return ir.BufferAtomicFMax(handle, address, vdata_val, buffer_info);
default: default:
UNREACHABLE(); UNREACHABLE();
} }

View file

@ -215,7 +215,8 @@ struct Info {
bool has_image_query{}; bool has_image_query{};
bool has_perspective_interp{}; bool has_perspective_interp{};
bool has_linear_interp{}; bool has_linear_interp{};
bool uses_atomic_float_min_max{}; bool uses_buffer_atomic_float_min_max{};
bool uses_image_atomic_float_min_max{};
bool uses_lane_id{}; bool uses_lane_id{};
bool uses_group_quad{}; bool uses_group_quad{};
bool uses_group_ballot{}; bool uses_group_ballot{};

View file

@ -504,12 +504,22 @@ Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, con
: Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value); : Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
} }
/// Creates a BufferAtomicFMin32 instruction with `info` attached as its flags and
/// `handle`, `address` and `value` as its operands.
Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, const Value& value,
                                  BufferInstInfo info) {
    const Flags inst_flags{info};
    return Inst(Opcode::BufferAtomicFMin32, inst_flags, handle, address, value);
}
Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value, Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value,
bool is_signed, BufferInstInfo info) { bool is_signed, BufferInstInfo info) {
return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value) return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value)
: Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value); : Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
} }
/// Creates a BufferAtomicFMax32 instruction with `info` attached as its flags and
/// `handle`, `address` and `value` as its operands.
Value IREmitter::BufferAtomicFMax(const Value& handle, const Value& address, const Value& value,
                                  BufferInstInfo info) {
    const Flags inst_flags{info};
    return Inst(Opcode::BufferAtomicFMax32, inst_flags, handle, address, value);
}
Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, BufferInstInfo info) { Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, BufferInstInfo info) {
return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address); return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address);
} }

View file

@ -140,8 +140,12 @@ public:
const Value& value, BufferInstInfo info); const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
const Value& value, bool is_signed, BufferInstInfo info); const Value& value, bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicFMin(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
const Value& value, bool is_signed, BufferInstInfo info); const Value& value, bool is_signed, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicFMax(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
BufferInstInfo info); BufferInstInfo info);
[[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address, [[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,

View file

@ -71,8 +71,10 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::BufferAtomicISub32: case Opcode::BufferAtomicISub32:
case Opcode::BufferAtomicSMin32: case Opcode::BufferAtomicSMin32:
case Opcode::BufferAtomicUMin32: case Opcode::BufferAtomicUMin32:
case Opcode::BufferAtomicFMin32:
case Opcode::BufferAtomicSMax32: case Opcode::BufferAtomicSMax32:
case Opcode::BufferAtomicUMax32: case Opcode::BufferAtomicUMax32:
case Opcode::BufferAtomicFMax32:
case Opcode::BufferAtomicInc32: case Opcode::BufferAtomicInc32:
case Opcode::BufferAtomicDec32: case Opcode::BufferAtomicDec32:
case Opcode::BufferAtomicAnd32: case Opcode::BufferAtomicAnd32:

View file

@ -125,8 +125,10 @@ OPCODE(BufferAtomicIAdd64, U64, Opaq
OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
OPCODE(BufferAtomicFMax32, U32, Opaque, Opaque, F32 )
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, ) OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, )
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, ) OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, )
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )

View file

@ -21,8 +21,10 @@ bool IsBufferAtomic(const IR::Inst& inst) {
case IR::Opcode::BufferAtomicISub32: case IR::Opcode::BufferAtomicISub32:
case IR::Opcode::BufferAtomicSMin32: case IR::Opcode::BufferAtomicSMin32:
case IR::Opcode::BufferAtomicUMin32: case IR::Opcode::BufferAtomicUMin32:
case IR::Opcode::BufferAtomicFMin32:
case IR::Opcode::BufferAtomicSMax32: case IR::Opcode::BufferAtomicSMax32:
case IR::Opcode::BufferAtomicUMax32: case IR::Opcode::BufferAtomicUMax32:
case IR::Opcode::BufferAtomicFMax32:
case IR::Opcode::BufferAtomicInc32: case IR::Opcode::BufferAtomicInc32:
case IR::Opcode::BufferAtomicDec32: case IR::Opcode::BufferAtomicDec32:
case IR::Opcode::BufferAtomicAnd32: case IR::Opcode::BufferAtomicAnd32:

View file

@ -92,7 +92,11 @@ void Visit(Info& info, const IR::Inst& inst) {
break; break;
case IR::Opcode::ImageAtomicFMax32: case IR::Opcode::ImageAtomicFMax32:
case IR::Opcode::ImageAtomicFMin32: case IR::Opcode::ImageAtomicFMin32:
info.uses_atomic_float_min_max = true; info.uses_image_atomic_float_min_max = true;
break;
case IR::Opcode::BufferAtomicFMax32:
case IR::Opcode::BufferAtomicFMin32:
info.uses_buffer_atomic_float_min_max = true;
break; break;
case IR::Opcode::LaneId: case IR::Opcode::LaneId:
info.uses_lane_id = true; info.uses_lane_id = true;

View file

@ -28,6 +28,7 @@ struct Profile {
bool supports_native_cube_calc{}; bool supports_native_cube_calc{};
bool supports_trinary_minmax{}; bool supports_trinary_minmax{};
bool supports_robust_buffer_access{}; bool supports_robust_buffer_access{};
bool supports_buffer_fp32_atomic_min_max{};
bool supports_image_fp32_atomic_min_max{}; bool supports_image_fp32_atomic_min_max{};
bool supports_workgroup_explicit_memory_layout{}; bool supports_workgroup_explicit_memory_layout{};
bool has_broken_spirv_clamp{}; bool has_broken_spirv_clamp{};

View file

@ -281,6 +281,8 @@ bool Instance::CreateDevice() {
if (shader_atomic_float2) { if (shader_atomic_float2) {
shader_atomic_float2_features = shader_atomic_float2_features =
feature_chain.get<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>(); feature_chain.get<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
LOG_INFO(Render_Vulkan, "- shaderBufferFloat32AtomicMinMax: {}",
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax);
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}", LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax); shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
} }
@ -433,6 +435,8 @@ bool Instance::CreateDevice() {
.legacyVertexAttributes = true, .legacyVertexAttributes = true,
}, },
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{ vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
.shaderBufferFloat32AtomicMinMax =
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
.shaderImageFloat32AtomicMinMax = .shaderImageFloat32AtomicMinMax =
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax, shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
}, },

View file

@ -165,6 +165,13 @@ public:
return amd_shader_trinary_minmax; return amd_shader_trinary_minmax;
} }
/// Reports whether buffer float32 atomic min/max can be used: requires both the
/// shader_atomic_float2 flag and its shaderBufferFloat32AtomicMinMax feature bit.
bool IsShaderAtomicFloatBuffer32MinMaxSupported() const {
    if (!shader_atomic_float2) {
        return false;
    }
    return shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax != 0;
}
/// Returns true when the shaderImageFloat32AtomicMinMax feature of /// Returns true when the shaderImageFloat32AtomicMinMax feature of
/// VK_EXT_shader_atomic_float2 is supported. /// VK_EXT_shader_atomic_float2 is supported.
bool IsShaderAtomicFloatImage32MinMaxSupported() const { bool IsShaderAtomicFloatImage32MinMaxSupported() const {

View file

@ -216,6 +216,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(), .supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
// TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed. // TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
.supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(), .supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
.supports_buffer_fp32_atomic_min_max =
instance_.IsShaderAtomicFloatBuffer32MinMaxSupported(),
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(), .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
.supports_workgroup_explicit_memory_layout = .supports_workgroup_explicit_memory_layout =
instance_.IsWorkgroupMemoryExplicitLayoutSupported(), instance_.IsWorkgroupMemoryExplicitLayoutSupported(),