Implement IMAGE_ATOMIC_FMIN and IMAGE_ATOMIC_FMAX for 32-bit floats (#2820)

* Implement IMAGE_ATOMIC_FMIN and IMAGE_ATOMIC_FMAX for 32-bit floats

* Handle missing VK_EXT_shader_atomic_float2
This commit is contained in:
Marcin Mikołajczyk 2025-04-30 20:42:08 +02:00 committed by GitHub
parent a3bbf2274f
commit c08f92aca1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 106 additions and 2 deletions

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 427a42c9ed99b38204d9107bc3dc14e92458acf1
Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c

View file

@ -270,6 +270,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (info.has_image_query) {
ctx.AddCapability(spv::Capability::ImageQuery);
}
// Declare the float min/max atomic extension/capability only when the device profile reports
// native support. Without it, the image float min/max emitters fall back to integer atomics,
// so the module must not require a capability the device does not provide.
if (info.uses_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) {
    ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
    ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT);
}
if (info.uses_lane_id) {
ctx.AddCapability(spv::Capability::GroupNonUniform);
}

View file

@ -75,6 +75,14 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va
const auto [scope, semantics]{AtomicArgs(ctx)};
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
}
/// Emits a 32-bit float atomic operation on one texel of a bound image.
/// The image is selected by the low 16 bits of `handle`; a texel pointer of type
/// `ctx.image_f32` is formed at `coords` with a constant 0 as the last operand, and
/// `atomic_func` is invoked on it with the scope/semantics returned by AtomicArgs.
/// Returns the Id produced by `atomic_func`. `inst` is not read by this helper.
Id ImageAtomicF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
                  Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
    const auto& bound_image = ctx.images[handle & 0xFFFF];
    const Id texel_ptr =
        ctx.OpImageTexelPointer(ctx.image_f32, bound_image.id, coords, ctx.ConstU32(0U));
    const auto [memory_scope, memory_semantics] = AtomicArgs(ctx);
    return (ctx.*atomic_func)(ctx.F32[1], texel_ptr, memory_scope, memory_semantics, value);
}
} // Anonymous namespace
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
@ -187,6 +195,40 @@ Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax);
}
/// Emits a 32-bit float atomic max on an image texel.
/// When the profile reports native support, a single OpAtomicFMax is emitted through
/// ImageAtomicF32. Otherwise the value is reinterpreted as U32 and the result is chosen
/// between an unsigned-min atomic (sign bit set) and a signed-max atomic (sign bit clear),
/// each bit-cast back to F32.
Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
    if (ctx.profile.supports_image_fp32_atomic_min_max) {
        return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMax);
    }
    const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
    const auto sign_bit_set =
        ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
    // Emit the two atomics as separate statements. When they were nested inside the OpSelect
    // argument list, C++ left their relative evaluation order unspecified, so the emitted
    // instruction order could differ between compilers.
    const auto umin_as_f32 =
        EmitBitCastF32U32(ctx, EmitImageAtomicUMin32(ctx, inst, handle, coords, u32_value));
    const auto smax_as_f32 =
        EmitBitCastF32U32(ctx, EmitImageAtomicSMax32(ctx, inst, handle, coords, u32_value));
    // NOTE(review): both atomics are emitted unconditionally, so both appear to execute on
    // the texel regardless of the sign; OpSelect only picks which returned value is forwarded.
    // Confirm this is the intended fallback behavior.
    // NOTE(review): sign_bit_set is produced with result type ctx.U32[1], while SPIR-V OpSelect
    // expects a Boolean condition - confirm whether a conversion is needed here.
    return ctx.OpSelect(ctx.F32[1], sign_bit_set, umin_as_f32, smax_as_f32);
}
/// Emits a 32-bit float atomic min on an image texel.
/// When the profile reports native support, a single OpAtomicFMin is emitted through
/// ImageAtomicF32. Otherwise the value is reinterpreted as U32 and the result is chosen
/// between an unsigned-max atomic (sign bit set) and a signed-min atomic (sign bit clear),
/// each bit-cast back to F32.
Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
    if (ctx.profile.supports_image_fp32_atomic_min_max) {
        return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMin);
    }
    const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
    const auto sign_bit_set =
        ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
    // Emit the two atomics as separate statements. When they were nested inside the OpSelect
    // argument list, C++ left their relative evaluation order unspecified, so the emitted
    // instruction order could differ between compilers.
    const auto umax_as_f32 =
        EmitBitCastF32U32(ctx, EmitImageAtomicUMax32(ctx, inst, handle, coords, u32_value));
    const auto smin_as_f32 =
        EmitBitCastF32U32(ctx, EmitImageAtomicSMin32(ctx, inst, handle, coords, u32_value));
    // NOTE(review): both atomics are emitted unconditionally, so both appear to execute on
    // the texel regardless of the sign; OpSelect only picks which returned value is forwarded.
    // Confirm this is the intended fallback behavior.
    // NOTE(review): sign_bit_set is produced with result type ctx.U32[1], while SPIR-V OpSelect
    // expects a Boolean condition - confirm whether a conversion is needed here.
    return ctx.OpSelect(ctx.F32[1], sign_bit_set, umax_as_f32, smin_as_f32);
}
Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
// TODO: This is not yet implemented
throw NotImplementedException("SPIR-V Instruction");

View file

@ -482,6 +482,8 @@ Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);

View file

@ -869,6 +869,7 @@ void EmitContext::DefineImagesAndSamplers() {
}
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
image_f32 = TypePointer(spv::StorageClass::Image, F32[1]);
}
if (info.samplers.empty()) {
return;

View file

@ -207,6 +207,7 @@ public:
Id invocation_id{};
Id subgroup_local_invocation_id{};
Id image_u32{};
Id image_f32{};
Id shared_memory_u8{};
Id shared_memory_u16{};

View file

@ -115,8 +115,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return IMAGE_ATOMIC(AtomicOp::Smin, inst);
case Opcode::IMAGE_ATOMIC_UMIN:
return IMAGE_ATOMIC(AtomicOp::Umin, inst);
case Opcode::IMAGE_ATOMIC_FMIN:
return IMAGE_ATOMIC(AtomicOp::Fmin, inst);
case Opcode::IMAGE_ATOMIC_SMAX:
return IMAGE_ATOMIC(AtomicOp::Smax, inst);
case Opcode::IMAGE_ATOMIC_FMAX:
return IMAGE_ATOMIC(AtomicOp::Fmax, inst);
case Opcode::IMAGE_ATOMIC_UMAX:
return IMAGE_ATOMIC(AtomicOp::Umax, inst);
case Opcode::IMAGE_ATOMIC_AND:
@ -466,6 +470,10 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
return ir.ImageAtomicIMax(handle, body, value, true, info);
case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, info);
case AtomicOp::Fmax:
return ir.ImageAtomicFMax(handle, body, value, info);
case AtomicOp::Fmin:
return ir.ImageAtomicFMin(handle, body, value, info);
case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, info);
case AtomicOp::Or:

View file

@ -196,6 +196,7 @@ struct Info {
bool has_discard{};
bool has_image_gather{};
bool has_image_query{};
bool uses_atomic_float_min_max{};
bool uses_lane_id{};
bool uses_group_quad{};
bool uses_group_ballot{};

View file

@ -1870,6 +1870,16 @@ Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const
return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
}
/// Builds an ImageAtomicFMax32 IR instruction from the image handle, coordinates and operand
/// value, attaching `info` as the instruction flags, and returns the resulting Value.
Value IREmitter::ImageAtomicFMax(const Value& handle, const Value& coords, const Value& value,
                                 TextureInstInfo info) {
    const Flags inst_flags{info};
    return Inst(Opcode::ImageAtomicFMax32, inst_flags, handle, coords, value);
}
/// Builds an ImageAtomicFMin32 IR instruction from the image handle, coordinates and operand
/// value, attaching `info` as the instruction flags, and returns the resulting Value.
Value IREmitter::ImageAtomicFMin(const Value& handle, const Value& coords, const Value& value,
                                 TextureInstInfo info) {
    const Flags inst_flags{info};
    return Inst(Opcode::ImageAtomicFMin32, inst_flags, handle, coords, value);
}
Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
bool is_signed, TextureInstInfo info) {
return is_signed ? ImageAtomicSMax(handle, coords, value, info)

View file

@ -321,6 +321,10 @@ public:
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicFMax(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicFMin(const Value& handle, const Value& coords,
const Value& value, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
const Value& value, bool is_signed, TextureInstInfo info);
[[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,

View file

@ -420,6 +420,8 @@ OPCODE(ImageAtomicSMin32, U32, Opaq
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicFMax32, F32, Opaque, Opaque, F32, )
OPCODE(ImageAtomicFMin32, F32, Opaque, Opaque, F32, )
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )

View file

@ -71,6 +71,10 @@ void Visit(Info& info, const IR::Inst& inst) {
case IR::Opcode::ImageQueryLod:
info.has_image_query = true;
break;
case IR::Opcode::ImageAtomicFMax32:
case IR::Opcode::ImageAtomicFMin32:
info.uses_atomic_float_min_max = true;
break;
case IR::Opcode::LaneId:
info.uses_lane_id = true;
break;

View file

@ -29,6 +29,7 @@ struct Profile {
bool supports_native_cube_calc{};
bool supports_trinary_minmax{};
bool supports_robust_buffer_access{};
bool supports_image_fp32_atomic_min_max{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};
bool needs_manual_interpolation{};

View file

@ -210,7 +210,8 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceRobustness2FeaturesEXT,
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
features = feature_chain.get().features;
const vk::StructureChain properties_chain = physical_device.getProperties2<
@ -272,6 +273,13 @@ bool Instance::CreateDevice() {
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
amd_shader_trinary_minmax = add_extension(VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME);
shader_atomic_float2 = add_extension(VK_EXT_SHADER_ATOMIC_FLOAT_2_EXTENSION_NAME);
if (shader_atomic_float2) {
shader_atomic_float2_features =
feature_chain.get<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
}
const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
@ -401,6 +409,10 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
.legacyVertexAttributes = true,
},
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
.shaderImageFloat32AtomicMinMax =
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
},
#ifdef __APPLE__
portability_features,
#endif
@ -430,6 +442,9 @@ bool Instance::CreateDevice() {
if (!legacy_vertex_attributes) {
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
}
if (!shader_atomic_float2) {
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) {

View file

@ -165,6 +165,12 @@ public:
return amd_shader_trinary_minmax;
}
/// Reports whether 32-bit float image atomic min/max is usable: true only when the
/// VK_EXT_shader_atomic_float2 extension flag is set and its
/// shaderImageFloat32AtomicMinMax feature is reported as non-zero.
bool IsShaderAtomicFloatImage32MinMaxSupported() const {
    if (!shader_atomic_float2) {
        return false;
    }
    return shader_atomic_float2_features.shaderImageFloat32AtomicMinMax != 0;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@ -336,6 +342,7 @@ private:
vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
std::string vendor_name;
@ -360,6 +367,7 @@ private:
bool image_load_store_lod{};
bool amd_gcn_shader{};
bool amd_shader_trinary_minmax{};
bool shader_atomic_float2{};
bool portability_subset{};
};

View file

@ -206,6 +206,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
.supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
.supports_robust_buffer_access = instance_.IsRobustBufferAccess2Supported(),
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||