mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-26 04:16:18 +00:00
Implement IMAGE_ATOMIC_FMIN and IMAGE_ATOMIC_FMAX for 32bit floats (#2820)
* Implement IMAGE_ATOMIC_FMIN and IMAGE_ATOMIC_FMAX for 32bit floats * Handle missing VK_EXT_shader_atomic_float2
This commit is contained in:
parent
a3bbf2274f
commit
c08f92aca1
16 changed files with 106 additions and 2 deletions
|
@ -270,6 +270,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
|||
if (info.has_image_query) {
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
}
|
||||
// Only declare the float min/max atomic extension and capability when the device profile
// actually supports it. When it does not, the image float min/max emitters fall back to
// integer atomics and never emit OpAtomicFMin/OpAtomicFMax, so declaring the capability
// would make the module request a feature the device lacks.
if (info.uses_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) {
    ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
    ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT);
}
|
||||
if (info.uses_lane_id) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniform);
|
||||
}
|
||||
|
|
|
@ -75,6 +75,14 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id va
|
|||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
|
||||
}
|
||||
|
||||
/// Emits a 32-bit float atomic on a texel of one of the shader's images.
///
/// @param ctx         Emit context that owns the image table and the SPIR-V module.
/// @param inst        IR instruction being lowered (not read here).
/// @param handle      Image handle; only its low 16 bits are used as the index into ctx.images.
/// @param coords      Texel coordinates passed to OpImageTexelPointer.
/// @param value       Float operand of the atomic.
/// @param atomic_func Sirit atomic emitter to invoke (result type, pointer, scope, semantics,
///                    value).
/// @return Id produced by the selected atomic emitter, typed as ctx.F32[1].
Id ImageAtomicF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
                  Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
    const u32 image_index = handle & 0xFFFF;
    const auto& image = ctx.images[image_index];
    // The last OpImageTexelPointer operand is the sample index; it is always 0 here.
    const Id sample_index = ctx.ConstU32(0U);
    const Id texel_pointer =
        ctx.OpImageTexelPointer(ctx.image_f32, image.id, coords, sample_index);
    const auto [scope, semantics] = AtomicArgs(ctx);
    return (ctx.*atomic_func)(ctx.F32[1], texel_pointer, scope, semantics, value);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
|
||||
|
@ -187,6 +195,40 @@ Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
|
|||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicUMax);
|
||||
}
|
||||
|
||||
/// Emits a 32-bit float atomic max on an image texel and returns the value the atomic observed
/// before updating it.
///
/// When the profile supports float min/max image atomics a native OpAtomicFMax is emitted.
/// Otherwise the operation is emulated with integer atomics on the float's bit pattern:
///  - sign bit clear: the bits order like signed integers, so a signed max applies;
///  - sign bit set:   the bits order in reverse as unsigned integers, so an unsigned min applies.
///
/// No control flow is generated, so both integer atomics are always emitted. The one that does
/// not apply receives its identity operand (INT32_MIN for signed max, UINT32_MAX for unsigned
/// min) and therefore leaves the texel unchanged. Feeding the real operand to both, as a plain
/// select over their results would, lets the wrong atomic overwrite the texel.
///
/// @param ctx    Emit context.
/// @param inst   IR instruction being lowered; forwarded to the underlying emitters.
/// @param handle Image handle forwarded to the underlying emitters.
/// @param coords Texel coordinates.
/// @param value  Float operand.
/// @return Float-typed Id holding the value returned by the atomic that applied.
Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
    if (ctx.profile.supports_image_fp32_atomic_min_max) {
        return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMax);
    }

    const Id u32_value{ctx.OpBitcast(ctx.U32[1], value)};
    const Id sign_bit{
        ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u))};
    // OpSelect requires a boolean condition, so convert the extracted bit to bool.
    const Id is_negative{ctx.OpINotEqual(ctx.U1[1], sign_bit, ctx.ConstU32(0u))};

    // Identity operands: max(x, INT32_MIN) == x (signed), min(x, UINT32_MAX) == x (unsigned).
    const Id smax_identity{ctx.ConstU32(0x80000000u)};
    const Id umin_identity{ctx.ConstU32(0xFFFFFFFFu)};
    const Id smax_operand{ctx.OpSelect(ctx.U32[1], is_negative, smax_identity, u32_value)};
    const Id umin_operand{ctx.OpSelect(ctx.U32[1], is_negative, u32_value, umin_identity)};

    // Emit as separate statements so the order of the two atomics is well defined.
    const Id smax_previous{EmitImageAtomicSMax32(ctx, inst, handle, coords, smax_operand)};
    const Id umin_previous{EmitImageAtomicUMin32(ctx, inst, handle, coords, umin_operand)};

    return ctx.OpSelect(ctx.F32[1], is_negative, EmitBitCastF32U32(ctx, umin_previous),
                        EmitBitCastF32U32(ctx, smax_previous));
}
|
||||
|
||||
/// Emits a 32-bit float atomic min on an image texel and returns the value the atomic observed
/// before updating it.
///
/// When the profile supports float min/max image atomics a native OpAtomicFMin is emitted.
/// Otherwise the operation is emulated with integer atomics on the float's bit pattern:
///  - sign bit clear: the bits order like signed integers, so a signed min applies;
///  - sign bit set:   the bits order in reverse as unsigned integers, so an unsigned max applies.
///
/// No control flow is generated, so both integer atomics are always emitted. The one that does
/// not apply receives its identity operand (INT32_MAX for signed min, 0 for unsigned max) and
/// therefore leaves the texel unchanged. Feeding the real operand to both, as a plain select
/// over their results would, lets the wrong atomic overwrite the texel.
///
/// @param ctx    Emit context.
/// @param inst   IR instruction being lowered; forwarded to the underlying emitters.
/// @param handle Image handle forwarded to the underlying emitters.
/// @param coords Texel coordinates.
/// @param value  Float operand.
/// @return Float-typed Id holding the value returned by the atomic that applied.
Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
    if (ctx.profile.supports_image_fp32_atomic_min_max) {
        return ImageAtomicF32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicFMin);
    }

    const Id u32_value{ctx.OpBitcast(ctx.U32[1], value)};
    const Id sign_bit{
        ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u))};
    // OpSelect requires a boolean condition, so convert the extracted bit to bool.
    const Id is_negative{ctx.OpINotEqual(ctx.U1[1], sign_bit, ctx.ConstU32(0u))};

    // Identity operands: min(x, INT32_MAX) == x (signed), max(x, 0) == x (unsigned).
    const Id smin_identity{ctx.ConstU32(0x7FFFFFFFu)};
    const Id umax_identity{ctx.ConstU32(0u)};
    const Id smin_operand{ctx.OpSelect(ctx.U32[1], is_negative, smin_identity, u32_value)};
    const Id umax_operand{ctx.OpSelect(ctx.U32[1], is_negative, u32_value, umax_identity)};

    // Emit as separate statements so the order of the two atomics is well defined.
    const Id smin_previous{EmitImageAtomicSMin32(ctx, inst, handle, coords, smin_operand)};
    const Id umax_previous{EmitImageAtomicUMax32(ctx, inst, handle, coords, umax_operand)};

    return ctx.OpSelect(ctx.F32[1], is_negative, EmitBitCastF32U32(ctx, umax_previous),
                        EmitBitCastF32U32(ctx, smin_previous));
}
|
||||
|
||||
Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, u32, Id, Id) {
|
||||
// TODO: This is not yet implemented
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
|
|
|
@ -482,6 +482,8 @@ Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords
|
|||
Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
|
|
|
@ -869,6 +869,7 @@ void EmitContext::DefineImagesAndSamplers() {
|
|||
}
|
||||
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
|
||||
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
|
||||
image_f32 = TypePointer(spv::StorageClass::Image, F32[1]);
|
||||
}
|
||||
if (info.samplers.empty()) {
|
||||
return;
|
||||
|
|
|
@ -207,6 +207,7 @@ public:
|
|||
Id invocation_id{};
|
||||
Id subgroup_local_invocation_id{};
|
||||
Id image_u32{};
|
||||
Id image_f32{};
|
||||
|
||||
Id shared_memory_u8{};
|
||||
Id shared_memory_u16{};
|
||||
|
|
|
@ -115,8 +115,12 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
return IMAGE_ATOMIC(AtomicOp::Smin, inst);
|
||||
case Opcode::IMAGE_ATOMIC_UMIN:
|
||||
return IMAGE_ATOMIC(AtomicOp::Umin, inst);
|
||||
case Opcode::IMAGE_ATOMIC_FMIN:
|
||||
return IMAGE_ATOMIC(AtomicOp::Fmin, inst);
|
||||
case Opcode::IMAGE_ATOMIC_SMAX:
|
||||
return IMAGE_ATOMIC(AtomicOp::Smax, inst);
|
||||
case Opcode::IMAGE_ATOMIC_FMAX:
|
||||
return IMAGE_ATOMIC(AtomicOp::Fmax, inst);
|
||||
case Opcode::IMAGE_ATOMIC_UMAX:
|
||||
return IMAGE_ATOMIC(AtomicOp::Umax, inst);
|
||||
case Opcode::IMAGE_ATOMIC_AND:
|
||||
|
@ -466,6 +470,10 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
|||
return ir.ImageAtomicIMax(handle, body, value, true, info);
|
||||
case AtomicOp::Umax:
|
||||
return ir.ImageAtomicUMax(handle, body, value, info);
|
||||
case AtomicOp::Fmax:
|
||||
return ir.ImageAtomicFMax(handle, body, value, info);
|
||||
case AtomicOp::Fmin:
|
||||
return ir.ImageAtomicFMin(handle, body, value, info);
|
||||
case AtomicOp::And:
|
||||
return ir.ImageAtomicAnd(handle, body, value, info);
|
||||
case AtomicOp::Or:
|
||||
|
|
|
@ -196,6 +196,7 @@ struct Info {
|
|||
bool has_discard{};
|
||||
bool has_image_gather{};
|
||||
bool has_image_query{};
|
||||
bool uses_atomic_float_min_max{};
|
||||
bool uses_lane_id{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_group_ballot{};
|
||||
|
|
|
@ -1870,6 +1870,16 @@ Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const
|
|||
return Inst(Opcode::ImageAtomicUMax32, Flags{info}, handle, coords, value);
|
||||
}
|
||||
|
||||
/// Appends an ImageAtomicFMax32 IR instruction.
///
/// @param handle Image handle operand.
/// @param coords Coordinate operand.
/// @param value  Float operand of the atomic.
/// @param info   Texture instruction info stored in the instruction's flags.
/// @return The value produced by the new instruction.
Value IREmitter::ImageAtomicFMax(const Value& handle, const Value& coords, const Value& value,
                                 TextureInstInfo info) {
    const Flags inst_flags{info};
    return Inst(Opcode::ImageAtomicFMax32, inst_flags, handle, coords, value);
}
|
||||
|
||||
/// Appends an ImageAtomicFMin32 IR instruction.
///
/// @param handle Image handle operand.
/// @param coords Coordinate operand.
/// @param value  Float operand of the atomic.
/// @param info   Texture instruction info stored in the instruction's flags.
/// @return The value produced by the new instruction.
Value IREmitter::ImageAtomicFMin(const Value& handle, const Value& coords, const Value& value,
                                 TextureInstInfo info) {
    const Flags inst_flags{info};
    return Inst(Opcode::ImageAtomicFMin32, inst_flags, handle, coords, value);
}
|
||||
|
||||
Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
|
||||
bool is_signed, TextureInstInfo info) {
|
||||
return is_signed ? ImageAtomicSMax(handle, coords, value, info)
|
||||
|
|
|
@ -321,6 +321,10 @@ public:
|
|||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicFMax(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicFMin(const Value& handle, const Value& coords,
|
||||
const Value& value, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
|
||||
const Value& value, bool is_signed, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
|
||||
|
|
|
@ -420,6 +420,8 @@ OPCODE(ImageAtomicSMin32, U32, Opaq
|
|||
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicFMax32, F32, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageAtomicFMin32, F32, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||
|
|
|
@ -71,6 +71,10 @@ void Visit(Info& info, const IR::Inst& inst) {
|
|||
case IR::Opcode::ImageQueryLod:
|
||||
info.has_image_query = true;
|
||||
break;
|
||||
case IR::Opcode::ImageAtomicFMax32:
|
||||
case IR::Opcode::ImageAtomicFMin32:
|
||||
info.uses_atomic_float_min_max = true;
|
||||
break;
|
||||
case IR::Opcode::LaneId:
|
||||
info.uses_lane_id = true;
|
||||
break;
|
||||
|
|
|
@ -29,6 +29,7 @@ struct Profile {
|
|||
bool supports_native_cube_calc{};
|
||||
bool supports_trinary_minmax{};
|
||||
bool supports_robust_buffer_access{};
|
||||
bool supports_image_fp32_atomic_min_max{};
|
||||
bool has_broken_spirv_clamp{};
|
||||
bool lower_left_origin_mode{};
|
||||
bool needs_manual_interpolation{};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue