mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-25 11:56:18 +00:00
Implement buffer atomic fmin/fmax instructions (#3123)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions
This commit is contained in:
parent
612f340292
commit
423254692a
15 changed files with 99 additions and 5 deletions
|
@ -271,7 +271,8 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
|||
if (info.has_image_query) {
|
||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||
}
|
||||
if (info.uses_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) {
|
||||
if ((info.uses_image_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) ||
|
||||
(info.uses_buffer_atomic_float_min_max && profile.supports_buffer_fp32_atomic_min_max)) {
|
||||
ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
|
||||
ctx.AddCapability(spv::Capability::AtomicFloat32MinMaxEXT);
|
||||
}
|
||||
|
|
|
@ -50,9 +50,17 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
|
|||
});
|
||||
}
|
||||
|
||||
template <bool is_float = false>
|
||||
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
const auto type = [&] {
|
||||
if constexpr (is_float) {
|
||||
return ctx.F32[1];
|
||||
} else {
|
||||
return ctx.U32[1];
|
||||
}
|
||||
}();
|
||||
if (Sirit::ValidId(buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
}
|
||||
|
@ -60,8 +68,8 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
|
|||
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
|
||||
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
|
||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
|
||||
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
|
||||
return AccessBoundsCheck<32, 1, is_float>(ctx, index, buffer.size_dwords, [&] {
|
||||
return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -196,6 +204,24 @@ Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMin);
|
||||
}
|
||||
|
||||
// Emits a 32-bit floating-point atomic minimum on the buffer bound at `handle`.
// When the profile reports native buffer fp32 atomic min/max support the
// operation is emitted directly as OpAtomicFMin through BufferAtomicU32<true>;
// otherwise it is emulated with integer atomics on the raw bit pattern.
Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
// Native path: the float atomic is available, no emulation needed.
if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
|
||||
return BufferAtomicU32<true>(ctx, inst, handle, address, value,
|
||||
&Sirit::Module::OpAtomicFMin);
|
||||
}
|
||||
|
||||
// Fallback: reinterpret the float operand as an unsigned 32-bit integer.
const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
|
||||
// Extract a 1-bit field at offset 31, i.e. the IEEE-754 sign bit of the operand.
const auto sign_bit_set =
|
||||
ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
|
||||
|
||||
// Sign bit set (negative operand): use the unsigned-max result, since a larger
// unsigned bit pattern is a more negative float. Otherwise use the signed-min
// result, since signed integer order matches float order for non-negative
// values (NaN handling aside).
// NOTE(review): both atomics below are evaluated as call arguments, so both are
// emitted into the shader and both would modify memory; OpSelect only picks
// which returned value is used. Their relative emission order is also
// unspecified in C++. Confirm this is intended.
// NOTE(review): sign_bit_set is produced with type ctx.U32[1], while the SPIR-V
// spec requires OpSelect's condition to be boolean — confirm this validates.
const auto result = ctx.OpSelect(
|
||||
ctx.F32[1], sign_bit_set,
|
||||
EmitBitCastF32U32(ctx, EmitBufferAtomicUMax32(ctx, inst, handle, address, u32_value)),
|
||||
EmitBitCastF32U32(ctx, EmitBufferAtomicSMin32(ctx, inst, handle, address, u32_value)));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Emits a 32-bit signed-integer atomic maximum on the buffer bound at `handle`
// by forwarding to BufferAtomicU32 with Sirit's OpAtomicSMax.
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMax);
|
||||
}
|
||||
|
@ -204,6 +230,24 @@ Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicUMax);
|
||||
}
|
||||
|
||||
// Emits a 32-bit floating-point atomic maximum on the buffer bound at `handle`.
// When the profile reports native buffer fp32 atomic min/max support the
// operation is emitted directly as OpAtomicFMax through BufferAtomicU32<true>;
// otherwise it is emulated with integer atomics on the raw bit pattern.
Id EmitBufferAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
// Native path: the float atomic is available, no emulation needed.
if (ctx.profile.supports_buffer_fp32_atomic_min_max) {
|
||||
return BufferAtomicU32<true>(ctx, inst, handle, address, value,
|
||||
&Sirit::Module::OpAtomicFMax);
|
||||
}
|
||||
|
||||
// Fallback: reinterpret the float operand as an unsigned 32-bit integer.
const auto u32_value = ctx.OpBitcast(ctx.U32[1], value);
|
||||
// Extract a 1-bit field at offset 31, i.e. the IEEE-754 sign bit of the operand.
const auto sign_bit_set =
|
||||
ctx.OpBitFieldUExtract(ctx.U32[1], u32_value, ctx.ConstU32(31u), ctx.ConstU32(1u));
|
||||
|
||||
// Sign bit set (negative operand): use the unsigned-min result, since a smaller
// unsigned bit pattern is a less negative float. Otherwise use the signed-max
// result, since signed integer order matches float order for non-negative
// values (NaN handling aside).
// NOTE(review): both atomics below are evaluated as call arguments, so both are
// emitted into the shader and both would modify memory; OpSelect only picks
// which returned value is used. Their relative emission order is also
// unspecified in C++. Confirm this is intended.
// NOTE(review): sign_bit_set is produced with type ctx.U32[1], while the SPIR-V
// spec requires OpSelect's condition to be boolean — confirm this validates.
const auto result = ctx.OpSelect(
|
||||
ctx.F32[1], sign_bit_set,
|
||||
EmitBitCastF32U32(ctx, EmitBufferAtomicUMin32(ctx, inst, handle, address, u32_value)),
|
||||
EmitBitCastF32U32(ctx, EmitBufferAtomicSMax32(ctx, inst, handle, address, u32_value)));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Emits a 32-bit atomic increment on the buffer bound at `handle` by forwarding
// to BufferAtomicU32IncDec with Sirit's OpAtomicIIncrement. Takes no value
// operand, unlike the min/max emitters.
Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
|
||||
return BufferAtomicU32IncDec(ctx, inst, handle, address, &Sirit::Module::OpAtomicIIncrement);
|
||||
}
|
||||
|
|
|
@ -92,8 +92,10 @@ Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
|||
Id EmitBufferAtomicISub32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicFMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicUMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicFMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicInc32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||
Id EmitBufferAtomicDec32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
|
||||
Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
|
|
|
@ -90,6 +90,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
return BUFFER_ATOMIC(AtomicOp::Inc, inst);
|
||||
case Opcode::BUFFER_ATOMIC_DEC:
|
||||
return BUFFER_ATOMIC(AtomicOp::Dec, inst);
|
||||
case Opcode::BUFFER_ATOMIC_FMIN:
|
||||
return BUFFER_ATOMIC(AtomicOp::Fmin, inst);
|
||||
case Opcode::BUFFER_ATOMIC_FMAX:
|
||||
return BUFFER_ATOMIC(AtomicOp::Fmax, inst);
|
||||
|
||||
// MIMG
|
||||
// Image load operations
|
||||
|
@ -357,6 +361,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
|||
return ir.BufferAtomicInc(handle, address, buffer_info);
|
||||
case AtomicOp::Dec:
|
||||
return ir.BufferAtomicDec(handle, address, buffer_info);
|
||||
case AtomicOp::Fmin:
|
||||
return ir.BufferAtomicFMin(handle, address, vdata_val, buffer_info);
|
||||
case AtomicOp::Fmax:
|
||||
return ir.BufferAtomicFMax(handle, address, vdata_val, buffer_info);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
|
|
@ -215,7 +215,8 @@ struct Info {
|
|||
bool has_image_query{};
|
||||
bool has_perspective_interp{};
|
||||
bool has_linear_interp{};
|
||||
bool uses_atomic_float_min_max{};
|
||||
bool uses_buffer_atomic_float_min_max{};
|
||||
bool uses_image_atomic_float_min_max{};
|
||||
bool uses_lane_id{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_group_ballot{};
|
||||
|
|
|
@ -504,12 +504,22 @@ Value IREmitter::BufferAtomicIMin(const Value& handle, const Value& address, con
|
|||
: Inst(Opcode::BufferAtomicUMin32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
// Builds a BufferAtomicFMin32 IR instruction with `info` as its flags and
// `handle`, `address`, `value` as its operands, and returns the resulting value.
// There is no signed/unsigned variant to choose, unlike BufferAtomicIMin.
Value IREmitter::BufferAtomicFMin(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicFMin32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
// Builds an integer buffer atomic-max IR instruction: BufferAtomicSMax32 when
// `is_signed` is true, BufferAtomicUMax32 otherwise. `info` is attached as the
// instruction flags; `handle`, `address` and `value` are the operands.
Value IREmitter::BufferAtomicIMax(const Value& handle, const Value& address, const Value& value,
|
||||
bool is_signed, BufferInstInfo info) {
|
||||
return is_signed ? Inst(Opcode::BufferAtomicSMax32, Flags{info}, handle, address, value)
|
||||
: Inst(Opcode::BufferAtomicUMax32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
// Builds a BufferAtomicFMax32 IR instruction with `info` as its flags and
// `handle`, `address`, `value` as its operands, and returns the resulting value.
// There is no signed/unsigned variant to choose, unlike BufferAtomicIMax.
Value IREmitter::BufferAtomicFMax(const Value& handle, const Value& address, const Value& value,
|
||||
BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicFMax32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
// Builds a BufferAtomicInc32 IR instruction with `info` as its flags and only
// `handle` and `address` as operands (no value operand).
Value IREmitter::BufferAtomicInc(const Value& handle, const Value& address, BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicInc32, Flags{info}, handle, address);
|
||||
}
|
||||
|
|
|
@ -140,8 +140,12 @@ public:
|
|||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicIMin(const Value& handle, const Value& address,
|
||||
const Value& value, bool is_signed, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicFMin(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicIMax(const Value& handle, const Value& address,
|
||||
const Value& value, bool is_signed, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicFMax(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicInc(const Value& handle, const Value& address,
|
||||
BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicDec(const Value& handle, const Value& address,
|
||||
|
|
|
@ -71,8 +71,10 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::BufferAtomicISub32:
|
||||
case Opcode::BufferAtomicSMin32:
|
||||
case Opcode::BufferAtomicUMin32:
|
||||
case Opcode::BufferAtomicFMin32:
|
||||
case Opcode::BufferAtomicSMax32:
|
||||
case Opcode::BufferAtomicUMax32:
|
||||
case Opcode::BufferAtomicFMax32:
|
||||
case Opcode::BufferAtomicInc32:
|
||||
case Opcode::BufferAtomicDec32:
|
||||
case Opcode::BufferAtomicAnd32:
|
||||
|
|
|
@ -125,8 +125,10 @@ OPCODE(BufferAtomicIAdd64, U64, Opaq
|
|||
OPCODE(BufferAtomicISub32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicFMin32, U32, Opaque, Opaque, F32 )
|
||||
OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32 )
|
||||
OPCODE(BufferAtomicFMax32, U32, Opaque, Opaque, F32 )
|
||||
OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, )
|
||||
OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, )
|
||||
OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||
|
|
|
@ -21,8 +21,10 @@ bool IsBufferAtomic(const IR::Inst& inst) {
|
|||
case IR::Opcode::BufferAtomicISub32:
|
||||
case IR::Opcode::BufferAtomicSMin32:
|
||||
case IR::Opcode::BufferAtomicUMin32:
|
||||
case IR::Opcode::BufferAtomicFMin32:
|
||||
case IR::Opcode::BufferAtomicSMax32:
|
||||
case IR::Opcode::BufferAtomicUMax32:
|
||||
case IR::Opcode::BufferAtomicFMax32:
|
||||
case IR::Opcode::BufferAtomicInc32:
|
||||
case IR::Opcode::BufferAtomicDec32:
|
||||
case IR::Opcode::BufferAtomicAnd32:
|
||||
|
|
|
@ -92,7 +92,11 @@ void Visit(Info& info, const IR::Inst& inst) {
|
|||
break;
|
||||
case IR::Opcode::ImageAtomicFMax32:
|
||||
case IR::Opcode::ImageAtomicFMin32:
|
||||
info.uses_atomic_float_min_max = true;
|
||||
info.uses_image_atomic_float_min_max = true;
|
||||
break;
|
||||
case IR::Opcode::BufferAtomicFMax32:
|
||||
case IR::Opcode::BufferAtomicFMin32:
|
||||
info.uses_buffer_atomic_float_min_max = true;
|
||||
break;
|
||||
case IR::Opcode::LaneId:
|
||||
info.uses_lane_id = true;
|
||||
|
|
|
@ -28,6 +28,7 @@ struct Profile {
|
|||
bool supports_native_cube_calc{};
|
||||
bool supports_trinary_minmax{};
|
||||
bool supports_robust_buffer_access{};
|
||||
bool supports_buffer_fp32_atomic_min_max{};
|
||||
bool supports_image_fp32_atomic_min_max{};
|
||||
bool supports_workgroup_explicit_memory_layout{};
|
||||
bool has_broken_spirv_clamp{};
|
||||
|
|
|
@ -281,6 +281,8 @@ bool Instance::CreateDevice() {
|
|||
if (shader_atomic_float2) {
|
||||
shader_atomic_float2_features =
|
||||
feature_chain.get<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
|
||||
LOG_INFO(Render_Vulkan, "- shaderBufferFloat32AtomicMinMax: {}",
|
||||
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax);
|
||||
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
|
||||
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
|
||||
}
|
||||
|
@ -433,6 +435,8 @@ bool Instance::CreateDevice() {
|
|||
.legacyVertexAttributes = true,
|
||||
},
|
||||
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
|
||||
.shaderBufferFloat32AtomicMinMax =
|
||||
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
|
||||
.shaderImageFloat32AtomicMinMax =
|
||||
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
|
||||
},
|
||||
|
|
|
@ -165,6 +165,13 @@ public:
|
|||
return amd_shader_trinary_minmax;
|
||||
}
|
||||
|
||||
/// Returns true when the shaderBufferFloat32AtomicMinMax feature of
|
||||
/// VK_EXT_shader_atomic_float2 is supported.
|
||||
/// Both conditions must hold: the `shader_atomic_float2` flag is set and the
/// stored feature struct reports shaderBufferFloat32AtomicMinMax.
bool IsShaderAtomicFloatBuffer32MinMaxSupported() const {
|
||||
return shader_atomic_float2 &&
|
||||
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax;
|
||||
}
|
||||
|
||||
/// Returns true when the shaderImageFloat32AtomicMinMax feature of
|
||||
/// VK_EXT_shader_atomic_float2 is supported.
|
||||
bool IsShaderAtomicFloatImage32MinMaxSupported() const {
|
||||
|
|
|
@ -216,6 +216,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
.supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
|
||||
// TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
|
||||
.supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
|
||||
.supports_buffer_fp32_atomic_min_max =
|
||||
instance_.IsShaderAtomicFloatBuffer32MinMaxSupported(),
|
||||
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
|
||||
.supports_workgroup_explicit_memory_layout =
|
||||
instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue