Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-05-19 09:54:54 +00:00)
Implemented load_buffer_format_* conversions (#295)
* Implemented load_buffer_format_* conversions
* clang-format insists on ugly things
parent c6cdfcfb0b
commit f9e96793cc
15 changed files with 475 additions and 91 deletions
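
For reference, the number-format conversions this commit introduces (see ConvertValue in the diff below) normalize the raw bits of each component: Unorm divides by 2^n - 1, Snorm by 2^(n-1) - 1, and SnormNz applies (x * 2 + 1) / (SMAX * 2). A minimal host-side sketch of that math follows; ConvertUnorm, ConvertSnorm, ConvertSnormNz and the main() driver are illustrative names only, not part of the commit.

// Scalar reference for the SPIR-V arithmetic emitted by ConvertValue.
// These helpers are illustrative, not emitter code.
#include <cstdint>
#include <cstdio>

static float ConvertUnorm(std::uint32_t raw, std::uint32_t bit_width) {
    // Unorm: x / (2^n - 1), i.e. OpFDiv by UXBitsMax(bit_width).
    return static_cast<float>(raw) / static_cast<float>((1u << bit_width) - 1u);
}

static float ConvertSnorm(std::int32_t raw, std::uint32_t bit_width) {
    // Snorm: x / (2^(n-1) - 1), i.e. OpFDiv by SXBitsMax(bit_width).
    return static_cast<float>(raw) / static_cast<float>((1 << (bit_width - 1)) - 1);
}

static float ConvertSnormNz(std::int32_t raw, std::uint32_t bit_width) {
    // SnormNz: (x * 2 + 1) / (SMAX * 2), matching the comment in ConvertValue.
    const float smax = static_cast<float>((1 << (bit_width - 1)) - 1);
    return (static_cast<float>(raw) * 2.0f + 1.0f) / (smax * 2.0f);
}

int main() {
    std::printf("unorm8(255)  = %f\n", ConvertUnorm(255u, 8));  // 1.000000
    std::printf("snorm8(-127) = %f\n", ConvertSnorm(-127, 8));  // -1.000000
    std::printf("snormNz8(63) = %f\n", ConvertSnormNz(63, 8));  // 0.500000
}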
@@ -4,6 +4,8 @@
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"

#include <magic_enum.hpp>

namespace Shader::Backend::SPIRV {
namespace {

@@ -209,57 +211,216 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
    ctx.OpStore(pointer, value);
}

Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    const auto info = inst->Flags<IR::BufferInstInfo>();
    const auto& buffer = ctx.buffers[handle];
    if (info.index_enable && info.offset_enable) {
        UNREACHABLE();
    } else if (info.index_enable) {
        const Id ptr{
            ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, address)};
        return ctx.OpLoad(buffer.data_types->Get(1), ptr);
    }
    UNREACHABLE();
}

Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferF32(ctx, inst, handle, address);
}

Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    const auto info = inst->Flags<IR::BufferInstInfo>();
template <int N>
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
    const auto& buffer = ctx.buffers[handle];
    boost::container::static_vector<Id, 2> ids;
    for (u32 i = 0; i < 2; i++) {
        const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
        const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
        ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
    Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
    if constexpr (N == 1) {
        const Id ptr{
            ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, address)};
        return ctx.OpLoad(buffer.data_types->Get(1), ptr);
    } else {
        boost::container::static_vector<Id, N> ids;
        for (u32 i = 0; i < N; i++) {
            index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
            const Id ptr{
                ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
            ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
        }
        return ctx.OpCompositeConstruct(buffer.data_types->Get(N), ids);
    }
    return ctx.OpCompositeConstruct(buffer.data_types->Get(2), ids);
}

Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    const auto info = inst->Flags<IR::BufferInstInfo>();
    const auto& buffer = ctx.buffers[handle];
    boost::container::static_vector<Id, 3> ids;
    for (u32 i = 0; i < 3; i++) {
        const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
        const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
        ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
    }
    return ctx.OpCompositeConstruct(buffer.data_types->Get(3), ids);
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
    return EmitLoadBufferF32xN<1>(ctx, handle, address);
}

Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    const auto info = inst->Flags<IR::BufferInstInfo>();
    const auto& buffer = ctx.buffers[handle];
    boost::container::static_vector<Id, 4> ids;
    for (u32 i = 0; i < 4; i++) {
        const Id index{ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i))};
        const Id ptr{ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index)};
        ids.push_back(ctx.OpLoad(buffer.data_types->Get(1), ptr));
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
    return EmitLoadBufferF32xN<2>(ctx, handle, address);
}

Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
    return EmitLoadBufferF32xN<3>(ctx, handle, address);
}

Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
    return EmitLoadBufferF32xN<4>(ctx, handle, address);
}

static bool IsSignedInteger(AmdGpu::NumberFormat format) {
    switch (format) {
    case AmdGpu::NumberFormat::Unorm:
    case AmdGpu::NumberFormat::Uscaled:
    case AmdGpu::NumberFormat::Uint:
        return false;
    case AmdGpu::NumberFormat::Snorm:
    case AmdGpu::NumberFormat::Sscaled:
    case AmdGpu::NumberFormat::Sint:
    case AmdGpu::NumberFormat::SnormNz:
        return true;
    case AmdGpu::NumberFormat::Float:
    default:
        UNREACHABLE();
    }
    return ctx.OpCompositeConstruct(buffer.data_types->Get(4), ids);
}

static u32 UXBitsMax(u32 bit_width) {
    return (1u << bit_width) - 1u;
}

static u32 SXBitsMax(u32 bit_width) {
    return (1u << (bit_width - 1u)) - 1u;
}

static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, u32 bit_width) {
    switch (format) {
    case AmdGpu::NumberFormat::Unorm:
        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width))));
    case AmdGpu::NumberFormat::Snorm:
        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width))));
    case AmdGpu::NumberFormat::SnormNz:
        // (x * 2 + 1) / (Format::SMAX * 2)
        value = ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(2.f));
        value = ctx.OpFAdd(ctx.F32[1], value, ctx.ConstF32(1.f));
        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width) * 2)));
    case AmdGpu::NumberFormat::Uscaled:
    case AmdGpu::NumberFormat::Sscaled:
    case AmdGpu::NumberFormat::Uint:
    case AmdGpu::NumberFormat::Sint:
    case AmdGpu::NumberFormat::Float:
        return value;
    default:
UNREACHABLE_MSG("Unsupported number fromat for conversion: {}",
|
||||
magic_enum::enum_name(format));
|
||||
    }
}

static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offset) {
    Id comp_offset = ctx.ConstU32(bit_offset);
    if (stride < 4) {
        // comp_offset += (address % 4) * 8;
        const Id byte_offset = ctx.OpUMod(ctx.U32[1], address, ctx.ConstU32(4u));
        const Id bit_offset = ctx.OpShiftLeftLogical(ctx.U32[1], byte_offset, ctx.ConstU32(3u));
        comp_offset = ctx.OpIAdd(ctx.U32[1], comp_offset, bit_offset);
    }
    return comp_offset;
}

static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
    const auto& buffer = ctx.buffers[handle];
    const auto format = buffer.buffer.GetDataFmt();
    switch (format) {
    case AmdGpu::DataFormat::FormatInvalid:
        return ctx.f32_zero_value;
    case AmdGpu::DataFormat::Format8:
    case AmdGpu::DataFormat::Format16:
    case AmdGpu::DataFormat::Format32:
    case AmdGpu::DataFormat::Format8_8:
    case AmdGpu::DataFormat::Format16_16:
    case AmdGpu::DataFormat::Format10_11_11:
    case AmdGpu::DataFormat::Format11_11_10:
    case AmdGpu::DataFormat::Format10_10_10_2:
    case AmdGpu::DataFormat::Format2_10_10_10:
    case AmdGpu::DataFormat::Format8_8_8_8:
    case AmdGpu::DataFormat::Format32_32:
    case AmdGpu::DataFormat::Format16_16_16_16:
    case AmdGpu::DataFormat::Format32_32_32:
    case AmdGpu::DataFormat::Format32_32_32_32: {
        const u32 num_components = AmdGpu::NumComponents(format);
        if (comp >= num_components) {
            return ctx.f32_zero_value;
        }

        // uint index = address / 4;
        Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
        const u32 stride = buffer.buffer.GetStride();
        if (stride > 4) {
            const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32);
            if (index_offset > 0) {
                // index += index_offset;
                index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(index_offset));
            }
        }
        const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);

        const u32 bit_offset = AmdGpu::ComponentOffset(format, comp) % 32;
        const u32 bit_width = AmdGpu::ComponentBits(format, comp);
        const auto num_format = buffer.buffer.GetNumberFmt();
        if (num_format == AmdGpu::NumberFormat::Float) {
            if (bit_width == 32) {
                return ctx.OpLoad(ctx.F32[1], ptr);
            } else if (bit_width == 16) {
                const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
                Id value = ctx.OpLoad(ctx.U32[1], ptr);
                value =
                    ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width));
                value = ctx.OpSConvert(ctx.U16, value);
                value = ctx.OpBitcast(ctx.F16[1], value);
                return ctx.OpFConvert(ctx.F32[1], value);
            } else {
                UNREACHABLE_MSG("Invalid float bit width {}", bit_width);
            }
        } else {
            Id value = ctx.OpLoad(ctx.U32[1], ptr);
            const bool is_signed = IsSignedInteger(num_format);
            if (bit_width < 32) {
                const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
                if (is_signed) {
                    value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset,
                                                   ctx.ConstU32(bit_width));
                    value = ctx.OpConvertSToF(ctx.F32[1], value);
                } else {
                    value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset,
                                                   ctx.ConstU32(bit_width));
                    value = ctx.OpConvertUToF(ctx.F32[1], value);
                }
            } else {
                if (is_signed) {
                    value = ctx.OpConvertSToF(ctx.F32[1], value);
                } else {
                    value = ctx.OpConvertUToF(ctx.F32[1], value);
                }
            }
            return ConvertValue(ctx, value, num_format, bit_width);
        }
        break;
    }
    default:
        UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format));
    }
}

template <int N>
static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    if constexpr (N == 1) {
        return GetBufferFormatValue(ctx, handle, address, 0);
    } else {
        boost::container::static_vector<Id, N> ids;
        for (u32 i = 0; i < N; i++) {
            ids.push_back(GetBufferFormatValue(ctx, handle, address, i));
        }
        return ctx.OpCompositeConstruct(ctx.F32[N], ids);
    }
}

Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferFormatF32xN<1>(ctx, inst, handle, address);
}

Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferFormatF32xN<2>(ctx, inst, handle, address);
}

Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferFormatF32xN<3>(ctx, inst, handle, address);
}

Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
    return EmitLoadBufferFormatF32xN<4>(ctx, inst, handle, address);
}

void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
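
To make the extraction path above concrete: for a dword-aligned fetch from a Format8_8_8_8 / Unorm buffer, GetBufferFormatValue computes index = address / 4, takes the component's bit offset and width from the data format, extracts that bit field, and normalizes it via ConvertValue. A hedged host-side sketch under those assumptions (it omits the stride < 4 byte-offset adjustment done by ComponentOffset; LoadUnorm8Component and the sample dword are hypothetical):

// Host-side walk-through of the packed-component fetch, assuming Format8_8_8_8 / Unorm
// with 8-bit components at bit offsets 0/8/16/24 and a dword-aligned address.
#include <cstdint>
#include <cstdio>

static float LoadUnorm8Component(const std::uint32_t* words, std::uint32_t address, unsigned comp) {
    const std::uint32_t index = address >> 2;                      // uint index = address / 4
    const std::uint32_t bit_offset = comp * 8u;                    // ComponentOffset(format, comp) % 32
    const std::uint32_t mask = (1u << 8u) - 1u;                    // UXBitsMax(ComponentBits(format, comp))
    const std::uint32_t raw = (words[index] >> bit_offset) & mask; // OpBitFieldUExtract
    return static_cast<float>(raw) / static_cast<float>(mask);     // ConvertValue, Unorm case
}

int main() {
    const std::uint32_t buffer[] = {0x80FF4000u}; // packed A=0x80, B=0xFF, G=0x40, R=0x00
    for (unsigned comp = 0; comp < 4; ++comp) {
        std::printf("comp %u = %f\n", comp, LoadUnorm8Component(buffer, 0u, comp));
    }
}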
@@ -66,6 +66,10 @@ Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -301,9 +301,7 @@ void EmitContext::DefineBuffers(const Info& info) {
    for (u32 i = 0; const auto& buffer : info.buffers) {
        const auto* data_types = True(buffer.used_types & IR::Type::F32) ? &F32 : &U32;
        const Id data_type = (*data_types)[1];
        const u32 stride = buffer.stride == 0 ? 1 : buffer.stride;
        const u32 num_elements = stride * buffer.num_records;
        const Id record_array_type{TypeArray(data_type, ConstU32(num_elements))};
        const Id record_array_type{TypeArray(data_type, ConstU32(buffer.length))};
        const Id struct_type{TypeStruct(record_array_type)};
        if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
            Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
@@ -333,6 +331,7 @@ void EmitContext::DefineBuffers(const Info& info) {
            .id = id,
            .data_types = data_types,
            .pointer_type = pointer_type,
            .buffer = buffer.GetVsharp(info),
        });
        interfaces.push_back(id);
        i++;
@@ -201,6 +201,7 @@ public:
        Id id;
        const VectorIds* data_types;
        Id pointer_type;
        AmdGpu::Buffer buffer;
    };

    u32& binding;