video_core: Account for runtime state changes when compiling shaders (#575)

* video_core: Compile shader permutations
* spirv: Only specify storage image format for atomics
* ir: Avoid cube coord patching for storage image
* spirv: Fix default attributes
* data_share: Add more instructions
* video_core: Query storage flag with runtime state
* kernel: Use std::list for semaphore
* video_core: Use texture buffers for untyped format load/store
* buffer_cache: Limit view usage
* vk_pipeline_cache: Fix invalid iterator
* image_view: Reduce log spam when alpha=1 in storage swizzle
* video_core: More features and proper spirv feature detection
* video_core: Attempt no2 for specialization
* spirv: Remove conflict
* vk_shader_cache: Small cleanup
2025-05-24 12:25:00 +00:00 · 2024-08-29 19:29:54 +03:00 · 2024-08-29 19:29:54 +03:00 · 66e96dd944
commit 66e96dd944
parent 790d19e59b
43 changed files with 1058 additions and 976 deletions
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@ -325,20 +325,8 @@ Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& ad
    }
 }

-Value IREmitter::LoadBufferFormat(int num_dwords, const Value& handle, const Value& address,
-                                  BufferInstInfo info) {
-    switch (num_dwords) {
-    case 1:
-        return Inst(Opcode::LoadBufferFormatF32, Flags{info}, handle, address);
-    case 2:
-        return Inst(Opcode::LoadBufferFormatF32x2, Flags{info}, handle, address);
-    case 3:
-        return Inst(Opcode::LoadBufferFormatF32x3, Flags{info}, handle, address);
-    case 4:
-        return Inst(Opcode::LoadBufferFormatF32x4, Flags{info}, handle, address);
-    default:
-        UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
-    }
+Value IREmitter::LoadBufferFormat(const Value& handle, const Value& address, BufferInstInfo info) {
+    return Inst(Opcode::LoadBufferFormatF32, Flags{info}, handle, address); // Single opcode for every formatted load; opcodes.inc declares its result as F32x4.
 }

 void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& address,
@ -409,24 +397,9 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
    return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
 }

-void IREmitter::StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
-                                  const Value& data, BufferInstInfo info) {
-    switch (num_dwords) {
-    case 1:
-        Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
-        break;
-    case 2:
-        Inst(Opcode::StoreBufferFormatF32x2, Flags{info}, handle, address, data);
-        break;
-    case 3:
-        Inst(Opcode::StoreBufferFormatF32x3, Flags{info}, handle, address, data);
-        break;
-    case 4:
-        Inst(Opcode::StoreBufferFormatF32x4, Flags{info}, handle, address, data);
-        break;
-    default:
-        UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
-    }
+void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
+                                  BufferInstInfo info) {
+    Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data); // Single opcode for every formatted store; opcodes.inc declares the data operand as F32x4.
 }

 U32 IREmitter::LaneId() {
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@ -92,12 +92,12 @@ public:

    [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
                                   BufferInstInfo info);
-    [[nodiscard]] Value LoadBufferFormat(int num_dwords, const Value& handle, const Value& address,
+    [[nodiscard]] Value LoadBufferFormat(const Value& handle, const Value& address,
                                         BufferInstInfo info);
    void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
                     BufferInstInfo info);
-    void StoreBufferFormat(int num_dwords, const Value& handle, const Value& address,
-                           const Value& data, BufferInstInfo info);
+    void StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
+                           BufferInstInfo info);

    [[nodiscard]] Value BufferAtomicIAdd(const Value& handle, const Value& address,
                                         const Value& value, BufferInstInfo info);
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@ -56,9 +56,6 @@ bool Inst::MayHaveSideEffects() const noexcept {
    case Opcode::StoreBufferF32x3:
    case Opcode::StoreBufferF32x4:
    case Opcode::StoreBufferFormatF32:
-    case Opcode::StoreBufferFormatF32x2:
-    case Opcode::StoreBufferFormatF32x3:
-    case Opcode::StoreBufferFormatF32x4:
    case Opcode::StoreBufferU32:
    case Opcode::BufferAtomicIAdd32:
    case Opcode::BufferAtomicSMin32:
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@ -79,19 +79,13 @@ OPCODE(LoadBufferF32,                                       F32,            Opaq
 OPCODE(LoadBufferF32x2,                                     F32x2,          Opaque,         Opaque,                                                         )
 OPCODE(LoadBufferF32x3,                                     F32x3,          Opaque,         Opaque,                                                         )
 OPCODE(LoadBufferF32x4,                                     F32x4,          Opaque,         Opaque,                                                         )
-OPCODE(LoadBufferFormatF32,                                 F32,            Opaque,         Opaque,                                                         )
-OPCODE(LoadBufferFormatF32x2,                               F32x2,          Opaque,         Opaque,                                                         )
-OPCODE(LoadBufferFormatF32x3,                               F32x3,          Opaque,         Opaque,                                                         )
-OPCODE(LoadBufferFormatF32x4,                               F32x4,          Opaque,         Opaque,                                                         )
+OPCODE(LoadBufferFormatF32,                                 F32x4,          Opaque,         Opaque,                                                         )
 OPCODE(LoadBufferU32,                                       U32,            Opaque,         Opaque,                                                         )
 OPCODE(StoreBufferF32,                                      Void,           Opaque,         Opaque,         F32,                                            )
 OPCODE(StoreBufferF32x2,                                    Void,           Opaque,         Opaque,         F32x2,                                          )
 OPCODE(StoreBufferF32x3,                                    Void,           Opaque,         Opaque,         F32x3,                                          )
 OPCODE(StoreBufferF32x4,                                    Void,           Opaque,         Opaque,         F32x4,                                          )
-OPCODE(StoreBufferFormatF32,                                Void,           Opaque,         Opaque,         F32,                                            )
-OPCODE(StoreBufferFormatF32x2,                              Void,           Opaque,         Opaque,         F32x2,                                          )
-OPCODE(StoreBufferFormatF32x3,                              Void,           Opaque,         Opaque,         F32x3,                                          )
-OPCODE(StoreBufferFormatF32x4,                              Void,           Opaque,         Opaque,         F32x4,                                          )
+OPCODE(StoreBufferFormatF32,                                Void,           Opaque,         Opaque,         F32x4,                                          )
 OPCODE(StoreBufferU32,                                      Void,           Opaque,         Opaque,         U32,                                            )

 // Buffer atomic operations
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@ -3,6 +3,7 @@

 #include <algorithm>
 #include <boost/container/small_vector.hpp>
+#include "common/alignment.h"
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/breadth_first_search.h"
 #include "shader_recompiler/ir/ir_emitter.h"
@ -45,10 +46,6 @@ bool IsBufferStore(const IR::Inst& inst) {
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
-    case IR::Opcode::StoreBufferFormatF32:
-    case IR::Opcode::StoreBufferFormatF32x2:
-    case IR::Opcode::StoreBufferFormatF32x3:
-    case IR::Opcode::StoreBufferFormatF32x4:
    case IR::Opcode::StoreBufferU32:
        return true;
    default:
@ -62,10 +59,6 @@ bool IsBufferInstruction(const IR::Inst& inst) {
    case IR::Opcode::LoadBufferF32x2:
    case IR::Opcode::LoadBufferF32x3:
    case IR::Opcode::LoadBufferF32x4:
-    case IR::Opcode::LoadBufferFormatF32:
-    case IR::Opcode::LoadBufferFormatF32x2:
-    case IR::Opcode::LoadBufferFormatF32x3:
-    case IR::Opcode::LoadBufferFormatF32x4:
    case IR::Opcode::LoadBufferU32:
    case IR::Opcode::ReadConstBuffer:
    case IR::Opcode::ReadConstBufferU32:
@ -75,6 +68,11 @@ bool IsBufferInstruction(const IR::Inst& inst) {
    }
 }

+bool IsTextureBufferInstruction(const IR::Inst& inst) { // True only for the formatted buffer load/store opcodes; ResourceTrackingPass sends these to PatchTextureBufferInstruction.
+    return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
+           inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
+}
+
 static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
    switch (num_format) {
    case AmdGpu::NumberFormat::Float:
@ -100,28 +98,6 @@ static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_for

 IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
    switch (inst.GetOpcode()) {
-    case IR::Opcode::LoadBufferFormatF32:
-    case IR::Opcode::LoadBufferFormatF32x2:
-    case IR::Opcode::LoadBufferFormatF32x3:
-    case IR::Opcode::LoadBufferFormatF32x4:
-    case IR::Opcode::StoreBufferFormatF32:
-    case IR::Opcode::StoreBufferFormatF32x2:
-    case IR::Opcode::StoreBufferFormatF32x3:
-    case IR::Opcode::StoreBufferFormatF32x4:
-        switch (num_format) {
-        case AmdGpu::NumberFormat::Unorm:
-        case AmdGpu::NumberFormat::Snorm:
-        case AmdGpu::NumberFormat::Uscaled:
-        case AmdGpu::NumberFormat::Sscaled:
-        case AmdGpu::NumberFormat::Uint:
-        case AmdGpu::NumberFormat::Sint:
-        case AmdGpu::NumberFormat::SnormNz:
-            return IR::Type::U32;
-        case AmdGpu::NumberFormat::Float:
-            return IR::Type::F32;
-        default:
-            UNREACHABLE();
-        }
    case IR::Opcode::LoadBufferF32:
    case IR::Opcode::LoadBufferF32x2:
    case IR::Opcode::LoadBufferF32x3:
@ -143,20 +119,8 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
    }
 }

-bool IsImageInstruction(const IR::Inst& inst) {
+bool IsImageAtomicInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
-    case IR::Opcode::ImageSampleExplicitLod:
-    case IR::Opcode::ImageSampleImplicitLod:
-    case IR::Opcode::ImageSampleDrefExplicitLod:
-    case IR::Opcode::ImageSampleDrefImplicitLod:
-    case IR::Opcode::ImageFetch:
-    case IR::Opcode::ImageGather:
-    case IR::Opcode::ImageGatherDref:
-    case IR::Opcode::ImageQueryDimensions:
-    case IR::Opcode::ImageQueryLod:
-    case IR::Opcode::ImageGradient:
-    case IR::Opcode::ImageRead:
-    case IR::Opcode::ImageWrite:
    case IR::Opcode::ImageAtomicIAdd32:
    case IR::Opcode::ImageAtomicSMin32:
    case IR::Opcode::ImageAtomicUMin32:
@ -178,20 +142,27 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::ImageWrite:
    case IR::Opcode::ImageRead:
-    case IR::Opcode::ImageAtomicIAdd32:
-    case IR::Opcode::ImageAtomicSMin32:
-    case IR::Opcode::ImageAtomicUMin32:
-    case IR::Opcode::ImageAtomicSMax32:
-    case IR::Opcode::ImageAtomicUMax32:
-    case IR::Opcode::ImageAtomicInc32:
-    case IR::Opcode::ImageAtomicDec32:
-    case IR::Opcode::ImageAtomicAnd32:
-    case IR::Opcode::ImageAtomicOr32:
-    case IR::Opcode::ImageAtomicXor32:
-    case IR::Opcode::ImageAtomicExchange32:
        return true;
    default:
-        return false;
+        return IsImageAtomicInstruction(inst);
+    }
+}
+
+bool IsImageInstruction(const IR::Inst& inst) { // True for sampling, fetch, gather, query and gradient opcodes, plus anything the storage check below accepts.
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::ImageSampleExplicitLod:
+    case IR::Opcode::ImageSampleImplicitLod:
+    case IR::Opcode::ImageSampleDrefExplicitLod:
+    case IR::Opcode::ImageSampleDrefImplicitLod:
+    case IR::Opcode::ImageFetch:
+    case IR::Opcode::ImageGather:
+    case IR::Opcode::ImageGatherDref:
+    case IR::Opcode::ImageQueryDimensions:
+    case IR::Opcode::ImageQueryLod:
+    case IR::Opcode::ImageGradient:
+        return true;
+    default:
+        return IsImageStorageInstruction(inst); // Covers ImageRead/ImageWrite and, through its own default case, the image atomics.
    }
 }

@ -214,7 +185,8 @@ u32 ImageOffsetArgumentPosition(const IR::Inst& inst) {
 class Descriptors {
 public:
    explicit Descriptors(Info& info_)
-        : info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images},
+        : info{info_}, buffer_resources{info_.buffers},
+          texture_buffer_resources{info_.texture_buffers}, image_resources{info_.images},
          sampler_resources{info_.samplers} {}

    u32 Add(const BufferResource& desc) {
@ -224,13 +196,21 @@ public:
                   desc.inline_cbuf == existing.inline_cbuf;
        })};
        auto& buffer = buffer_resources[index];
-        ASSERT(buffer.length == desc.length);
-        buffer.is_storage |= desc.is_storage;
        buffer.used_types |= desc.used_types;
        buffer.is_written |= desc.is_written;
        return index;
    }

+    u32 Add(const TextureBufferResource& desc) { // Returns the index of desc's entry in texture_buffer_resources.
+        const u32 index{Add(texture_buffer_resources, desc, [&desc](const auto& existing) { // NOTE(review): presumably the private Add helper reuses an entry the predicate matches — confirm.
+            return desc.sgpr_base == existing.sgpr_base && // Entries are compared by sharp location only (sgpr_base + dword_offset).
+                   desc.dword_offset == existing.dword_offset;
+        })};
+        auto& buffer = texture_buffer_resources[index];
+        buffer.is_written |= desc.is_written; // OR-ed in, so the flag stays set once any descriptor at this index was a write.
+        return index;
+    }
+
    u32 Add(const ImageResource& desc) {
        const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
            return desc.sgpr_base == existing.sgpr_base &&
@ -247,7 +227,7 @@ public:
                return true;
            }
            // Samplers with different bindings might still be the same.
-            return existing.GetSsharp(info) == desc.GetSsharp(info);
+            return existing.GetSharp(info) == desc.GetSharp(info);
        })};
        return index;
    }
@ -265,6 +245,7 @@ private:

    const Info& info;
    BufferResourceList& buffer_resources;
+    TextureBufferResourceList& texture_buffer_resources;
    ImageResourceList& image_resources;
    SamplerResourceList& sampler_resources;
 };
@ -361,33 +342,6 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
    };
 }

-static constexpr size_t MaxUboSize = 65536;
-
-static bool IsLoadBufferFormat(const IR::Inst& inst) {
-    switch (inst.GetOpcode()) {
-    case IR::Opcode::LoadBufferFormatF32:
-    case IR::Opcode::LoadBufferFormatF32x2:
-    case IR::Opcode::LoadBufferFormatF32x3:
-    case IR::Opcode::LoadBufferFormatF32x4:
-        return true;
-    default:
-        return false;
-    }
-}
-
-static u32 BufferLength(const AmdGpu::Buffer& buffer) {
-    const auto stride = buffer.GetStride();
-    if (stride < sizeof(f32)) {
-        ASSERT(sizeof(f32) % stride == 0);
-        return (((buffer.num_records - 1) / sizeof(f32)) + 1) * stride;
-    } else if (stride == sizeof(f32)) {
-        return buffer.num_records;
-    } else {
-        ASSERT(stride % sizeof(f32) == 0);
-        return buffer.num_records * (stride / sizeof(f32));
-    }
-}
-
 s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
                        AmdGpu::Buffer& cbuf) {

@ -414,10 +368,8 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
    return descriptors.Add(BufferResource{
        .sgpr_base = std::numeric_limits<u32>::max(),
        .dword_offset = 0,
-        .length = BufferLength(cbuf),
        .used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
        .inline_cbuf = cbuf,
-        .is_storage = IsBufferStore(inst) || cbuf.GetSize() > MaxUboSize,
    });
 }

@ -429,28 +381,17 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
        IR::Inst* handle = inst.Arg(0).InstRecursive();
        IR::Inst* producer = handle->Arg(0).InstRecursive();
        const auto sharp = TrackSharp(producer);
-        const bool is_store = IsBufferStore(inst);
        buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset);
        binding = descriptors.Add(BufferResource{
            .sgpr_base = sharp.sgpr_base,
            .dword_offset = sharp.dword_offset,
-            .length = BufferLength(buffer),
            .used_types = BufferDataType(inst, buffer.GetNumberFmt()),
-            .is_storage = is_store || buffer.GetSize() > MaxUboSize,
-            .is_written = is_store,
+            .is_written = IsBufferStore(inst),
        });
    }

    // Update buffer descriptor format.
    const auto inst_info = inst.Flags<IR::BufferInstInfo>();
-    auto& buffer_desc = info.buffers[binding];
-    if (inst_info.is_typed) {
-        buffer_desc.dfmt = inst_info.dmft;
-        buffer_desc.nfmt = inst_info.nfmt;
-    } else {
-        buffer_desc.dfmt = buffer.GetDataFmt();
-        buffer_desc.nfmt = buffer.GetNumberFmt();
-    }

    // Replace handle with binding index in buffer resource list.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@ -463,20 +404,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
        return;
    }

-    if (IsLoadBufferFormat(inst)) {
-        if (UseFP16(buffer.GetDataFmt(), buffer.GetNumberFmt())) {
-            info.uses_fp16 = true;
-        }
-    } else {
-        const u32 stride = buffer.GetStride();
-        if (stride < 4) {
-            LOG_WARNING(Render_Vulkan,
-                        "non-formatting load_buffer_* is not implemented for stride {}", stride);
-        }
-    }
-
    // Compute address of the buffer using the stride.
-    // Todo: What if buffer is rebound with different stride?
    IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());
    if (inst_info.index_enable) {
        const IR::U32 index = inst_info.offset_enable ? IR::U32{ir.CompositeExtract(inst.Arg(1), 0)}
@ -491,8 +419,31 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
    inst.SetArg(1, address);
 }

+void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
+                                   Descriptors& descriptors) { // Registers a TextureBufferResource for inst and replaces its handle argument (arg 0) with the binding index.
+    const IR::Inst* handle = inst.Arg(0).InstRecursive();
+    const IR::Inst* producer = handle->Arg(0).InstRecursive();
+    const auto sharp = TrackSharp(producer);
+    const auto buffer = info.ReadUd<AmdGpu::Buffer>(sharp.sgpr_base, sharp.dword_offset); // Buffer descriptor read via info.ReadUd at the tracked sharp location.
+    const s32 binding = descriptors.Add(TextureBufferResource{
+        .sgpr_base = sharp.sgpr_base,
+        .dword_offset = sharp.dword_offset,
+        .nfmt = buffer.GetNumberFmt(),
+        .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32, // Only the formatted store marks the resource as written.
+    });
+
+    // Replace handle with binding index in texture buffer resource list.
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    inst.SetArg(0, ir.Imm32(binding));
+    ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); // NOTE(review): checked after patching; presumably these addressing modes are unsupported on this path — confirm.
+}
+
 IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
-                         const IR::Value& z) {
+                         const IR::Value& z, bool is_storage) {
+    // When cubemap is written with imageStore it is treated like 2DArray.
+    if (is_storage) {
+        return ir.CompositeConstruct(s, t, z);
+    }
    // We need to fix x and y coordinate,
    // because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32.
    // We already force the scale value to be 1.0 when handling v_cubema_f32,
@ -530,13 +481,15 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
        return;
    }
    ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
+    const bool is_storage = IsImageStorageInstruction(inst);
    u32 image_binding = descriptors.Add(ImageResource{
        .sgpr_base = tsharp.sgpr_base,
        .dword_offset = tsharp.dword_offset,
        .type = image.GetType(),
        .nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
-        .is_storage = IsImageStorageInstruction(inst),
+        .is_storage = is_storage,
        .is_depth = bool(inst_info.is_depth),
+        .is_atomic = IsImageAtomicInstruction(inst),
    });

    // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
@ -593,7 +546,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
        case AmdGpu::ImageType::Color3D: // x, y, z
            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
        case AmdGpu::ImageType::Cube: // x, y, face
-            return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
+            return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_storage),
+                    body->Arg(3)};
        default:
            UNREACHABLE_MSG("Unknown image type {}", image.GetType());
        }
@ -668,6 +622,10 @@ void ResourceTrackingPass(IR::Program& program) {
                PatchBufferInstruction(*block, inst, info, descriptors);
                continue;
            }
+            if (IsTextureBufferInstruction(inst)) {
+                PatchTextureBufferInstruction(*block, inst, info, descriptors);
+                continue;
+            }
            if (IsImageInstruction(inst)) {
                PatchImageInstruction(*block, inst, info, descriptors);
            }
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@ -29,6 +29,12 @@ void Visit(Info& info, IR::Inst& inst) {
    case IR::Opcode::ImageWrite:
        info.has_storage_images = true;
        break;
+    case IR::Opcode::LoadBufferFormatF32:
+        info.has_texel_buffers = true;
+        break;
+    case IR::Opcode::StoreBufferFormatF32:
+        info.has_image_buffers = true;
+        break;
    case IR::Opcode::QuadShuffle:
        info.uses_group_quad = true;
        break;
@ -44,6 +50,9 @@ void Visit(Info& info, IR::Inst& inst) {
    case IR::Opcode::ImageQueryLod:
        info.has_image_query = true;
        break;
+    case IR::Opcode::LaneId:
+        info.uses_lane_id = true;
+        break;
    default:
        break;
    }
--- a/src/shader_recompiler/ir/program.h
+++ b/src/shader_recompiler/ir/program.h
@ -12,11 +12,13 @@
 namespace Shader::IR {

 struct Program {
+    explicit Program(Info& info_) : info{info_} {} // Binds to a caller-provided Info; the referenced object must outlive this Program.
+
    AbstractSyntaxList syntax_list;
    BlockList blocks;
    BlockList post_order_blocks;
    std::vector<Gcn::GcnInst> ins_list;
-    Info info;
+    Info& info; // Reference member: Program refers to an external Info instead of holding its own copy.
 };

 [[nodiscard]] std::string DumpProgram(const Program& program);
--- a/src/shader_recompiler/ir/reg.h
+++ b/src/shader_recompiler/ir/reg.h
@ -66,9 +66,6 @@ union BufferInstInfo {
    BitField<0, 1, u32> index_enable;
    BitField<1, 1, u32> offset_enable;
    BitField<2, 12, u32> inst_offset;
-    BitField<14, 4, AmdGpu::DataFormat> dmft;
-    BitField<18, 3, AmdGpu::NumberFormat> nfmt;
-    BitField<21, 1, u32> is_typed;
 };

 enum class ScalarReg : u32 {