renderer_vulkan: Parse fetch shader per-pipeline (#1656)

* shader_recompiler: Read image format info directly from sharps instead of storing in shader info.
* renderer_vulkan: Parse fetch shader per-pipeline
* Few minor fixes.
* shader_recompiler: Specialize on vertex attribute number types.
* shader_recompiler: Move GetDrawOffsets to fetch shader
2025-07-12 04:35:56 +00:00 · 2024-12-04 03:03:47 -08:00 · 2024-12-04 03:03:47 -08:00 · 920acb8d8b
commit 920acb8d8b
parent 74b091fd08
21 changed files with 286 additions and 182 deletions
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@ -187,7 +187,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
 Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool has_mips) {
    const auto& texture = ctx.images[handle & 0xFFFF];
    const Id image = ctx.OpLoad(texture.image_type, texture.id);
-    const auto type = ctx.info.images[handle & 0xFFFF].type;
+    const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info);
+    const auto type = sharp.GetBoundType();
    const Id zero = ctx.u32_zero_value;
    const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }};
    const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@ -4,6 +4,7 @@
 #include "common/assert.h"
 #include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/ir/passes/srt.h"
 #include "video_core/amdgpu/types.h"

@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() {
 }

 const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
        return ctx.F32;
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
        return ctx.S32;
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
        return ctx.U32;
    default:
        break;
@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {

 EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
                                                          u32 num_components, bool output) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
        return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
        return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
        return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
    default:
        break;
@ -280,33 +269,42 @@ void EmitContext::DefineInputs() {
        base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
        instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);

-        for (const auto& input : info.vs_inputs) {
-            ASSERT(input.binding < IR::NumParams);
-            const Id type{GetAttributeType(*this, input.fmt)[4]};
-            if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
-                input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
-
+        const auto fetch_shader = Gcn::ParseFetchShader(info);
+        if (!fetch_shader) {
+            break;
+        }
+        for (const auto& attrib : fetch_shader->attributes) {
+            ASSERT(attrib.semantic < IR::NumParams);
+            const auto sharp = attrib.GetSharp(info);
+            const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
+            if (attrib.UsesStepRates()) {
                const u32 rate_idx =
-                    input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
-                                                                                             : 1;
+                    attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
+                                                                                                : 1;
+                const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
+                const auto buffer =
+                    std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
+                        return buffer.instance_attrib == attrib.semantic;
+                    });
                // Note that we pass index rather than Id
-                input_params[input.binding] = SpirvAttribute{
+                input_params[attrib.semantic] = SpirvAttribute{
                    .id = rate_idx,
                    .pointer_type = input_u32,
                    .component_type = U32[1],
-                    .num_components = input.num_components,
+                    .num_components = std::min<u16>(attrib.num_elements, num_components),
                    .is_integer = true,
                    .is_loaded = false,
-                    .buffer_handle = input.instance_data_buf,
+                    .buffer_handle = int(buffer - info.buffers.begin()),
                };
            } else {
-                Id id{DefineInput(type, input.binding)};
-                if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
-                    Name(id, fmt::format("vs_instance_attr{}", input.binding));
+                Id id{DefineInput(type, attrib.semantic)};
+                if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
+                    Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
                } else {
-                    Name(id, fmt::format("vs_in_attr{}", input.binding));
+                    Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
                }
-                input_params[input.binding] = GetAttributeInfo(input.fmt, id, 4, false);
+                input_params[attrib.semantic] =
+                    GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
                interfaces.push_back(id);
            }
        }
@ -553,9 +551,10 @@ void EmitContext::DefineBuffers() {

 void EmitContext::DefineTextureBuffers() {
    for (const auto& desc : info.texture_buffers) {
-        const bool is_integer =
-            desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint;
-        const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)};
+        const auto sharp = desc.GetSharp(info);
+        const auto nfmt = sharp.GetNumberFmt();
+        const bool is_integer = AmdGpu::IsInteger(nfmt);
+        const VectorIds& sampled_type{GetAttributeType(*this, nfmt)};
        const u32 sampled = desc.is_written ? 2 : 1;
        const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
                                      sampled, spv::ImageFormat::Unknown)};
@ -650,10 +649,11 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
 }

 Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
-    const auto image = ctx.info.ReadUdSharp<AmdGpu::Image>(desc.sharp_idx);
+    const auto image = desc.GetSharp(ctx.info);
    const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
+    const auto type = image.GetBoundType();
    const u32 sampled = desc.is_storage ? 2 : 1;
-    switch (desc.type) {
+    switch (type) {
    case AmdGpu::ImageType::Color1D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
    case AmdGpu::ImageType::Color1DArray:
@ -672,14 +672,15 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
    default:
        break;
    }
-    throw InvalidArgument("Invalid texture type {}", desc.type);
+    throw InvalidArgument("Invalid texture type {}", type);
 }

 void EmitContext::DefineImagesAndSamplers() {
    for (const auto& image_desc : info.images) {
-        const bool is_integer = image_desc.nfmt == AmdGpu::NumberFormat::Uint ||
-                                image_desc.nfmt == AmdGpu::NumberFormat::Sint;
-        const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
+        const auto sharp = image_desc.GetSharp(info);
+        const auto nfmt = sharp.GetNumberFmt();
+        const bool is_integer = AmdGpu::IsInteger(nfmt);
+        const VectorIds& data_types = GetAttributeType(*this, nfmt);
        const Id sampled_type = data_types[1];
        const Id image_type{ImageType(*this, image_desc, sampled_type)};
        const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
--- a/src/shader_recompiler/frontend/fetch_shader.cpp
+++ b/src/shader_recompiler/frontend/fetch_shader.cpp
@ -34,8 +34,14 @@ namespace Shader::Gcn {
 * We take the reverse way, extract the original input semantics from these instructions.
 **/

-FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
-    FetchShaderData data{};
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
+    if (!info.has_fetch_shader) {
+        return std::nullopt;
+    }
+    const u32* code;
+    std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
+
+    FetchShaderData data{.code = code};
    GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
    GcnDecodeContext decoder;

@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
    u32 semantic_index = 0;
    while (!code_slice.atEnd()) {
        const auto inst = decoder.decodeInstruction(code_slice);
-        *out_size += inst.length;
+        data.size += inst.length;

        if (inst.opcode == Opcode::S_SETPC_B64) {
            break;
--- a/src/shader_recompiler/frontend/fetch_shader.h
+++ b/src/shader_recompiler/frontend/fetch_shader.h
@ -3,26 +3,80 @@

 #pragma once

+#include <ranges>
 #include <vector>
 #include "common/types.h"
+#include "shader_recompiler/info.h"

 namespace Shader::Gcn {

 struct VertexAttribute {
+    enum InstanceIdType : u8 {
+        None = 0,
+        OverStepRate0 = 1,
+        OverStepRate1 = 2,
+        Plain = 3,
+    };
+
    u8 semantic;      ///< Semantic index of the attribute
    u8 dest_vgpr;     ///< Destination VGPR to load first component.
    u8 num_elements;  ///< Number of components to load
    u8 sgpr_base;     ///< SGPR that contains the pointer to the list of vertex V#
    u8 dword_offset;  ///< The dword offset of the V# that describes this attribute.
    u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
+
+    [[nodiscard]] InstanceIdType GetStepRate() const {
+        return static_cast<InstanceIdType>(instance_data);
+    }
+
+    [[nodiscard]] bool UsesStepRates() const {
+        const auto step_rate = GetStepRate();
+        return step_rate == OverStepRate0 || step_rate == OverStepRate1;
+    }
+
+    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
+        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    }
+
+    bool operator==(const VertexAttribute& other) const {
+        return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
+               num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
+               dword_offset == other.dword_offset && instance_data == other.instance_data;
+    }
 };

 struct FetchShaderData {
+    const u32* code;
+    u32 size = 0;
    std::vector<VertexAttribute> attributes;
    s8 vertex_offset_sgpr = -1;   ///< SGPR of vertex offset from VADDR
    s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
+
+    [[nodiscard]] bool UsesStepRates() const {
+        return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
+                   return attribute.UsesStepRates();
+               }) != attributes.end();
+    }
+
+    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs,
+                                                     const Info& info) const {
+        u32 vertex_offset = regs.index_offset;
+        u32 instance_offset = 0;
+        if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
+            vertex_offset = info.user_data[vertex_offset_sgpr];
+        }
+        if (instance_offset_sgpr != -1) {
+            instance_offset = info.user_data[instance_offset_sgpr];
+        }
+        return {vertex_offset, instance_offset};
+    }
+
+    bool operator==(const FetchShaderData& other) const {
+        return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
+               instance_offset_sgpr == other.instance_offset_sgpr;
+    }
 };

-FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);

 } // namespace Shader::Gcn
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra

 void Translator::EmitFetch(const GcnInst& inst) {
    // Read the pointer to the fetch shader assembly.
-    const u32 sgpr_base = inst.src[0].code;
-    const u32* code;
-    std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
+    info.has_fetch_shader = true;
+    info.fetch_shader_sgpr_base = inst.src[0].code;

-    // Parse the assembly to generate a list of attributes.
-    u32 fetch_size{};
-    const auto fetch_data = ParseFetchShader(code, &fetch_size);
+    const auto fetch_data = ParseFetchShader(info);
+    ASSERT(fetch_data.has_value());

    if (Config::dumpShaders()) {
        using namespace Common::FS;
@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
        }
        const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
        const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
-        file.WriteRaw<u8>(code, fetch_size);
+        file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
    }

-    info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr;
-    info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
-
-    for (const auto& attrib : fetch_data.attributes) {
+    for (const auto& attrib : fetch_data->attributes) {
        const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
        IR::VectorReg dst_reg{attrib.dest_vgpr};

@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) {

        // In case of programmable step rates we need to fallback to instance data pulling in
        // shader, so VBs should be bound as regular data buffers
-        s32 instance_buf_handle = -1;
-        const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
-        if (step_rate == Info::VsInput::OverStepRate0 ||
-            step_rate == Info::VsInput::OverStepRate1) {
+        if (attrib.UsesStepRates()) {
            info.buffers.push_back({
                .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
                .used_types = IR::Type::F32,
                .is_instance_data = true,
+                .instance_attrib = attrib.semantic,
            });
-            instance_buf_handle = s32(info.buffers.size() - 1);
-            info.uses_step_rates = true;
        }
-
-        const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
-        info.vs_inputs.push_back({
-            .fmt = buffer.GetNumberFmt(),
-            .binding = attrib.semantic,
-            .num_components = std::min<u16>(attrib.num_elements, num_components),
-            .sgpr_base = attrib.sgpr_base,
-            .dword_offset = attrib.dword_offset,
-            .instance_step_rate = step_rate,
-            .instance_data_buf = instance_buf_handle,
-        });
    }
 }

--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@ -45,6 +45,7 @@ struct BufferResource {
    AmdGpu::Buffer inline_cbuf;
    bool is_gds_buffer{};
    bool is_instance_data{};
+    u8 instance_attrib{};
    bool is_written{};

    bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
@ -57,7 +58,6 @@ using BufferResourceList = boost::container::small_vector<BufferResource, 16>;

 struct TextureBufferResource {
    u32 sharp_idx;
-    AmdGpu::NumberFormat nfmt;
    bool is_written{};

    constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
@ -66,8 +66,6 @@ using TextureBufferResourceList = boost::container::small_vector<TextureBufferRe

 struct ImageResource {
    u32 sharp_idx;
-    AmdGpu::ImageType type;
-    AmdGpu::NumberFormat nfmt;
    bool is_storage{};
    bool is_depth{};
    bool is_atomic{};
@ -115,24 +113,6 @@ static_assert(sizeof(PushData) <= 128,
 * Contains general information generated by the shader recompiler for an input program.
 */
 struct Info {
-    struct VsInput {
-        enum InstanceIdType : u8 {
-            None = 0,
-            OverStepRate0 = 1,
-            OverStepRate1 = 2,
-            Plain = 3,
-        };
-
-        AmdGpu::NumberFormat fmt;
-        u16 binding;
-        u16 num_components;
-        u8 sgpr_base;
-        u8 dword_offset;
-        InstanceIdType instance_step_rate;
-        s32 instance_data_buf;
-    };
-    boost::container::static_vector<VsInput, 32> vs_inputs{};
-
    struct AttributeFlags {
        bool Get(IR::Attribute attrib, u32 comp = 0) const {
            return flags[Index(attrib)] & (1 << comp);
@ -179,9 +159,6 @@ struct Info {

    CopyShaderData gs_copy_data;

-    s8 vertex_offset_sgpr = -1;
-    s8 instance_offset_sgpr = -1;
-
    BufferResourceList buffers;
    TextureBufferResourceList texture_buffers;
    ImageResourceList images;
@ -208,10 +185,11 @@ struct Info {
    bool uses_shared{};
    bool uses_fp16{};
    bool uses_fp64{};
-    bool uses_step_rates{};
    bool translation_failed{}; // indicates that shader has unsupported instructions
    bool has_readconst{};
    u8 mrt_mask{0u};
+    bool has_fetch_shader{false};
+    u32 fetch_shader_sgpr_base{0u};

    explicit Info(Stage stage_, ShaderParams params)
        : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
@ -252,18 +230,6 @@ struct Info {
        bnd.user_data += ud_mask.NumRegs();
    }

-    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs) const {
-        u32 vertex_offset = regs.index_offset;
-        u32 instance_offset = 0;
-        if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
-            vertex_offset = user_data[vertex_offset_sgpr];
-        }
-        if (instance_offset_sgpr != -1) {
-            instance_offset = user_data[instance_offset_sgpr];
-        }
-        return {vertex_offset, instance_offset};
-    }
-
    void RefreshFlatBuf() {
        flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
        ASSERT(user_data.size() <= NumUserDataRegs);
@ -284,7 +250,12 @@ constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const
 }

 constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
-    return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
+    const auto image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
+    if (!image.Valid()) {
+        // Fall back to null image if unbound.
+        return AmdGpu::Image::Null();
+    }
+    return image;
 }

 constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@ -381,7 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
    const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
    const s32 binding = descriptors.Add(TextureBufferResource{
        .sharp_idx = sharp,
-        .nfmt = buffer.GetNumberFmt(),
        .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
    });

@ -660,11 +659,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
        }
    }

-    const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
    u32 image_binding = descriptors.Add(ImageResource{
        .sharp_idx = tsharp,
-        .type = type,
-        .nfmt = image.GetNumberFmt(),
        .is_storage = is_storage,
        .is_depth = bool(inst_info.is_depth),
        .is_atomic = IsImageAtomicInstruction(inst),
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@ -22,6 +22,7 @@ struct Profile {
    bool support_fp32_denorm_preserve{};
    bool support_fp32_denorm_flush{};
    bool support_explicit_workgroup_layout{};
+    bool support_legacy_vertex_attributes{};
    bool has_broken_spirv_clamp{};
    bool lower_left_origin_mode{};
    bool needs_manual_interpolation{};
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@ -6,12 +6,19 @@
 #include <bitset>

 #include "common/types.h"
+#include "frontend/fetch_shader.h"
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
 #include "shader_recompiler/ir/passes/srt.h"

 namespace Shader {

+struct VsAttribSpecialization {
+    AmdGpu::NumberClass num_class{};
+
+    auto operator<=>(const VsAttribSpecialization&) const = default;
+};
+
 struct BufferSpecialization {
    u16 stride : 14;
    u16 is_storage : 1;
@ -50,6 +57,8 @@ struct StageSpecialization {

    const Shader::Info* info;
    RuntimeInfo runtime_info;
+    Gcn::FetchShaderData fetch_shader_data{};
+    boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
    std::bitset<MaxStageResources> bitset{};
    boost::container::small_vector<BufferSpecialization, 16> buffers;
    boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
@ -57,9 +66,19 @@ struct StageSpecialization {
    boost::container::small_vector<FMaskSpecialization, 8> fmasks;
    Backend::Bindings start{};

-    explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
-                                 Backend::Bindings start_)
+    explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
+                                 const Profile& profile_, Backend::Bindings start_)
        : info{&info_}, runtime_info{runtime_info_}, start{start_} {
+        if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) {
+            fetch_shader_data = *fetch_shader;
+            if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
+                // Specialize shader on VS input number types to follow spec.
+                ForEachSharp(vs_attribs, fetch_shader_data.attributes,
+                             [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
+                                 spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
+                             });
+            }
+        }
        u32 binding{};
        if (info->has_readconst) {
            binding++;
@ -75,8 +94,7 @@ struct StageSpecialization {
                     });
        ForEachSharp(binding, images, info->images,
                     [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
-                         spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray
-                                                              : sharp.GetType();
+                         spec.type = sharp.GetBoundType();
                         spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
                     });
        ForEachSharp(binding, fmasks, info->fmasks,
@ -86,6 +104,17 @@ struct StageSpecialization {
                     });
    }

+    void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
+        for (const auto& desc : desc_list) {
+            auto& spec = spec_list.emplace_back();
+            const auto sharp = desc.GetSharp(*info);
+            if (!sharp) {
+                continue;
+            }
+            func(spec, desc, sharp);
+        }
+    }
+
    void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
        for (const auto& desc : desc_list) {
            auto& spec = spec_list.emplace_back();
@ -106,6 +135,14 @@ struct StageSpecialization {
        if (runtime_info != other.runtime_info) {
            return false;
        }
+        if (fetch_shader_data != other.fetch_shader_data) {
+            return false;
+        }
+        for (u32 i = 0; i < vs_attribs.size(); i++) {
+            if (vs_attribs[i] != other.vs_attribs[i]) {
+                return false;
+            }
+        }
        u32 binding{};
        if (info->has_readconst != other.info->has_readconst) {
            return false;