renderer_vulkan: Parse fetch shader per-pipeline (#1656)

* shader_recompiler: Read image format info directly from sharps instead of storing in shader info.
* renderer_vulkan: Parse fetch shader per-pipeline
* Few minor fixes.
* shader_recompiler: Specialize on vertex attribute number types.
* shader_recompiler: Move GetDrawOffsets to fetch shader
2025-06-26 20:36:16 +00:00 · 2024-12-04 03:03:47 -08:00 · 2024-12-04 03:03:47 -08:00 · 920acb8d8b
commit 920acb8d8b
parent 74b091fd08
21 changed files with 286 additions and 182 deletions
--- a/src/shader_recompiler/frontend/fetch_shader.cpp
+++ b/src/shader_recompiler/frontend/fetch_shader.cpp
@@ -34,8 +34,14 @@ namespace Shader::Gcn {
 * We take the reverse way, extract the original input semantics from these instructions.
 **/

-FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
-    FetchShaderData data{};
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
+    if (!info.has_fetch_shader) {
+        return std::nullopt;
+    }
+    const u32* code;
+    std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
+
+    FetchShaderData data{.code = code};
    GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
    GcnDecodeContext decoder;

@@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
    u32 semantic_index = 0;
    while (!code_slice.atEnd()) {
        const auto inst = decoder.decodeInstruction(code_slice);
-        *out_size += inst.length;
+        data.size += inst.length;

        if (inst.opcode == Opcode::S_SETPC_B64) {
            break;
--- a/src/shader_recompiler/frontend/fetch_shader.h
+++ b/src/shader_recompiler/frontend/fetch_shader.h
@@ -3,26 +3,80 @@

 #pragma once

+#include <ranges>
 #include <vector>
 #include "common/types.h"
+#include "shader_recompiler/info.h"

 namespace Shader::Gcn {

 struct VertexAttribute {
+    enum InstanceIdType : u8 {
+        None = 0,
+        OverStepRate0 = 1,
+        OverStepRate1 = 2,
+        Plain = 3,
+    };
+
    u8 semantic;      ///< Semantic index of the attribute
    u8 dest_vgpr;     ///< Destination VGPR to load first component.
    u8 num_elements;  ///< Number of components to load
    u8 sgpr_base;     ///< SGPR that contains the pointer to the list of vertex V#
    u8 dword_offset;  ///< The dword offset of the V# that describes this attribute.
    u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
+
+    [[nodiscard]] InstanceIdType GetStepRate() const {
+        return static_cast<InstanceIdType>(instance_data);
+    }
+
+    [[nodiscard]] bool UsesStepRates() const {
+        const auto step_rate = GetStepRate();
+        return step_rate == OverStepRate0 || step_rate == OverStepRate1;
+    }
+
+    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
+        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    }
+
+    bool operator==(const VertexAttribute& other) const {
+        return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
+               num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
+               dword_offset == other.dword_offset && instance_data == other.instance_data;
+    }
 };

 struct FetchShaderData {
+    const u32* code;
+    u32 size = 0;
    std::vector<VertexAttribute> attributes;
    s8 vertex_offset_sgpr = -1;   ///< SGPR of vertex offset from VADDR
    s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
+
+    [[nodiscard]] bool UsesStepRates() const {
+        return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
+                   return attribute.UsesStepRates();
+               }) != attributes.end();
+    }
+
+    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs,
+                                                     const Info& info) const {
+        u32 vertex_offset = regs.index_offset;
+        u32 instance_offset = 0;
+        if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
+            vertex_offset = info.user_data[vertex_offset_sgpr];
+        }
+        if (instance_offset_sgpr != -1) {
+            instance_offset = info.user_data[instance_offset_sgpr];
+        }
+        return {vertex_offset, instance_offset};
+    }
+
+    bool operator==(const FetchShaderData& other) const {
+        return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
+               instance_offset_sgpr == other.instance_offset_sgpr;
+    }
 };

-FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);

 } // namespace Shader::Gcn
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra

 void Translator::EmitFetch(const GcnInst& inst) {
    // Read the pointer to the fetch shader assembly.
-    const u32 sgpr_base = inst.src[0].code;
-    const u32* code;
-    std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
+    info.has_fetch_shader = true;
+    info.fetch_shader_sgpr_base = inst.src[0].code;

-    // Parse the assembly to generate a list of attributes.
-    u32 fetch_size{};
-    const auto fetch_data = ParseFetchShader(code, &fetch_size);
+    const auto fetch_data = ParseFetchShader(info);
+    ASSERT(fetch_data.has_value());

    if (Config::dumpShaders()) {
        using namespace Common::FS;
@@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
        }
        const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
        const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
-        file.WriteRaw<u8>(code, fetch_size);
+        file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
    }

-    info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr;
-    info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
-
-    for (const auto& attrib : fetch_data.attributes) {
+    for (const auto& attrib : fetch_data->attributes) {
        const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
        IR::VectorReg dst_reg{attrib.dest_vgpr};

@@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) {

        // In case of programmable step rates we need to fallback to instance data pulling in
        // shader, so VBs should be bound as regular data buffers
-        s32 instance_buf_handle = -1;
-        const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
-        if (step_rate == Info::VsInput::OverStepRate0 ||
-            step_rate == Info::VsInput::OverStepRate1) {
+        if (attrib.UsesStepRates()) {
            info.buffers.push_back({
                .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
                .used_types = IR::Type::F32,
                .is_instance_data = true,
+                .instance_attrib = attrib.semantic,
            });
-            instance_buf_handle = s32(info.buffers.size() - 1);
-            info.uses_step_rates = true;
        }
-
-        const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
-        info.vs_inputs.push_back({
-            .fmt = buffer.GetNumberFmt(),
-            .binding = attrib.semantic,
-            .num_components = std::min<u16>(attrib.num_elements, num_components),
-            .sgpr_base = attrib.sgpr_base,
-            .dword_offset = attrib.dword_offset,
-            .instance_step_rate = step_rate,
-            .instance_data_buf = instance_buf_handle,
-        });
    }
 }