Initial instancing and asynchronous compute queues (#207)

* gnm_driver: added `sceGnmRegisterOwner` and `sceGnmRegisterResource`
* video_out: `sceVideoOutGetDeviceCapabilityInfo` for sdk runtime
* gnm_driver: correct vqid index range
* amdgpu: indirect buffer, release mem and some additional irq modes
* amdgpu: added ASC commands processor
* shader_recompiler: added support for fetch instance id
* amdgpu: classic bitfields for T# representation (debugging experience)
* renderer_vulkan: skip zero sized VBs from binding
* texture_cache: image upload logic moved into `Image` object
* gnm_driver: `sceGnmDingDong` implementation
* texture_cache: `Image` usage flags moved; correct VO buffer pitch
2025-07-04 08:06:20 +00:00 · 2024-06-22 18:50:20 +02:00 · 2024-06-22 18:50:20 +02:00 · cb6b21de1f
commit cb6b21de1f
parent a9cbd8287c
19 changed files with 361 additions and 100 deletions
--- a/src/shader_recompiler/frontend/fetch_shader.cpp
+++ b/src/shader_recompiler/frontend/fetch_shader.cpp
@@ -72,6 +72,9 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
            attrib.sgpr_base = it->base_sgpr;
            attrib.dword_offset = it->dword_offset;

+            // Store instance id rate
+            attrib.instance_data = inst.src[0].code;
+
            // Mark load as used.
            it->dst_reg = -1;
        }
--- a/src/shader_recompiler/frontend/fetch_shader.h
+++ b/src/shader_recompiler/frontend/fetch_shader.h
@@ -9,11 +9,12 @@
 namespace Shader::Gcn {

 struct VertexAttribute {
-    u8 semantic;     ///< Semantic index of the attribute
-    u8 dest_vgpr;    ///< Destination VGPR to load first component.
-    u8 num_elements; ///< Number of components to load
-    u8 sgpr_base;    ///< SGPR that contains the pointer to the list of vertex V#
-    u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
+    u8 semantic;      ///< Semantic index of the attribute
+    u8 dest_vgpr;     ///< Destination VGPR to load first component.
+    u8 num_elements;  ///< Number of components to load
+    u8 sgpr_base;     ///< SGPR that contains the pointer to the list of vertex V#
+    u8 dword_offset;  ///< The dword offset of the V# that describes this attribute.
+    u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
 };

 std::vector<VertexAttribute> ParseFetchShader(const u32* code);
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -194,6 +194,11 @@ void Translator::EmitFetch(const GcnInst& inst) {
            ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
        }

+        if (attrib.instance_data == 2 || attrib.instance_data == 3) {
+            LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}",
+                        attrib.instance_data);
+        }
+
        // Read the V# of the attribute to figure out component number and type.
        const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
        const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
@@ -203,6 +208,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
            .num_components = std::min<u16>(attrib.num_elements, num_components),
            .sgpr_base = attrib.sgpr_base,
            .dword_offset = attrib.dword_offset,
+            .instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data),
        });
    }
 }
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -270,8 +270,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
    u32 image_binding = descriptors.Add(ImageResource{
        .sgpr_base = tsharp.sgpr_base,
        .dword_offset = tsharp.dword_offset,
-        .type = image.type,
-        .nfmt = static_cast<AmdGpu::NumberFormat>(image.num_format.Value()),
+        .type = image.GetType(),
+        .nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
        .is_storage = IsImageStorageInstruction(inst),
        .is_depth = bool(inst_info.is_depth),
    });
@@ -293,7 +293,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
    // Now that we know the image type, adjust texture coordinate vector.
    const IR::Inst* body = inst.Arg(1).InstRecursive();
    const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
-        switch (image.type) {
+        switch (image.GetType()) {
        case AmdGpu::ImageType::Color1D:
            return {body->Arg(0), body->Arg(1)};
        case AmdGpu::ImageType::Color1DArray:
@@ -305,7 +305,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
        case AmdGpu::ImageType::Cube:
            return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
        default:
-            UNREACHABLE_MSG("Unknown image type {}", image.type.Value());
+            UNREACHABLE_MSG("Unknown image type {}", image.GetType());
        }
    }();
    inst.SetArg(1, coords);
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -72,11 +72,19 @@ using SamplerResourceList = boost::container::static_vector<SamplerResource, 8>;

 struct Info {
    struct VsInput {
+        enum InstanceIdType : u8 {
+            None = 0,
+            OverStepRate0 = 1,
+            OverStepRate1 = 2,
+            Plain = 3,
+        };
+
        AmdGpu::NumberFormat fmt;
        u16 binding;
        u16 num_components;
        u8 sgpr_base;
        u8 dword_offset;
+        InstanceIdType instance_step_rate;
    };
    boost::container::static_vector<VsInput, 32> vs_inputs{};