diff --git a/src/shader_recompiler/frontend/copy_shader.cpp b/src/shader_recompiler/frontend/copy_shader.cpp index 8750e2b18..4b5869e1d 100644 --- a/src/shader_recompiler/frontend/copy_shader.cpp +++ b/src/shader_recompiler/frontend/copy_shader.cpp @@ -67,6 +67,9 @@ CopyShaderData ParseCopyShader(std::span code) { if (last_attr != IR::Attribute::Position0) { data.num_attrs = static_cast(last_attr) - static_cast(IR::Attribute::Param0) + 1; + const auto it = data.attr_map.begin(); + const u32 comp_stride = std::next(it)->first - it->first; + data.output_vertices = comp_stride / 64; } return data; diff --git a/src/shader_recompiler/frontend/copy_shader.h b/src/shader_recompiler/frontend/copy_shader.h index 55cc31ebd..24c7060ed 100644 --- a/src/shader_recompiler/frontend/copy_shader.h +++ b/src/shader_recompiler/frontend/copy_shader.h @@ -3,8 +3,8 @@ #pragma once +#include #include -#include #include "common/types.h" #include "shader_recompiler/ir/attribute.h" @@ -12,8 +12,9 @@ namespace Shader { struct CopyShaderData { - std::unordered_map> attr_map; + std::map> attr_map; u32 num_attrs{0}; + u32 output_vertices{0}; }; CopyShaderData ParseCopyShader(std::span code); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index f8ffb9638..96ca924a3 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/basic_block.h" diff --git a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp index fbe382d41..9c5f64f84 100644 --- a/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp +++ b/src/shader_recompiler/ir/passes/readlane_elimination_pass.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "shader_recompiler/ir/program.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp index 071b94ac0..02745bf9a 100644 --- a/src/shader_recompiler/ir/passes/ring_access_elimination.cpp +++ b/src/shader_recompiler/ir/passes/ring_access_elimination.cpp @@ -91,6 +91,19 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim const auto& gs_info = runtime_info.gs_info; info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy); + u32 output_vertices = gs_info.output_vertices; + if (info.gs_copy_data.output_vertices && + info.gs_copy_data.output_vertices != output_vertices) { + ASSERT_MSG(output_vertices > info.gs_copy_data.output_vertices && + gs_info.mode == AmdGpu::Liverpool::GsMode::Mode::ScenarioG, + "Invalid geometry shader vertex configuration scenario = {}, max_vert_out = " + "{}, output_vertices = {}", + u32(gs_info.mode), output_vertices, info.gs_copy_data.output_vertices); + LOG_WARNING(Render_Vulkan, "MAX_VERT_OUT {} is larger than actual output vertices {}", + output_vertices, info.gs_copy_data.output_vertices); + output_vertices = info.gs_copy_data.output_vertices; + } + ForEachInstruction([&](IR::IREmitter& ir, IR::Inst& inst) { const auto opcode = inst.GetOpcode(); switch (opcode) { @@ -122,7 +135,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim const auto offset = inst.Flags().inst_offset.Value(); const auto data = ir.BitCast(IR::U32{inst.Arg(2)}); - const auto comp_ofs = gs_info.output_vertices * 4u; + const auto comp_ofs = output_vertices * 4u; const auto output_size = comp_ofs * gs_info.out_vertex_data_size; const auto vc_read_ofs = (((offset / comp_ofs) * comp_ofs) % output_size) * 16u; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 53d2d5303..5a0408e2c 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -149,6 +149,7 @@ struct GeometryRuntimeInfo { u32 out_vertex_data_size{}; AmdGpu::PrimitiveType in_primitive; GsOutputPrimTypes out_primitive; + AmdGpu::Liverpool::GsMode::Mode mode; std::span vs_copy; u64 vs_copy_hash; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 2f33c7302..d88a44375 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -1179,8 +1179,16 @@ struct Liverpool { }; union GsMode { + enum class Mode : u32 { + Off = 0, + ScenarioA = 1, + ScenarioB = 2, + ScenarioG = 3, + ScenarioC = 4, + }; + u32 raw; - BitField<0, 3, u32> mode; + BitField<0, 3, Mode> mode; BitField<3, 2, u32> cut_mode; BitField<22, 2, u32> onchip; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b72f77e55..cd8552515 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -146,6 +146,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS } gs_info.in_vertex_data_size = regs.vgt_esgs_ring_itemsize; gs_info.out_vertex_data_size = regs.vgt_gs_vert_itemsize[0]; + gs_info.mode = regs.vgt_gs_mode.mode; const auto params_vc = Liverpool::GetParams(regs.vs_program); gs_info.vs_copy = params_vc.code; gs_info.vs_copy_hash = params_vc.hash;