renderer_vulkan: Parse fetch shader per-pipeline (#1656)

* shader_recompiler: Read image format info directly from sharps instead of storing in shader info.

* renderer_vulkan: Parse fetch shader per-pipeline

* Few minor fixes.

* shader_recompiler: Specialize on vertex attribute number types.

* shader_recompiler: Move GetDrawOffsets to fetch shader
This commit is contained in:
squidbus 2024-12-04 03:03:47 -08:00 committed by GitHub
parent 74b091fd08
commit 920acb8d8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 286 additions and 182 deletions

View file

@ -34,8 +34,14 @@ namespace Shader::Gcn {
* We go the other way around and extract the original input semantics from these instructions.
**/
FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
FetchShaderData data{};
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
if (!info.has_fetch_shader) {
return std::nullopt;
}
const u32* code;
std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
FetchShaderData data{.code = code};
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
u32 semantic_index = 0;
while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice);
*out_size += inst.length;
data.size += inst.length;
if (inst.opcode == Opcode::S_SETPC_B64) {
break;

View file

@ -3,26 +3,80 @@
#pragma once
#include <algorithm>
#include <ranges>
#include <vector>
#include "common/types.h"
#include "shader_recompiler/info.h"
namespace Shader::Gcn {
/// One vertex attribute entry, as stored in FetchShaderData::attributes.
struct VertexAttribute {
    /// Values that `instance_data` is interpreted as by GetStepRate().
    enum InstanceIdType : u8 {
        None = 0,
        OverStepRate0 = 1,
        OverStepRate1 = 2,
        Plain = 3,
    };

    u8 semantic;      ///< Semantic index of the attribute
    u8 dest_vgpr;     ///< Destination VGPR to load first component.
    u8 num_elements;  ///< Number of components to load
    u8 sgpr_base;     ///< SGPR that contains the pointer to the list of vertex V#
    u8 dword_offset;  ///< The dword offset of the V# that describes this attribute.
    u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate

    /// Returns `instance_data` converted to its InstanceIdType value.
    [[nodiscard]] InstanceIdType GetStepRate() const {
        return static_cast<InstanceIdType>(instance_data);
    }

    /// Returns true only when the step rate is OverStepRate0 or OverStepRate1.
    [[nodiscard]] bool UsesStepRates() const {
        switch (GetStepRate()) {
        case OverStepRate0:
        case OverStepRate1:
            return true;
        default:
            return false;
        }
    }

    /// Reads this attribute's AmdGpu::Buffer through `info.ReadUdReg`, addressed by
    /// `sgpr_base` and `dword_offset`.
    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
    }

    /// Field-by-field equality over all six members.
    bool operator==(const VertexAttribute& other) const {
        if (semantic != other.semantic || dest_vgpr != other.dest_vgpr) {
            return false;
        }
        if (num_elements != other.num_elements || sgpr_base != other.sgpr_base) {
            return false;
        }
        return dword_offset == other.dword_offset && instance_data == other.instance_data;
    }
};
struct FetchShaderData {
const u32* code;
u32 size = 0;
std::vector<VertexAttribute> attributes;
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
[[nodiscard]] bool UsesStepRates() const {
return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
return attribute.UsesStepRates();
}) != attributes.end();
}
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs,
const Info& info) const {
u32 vertex_offset = regs.index_offset;
u32 instance_offset = 0;
if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
vertex_offset = info.user_data[vertex_offset_sgpr];
}
if (instance_offset_sgpr != -1) {
instance_offset = info.user_data[instance_offset_sgpr];
}
return {vertex_offset, instance_offset};
}
bool operator==(const FetchShaderData& other) const {
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
instance_offset_sgpr == other.instance_offset_sgpr;
}
};
FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);
} // namespace Shader::Gcn

View file

@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly.
const u32 sgpr_base = inst.src[0].code;
const u32* code;
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
info.has_fetch_shader = true;
info.fetch_shader_sgpr_base = inst.src[0].code;
// Parse the assembly to generate a list of attributes.
u32 fetch_size{};
const auto fetch_data = ParseFetchShader(code, &fetch_size);
const auto fetch_data = ParseFetchShader(info);
ASSERT(fetch_data.has_value());
if (Config::dumpShaders()) {
using namespace Common::FS;
@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
}
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size);
file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
}
info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr;
info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
for (const auto& attrib : fetch_data.attributes) {
for (const auto& attrib : fetch_data->attributes) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) {
// In case of programmable step rates we need to fallback to instance data pulling in
// shader, so VBs should be bound as regular data buffers
s32 instance_buf_handle = -1;
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
if (attrib.UsesStepRates()) {
info.buffers.push_back({
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
.used_types = IR::Type::F32,
.is_instance_data = true,
.instance_attrib = attrib.semantic,
});
instance_buf_handle = s32(info.buffers.size() - 1);
info.uses_step_rates = true;
}
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
info.vs_inputs.push_back({
.fmt = buffer.GetNumberFmt(),
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.instance_step_rate = step_rate,
.instance_data_buf = instance_buf_handle,
});
}
}