mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-26 20:36:16 +00:00
renderer_vulkan: Parse fetch shader per-pipeline (#1656)
* shader_recompiler: Read image format info directly from sharps instead of storing it in shader info.
* renderer_vulkan: Parse fetch shader per-pipeline.
* A few minor fixes.
* shader_recompiler: Specialize on vertex attribute number types.
* shader_recompiler: Move GetDrawOffsets to fetch shader.
This commit is contained in:
parent
74b091fd08
commit
920acb8d8b
21 changed files with 286 additions and 182 deletions
|
@ -34,8 +34,14 @@ namespace Shader::Gcn {
|
|||
* We take the reverse way, extract the original input semantics from these instructions.
|
||||
**/
|
||||
|
||||
FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
|
||||
FetchShaderData data{};
|
||||
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
|
||||
if (!info.has_fetch_shader) {
|
||||
return std::nullopt;
|
||||
}
|
||||
const u32* code;
|
||||
std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
|
||||
|
||||
FetchShaderData data{.code = code};
|
||||
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
|
||||
GcnDecodeContext decoder;
|
||||
|
||||
|
@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
|
|||
u32 semantic_index = 0;
|
||||
while (!code_slice.atEnd()) {
|
||||
const auto inst = decoder.decodeInstruction(code_slice);
|
||||
*out_size += inst.length;
|
||||
data.size += inst.length;
|
||||
|
||||
if (inst.opcode == Opcode::S_SETPC_B64) {
|
||||
break;
|
||||
|
|
|
@ -3,26 +3,80 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <ranges>
#include <utility>
#include <vector>
#include "common/types.h"
#include "shader_recompiler/info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
struct VertexAttribute {
|
||||
enum InstanceIdType : u8 {
|
||||
None = 0,
|
||||
OverStepRate0 = 1,
|
||||
OverStepRate1 = 2,
|
||||
Plain = 3,
|
||||
};
|
||||
|
||||
u8 semantic; ///< Semantic index of the attribute
|
||||
u8 dest_vgpr; ///< Destination VGPR to load first component.
|
||||
u8 num_elements; ///< Number of components to load
|
||||
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
|
||||
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
|
||||
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
|
||||
|
||||
[[nodiscard]] InstanceIdType GetStepRate() const {
|
||||
return static_cast<InstanceIdType>(instance_data);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool UsesStepRates() const {
|
||||
const auto step_rate = GetStepRate();
|
||||
return step_rate == OverStepRate0 || step_rate == OverStepRate1;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
|
||||
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
bool operator==(const VertexAttribute& other) const {
|
||||
return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
|
||||
num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
|
||||
dword_offset == other.dword_offset && instance_data == other.instance_data;
|
||||
}
|
||||
};
|
||||
|
||||
struct FetchShaderData {
|
||||
const u32* code;
|
||||
u32 size = 0;
|
||||
std::vector<VertexAttribute> attributes;
|
||||
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
|
||||
s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
|
||||
|
||||
[[nodiscard]] bool UsesStepRates() const {
|
||||
return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
|
||||
return attribute.UsesStepRates();
|
||||
}) != attributes.end();
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs,
|
||||
const Info& info) const {
|
||||
u32 vertex_offset = regs.index_offset;
|
||||
u32 instance_offset = 0;
|
||||
if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
|
||||
vertex_offset = info.user_data[vertex_offset_sgpr];
|
||||
}
|
||||
if (instance_offset_sgpr != -1) {
|
||||
instance_offset = info.user_data[instance_offset_sgpr];
|
||||
}
|
||||
return {vertex_offset, instance_offset};
|
||||
}
|
||||
|
||||
bool operator==(const FetchShaderData& other) const {
|
||||
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
|
||||
instance_offset_sgpr == other.instance_offset_sgpr;
|
||||
}
|
||||
};
|
||||
|
||||
FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
|
||||
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
|
|||
|
||||
void Translator::EmitFetch(const GcnInst& inst) {
|
||||
// Read the pointer to the fetch shader assembly.
|
||||
const u32 sgpr_base = inst.src[0].code;
|
||||
const u32* code;
|
||||
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
|
||||
info.has_fetch_shader = true;
|
||||
info.fetch_shader_sgpr_base = inst.src[0].code;
|
||||
|
||||
// Parse the assembly to generate a list of attributes.
|
||||
u32 fetch_size{};
|
||||
const auto fetch_data = ParseFetchShader(code, &fetch_size);
|
||||
const auto fetch_data = ParseFetchShader(info);
|
||||
ASSERT(fetch_data.has_value());
|
||||
|
||||
if (Config::dumpShaders()) {
|
||||
using namespace Common::FS;
|
||||
|
@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
}
|
||||
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
|
||||
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||
file.WriteRaw<u8>(code, fetch_size);
|
||||
file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
|
||||
}
|
||||
|
||||
info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr;
|
||||
info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
|
||||
|
||||
for (const auto& attrib : fetch_data.attributes) {
|
||||
for (const auto& attrib : fetch_data->attributes) {
|
||||
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
|
||||
IR::VectorReg dst_reg{attrib.dest_vgpr};
|
||||
|
||||
|
@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
|
||||
// In case of programmable step rates we need to fallback to instance data pulling in
|
||||
// shader, so VBs should be bound as regular data buffers
|
||||
s32 instance_buf_handle = -1;
|
||||
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
|
||||
if (step_rate == Info::VsInput::OverStepRate0 ||
|
||||
step_rate == Info::VsInput::OverStepRate1) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
info.buffers.push_back({
|
||||
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
|
||||
.used_types = IR::Type::F32,
|
||||
.is_instance_data = true,
|
||||
.instance_attrib = attrib.semantic,
|
||||
});
|
||||
instance_buf_handle = s32(info.buffers.size() - 1);
|
||||
info.uses_step_rates = true;
|
||||
}
|
||||
|
||||
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
||||
info.vs_inputs.push_back({
|
||||
.fmt = buffer.GetNumberFmt(),
|
||||
.binding = attrib.semantic,
|
||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||
.sgpr_base = attrib.sgpr_base,
|
||||
.dword_offset = attrib.dword_offset,
|
||||
.instance_step_rate = step_rate,
|
||||
.instance_data_buf = instance_buf_handle,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue