mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-23 20:05:01 +00:00
renderer_vulkan: Parse fetch shader per-pipeline (#1656)
* shader_recompiler: Read image format info directly from sharps instead of storing in shader info.
* renderer_vulkan: Parse fetch shader per-pipeline
* Few minor fixes.
* shader_recompiler: Specialize on vertex attribute number types.
* shader_recompiler: Move GetDrawOffsets to fetch shader
This commit is contained in:
parent
74b091fd08
commit
920acb8d8b
21 changed files with 286 additions and 182 deletions
|
@ -187,7 +187,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
|
|||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool has_mips) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||
const auto type = ctx.info.images[handle & 0xFFFF].type;
|
||||
const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info);
|
||||
const auto type = sharp.GetBoundType();
|
||||
const Id zero = ctx.u32_zero_value;
|
||||
const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }};
|
||||
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/ir/passes/srt.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
|
@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() {
|
|||
}
|
||||
|
||||
const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
||||
switch (fmt) {
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
case AmdGpu::NumberFormat::Sscaled:
|
||||
case AmdGpu::NumberFormat::Uscaled:
|
||||
case AmdGpu::NumberFormat::Srgb:
|
||||
switch (GetNumberClass(fmt)) {
|
||||
case AmdGpu::NumberClass::Float:
|
||||
return ctx.F32;
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
case AmdGpu::NumberClass::Sint:
|
||||
return ctx.S32;
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
case AmdGpu::NumberClass::Uint:
|
||||
return ctx.U32;
|
||||
default:
|
||||
break;
|
||||
|
@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||
|
||||
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
|
||||
u32 num_components, bool output) {
|
||||
switch (fmt) {
|
||||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
case AmdGpu::NumberFormat::Sscaled:
|
||||
case AmdGpu::NumberFormat::Uscaled:
|
||||
case AmdGpu::NumberFormat::Srgb:
|
||||
switch (GetNumberClass(fmt)) {
|
||||
case AmdGpu::NumberClass::Float:
|
||||
return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
case AmdGpu::NumberClass::Uint:
|
||||
return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
case AmdGpu::NumberClass::Sint:
|
||||
return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
|
||||
default:
|
||||
break;
|
||||
|
@ -280,33 +269,42 @@ void EmitContext::DefineInputs() {
|
|||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||
|
||||
for (const auto& input : info.vs_inputs) {
|
||||
ASSERT(input.binding < IR::NumParams);
|
||||
const Id type{GetAttributeType(*this, input.fmt)[4]};
|
||||
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
|
||||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
|
||||
|
||||
const auto fetch_shader = Gcn::ParseFetchShader(info);
|
||||
if (!fetch_shader) {
|
||||
break;
|
||||
}
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
ASSERT(attrib.semantic < IR::NumParams);
|
||||
const auto sharp = attrib.GetSharp(info);
|
||||
const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
|
||||
if (attrib.UsesStepRates()) {
|
||||
const u32 rate_idx =
|
||||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
|
||||
: 1;
|
||||
attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
|
||||
: 1;
|
||||
const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
|
||||
const auto buffer =
|
||||
std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
|
||||
return buffer.instance_attrib == attrib.semantic;
|
||||
});
|
||||
// Note that we pass index rather than Id
|
||||
input_params[input.binding] = SpirvAttribute{
|
||||
input_params[attrib.semantic] = SpirvAttribute{
|
||||
.id = rate_idx,
|
||||
.pointer_type = input_u32,
|
||||
.component_type = U32[1],
|
||||
.num_components = input.num_components,
|
||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||
.is_integer = true,
|
||||
.is_loaded = false,
|
||||
.buffer_handle = input.instance_data_buf,
|
||||
.buffer_handle = int(buffer - info.buffers.begin()),
|
||||
};
|
||||
} else {
|
||||
Id id{DefineInput(type, input.binding)};
|
||||
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
|
||||
Name(id, fmt::format("vs_instance_attr{}", input.binding));
|
||||
Id id{DefineInput(type, attrib.semantic)};
|
||||
if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
|
||||
Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
|
||||
} else {
|
||||
Name(id, fmt::format("vs_in_attr{}", input.binding));
|
||||
Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
|
||||
}
|
||||
input_params[input.binding] = GetAttributeInfo(input.fmt, id, 4, false);
|
||||
input_params[attrib.semantic] =
|
||||
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
}
|
||||
|
@ -553,9 +551,10 @@ void EmitContext::DefineBuffers() {
|
|||
|
||||
void EmitContext::DefineTextureBuffers() {
|
||||
for (const auto& desc : info.texture_buffers) {
|
||||
const bool is_integer =
|
||||
desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint;
|
||||
const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)};
|
||||
const auto sharp = desc.GetSharp(info);
|
||||
const auto nfmt = sharp.GetNumberFmt();
|
||||
const bool is_integer = AmdGpu::IsInteger(nfmt);
|
||||
const VectorIds& sampled_type{GetAttributeType(*this, nfmt)};
|
||||
const u32 sampled = desc.is_written ? 2 : 1;
|
||||
const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
|
||||
sampled, spv::ImageFormat::Unknown)};
|
||||
|
@ -650,10 +649,11 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
|||
}
|
||||
|
||||
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
||||
const auto image = ctx.info.ReadUdSharp<AmdGpu::Image>(desc.sharp_idx);
|
||||
const auto image = desc.GetSharp(ctx.info);
|
||||
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
|
||||
const auto type = image.GetBoundType();
|
||||
const u32 sampled = desc.is_storage ? 2 : 1;
|
||||
switch (desc.type) {
|
||||
switch (type) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
|
||||
case AmdGpu::ImageType::Color1DArray:
|
||||
|
@ -672,14 +672,15 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
throw InvalidArgument("Invalid texture type {}", desc.type);
|
||||
throw InvalidArgument("Invalid texture type {}", type);
|
||||
}
|
||||
|
||||
void EmitContext::DefineImagesAndSamplers() {
|
||||
for (const auto& image_desc : info.images) {
|
||||
const bool is_integer = image_desc.nfmt == AmdGpu::NumberFormat::Uint ||
|
||||
image_desc.nfmt == AmdGpu::NumberFormat::Sint;
|
||||
const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
|
||||
const auto sharp = image_desc.GetSharp(info);
|
||||
const auto nfmt = sharp.GetNumberFmt();
|
||||
const bool is_integer = AmdGpu::IsInteger(nfmt);
|
||||
const VectorIds& data_types = GetAttributeType(*this, nfmt);
|
||||
const Id sampled_type = data_types[1];
|
||||
const Id image_type{ImageType(*this, image_desc, sampled_type)};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
|
||||
|
|
|
@ -34,8 +34,14 @@ namespace Shader::Gcn {
|
|||
* We take the reverse way, extract the original input semantics from these instructions.
|
||||
**/
|
||||
|
||||
FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
|
||||
FetchShaderData data{};
|
||||
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
|
||||
if (!info.has_fetch_shader) {
|
||||
return std::nullopt;
|
||||
}
|
||||
const u32* code;
|
||||
std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
|
||||
|
||||
FetchShaderData data{.code = code};
|
||||
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
|
||||
GcnDecodeContext decoder;
|
||||
|
||||
|
@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
|
|||
u32 semantic_index = 0;
|
||||
while (!code_slice.atEnd()) {
|
||||
const auto inst = decoder.decodeInstruction(code_slice);
|
||||
*out_size += inst.length;
|
||||
data.size += inst.length;
|
||||
|
||||
if (inst.opcode == Opcode::S_SETPC_B64) {
|
||||
break;
|
||||
|
|
|
@ -3,26 +3,80 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <ranges>
|
||||
#include <vector>
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
/// One vertex attribute entry of a parsed fetch shader (see FetchShaderData::attributes).
/// All fields are single bytes and the type is compared field-by-field by operator==.
struct VertexAttribute {
|
||||
/// Interpretation of the raw `instance_data` byte, as returned by GetStepRate().
/// NOTE(review): the hardware meaning of each value is not visible in this header; elsewhere
/// in this change OverStepRate0/OverStepRate1 select step-rate index 0/1 and Plain inputs are
/// named "vs_instance_attr" - confirm against the fetch shader parser.
enum InstanceIdType : u8 {
|
||||
None = 0,
|
||||
OverStepRate0 = 1,
|
||||
OverStepRate1 = 2,
|
||||
Plain = 3,
|
||||
};
|
||||
|
||||
u8 semantic; ///< Semantic index of the attribute
|
||||
u8 dest_vgpr; ///< Destination VGPR to load first component.
|
||||
u8 num_elements; ///< Number of components to load
|
||||
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
|
||||
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
|
||||
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
|
||||
|
||||
/// Returns `instance_data` reinterpreted as an InstanceIdType (plain static_cast, no range
/// check).
[[nodiscard]] InstanceIdType GetStepRate() const {
|
||||
return static_cast<InstanceIdType>(instance_data);
|
||||
}
|
||||
|
||||
/// Returns true only when the step rate is OverStepRate0 or OverStepRate1; None and Plain
/// both yield false.
[[nodiscard]] bool UsesStepRates() const {
|
||||
const auto step_rate = GetStepRate();
|
||||
return step_rate == OverStepRate0 || step_rate == OverStepRate1;
|
||||
}
|
||||
|
||||
/// Reads the AmdGpu::Buffer descriptor for this attribute from `info` via
/// Info::ReadUdReg, addressed by `sgpr_base` and `dword_offset`.
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
|
||||
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
/// Member-wise equality over all six fields.
bool operator==(const VertexAttribute& other) const {
|
||||
return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
|
||||
num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
|
||||
dword_offset == other.dword_offset && instance_data == other.instance_data;
|
||||
}
|
||||
};
|
||||
|
||||
/// Result of ParseFetchShader: the attribute list recovered from a fetch shader plus the SGPRs
/// that carry the draw offsets.
struct FetchShaderData {
|
||||
/// Start of the fetch shader code that was parsed (set by ParseFetchShader). Not owned here.
const u32* code;
|
||||
/// Sum of the decoded instruction lengths accumulated by ParseFetchShader.
/// NOTE(review): presumably a byte count (it is passed to WriteRaw<u8> when dumping) - confirm.
u32 size = 0;
|
||||
/// Attributes in the order they were pushed by the parser.
std::vector<VertexAttribute> attributes;
|
||||
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
|
||||
s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
|
||||
|
||||
/// Returns true if at least one entry of `attributes` reports UsesStepRates().
[[nodiscard]] bool UsesStepRates() const {
|
||||
return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
|
||||
return attribute.UsesStepRates();
|
||||
}) != attributes.end();
|
||||
}
|
||||
|
||||
/// Computes the {vertex_offset, instance_offset} pair for a draw.
/// The vertex offset starts as `regs.index_offset`; the user-data SGPR value is used only when
/// that register is zero and `vertex_offset_sgpr` is set (!= -1). The instance offset is 0
/// unless `instance_offset_sgpr` is set, in which case it is read from `info.user_data`.
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs,
|
||||
const Info& info) const {
|
||||
u32 vertex_offset = regs.index_offset;
|
||||
u32 instance_offset = 0;
|
||||
if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
|
||||
vertex_offset = info.user_data[vertex_offset_sgpr];
|
||||
}
|
||||
|
||||
if (instance_offset_sgpr != -1) {
|
||||
instance_offset = info.user_data[instance_offset_sgpr];
|
||||
}
|
||||
|
||||
return {vertex_offset, instance_offset};
|
||||
}
|
||||
|
||||
/// Equality compares the attribute list and the two offset SGPR indices only; `code` and
/// `size` are deliberately not part of the comparison as written.
bool operator==(const FetchShaderData& other) const {
|
||||
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
|
||||
instance_offset_sgpr == other.instance_offset_sgpr;
|
||||
}
|
||||
};
|
||||
|
||||
FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
|
||||
std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
|
|||
|
||||
void Translator::EmitFetch(const GcnInst& inst) {
|
||||
// Read the pointer to the fetch shader assembly.
|
||||
const u32 sgpr_base = inst.src[0].code;
|
||||
const u32* code;
|
||||
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
|
||||
info.has_fetch_shader = true;
|
||||
info.fetch_shader_sgpr_base = inst.src[0].code;
|
||||
|
||||
// Parse the assembly to generate a list of attributes.
|
||||
u32 fetch_size{};
|
||||
const auto fetch_data = ParseFetchShader(code, &fetch_size);
|
||||
const auto fetch_data = ParseFetchShader(info);
|
||||
ASSERT(fetch_data.has_value());
|
||||
|
||||
if (Config::dumpShaders()) {
|
||||
using namespace Common::FS;
|
||||
|
@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
}
|
||||
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
|
||||
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
|
||||
file.WriteRaw<u8>(code, fetch_size);
|
||||
file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
|
||||
}
|
||||
|
||||
info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr;
|
||||
info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
|
||||
|
||||
for (const auto& attrib : fetch_data.attributes) {
|
||||
for (const auto& attrib : fetch_data->attributes) {
|
||||
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
|
||||
IR::VectorReg dst_reg{attrib.dest_vgpr};
|
||||
|
||||
|
@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||
|
||||
// In case of programmable step rates we need to fallback to instance data pulling in
|
||||
// shader, so VBs should be bound as regular data buffers
|
||||
s32 instance_buf_handle = -1;
|
||||
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
|
||||
if (step_rate == Info::VsInput::OverStepRate0 ||
|
||||
step_rate == Info::VsInput::OverStepRate1) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
info.buffers.push_back({
|
||||
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
|
||||
.used_types = IR::Type::F32,
|
||||
.is_instance_data = true,
|
||||
.instance_attrib = attrib.semantic,
|
||||
});
|
||||
instance_buf_handle = s32(info.buffers.size() - 1);
|
||||
info.uses_step_rates = true;
|
||||
}
|
||||
|
||||
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
|
||||
info.vs_inputs.push_back({
|
||||
.fmt = buffer.GetNumberFmt(),
|
||||
.binding = attrib.semantic,
|
||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
||||
.sgpr_base = attrib.sgpr_base,
|
||||
.dword_offset = attrib.dword_offset,
|
||||
.instance_step_rate = step_rate,
|
||||
.instance_data_buf = instance_buf_handle,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ struct BufferResource {
|
|||
AmdGpu::Buffer inline_cbuf;
|
||||
bool is_gds_buffer{};
|
||||
bool is_instance_data{};
|
||||
u8 instance_attrib{};
|
||||
bool is_written{};
|
||||
|
||||
bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
|
||||
|
@ -57,7 +58,6 @@ using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
|
|||
|
||||
struct TextureBufferResource {
|
||||
u32 sharp_idx;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_written{};
|
||||
|
||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||
|
@ -66,8 +66,6 @@ using TextureBufferResourceList = boost::container::small_vector<TextureBufferRe
|
|||
|
||||
struct ImageResource {
|
||||
u32 sharp_idx;
|
||||
AmdGpu::ImageType type;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_storage{};
|
||||
bool is_depth{};
|
||||
bool is_atomic{};
|
||||
|
@ -115,24 +113,6 @@ static_assert(sizeof(PushData) <= 128,
|
|||
* Contains general information generated by the shader recompiler for an input program.
|
||||
*/
|
||||
struct Info {
|
||||
struct VsInput {
|
||||
enum InstanceIdType : u8 {
|
||||
None = 0,
|
||||
OverStepRate0 = 1,
|
||||
OverStepRate1 = 2,
|
||||
Plain = 3,
|
||||
};
|
||||
|
||||
AmdGpu::NumberFormat fmt;
|
||||
u16 binding;
|
||||
u16 num_components;
|
||||
u8 sgpr_base;
|
||||
u8 dword_offset;
|
||||
InstanceIdType instance_step_rate;
|
||||
s32 instance_data_buf;
|
||||
};
|
||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||
|
||||
struct AttributeFlags {
|
||||
bool Get(IR::Attribute attrib, u32 comp = 0) const {
|
||||
return flags[Index(attrib)] & (1 << comp);
|
||||
|
@ -179,9 +159,6 @@ struct Info {
|
|||
|
||||
CopyShaderData gs_copy_data;
|
||||
|
||||
s8 vertex_offset_sgpr = -1;
|
||||
s8 instance_offset_sgpr = -1;
|
||||
|
||||
BufferResourceList buffers;
|
||||
TextureBufferResourceList texture_buffers;
|
||||
ImageResourceList images;
|
||||
|
@ -208,10 +185,11 @@ struct Info {
|
|||
bool uses_shared{};
|
||||
bool uses_fp16{};
|
||||
bool uses_fp64{};
|
||||
bool uses_step_rates{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
bool has_readconst{};
|
||||
u8 mrt_mask{0u};
|
||||
bool has_fetch_shader{false};
|
||||
u32 fetch_shader_sgpr_base{0u};
|
||||
|
||||
explicit Info(Stage stage_, ShaderParams params)
|
||||
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
|
@ -252,18 +230,6 @@ struct Info {
|
|||
bnd.user_data += ud_mask.NumRegs();
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs) const {
|
||||
u32 vertex_offset = regs.index_offset;
|
||||
u32 instance_offset = 0;
|
||||
if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
|
||||
vertex_offset = user_data[vertex_offset_sgpr];
|
||||
}
|
||||
if (instance_offset_sgpr != -1) {
|
||||
instance_offset = user_data[instance_offset_sgpr];
|
||||
}
|
||||
return {vertex_offset, instance_offset};
|
||||
}
|
||||
|
||||
void RefreshFlatBuf() {
|
||||
flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
|
||||
ASSERT(user_data.size() <= NumUserDataRegs);
|
||||
|
@ -284,7 +250,12 @@ constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const
|
|||
}
|
||||
|
||||
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
|
||||
return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
|
||||
const auto image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
|
||||
if (!image.Valid()) {
|
||||
// Fall back to null image if unbound.
|
||||
return AmdGpu::Image::Null();
|
||||
}
|
||||
return image;
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
|
||||
|
|
|
@ -381,7 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
|
||||
const s32 binding = descriptors.Add(TextureBufferResource{
|
||||
.sharp_idx = sharp,
|
||||
.nfmt = buffer.GetNumberFmt(),
|
||||
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
|
||||
});
|
||||
|
||||
|
@ -660,11 +659,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||
}
|
||||
}
|
||||
|
||||
const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sharp_idx = tsharp,
|
||||
.type = type,
|
||||
.nfmt = image.GetNumberFmt(),
|
||||
.is_storage = is_storage,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
|
|
|
@ -22,6 +22,7 @@ struct Profile {
|
|||
bool support_fp32_denorm_preserve{};
|
||||
bool support_fp32_denorm_flush{};
|
||||
bool support_explicit_workgroup_layout{};
|
||||
bool support_legacy_vertex_attributes{};
|
||||
bool has_broken_spirv_clamp{};
|
||||
bool lower_left_origin_mode{};
|
||||
bool needs_manual_interpolation{};
|
||||
|
|
|
@ -6,12 +6,19 @@
|
|||
#include <bitset>
|
||||
|
||||
#include "common/types.h"
|
||||
#include "frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/backend/bindings.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/passes/srt.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
/// Per-vertex-attribute specialization key. The StageSpecialization constructor fills
/// `num_class` from AmdGpu::GetNumberClass of the attribute sharp's number format.
struct VsAttribSpecialization {
|
||||
/// Number class of the attribute; value-initialized by default.
AmdGpu::NumberClass num_class{};
|
||||
|
||||
/// Defaulted three-way comparison (member-wise over `num_class`).
auto operator<=>(const VsAttribSpecialization&) const = default;
|
||||
};
|
||||
|
||||
struct BufferSpecialization {
|
||||
u16 stride : 14;
|
||||
u16 is_storage : 1;
|
||||
|
@ -50,6 +57,8 @@ struct StageSpecialization {
|
|||
|
||||
const Shader::Info* info;
|
||||
RuntimeInfo runtime_info;
|
||||
Gcn::FetchShaderData fetch_shader_data{};
|
||||
boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
|
||||
std::bitset<MaxStageResources> bitset{};
|
||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||
|
@ -57,9 +66,19 @@ struct StageSpecialization {
|
|||
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
|
||||
Backend::Bindings start{};
|
||||
|
||||
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
|
||||
Backend::Bindings start_)
|
||||
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
|
||||
const Profile& profile_, Backend::Bindings start_)
|
||||
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
|
||||
if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) {
|
||||
fetch_shader_data = *fetch_shader;
|
||||
if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
|
||||
// Specialize shader on VS input number types to follow spec.
|
||||
ForEachSharp(vs_attribs, fetch_shader_data.attributes,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
|
||||
});
|
||||
}
|
||||
}
|
||||
u32 binding{};
|
||||
if (info->has_readconst) {
|
||||
binding++;
|
||||
|
@ -75,8 +94,7 @@ struct StageSpecialization {
|
|||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray
|
||||
: sharp.GetType();
|
||||
spec.type = sharp.GetBoundType();
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
ForEachSharp(binding, fmasks, info->fmasks,
|
||||
|
@ -86,6 +104,17 @@ struct StageSpecialization {
|
|||
});
|
||||
}
|
||||
|
||||
/// Walks `desc_list` and appends one default-constructed entry to `spec_list` per descriptor,
/// then lets `func(spec, desc, sharp)` fill it in from the descriptor's sharp.
/// This overload takes no binding counter, unlike the overload that follows it.
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
|
||||
for (const auto& desc : desc_list) {
|
||||
// Emplace before the validity check so spec_list keeps one entry per descriptor.
auto& spec = spec_list.emplace_back();
|
||||
const auto sharp = desc.GetSharp(*info);
|
||||
// A sharp that converts to false leaves the default-constructed spec untouched.
if (!sharp) {
|
||||
continue;
|
||||
}
|
||||
func(spec, desc, sharp);
|
||||
}
|
||||
}
|
||||
|
||||
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
||||
for (const auto& desc : desc_list) {
|
||||
auto& spec = spec_list.emplace_back();
|
||||
|
@ -106,6 +135,14 @@ struct StageSpecialization {
|
|||
if (runtime_info != other.runtime_info) {
|
||||
return false;
|
||||
}
|
||||
if (fetch_shader_data != other.fetch_shader_data) {
|
||||
return false;
|
||||
}
|
||||
for (u32 i = 0; i < vs_attribs.size(); i++) {
|
||||
if (vs_attribs[i] != other.vs_attribs[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
u32 binding{};
|
||||
if (info->has_readconst != other.info->has_readconst) {
|
||||
return false;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue