// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include #include "common/types.h" #include "frontend/fetch_shader.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" namespace Shader { struct VsAttribSpecialization { s32 num_components{}; AmdGpu::NumberClass num_class{}; AmdGpu::CompMapping dst_select{}; auto operator<=>(const VsAttribSpecialization&) const = default; }; struct BufferSpecialization { u32 stride : 14; u32 is_storage : 1; u32 is_formatted : 1; u32 swizzle_enable : 1; u32 data_format : 6; u32 num_format : 4; u32 index_stride : 2; u32 element_size : 2; AmdGpu::CompMapping dst_select{}; AmdGpu::NumberConversion num_conversion{}; bool operator==(const BufferSpecialization& other) const { return stride == other.stride && is_storage == other.is_storage && is_formatted == other.is_formatted && swizzle_enable == other.swizzle_enable && (!is_formatted || (data_format == other.data_format && num_format == other.num_format && dst_select == other.dst_select && num_conversion == other.num_conversion)) && (!swizzle_enable || (index_stride == other.index_stride && element_size == other.element_size)); } }; struct ImageSpecialization { AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; bool is_integer = false; bool is_storage = false; bool is_cube = false; AmdGpu::CompMapping dst_select{}; AmdGpu::NumberConversion num_conversion{}; auto operator<=>(const ImageSpecialization&) const = default; }; struct FMaskSpecialization { u32 width; u32 height; auto operator<=>(const FMaskSpecialization&) const = default; }; struct SamplerSpecialization { bool force_unnormalized = false; auto operator<=>(const SamplerSpecialization&) const = default; }; /** * Alongside runtime information, this structure also checks bound resources * for compatibility. Can be used as a key for storing shader permutations. * Is separate from runtime information, because resource layout can only be deduced * after the first compilation of a module. */ struct StageSpecialization { static constexpr size_t MaxStageResources = 64; const Shader::Info* info; RuntimeInfo runtime_info; std::optional fetch_shader_data{}; boost::container::small_vector vs_attribs; std::bitset bitset{}; boost::container::small_vector buffers; boost::container::small_vector images; boost::container::small_vector fmasks; boost::container::small_vector samplers; Backend::Bindings start{}; StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_, Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { fetch_shader_data = Gcn::ParseFetchShader(info_); if (info_.stage == Stage::Vertex && fetch_shader_data) { // Specialize shader on VS input number types to follow spec. ForEachSharp(vs_attribs, fetch_shader_data->attributes, [&profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.num_components = desc.UsesStepRates() ? AmdGpu::NumComponents(sharp.GetDataFmt()) : 0; spec.num_class = profile_.support_legacy_vertex_attributes ? AmdGpu::NumberClass{} : AmdGpu::GetNumberClass(sharp.GetNumberFmt()); spec.dst_select = sharp.DstSelect(); }); } u32 binding{}; ForEachSharp(binding, buffers, info->buffers, [profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.stride = sharp.GetStride(); spec.is_storage = desc.IsStorage(sharp, profile_); spec.is_formatted = desc.is_formatted; spec.swizzle_enable = sharp.swizzle_enable; if (spec.is_formatted) { spec.data_format = static_cast(sharp.GetDataFmt()); spec.num_format = static_cast(sharp.GetNumberFmt()); spec.dst_select = sharp.DstSelect(); spec.num_conversion = sharp.GetNumberConversion(); } if (spec.swizzle_enable) { spec.index_stride = sharp.index_stride; spec.element_size = sharp.element_size; } }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { spec.type = sharp.GetViewType(desc.is_array); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); spec.is_storage = desc.is_written; spec.is_cube = sharp.IsCube(); if (spec.is_storage) { spec.dst_select = sharp.DstSelect(); } spec.num_conversion = sharp.GetNumberConversion(); }); ForEachSharp(binding, fmasks, info->fmasks, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { spec.width = sharp.width; spec.height = sharp.height; }); ForEachSharp(samplers, info->samplers, [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { spec.force_unnormalized = sharp.force_unnormalized; }); // Initialize runtime_info fields that rely on analysis in tessellation passes if (info->l_stage == LogicalStage::TessellationControl || info->l_stage == LogicalStage::TessellationEval) { Shader::TessellationDataConstantBuffer tess_constants; info->ReadTessConstantBuffer(tess_constants); if (info->l_stage == LogicalStage::TessellationControl) { runtime_info.hs_info.InitFromTessConstants(tess_constants); } else { runtime_info.vs_info.InitFromTessConstants(tess_constants); } } } void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { for (const auto& desc : desc_list) { auto& spec = spec_list.emplace_back(); const auto sharp = desc.GetSharp(*info); if (!sharp) { continue; } func(spec, desc, sharp); } } void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { for (const auto& desc : desc_list) { auto& spec = spec_list.emplace_back(); const auto sharp = desc.GetSharp(*info); if (!sharp) { binding++; continue; } bitset.set(binding++); func(spec, desc, sharp); } } bool operator==(const StageSpecialization& other) const { if (start != other.start) { return false; } if (runtime_info != other.runtime_info) { return false; } if (fetch_shader_data != other.fetch_shader_data) { return false; } for (u32 i = 0; i < vs_attribs.size(); i++) { if (vs_attribs[i] != other.vs_attribs[i]) { return false; } } u32 binding{}; for (u32 i = 0; i < buffers.size(); i++) { if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { return false; } } for (u32 i = 0; i < images.size(); i++) { if (other.bitset[binding++] && images[i] != other.images[i]) { return false; } } for (u32 i = 0; i < fmasks.size(); i++) { if (other.bitset[binding++] && fmasks[i] != other.fmasks[i]) { return false; } } for (u32 i = 0; i < samplers.size(); i++) { if (samplers[i] != other.samplers[i]) { return false; } } return true; } }; } // namespace Shader