shader_recompiler: Additional scope handling and user data as push constants (#1013)

* shader_recompiler: Use push constants for user data regs

* shader: Add some GR2 instructions

* shader: Add some instructions

* shader: Add instructions for knack

* touchups

* spirv: Better names

* buffer_cache: Ignore non gpu modified images

* clang format

* Add log

* more fixes
This commit is contained in:
TheTurtle 2024-09-23 09:55:43 +03:00 committed by GitHub
parent fb5bc371cb
commit ee38eec7fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 180 additions and 87 deletions

View file

@ -9,10 +9,10 @@ namespace Shader::Backend {
/// Running binding counters handed from one shader stage to the next during SPIR-V emission.
struct Bindings {
    /// Descriptor binding slot; bumped once for every buffer, texture buffer, image and sampler.
    u32 unified = 0;
    u32 uniform_buffer = 0;
    u32 storage_buffer = 0;
    u32 texture = 0;
    u32 image = 0;
    /// Logical buffer index; bumped once for every buffer and texture buffer.
    u32 buffer = 0;
    /// Next free slot in the user data register array carried by the push data.
    u32 user_data = 0;

    /// Member-wise comparison, in declaration order.
    auto operator<=>(const Bindings&) const = default;
};
} // namespace Shader::Backend

View file

@ -265,7 +265,7 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
} // Anonymous namespace
std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
const IR::Program& program, u32& binding) {
const IR::Program& program, Bindings& binding) {
EmitContext ctx{profile, runtime_info, program.info, binding};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);

View file

@ -4,12 +4,13 @@
#pragma once
#include <vector>
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"
namespace Shader::Backend::SPIRV {
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
const IR::Program& program, u32& binding);
const IR::Program& program, Bindings& binding);
} // namespace Shader::Backend::SPIRV

View file

@ -86,7 +86,14 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
} // Anonymous namespace
// Produces the SPIR-V value of user data register `reg`.
// NOTE(review): this listing is a rendered diff whose removed and added lines are interleaved
// (the hunk header reports 7 lines before and 14 after). The `return ctx.ConstU32(...)` line
// directly below appears to be the removed pre-change body, which emitted the register value as
// a constant; the statements after it appear to be its replacement — confirm against the file.
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
return ctx.ConstU32(ctx.info.user_data[static_cast<size_t>(reg)]);
// Slot of `reg` in the packed user data array: the running base plus ud_mask.Index(reg).
const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
// Push block member holding that slot: four slots per member, starting at UdRegsIndex.
const u32 half = PushData::UdRegsIndex + (index >> 2);
// Pointer to component (index & 3) of that member inside the push constant block.
const Id ud_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.push_data_block, ctx.ConstU32(half),
ctx.ConstU32(index & 3))};
const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)};
// Name the loaded value after the register number.
ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg)));
return ud_reg;
}
void EmitGetThreadBitScalarReg(EmitContext& ctx) {

View file

@ -181,6 +181,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
case AmdGpu::ImageType::Color1DArray:
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Cube:
case AmdGpu::ImageType::Color2DMsaa:
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips());
case AmdGpu::ImageType::Color2DArray:
case AmdGpu::ImageType::Color3D:

View file

@ -42,7 +42,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
const Info& info_, u32& binding_)
const Info& info_, Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, binding{binding_} {
AddCapability(spv::Capability::Shader);
@ -173,7 +173,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
}
void EmitContext::DefineBufferOffsets() {
for (auto& buffer : buffers) {
for (BufferDefinition& buffer : buffers) {
const u32 binding = buffer.binding;
const u32 half = PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
@ -182,9 +182,11 @@ void EmitContext::DefineBufferOffsets() {
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
Name(buffer.offset, fmt::format("buf{}_off", binding));
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
}
for (auto& tex_buffer : texture_buffers) {
for (TextureBufferDefinition& tex_buffer : texture_buffers) {
const u32 binding = tex_buffer.binding;
const u32 half = PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2;
@ -192,7 +194,8 @@ void EmitContext::DefineBufferOffsets() {
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)};
tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U));
Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding));
}
}
@ -330,18 +333,21 @@ void EmitContext::DefineOutputs() {
void EmitContext::DefinePushDataBlock() {
// Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4]), "AuxData")};
const Id struct_type{
Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4], U32[4]), "AuxData")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberName(struct_type, 2, "buf_offsets0");
MemberName(struct_type, 3, "buf_offsets1");
MemberName(struct_type, 4, "buf_offsets2");
MemberName(struct_type, 4, "ud_regs0");
MemberName(struct_type, 5, "ud_regs1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
MemberDecorate(struct_type, 2, spv::Decoration::Offset, 8U);
MemberDecorate(struct_type, 3, spv::Decoration::Offset, 24U);
MemberDecorate(struct_type, 4, spv::Decoration::Offset, 40U);
MemberDecorate(struct_type, 5, spv::Decoration::Offset, 56U);
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(push_data_block, "push_data");
interfaces.push_back(push_data_block);
@ -379,7 +385,7 @@ void EmitContext::DefineBuffers() {
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
const Id pointer_type = TypePointer(storage_class, data_type);
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
if (is_storage && !desc.is_written) {
Decorate(id, spv::Decoration::NonWritable);
@ -388,7 +394,7 @@ void EmitContext::DefineBuffers() {
buffers.push_back({
.id = id,
.binding = binding++,
.binding = binding.buffer++,
.data_types = data_types,
.pointer_type = pointer_type,
});
@ -406,12 +412,12 @@ void EmitContext::DefineTextureBuffers() {
sampled, spv::ImageFormat::Unknown)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
texture_buffers.push_back({
.id = id,
.binding = binding++,
.binding = binding.buffer++,
.image_type = image_type,
.result_type = sampled_type[4],
.is_integer = is_integer,
@ -507,6 +513,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, sampled, format);
case AmdGpu::ImageType::Color2DArray:
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, sampled, format);
case AmdGpu::ImageType::Color2DMsaa:
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format);
case AmdGpu::ImageType::Color3D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
case AmdGpu::ImageType::Cube:
@ -525,7 +533,7 @@ void EmitContext::DefineImagesAndSamplers() {
const Id image_type{ImageType(*this, image_desc, sampled_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
image_desc.dword_offset));
@ -538,7 +546,6 @@ void EmitContext::DefineImagesAndSamplers() {
.is_storage = image_desc.is_storage,
});
interfaces.push_back(id);
++binding;
}
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
@ -550,13 +557,12 @@ void EmitContext::DefineImagesAndSamplers() {
sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
for (const auto& samp_desc : info.samplers) {
const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding);
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::DescriptorSet, 0U);
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base,
samp_desc.dword_offset));
samplers.push_back(id);
interfaces.push_back(id);
++binding;
}
}

View file

@ -6,6 +6,7 @@
#include <array>
#include <sirit/sirit.h>
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"
@ -37,7 +38,7 @@ struct VectorIds {
class EmitContext final : public Sirit::Module {
public:
explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, const Info& info,
u32& binding);
Bindings& binding);
~EmitContext();
Id Def(const IR::Value& value);
@ -221,7 +222,7 @@ public:
bool is_storage = false;
};
u32& binding;
Bindings& binding;
boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureBufferDefinition, 8> texture_buffers;
boost::container::small_vector<TextureDefinition, 8> images;

View file

@ -23,7 +23,6 @@ struct Compare {
static IR::Condition MakeCondition(const GcnInst& inst) {
if (inst.IsCmpx()) {
ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
return IR::Condition::Execnz;
}
@ -99,7 +98,7 @@ void CFG::EmitDivergenceLabels() {
// with SAVEEXEC to mask the threads that didn't pass the condition
// of initial branch.
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
inst.opcode == Opcode::V_CMPX_NE_U32;
inst.IsCmpx();
};
const auto is_close_scope = [](const GcnInst& inst) {
// Closing an EXEC scope can be either a branch instruction
@ -109,7 +108,7 @@ void CFG::EmitDivergenceLabels() {
// Sometimes compiler might insert instructions between the SAVEEXEC and the branch.
// Those instructions need to be wrapped in the condition as well so allow branch
// as end scope instruction.
inst.opcode == Opcode::S_CBRANCH_EXECZ ||
inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ENDPGM ||
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
};
@ -127,7 +126,8 @@ void CFG::EmitDivergenceLabels() {
s32 curr_begin = -1;
for (size_t index = GetIndex(start); index < end_index; index++) {
const auto& inst = inst_list[index];
if (is_close_scope(inst) && curr_begin != -1) {
const bool is_close = is_close_scope(inst);
if ((is_close || index == end_index - 1) && curr_begin != -1) {
// If there are no instructions inside scope don't do anything.
if (index - curr_begin == 1) {
curr_begin = -1;
@ -138,8 +138,16 @@ void CFG::EmitDivergenceLabels() {
const auto& save_inst = inst_list[curr_begin];
const Label label = index_to_pc[curr_begin] + save_inst.length;
AddLabel(label);
// Add a label to the close scope instruction as well.
AddLabel(index_to_pc[index]);
// Add a label to the close scope instruction.
// There are 3 cases where we need to close a scope.
// * Close scope instruction inside the block
// * Close scope instruction at the end of the block (cbranch or endpgm)
// * Normal instruction at the end of the block
// For the last case we must NOT add a label as that would cause
// the instruction to be separated into its own basic block.
if (is_close) {
AddLabel(index_to_pc[index]);
}
// Reset scope begin.
curr_begin = -1;
}
@ -194,7 +202,7 @@ void CFG::LinkBlocks() {
const auto end_inst{block.end_inst};
// Handle divergence block inserted here.
if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.opcode == Opcode::V_CMPX_NE_U32) {
end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.IsCmpx()) {
// Blocks are stored ordered by address in the set
auto next_it = std::next(it);
auto* target_block = &(*next_it);

View file

@ -281,6 +281,12 @@ void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) {
return ir.GetExec();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
case OperandField::ConstZero:
return ir.Imm1(false);
case OperandField::SignedConstIntNeg:
ASSERT_MSG(-s32(operand.code) + SignedConstIntNegMin - 1 == -1,
"SignedConstIntNeg must be -1");
return ir.Imm1(true);
default:
UNREACHABLE();
}
@ -506,6 +512,8 @@ void Translator::S_NOT_B64(const GcnInst& inst) {
return ir.GetExec();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
case OperandField::ConstZero:
return ir.Imm1(false);
default:
UNREACHABLE();
}
@ -520,6 +528,9 @@ void Translator::S_NOT_B64(const GcnInst& inst) {
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
break;
case OperandField::ExecLo:
ir.SetExec(result);
break;
default:
UNREACHABLE();
}

View file

@ -174,7 +174,7 @@ T Translator::GetSrc(const InstOperand& operand) {
value = ir.IAbs(value);
}
if (operand.input_modifier.neg) {
UNREACHABLE();
value = ir.INeg(value);
}
}
return value;

View file

@ -155,6 +155,7 @@ public:
void V_SUBREV_I32(const GcnInst& inst);
void V_ADDC_U32(const GcnInst& inst);
void V_LDEXP_F32(const GcnInst& inst);
void V_CVT_PKNORM_U16_F32(const GcnInst& inst);
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
// VOP1

View file

@ -89,6 +89,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_ADDC_U32(inst);
case Opcode::V_LDEXP_F32:
return V_LDEXP_F32(inst);
case Opcode::V_CVT_PKNORM_U16_F32:
return V_CVT_PKNORM_U16_F32(inst);
case Opcode::V_CVT_PKRTZ_F16_F32:
return V_CVT_PKRTZ_F16_F32(inst);
@ -244,6 +246,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
// V_CMPX_{OP8}_I32
case Opcode::V_CMPX_LT_I32:
return V_CMP_U32(ConditionOp::LT, true, true, inst);
case Opcode::V_CMPX_EQ_I32:
return V_CMP_U32(ConditionOp::EQ, true, true, inst);
case Opcode::V_CMPX_GT_I32:
return V_CMP_U32(ConditionOp::GT, true, true, inst);
case Opcode::V_CMPX_LG_I32:
@ -583,6 +587,15 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
}
void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::U32 dst0 = ir.ConvertFToU(32, ir.FPMul(src0, ir.Imm32(65535.f)));
const IR::U32 dst1 = ir.ConvertFToU(32, ir.FPMul(src1, ir.Imm32(65535.f)));
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, ir.BitFieldInsert(dst0, dst1, ir.Imm32(16), ir.Imm32(16)));
}
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::Value vec_f32 =
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
@ -1046,6 +1059,11 @@ void Translator::V_LSHL_B64(const GcnInst& inst) {
const IR::U64 src0{GetSrc64(inst.src[0])};
const IR::U64 src1{GetSrc64(inst.src[1])};
const IR::VectorReg dst_reg{inst.dst[0].code};
if (src0.IsImmediate() && src0.U64() == -1) {
ir.SetVectorReg(dst_reg, ir.Imm32(0xFFFFFFFF));
ir.SetVectorReg(dst_reg + 1, ir.Imm32(0xFFFFFFFF));
return;
}
ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0,
"V_LSHL_B64 with non-zero src0 or src1 is not supported");
ir.SetVectorReg(dst_reg, ir.Imm32(0));

View file

@ -7,6 +7,7 @@
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
@ -85,11 +86,14 @@ struct SamplerResource {
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;
struct PushData {
static constexpr size_t BufOffsetIndex = 2;
static constexpr u32 BufOffsetIndex = 2;
static constexpr u32 UdRegsIndex = 4;
static constexpr u32 MaxUdRegs = 8;
u32 step0;
u32 step1;
std::array<u8, 48> buf_offsets;
std::array<u8, 32> buf_offsets;
std::array<u32, MaxUdRegs> ud_regs;
void AddOffset(u32 binding, u32 offset) {
ASSERT(offset < 256 && binding < buf_offsets.size());
@ -145,6 +149,24 @@ struct Info {
AttributeFlags loads{};
AttributeFlags stores{};
/// Bit set of the user data scalar registers a shader reads; bit N stands for register N.
struct UserDataMask {
    /// Marks `reg` as used.
    void Set(IR::ScalarReg reg) noexcept {
        // Shift an unsigned one: `1 << 31` on a signed int overflows.
        mask |= 1U << static_cast<u32>(reg);
    }

    /// Returns how many used registers are numbered below `reg`, i.e. the position of `reg`
    /// in the densely packed list of used registers.
    u32 Index(IR::ScalarReg reg) const noexcept {
        const u32 reg_mask = (1U << static_cast<u32>(reg)) - 1U;
        return static_cast<u32>(std::popcount(mask & reg_mask));
    }

    /// Returns the number of registers marked as used.
    u32 NumRegs() const noexcept {
        return static_cast<u32>(std::popcount(mask));
    }

    /// Zero-initialised so a default-constructed mask starts out empty.
    u32 mask{};
};
UserDataMask ud_mask{};
s8 vertex_offset_sgpr = -1;
s8 instance_offset_sgpr = -1;
@ -190,11 +212,22 @@ struct Info {
return data;
}
size_t NumBindings() const noexcept {
return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
/// Copies every user data register flagged in `ud_mask` into `push.ud_regs`, lowest register
/// number first, and advances `bnd.user_data` by one for each register written.
/// @param bnd  Running binding counters; `user_data` is the next free slot in `push.ud_regs`.
/// @param push Push data that receives the register values.
void PushUd(Backend::Bindings& bnd, PushData& push) const {
    u32 mask = ud_mask.mask;
    while (mask) {
        // The lowest set bit is the next used register.
        const u32 index = std::countr_zero(mask);
        mask &= ~(1U << index);
        // The counter accumulates across stages, so guard against running past the array.
        ASSERT(bnd.user_data < push.ud_regs.size());
        push.ud_regs[bnd.user_data++] = user_data[index];
    }
}
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
/// Advances the running binding counters past the resources owned by this shader stage.
/// `buffer` moves by the number of buffers and texture buffers, `unified` by the number of all
/// bound resources (buffers, texture buffers, images and samplers), and `user_data` by the
/// number of user data registers in use.
/// @param bnd Binding counters to advance in place.
void AddBindings(Backend::Bindings& bnd) const {
    // Use this stage's own buffer count for both counters. Adding the already accumulated
    // `bnd.buffer` to `unified` would count earlier stages' buffers a second time.
    const u32 total_buffers = static_cast<u32>(buffers.size() + texture_buffers.size());
    bnd.buffer += total_buffers;
    bnd.unified += total_buffers + static_cast<u32>(images.size() + samplers.size());
    bnd.user_data += ud_mask.NumRegs();
}
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const {
u32 vertex_offset = 0;
u32 instance_offset = 0;
if (vertex_offset_sgpr != -1) {

View file

@ -8,14 +8,15 @@ namespace Shader::Optimization {
void Visit(Info& info, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetAttribute:
case IR::Opcode::GetAttributeU32: {
case IR::Opcode::GetAttributeU32:
info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32());
break;
}
case IR::Opcode::SetAttribute: {
case IR::Opcode::SetAttribute:
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break;
}
case IR::Opcode::GetUserData:
info.ud_mask.Set(inst.Arg(0).ScalarReg());
break;
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::WriteSharedU32:

View file

@ -6,6 +6,7 @@
#include <bitset>
#include "common/types.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h"
namespace Shader {
@ -45,11 +46,11 @@ struct StageSpecialization {
boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
boost::container::small_vector<ImageSpecialization, 16> images;
u32 start_binding{};
Backend::Bindings start{};
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
u32 start_binding_)
: info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} {
Backend::Bindings start_)
: info{&info_}, runtime_info{runtime_info_}, start{start_} {
u32 binding{};
ForEachSharp(binding, buffers, info->buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
@ -82,7 +83,7 @@ struct StageSpecialization {
}
bool operator==(const StageSpecialization& other) const {
if (start_binding != other.start_binding) {
if (start != other.start) {
return false;
}
if (runtime_info != other.runtime_info) {