// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include #include #include #include #include #include #include #include #include #include "common/assert.h" #include "common/bit_field.h" #include "common/polyfill_thread.h" #include "common/slot_vector.h" #include "common/types.h" #include "common/unique_function.h" #include "shader_recompiler/params.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" namespace Vulkan { class Rasterizer; } namespace Libraries::VideoOut { struct VideoOutPort; } namespace AmdGpu { #define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32)) #define CONCAT2(x, y) DO_CONCAT2(x, y) #define DO_CONCAT2(x, y) x##y #define INSERT_PADDING_WORDS(num_words) \ [[maybe_unused]] std::array CONCAT2(pad, __LINE__) struct Liverpool { static constexpr u32 GfxQueueId = 0u; static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software static constexpr u32 NumQueuesPerPipe = 8u; static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe; static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings; static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs static constexpr u32 NumColorBuffers = 8; static constexpr u32 NumViewports = 16; static constexpr u32 NumClipPlanes = 6; static constexpr u32 NumShaderUserData = 16; static constexpr u32 UconfigRegWordOffset = 0xC000; static constexpr u32 ContextRegWordOffset = 0xA000; static constexpr u32 ConfigRegWordOffset = 0x2000; static constexpr u32 ShRegWordOffset = 0x2C00; static constexpr u32 NumRegs = 0xD000; using UserData = std::array; struct BinaryInfo { static constexpr u8 signature_ref[] = {0x4f, 0x72, 0x62, 0x53, 0x68, 0x64, 0x72}; // OrbShdr std::array 
signature; u8 version; u32 pssl_or_cg : 1; u32 cached : 1; u32 type : 4; u32 source_type : 2; u32 length : 24; u8 chunk_usage_base_offset_in_dw; u8 num_input_usage_slots; u8 is_srt : 1; u8 is_srt_used_info_valid : 1; u8 is_extended_usage_info : 1; u8 reserved2 : 5; u8 reserved3; u64 shader_hash; u32 crc32; bool Valid() const { return shader_hash && crc32 && (std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0); } }; static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) { constexpr u32 token_mov_vcchi = 0xBEEB03FF; if (code[0] == token_mov_vcchi) { const auto* info = std::bit_cast(code + (code[1] + 1) * 2); if (info->Valid()) { return *info; } } // First instruction is not s_mov_b32 vcc_hi, #imm, // which means we cannot get the binary info via said instruction. // The easiest solution is to iterate through each dword and break // on the first instance of the binary info. constexpr size_t signature_size = sizeof(BinaryInfo::signature_ref) / sizeof(u8); const u32* end = code + search_limit; for (const u32* it = code; it < end; ++it) { if (const BinaryInfo* info = std::bit_cast(it); info->Valid()) { return *info; } } UNREACHABLE_MSG("Shader binary info not found."); } struct ShaderProgram { u32 address_lo; BitField<0, 8, u32> address_hi; union { BitField<0, 6, u64> num_vgprs; BitField<6, 4, u64> num_sgprs; BitField<10, 2, u64> priority; BitField<12, 2, FpRoundMode> fp_round_mode32; BitField<14, 2, FpRoundMode> fp_round_mode64; BitField<16, 2, FpDenormMode> fp_denorm_mode32; BitField<18, 2, FpDenormMode> fp_denorm_mode64; BitField<12, 8, u64> float_mode; BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs BitField<33, 5, u64> num_user_regs; } settings; UserData user_data; template const T Address() const { const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8; return reinterpret_cast(addr); } std::span Code() const { const u32* code = Address(); const BinaryInfo& bininfo 
= SearchBinaryInfo(code);
            // bininfo.length is divided by the dword size to obtain the element count.
            const u32 num_dwords = bininfo.length / sizeof(u32);
            return std::span{code, num_dwords};
        }
    };

    /// Pair of floats clamping the hull shader tessellation factor.
    struct HsTessFactorClamp {
        // I've only seen min=0.0, max=1.0 so far.
        // TODO why is max set to 1.0? Makes no sense
        float hs_max_tess;
        float hs_min_tess;
    };

    /// Register block describing a compute dispatch: dimensions, thread counts, program address,
    /// resource settings and user data. Padding words keep members at their register offsets.
    struct ComputeProgram {
        u32 dispatch_initiator;
        u32 dim_x;
        u32 dim_y;
        u32 dim_z;
        u32 start_x;
        u32 start_y;
        u32 start_z;
        struct {
            u16 full;
            u16 partial;
        } num_thread_x, num_thread_y, num_thread_z;
        INSERT_PADDING_WORDS(1);
        BitField<0, 12, u32> max_wave_id;
        u32 address_lo;
        BitField<0, 8, u32> address_hi;
        INSERT_PADDING_WORDS(4);
        union {
            BitField<0, 6, u64> num_vgprs;
            BitField<6, 4, u64> num_sgprs;
            BitField<33, 5, u64> num_user_regs;
            BitField<39, 3, u64> tgid_enable;
            BitField<47, 9, u64> lds_dwords;
        } settings;
        INSERT_PADDING_WORDS(1);
        u32 resource_limits;
        INSERT_PADDING_WORDS(0x2A);
        UserData user_data;

        /// Rebuilds the program address from address_hi (shifted left by 40) and address_lo
        /// (shifted left by 8) and reinterprets it as the requested type.
        template
        const T Address() const {
            const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
            return reinterpret_cast(addr);
        }

        /// Size of the shared (LDS) memory requested by the program, in bytes.
        u32 SharedMemSize() const noexcept {
            // lds_dwords is in units of 128 dwords. We return bytes.
return settings.lds_dwords.Value() * 128 * 4;
        }

        /// Tests bit `i` of the tgid_enable field.
        bool IsTgidEnabled(u32 i) const noexcept {
            return (settings.tgid_enable.Value() >> i) & 1;
        }

        /// Returns a span over the shader code, sized from the length recorded in its BinaryInfo.
        std::span Code() const {
            const u32* code = Address();
            const BinaryInfo& bininfo = SearchBinaryInfo(code);
            const u32 num_dwords = bininfo.length / sizeof(u32);
            return std::span{code, num_dwords};
        }
    };

    /// Looks up the BinaryInfo header for any program type exposing Address().
    template
    static constexpr const BinaryInfo& GetBinaryInfo(const Shader& sh) {
        const auto* code = sh.template Address();
        return SearchBinaryInfo(code);
    }

    /// Bundles a program's user data, code span and shader hash into Shader::ShaderParams.
    static constexpr Shader::ShaderParams GetParams(const auto& sh) {
        auto& bininfo = GetBinaryInfo(sh);
        return {
            .user_data = sh.user_data,
            .code = sh.Code(),
            .hash = bininfo.shader_hash,
        };
    }

    /// Bit-field view of one pixel shader input control word.
    union PsInputControl {
        u32 raw;
        BitField<0, 5, u32> input_offset;
        BitField<5, 1, u32> use_default;
        BitField<8, 2, u32> default_value;
        BitField<10, 1, u32> flat_shade;
    };

    /// Component layout values used by the ShaderPosFormat fields.
    enum class ShaderExportComp : u32 {
        None = 0,
        OneComp = 1,
        TwoComp = 2,
        FourCompCompressed = 3,
        FourComp = 4,
    };

    /// Four 4-bit ShaderExportComp fields, one per position export.
    union ShaderPosFormat {
        u32 raw;
        BitField<0, 4, ShaderExportComp> pos0;
        BitField<4, 4, ShaderExportComp> pos1;
        BitField<8, 4, ShaderExportComp> pos2;
        BitField<12, 4, ShaderExportComp> pos3;
    };

    /// Export format values used by the ColorExportFormat fields and by z_export_format.
    enum class ShaderExportFormat : u32 {
        Zero = 0,
        R_32 = 1,
        GR_32 = 2,
        AR_32 = 3,
        ABGR_FP16 = 4,
        ABGR_UNORM16 = 5,
        ABGR_SNORM16 = 6,
        ABGR_UINT16 = 7,
        ABGR_SINT16 = 8,
        ABGR_32 = 9,
    };

    /// Eight 4-bit ShaderExportFormat fields, one per colour export.
    union ColorExportFormat {
        u32 raw;
        BitField<0, 4, ShaderExportFormat> col0;
        BitField<4, 4, ShaderExportFormat> col1;
        BitField<8, 4, ShaderExportFormat> col2;
        BitField<12, 4, ShaderExportFormat> col3;
        BitField<16, 4, ShaderExportFormat> col4;
        BitField<20, 4, ShaderExportFormat> col5;
        BitField<24, 4, ShaderExportFormat> col6;
        BitField<28, 4, ShaderExportFormat> col7;
    };

    /// Bit-field view of the vertex shader output control word.
    union VsOutputControl {
        u32 raw;
        BitField<0, 8, u32> clip_distance_enable;
        BitField<8, 8, u32> cull_distance_enable;
        BitField<16, 1, u32> use_vtx_point_size;
        BitField<17, 1, u32> use_vtx_edge_flag;
        BitField<18, 1, u32> use_vtx_render_target_idx;
        BitField<19, 1, u32>
use_vtx_viewport_idx;
        BitField<20, 1, u32> use_vtx_kill_flag;
        BitField<21, 1, u32> vs_out_misc_enable;
        BitField<22, 1, u32> vs_out_ccdist0_enable;
        BitField<23, 1, u32> vs_out_ccdist1_enable;
        BitField<25, 1, u32> use_vtx_gs_cut_flag;

        /// Tests bit `index` of clip_distance_enable.
        bool IsClipDistEnabled(u32 index) const {
            return (clip_distance_enable.Value() >> index) & 1;
        }

        /// Tests bit `index` of cull_distance_enable.
        bool IsCullDistEnabled(u32 index) const {
            return (cull_distance_enable.Value() >> index) & 1;
        }
    };

    /// Four single-bit mode flags.
    // NOTE(review): the flags are 1-bit fields of type s32; if s32 is a signed type a set bit
    // reads back as -1 rather than 1 - confirm callers only test for non-zero.
    struct ModeControl {
        s32 msaa_enable : 1;
        s32 vport_scissor_enable : 1;
        s32 line_stripple_enable : 1;
        s32 send_unlit_stiles_to_pkr : 1;
    };

    /// Values of DepthBufferControl::z_order.
    enum class ZOrder : u32 {
        LateZ = 0,
        EarlyZLateZ = 1,
        ReZ = 2,
        EarlyZReZ = 3,
    };

    /// Values of DepthBufferControl::conservative_z_export.
    enum class ConservativeDepth : u32 {
        Any = 0,
        LessThanZ = 1,
        GreaterThanZ = 2,
    };

    /// Bit-field view of the depth buffer shader control word.
    union DepthBufferControl {
        u32 raw;
        BitField<0, 1, u32> z_export_enable;
        BitField<1, 1, u32> stencil_test_val_export_enable;
        BitField<2, 1, u32> stencil_op_val_export_enable;
        BitField<4, 2, ZOrder> z_order;
        BitField<6, 1, u32> kill_enable;
        BitField<7, 1, u32> coverage_to_mask_enable;
        BitField<8, 1, u32> mask_export_enable;
        BitField<9, 1, u32> exec_on_hier_fail;
        BitField<10, 1, u32> exec_on_noop;
        BitField<11, 1, u32> alpha_to_mask_disable;
        BitField<12, 1, u32> depth_before_shader;
        BitField<13, 2, ConservativeDepth> conservative_z_export;
    };

    /// Comparison functions used by the depth and stencil tests in DepthControl.
    enum class CompareFunc : u32 {
        Never = 0,
        Less = 1,
        Equal = 2,
        LessEqual = 3,
        Greater = 4,
        NotEqual = 5,
        GreaterEqual = 6,
        Always = 7,
    };

    /// Bit-field view of the depth/stencil test control word.
    union DepthControl {
        u32 raw;
        BitField<0, 1, u32> stencil_enable;
        BitField<1, 1, u32> depth_enable;
        BitField<2, 1, u32> depth_write_enable;
        BitField<3, 1, u32> depth_bounds_enable;
        BitField<4, 3, CompareFunc> depth_func;
        BitField<7, 1, u32> backface_enable;
        BitField<8, 3, CompareFunc> stencil_ref_func;
        BitField<20, 3, CompareFunc> stencil_bf_func;
        BitField<30, 1, u32> enable_color_writes_on_depth_fail;
        BitField<31, 1, u32> disable_color_writes_on_depth_pass;
    };

    /// Stencil operations used by the StencilControl fields.
    enum class StencilFunc : u32 {
        Keep = 0,
        Zero = 1,
        Ones = 2,
        ReplaceTest = 3,
        ReplaceOp = 4,
        AddClamp = 5,
        SubClamp =
6,
        Invert = 7,
        AddWrap = 8,
        SubWrap = 9,
        And = 10,
        Or = 11,
        Xor = 12,
        Nand = 13,
        Nor = 14,
        Xnor = 15,
    };

    /// Six 4-bit StencilFunc fields: fail / z-pass / z-fail for the front and back faces.
    union StencilControl {
        u32 raw;
        BitField<0, 4, StencilFunc> stencil_fail_front;
        BitField<4, 4, StencilFunc> stencil_zpass_front;
        BitField<8, 4, StencilFunc> stencil_zfail_front;
        BitField<12, 4, StencilFunc> stencil_fail_back;
        BitField<16, 4, StencilFunc> stencil_zpass_back;
        BitField<20, 4, StencilFunc> stencil_zfail_back;
    };

    /// Four 8-bit stencil values packed into one word.
    union StencilRefMask {
        u32 raw;
        BitField<0, 8, u32> stencil_test_val;
        BitField<8, 8, u32> stencil_mask;
        BitField<16, 8, u32> stencil_write_mask;
        BitField<24, 8, u32> stencil_op_val;
    };

    /// Depth/stencil target description: formats, read/write base addresses and sizes.
    struct DepthBuffer {
        enum class ZFormat : u32 {
            Invalid = 0,
            Z16 = 1,
            Z32Float = 3,
        };

        enum class StencilFormat : u32 {
            Invalid = 0,
            Stencil8 = 1,
        };

        union ZInfo {
            BitField<0, 2, ZFormat> format;
            BitField<2, 2, u32> num_samples;
            BitField<13, 3, u32> tile_split;
            BitField<20, 3, u32> tile_mode_index;
            BitField<23, 4, u32> decompress_on_n_zplanes;
            BitField<27, 1, u32> allow_expclear;
            BitField<28, 1, u32> read_size;
            BitField<29, 1, u32> tile_surface_en;
            BitField<30, 1, u32> clear_disallowed;
            BitField<31, 1, u32> zrange_precision;
        } z_info;
        union {
            BitField<0, 1, StencilFormat> format;
        } stencil_info;
        u32 z_read_base;
        u32 stencil_read_base;
        u32 z_write_base;
        u32 stencil_write_base;
        union {
            BitField<0, 11, u32> pitch_tile_max;
            BitField<11, 11, u32> height_tile_max;
        } depth_size;
        union {
            BitField<0, 22, u32> tile_max;
        } depth_slice;

        /// True when the depth read address is non-zero and the Z format is not Invalid.
        bool DepthValid() const {
            return DepthAddress() != 0 && z_info.format != ZFormat::Invalid;
        }

        /// True when the stencil read address is non-zero and the stencil format is not Invalid.
        bool StencilValid() const {
            return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
        }

        /// True when the depth write address is non-zero and the Z format is not Invalid.
        bool DepthWriteValid() const {
            return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid;
        }

        /// True when the stencil write address is non-zero and the stencil format is not Invalid.
        bool StencilWriteValid() const {
            return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
        }

        /// (pitch_tile_max + 1) multiplied by 8.
        u32 Pitch() const {
            return (depth_size.pitch_tile_max + 1) << 3;
        }

        /// (height_tile_max + 1) multiplied by 8.
        u32 Height() const {
            return
(depth_size.height_tile_max + 1) << 3; } u64 DepthAddress() const { return u64(z_read_base) << 8; } u64 StencilAddress() const { return u64(stencil_read_base) << 8; } u64 DepthWriteAddress() const { return u64(z_write_base) << 8; } u64 StencilWriteAddress() const { return u64(stencil_write_base) << 8; } u32 NumSamples() const { return 1u << z_info.num_samples; // spec doesn't say it is a log2 } u32 NumBits() const { return z_info.format == ZFormat::Z32Float ? 32 : 16; } size_t GetDepthSliceSize() const { ASSERT(z_info.format != ZFormat::Invalid); const auto bpe = NumBits() >> 3; // in bytes return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples(); } }; enum class ClipSpace : u32 { MinusWToW = 0, ZeroToW = 1, }; enum class PrimKillCond : u32 { AllVtx = 0, AnyVtx = 1, }; union ClipperControl { u32 raw; BitField<0, 6, u32> user_clip_plane_enable; BitField<16, 1, u32> clip_disable; BitField<19, 1, ClipSpace> clip_space; BitField<21, 1, PrimKillCond> vtx_kill_or; BitField<22, 1, u32> dx_rasterization_kill; BitField<23, 1, u32> dx_linear_attr_clip_enable; BitField<26, 1, u32> zclip_near_disable; BitField<26, 1, u32> zclip_far_disable; }; enum class PolygonMode : u32 { Point = 0, Line = 1, Fill = 2, }; enum class ProvokingVtxLast : u32 { First = 0, Last = 1, }; enum class CullMode : u32 { None = 0, Front = 1, Back = 2, FrontAndBack = 3, }; enum class FrontFace : u32 { CounterClockwise = 0, Clockwise = 1, }; union PolygonControl { u32 raw; BitField<0, 1, u32> cull_front; BitField<1, 1, u32> cull_back; BitField<2, 1, FrontFace> front_face; BitField<3, 2, u32> enable_polygon_mode; BitField<5, 3, PolygonMode> polygon_mode_front; BitField<8, 3, PolygonMode> polygon_mode_back; BitField<11, 1, u32> enable_polygon_offset_front; BitField<12, 1, u32> enable_polygon_offset_back; BitField<13, 1, u32> enable_polygon_offset_para; BitField<16, 1, u32> enable_window_offset; BitField<19, 1, ProvokingVtxLast> provoking_vtx_last; BitField<20, 1, u32> persp_corr_dis; BitField<21, 1, u32> 
multi_prim_ib_ena; PolygonMode PolyMode() const { return enable_polygon_mode ? polygon_mode_front.Value() : PolygonMode::Fill; } CullMode CullingMode() const { return static_cast(cull_front | cull_back << 1); } bool NeedsBias() const { return enable_polygon_offset_back || enable_polygon_offset_front || enable_polygon_offset_para; } }; union VsOutputConfig { u32 raw; BitField<1, 5, u32> export_count_min_one; BitField<6, 1, u32> half_pack; u32 NumExports() const { return export_count_min_one.Value() + 1; } }; union ColorBufferMask { enum ColorComponent : u32 { ComponentR = (1u << 0), ComponentG = (1u << 1), ComponentB = (1u << 2), ComponentA = (1u << 3), }; u32 raw; BitField<0, 4, u32> output0_mask; BitField<4, 4, u32> output1_mask; BitField<8, 4, u32> output2_mask; BitField<12, 4, u32> output3_mask; BitField<16, 4, u32> output4_mask; BitField<20, 4, u32> output5_mask; BitField<24, 4, u32> output6_mask; BitField<28, 4, u32> output7_mask; u32 GetMask(int buf_id) const { return (raw >> (buf_id * 4)) & 0xfu; } void SetMask(int buf_id, u32 mask) { raw &= ~(0xf << (buf_id * 4)); raw |= (mask << (buf_id * 4)); } }; struct IndexBufferBase { BitField<0, 8, u32> base_addr_hi; u32 base_addr_lo; template T Address() const { return std::bit_cast((base_addr_lo & ~1U) | u64(base_addr_hi) << 32); } }; enum class IndexType : u32 { Index16 = 0, Index32 = 1, }; enum class IndexSwapMode : u32 { None = 0, Swap16 = 1, Swap32 = 2, SwapWord = 3, }; union IndexBufferType { u32 raw; BitField<0, 2, IndexType> index_type; BitField<2, 2, IndexSwapMode> swap_mode; }; union VgtNumInstances { u32 num_instances; u32 NumInstances() const { return num_instances == 0 ? 
1 : num_instances;
        }
    };

    /// Scissor rectangle stored as two pairs of signed 16-bit coordinates.
    struct Scissor {
        struct {
            s16 top_left_x;
            s16 top_left_y;
        };
        struct {
            s16 bottom_right_x;
            s16 bottom_right_y;
        };

        // From AMD spec: 'Negative numbers clamped to 0'
        static s16 Clamp(s16 value) {
            return std::max(s16(0), value);
        }

        /// Difference of the clamped right and left edges.
        u32 GetWidth() const {
            return static_cast(Clamp(bottom_right_x) - Clamp(top_left_x));
        }

        /// Difference of the clamped bottom and top edges.
        u32 GetHeight() const {
            return static_cast(Clamp(bottom_right_y) - Clamp(top_left_y));
        }
    };

    /// Window offset as two 16-bit signed bit-fields.
    struct WindowOffset {
        s32 window_x_offset : 16;
        s32 window_y_offset : 16;
    };

    /// Scissor rectangle whose top-left word also carries the window_offset_disable flag.
    struct ViewportScissor {
        union {
            BitField<0, 15, s32> top_left_x;
            BitField<16, 15, s32> top_left_y;
            BitField<31, 1, s32> window_offset_disable;
        };
        struct {
            s16 bottom_right_x;
            s16 bottom_right_y;
        };

        /// Right edge minus left edge; unlike Scissor, no clamping is applied here.
        u32 GetWidth() const {
            return bottom_right_x - top_left_x;
        }

        /// Bottom edge minus top edge; unlike Scissor, no clamping is applied here.
        u32 GetHeight() const {
            return bottom_right_y - top_left_y;
        }
    };

    /// Depth range of a viewport.
    struct ViewportDepth {
        float zmin;
        float zmax;
    };

    /// Scale and offset per axis for one viewport.
    struct ViewportBounds {
        float xscale;
        float xoffset;
        float yscale;
        float yoffset;
        float zscale;
        float zoffset;
    };

    /// Bit-field view of the viewport transform control word.
    union ViewportControl {
        BitField<0, 1, u32> xscale_enable;
        BitField<1, 1, u32> xoffset_enable;
        BitField<2, 1, u32> yscale_enable;
        BitField<3, 1, u32> yoffset_enable;
        BitField<4, 1, u32> zscale_enable;
        BitField<5, 1, u32> zoffset_enable;
        BitField<8, 1, u32> xy_transformed;
        BitField<9, 1, u32> z_transformed;
        BitField<10, 1, u32> w_transformed;
        BitField<11, 1, u32> perfcounter_ref;
    };

    /// Four raw words of user clip plane data.
    struct ClipUserData {
        u32 data_x;
        u32 data_y;
        u32 data_z;
        u32 data_w;
    };

    /// Blend constant colour.
    struct BlendConstants {
        float red;
        float green;
        float blue;
        float alpha;
    };

    /// Bit-field view of a per-target blend control word.
    union BlendControl {
        enum class BlendFactor : u32 {
            Zero = 0,
            One = 1,
            SrcColor = 2,
            OneMinusSrcColor = 3,
            SrcAlpha = 4,
            OneMinusSrcAlpha = 5,
            DstAlpha = 6,
            OneMinusDstAlpha = 7,
            DstColor = 8,
            OneMinusDstColor = 9,
            SrcAlphaSaturate = 10,
            ConstantColor = 13,
            OneMinusConstantColor = 14,
            Src1Color = 15,
            InvSrc1Color = 16,
            Src1Alpha = 17,
            InvSrc1Alpha = 18,
            ConstantAlpha = 19,
            OneMinusConstantAlpha = 20,
        };

        enum class BlendFunc : u32 {
            Add = 0,
            Subtract = 1,
            Min = 2,
Max = 3,
            ReverseSubtract = 4,
        };

        BitField<0, 5, BlendFactor> color_src_factor;
        BitField<5, 3, BlendFunc> color_func;
        BitField<8, 5, BlendFactor> color_dst_factor;
        BitField<16, 5, BlendFactor> alpha_src_factor;
        BitField<21, 3, BlendFunc> alpha_func;
        BitField<24, 5, BlendFactor> alpha_dst_factor;
        BitField<29, 1, u32> separate_alpha_blend;
        BitField<30, 1, u32> enable;
        BitField<31, 1, u32> disable_rop3;
    };

    /// Bit-field view of the colour block control word.
    union ColorControl {
        enum class OperationMode : u32 {
            Disable = 0u,
            Normal = 1u,
            EliminateFastClear = 2u,
            Resolve = 3u,
            Err = 4u,
            FmaskDecompress = 5u,
        };

        BitField<0, 1, u32> disable_dual_quad;
        BitField<3, 1, u32> degamma_enable;
        BitField<4, 3, OperationMode> mode;
        BitField<16, 8, u32> rop3;
    };

    /// Colour target description: base address, pitch/slice sizes, view, format info and
    /// CMASK/FMASK metadata.
    struct ColorBuffer {
        enum class EndianSwap : u32 {
            None = 0,
            Swap8In16 = 1,
            Swap8In32 = 2,
            Swap8In64 = 3,
        };

        enum class SwapMode : u32 {
            Standard = 0,
            Alternate = 1,
            StandardReverse = 2,
            AlternateReverse = 3,
        };

        enum class RoundMode : u32 {
            ByHalf = 0,
            Truncate = 1,
        };

        u32 base_address;
        union {
            BitField<0, 11, u32> tile_max;
            BitField<20, 11, u32> fmask_tile_max;
        } pitch;
        union {
            BitField<0, 22, u32> tile_max;
        } slice;
        union {
            BitField<0, 11, u32> slice_start;
            BitField<13, 11, u32> slice_max;
        } view;
        union Color0Info {
            BitField<0, 2, EndianSwap> endian;
            BitField<2, 5, DataFormat> format;
            BitField<7, 1, u32> linear_general;
            BitField<8, 3, NumberFormat> number_type;
            BitField<11, 2, SwapMode> comp_swap;
            BitField<13, 1, u32> fast_clear;
            BitField<14, 1, u32> compression;
            BitField<15, 1, u32> blend_clamp;
            BitField<16, 1, u32> blend_bypass;
            BitField<17, 1, u32> simple_float;
            BitField<18, 1, RoundMode> round_mode;
            BitField<19, 1, u32> cmask_is_linear;
            BitField<20, 3, u32> blend_opt_dont_rd_dst;
            BitField<23, 3, u32> blend_opt_discard_pixel;
            BitField<26, 1, u32> fmask_compression_disable_ci;
            BitField<27, 1, u32> fmask_compress_1frag_only;
            BitField<28, 1, u32> dcc_enable;
            BitField<29, 2, u32> cmask_addr_type;
            /// Neo-mode only
            BitField<31, 1, u32> alt_tile_mode;

            u32 u32all;
        } info;
        union
Color0Attrib {
            BitField<0, 5, TilingMode> tile_mode_index;
            BitField<5, 5, u32> fmask_tile_mode_index;
            BitField<10, 2, u32> fmask_bank_height;
            BitField<12, 3, u32> num_samples_log2;
            BitField<15, 2, u32> num_fragments_log2;
            BitField<17, 1, u32> force_dst_alpha_1;

            u32 u32all;
        } attrib;
        INSERT_PADDING_WORDS(1);
        u32 cmask_base_address;
        union {
            BitField<0, 14, u32> tile_max;
        } cmask_slice;
        u32 fmask_base_address;
        union {
            BitField<0, 14, u32> tile_max;
        } fmask_slice;
        u32 clear_word0;
        u32 clear_word1;
        INSERT_PADDING_WORDS(2);

        /// A colour buffer converts to true when its data format is not FormatInvalid.
        operator bool() const {
            return info.format != DataFormat::FormatInvalid;
        }

        /// (pitch.tile_max + 1) multiplied by 8.
        u32 Pitch() const {
            return (pitch.tile_max + 1) << 3;
        }

        /// Height derived from the slice size: (slice.tile_max + 1) * 64 divided by Pitch().
        u32 Height() const {
            return (slice.tile_max + 1) * 64 / Pitch();
        }

        /// Base address: base_address shifted left by 8.
        u64 Address() const {
            return u64(base_address) << 8;
        }

        /// CMASK address: cmask_base_address shifted left by 8.
        VAddr CmaskAddress() const {
            return VAddr(cmask_base_address) << 8;
        }

        /// FMASK address: fmask_base_address shifted left by 8.
        VAddr FmaskAddress() const {
            return VAddr(fmask_base_address) << 8;
        }

        /// Sample count computed from num_fragments_log2 (not from num_samples_log2).
        u32 NumSamples() const {
            return 1 << attrib.num_fragments_log2;
        }

        /// view.slice_max + 1.
        u32 NumSlices() const {
            return view.slice_max + 1;
        }

        /// Size in bytes of one colour slice, including all samples.
        size_t GetColorSliceSize() const {
            const auto num_bytes_per_element = NumBits(info.format) / 8u;
            const auto slice_size =
                num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
            return slice_size;
        }

        /// Display_Linear when linear_general is set, otherwise the configured tile mode.
        TilingMode GetTilingMode() const {
            return info.linear_general ? TilingMode::Display_Linear
                                       : attrib.tile_mode_index.Value();
        }

        /// True when linear_general is clear.
        bool IsTiled() const {
            return !info.linear_general;
        }

        /// Data format after RemapDataFormat.
        [[nodiscard]] DataFormat GetDataFmt() const {
            return RemapDataFormat(info.format);
        }

        /// Number format after RemapNumberFormat, with SnormNz substituted by Srgb first.
        [[nodiscard]] NumberFormat GetNumberFmt() const {
            // There is a small difference between T# and CB number types, account for it.
            return RemapNumberFormat(info.number_type == NumberFormat::SnormNz ?
NumberFormat::Srgb : info.number_type.Value(), info.format);
        }

        /// Number conversion obtained from MapNumberConversion for the buffer's number type.
        [[nodiscard]] NumberConversion GetNumberConversion() const {
            return MapNumberConversion(info.number_type);
        }

        /// Component mapping for this target. The table is indexed first by comp_swap and then
        /// by (component count - 1); the selected entry is passed through RemapSwizzle.
        [[nodiscard]] CompMapping Swizzle() const {
            // clang-format off
            static constexpr std::array, 4> mrt_swizzles{{
                // Standard
                std::array{{
                    {.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha},
                }},
                // Alternate
                std::array{{
                    {.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha},
                }},
                // StandardReverse
                std::array{{
                    {.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red},
                }},
                // AlternateReverse
                std::array{{
                    {.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero},
                    {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue},
                }},
            }};
            // clang-format on
            const auto swap_idx = static_cast(info.comp_swap.Value());
            const auto components_idx = NumComponents(info.format) - 1;
            const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
            return RemapSwizzle(info.format, mrt_swizzle);
        }
    };

    /// Word offsets of selected context registers.
    enum ContextRegs : u32 {
        DbZInfo = 0xA010,
        CbColor0Base = 0xA318,
        CbColor1Base = 0xA327,
        CbColor2Base = 0xA336,
        CbColor3Base = 0xA345,
        CbColor4Base = 0xA354,
        CbColor5Base = 0xA363,
        CbColor6Base = 0xA372,
        CbColor7Base = 0xA381,
        CbColor0Cmask = 0xA31F,
        CbColor1Cmask = 0xA32E,
        CbColor2Cmask = 0xA33D,
        CbColor3Cmask = 0xA34C,
        CbColor4Cmask = 0xA35B,
        CbColor5Cmask = 0xA36A,
        CbColor6Cmask = 0xA379,
        CbColor7Cmask = 0xA388,
    };

    /// Polygon offset (depth bias) parameters for front and back faces.
    struct PolygonOffset {
        float depth_bias;
        float front_scale;
        float front_offset;
        float back_scale;
        float back_offset;
    };

    /// Address stored in a single register word.
    struct Address {
        u32 address;

        /// Stored value shifted left by 8.
        VAddr GetAddress() const {
            return u64(address) << 8;
        }
    };

    /// Bit-field view of the depth render control word.
    union DepthRenderControl {
        u32 raw;
        BitField<0, 1, u32> depth_clear_enable;
        BitField<1, 1, u32> stencil_clear_enable;
        BitField<2, 1, u32> depth_copy;
        BitField<3, 1, u32> stencil_copy;
        BitField<4, 1, u32> resummarize_enable;
        BitField<5, 1, u32> stencil_compress_disable;
        BitField<6, 1, u32> depth_compress_disable;
        BitField<7, 1, u32> copy_centroid;
        BitField<8, 1, u32> copy_sample;
        BitField<9, 1, u32> decompress_enable;
    };

    /// Bit-field view of the depth view word (slice range and read-only flags).
    union DepthView {
        BitField<0, 11, u32> slice_start;
        BitField<13, 11, u32> slice_max;
        BitField<24, 1, u32> z_read_only;
        BitField<25, 1, u32> stencil_read_only;

        /// slice_max + 1.
        u32 NumSlices() const {
            return slice_max + 1u;
        }
    };

    /// Values of the 2-bit force fields in DepthRenderOverride.
    enum class ForceEnable : u32 {
        Off = 0,
        Enable = 1,
        Disable = 2,
    };

    /// Values of DepthRenderOverride::force_z_limit_summ.
    enum class ForceSumm : u32 {
        Off = 0,
        MinZ = 1,
        MaxZ = 2,
        Both = 3,
    };

    /// Bit-field view of the depth render override word.
    union DepthRenderOverride {
        u32 raw;
        BitField<0, 2, ForceEnable> force_hiz_enable;
        BitField<2, 2, ForceEnable> force_his_enable0;
        BitField<4, 2, ForceEnable> force_his_enable1;
        BitField<6, 1, u32> force_shader_z_order;
        BitField<7, 1, u32> fast_z_disable;
        BitField<8, 1, u32> fast_stencil_disable;
        BitField<9, 1, u32> noop_cull_disable;
BitField<10, 1, u32> force_color_kill;
        BitField<11, 1, u32> force_z_read;
        BitField<12, 1, u32> force_stencil_read;
        BitField<13, 2, ForceEnable> force_full_z_range;
        BitField<15, 1, u32> force_qc_smask_conflict;
        BitField<16, 1, u32> disable_viewport_clamp;
        BitField<17, 1, u32> ignore_sc_zrange;
        BitField<18, 1, u32> disable_fully_covered;
        BitField<19, 2, ForceSumm> force_z_limit_summ;
        BitField<21, 5, u32> max_tiles_in_dtt;
        BitField<26, 1, u32> disable_tile_rate_tiles;
        BitField<27, 1, u32> force_z_dirty;
        BitField<28, 1, u32> force_stencil_dirty;
        BitField<29, 1, u32> force_z_valid;
        BitField<30, 1, u32> force_stencil_valid;
        BitField<31, 1, u32> preserve_compression;
    };

    /// Bit-field view of the anti-aliasing configuration word.
    union AaConfig {
        BitField<0, 3, u32> msaa_num_samples;
        BitField<4, 1, u32> aa_mask_centroid_dtmn;
        BitField<13, 4, u32> max_sample_dst;
        BitField<20, 3, u32> msaa_exposed_samples;
        BitField<24, 2, u32> detail_to_exposed_mode;

        /// Sample count, treating msaa_num_samples as a log2 value.
        u32 NumSamples() const {
            return 1 << msaa_num_samples;
        }
    };

    /// Bit-field view of the shader stage enable word.
    union ShaderStageEnable {
        enum VgtStages : u32 {
            Vs = 0u, // always enabled
            EsGs = 0xB0u,
            LsHs = 0x45u,
        };

        VgtStages raw;
        BitField<0, 2, u32> ls_en;
        BitField<2, 1, u32> hs_en;
        BitField<3, 2, u32> es_en;
        BitField<5, 1, u32> gs_en;
        BitField<6, 2, u32> vs_en;
        BitField<8, 1, u32> dynamic_hs;

        /// Reports whether a stage is enabled. Indices 0 and 1 always return true; 2..5 read
        /// gs_en, es_en, hs_en and ls_en respectively; any other index hits UNREACHABLE().
        bool IsStageEnabled(u32 stage) const {
            switch (stage) {
            case 0:
            case 1:
                return true;
            case 2:
                return gs_en.Value();
            case 3:
                return es_en.Value();
            case 4:
                return hs_en.Value();
            case 5:
                return ls_en.Value();
            default:
                UNREACHABLE();
            }
        }
    };

    /// Geometry shader instancing word.
    union GsInstances {
        u32 raw;
        struct {
            u32 enable : 2;
            u32 count : 6;
        };

        /// True when enable is non-zero and count is greater than zero.
        bool IsEnabled() const {
            return enable && count > 0;
        }
    };

    /// Geometry shader output primitive type word.
    // NOTE(review): the bit-fields below are declared back to back with no gaps between the
    // four stream types. Public register headers list gaps for VGT_GS_OUT_PRIM_TYPE - confirm
    // the positions of the stream 1..3 fields and of unique_type_per_stream.
    union GsOutPrimitiveType {
        u32 raw;
        struct {
            GsOutputPrimitiveType outprim_type : 6;
            GsOutputPrimitiveType outprim_type1 : 6;
            GsOutputPrimitiveType outprim_type2 : 6;
            GsOutputPrimitiveType outprim_type3 : 6;
            u32 reserved : 3;
            u32 unique_type_per_stream : 1;
        };

        /// Output primitive type for `stream`. When unique_type_per_stream is zero every stream
        /// uses outprim_type.
        GsOutputPrimitiveType GetPrimitiveType(u32 stream) const {
            if (unique_type_per_stream == 0) {
                return outprim_type;
            }

            switch
(stream) {
            case 0:
                return outprim_type;
            case 1:
                return outprim_type1;
            case 2:
                return outprim_type2;
            case 3:
                return outprim_type3;
            default:
                UNREACHABLE();
            }
        }
    };

    /// Bit-field view of the geometry shader mode word.
    union GsMode {
        u32 raw;
        BitField<0, 3, u32> mode;
        BitField<3, 2, u32> cut_mode;
        BitField<22, 2, u32> onchip;
    };

    /// Stream-out configuration word; unnamed bit-fields are padding.
    union StreamOutConfig {
        u32 raw;
        struct {
            u32 streamout_0_en : 1;
            u32 streamout_1_en : 1;
            u32 streamout_2_en : 1;
            u32 streamout_3_en : 1;
            u32 rast_stream : 3;
            u32 : 1;
            u32 rast_stream_mask : 4;
            u32 : 19;
            u32 use_rast_stream_mask : 1;
        };
    };

    /// Four 4-bit buffer enable masks, one per stream.
    union StreamOutBufferConfig {
        u32 raw;
        struct {
            u32 stream_0_buf_en : 4;
            u32 stream_1_buf_en : 4;
            u32 stream_2_buf_en : 4;
            u32 stream_3_buf_en : 4;
        };
    };

    /// Bit-field view of the LS/HS configuration word.
    union LsHsConfig {
        u32 raw;
        BitField<0, 8, u32> num_patches;
        BitField<8, 6, u32> hs_input_control_points;
        BitField<14, 6, u32> hs_output_control_points;
    };

    /// Bit-field view of the tessellation configuration word.
    union TessellationConfig {
        u32 raw;
        BitField<0, 2, TessellationType> type;
        BitField<2, 3, TessellationPartitioning> partitioning;
        BitField<5, 3, TessellationTopology> topology;
    };

    /// Tessellation factor memory base register.
    union TessFactorMemoryBase {
        u32 base;

        /// Stored value shifted left by 8.
        u64 MemoryBase() const {
            return static_cast(base) << 8;
        }
    };

    /// Bit-field view of the EQAA configuration word.
    union Eqaa {
        u32 raw;
        BitField<0, 1, u32> max_anchor_samples;
        BitField<4, 3, u32> ps_iter_samples;
        BitField<8, 3, u32> mask_export_num_samples;
        BitField<12, 3, u32> alpha_to_mask_num_samples;
        BitField<16, 1, u32> high_quality_intersections;
        BitField<17, 1, u32> incoherent_eqaa_reads;
        BitField<18, 1, u32> interpolate_comp_z;
        BitField<19, 1, u32> interpolate_src_z;
        BitField<20, 1, u32> static_anchor_associations;
        BitField<21, 1, u32> alpha_to_mask_eqaa_disable;
        BitField<24, 3, u32> overrasterization_amount;
        BitField<27, 1, u32> enable_postz_overrasterization;
    };

    /// Pixel shader input enable flags, one bit per input.
    union PsInput {
        u32 raw;
        struct {
            u32 persp_sample_ena : 1;
            u32 persp_center_ena : 1;
            u32 persp_centroid_ena : 1;
            u32 persp_pull_model_ena : 1;
            u32 linear_sample_ena : 1;
            u32 linear_center_ena : 1;
            u32 linear_centroid_ena : 1;
            u32 line_stipple_tex_ena : 1;
            u32 pos_x_float_ena : 1;
            u32 pos_y_float_ena : 1;
            u32 pos_z_float_ena :
1;
            u32 pos_w_float_ena : 1;
            u32 front_face_ena : 1;
            u32 ancillary_ena : 1;
            u32 sample_coverage_ena : 1;
            u32 pos_fixed_pt_ena : 1;
        };
    };

    /// Register file. The anonymous struct lays the named registers out in order, using
    /// INSERT_PADDING_WORDS to skip the words between them; the union also exposes the same
    /// storage as reg_array.
    union Regs {
        struct {
            INSERT_PADDING_WORDS(0x2C08);
            ShaderProgram ps_program;
            INSERT_PADDING_WORDS(0x2C);
            ShaderProgram vs_program;
            INSERT_PADDING_WORDS(0x2C);
            ShaderProgram gs_program;
            INSERT_PADDING_WORDS(0x2C);
            ShaderProgram es_program;
            INSERT_PADDING_WORDS(0x2C);
            ShaderProgram hs_program;
            INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
            ShaderProgram ls_program;
            INSERT_PADDING_WORDS(0xA4);
            ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
            INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
            DepthRenderControl depth_render_control;
            INSERT_PADDING_WORDS(1);
            DepthView depth_view;
            DepthRenderOverride depth_render_override;
            INSERT_PADDING_WORDS(1);
            Address depth_htile_data_base;
            INSERT_PADDING_WORDS(2);
            float depth_bounds_min;
            float depth_bounds_max;
            u32 stencil_clear;
            float depth_clear;
            Scissor screen_scissor;
            INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
            DepthBuffer depth_buffer;
            INSERT_PADDING_WORDS(0xA080 - 0xA018);
            WindowOffset window_offset;
            ViewportScissor window_scissor;
            INSERT_PADDING_WORDS(0xA08E - 0xA081 - 2);
            ColorBufferMask color_target_mask;
            ColorBufferMask color_shader_mask;
            ViewportScissor generic_scissor;
            INSERT_PADDING_WORDS(2);
            std::array viewport_scissors;
            std::array viewport_depths;
            INSERT_PADDING_WORDS(0xA102 - 0xA0D4);
            u32 index_offset;
            u32 primitive_restart_index;
            INSERT_PADDING_WORDS(1);
            BlendConstants blend_constants;
            INSERT_PADDING_WORDS(0xA10B - 0xA105 - 4);
            StencilControl stencil_control;
            StencilRefMask stencil_ref_front;
            StencilRefMask stencil_ref_back;
            INSERT_PADDING_WORDS(1);
            std::array viewports;
            std::array clip_user_data;
            INSERT_PADDING_WORDS(0xA191 - 0xA187);
            std::array ps_inputs;
            VsOutputConfig vs_output_config;
            INSERT_PADDING_WORDS(1);
            PsInput ps_input_ena;
            PsInput ps_input_addr;
            INSERT_PADDING_WORDS(1);
            BitField<0, 6, u32> num_interp;
            INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1);
ShaderPosFormat shader_pos_format;
            ShaderExportFormat z_export_format;
            ColorExportFormat color_export_format;
            INSERT_PADDING_WORDS(0xA1E0 - 0xA1C3 - 3);
            std::array blend_control;
            INSERT_PADDING_WORDS(0xA1F9 - 0xA1E0 - 8);
            IndexBufferBase index_base_address;
            INSERT_PADDING_WORDS(1);
            u32 draw_initiator;
            INSERT_PADDING_WORDS(0xA200 - 0xA1F9 - 4);
            DepthControl depth_control;
            INSERT_PADDING_WORDS(1);
            ColorControl color_control;
            DepthBufferControl depth_buffer_control;
            ClipperControl clipper_control;
            PolygonControl polygon_control;
            ViewportControl viewport_control;
            VsOutputControl vs_output_control;
            INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
            HsTessFactorClamp hs_clamp;
            INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
            GsMode vgt_gs_mode;
            INSERT_PADDING_WORDS(1);
            ModeControl mode_control;
            INSERT_PADDING_WORDS(8);
            GsOutPrimitiveType vgt_gs_out_prim_type;
            INSERT_PADDING_WORDS(1);
            u32 index_size;
            u32 max_index_size;
            IndexBufferType index_buffer_type;
            INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
            u32 enable_primitive_id;
            INSERT_PADDING_WORDS(3);
            u32 enable_primitive_restart;
            INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1);
            u32 vgt_instance_step_rate_0;
            u32 vgt_instance_step_rate_1;
            INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1);
            u32 vgt_esgs_ring_itemsize;
            u32 vgt_gsvs_ring_itemsize;
            INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1);
            BitField<0, 11, u32> vgt_gs_max_vert_out;
            INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
            ShaderStageEnable stage_enable;
            LsHsConfig ls_hs_config;
            u32 vgt_gs_vert_itemsize[4];
            TessellationConfig tess_config;
            INSERT_PADDING_WORDS(3);
            PolygonOffset poly_offset;
            GsInstances vgt_gs_instance_cnt;
            StreamOutConfig vgt_strmout_config;
            StreamOutBufferConfig vgt_strmout_buffer_config;
            INSERT_PADDING_WORDS(0xA2F8 - 0xA2E6 - 1);
            AaConfig aa_config;
            INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1);
            // One ColorBuffer block per colour target.
            ColorBuffer color_buffers[NumColorBuffers];
            INSERT_PADDING_WORDS(0xC242 - 0xA390);
            PrimitiveType primitive_type;
            INSERT_PADDING_WORDS(0xC24C - 0xC243);
            u32 num_indices;
            VgtNumInstances num_instances;
INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1); TessFactorMemoryBase vgt_tf_memory_base; }; std::array reg_array{}; const ShaderProgram* ProgramForStage(u32 index) const { switch (index) { case 0: return &ps_program; case 1: return &vs_program; case 2: return &gs_program; case 3: return &es_program; case 4: return &hs_program; case 5: return &ls_program; } return nullptr; } u32 NumSamples() const { // It seems that the number of samples > 1 set in the AA config doesn't mean we're // always rendering with MSAA, so we need to derive MS ratio from the CB and DB // settings. u32 num_samples = 1u; if (color_control.mode != ColorControl::OperationMode::Disable) { for (auto cb = 0u; cb < NumColorBuffers; ++cb) { const auto& col_buf = color_buffers[cb]; if (!col_buf) { continue; } num_samples = std::max(num_samples, col_buf.NumSamples()); } } if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) { num_samples = std::max(num_samples, depth_buffer.NumSamples()); } return num_samples; } void SetDefaults(); }; Regs regs{}; // See for a comment in context reg parsing code union CbDbExtent { struct { u16 width; u16 height; }; u32 raw{0u}; [[nodiscard]] bool Valid() const { return raw != 0; } }; std::array last_cb_extent{}; CbDbExtent last_db_extent{}; public: Liverpool(); ~Liverpool(); void SubmitGfx(std::span dcb, std::span ccb); void SubmitAsc(u32 gnm_vqid, std::span acb); void SubmitDone() noexcept { std::scoped_lock lk{submit_mutex}; mapped_queues[GfxQueueId].ccb_buffer_offset = 0; mapped_queues[GfxQueueId].dcb_buffer_offset = 0; submit_done = true; submit_cv.notify_one(); } void WaitGpuIdle() noexcept { std::unique_lock lk{submit_mutex}; submit_cv.wait(lk, [this] { return num_submits == 0; }); } bool IsGpuIdle() const { return num_submits == 0; } void SetVoPort(Libraries::VideoOut::VideoOutPort* port) { vo_port = port; } void BindRasterizer(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; } void SendCommand(Common::UniqueFunction&& func) { std::scoped_lock 
lk{submit_mutex}; command_queue.emplace(std::move(func)); ++num_commands; submit_cv.notify_one(); } void reserveCopyBufferSpace() { GpuQueue& gfx_queue = mapped_queues[GfxQueueId]; std::scoped_lock lk(gfx_queue.m_access); constexpr size_t GfxReservedSize = 2_MB >> 2; gfx_queue.ccb_buffer.reserve(GfxReservedSize); gfx_queue.dcb_buffer.reserve(GfxReservedSize); } inline ComputeProgram& GetCsRegs() { return mapped_queues[curr_qid].cs_state; } struct AscQueueInfo { VAddr map_addr; u32* read_addr; u32 ring_size_dw; u32 pipe_id; }; Common::SlotVector asc_queues{}; private: struct Task { struct promise_type { auto get_return_object() { Task task{}; task.handle = std::coroutine_handle::from_promise(*this); return task; } static constexpr std::suspend_always initial_suspend() noexcept { // We want the task to be suspended at start return {}; } static constexpr std::suspend_always final_suspend() noexcept { return {}; } void unhandled_exception() { try { std::rethrow_exception(std::current_exception()); } catch (const std::exception& e) { UNREACHABLE_MSG("Unhandled exception: {}", e.what()); } } void return_void() {} struct empty {}; std::suspend_always yield_value(empty&&) { return {}; } }; using Handle = std::coroutine_handle; Handle handle; }; std::pair, std::span> CopyCmdBuffers(std::span dcb, std::span ccb); Task ProcessGraphics(std::span dcb, std::span ccb); Task ProcessCeUpdate(std::span ccb); template Task ProcessCompute(std::span acb, u32 vqid); void Process(std::stop_token stoken); struct GpuQueue { std::mutex m_access{}; std::atomic dcb_buffer_offset; std::atomic ccb_buffer_offset; std::vector dcb_buffer; std::vector ccb_buffer; std::queue submits{}; ComputeProgram cs_state{}; }; std::array mapped_queues{}; u32 num_mapped_queues{1u}; // GFX is always available VAddr indirect_args_addr{}; struct ConstantEngine { void Reset() { ce_count = 0; de_count = 0; ce_compare_count = 0; } [[nodiscard]] u32 Diff() const { ASSERT_MSG(ce_count >= de_count, "DE counter is ahead 
of CE"); return ce_count - de_count; } u32 ce_compare_count{}; u32 ce_count{}; u32 de_count{}; static std::array constants_heap; } cblock{}; Vulkan::Rasterizer* rasterizer{}; Libraries::VideoOut::VideoOutPort* vo_port{}; std::jthread process_thread{}; std::atomic num_submits{}; std::atomic num_commands{}; std::atomic submit_done{}; std::mutex submit_mutex; std::condition_variable_any submit_cv; std::queue> command_queue{}; int curr_qid{-1}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88); static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8); static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08); static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48); static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00); static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C); static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40); static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000); static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002); static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010); static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(window_offset) == 0xA080); static_assert(GFX6_3D_REG_INDEX(window_scissor) == 0xA081); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(generic_scissor) == 0xA090); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); static_assert(GFX6_3D_REG_INDEX(index_offset) == 0xA102); static_assert(GFX6_3D_REG_INDEX(primitive_restart_index) == 0xA103); 
// Register layout checks, stencil_control through vgt_gs_max_vert_out: each
// GFX6_3D_REG_INDEX(field) is the field's offset in Liverpool::Regs divided by
// sizeof(u32) and must equal the register word index on the right.
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(ps_input_ena) == 0xA1B3);
static_assert(GFX6_3D_REG_INDEX(ps_input_addr) == 0xA1B4);
static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB);
static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
// Register layout checks, stage_enable through vgt_tf_memory_base: each
// GFX6_3D_REG_INDEX(field) is the field's offset in Liverpool::Regs divided by
// sizeof(u32) and must equal the register word index on the right.
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_buffer_config) == 0xA2E6);
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);

// The helper macro is not used past this point, so it is removed here.
#undef GFX6_3D_REG_INDEX

} // namespace AmdGpu