mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-25 04:45:00 +00:00
1655 lines
54 KiB
C++
1655 lines
54 KiB
C++
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#pragma once
|
|
|
|
#include <array>
|
|
#include <condition_variable>
|
|
#include <coroutine>
|
|
#include <exception>
|
|
#include <mutex>
|
|
#include <span>
|
|
#include <thread>
|
|
#include <vector>
|
|
#include <queue>
|
|
|
|
#include "common/assert.h"
|
|
#include "common/bit_field.h"
|
|
#include "common/polyfill_thread.h"
|
|
#include "common/slot_vector.h"
|
|
#include "common/types.h"
|
|
#include "common/unique_function.h"
|
|
#include "shader_recompiler/params.h"
|
|
#include "video_core/amdgpu/pixel_format.h"
|
|
#include "video_core/amdgpu/resource.h"
|
|
#include "video_core/amdgpu/types.h"
|
|
|
|
namespace Vulkan {
|
|
class Rasterizer;
|
|
}
|
|
|
|
namespace Libraries::VideoOut {
|
|
struct VideoOutPort;
|
|
}
|
|
|
|
namespace AmdGpu {
|
|
|
|
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
|
|
|
|
#define CONCAT2(x, y) DO_CONCAT2(x, y)
|
|
#define DO_CONCAT2(x, y) x##y
|
|
#define INSERT_PADDING_WORDS(num_words) \
|
|
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
|
|
|
|
struct Liverpool {
|
|
static constexpr u32 GfxQueueId = 0u;
|
|
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
|
|
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
|
|
static constexpr u32 NumQueuesPerPipe = 8u;
|
|
static constexpr u32 NumComputeRings = NumComputePipes * NumQueuesPerPipe;
|
|
static constexpr u32 NumTotalQueues = NumGfxRings + NumComputeRings;
|
|
static_assert(NumTotalQueues < 64u); // need to fit into u64 bitmap for ffs
|
|
|
|
static constexpr u32 NumColorBuffers = 8;
|
|
static constexpr u32 NumViewports = 16;
|
|
static constexpr u32 NumClipPlanes = 6;
|
|
static constexpr u32 NumShaderUserData = 16;
|
|
static constexpr u32 UconfigRegWordOffset = 0xC000;
|
|
static constexpr u32 ContextRegWordOffset = 0xA000;
|
|
static constexpr u32 ConfigRegWordOffset = 0x2000;
|
|
static constexpr u32 ShRegWordOffset = 0x2C00;
|
|
static constexpr u32 NumRegs = 0xD000;
|
|
|
|
using UserData = std::array<u32, NumShaderUserData>;
|
|
|
|
struct BinaryInfo {
|
|
static constexpr u8 signature_ref[] = {0x4f, 0x72, 0x62, 0x53, 0x68, 0x64, 0x72}; // OrbShdr
|
|
|
|
std::array<u8, sizeof(signature_ref)> signature;
|
|
u8 version;
|
|
u32 pssl_or_cg : 1;
|
|
u32 cached : 1;
|
|
u32 type : 4;
|
|
u32 source_type : 2;
|
|
u32 length : 24;
|
|
u8 chunk_usage_base_offset_in_dw;
|
|
u8 num_input_usage_slots;
|
|
u8 is_srt : 1;
|
|
u8 is_srt_used_info_valid : 1;
|
|
u8 is_extended_usage_info : 1;
|
|
u8 reserved2 : 5;
|
|
u8 reserved3;
|
|
u64 shader_hash;
|
|
u32 crc32;
|
|
|
|
bool Valid() const {
|
|
return shader_hash && crc32 &&
|
|
(std::memcmp(signature.data(), signature_ref, sizeof(signature_ref)) == 0);
|
|
}
|
|
};
|
|
|
|
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) {
|
|
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
|
|
|
if (code[0] == token_mov_vcchi) {
|
|
const auto* info = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
|
|
if (info->Valid()) {
|
|
return *info;
|
|
}
|
|
}
|
|
|
|
// First instruction is not s_mov_b32 vcc_hi, #imm,
|
|
// which means we cannot get the binary info via said instruction.
|
|
// The easiest solution is to iterate through each dword and break
|
|
// on the first instance of the binary info.
|
|
constexpr size_t signature_size = sizeof(BinaryInfo::signature_ref) / sizeof(u8);
|
|
const u32* end = code + search_limit;
|
|
|
|
for (const u32* it = code; it < end; ++it) {
|
|
if (const BinaryInfo* info = std::bit_cast<const BinaryInfo*>(it); info->Valid()) {
|
|
return *info;
|
|
}
|
|
}
|
|
|
|
UNREACHABLE_MSG("Shader binary info not found.");
|
|
}
|
|
|
|
struct ShaderProgram {
|
|
u32 address_lo;
|
|
BitField<0, 8, u32> address_hi;
|
|
union {
|
|
BitField<0, 6, u64> num_vgprs;
|
|
BitField<6, 4, u64> num_sgprs;
|
|
BitField<10, 2, u64> priority;
|
|
BitField<12, 2, FpRoundMode> fp_round_mode32;
|
|
BitField<14, 2, FpRoundMode> fp_round_mode64;
|
|
BitField<16, 2, FpDenormMode> fp_denorm_mode32;
|
|
BitField<18, 2, FpDenormMode> fp_denorm_mode64;
|
|
BitField<12, 8, u64> float_mode;
|
|
BitField<24, 2, u64> vgpr_comp_cnt; // SPI provided per-thread inputs
|
|
BitField<33, 5, u64> num_user_regs;
|
|
} settings;
|
|
UserData user_data;
|
|
|
|
template <typename T = u8*>
|
|
const T Address() const {
|
|
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
|
return reinterpret_cast<const T>(addr);
|
|
}
|
|
|
|
std::span<const u32> Code() const {
|
|
const u32* code = Address<u32*>();
|
|
const BinaryInfo& bininfo = SearchBinaryInfo(code);
|
|
const u32 num_dwords = bininfo.length / sizeof(u32);
|
|
return std::span{code, num_dwords};
|
|
}
|
|
};
|
|
|
|
struct HsTessFactorClamp {
|
|
// I've only seen min=0.0, max=1.0 so far.
|
|
// TODO why is max set to 1.0? Makes no sense
|
|
float hs_max_tess;
|
|
float hs_min_tess;
|
|
};
|
|
|
|
struct ComputeProgram {
|
|
u32 dispatch_initiator;
|
|
u32 dim_x;
|
|
u32 dim_y;
|
|
u32 dim_z;
|
|
u32 start_x;
|
|
u32 start_y;
|
|
u32 start_z;
|
|
struct {
|
|
u16 full;
|
|
u16 partial;
|
|
} num_thread_x, num_thread_y, num_thread_z;
|
|
INSERT_PADDING_WORDS(1);
|
|
BitField<0, 12, u32> max_wave_id;
|
|
u32 address_lo;
|
|
BitField<0, 8, u32> address_hi;
|
|
INSERT_PADDING_WORDS(4);
|
|
union {
|
|
BitField<0, 6, u64> num_vgprs;
|
|
BitField<6, 4, u64> num_sgprs;
|
|
BitField<33, 5, u64> num_user_regs;
|
|
BitField<39, 3, u64> tgid_enable;
|
|
BitField<47, 9, u64> lds_dwords;
|
|
} settings;
|
|
INSERT_PADDING_WORDS(1);
|
|
u32 resource_limits;
|
|
INSERT_PADDING_WORDS(0x2A);
|
|
UserData user_data;
|
|
|
|
template <typename T = u8*>
|
|
const T Address() const {
|
|
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
|
return reinterpret_cast<const T>(addr);
|
|
}
|
|
|
|
u32 SharedMemSize() const noexcept {
|
|
// lds_dwords is in units of 128 dwords. We return bytes.
|
|
return settings.lds_dwords.Value() * 128 * 4;
|
|
}
|
|
|
|
bool IsTgidEnabled(u32 i) const noexcept {
|
|
return (settings.tgid_enable.Value() >> i) & 1;
|
|
}
|
|
|
|
std::span<const u32> Code() const {
|
|
const u32* code = Address<u32*>();
|
|
const BinaryInfo& bininfo = SearchBinaryInfo(code);
|
|
const u32 num_dwords = bininfo.length / sizeof(u32);
|
|
return std::span{code, num_dwords};
|
|
}
|
|
};
|
|
|
|
template <typename Shader>
|
|
static constexpr const BinaryInfo& GetBinaryInfo(const Shader& sh) {
|
|
const auto* code = sh.template Address<u32*>();
|
|
return SearchBinaryInfo(code);
|
|
}
|
|
|
|
static constexpr Shader::ShaderParams GetParams(const auto& sh) {
|
|
auto& bininfo = GetBinaryInfo(sh);
|
|
return {
|
|
.user_data = sh.user_data,
|
|
.code = sh.Code(),
|
|
.hash = bininfo.shader_hash,
|
|
};
|
|
}
|
|
|
|
union PsInputControl {
|
|
u32 raw;
|
|
BitField<0, 5, u32> input_offset;
|
|
BitField<5, 1, u32> use_default;
|
|
BitField<8, 2, u32> default_value;
|
|
BitField<10, 1, u32> flat_shade;
|
|
};
|
|
|
|
enum class ShaderExportComp : u32 {
|
|
None = 0,
|
|
OneComp = 1,
|
|
TwoComp = 2,
|
|
FourCompCompressed = 3,
|
|
FourComp = 4,
|
|
};
|
|
|
|
union ShaderPosFormat {
|
|
u32 raw;
|
|
BitField<0, 4, ShaderExportComp> pos0;
|
|
BitField<4, 4, ShaderExportComp> pos1;
|
|
BitField<8, 4, ShaderExportComp> pos2;
|
|
BitField<12, 4, ShaderExportComp> pos3;
|
|
};
|
|
|
|
enum class ShaderExportFormat : u32 {
|
|
Zero = 0,
|
|
R_32 = 1,
|
|
GR_32 = 2,
|
|
AR_32 = 3,
|
|
ABGR_FP16 = 4,
|
|
ABGR_UNORM16 = 5,
|
|
ABGR_SNORM16 = 6,
|
|
ABGR_UINT16 = 7,
|
|
ABGR_SINT16 = 8,
|
|
ABGR_32 = 9,
|
|
};
|
|
|
|
union ColorExportFormat {
|
|
u32 raw;
|
|
BitField<0, 4, ShaderExportFormat> col0;
|
|
BitField<4, 4, ShaderExportFormat> col1;
|
|
BitField<8, 4, ShaderExportFormat> col2;
|
|
BitField<12, 4, ShaderExportFormat> col3;
|
|
BitField<16, 4, ShaderExportFormat> col4;
|
|
BitField<20, 4, ShaderExportFormat> col5;
|
|
BitField<24, 4, ShaderExportFormat> col6;
|
|
BitField<28, 4, ShaderExportFormat> col7;
|
|
};
|
|
|
|
union VsOutputControl {
|
|
u32 raw;
|
|
BitField<0, 8, u32> clip_distance_enable;
|
|
BitField<8, 8, u32> cull_distance_enable;
|
|
BitField<16, 1, u32> use_vtx_point_size;
|
|
BitField<17, 1, u32> use_vtx_edge_flag;
|
|
BitField<18, 1, u32> use_vtx_render_target_idx;
|
|
BitField<19, 1, u32> use_vtx_viewport_idx;
|
|
BitField<20, 1, u32> use_vtx_kill_flag;
|
|
BitField<21, 1, u32> vs_out_misc_enable;
|
|
BitField<22, 1, u32> vs_out_ccdist0_enable;
|
|
BitField<23, 1, u32> vs_out_ccdist1_enable;
|
|
BitField<25, 1, u32> use_vtx_gs_cut_flag;
|
|
|
|
bool IsClipDistEnabled(u32 index) const {
|
|
return (clip_distance_enable.Value() >> index) & 1;
|
|
}
|
|
|
|
bool IsCullDistEnabled(u32 index) const {
|
|
return (cull_distance_enable.Value() >> index) & 1;
|
|
}
|
|
};
|
|
|
|
struct ModeControl {
|
|
s32 msaa_enable : 1;
|
|
s32 vport_scissor_enable : 1;
|
|
s32 line_stripple_enable : 1;
|
|
s32 send_unlit_stiles_to_pkr : 1;
|
|
};
|
|
|
|
enum class ZOrder : u32 {
|
|
LateZ = 0,
|
|
EarlyZLateZ = 1,
|
|
ReZ = 2,
|
|
EarlyZReZ = 3,
|
|
};
|
|
|
|
enum class ConservativeDepth : u32 {
|
|
Any = 0,
|
|
LessThanZ = 1,
|
|
GreaterThanZ = 2,
|
|
};
|
|
|
|
union DepthBufferControl {
|
|
u32 raw;
|
|
BitField<0, 1, u32> z_export_enable;
|
|
BitField<1, 1, u32> stencil_test_val_export_enable;
|
|
BitField<2, 1, u32> stencil_op_val_export_enable;
|
|
BitField<4, 2, ZOrder> z_order;
|
|
BitField<6, 1, u32> kill_enable;
|
|
BitField<7, 1, u32> coverage_to_mask_enable;
|
|
BitField<8, 1, u32> mask_export_enable;
|
|
BitField<9, 1, u32> exec_on_hier_fail;
|
|
BitField<10, 1, u32> exec_on_noop;
|
|
BitField<11, 1, u32> alpha_to_mask_disable;
|
|
BitField<12, 1, u32> depth_before_shader;
|
|
BitField<13, 2, ConservativeDepth> conservative_z_export;
|
|
};
|
|
|
|
enum class CompareFunc : u32 {
|
|
Never = 0,
|
|
Less = 1,
|
|
Equal = 2,
|
|
LessEqual = 3,
|
|
Greater = 4,
|
|
NotEqual = 5,
|
|
GreaterEqual = 6,
|
|
Always = 7,
|
|
};
|
|
|
|
union DepthControl {
|
|
u32 raw;
|
|
BitField<0, 1, u32> stencil_enable;
|
|
BitField<1, 1, u32> depth_enable;
|
|
BitField<2, 1, u32> depth_write_enable;
|
|
BitField<3, 1, u32> depth_bounds_enable;
|
|
BitField<4, 3, CompareFunc> depth_func;
|
|
BitField<7, 1, u32> backface_enable;
|
|
BitField<8, 3, CompareFunc> stencil_ref_func;
|
|
BitField<20, 3, CompareFunc> stencil_bf_func;
|
|
BitField<30, 1, u32> enable_color_writes_on_depth_fail;
|
|
BitField<31, 1, u32> disable_color_writes_on_depth_pass;
|
|
};
|
|
|
|
enum class StencilFunc : u32 {
|
|
Keep = 0,
|
|
Zero = 1,
|
|
Ones = 2,
|
|
ReplaceTest = 3,
|
|
ReplaceOp = 4,
|
|
AddClamp = 5,
|
|
SubClamp = 6,
|
|
Invert = 7,
|
|
AddWrap = 8,
|
|
SubWrap = 9,
|
|
And = 10,
|
|
Or = 11,
|
|
Xor = 12,
|
|
Nand = 13,
|
|
Nor = 14,
|
|
Xnor = 15,
|
|
};
|
|
|
|
union StencilControl {
|
|
u32 raw;
|
|
BitField<0, 4, StencilFunc> stencil_fail_front;
|
|
BitField<4, 4, StencilFunc> stencil_zpass_front;
|
|
BitField<8, 4, StencilFunc> stencil_zfail_front;
|
|
BitField<12, 4, StencilFunc> stencil_fail_back;
|
|
BitField<16, 4, StencilFunc> stencil_zpass_back;
|
|
BitField<20, 4, StencilFunc> stencil_zfail_back;
|
|
};
|
|
|
|
union StencilRefMask {
|
|
u32 raw;
|
|
BitField<0, 8, u32> stencil_test_val;
|
|
BitField<8, 8, u32> stencil_mask;
|
|
BitField<16, 8, u32> stencil_write_mask;
|
|
BitField<24, 8, u32> stencil_op_val;
|
|
};
|
|
|
|
struct DepthBuffer {
|
|
enum class ZFormat : u32 {
|
|
Invalid = 0,
|
|
Z16 = 1,
|
|
Z32Float = 3,
|
|
};
|
|
|
|
enum class StencilFormat : u32 {
|
|
Invalid = 0,
|
|
Stencil8 = 1,
|
|
};
|
|
|
|
union ZInfo {
|
|
BitField<0, 2, ZFormat> format;
|
|
BitField<2, 2, u32> num_samples;
|
|
BitField<13, 3, u32> tile_split;
|
|
BitField<20, 3, u32> tile_mode_index;
|
|
BitField<23, 4, u32> decompress_on_n_zplanes;
|
|
BitField<27, 1, u32> allow_expclear;
|
|
BitField<28, 1, u32> read_size;
|
|
BitField<29, 1, u32> tile_surface_en;
|
|
BitField<30, 1, u32> clear_disallowed;
|
|
BitField<31, 1, u32> zrange_precision;
|
|
} z_info;
|
|
union {
|
|
BitField<0, 1, StencilFormat> format;
|
|
} stencil_info;
|
|
u32 z_read_base;
|
|
u32 stencil_read_base;
|
|
u32 z_write_base;
|
|
u32 stencil_write_base;
|
|
union {
|
|
BitField<0, 11, u32> pitch_tile_max;
|
|
BitField<11, 11, u32> height_tile_max;
|
|
} depth_size;
|
|
union {
|
|
BitField<0, 22, u32> tile_max;
|
|
} depth_slice;
|
|
|
|
bool DepthValid() const {
|
|
return DepthAddress() != 0 && z_info.format != ZFormat::Invalid;
|
|
}
|
|
|
|
bool StencilValid() const {
|
|
return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
|
|
}
|
|
|
|
bool DepthWriteValid() const {
|
|
return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid;
|
|
}
|
|
|
|
bool StencilWriteValid() const {
|
|
return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
|
|
}
|
|
|
|
u32 Pitch() const {
|
|
return (depth_size.pitch_tile_max + 1) << 3;
|
|
}
|
|
|
|
u32 Height() const {
|
|
return (depth_size.height_tile_max + 1) << 3;
|
|
}
|
|
|
|
u64 DepthAddress() const {
|
|
return u64(z_read_base) << 8;
|
|
}
|
|
|
|
u64 StencilAddress() const {
|
|
return u64(stencil_read_base) << 8;
|
|
}
|
|
|
|
u64 DepthWriteAddress() const {
|
|
return u64(z_write_base) << 8;
|
|
}
|
|
|
|
u64 StencilWriteAddress() const {
|
|
return u64(stencil_write_base) << 8;
|
|
}
|
|
|
|
u32 NumSamples() const {
|
|
return 1u << z_info.num_samples; // spec doesn't say it is a log2
|
|
}
|
|
|
|
u32 NumBits() const {
|
|
return z_info.format == ZFormat::Z32Float ? 32 : 16;
|
|
}
|
|
|
|
size_t GetDepthSliceSize() const {
|
|
ASSERT(z_info.format != ZFormat::Invalid);
|
|
const auto bpe = NumBits() >> 3; // in bytes
|
|
return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples();
|
|
}
|
|
};
|
|
|
|
enum class ClipSpace : u32 {
|
|
MinusWToW = 0,
|
|
ZeroToW = 1,
|
|
};
|
|
|
|
enum class PrimKillCond : u32 {
|
|
AllVtx = 0,
|
|
AnyVtx = 1,
|
|
};
|
|
|
|
union ClipperControl {
|
|
u32 raw;
|
|
BitField<0, 6, u32> user_clip_plane_enable;
|
|
BitField<16, 1, u32> clip_disable;
|
|
BitField<19, 1, ClipSpace> clip_space;
|
|
BitField<21, 1, PrimKillCond> vtx_kill_or;
|
|
BitField<22, 1, u32> dx_rasterization_kill;
|
|
BitField<23, 1, u32> dx_linear_attr_clip_enable;
|
|
BitField<26, 1, u32> zclip_near_disable;
|
|
BitField<26, 1, u32> zclip_far_disable;
|
|
};
|
|
|
|
enum class PolygonMode : u32 {
|
|
Point = 0,
|
|
Line = 1,
|
|
Fill = 2,
|
|
};
|
|
|
|
enum class ProvokingVtxLast : u32 {
|
|
First = 0,
|
|
Last = 1,
|
|
};
|
|
|
|
enum class CullMode : u32 {
|
|
None = 0,
|
|
Front = 1,
|
|
Back = 2,
|
|
FrontAndBack = 3,
|
|
};
|
|
|
|
enum class FrontFace : u32 {
|
|
CounterClockwise = 0,
|
|
Clockwise = 1,
|
|
};
|
|
|
|
union PolygonControl {
|
|
u32 raw;
|
|
BitField<0, 1, u32> cull_front;
|
|
BitField<1, 1, u32> cull_back;
|
|
BitField<2, 1, FrontFace> front_face;
|
|
BitField<3, 2, u32> enable_polygon_mode;
|
|
BitField<5, 3, PolygonMode> polygon_mode_front;
|
|
BitField<8, 3, PolygonMode> polygon_mode_back;
|
|
BitField<11, 1, u32> enable_polygon_offset_front;
|
|
BitField<12, 1, u32> enable_polygon_offset_back;
|
|
BitField<13, 1, u32> enable_polygon_offset_para;
|
|
BitField<16, 1, u32> enable_window_offset;
|
|
BitField<19, 1, ProvokingVtxLast> provoking_vtx_last;
|
|
BitField<20, 1, u32> persp_corr_dis;
|
|
BitField<21, 1, u32> multi_prim_ib_ena;
|
|
|
|
PolygonMode PolyMode() const {
|
|
return enable_polygon_mode ? polygon_mode_front.Value() : PolygonMode::Fill;
|
|
}
|
|
|
|
CullMode CullingMode() const {
|
|
return static_cast<CullMode>(cull_front | cull_back << 1);
|
|
}
|
|
|
|
bool NeedsBias() const {
|
|
return enable_polygon_offset_back || enable_polygon_offset_front ||
|
|
enable_polygon_offset_para;
|
|
}
|
|
};
|
|
|
|
union VsOutputConfig {
|
|
u32 raw;
|
|
BitField<1, 5, u32> export_count_min_one;
|
|
BitField<6, 1, u32> half_pack;
|
|
|
|
u32 NumExports() const {
|
|
return export_count_min_one.Value() + 1;
|
|
}
|
|
};
|
|
|
|
union ColorBufferMask {
|
|
enum ColorComponent : u32 {
|
|
ComponentR = (1u << 0),
|
|
ComponentG = (1u << 1),
|
|
ComponentB = (1u << 2),
|
|
ComponentA = (1u << 3),
|
|
};
|
|
|
|
u32 raw;
|
|
BitField<0, 4, u32> output0_mask;
|
|
BitField<4, 4, u32> output1_mask;
|
|
BitField<8, 4, u32> output2_mask;
|
|
BitField<12, 4, u32> output3_mask;
|
|
BitField<16, 4, u32> output4_mask;
|
|
BitField<20, 4, u32> output5_mask;
|
|
BitField<24, 4, u32> output6_mask;
|
|
BitField<28, 4, u32> output7_mask;
|
|
|
|
u32 GetMask(int buf_id) const {
|
|
return (raw >> (buf_id * 4)) & 0xfu;
|
|
}
|
|
|
|
void SetMask(int buf_id, u32 mask) {
|
|
raw &= ~(0xf << (buf_id * 4));
|
|
raw |= (mask << (buf_id * 4));
|
|
}
|
|
};
|
|
|
|
struct IndexBufferBase {
|
|
BitField<0, 8, u32> base_addr_hi;
|
|
u32 base_addr_lo;
|
|
|
|
template <typename T = VAddr>
|
|
T Address() const {
|
|
return std::bit_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
|
|
}
|
|
};
|
|
|
|
enum class IndexType : u32 {
|
|
Index16 = 0,
|
|
Index32 = 1,
|
|
};
|
|
|
|
enum class IndexSwapMode : u32 {
|
|
None = 0,
|
|
Swap16 = 1,
|
|
Swap32 = 2,
|
|
SwapWord = 3,
|
|
};
|
|
|
|
union IndexBufferType {
|
|
u32 raw;
|
|
BitField<0, 2, IndexType> index_type;
|
|
BitField<2, 2, IndexSwapMode> swap_mode;
|
|
};
|
|
|
|
union VgtNumInstances {
|
|
u32 num_instances;
|
|
|
|
u32 NumInstances() const {
|
|
return num_instances == 0 ? 1 : num_instances;
|
|
}
|
|
};
|
|
|
|
struct Scissor {
|
|
struct {
|
|
s16 top_left_x;
|
|
s16 top_left_y;
|
|
};
|
|
struct {
|
|
s16 bottom_right_x;
|
|
s16 bottom_right_y;
|
|
};
|
|
|
|
// From AMD spec: 'Negative numbers clamped to 0'
|
|
static s16 Clamp(s16 value) {
|
|
return std::max(s16(0), value);
|
|
}
|
|
|
|
u32 GetWidth() const {
|
|
return static_cast<u32>(Clamp(bottom_right_x) - Clamp(top_left_x));
|
|
}
|
|
|
|
u32 GetHeight() const {
|
|
return static_cast<u32>(Clamp(bottom_right_y) - Clamp(top_left_y));
|
|
}
|
|
};
|
|
|
|
struct WindowOffset {
|
|
s32 window_x_offset : 16;
|
|
s32 window_y_offset : 16;
|
|
};
|
|
|
|
struct ViewportScissor {
|
|
union {
|
|
BitField<0, 15, s32> top_left_x;
|
|
BitField<16, 15, s32> top_left_y;
|
|
BitField<31, 1, s32> window_offset_disable;
|
|
};
|
|
struct {
|
|
s16 bottom_right_x;
|
|
s16 bottom_right_y;
|
|
};
|
|
|
|
u32 GetWidth() const {
|
|
return bottom_right_x - top_left_x;
|
|
}
|
|
|
|
u32 GetHeight() const {
|
|
return bottom_right_y - top_left_y;
|
|
}
|
|
};
|
|
|
|
struct ViewportDepth {
|
|
float zmin;
|
|
float zmax;
|
|
};
|
|
|
|
struct ViewportBounds {
|
|
float xscale;
|
|
float xoffset;
|
|
float yscale;
|
|
float yoffset;
|
|
float zscale;
|
|
float zoffset;
|
|
};
|
|
|
|
union ViewportControl {
|
|
BitField<0, 1, u32> xscale_enable;
|
|
BitField<1, 1, u32> xoffset_enable;
|
|
BitField<2, 1, u32> yscale_enable;
|
|
BitField<3, 1, u32> yoffset_enable;
|
|
BitField<4, 1, u32> zscale_enable;
|
|
BitField<5, 1, u32> zoffset_enable;
|
|
BitField<8, 1, u32> xy_transformed;
|
|
BitField<9, 1, u32> z_transformed;
|
|
BitField<10, 1, u32> w_transformed;
|
|
BitField<11, 1, u32> perfcounter_ref;
|
|
};
|
|
|
|
struct ClipUserData {
|
|
u32 data_x;
|
|
u32 data_y;
|
|
u32 data_z;
|
|
u32 data_w;
|
|
};
|
|
|
|
struct BlendConstants {
|
|
float red;
|
|
float green;
|
|
float blue;
|
|
float alpha;
|
|
};
|
|
|
|
union BlendControl {
|
|
enum class BlendFactor : u32 {
|
|
Zero = 0,
|
|
One = 1,
|
|
SrcColor = 2,
|
|
OneMinusSrcColor = 3,
|
|
SrcAlpha = 4,
|
|
OneMinusSrcAlpha = 5,
|
|
DstAlpha = 6,
|
|
OneMinusDstAlpha = 7,
|
|
DstColor = 8,
|
|
OneMinusDstColor = 9,
|
|
SrcAlphaSaturate = 10,
|
|
ConstantColor = 13,
|
|
OneMinusConstantColor = 14,
|
|
Src1Color = 15,
|
|
InvSrc1Color = 16,
|
|
Src1Alpha = 17,
|
|
InvSrc1Alpha = 18,
|
|
ConstantAlpha = 19,
|
|
OneMinusConstantAlpha = 20,
|
|
};
|
|
|
|
enum class BlendFunc : u32 {
|
|
Add = 0,
|
|
Subtract = 1,
|
|
Min = 2,
|
|
Max = 3,
|
|
ReverseSubtract = 4,
|
|
};
|
|
|
|
BitField<0, 5, BlendFactor> color_src_factor;
|
|
BitField<5, 3, BlendFunc> color_func;
|
|
BitField<8, 5, BlendFactor> color_dst_factor;
|
|
BitField<16, 5, BlendFactor> alpha_src_factor;
|
|
BitField<21, 3, BlendFunc> alpha_func;
|
|
BitField<24, 5, BlendFactor> alpha_dst_factor;
|
|
BitField<29, 1, u32> separate_alpha_blend;
|
|
BitField<30, 1, u32> enable;
|
|
BitField<31, 1, u32> disable_rop3;
|
|
};
|
|
|
|
union ColorControl {
|
|
enum class OperationMode : u32 {
|
|
Disable = 0u,
|
|
Normal = 1u,
|
|
EliminateFastClear = 2u,
|
|
Resolve = 3u,
|
|
Err = 4u,
|
|
FmaskDecompress = 5u,
|
|
};
|
|
|
|
BitField<0, 1, u32> disable_dual_quad;
|
|
BitField<3, 1, u32> degamma_enable;
|
|
BitField<4, 3, OperationMode> mode;
|
|
BitField<16, 8, u32> rop3;
|
|
};
|
|
|
|
struct ColorBuffer {
|
|
enum class EndianSwap : u32 {
|
|
None = 0,
|
|
Swap8In16 = 1,
|
|
Swap8In32 = 2,
|
|
Swap8In64 = 3,
|
|
};
|
|
|
|
enum class SwapMode : u32 {
|
|
Standard = 0,
|
|
Alternate = 1,
|
|
StandardReverse = 2,
|
|
AlternateReverse = 3,
|
|
};
|
|
|
|
enum class RoundMode : u32 {
|
|
ByHalf = 0,
|
|
Truncate = 1,
|
|
};
|
|
|
|
u32 base_address;
|
|
union {
|
|
BitField<0, 11, u32> tile_max;
|
|
BitField<20, 11, u32> fmask_tile_max;
|
|
} pitch;
|
|
union {
|
|
BitField<0, 22, u32> tile_max;
|
|
} slice;
|
|
union {
|
|
BitField<0, 11, u32> slice_start;
|
|
BitField<13, 11, u32> slice_max;
|
|
} view;
|
|
union Color0Info {
|
|
BitField<0, 2, EndianSwap> endian;
|
|
BitField<2, 5, DataFormat> format;
|
|
BitField<7, 1, u32> linear_general;
|
|
BitField<8, 3, NumberFormat> number_type;
|
|
BitField<11, 2, SwapMode> comp_swap;
|
|
BitField<13, 1, u32> fast_clear;
|
|
BitField<14, 1, u32> compression;
|
|
BitField<15, 1, u32> blend_clamp;
|
|
BitField<16, 1, u32> blend_bypass;
|
|
BitField<17, 1, u32> simple_float;
|
|
BitField<18, 1, RoundMode> round_mode;
|
|
BitField<19, 1, u32> cmask_is_linear;
|
|
BitField<20, 3, u32> blend_opt_dont_rd_dst;
|
|
BitField<23, 3, u32> blend_opt_discard_pixel;
|
|
BitField<26, 1, u32> fmask_compression_disable_ci;
|
|
BitField<27, 1, u32> fmask_compress_1frag_only;
|
|
BitField<28, 1, u32> dcc_enable;
|
|
BitField<29, 2, u32> cmask_addr_type;
|
|
/// Neo-mode only
|
|
BitField<31, 1, u32> alt_tile_mode;
|
|
|
|
u32 u32all;
|
|
} info;
|
|
union Color0Attrib {
|
|
BitField<0, 5, TilingMode> tile_mode_index;
|
|
BitField<5, 5, u32> fmask_tile_mode_index;
|
|
BitField<10, 2, u32> fmask_bank_height;
|
|
BitField<12, 3, u32> num_samples_log2;
|
|
BitField<15, 2, u32> num_fragments_log2;
|
|
BitField<17, 1, u32> force_dst_alpha_1;
|
|
|
|
u32 u32all;
|
|
} attrib;
|
|
INSERT_PADDING_WORDS(1);
|
|
u32 cmask_base_address;
|
|
union {
|
|
BitField<0, 14, u32> tile_max;
|
|
} cmask_slice;
|
|
u32 fmask_base_address;
|
|
union {
|
|
BitField<0, 14, u32> tile_max;
|
|
} fmask_slice;
|
|
u32 clear_word0;
|
|
u32 clear_word1;
|
|
INSERT_PADDING_WORDS(2);
|
|
|
|
operator bool() const {
|
|
return info.format != DataFormat::FormatInvalid;
|
|
}
|
|
|
|
u32 Pitch() const {
|
|
return (pitch.tile_max + 1) << 3;
|
|
}
|
|
|
|
u32 Height() const {
|
|
return (slice.tile_max + 1) * 64 / Pitch();
|
|
}
|
|
|
|
u64 Address() const {
|
|
return u64(base_address) << 8;
|
|
}
|
|
|
|
VAddr CmaskAddress() const {
|
|
return VAddr(cmask_base_address) << 8;
|
|
}
|
|
|
|
VAddr FmaskAddress() const {
|
|
return VAddr(fmask_base_address) << 8;
|
|
}
|
|
|
|
u32 NumSamples() const {
|
|
return 1 << attrib.num_fragments_log2;
|
|
}
|
|
|
|
u32 NumSlices() const {
|
|
return view.slice_max + 1;
|
|
}
|
|
|
|
size_t GetColorSliceSize() const {
|
|
const auto num_bytes_per_element = NumBits(info.format) / 8u;
|
|
const auto slice_size =
|
|
num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
|
|
return slice_size;
|
|
}
|
|
|
|
TilingMode GetTilingMode() const {
|
|
return info.linear_general ? TilingMode::Display_Linear
|
|
: attrib.tile_mode_index.Value();
|
|
}
|
|
|
|
bool IsTiled() const {
|
|
return !info.linear_general;
|
|
}
|
|
|
|
[[nodiscard]] DataFormat GetDataFmt() const {
|
|
return RemapDataFormat(info.format);
|
|
}
|
|
|
|
[[nodiscard]] NumberFormat GetNumberFmt() const {
|
|
// There is a small difference between T# and CB number types, account for it.
|
|
return RemapNumberFormat(info.number_type == NumberFormat::SnormNz
|
|
? NumberFormat::Srgb
|
|
: info.number_type.Value(),
|
|
info.format);
|
|
}
|
|
|
|
[[nodiscard]] NumberConversion GetNumberConversion() const {
|
|
return MapNumberConversion(info.number_type);
|
|
}
|
|
|
|
[[nodiscard]] CompMapping Swizzle() const {
|
|
// clang-format off
|
|
static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{
|
|
// Standard
|
|
std::array<CompMapping, 4>{{
|
|
{.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Blue, .a = CompSwizzle::Alpha},
|
|
}},
|
|
// Alternate
|
|
std::array<CompMapping, 4>{{
|
|
{.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha},
|
|
}},
|
|
// StandardReverse
|
|
std::array<CompMapping, 4>{{
|
|
{.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red},
|
|
}},
|
|
// AlternateReverse
|
|
std::array<CompMapping, 4>{{
|
|
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero},
|
|
{.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue},
|
|
}},
|
|
}};
|
|
// clang-format on
|
|
const auto swap_idx = static_cast<u32>(info.comp_swap.Value());
|
|
const auto components_idx = NumComponents(info.format) - 1;
|
|
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
|
|
return RemapSwizzle(info.format, mrt_swizzle);
|
|
}
|
|
};
|
|
|
|
enum ContextRegs : u32 {
|
|
DbZInfo = 0xA010,
|
|
CbColor0Base = 0xA318,
|
|
CbColor1Base = 0xA327,
|
|
CbColor2Base = 0xA336,
|
|
CbColor3Base = 0xA345,
|
|
CbColor4Base = 0xA354,
|
|
CbColor5Base = 0xA363,
|
|
CbColor6Base = 0xA372,
|
|
CbColor7Base = 0xA381,
|
|
CbColor0Cmask = 0xA31F,
|
|
CbColor1Cmask = 0xA32E,
|
|
CbColor2Cmask = 0xA33D,
|
|
CbColor3Cmask = 0xA34C,
|
|
CbColor4Cmask = 0xA35B,
|
|
CbColor5Cmask = 0xA36A,
|
|
CbColor6Cmask = 0xA379,
|
|
CbColor7Cmask = 0xA388,
|
|
};
|
|
|
|
struct PolygonOffset {
|
|
float depth_bias;
|
|
float front_scale;
|
|
float front_offset;
|
|
float back_scale;
|
|
float back_offset;
|
|
};
|
|
|
|
struct Address {
|
|
u32 address;
|
|
|
|
VAddr GetAddress() const {
|
|
return u64(address) << 8;
|
|
}
|
|
};
|
|
|
|
union DepthRenderControl {
|
|
u32 raw;
|
|
BitField<0, 1, u32> depth_clear_enable;
|
|
BitField<1, 1, u32> stencil_clear_enable;
|
|
BitField<2, 1, u32> depth_copy;
|
|
BitField<3, 1, u32> stencil_copy;
|
|
BitField<4, 1, u32> resummarize_enable;
|
|
BitField<5, 1, u32> stencil_compress_disable;
|
|
BitField<6, 1, u32> depth_compress_disable;
|
|
BitField<7, 1, u32> copy_centroid;
|
|
BitField<8, 1, u32> copy_sample;
|
|
BitField<9, 1, u32> decompress_enable;
|
|
};
|
|
|
|
union DepthView {
|
|
BitField<0, 11, u32> slice_start;
|
|
BitField<13, 11, u32> slice_max;
|
|
BitField<24, 1, u32> z_read_only;
|
|
BitField<25, 1, u32> stencil_read_only;
|
|
|
|
u32 NumSlices() const {
|
|
return slice_max + 1u;
|
|
}
|
|
};
|
|
|
|
enum class ForceEnable : u32 {
|
|
Off = 0,
|
|
Enable = 1,
|
|
Disable = 2,
|
|
};
|
|
|
|
enum class ForceSumm : u32 {
|
|
Off = 0,
|
|
MinZ = 1,
|
|
MaxZ = 2,
|
|
Both = 3,
|
|
};
|
|
|
|
union DepthRenderOverride {
|
|
u32 raw;
|
|
BitField<0, 2, ForceEnable> force_hiz_enable;
|
|
BitField<2, 2, ForceEnable> force_his_enable0;
|
|
BitField<4, 2, ForceEnable> force_his_enable1;
|
|
BitField<6, 1, u32> force_shader_z_order;
|
|
BitField<7, 1, u32> fast_z_disable;
|
|
BitField<8, 1, u32> fast_stencil_disable;
|
|
BitField<9, 1, u32> noop_cull_disable;
|
|
BitField<10, 1, u32> force_color_kill;
|
|
BitField<11, 1, u32> force_z_read;
|
|
BitField<12, 1, u32> force_stencil_read;
|
|
BitField<13, 2, ForceEnable> force_full_z_range;
|
|
BitField<15, 1, u32> force_qc_smask_conflict;
|
|
BitField<16, 1, u32> disable_viewport_clamp;
|
|
BitField<17, 1, u32> ignore_sc_zrange;
|
|
BitField<18, 1, u32> disable_fully_covered;
|
|
BitField<19, 2, ForceSumm> force_z_limit_summ;
|
|
BitField<21, 5, u32> max_tiles_in_dtt;
|
|
BitField<26, 1, u32> disable_tile_rate_tiles;
|
|
BitField<27, 1, u32> force_z_dirty;
|
|
BitField<28, 1, u32> force_stencil_dirty;
|
|
BitField<29, 1, u32> force_z_valid;
|
|
BitField<30, 1, u32> force_stencil_valid;
|
|
BitField<31, 1, u32> preserve_compression;
|
|
};
|
|
|
|
union AaConfig {
|
|
BitField<0, 3, u32> msaa_num_samples;
|
|
BitField<4, 1, u32> aa_mask_centroid_dtmn;
|
|
BitField<13, 4, u32> max_sample_dst;
|
|
BitField<20, 3, u32> msaa_exposed_samples;
|
|
BitField<24, 2, u32> detail_to_exposed_mode;
|
|
|
|
u32 NumSamples() const {
|
|
return 1 << msaa_num_samples;
|
|
}
|
|
};
|
|
|
|
union ShaderStageEnable {
|
|
enum VgtStages : u32 {
|
|
Vs = 0u, // always enabled
|
|
EsGs = 0xB0u,
|
|
LsHs = 0x45u,
|
|
};
|
|
|
|
VgtStages raw;
|
|
BitField<0, 2, u32> ls_en;
|
|
BitField<2, 1, u32> hs_en;
|
|
BitField<3, 2, u32> es_en;
|
|
BitField<5, 1, u32> gs_en;
|
|
BitField<6, 2, u32> vs_en;
|
|
BitField<8, 1, u32> dynamic_hs;
|
|
|
|
bool IsStageEnabled(u32 stage) const {
|
|
switch (stage) {
|
|
case 0:
|
|
case 1:
|
|
return true;
|
|
case 2:
|
|
return gs_en.Value();
|
|
case 3:
|
|
return es_en.Value();
|
|
case 4:
|
|
return hs_en.Value();
|
|
case 5:
|
|
return ls_en.Value();
|
|
default:
|
|
UNREACHABLE();
|
|
}
|
|
}
|
|
};
|
|
|
|
union GsInstances {
|
|
u32 raw;
|
|
struct {
|
|
u32 enable : 2;
|
|
u32 count : 6;
|
|
};
|
|
|
|
bool IsEnabled() const {
|
|
return enable && count > 0;
|
|
}
|
|
};
|
|
|
|
union GsOutPrimitiveType {
|
|
u32 raw;
|
|
struct {
|
|
GsOutputPrimitiveType outprim_type : 6;
|
|
GsOutputPrimitiveType outprim_type1 : 6;
|
|
GsOutputPrimitiveType outprim_type2 : 6;
|
|
GsOutputPrimitiveType outprim_type3 : 6;
|
|
u32 reserved : 3;
|
|
u32 unique_type_per_stream : 1;
|
|
};
|
|
|
|
GsOutputPrimitiveType GetPrimitiveType(u32 stream) const {
|
|
if (unique_type_per_stream == 0) {
|
|
return outprim_type;
|
|
}
|
|
|
|
switch (stream) {
|
|
case 0:
|
|
return outprim_type;
|
|
case 1:
|
|
return outprim_type1;
|
|
case 2:
|
|
return outprim_type2;
|
|
case 3:
|
|
return outprim_type3;
|
|
default:
|
|
UNREACHABLE();
|
|
}
|
|
}
|
|
};
|
|
|
|
union GsMode {
|
|
u32 raw;
|
|
BitField<0, 3, u32> mode;
|
|
BitField<3, 2, u32> cut_mode;
|
|
BitField<22, 2, u32> onchip;
|
|
};
|
|
|
|
union StreamOutConfig {
|
|
u32 raw;
|
|
struct {
|
|
u32 streamout_0_en : 1;
|
|
u32 streamout_1_en : 1;
|
|
u32 streamout_2_en : 1;
|
|
u32 streamout_3_en : 1;
|
|
u32 rast_stream : 3;
|
|
u32 : 1;
|
|
u32 rast_stream_mask : 4;
|
|
u32 : 19;
|
|
u32 use_rast_stream_mask : 1;
|
|
};
|
|
};
|
|
|
|
union StreamOutBufferConfig {
|
|
u32 raw;
|
|
struct {
|
|
u32 stream_0_buf_en : 4;
|
|
u32 stream_1_buf_en : 4;
|
|
u32 stream_2_buf_en : 4;
|
|
u32 stream_3_buf_en : 4;
|
|
};
|
|
};
|
|
|
|
union LsHsConfig {
|
|
u32 raw;
|
|
BitField<0, 8, u32> num_patches;
|
|
BitField<8, 6, u32> hs_input_control_points;
|
|
BitField<14, 6, u32> hs_output_control_points;
|
|
};
|
|
|
|
union TessellationConfig {
|
|
u32 raw;
|
|
BitField<0, 2, TessellationType> type;
|
|
BitField<2, 3, TessellationPartitioning> partitioning;
|
|
BitField<5, 3, TessellationTopology> topology;
|
|
};
|
|
|
|
union TessFactorMemoryBase {
|
|
u32 base;
|
|
|
|
u64 MemoryBase() const {
|
|
return static_cast<u64>(base) << 8;
|
|
}
|
|
};
|
|
|
|
union Eqaa {
|
|
u32 raw;
|
|
BitField<0, 1, u32> max_anchor_samples;
|
|
BitField<4, 3, u32> ps_iter_samples;
|
|
BitField<8, 3, u32> mask_export_num_samples;
|
|
BitField<12, 3, u32> alpha_to_mask_num_samples;
|
|
BitField<16, 1, u32> high_quality_intersections;
|
|
BitField<17, 1, u32> incoherent_eqaa_reads;
|
|
BitField<18, 1, u32> interpolate_comp_z;
|
|
BitField<19, 1, u32> interpolate_src_z;
|
|
BitField<20, 1, u32> static_anchor_associations;
|
|
BitField<21, 1, u32> alpha_to_mask_eqaa_disable;
|
|
BitField<24, 3, u32> overrasterization_amount;
|
|
BitField<27, 1, u32> enable_postz_overrasterization;
|
|
};
|
|
|
|
union PsInput {
|
|
u32 raw;
|
|
struct {
|
|
u32 persp_sample_ena : 1;
|
|
u32 persp_center_ena : 1;
|
|
u32 persp_centroid_ena : 1;
|
|
u32 persp_pull_model_ena : 1;
|
|
u32 linear_sample_ena : 1;
|
|
u32 linear_center_ena : 1;
|
|
u32 linear_centroid_ena : 1;
|
|
u32 line_stipple_tex_ena : 1;
|
|
u32 pos_x_float_ena : 1;
|
|
u32 pos_y_float_ena : 1;
|
|
u32 pos_z_float_ena : 1;
|
|
u32 pos_w_float_ena : 1;
|
|
u32 front_face_ena : 1;
|
|
u32 ancillary_ena : 1;
|
|
u32 sample_coverage_ena : 1;
|
|
u32 pos_fixed_pt_ena : 1;
|
|
};
|
|
};
|
|
|
|
union Regs {
|
|
struct {
|
|
INSERT_PADDING_WORDS(0x2C08);
|
|
ShaderProgram ps_program;
|
|
INSERT_PADDING_WORDS(0x2C);
|
|
ShaderProgram vs_program;
|
|
INSERT_PADDING_WORDS(0x2C);
|
|
ShaderProgram gs_program;
|
|
INSERT_PADDING_WORDS(0x2C);
|
|
ShaderProgram es_program;
|
|
INSERT_PADDING_WORDS(0x2C);
|
|
ShaderProgram hs_program;
|
|
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
|
|
ShaderProgram ls_program;
|
|
INSERT_PADDING_WORDS(0xA4);
|
|
ComputeProgram cs_program; // shadowed by `cs_state` in `mapped_queues`
|
|
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
|
|
DepthRenderControl depth_render_control;
|
|
INSERT_PADDING_WORDS(1);
|
|
DepthView depth_view;
|
|
DepthRenderOverride depth_render_override;
|
|
INSERT_PADDING_WORDS(1);
|
|
Address depth_htile_data_base;
|
|
INSERT_PADDING_WORDS(2);
|
|
float depth_bounds_min;
|
|
float depth_bounds_max;
|
|
u32 stencil_clear;
|
|
float depth_clear;
|
|
Scissor screen_scissor;
|
|
INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
|
|
DepthBuffer depth_buffer;
|
|
INSERT_PADDING_WORDS(0xA080 - 0xA018);
|
|
WindowOffset window_offset;
|
|
ViewportScissor window_scissor;
|
|
INSERT_PADDING_WORDS(0xA08E - 0xA081 - 2);
|
|
ColorBufferMask color_target_mask;
|
|
ColorBufferMask color_shader_mask;
|
|
ViewportScissor generic_scissor;
|
|
INSERT_PADDING_WORDS(2);
|
|
std::array<ViewportScissor, NumViewports> viewport_scissors;
|
|
std::array<ViewportDepth, NumViewports> viewport_depths;
|
|
INSERT_PADDING_WORDS(0xA102 - 0xA0D4);
|
|
u32 index_offset;
|
|
u32 primitive_restart_index;
|
|
INSERT_PADDING_WORDS(1);
|
|
BlendConstants blend_constants;
|
|
INSERT_PADDING_WORDS(0xA10B - 0xA105 - 4);
|
|
StencilControl stencil_control;
|
|
StencilRefMask stencil_ref_front;
|
|
StencilRefMask stencil_ref_back;
|
|
INSERT_PADDING_WORDS(1);
|
|
std::array<ViewportBounds, NumViewports> viewports;
|
|
std::array<ClipUserData, NumClipPlanes> clip_user_data;
|
|
INSERT_PADDING_WORDS(0xA191 - 0xA187);
|
|
std::array<PsInputControl, 32> ps_inputs;
|
|
VsOutputConfig vs_output_config;
|
|
INSERT_PADDING_WORDS(1);
|
|
PsInput ps_input_ena;
|
|
PsInput ps_input_addr;
|
|
INSERT_PADDING_WORDS(1);
|
|
BitField<0, 6, u32> num_interp;
|
|
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1);
|
|
ShaderPosFormat shader_pos_format;
|
|
ShaderExportFormat z_export_format;
|
|
ColorExportFormat color_export_format;
|
|
INSERT_PADDING_WORDS(0xA1E0 - 0xA1C3 - 3);
|
|
std::array<BlendControl, NumColorBuffers> blend_control;
|
|
INSERT_PADDING_WORDS(0xA1F9 - 0xA1E0 - 8);
|
|
IndexBufferBase index_base_address;
|
|
INSERT_PADDING_WORDS(1);
|
|
u32 draw_initiator;
|
|
INSERT_PADDING_WORDS(0xA200 - 0xA1F9 - 4);
|
|
DepthControl depth_control;
|
|
INSERT_PADDING_WORDS(1);
|
|
ColorControl color_control;
|
|
DepthBufferControl depth_buffer_control;
|
|
ClipperControl clipper_control;
|
|
PolygonControl polygon_control;
|
|
ViewportControl viewport_control;
|
|
VsOutputControl vs_output_control;
|
|
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
|
|
HsTessFactorClamp hs_clamp;
|
|
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
|
|
GsMode vgt_gs_mode;
|
|
INSERT_PADDING_WORDS(1);
|
|
ModeControl mode_control;
|
|
INSERT_PADDING_WORDS(8);
|
|
GsOutPrimitiveType vgt_gs_out_prim_type;
|
|
INSERT_PADDING_WORDS(1);
|
|
u32 index_size;
|
|
u32 max_index_size;
|
|
IndexBufferType index_buffer_type;
|
|
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
|
|
u32 enable_primitive_id;
|
|
INSERT_PADDING_WORDS(3);
|
|
u32 enable_primitive_restart;
|
|
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1);
|
|
u32 vgt_instance_step_rate_0;
|
|
u32 vgt_instance_step_rate_1;
|
|
INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1);
|
|
u32 vgt_esgs_ring_itemsize;
|
|
u32 vgt_gsvs_ring_itemsize;
|
|
INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1);
|
|
BitField<0, 11, u32> vgt_gs_max_vert_out;
|
|
INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
|
|
ShaderStageEnable stage_enable;
|
|
LsHsConfig ls_hs_config;
|
|
u32 vgt_gs_vert_itemsize[4];
|
|
TessellationConfig tess_config;
|
|
INSERT_PADDING_WORDS(3);
|
|
PolygonOffset poly_offset;
|
|
GsInstances vgt_gs_instance_cnt;
|
|
StreamOutConfig vgt_strmout_config;
|
|
StreamOutBufferConfig vgt_strmout_buffer_config;
|
|
INSERT_PADDING_WORDS(0xA2F8 - 0xA2E6 - 1);
|
|
AaConfig aa_config;
|
|
INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1);
|
|
ColorBuffer color_buffers[NumColorBuffers];
|
|
INSERT_PADDING_WORDS(0xC242 - 0xA390);
|
|
PrimitiveType primitive_type;
|
|
INSERT_PADDING_WORDS(0xC24C - 0xC243);
|
|
u32 num_indices;
|
|
VgtNumInstances num_instances;
|
|
INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1);
|
|
TessFactorMemoryBase vgt_tf_memory_base;
|
|
};
|
|
std::array<u32, NumRegs> reg_array{};
|
|
|
|
const ShaderProgram* ProgramForStage(u32 index) const {
|
|
switch (index) {
|
|
case 0:
|
|
return &ps_program;
|
|
case 1:
|
|
return &vs_program;
|
|
case 2:
|
|
return &gs_program;
|
|
case 3:
|
|
return &es_program;
|
|
case 4:
|
|
return &hs_program;
|
|
case 5:
|
|
return &ls_program;
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
u32 NumSamples() const {
|
|
// It seems that the number of samples > 1 set in the AA config doesn't mean we're
|
|
// always rendering with MSAA, so we need to derive MS ratio from the CB and DB
|
|
// settings.
|
|
u32 num_samples = 1u;
|
|
if (color_control.mode != ColorControl::OperationMode::Disable) {
|
|
for (auto cb = 0u; cb < NumColorBuffers; ++cb) {
|
|
const auto& col_buf = color_buffers[cb];
|
|
if (!col_buf) {
|
|
continue;
|
|
}
|
|
num_samples = std::max(num_samples, col_buf.NumSamples());
|
|
}
|
|
}
|
|
if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) {
|
|
num_samples = std::max(num_samples, depth_buffer.NumSamples());
|
|
}
|
|
return num_samples;
|
|
}
|
|
|
|
void SetDefaults();
|
|
};
|
|
|
|
Regs regs{};
|
|
|
|
// See for a comment in context reg parsing code
|
|
union CbDbExtent {
|
|
struct {
|
|
u16 width;
|
|
u16 height;
|
|
};
|
|
u32 raw{0u};
|
|
|
|
[[nodiscard]] bool Valid() const {
|
|
return raw != 0;
|
|
}
|
|
};
|
|
std::array<CbDbExtent, NumColorBuffers> last_cb_extent{};
|
|
CbDbExtent last_db_extent{};
|
|
|
|
public:
|
|
Liverpool();
|
|
~Liverpool();
|
|
|
|
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
|
void SubmitAsc(u32 gnm_vqid, std::span<const u32> acb);
|
|
|
|
void SubmitDone() noexcept {
|
|
std::scoped_lock lk{submit_mutex};
|
|
mapped_queues[GfxQueueId].ccb_buffer_offset = 0;
|
|
mapped_queues[GfxQueueId].dcb_buffer_offset = 0;
|
|
submit_done = true;
|
|
submit_cv.notify_one();
|
|
}
|
|
|
|
void WaitGpuIdle() noexcept {
|
|
std::unique_lock lk{submit_mutex};
|
|
submit_cv.wait(lk, [this] { return num_submits == 0; });
|
|
}
|
|
|
|
bool IsGpuIdle() const {
|
|
return num_submits == 0;
|
|
}
|
|
|
|
void SetVoPort(Libraries::VideoOut::VideoOutPort* port) {
|
|
vo_port = port;
|
|
}
|
|
|
|
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
|
|
rasterizer = rasterizer_;
|
|
}
|
|
|
|
void SendCommand(Common::UniqueFunction<void>&& func) {
|
|
std::scoped_lock lk{submit_mutex};
|
|
command_queue.emplace(std::move(func));
|
|
++num_commands;
|
|
submit_cv.notify_one();
|
|
}
|
|
|
|
void reserveCopyBufferSpace() {
|
|
GpuQueue& gfx_queue = mapped_queues[GfxQueueId];
|
|
std::scoped_lock<std::mutex> lk(gfx_queue.m_access);
|
|
|
|
constexpr size_t GfxReservedSize = 2_MB >> 2;
|
|
gfx_queue.ccb_buffer.reserve(GfxReservedSize);
|
|
gfx_queue.dcb_buffer.reserve(GfxReservedSize);
|
|
}
|
|
|
|
inline ComputeProgram& GetCsRegs() {
|
|
return mapped_queues[curr_qid].cs_state;
|
|
}
|
|
|
|
struct AscQueueInfo {
|
|
VAddr map_addr;
|
|
u32* read_addr;
|
|
u32 ring_size_dw;
|
|
u32 pipe_id;
|
|
};
|
|
Common::SlotVector<AscQueueInfo> asc_queues{};
|
|
|
|
private:
|
|
struct Task {
|
|
struct promise_type {
|
|
auto get_return_object() {
|
|
Task task{};
|
|
task.handle = std::coroutine_handle<promise_type>::from_promise(*this);
|
|
return task;
|
|
}
|
|
static constexpr std::suspend_always initial_suspend() noexcept {
|
|
// We want the task to be suspended at start
|
|
return {};
|
|
}
|
|
static constexpr std::suspend_always final_suspend() noexcept {
|
|
return {};
|
|
}
|
|
void unhandled_exception() {
|
|
try {
|
|
std::rethrow_exception(std::current_exception());
|
|
} catch (const std::exception& e) {
|
|
UNREACHABLE_MSG("Unhandled exception: {}", e.what());
|
|
}
|
|
}
|
|
void return_void() {}
|
|
struct empty {};
|
|
std::suspend_always yield_value(empty&&) {
|
|
return {};
|
|
}
|
|
};
|
|
|
|
using Handle = std::coroutine_handle<promise_type>;
|
|
Handle handle;
|
|
};
|
|
|
|
std::pair<std::span<const u32>, std::span<const u32>> CopyCmdBuffers(std::span<const u32> dcb,
|
|
std::span<const u32> ccb);
|
|
Task ProcessGraphics(std::span<const u32> dcb, std::span<const u32> ccb);
|
|
Task ProcessCeUpdate(std::span<const u32> ccb);
|
|
template <bool is_indirect = false>
|
|
Task ProcessCompute(std::span<const u32> acb, u32 vqid);
|
|
|
|
void Process(std::stop_token stoken);
|
|
|
|
struct GpuQueue {
|
|
std::mutex m_access{};
|
|
std::atomic<u32> dcb_buffer_offset;
|
|
std::atomic<u32> ccb_buffer_offset;
|
|
std::vector<u32> dcb_buffer;
|
|
std::vector<u32> ccb_buffer;
|
|
std::queue<Task::Handle> submits{};
|
|
ComputeProgram cs_state{};
|
|
};
|
|
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
|
u32 num_mapped_queues{1u}; // GFX is always available
|
|
|
|
VAddr indirect_args_addr{};
|
|
|
|
struct ConstantEngine {
|
|
void Reset() {
|
|
ce_count = 0;
|
|
de_count = 0;
|
|
ce_compare_count = 0;
|
|
}
|
|
|
|
[[nodiscard]] u32 Diff() const {
|
|
ASSERT_MSG(ce_count >= de_count, "DE counter is ahead of CE");
|
|
return ce_count - de_count;
|
|
}
|
|
|
|
u32 ce_compare_count{};
|
|
u32 ce_count{};
|
|
u32 de_count{};
|
|
static std::array<u8, 48_KB> constants_heap;
|
|
} cblock{};
|
|
|
|
Vulkan::Rasterizer* rasterizer{};
|
|
Libraries::VideoOut::VideoOutPort* vo_port{};
|
|
std::jthread process_thread{};
|
|
std::atomic<u32> num_submits{};
|
|
std::atomic<u32> num_commands{};
|
|
std::atomic<bool> submit_done{};
|
|
std::mutex submit_mutex;
|
|
std::condition_variable_any submit_cv;
|
|
std::queue<Common::UniqueFunction<void>> command_queue{};
|
|
int curr_qid{-1};
|
|
};
|
|
|
|
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
|
|
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
|
|
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
|
|
static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88);
|
|
static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8);
|
|
static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08);
|
|
static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48);
|
|
static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
|
|
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
|
|
static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C);
|
|
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
|
|
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
|
|
static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002);
|
|
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
|
|
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
|
|
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
|
|
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
|
|
static_assert(GFX6_3D_REG_INDEX(window_offset) == 0xA080);
|
|
static_assert(GFX6_3D_REG_INDEX(window_scissor) == 0xA081);
|
|
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
|
|
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
|
|
static_assert(GFX6_3D_REG_INDEX(generic_scissor) == 0xA090);
|
|
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
|
|
static_assert(GFX6_3D_REG_INDEX(index_offset) == 0xA102);
|
|
static_assert(GFX6_3D_REG_INDEX(primitive_restart_index) == 0xA103);
|
|
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
|
|
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
|
|
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
|
|
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
|
|
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
|
|
static_assert(GFX6_3D_REG_INDEX(ps_input_ena) == 0xA1B3);
|
|
static_assert(GFX6_3D_REG_INDEX(ps_input_addr) == 0xA1B4);
|
|
static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
|
|
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
|
|
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
|
|
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
|
|
static_assert(GFX6_3D_REG_INDEX(blend_control) == 0xA1E0);
|
|
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
|
|
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
|
|
static_assert(GFX6_3D_REG_INDEX(depth_control) == 0xA200);
|
|
static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
|
|
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
|
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
|
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
|
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
|
|
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
|
|
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
|
|
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
|
|
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
|
|
static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
|
|
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
|
|
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
|
|
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_buffer_config) == 0xA2E6);
|
|
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
|
|
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
|
|
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
|
|
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
|
|
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
|
|
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
|
|
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
|
|
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);
|
|
|
|
#undef GFX6_3D_REG_INDEX
|
|
|
|
} // namespace AmdGpu
|