video_core: Add basic command list processing (#117)

This commit is contained in:
TheTurtle 2024-04-30 01:23:28 +03:00 committed by GitHub
parent 2696733cad
commit b94efcba5a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1560 additions and 163 deletions

View file

@ -0,0 +1,95 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "common/io_file.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/pm4_cmds.h"
namespace AmdGpu {
/// Defaulted constructor; the register file is set up by the in-class initializer of `regs`.
Liverpool::Liverpool() = default;
/**
 * Walks a PM4 command list and applies each packet to the emulated register state.
 *
 * @param cmdbuf        Pointer to the first PM4 packet header of the list.
 * @param size_in_bytes Size of the command list in bytes.
 *
 * Only type 3 packets are handled; any other packet type and any unknown type 3 opcode ends in
 * UNREACHABLE_MSG. Register writes are bounds checked so that a malformed SET_*_REG packet
 * cannot write past the end of regs.reg_array.
 */
void Liverpool::ProcessCmdList(u32* cmdbuf, u32 size_in_bytes) {
    auto* header = reinterpret_cast<PM4Header*>(cmdbuf);
    u32 processed_cmd_size = 0;

    while (processed_cmd_size < size_in_bytes) {
        PM4Header* next_header{};
        const u32 type = header->type;
        switch (type) {
        case 3: {
            const PM4ItOpcode opcode = header->type3.opcode;
            // Number of body dwords that follow the header.
            const u32 count = header->type3.NumWords();

            // Shared body of the SET_*_REG packets. The first body dword carries the register
            // offset, the remaining (count - 1) dwords are the values, copied verbatim into the
            // register file starting at base_word_offset + regOffset.
            const auto write_regs = [&](u32 base_word_offset) {
                const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
                const u32 first_reg = base_word_offset + set_data->regOffset;
                const u32 num_values = count - 1;
                // regOffset is a 16-bit field taken from the command stream, so the destination
                // range has to be validated before copying.
                ASSERT(first_reg + num_values <= regs.reg_array.size());
                std::memcpy(regs.reg_array.data() + first_reg, header + 2,
                            num_values * sizeof(u32));
            };

            switch (opcode) {
            case PM4ItOpcode::Nop:
                break;
            case PM4ItOpcode::SetContextReg:
                write_regs(ContextRegWordOffset);
                break;
            case PM4ItOpcode::SetShReg:
                write_regs(ShRegWordOffset);
                break;
            case PM4ItOpcode::SetUconfigReg:
                write_regs(UconfigRegWordOffset);
                break;
            case PM4ItOpcode::IndexType: {
                auto* index_type = reinterpret_cast<PM4CmdDrawIndexType*>(header);
                regs.index_buffer_type.raw = index_type->raw;
                break;
            }
            case PM4ItOpcode::DrawIndex2: {
                auto* draw_index = reinterpret_cast<PM4CmdDrawIndex2*>(header);
                regs.max_index_size = draw_index->maxSize;
                regs.index_base_address.base_addr_lo = draw_index->indexBaseLo;
                regs.index_base_address.base_addr_hi.Assign(draw_index->indexBaseHi);
                regs.num_indices = draw_index->indexCount;
                regs.draw_initiator = draw_index->drawInitiator;
                // rasterizer->DrawIndex();
                break;
            }
            case PM4ItOpcode::DrawIndexAuto: {
                auto* draw_index = reinterpret_cast<PM4CmdDrawIndexAuto*>(header);
                regs.num_indices = draw_index->index_count;
                regs.draw_initiator = draw_index->draw_initiator;
                // rasterizer->DrawIndex();
                break;
            }
            case PM4ItOpcode::EventWriteEop: {
                auto* event_write = reinterpret_cast<PM4CmdEventWriteEop*>(header);
                const InterruptSelect irq_sel = event_write->intSel;
                const DataSelect data_sel = event_write->dataSel;
                // Only the "write a 64-bit value, raise no interrupt" form is supported.
                ASSERT(irq_sel == InterruptSelect::None && data_sel == DataSelect::Data64);
                *event_write->Address() = event_write->DataQWord();
                break;
            }
            case PM4ItOpcode::DmaData:
                // Accepted but not acted upon: the packet is skipped like a Nop.
                break;
            default:
                UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                                static_cast<u32>(opcode), count);
            }
            // A packet occupies one header dword plus `count` body dwords.
            next_header = header + count + 1;
            break;
        }
        default:
            UNREACHABLE_MSG("Invalid PM4 type {}", type);
        }

        processed_cmd_size += uintptr_t(next_header) - uintptr_t(header);
        header = next_header;
    }
}
} // namespace AmdGpu

View file

@ -0,0 +1,647 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include "common/bit_field.h"
#include "common/types.h"
namespace AmdGpu {
// Dword index of `field_name` inside Liverpool::Regs (byte offset divided by sizeof(u32)).
// Used by the static_asserts that follow the struct and #undef'd right after them.
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
// Two-level token pasting so that arguments such as __LINE__ are expanded before being joined.
#define CONCAT2(x, y) DO_CONCAT2(x, y)
#define DO_CONCAT2(x, y) x##y
// Declares an unused std::array<u32, num_words> member named pad<line number>, reserving
// `num_words` dwords of layout space.
#define INSERT_PADDING_WORDS(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
struct Liverpool {
// Element counts of the arrays declared in Regs and ShaderProgram.
static constexpr u32 NumColorBuffers = 8;
static constexpr u32 NumViewports = 16;
static constexpr u32 NumClipPlanes = 6;
static constexpr u32 NumWordsShaderUserData = 16;
// Dword indices in Regs::reg_array at which each register range starts; ProcessCmdList adds the
// regOffset of a SetUconfigReg/SetContextReg/SetShReg packet to the matching base.
static constexpr u32 UconfigRegWordOffset = 0xC000;
static constexpr u32 ContextRegWordOffset = 0xA000;
static constexpr u32 ShRegWordOffset = 0x2C00;
// Total number of dwords in Regs::reg_array.
static constexpr u32 NumRegs = 0xD000;
// User data dwords stored with every ShaderProgram.
using UserData = std::array<u32, NumWordsShaderUserData>;
/// Register block of one shader stage (used for ps_program and vs_program in Regs): program
/// address, resource settings and user data dwords.
struct ShaderProgram {
    u32 address_lo;
    u32 address_hi;
    union {
        BitField<0, 6, u64> num_vgprs;
        BitField<6, 4, u64> num_sgprs;
        BitField<33, 5, u64> num_user_regs;
    } settings;
    UserData user_data;

    /// Rebuilds the program pointer: address_lo is shifted up by 8 bits and address_hi by 40.
    const u8* Address() const {
        const uintptr_t low_part = uintptr_t(address_lo) << 8;
        const uintptr_t high_part = uintptr_t(address_hi) << 40;
        return reinterpret_cast<const u8*>(high_part | low_part);
    }
};
// Export layout selector stored in each 4-bit slot of ShaderPosFormat.
enum class ShaderExportComp : u32 {
None = 0,
OneComp = 1,
TwoComp = 2,
FourCompCompressed = 3,
FourComp = 4,
};
// Register word (Regs::shader_pos_format) holding one ShaderExportComp selector for each of
// pos0..pos3, four bits apiece.
union ShaderPosFormat {
u32 raw;
BitField<0, 4, ShaderExportComp> pos0;
BitField<4, 4, ShaderExportComp> pos1;
BitField<8, 4, ShaderExportComp> pos2;
BitField<12, 4, ShaderExportComp> pos3;
};
// Export format selector; used directly as Regs::z_export_format and per slot in
// ColorExportFormat.
enum class ShaderExportFormat : u32 {
Zero = 0,
R_32 = 1,
GR_32 = 2,
AR_32 = 3,
ABGR_FP16 = 4,
ABGR_UNORM16 = 5,
ABGR_SNORM16 = 6,
ABGR_UINT16 = 7,
ABGR_SINT16 = 8,
ABGR_32 = 9,
};
// Register word (Regs::color_export_format) with one 4-bit ShaderExportFormat for each of the
// eight slots col0..col7.
union ColorExportFormat {
u32 raw;
BitField<0, 4, ShaderExportFormat> col0;
BitField<4, 4, ShaderExportFormat> col1;
BitField<8, 4, ShaderExportFormat> col2;
BitField<12, 4, ShaderExportFormat> col3;
BitField<16, 4, ShaderExportFormat> col4;
BitField<20, 4, ShaderExportFormat> col5;
BitField<24, 4, ShaderExportFormat> col6;
BitField<28, 4, ShaderExportFormat> col7;
};
union VsOutputControl {
u32 raw;
BitField<0, 8, u32> clip_distance_enable;
BitField<8, 8, u32> cull_distance_enable;
BitField<16, 1, u32> use_vtx_point_size;
BitField<17, 1, u32> use_vtx_edge_flag;
BitField<18, 1, u32> use_vtx_render_target_idx;
BitField<19, 1, u32> use_vtx_viewport_idx;
BitField<20, 1, u32> use_vtx_kill_flag;
bool IsClipDistEnabled(u32 index) const {
return (clip_distance_enable.Value() >> index) & 1;
}
bool IsCullDistEnabled(u32 index) const {
return (cull_distance_enable.Value() >> index) & 1;
}
};
// Value of the 2-bit DepthBufferControl::z_order field.
enum class ZOrder : u32 {
LateZ = 0,
EarlyZLateZ = 1,
ReZ = 2,
EarlyZReZ = 3,
};
// Value of the 2-bit DepthBufferControl::conservative_z_export field.
enum class ConservativeDepth : u32 {
Any = 0,
LessThanZ = 1,
GreaterThanZ = 2,
};
// Register word stored as Regs::depth_buffer_control. Bit 3 is not mapped to a field.
union DepthBufferControl {
u32 raw;
BitField<0, 1, u32> z_export_enable;
BitField<1, 1, u32> stencil_test_val_export_enable;
BitField<2, 1, u32> stencil_op_val_export_enable;
BitField<4, 2, ZOrder> z_order;
BitField<6, 1, u32> kill_enable;
BitField<7, 1, u32> coverage_to_mask_enable;
BitField<8, 1, u32> mask_export_enable;
BitField<9, 1, u32> exec_on_hier_fail;
BitField<10, 1, u32> exec_on_noop;
BitField<11, 1, u32> alpha_to_mask_disable;
BitField<12, 1, u32> depth_before_shader;
BitField<13, 2, ConservativeDepth> conservative_z_export;
};
// Comparison operator encoded in the 3-bit depth and stencil function fields of DepthControl.
enum class CompareFunc : u32 {
Never = 0,
Less = 1,
Equal = 2,
LessEqual = 3,
Greater = 4,
NotEqual = 5,
GreaterEqual = 6,
Always = 7,
};
// Register word stored as Regs::depth_control: depth/stencil enables and compare functions.
// stencil_ref_func and stencil_bf_func are separate 3-bit CompareFunc fields
// (presumably front-face and back-face — confirm against the register reference).
union DepthControl {
u32 raw;
BitField<0, 1, u32> stencil_enable;
BitField<1, 1, u32> depth_enable;
BitField<2, 1, u32> depth_write_enable;
BitField<3, 1, u32> depth_bounds_enable;
BitField<4, 3, CompareFunc> depth_func;
BitField<7, 1, u32> backface_enable;
BitField<8, 3, CompareFunc> stencil_ref_func;
BitField<20, 3, CompareFunc> stencil_bf_func;
BitField<30, 1, u32> enable_color_writes_on_depth_fail;
BitField<31, 1, u32> disable_color_writes_on_depth_pass;
};
/// Register word (Regs::depth_size) storing the depth surface pitch and height as
/// "count minus one" values in units of 8.
union DepthSize {
    u32 raw;
    BitField<0, 11, u32> pitch_tile_max;
    BitField<11, 11, u32> height_tile_max;

    /// Pitch computed as (pitch_tile_max + 1) * 8.
    u32 Pitch() const {
        const u32 tile_count = pitch_tile_max + 1;
        return tile_count << 3;
    }

    /// Height computed as (height_tile_max + 1) * 8.
    u32 Height() const {
        const u32 tile_count = height_tile_max + 1;
        return tile_count << 3;
    }
};
// Register word stored as Regs::depth_slice; only the low 22 bits are mapped (slice_tile_max).
union DepthSlice {
u32 raw;
BitField<0, 22, u32> slice_tile_max;
};
// Stencil update operation; each StencilControl slot stores one of these in 4 bits.
enum class StencilFunc : u32 {
Keep = 0,
Zero = 1,
Ones = 2,
ReplaceTest = 3,
ReplaceOp = 4,
AddClamp = 5,
SubClamp = 6,
Invert = 7,
AddWrap = 8,
SubWrap = 9,
And = 10,
Or = 11,
Xor = 12,
Nand = 13,
Nor = 14,
Xnor = 15,
};
// Register word stored as Regs::stencil_control: the fail / z-pass / z-fail operation for the
// front face (bits 0..11) and for the back face (bits 12..23).
union StencilControl {
u32 raw;
BitField<0, 4, StencilFunc> stencil_fail_front;
BitField<4, 4, StencilFunc> stencil_zpass_front;
BitField<8, 4, StencilFunc> stencil_zfail_front;
BitField<12, 4, StencilFunc> stencil_fail_back;
BitField<16, 4, StencilFunc> stencil_zpass_back;
BitField<20, 4, StencilFunc> stencil_zfail_back;
};
// Four 8-bit stencil values packed into one word; Regs holds one instance for the front face
// (stencil_ref_front) and one for the back face (stencil_ref_back).
union StencilRefMask {
u32 raw;
BitField<0, 8, u32> stencil_test_val;
BitField<8, 8, u32> stencil_mask;
BitField<16, 8, u32> stencil_write_mask;
BitField<24, 8, u32> stencil_op_val;
};
// Register word stored as Regs::stencil_info; only bit 0 (format) is mapped.
union StencilInfo {
u32 raw;
BitField<0, 1, u32> format;
};
// Value of the 1-bit ClipperControl::clip_space field.
enum class ClipSpace : u32 {
MinusWToW = 0,
ZeroToW = 1,
};
// Value of the 1-bit ClipperControl::vtx_kill_or field.
enum class PrimKillCond : u32 {
AllVtx = 0,
AnyVtx = 1,
};
/// Clipper control register word (Regs::clipper_control).
///
/// Every flag has its own bit. The near and far Z-clip disables are bits 26 and 27, and the
/// linear attribute clip enable is bit 24 (bit 23 is unused), following the hardware layout of
/// PA_CL_CLIP_CNTL.
union ClipperControl {
    u32 raw;
    BitField<0, 6, u32> user_clip_plane_enable; ///< One enable bit per user clip plane
    BitField<16, 1, u32> clip_disable;
    BitField<19, 1, ClipSpace> clip_space;
    BitField<21, 1, PrimKillCond> vtx_kill_or;
    BitField<22, 1, u32> dx_rasterization_kill;
    BitField<24, 1, u32> dx_linear_attr_clip_enable;
    BitField<26, 1, u32> zclip_near_disable;
    BitField<27, 1, u32> zclip_far_disable;
};
// Fill mode stored in the polygon_mode_front / polygon_mode_back fields of PolygonControl.
enum class PolygonMode : u32 {
Point = 0,
Line = 1,
Fill = 2,
};
// Value of the 1-bit PolygonControl::provoking_vtx_last field.
enum class ProvokingVtxLast : u32 {
First = 0,
Last = 1,
};
// Result of PolygonControl::CullingMode(): bit 0 mirrors cull_front, bit 1 mirrors cull_back.
enum class CullMode : u32 {
None = 0,
Front = 1,
Back = 2,
FrontAndBack = 3,
};
/// Polygon setup register word (Regs::polygon_control).
///
/// enable_window_offset is bit 16, matching VTX_WINDOW_OFFSET_ENABLE in the hardware layout of
/// PA_SU_SC_MODE_CNTL, so it no longer aliases enable_polygon_offset_para on bit 13.
union PolygonControl {
    u32 raw;
    BitField<0, 1, u32> cull_front;
    BitField<1, 1, u32> cull_back;
    BitField<3, 2, u32> enable_polygon_mode;
    BitField<5, 3, PolygonMode> polygon_mode_front;
    BitField<8, 3, PolygonMode> polygon_mode_back;
    BitField<11, 1, u32> enable_polygon_offset_front;
    BitField<12, 1, u32> enable_polygon_offset_back;
    BitField<13, 1, u32> enable_polygon_offset_para;
    BitField<16, 1, u32> enable_window_offset;
    BitField<19, 1, ProvokingVtxLast> provoking_vtx_last;

    /// Front-face polygon mode when polygon mode is enabled, PolygonMode::Fill otherwise.
    PolygonMode PolyMode() const {
        return enable_polygon_mode ? polygon_mode_front.Value() : PolygonMode::Fill;
    }

    /// Combines the two cull bits into a CullMode (bit 0 = front, bit 1 = back).
    CullMode CullingMode() const {
        return static_cast<CullMode>(cull_front | cull_back << 1);
    }
};
/// Register word (Regs::vs_output_config) holding the vertex shader export count, stored as
/// "count minus one".
union VsOutputConfig {
    u32 raw;
    BitField<1, 5, u32> export_count_min_one;
    BitField<6, 1, u32> half_pack;

    /// Export count: the stored field plus one.
    u32 NumExports() const {
        const u32 stored_count = export_count_min_one.Value();
        return stored_count + 1;
    }
};
// Eight 4-bit masks, one per output slot; Regs stores two instances (color_target_mask and
// color_shader_mask).
union ColorBufferMask {
u32 raw;
BitField<0, 4, u32> output0_mask;
BitField<4, 4, u32> output1_mask;
BitField<8, 4, u32> output2_mask;
BitField<12, 4, u32> output3_mask;
BitField<16, 4, u32> output4_mask;
BitField<20, 4, u32> output5_mask;
BitField<24, 4, u32> output6_mask;
BitField<28, 4, u32> output7_mask;
};
/// Index buffer base address split across two register words: the high word comes first and
/// contributes 8 bits, the low word follows.
struct IndexBufferBase {
    BitField<0, 8, u32> base_addr_hi;
    u32 base_addr_lo;

    /// Joins both halves: base_addr_hi supplies bits 32..39, base_addr_lo bits 0..31.
    VAddr Address() const {
        const u64 upper_bits = u64(base_addr_hi) << 32;
        return upper_bits | base_addr_lo;
    }
};
// Width of one index, stored in IndexBufferType::index_type.
enum class IndexType : u32 {
Index16 = 0,
Index32 = 1,
};
// Swap mode stored in IndexBufferType::swap_mode.
enum class IndexSwapMode : u32 {
None = 0,
Swap16 = 1,
Swap32 = 2,
SwapWord = 3,
};
// Register word stored as Regs::index_buffer_type; ProcessCmdList overwrites `raw` when it
// handles an IndexType packet.
union IndexBufferType {
u32 raw;
BitField<0, 2, IndexType> index_type;
BitField<2, 2, IndexSwapMode> swap_mode;
};
/// Register word (Regs::num_instances) holding the instance count of a draw.
union VgtNumInstances {
    u32 num_instances;

    /// Instance count, with a stored value of zero reported as one.
    u32 NumInstances() const {
        if (num_instances == 0) {
            return 1;
        }
        return num_instances;
    }
};
/// Scissor rectangle stored as two register words: the top-left corner followed by the
/// bottom-right corner.
struct Scissor {
    union {
        BitField<0, 16, s32> top_left_x;
        BitField<16, 16, s32> top_left_y;
    };
    union {
        BitField<0, 15, u32> bottom_right_x;
        BitField<16, 15, u32> bottom_right_y;
    };

    /// Horizontal extent: bottom_right_x minus top_left_x.
    u32 GetWidth() const {
        const u32 right = bottom_right_x;
        const s32 left = top_left_x;
        return static_cast<u32>(right - left);
    }

    /// Vertical extent: bottom_right_y minus top_left_y.
    u32 GetHeight() const {
        const u32 bottom = bottom_right_y;
        const s32 top = top_left_y;
        return static_cast<u32>(bottom - top);
    }
};
// Scissor rectangle of one viewport (Regs::viewport_scissors holds NumViewports of them).
// Two register words: the top-left corner plus a window offset disable flag, then the
// bottom-right corner. Coordinates are 15-bit fields.
// NOTE(review): `window_offset_disble` looks like a misspelling of "disable"; it is left as is
// because renaming would change the struct's interface.
struct ViewportScissor {
union {
BitField<0, 15, s32> top_left_x;
BitField<15, 15, s32> top_left_y;
BitField<30, 1, s32> window_offset_disble;
};
union {
BitField<0, 15, s32> bottom_right_x;
BitField<15, 15, s32> bottom_right_y;
};
};
// Depth range of one viewport (Regs::viewport_depths); two float register words.
struct ViewportDepth {
float zmin;
float zmax;
};
// Scale/offset pairs of one viewport (Regs::viewports); six float register words.
// Note the member order of the Z pair: zoffset is declared before zscale.
struct ViewportBounds {
float xscale;
float xoffset;
float yscale;
float yoffset;
float zoffset;
float zscale;
};
// Register word stored as Regs::viewport_control: enable bits for the individual scale/offset
// terms and the *_transformed flags. Unlike most unions here it has no `raw` member.
union ViewportControl {
BitField<0, 1, u32> xscale_enable;
BitField<1, 1, u32> xoffset_enable;
BitField<2, 1, u32> yscale_enable;
BitField<3, 1, u32> yoffset_enable;
BitField<4, 1, u32> zscale_enable;
BitField<5, 1, u32> zoffset_enable;
BitField<8, 1, u32> xy_transformed;
BitField<9, 1, u32> z_transformed;
BitField<10, 1, u32> w_transformed;
};
// Four raw dwords of one user clip plane (Regs::clip_user_data holds NumClipPlanes of them).
struct ClipUserData {
u32 data_x;
u32 data_y;
u32 data_z;
u32 data_w;
};
/// Register block describing one color render target (Regs::color_buffers holds
/// NumColorBuffers of them, 15 dwords each).
///
/// Bit layout notes:
///  - info.number_type is 3 bits wide (bits 8..10) so that every NumberType value up to
///    Float = 7 is representable; comp_swap follows at bit 11.
///  - attrib.num_fragments_log2 is 2 bits wide (bits 15..16) and force_dst_alpha_1 is bit 17,
///    following the hardware layout of CB_COLOR0_ATTRIB, so the two fields do not overlap.
struct ColorBuffer {
    enum class EndianSwap : u32 {
        None = 0,
        Swap8In16 = 1,
        Swap8In32 = 2,
        Swap8In64 = 3,
    };

    enum class Format : u32 {
        Invalid = 0,
        Color_8 = 1,
        Color_16 = 2,
        Color_8_8 = 3,
        Color_32 = 4,
        Color_16_16 = 5,
        Color_10_11_11 = 6,
        Color_11_11_10 = 7,
        Color_10_10_10_2 = 8,
        Color_2_10_10_10 = 9,
        Color_8_8_8_8 = 10,
        Color_32_32 = 11,
        Color_16_16_16_16 = 12,
        Color_32_32_32_32 = 14,
        Color_5_6_5 = 16,
        Color_1_5_5_5 = 17,
        Color_5_5_5_1 = 18,
        Color_4_4_4_4 = 19,
        Color_8_24 = 20,
        Color_24_8 = 21,
        Color_X24_8_32_FL = 22,
    };

    enum class NumberType : u32 {
        Unorm = 0,
        Snorm = 1,
        Uint = 4,
        Sint = 5,
        Srgb = 6,
        Float = 7,
    };

    enum class SwapMode : u32 {
        Standard = 0,
        Alternate = 1,
        StandardReverse = 2,
        AlternateReverse = 3,
    };

    enum class RoundMode : u32 {
        ByHalf = 0,
        Truncate = 1,
    };

    u32 base_address; ///< Surface address shifted right by 8 (see Address())
    union {
        BitField<0, 11, u32> tile_max;
        BitField<20, 11, u32> fmask_tile_max;
    } pitch;
    union {
        BitField<0, 22, u32> tile_max;
    } slice;
    union {
        BitField<0, 11, u32> slice_start;
        BitField<13, 11, u32> slice_max;
    } view;
    union {
        BitField<0, 2, EndianSwap> endian;
        BitField<2, 5, Format> format;
        BitField<7, 1, u32> linear_general;
        BitField<8, 3, NumberType> number_type;
        BitField<11, 2, SwapMode> comp_swap;
        BitField<13, 1, u32> fast_clear;
        BitField<14, 1, u32> compression;
        BitField<15, 1, u32> blend_clamp;
        BitField<16, 1, u32> blend_bypass;
        BitField<17, 1, u32> simple_float;
        BitField<18, 1, RoundMode> round_mode;
        BitField<19, 1, u32> cmask_is_linear;
    } info;
    union {
        BitField<0, 5, u32> tile_mode_index;
        BitField<5, 5, u32> fmask_tile_mode_index;
        BitField<12, 3, u32> num_samples_log2;
        BitField<15, 2, u32> num_fragments_log2;
        BitField<17, 1, u32> force_dst_alpha_1;
    } attrib;
    INSERT_PADDING_WORDS(1);
    u32 cmask_base_address; ///< CMASK address shifted right by 8 (see CmaskAddress())
    union {
        BitField<0, 14, u32> tile_max;
    } cmask_slice;
    u32 fmask_base_address;
    union {
        BitField<0, 14, u32> tile_max;
    } fmask_slice;
    u32 clear_word0;
    u32 clear_word1;
    INSERT_PADDING_WORDS(2);

    /// Pitch computed as (pitch.tile_max + 1) * 8.
    u32 Pitch() const {
        return (pitch.tile_max + 1) << 3;
    }

    /// Height derived from the slice size: (slice.tile_max + 1) * 64 / Pitch().
    u32 Height() const {
        return (slice.tile_max + 1) * 64 / Pitch();
    }

    /// Surface address: base_address shifted left by 8.
    u64 Address() const {
        return u64(base_address) << 8;
    }

    /// CMASK address: cmask_base_address shifted left by 8.
    u64 CmaskAddress() const {
        return u64(cmask_base_address) << 8;
    }
};
// Primitive topology stored in Regs::primitive_type. The numbering has gaps
// (7, 8 and 14..16 are not assigned here).
enum class PrimitiveType : u32 {
None = 0,
PointList = 1,
LineList = 2,
LineStrip = 3,
TriangleList = 4,
TriangleFan = 5,
TriangleStrip = 6,
PatchPrimitive = 9,
AdjLineList = 10,
AdjLineStrip = 11,
AdjTriangleList = 12,
AdjTriangleStrip = 13,
RectList = 17,
LineLoop = 18,
QuadList = 19,
QuadStrip = 20,
Polygon = 21,
};
// Register file, viewable either as named registers or as a flat dword array.
//
// The anonymous struct places each named register at its dword index: every
// INSERT_PADDING_WORDS argument is the gap between the previous member and the next register
// index, so the resulting offsets match the static_asserts that follow the Liverpool struct.
// `reg_array` overlays the same storage as NumRegs dwords; ProcessCmdList writes SET_*_REG
// packet payloads through it.
union Regs {
struct {
// Registers addressed relative to ShRegWordOffset (0x2C00).
INSERT_PADDING_WORDS(0x2C08);
ShaderProgram ps_program;
INSERT_PADDING_WORDS(0x2C);
ShaderProgram vs_program;
// Registers addressed relative to ContextRegWordOffset (0xA000).
INSERT_PADDING_WORDS(0xA008 - 0x2C4C - 16);
u32 depth_bounds_min;
u32 depth_bounds_max;
u32 stencil_clear;
u32 depth_clear;
Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2);
StencilInfo stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
DepthSize depth_size;
DepthSlice depth_slice;
INSERT_PADDING_WORDS(0xA08E - 0xA018);
ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask;
INSERT_PADDING_WORDS(0xA094 - 0xA08E - 2);
std::array<ViewportScissor, NumViewports> viewport_scissors;
std::array<ViewportDepth, NumViewports> viewport_depths;
INSERT_PADDING_WORDS(0xA10B - 0xA0D4);
StencilControl stencil_control;
StencilRefMask stencil_ref_front;
StencilRefMask stencil_ref_back;
INSERT_PADDING_WORDS(1);
std::array<ViewportBounds, NumViewports> viewports;
std::array<ClipUserData, NumClipPlanes> clip_user_data;
INSERT_PADDING_WORDS(0xA1B1 - 0xA187);
VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1);
ShaderPosFormat shader_pos_format;
ShaderExportFormat z_export_format;
ColorExportFormat color_export_format;
INSERT_PADDING_WORDS(0xA1F9 - 0xA1C3 - 3);
IndexBufferBase index_base_address;
INSERT_PADDING_WORDS(1);
u32 draw_initiator;
INSERT_PADDING_WORDS(0xA200 - 0xA1F9 - 4);
DepthControl depth_control;
INSERT_PADDING_WORDS(2);
DepthBufferControl depth_buffer_control;
ClipperControl clipper_control;
PolygonControl polygon_control;
ViewportControl viewport_control;
VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(0xA29E - 0xA207 - 1);
u32 max_index_size;
IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
u32 enable_primitive_id;
INSERT_PADDING_WORDS(0xA318 - 0xA2A1 - 1);
ColorBuffer color_buffers[NumColorBuffers];
// Registers addressed relative to UconfigRegWordOffset (0xC000).
INSERT_PADDING_WORDS(0xC242 - 0xA390);
PrimitiveType primitive_type;
INSERT_PADDING_WORDS(0xC24C - 0xC243);
u32 num_indices;
VgtNumInstances num_instances;
};
// Flat view of the whole register file; zero-initialized by default.
std::array<u32, NumRegs> reg_array{};
};
// Current register state, value-initialized on construction.
Regs regs{};
public:
Liverpool();
// Processes the PM4 command list starting at `cmdbuf` (`size_in_bytes` bytes long) and applies
// its packets to `regs`.
void ProcessCmdList(u32* cmdbuf, u32 size_in_bytes);
};
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);
static_assert(GFX6_3D_REG_INDEX(index_base_address) == 0xA1F9);
static_assert(GFX6_3D_REG_INDEX(draw_initiator) == 0xA1FC);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
#undef GFX6_3D_REG_INDEX
} // namespace AmdGpu

View file

@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include "video_core/amdgpu/pixel_format.h"
namespace AmdGpu {
/**
 * Returns the number of components stored in one element of `format`.
 *
 * @param format Data format whose numeric value indexes the lookup table.
 * @return The component count, or 0 when the format has no table entry: either its value lies
 *         beyond the table or the table marks that encoding as unassigned (-1).
 */
u32 getNumComponents(DataFormat format) {
    // Indexed by the numeric DataFormat value; -1 marks encodings without a component count.
    constexpr std::array numComponentsPerElement = {
        0,  1,  1,  2,  1,  2,  3,  3,  4,  4,  4,  2,  4,  3,  4,  -1, 3,  4,  4,  4,  2,
        2,  2,  -1, -1, -1, -1, -1, -1, -1, -1, -1, 3,  3,  3,  4,  4,  4,  1,  2,  3,  4,
        -1, -1, 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  1,  1};
    const u32 index = static_cast<u32>(format);
    if (index >= numComponentsPerElement.size()) {
        return 0;
    }
    // The table holds ints. Returning a -1 entry through the unsigned return type would wrap to
    // 0xFFFFFFFF, so unassigned encodings are reported as 0, like out-of-range values.
    const int num_components = numComponentsPerElement[index];
    return num_components < 0 ? 0 : static_cast<u32>(num_components);
}
} // namespace AmdGpu

View file

@ -0,0 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace AmdGpu {
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
// The numeric values index the lookup table in getNumComponents(). Values 15 and 23..31 are not
// assigned here and are the -1 entries of that table.
enum class DataFormat : u32 {
FormatInvalid = 0,
Format8 = 1,
Format16 = 2,
Format8_8 = 3,
Format32 = 4,
Format16_16 = 5,
Format10_11_11 = 6,
Format11_11_10 = 7,
Format10_10_10_2 = 8,
Format2_10_10_10 = 9,
Format8_8_8_8 = 10,
Format32_32 = 11,
Format16_16_16_16 = 12,
Format32_32_32 = 13,
Format32_32_32_32 = 14,
Format5_6_5 = 16,
Format1_5_5_5 = 17,
Format5_5_5_1 = 18,
Format4_4_4_4 = 19,
Format8_24 = 20,
Format24_8 = 21,
FormatX24_8_32 = 22,
FormatGB_GR = 32,
FormatBG_RG = 33,
Format5_9_9_9 = 34,
FormatBc1 = 35,
FormatBc2 = 36,
FormatBc3 = 37,
FormatBc4 = 38,
FormatBc5 = 39,
FormatBc6 = 40,
FormatBc7 = 41,
};
// Numeric interpretation of a data element (value 8 is not assigned here).
// NOTE(review): `UbnromNz` looks like a misspelling of "UbnormNz"; it is left unchanged because
// renaming would alter the public enumerator.
enum class NumberFormat : u32 {
Unorm = 0,
Snorm = 1,
Uscaled = 2,
Sscaled = 3,
Uint = 4,
Sint = 5,
SnormNz = 6,
Float = 7,
Srgb = 9,
Ubnorm = 10,
UbnromNz = 11,
Ubint = 12,
Ubscaled = 13,
};
/// Looks up the per-element component count of `format`; returns 0 when the numeric format
/// value lies beyond the lookup table.
u32 getNumComponents(DataFormat format);
} // namespace AmdGpu

View file

@ -0,0 +1,290 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstring>
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pm4_opcodes.h"
namespace AmdGpu {
/// This enum defines the Shader types supported in PM4 type 3 header
enum class PM4ShaderType : u32 {
ShaderGraphics = 0, ///< Graphics shader
ShaderCompute = 1 ///< Compute shader
};
/// This enum defines the predicate value supported in PM4 type 3 header
enum class PM4Predicate : u32 {
PredDisable = 0, ///< Predicate disabled
PredEnable = 1 ///< Predicate enabled
};
/// Header dword of a PM4 type 0 packet.
union PM4Type0Header {
    u32 raw;
    BitField<0, 16, u32> base;   ///< DWORD Memory-mapped address
    BitField<16, 14, u32> count; ///< Count of DWORDs in the *information* body (N - 1 for N dwords)
    BitField<30, 2, u32> type;   ///< Packet identifier. It should be 0 for type 0 packets.

    /// Number of body dwords: the stored count field plus one.
    u32 NumWords() const {
        const u32 stored_count = count;
        return stored_count + 1;
    }
};
/// Header dword of a PM4 type 3 packet.
union PM4Type3Header {
    u32 raw;
    BitField<0, 1, PM4Predicate> predicate;   ///< Predicated version of packet when set
    BitField<1, 1, PM4ShaderType> shaderType; ///< 0: Graphics, 1: Compute Shader
    BitField<8, 8, PM4ItOpcode> opcode;       ///< IT opcode
    BitField<16, 14, u32> count;              ///< Number of DWORDs - 1 in the information body.
    BitField<30, 2, u32> type; ///< Packet identifier. It should be 3 for type 3 packets

    /// Builds a header with every unmapped bit cleared and the packet type set to 3.
    /// `num_words_min_one` is stored in the count field unchanged.
    constexpr PM4Type3Header(PM4ItOpcode code, u32 num_words_min_one,
                             PM4ShaderType stype = PM4ShaderType::ShaderGraphics,
                             PM4Predicate pred = PM4Predicate::PredDisable) {
        raw = 0;
        // The fields occupy disjoint bit ranges, so the assignment order is irrelevant.
        type.Assign(3);
        count.Assign(num_words_min_one);
        opcode.Assign(code);
        shaderType.Assign(stype);
        predicate.Assign(pred);
    }

    /// Number of body dwords: the stored count field plus one.
    u32 NumWords() const {
        const u32 stored_count = count;
        return stored_count + 1;
    }
};
/// Generic view of a PM4 packet header. `type` (bits 30..31) tells which of the typed views
/// applies; ProcessCmdList reads it first and then uses `type3` for type 3 packets.
union PM4Header {
u32 raw;
PM4Type0Header type0;
PM4Type3Header type3;
BitField<30, 2, u32> type;
};
/// Emits one type 3 packet with opcode `opcode` at `cmdbuf`: a header dword followed by each
/// value of `data` as one body dword. The header's count field is set to the number of body
/// dwords minus one.
/// @return Pointer to the first dword after the written packet.
template <PM4ItOpcode opcode, typename... Args>
constexpr u32* Write(u32* cmdbuf, PM4ShaderType type, Args... data) {
    // Header dword.
    const PM4Type3Header packet_header{opcode, sizeof...(Args) - 1, type};
    std::memcpy(cmdbuf, &packet_header, sizeof(packet_header));

    // Body dwords, placed directly after the header.
    const std::array<u32, sizeof...(Args)> payload{data...};
    u32* const body = cmdbuf + 1;
    std::memcpy(body, payload.data(), sizeof(payload));
    return body + payload.size();
}
union ContextControlEnable {
u32 raw;
BitField<0, 1, u32> enableSingleCntxConfigReg; ///< single context config reg
BitField<1, 1, u32> enableMultiCntxRenderReg; ///< multi context render state reg
BitField<15, 1, u32> enableUserConfigReg__CI; ///< User Config Reg on CI(reserved for SI)
BitField<16, 1, u32> enableGfxSHReg; ///< Gfx SH Registers
BitField<24, 1, u32> enableCSSHReg; ///< CS SH Registers
BitField<31, 1, u32> enableDw; ///< DW enable
};
struct PM4CmdContextControl {
PM4Type3Header header;
ContextControlEnable loadControl; ///< Enable bits for loading
ContextControlEnable shadowEnable; ///< Enable bits for shadowing
};
union LoadAddressHigh {
u32 raw;
BitField<0, 16, u32>
addrHi; ///< bits for the block in Memory from where the CP will fetch the state
BitField<31, 1, u32>
waitIdle; ///< if set the CP will wait for the graphics pipe to be idle by writing
///< to the GRBM Wait Until register with "Wait for 3D idle"
};
/**
* PM4CMDLOADDATA can be used with the following opcodes
* - IT_LOAD_CONFIG_REG
* - IT_LOAD_CONTEXT_REG
* - IT_LOAD_SH_REG
*/
struct PM4CmdLoadData {
PM4Type3Header header;
u32 addrLo; ///< low 32 address bits for the block in memory from where the CP will fetch the
///< state
LoadAddressHigh addrHi;
u32 regOffset; ///< offset in DWords from the register base address
u32 numDwords; ///< number of DWords that the CP will fetch and write into the chip. A value of
///< zero will fetch nothing
};
/// Addressing mode selected by LoadAddressLow::index.
enum class LoadDataIndex : u32 {
DirectAddress = 0, /// ADDR_LO is direct address
Offset = 1, /// ADDR_LO is ignored and memory offset is in addrOffset
};
/// Payload layout selected by PM4CmdLoadDataIndex::dataFormat.
enum class LoadDataFormat : u32 {
OffsetAndSize = 0, /// Data is consecutive DWORDs
OffsetAndData = 1, /// Register offset and data is interleaved
};
/// Low address dword of PM4CmdLoadDataIndex; bit 0 selects the addressing mode.
union LoadAddressLow {
u32 raw;
BitField<0, 1, LoadDataIndex> index;
BitField<2, 30, u32> addrLo; ///< bits for the block in Memory from where the CP will fetch the
///< state. DWORD aligned
};
/**
* PM4CMDLOADDATAINDEX can be used with the following opcodes (VI+)
* - IT_LOAD_CONTEXT_REG_INDEX
* - IT_LOAD_SH_REG_INDEX
*/
struct PM4CmdLoadDataIndex {
PM4Type3Header header;
LoadAddressLow addrLo; ///< low 32 address bits for the block in memory from where the CP will
///< fetch the state
u32 addrOffset; ///< addrLo.index = 1 Indexed mode
union {
BitField<0, 16, u32> regOffset; ///< offset in DWords from the register base address
BitField<31, 1, LoadDataFormat> dataFormat;
u32 raw;
};
u32 numDwords; ///< Number of DWords that the CP will fetch and write
///< into the chip. A value of zero will fetch nothing
};
/**
* PM4CMDSETDATA can be used with the following opcodes:
*
* - IT_SET_CONFIG_REG
* - IT_SET_CONTEXT_REG
* - IT_SET_CONTEXT_REG_INDIRECT
* - IT_SET_SH_REG
* - IT_SET_SH_REG_INDEX
* - IT_SET_UCONFIG_REG
*/
/// Leading part of a SET_*_REG packet: the header and the register offset dword. The register
/// values follow directly after this struct in the command stream (ProcessCmdList copies them
/// from `header + 2`).
struct PM4CmdSetData {
PM4Type3Header header;
union {
u32 raw;
BitField<0, 16, u32> regOffset; ///< Offset in DWords from the register base address
BitField<28, 4, u32> index; ///< Index for UCONFIG/CONTEXT on CI+
///< Program to zero for other opcodes and on SI
};
/// Writes a SetContextReg packet at `cmdbuf` whose body consists of `data...`, by forwarding
/// to Write<>(). Returns the pointer just past the written packet.
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
static constexpr u32* SetContextReg(u32* cmdbuf, Args... data) {
return Write<PM4ItOpcode::SetContextReg>(cmdbuf, type, data...);
}
/// Writes a SetShReg packet at `cmdbuf` whose body consists of `data...`, by forwarding to
/// Write<>(). Returns the pointer just past the written packet.
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
static constexpr u32* SetShReg(u32* cmdbuf, Args... data) {
return Write<PM4ItOpcode::SetShReg>(cmdbuf, type, data...);
}
};
struct PM4CmdNop {
PM4Type3Header header;
};
struct PM4CmdDrawIndexOffset2 {
PM4Type3Header header;
u32 maxSize; ///< Maximum number of indices
u32 indexOffset; ///< Zero based starting index number in the index buffer
u32 indexCount; ///< number of indices in the Index Buffer
u32 drawInitiator; ///< draw Initiator Register
};
struct PM4CmdDrawIndex2 {
PM4Type3Header header;
u32 maxSize; ///< maximum number of indices
u32 indexBaseLo; ///< base Address Lo [31:1] of Index Buffer
///< (Word-Aligned). Written to the VGT_DMA_BASE register.
u32 indexBaseHi; ///< base Address Hi [39:32] of Index Buffer.
///< Written to the VGT_DMA_BASE_HI register
u32 indexCount; ///< number of indices in the Index Buffer.
///< Written to the VGT_NUM_INDICES register.
u32 drawInitiator; ///< written to the VGT_DRAW_INITIATOR register
};
struct PM4CmdDrawIndexType {
PM4Type3Header header;
union {
u32 raw;
BitField<0, 2, u32> indexType; ///< Select 16 Vs 32bit index
BitField<2, 2, u32> swapMode; ///< DMA swap mode
};
};
/// Layout of a DrawIndexAuto packet. ProcessCmdList copies `index_count` into regs.num_indices
/// and `draw_initiator` into regs.draw_initiator.
struct PM4CmdDrawIndexAuto {
PM4Type3Header header;
u32 index_count;
u32 draw_initiator;
};
/// Data source selector of PM4CmdEventWriteEop::dataSel. ProcessCmdList currently accepts only
/// Data64.
enum class DataSelect : u32 {
None = 0,
Data32Low = 1,
Data64 = 2,
GpuClock64 = 3,
PerfCounter = 4,
};
/// Interrupt action selector of PM4CmdEventWriteEop::intSel. ProcessCmdList currently accepts
/// only None.
enum class InterruptSelect : u32 {
None = 0,
IrqOnly = 1,
IrqWhenWriteConfirm = 2,
};
/// Layout of an EventWriteEop packet: event selection, a destination address and the 64-bit
/// value to store there.
struct PM4CmdEventWriteEop {
    PM4Type3Header header;
    union {
        u32 event_control;
        BitField<0, 6, u32> eventType;  ///< Event type written to VGT_EVENT_INITIATOR
        BitField<8, 4, u32> eventIndex; ///< Event index
    };
    u32 addressLo;
    union {
        u32 data_control;
        BitField<0, 16, u32> addressHi;          ///< High bits of address
        BitField<24, 2, InterruptSelect> intSel; ///< Selects interrupt action for end-of-pipe
        BitField<29, 3, DataSelect> dataSel;     ///< Selects source of data
    };
    u32 dataLo; ///< Value that will be written to memory when event occurs
    u32 dataHi; ///< Value that will be written to memory when event occurs

    /// Destination pointer assembled from addressHi (bits 32 and up) and addressLo.
    u64* Address() const {
        const u64 upper_bits = u64(addressHi) << 32;
        return reinterpret_cast<u64*>(addressLo | upper_bits);
    }

    /// 64-bit payload assembled from dataHi (upper half) and dataLo (lower half).
    u64 DataQWord() const {
        const u64 upper_half = u64(dataHi) << 32;
        return dataLo | upper_half;
    }
};
/// Layout of a DmaData packet: a control dword, the source (address or immediate data),
/// the destination address and a command dword. ProcessCmdList does not act on any of these
/// fields.
struct PM4DmaData {
PM4Type3Header header;
union {
BitField<0, 1, u32> engine;
BitField<12, 1, u32> src_atc;
BitField<13, 2, u32> src_cache_policy;
BitField<15, 1, u32> src_volatile;
BitField<20, 2, u32> dst_sel;
BitField<24, 1, u32> dst_atc;
BitField<25, 2, u32> dst_cache_policy;
BitField<27, 1, u32> dst_volatile;
BitField<29, 2, u32> src_sel;
BitField<31, 1, u32> cp_sync;
};
// The same dword is either the low source address or an immediate data value.
union {
u32 src_addr_lo;
u32 data;
};
u32 src_addr_hi;
u32 dst_addr_lo;
u32 dst_addr_hi;
u32 command;
};
} // namespace AmdGpu

View file

@ -0,0 +1,62 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace AmdGpu {
/// Opcode values of PM4 type 3 packets, stored in the 8-bit PM4Type3Header::opcode field.
/// NOTE(review): IndirectBuffer and CondIndirectBuffer share the value 0x3F, so the two cannot
/// be told apart in a switch — confirm the aliasing is intended.
/// NOTE(review): `PremableCntl` looks like a misspelling of "PreambleCntl"; it is left unchanged
/// because renaming would alter the public enumerator.
enum class PM4ItOpcode : u32 {
Nop = 0x10,
SetBase = 0x11,
ClearState = 0x12,
IndexBufferSize = 0x13,
DispatchDirect = 0x15,
DispatchIndirect = 0x16,
AtomicGds = 0x1D,
Atomic = 0x1E,
OcclusionQuery = 0x1F,
SetPredication = 0x20,
RegRmw = 0x21,
CondExec = 0x22,
PredExec = 0x23,
DrawIndirect = 0x24,
DrawIndexIndirect = 0x25,
IndexBase = 0x26,
DrawIndex2 = 0x27,
ContextControl = 0x28,
IndexType = 0x2A,
DrawIndirectMulti = 0x2C,
DrawIndexAuto = 0x2D,
NumInstances = 0x2F,
DrawIndexMultiAuto = 0x30,
IndirectBufferConst = 0x33,
DrawIndexOffset2 = 0x35,
WriteData = 0x37,
DrawIndexIndirectMulti = 0x38,
MemSemaphore = 0x39,
IndirectBuffer = 0x3F,
CondIndirectBuffer = 0x3F,
CopyData = 0x40,
CommandProcessorDma = 0x41,
SurfaceSync = 0x43,
CondWrite = 0x45,
EventWrite = 0x46,
EventWriteEop = 0x47,
EventWriteEos = 0x48,
PremableCntl = 0x4A,
DmaData = 0x50,
ContextRegRmw = 0x51,
LoadShReg = 0x5F,
LoadConfigReg = 0x60,
LoadContextReg = 0x61,
SetConfigReg = 0x68,
SetContextReg = 0x69,
SetContextRegIndirect = 0x73,
SetShReg = 0x76,
SetShRegOffset = 0x77,
SetUconfigReg = 0x79
};
} // namespace AmdGpu

View file

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/bit_field.h"
#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
namespace AmdGpu {
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
// Buffer resource descriptor: a 64-bit word with the base address, stride and swizzle flags,
// then the record count, then a 32-bit word with the destination selects and format fields.
struct Buffer {
// First 64 bits. Bits 44..47 are not mapped to a field.
union {
BitField<0, 44, u64> base_address;
BitField<48, 14, u64> stride;
BitField<62, 1, u64> cache_swizzle;
BitField<63, 1, u64> swizzle_enable;
};
u32 num_records;
// Last 32 bits: one 3-bit destination select per component, then the number and data format.
union {
BitField<0, 3, u32> dst_sel_x;
BitField<3, 3, u32> dst_sel_y;
BitField<6, 3, u32> dst_sel_z;
BitField<9, 3, u32> dst_sel_w;
BitField<12, 3, NumberFormat> num_format;
BitField<15, 4, DataFormat> data_format;
BitField<19, 2, u32> element_size;
BitField<21, 2, u32> index_stride;
};
};
} // namespace AmdGpu

View file

@ -1,86 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <climits>
#include <utility>
#include "common/types.h"
namespace VideoCore {
// Based on Table 8.13 Data and Image Formats in Sea Islands Series Instruction Set Architecture
enum class PixelFormat : u32 {
Invalid,
R32G32B32A32_Float,
B32G32R32A32_Float,
R32G32B32X32_Float,
B32G32R32X32_Float,
R32G32B32A32_Uint,
R32G32B32A32_Sint,
R32G32B32_Float,
R32G32B32_Uint,
R32G32B32_Sint,
R16G16B16A16_Float,
R16G16B16X16_Float,
B16G16R16X16_Float,
R16G16B16A16_Uint,
R16G16B16A16_Sint,
R16G16B16A16_Unorm,
B16G16R16A16_Unorm,
R16G16B16X16_Unorm,
B16G16R16X16_Unorm,
R16G16B16A16_Snorm,
L32A32_Float,
R32G32_Float,
R32G32_Uint,
R32G32_Sint,
R11G11B10_Float,
R8G8B8A8_Unorm,
R8G8B8X8_Unorm,
R8G8B8A8_UnormSrgb,
R8G8B8X8_UnormSrgb,
R8G8B8A8_Uint,
R8G8B8A8_Snorm,
R8G8B8A8_Sint,
L16A16_Float,
R16G16_Float,
L16A16_Unorm,
R16G16_Unorm,
R16G16_Uint,
R16G16_Snorm,
R16G16_Sint,
R32_Float,
L32_Float,
A32_Float,
R32_Uint,
R32_Sint,
R8G8_Unorm,
R8G8_Uint,
R8G8_Snorm,
R8G8_Sint,
L8A8_Unorm,
L8A8_UnormSrgb,
R16_Float,
L16_Float,
A16_Float,
R16_Unorm,
L16_Unorm,
A16_Unorm,
R16_Uint,
R16_Snorm,
R16_Sint,
R8_Unorm,
L8_Unorm,
L8_UnormSrgb,
R8_Uint,
R8_Snorm,
R8_Sint,
A8_Unorm,
};
constexpr bool IsDepthStencilFormat(PixelFormat format) {
return false;
}
} // namespace VideoCore

View file

@ -95,9 +95,9 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) {
Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
const ImageInfo& info_, VAddr cpu_addr)
: instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(),
instance->GetAllocator()},
cpu_addr{cpu_addr}, cpu_addr_end{cpu_addr + info.guest_size_bytes} {
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{cpu_addr},
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
vk::ImageCreateFlags flags{};
if (info.type == vk::ImageType::e2D && info.resources.layers >= 6 &&
info.size.width == info.size.height) {

View file

@ -6,7 +6,6 @@
#include "common/enum.h"
#include "common/types.h"
#include "core/libraries/videoout/buffer.h"
#include "video_core/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h"

View file

@ -6,45 +6,14 @@
namespace VideoCore {
[[nodiscard]] vk::ImageViewType ConvertImageViewType(const ImageViewType type) {
switch (type) {
case ImageViewType::e1D:
return vk::ImageViewType::e1D;
case ImageViewType::e2D:
return vk::ImageViewType::e2D;
case ImageViewType::e3D:
return vk::ImageViewType::e3D;
case ImageViewType::Buffer:
break;
default:
break;
}
UNREACHABLE_MSG("Invalid image type={}", static_cast<u32>(type));
return {};
}
[[nodiscard]] vk::Format ConvertPixelFormat(const PixelFormat format) {
switch (format) {
default:
break;
}
UNREACHABLE_MSG("Unknown format={}", static_cast<u32>(format));
return {};
}
ImageView::ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const ImageViewInfo& info_, vk::Image image)
: info{info_} {
const vk::ImageViewCreateInfo image_view_ci = {
.image = image,
.viewType = ConvertImageViewType(info.type),
.format = ConvertPixelFormat(info.format),
.components{
.r = vk::ComponentSwizzle::eIdentity,
.g = vk::ComponentSwizzle::eIdentity,
.b = vk::ComponentSwizzle::eIdentity,
.a = vk::ComponentSwizzle::eIdentity,
},
.viewType = info.type,
.format = info.format,
.components = info.mapping,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0U,

View file

@ -3,7 +3,6 @@
#pragma once
#include "video_core/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/texture_cache/types.h"
@ -25,30 +24,26 @@ enum class ImageViewType : u32 {
Buffer,
};
enum class SwizzleSource : u32 {
Zero = 0,
One = 1,
R = 2,
G = 3,
B = 4,
A = 5,
};
struct ImageViewInfo {
ImageViewType type{};
PixelFormat format{};
vk::ImageViewType type{};
vk::Format format{};
SubresourceRange range;
u8 x_source = static_cast<u8>(SwizzleSource::R);
u8 y_source = static_cast<u8>(SwizzleSource::G);
u8 z_source = static_cast<u8>(SwizzleSource::B);
u8 w_source = static_cast<u8>(SwizzleSource::A);
vk::ComponentMapping mapping{};
auto operator<=>(const ImageViewInfo&) const = default;
};
class ImageView {
struct ImageView {
explicit ImageView(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const ImageViewInfo& info, vk::Image image);
~ImageView();
ImageView(const ImageView&) = delete;
ImageView& operator=(const ImageView&) = delete;
ImageView(ImageView&&) = default;
ImageView& operator=(ImageView&&) = default;
ImageId image_id{};
Extent3D size{0, 0, 0};
ImageViewInfo info{};