shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)

* shader_recompiler: Add LDEXP

* shader_recompiler: Add most image integer atomic ops

* shader_recompiler: Implement shared memory load/store

* shader_recompiler: More image atomics

* externals: Update sirit

* clang format

* cmake: Add missing files

* shader_recompiler: Fix some atomic bugs

* shader_recompiler: Vs outputs

* shader_recompiler: Shared mem has side-effects, fix format component order

* shader_recompiler: Inline constant buffer impl

* video_core: Fix regressions

* Work

* Fixup a few things
This commit is contained in:
TheTurtle 2024-07-05 00:15:44 +03:00 committed by GitHub
parent af3bbc33e9
commit 6ceab6dfac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
69 changed files with 1597 additions and 310 deletions

View file

@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
break;
}
case PM4ItOpcode::IndexBufferSize: {
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
regs.num_indices = index_size->num_indices;
break;
}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break;

View file

@ -85,14 +85,14 @@ struct Liverpool {
} settings;
UserData user_data;
template <typename T = u8>
const T* Address() const {
template <typename T = u8*>
const T Address() const {
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
return reinterpret_cast<const T*>(addr);
return reinterpret_cast<const T>(addr);
}
std::span<const u32> Code() const {
const u32* code = Address<u32>();
const u32* code = Address<u32*>();
BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
@ -121,20 +121,26 @@ struct Liverpool {
BitField<0, 6, u64> num_vgprs;
BitField<6, 4, u64> num_sgprs;
BitField<33, 5, u64> num_user_regs;
BitField<47, 9, u64> lds_dwords;
} settings;
INSERT_PADDING_WORDS(1);
u32 resource_limits;
INSERT_PADDING_WORDS(0x2A);
UserData user_data;
template <typename T = u8>
const T* Address() const {
template <typename T = u8*>
const T Address() const {
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
return reinterpret_cast<const T*>(addr);
return reinterpret_cast<const T>(addr);
}
u32 SharedMemSize() const noexcept {
// lds_dwords is in units of 128 dwords. We return bytes.
return settings.lds_dwords.Value() * 128 * 4;
}
std::span<const u32> Code() const {
const u32* code = Address<u32>();
const u32* code = Address<u32*>();
BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
@ -144,7 +150,7 @@ struct Liverpool {
template <typename Shader>
static constexpr auto* GetBinaryInfo(const Shader& sh) {
const auto* code = sh.template Address<u32>();
const auto* code = sh.template Address<u32*>();
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
return bininfo;
@ -208,6 +214,10 @@ struct Liverpool {
BitField<18, 1, u32> use_vtx_render_target_idx;
BitField<19, 1, u32> use_vtx_viewport_idx;
BitField<20, 1, u32> use_vtx_kill_flag;
BitField<21, 1, u32> vs_out_misc_enable;
BitField<22, 1, u32> vs_out_ccdist0_enable;
BitField<23, 1, u32> vs_out_ccdist1_enable;
BitField<25, 1, u32> use_vtx_gs_cut_flag;
bool IsClipDistEnabled(u32 index) const {
return (clip_distance_enable.Value() >> index) & 1;
@ -469,7 +479,7 @@ struct Liverpool {
template <typename T = VAddr>
T Address() const {
return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32);
return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
}
};
@ -1021,6 +1031,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);

View file

@ -549,8 +549,8 @@ struct PM4DumpConstRam {
u32 addr_hi;
template <typename T>
T* Address() const {
return reinterpret_cast<T*>((u64(addr_hi) << 32u) | addr_lo);
T Address() const {
return reinterpret_cast<T>((u64(addr_hi) << 32u) | addr_lo);
}
[[nodiscard]] u32 Offset() const {
@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
u32 addr_hi;
};
struct PM4CmdDrawIndexBufferSize {
PM4Type3Header header;
u32 num_indices;
};
struct PM4CmdIndirectBuffer {
PM4Type3Header header;
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned

View file

@ -21,32 +21,45 @@ enum class CompSwizzle : u32 {
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
struct Buffer {
union {
BitField<0, 44, u64> base_address;
BitField<48, 14, u64> stride;
BitField<62, 1, u64> cache_swizzle;
BitField<63, 1, u64> swizzle_enable;
};
u64 base_address : 44;
u64 : 4;
u64 stride : 14;
u64 cache_swizzle : 1;
u64 swizzle_enable : 1;
u32 num_records;
union {
BitField<0, 3, u32> dst_sel_x;
BitField<3, 3, u32> dst_sel_y;
BitField<6, 3, u32> dst_sel_z;
BitField<9, 3, u32> dst_sel_w;
BitField<0, 12, u32> dst_sel;
BitField<12, 3, NumberFormat> num_format;
BitField<15, 4, DataFormat> data_format;
BitField<19, 2, u32> element_size;
BitField<21, 2, u32> index_stride;
BitField<23, 1, u32> add_tid_enable;
};
u32 dst_sel_x : 3;
u32 dst_sel_y : 3;
u32 dst_sel_z : 3;
u32 dst_sel_w : 3;
u32 num_format : 3;
u32 data_format : 4;
u32 element_size : 2;
u32 index_stride : 2;
u32 add_tid_enable : 1;
operator bool() const noexcept {
return base_address != 0;
}
bool operator==(const Buffer& other) const noexcept {
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
}
CompSwizzle GetSwizzle(u32 comp) const noexcept {
return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
return static_cast<CompSwizzle>(select[comp]);
}
NumberFormat GetNumberFmt() const noexcept {
return static_cast<NumberFormat>(num_format);
}
DataFormat GetDataFmt() const noexcept {
return static_cast<DataFormat>(data_format);
}
u32 GetStride() const noexcept {
return stride == 0 ? 1U : stride.Value();
return stride == 0 ? 1U : stride;
}
u32 GetStrideElements(u32 element_size) const noexcept {
@ -61,6 +74,7 @@ struct Buffer {
return GetStride() * num_records;
}
};
static_assert(sizeof(Buffer) == 16); // 128bits
enum class ImageType : u64 {
Buffer = 0,