mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-06-06 18:53:16 +00:00
shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)
* shader_recompiler: Add LDEXP
* shader_recompiler: Add most image integer atomic ops
* shader_recompiler: Implement shared memory load/store
* shader_recompiler: More image atomics
* externals: Update sirit
* clang format
* cmake: Add missing files
* shader_recompiler: Fix some atomic bugs
* shader_recompiler: Vs outputs
* shader_recompiler: Shared mem has side-effects, fix format component order
* shader_recompiler: Inline constant buffer impl
* video_core: Fix regressions
* Work
* Fixup a few things
This commit is contained in:
parent
af3bbc33e9
commit
6ceab6dfac
69 changed files with 1597 additions and 310 deletions
|
@ -323,6 +323,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||
regs.index_base_address.base_addr_hi.Assign(index_base->addr_hi);
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexBufferSize: {
|
||||
const auto* index_size = reinterpret_cast<const PM4CmdDrawIndexBufferSize*>(header);
|
||||
regs.num_indices = index_size->num_indices;
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWrite: {
|
||||
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
|
||||
break;
|
||||
|
|
|
@ -85,14 +85,14 @@ struct Liverpool {
|
|||
} settings;
|
||||
UserData user_data;
|
||||
|
||||
template <typename T = u8>
|
||||
const T* Address() const {
|
||||
template <typename T = u8*>
|
||||
const T Address() const {
|
||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||
return reinterpret_cast<const T*>(addr);
|
||||
return reinterpret_cast<const T>(addr);
|
||||
}
|
||||
|
||||
std::span<const u32> Code() const {
|
||||
const u32* code = Address<u32>();
|
||||
const u32* code = Address<u32*>();
|
||||
BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
|
@ -121,20 +121,26 @@ struct Liverpool {
|
|||
BitField<0, 6, u64> num_vgprs;
|
||||
BitField<6, 4, u64> num_sgprs;
|
||||
BitField<33, 5, u64> num_user_regs;
|
||||
BitField<47, 9, u64> lds_dwords;
|
||||
} settings;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32 resource_limits;
|
||||
INSERT_PADDING_WORDS(0x2A);
|
||||
UserData user_data;
|
||||
|
||||
template <typename T = u8>
|
||||
const T* Address() const {
|
||||
template <typename T = u8*>
|
||||
const T Address() const {
|
||||
const uintptr_t addr = uintptr_t(address_hi) << 40 | uintptr_t(address_lo) << 8;
|
||||
return reinterpret_cast<const T*>(addr);
|
||||
return reinterpret_cast<const T>(addr);
|
||||
}
|
||||
|
||||
u32 SharedMemSize() const noexcept {
|
||||
// lds_dwords is in units of 128 dwords. We return bytes.
|
||||
return settings.lds_dwords.Value() * 128 * 4;
|
||||
}
|
||||
|
||||
std::span<const u32> Code() const {
|
||||
const u32* code = Address<u32>();
|
||||
const u32* code = Address<u32*>();
|
||||
BinaryInfo bininfo;
|
||||
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
|
||||
const u32 num_dwords = bininfo.length / sizeof(u32);
|
||||
|
@ -144,7 +150,7 @@ struct Liverpool {
|
|||
|
||||
template <typename Shader>
|
||||
static constexpr auto* GetBinaryInfo(const Shader& sh) {
|
||||
const auto* code = sh.template Address<u32>();
|
||||
const auto* code = sh.template Address<u32*>();
|
||||
const auto* bininfo = std::bit_cast<const BinaryInfo*>(code + (code[1] + 1) * 2);
|
||||
ASSERT_MSG(bininfo->Valid(), "Invalid shader binary header");
|
||||
return bininfo;
|
||||
|
@ -208,6 +214,10 @@ struct Liverpool {
|
|||
BitField<18, 1, u32> use_vtx_render_target_idx;
|
||||
BitField<19, 1, u32> use_vtx_viewport_idx;
|
||||
BitField<20, 1, u32> use_vtx_kill_flag;
|
||||
BitField<21, 1, u32> vs_out_misc_enable;
|
||||
BitField<22, 1, u32> vs_out_ccdist0_enable;
|
||||
BitField<23, 1, u32> vs_out_ccdist1_enable;
|
||||
BitField<25, 1, u32> use_vtx_gs_cut_flag;
|
||||
|
||||
bool IsClipDistEnabled(u32 index) const {
|
||||
return (clip_distance_enable.Value() >> index) & 1;
|
||||
|
@ -469,7 +479,7 @@ struct Liverpool {
|
|||
|
||||
template <typename T = VAddr>
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>(base_addr_lo | u64(base_addr_hi) << 32);
|
||||
return reinterpret_cast<T>((base_addr_lo & ~1U) | u64(base_addr_hi) << 32);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1021,6 +1031,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
|
|||
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
|
||||
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
|
||||
static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
|
||||
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
|
||||
|
|
|
@ -549,8 +549,8 @@ struct PM4DumpConstRam {
|
|||
u32 addr_hi;
|
||||
|
||||
template <typename T>
|
||||
T* Address() const {
|
||||
return reinterpret_cast<T*>((u64(addr_hi) << 32u) | addr_lo);
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>((u64(addr_hi) << 32u) | addr_lo);
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 Offset() const {
|
||||
|
@ -581,6 +581,11 @@ struct PM4CmdDrawIndexBase {
|
|||
u32 addr_hi;
|
||||
};
|
||||
|
||||
struct PM4CmdDrawIndexBufferSize {
|
||||
PM4Type3Header header;
|
||||
u32 num_indices;
|
||||
};
|
||||
|
||||
struct PM4CmdIndirectBuffer {
|
||||
PM4Type3Header header;
|
||||
u32 ibase_lo; ///< Indirect buffer base address, must be 4 byte aligned
|
||||
|
|
|
@ -21,32 +21,45 @@ enum class CompSwizzle : u32 {
|
|||
|
||||
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
|
||||
struct Buffer {
|
||||
union {
|
||||
BitField<0, 44, u64> base_address;
|
||||
BitField<48, 14, u64> stride;
|
||||
BitField<62, 1, u64> cache_swizzle;
|
||||
BitField<63, 1, u64> swizzle_enable;
|
||||
};
|
||||
u64 base_address : 44;
|
||||
u64 : 4;
|
||||
u64 stride : 14;
|
||||
u64 cache_swizzle : 1;
|
||||
u64 swizzle_enable : 1;
|
||||
u32 num_records;
|
||||
union {
|
||||
BitField<0, 3, u32> dst_sel_x;
|
||||
BitField<3, 3, u32> dst_sel_y;
|
||||
BitField<6, 3, u32> dst_sel_z;
|
||||
BitField<9, 3, u32> dst_sel_w;
|
||||
BitField<0, 12, u32> dst_sel;
|
||||
BitField<12, 3, NumberFormat> num_format;
|
||||
BitField<15, 4, DataFormat> data_format;
|
||||
BitField<19, 2, u32> element_size;
|
||||
BitField<21, 2, u32> index_stride;
|
||||
BitField<23, 1, u32> add_tid_enable;
|
||||
};
|
||||
u32 dst_sel_x : 3;
|
||||
u32 dst_sel_y : 3;
|
||||
u32 dst_sel_z : 3;
|
||||
u32 dst_sel_w : 3;
|
||||
u32 num_format : 3;
|
||||
u32 data_format : 4;
|
||||
u32 element_size : 2;
|
||||
u32 index_stride : 2;
|
||||
u32 add_tid_enable : 1;
|
||||
|
||||
operator bool() const noexcept {
|
||||
return base_address != 0;
|
||||
}
|
||||
|
||||
bool operator==(const Buffer& other) const noexcept {
|
||||
return std::memcmp(this, &other, sizeof(Buffer)) == 0;
|
||||
}
|
||||
|
||||
CompSwizzle GetSwizzle(u32 comp) const noexcept {
|
||||
return static_cast<CompSwizzle>((dst_sel.Value() >> (comp * 3)) & 0x7);
|
||||
const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
|
||||
return static_cast<CompSwizzle>(select[comp]);
|
||||
}
|
||||
|
||||
NumberFormat GetNumberFmt() const noexcept {
|
||||
return static_cast<NumberFormat>(num_format);
|
||||
}
|
||||
|
||||
DataFormat GetDataFmt() const noexcept {
|
||||
return static_cast<DataFormat>(data_format);
|
||||
}
|
||||
|
||||
u32 GetStride() const noexcept {
|
||||
return stride == 0 ? 1U : stride.Value();
|
||||
return stride == 0 ? 1U : stride;
|
||||
}
|
||||
|
||||
u32 GetStrideElements(u32 element_size) const noexcept {
|
||||
|
@ -61,6 +74,7 @@ struct Buffer {
|
|||
return GetStride() * num_records;
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Buffer) == 16); // 128bits
|
||||
|
||||
enum class ImageType : u64 {
|
||||
Buffer = 0,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue