shader_recompiler: Remove special case buffers and add support for aliasing (#2428)

* shader_recompiler: Move shared mem lowering into emitter

* IR can be quite verbose during first stages of translation, before ssa and constant prop passes have run that drastically simplify it. This lowering can also be done during emission so why not do it then to save some compilation time

* runtime_info: Pack PsColorBuffer into 8 bytes

* Drops the size of the total structure by half from 396 to 204 bytes. Also should make comparison of the array a bit faster, since its a hot path done every draw

* emit_spirv_context: Add infrastructure for buffer aliases

* Splits out the buffer creation function so it can be reused when defining multiple type aliases

* shader_recompiler: Merge srt_flatbuf into buffers list

* Its no longer a special case, yay

* shader_recompiler: Complete buffer aliasing support

* Add a bunch more types into buffers, such as F32 for float reads/writes and 8/16 bit integer types for formatted buffers

* shader_recompiler: Remove existing shared memory emulation

* The current impl relies on backend side implementaton and hooking into every shared memory access. It also doesnt handle atomics. Will be replaced by an IR pass that solves these issues

* shader_recompiler: Reintroduce shared memory on ssbo emulation

* Now it is performed with an IR pass, and combined with the previous commit cleanup, is fully transparent from the backend, other than requiring workgroup_index be provided as an attribute (computing this on every shared memory access is gonna be too verbose

* clang format

* buffer_cache: Reduce buffer sizes

* vk_rasterizer: Cleanup resource binding code

* Reduce noise in the functions, also remove some arguments which are class members

* Fix gcc
This commit is contained in:
TheTurtle 2025-02-15 14:06:56 +02:00 committed by GitHub
parent 290e127a4f
commit 82cacec8eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 675 additions and 625 deletions

View file

@ -78,7 +78,20 @@ bool IsDataRingInstruction(const IR::Inst& inst) {
}
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
return IR::Type::U32;
switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferU8:
case IR::Opcode::StoreBufferU8:
return IR::Type::U8;
case IR::Opcode::LoadBufferU16:
case IR::Opcode::StoreBufferU16:
return IR::Type::U16;
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32:
// Formatted buffer loads can use a variety of types.
return IR::Type::U32 | IR::Type::F32 | IR::Type::U16 | IR::Type::U8;
default:
return IR::Type::U32;
}
}
bool IsImageAtomicInstruction(const IR::Inst& inst) {
@ -121,11 +134,9 @@ public:
u32 Add(const BufferResource& desc) {
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
// Only one GDS binding can exist.
if (desc.is_gds_buffer && existing.is_gds_buffer) {
return true;
}
return desc.sharp_idx == existing.sharp_idx && desc.inline_cbuf == existing.inline_cbuf;
return desc.sharp_idx == existing.sharp_idx &&
desc.inline_cbuf == existing.inline_cbuf &&
desc.buffer_type == existing.buffer_type;
})};
auto& buffer = buffer_resources[index];
buffer.used_types |= desc.used_types;
@ -272,6 +283,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
.sharp_idx = std::numeric_limits<u32>::max(),
.used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
.inline_cbuf = cbuf,
.buffer_type = BufferType::Guest,
});
}
@ -286,6 +298,7 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
binding = descriptors.Add(BufferResource{
.sharp_idx = sharp,
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
.buffer_type = BufferType::Guest,
.is_written = IsBufferStore(inst),
.is_formatted = inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
@ -402,13 +415,10 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
}
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
// Insert gds binding in the shader if it doesn't exist already.
// The buffer is used for append/consume counters.
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
const u32 binding = descriptors.Add(BufferResource{
.used_types = IR::Type::U32,
.inline_cbuf = GdsSharp,
.is_gds_buffer = true,
.inline_cbuf = AmdGpu::Buffer::Null(),
.buffer_type = BufferType::GdsBuffer,
.is_written = true,
});
@ -420,12 +430,12 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
};
// Attempt to deduce the GDS address of counter at compile time.
const u32 gds_addr = [&] {
const IR::Value& gds_offset = inst.Arg(0);
if (gds_offset.IsImmediate()) {
// Nothing to do, offset is known.
return gds_offset.U32() & 0xFFFF;
}
u32 gds_addr = 0;
const IR::Value& gds_offset = inst.Arg(0);
if (gds_offset.IsImmediate()) {
// Nothing to do, offset is known.
gds_addr = gds_offset.U32() & 0xFFFF;
} else {
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to track M0 source");
@ -436,8 +446,8 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
m0_val += prod->Arg(1).U32();
}
return m0_val & 0xFFFF;
}();
gds_addr = m0_val & 0xFFFF;
}
// Patch instruction.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};