shader_recompiler: BUFFER_ATOMIC & DS_* Opcodes (#428)

* BUFFER_ATOMIC | DS_MINMAX_U32

- Emission of BufferAtomicU32
- Addition of Buffer opcodes to IR
- Translator for BUFFER_ATOMIC Opcode
- Translators for the DS_MIN_U32 and DS_MAX_U32 opcodes

* Clang Format & UNREACHABLE_MSG

* clang

* no crash on compile

* clang

* Shared Atomics

* reuse

* rm vscode

* resolve

* opcodes

* side effects

* attempt fix shader comp

* failed attempt to fix

* clang

* do correct vdata set (still fails)

* clang

* fixed BUFFER_ATOMIC_ADD, DS_ADD_U32 fails

* data share should work

* clang

* resource tracking for buffer atomic

* clang

* distinguish RTN opcodes

* clean IsBufferInstruction

---------

Co-authored-by: microsoftv <6063922+microsoftv@users.noreply.github.com>
This commit is contained in:
Lizardy 2024-08-17 15:06:06 -04:00 committed by GitHub
parent 3be2e4b2b8
commit 63938ba8dd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 375 additions and 27 deletions

View file

@ -25,6 +25,18 @@ void Translator::EmitDataShare(const GcnInst& inst) {
return DS_WRITE(32, false, true, inst);
case Opcode::DS_WRITE2_B64:
return DS_WRITE(64, false, true, inst);
case Opcode::DS_ADD_U32:
return DS_ADD_U32(inst, false);
case Opcode::DS_MIN_U32:
return DS_MIN_U32(inst, false);
case Opcode::DS_MAX_U32:
return DS_MAX_U32(inst, false);
case Opcode::DS_ADD_RTN_U32:
return DS_ADD_U32(inst, true);
case Opcode::DS_MIN_RTN_U32:
return DS_MIN_U32(inst, true);
case Opcode::DS_MAX_RTN_U32:
return DS_MAX_U32(inst, true);
default:
LogMissingOpcode(inst);
}
@ -110,6 +122,42 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
}
}
// Translates DS_ADD_U32 / DS_ADD_RTN_U32 into a shared-memory atomic integer add.
//
// inst: decoded instruction; src[0] supplies the base address, src[1] the value to
//       add, and control.ds the immediate offset fields.
// rtn:  true for the RTN opcode variant, in which case the value produced by the
//       atomic is written to inst.dst[0]; otherwise that value is dropped.
void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    // Single-address DS instructions encode one 16-bit offset split across the two
    // 8-bit fields (offset1 = high byte, offset0 = low byte). Using offset0 alone
    // would silently truncate any offset above 255.
    const IR::U32 offset =
        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}
// Translates DS_MIN_U32 / DS_MIN_RTN_U32 into a shared-memory atomic integer minimum.
//
// inst: decoded instruction; src[0] supplies the base address, src[1] the value to
//       compare against, and control.ds the immediate offset fields.
// rtn:  true for the RTN opcode variant, in which case the value produced by the
//       atomic is written to inst.dst[0]; otherwise that value is dropped.
void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    // Single-address DS instructions encode one 16-bit offset split across the two
    // 8-bit fields (offset1 = high byte, offset0 = low byte). Using offset0 alone
    // would silently truncate any offset above 255.
    const IR::U32 offset =
        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    // U32 opcode: the trailing `false` presumably selects the unsigned comparison.
    const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}
// Translates DS_MAX_U32 / DS_MAX_RTN_U32 into a shared-memory atomic integer maximum.
//
// inst: decoded instruction; src[0] supplies the base address, src[1] the value to
//       compare against, and control.ds the immediate offset fields.
// rtn:  true for the RTN opcode variant, in which case the value produced by the
//       atomic is written to inst.dst[0]; otherwise that value is dropped.
void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) {
    const IR::U32 addr{GetSrc(inst.src[0])};
    const IR::U32 data{GetSrc(inst.src[1])};
    // Single-address DS instructions encode one 16-bit offset split across the two
    // 8-bit fields (offset1 = high byte, offset0 = low byte). Using offset0 alone
    // would silently truncate any offset above 255.
    const IR::U32 offset =
        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
    const IR::U32 addr_offset = ir.IAdd(addr, offset);
    // U32 opcode: the trailing `false` presumably selects the unsigned comparison.
    const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
    if (rtn) {
        SetDst(inst.dst[0], IR::U32{original_val});
    }
}
// Translates S_BARRIER by emitting a barrier through the IR emitter.
void Translator::S_BARRIER() {
ir.Barrier();
}

View file

@ -187,6 +187,7 @@ public:
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
// Translates a buffer atomic instruction; `op` selects which atomic operation is emitted.
void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
// Vector interpolation
void V_INTERP_P2_F32(const GcnInst& inst);
@ -196,6 +197,9 @@ public:
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
// Shared-memory atomic add / min / max translators. `rtn` is true for the *_RTN_*
// opcode variants, in which case the value produced by the atomic is written to
// the instruction's destination operand.
void DS_ADD_U32(const GcnInst& inst, bool rtn);
void DS_MIN_U32(const GcnInst& inst, bool rtn);
void DS_MAX_U32(const GcnInst& inst, bool rtn);
void V_READFIRSTLANE_B32(const GcnInst& inst);
void V_READLANE_B32(const GcnInst& inst);
void V_WRITELANE_B32(const GcnInst& inst);

View file

@ -104,6 +104,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_STORE_FORMAT(3, false, false, inst);
case Opcode::BUFFER_STORE_DWORDX4:
return BUFFER_STORE_FORMAT(4, false, false, inst);
// Buffer atomic operations
case Opcode::BUFFER_ATOMIC_ADD:
return BUFFER_ATOMIC(AtomicOp::Add, inst);
default:
LogMissingOpcode(inst);
}
@ -435,6 +439,60 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_form
}
}
// TODO: U64
// Translates a MUBUF buffer atomic instruction into the matching IR buffer atomic.
//
// op:   which atomic operation to emit.
// inst: decoded instruction. Operand usage:
//         src[0] - vector register read as the address operand
//         src[1] - vector register read as the data operand; it also receives the
//                  value produced by the atomic when the GLC bit is set
//         src[2] - selects the group of four scalar registers forming the buffer handle
//         src[3] - scalar offset, asserted to be an immediate zero
void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
    const auto& mubuf = inst.control.mubuf;

    // Decode the register operands.
    const IR::VectorReg addr_reg{inst.src[0].code};
    const IR::VectorReg data_reg{inst.src[1].code};
    const IR::ScalarReg handle_base{inst.src[2].code * 4};
    const IR::U32 soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    // Forward the addressing-mode bits of the instruction to the IR.
    IR::BufferInstInfo buffer_info{};
    buffer_info.index_enable.Assign(mubuf.idxen);
    buffer_info.inst_offset.Assign(mubuf.offset);
    buffer_info.offset_enable.Assign(mubuf.offen);

    // Read the operands in the same order as before so the emitted IR is unchanged.
    const IR::Value operand = ir.GetVectorReg<IR::U32>(data_reg);
    const IR::U32 address = ir.GetVectorReg(addr_reg);
    const IR::Value handle =
        ir.CompositeConstruct(ir.GetScalarReg(handle_base), ir.GetScalarReg(handle_base + 1),
                              ir.GetScalarReg(handle_base + 2), ir.GetScalarReg(handle_base + 3));

    // Dispatch on the requested operation; the boolean passed to the min/max
    // helpers is true for the signed variants and false for the unsigned ones.
    const auto emit_atomic = [&] {
        switch (op) {
        case AtomicOp::Swap:
            return ir.BufferAtomicExchange(handle, address, operand, buffer_info);
        case AtomicOp::Add:
            return ir.BufferAtomicIAdd(handle, address, operand, buffer_info);
        case AtomicOp::Smin:
            return ir.BufferAtomicIMin(handle, address, operand, true, buffer_info);
        case AtomicOp::Umin:
            return ir.BufferAtomicIMin(handle, address, operand, false, buffer_info);
        case AtomicOp::Smax:
            return ir.BufferAtomicIMax(handle, address, operand, true, buffer_info);
        case AtomicOp::Umax:
            return ir.BufferAtomicIMax(handle, address, operand, false, buffer_info);
        case AtomicOp::And:
            return ir.BufferAtomicAnd(handle, address, operand, buffer_info);
        case AtomicOp::Or:
            return ir.BufferAtomicOr(handle, address, operand, buffer_info);
        case AtomicOp::Xor:
            return ir.BufferAtomicXor(handle, address, operand, buffer_info);
        case AtomicOp::Inc:
            return ir.BufferAtomicInc(handle, address, operand, buffer_info);
        case AtomicOp::Dec:
            return ir.BufferAtomicDec(handle, address, operand, buffer_info);
        default:
            UNREACHABLE();
        }
    };
    const IR::Value result = emit_atomic();

    // Without GLC the instruction does not write anything back to the data register.
    if (!mubuf.glc) {
        return;
    }
    ir.SetVectorReg(data_reg, IR::U32{result});
}
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg dst_reg{inst.dst[0].code};