mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-18 17:34:52 +00:00
shader_recompiler: BUFFER_ATOMIC & DS_* Opcodes (#428)
* BUFFER_ATOMIC | DS_MINMAX_U32 - Emission of BufferAtomicU32 - Addition of Buffer opcodes to IR - Translator for BUFFER_ATOMIC Opcode - Translators for DS_MAXMIN_U32 Opcodes * Clang Format & UNREACHABLE_MSG * clang * no crash on compile * clang * Shared Atomics * reuse * rm vscode * resolve * opcodes * side effects * attempt fix shader comp * failed attempt to fix * clang * do correct vdata set (still fails) * clang * fixed BUFFER_ATOMIC_ADD, DS_ADD_U32 fails * data share should work * clang * resource tracking for buffer atomic * clang * distinguish RTN opcodes * clean IsBufferInstruction --------- Co-authored-by: microsoftv <6063922+microsoftv@users.noreply.github.com>
This commit is contained in:
parent
3be2e4b2b8
commit
63938ba8dd
11 changed files with 375 additions and 27 deletions
|
@ -25,6 +25,18 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
|||
return DS_WRITE(32, false, true, inst);
|
||||
case Opcode::DS_WRITE2_B64:
|
||||
return DS_WRITE(64, false, true, inst);
|
||||
case Opcode::DS_ADD_U32:
|
||||
return DS_ADD_U32(inst, false);
|
||||
case Opcode::DS_MIN_U32:
|
||||
return DS_MIN_U32(inst, false);
|
||||
case Opcode::DS_MAX_U32:
|
||||
return DS_MAX_U32(inst, false);
|
||||
case Opcode::DS_ADD_RTN_U32:
|
||||
return DS_ADD_U32(inst, true);
|
||||
case Opcode::DS_MIN_RTN_U32:
|
||||
return DS_MIN_U32(inst, true);
|
||||
case Opcode::DS_MAX_RTN_U32:
|
||||
return DS_MAX_U32(inst, true);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
@ -110,6 +122,42 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::S_BARRIER() {
|
||||
ir.Barrier();
|
||||
}
|
||||
|
|
|
@ -187,6 +187,7 @@ public:
|
|||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
|
||||
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
|
||||
void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst);
|
||||
|
||||
// Vector interpolation
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
|
@ -196,6 +197,9 @@ public:
|
|||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void DS_ADD_U32(const GcnInst& inst, bool rtn);
|
||||
void DS_MIN_U32(const GcnInst& inst, bool rtn);
|
||||
void DS_MAX_U32(const GcnInst& inst, bool rtn);
|
||||
void V_READFIRSTLANE_B32(const GcnInst& inst);
|
||||
void V_READLANE_B32(const GcnInst& inst);
|
||||
void V_WRITELANE_B32(const GcnInst& inst);
|
||||
|
|
|
@ -104,6 +104,10 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||
return BUFFER_STORE_FORMAT(3, false, false, inst);
|
||||
case Opcode::BUFFER_STORE_DWORDX4:
|
||||
return BUFFER_STORE_FORMAT(4, false, false, inst);
|
||||
|
||||
// Buffer atomic operations
|
||||
case Opcode::BUFFER_ATOMIC_ADD:
|
||||
return BUFFER_ATOMIC(AtomicOp::Add, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
@ -435,6 +439,60 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, bool is_form
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: U64
|
||||
void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
||||
const auto& mubuf = inst.control.mubuf;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::VectorReg vdata{inst.src[1].code};
|
||||
const IR::ScalarReg srsrc{inst.src[2].code * 4};
|
||||
const IR::U32 soffset{GetSrc(inst.src[3])};
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
||||
|
||||
IR::BufferInstInfo info{};
|
||||
info.index_enable.Assign(mubuf.idxen);
|
||||
info.inst_offset.Assign(mubuf.offset);
|
||||
info.offset_enable.Assign(mubuf.offen);
|
||||
|
||||
IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
|
||||
const IR::U32 address = ir.GetVectorReg(vaddr);
|
||||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
|
||||
ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));
|
||||
|
||||
const IR::Value original_val = [&] {
|
||||
switch (op) {
|
||||
case AtomicOp::Swap:
|
||||
return ir.BufferAtomicExchange(handle, address, vdata_val, info);
|
||||
case AtomicOp::Add:
|
||||
return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
|
||||
case AtomicOp::Smin:
|
||||
return ir.BufferAtomicIMin(handle, address, vdata_val, true, info);
|
||||
case AtomicOp::Umin:
|
||||
return ir.BufferAtomicIMin(handle, address, vdata_val, false, info);
|
||||
case AtomicOp::Smax:
|
||||
return ir.BufferAtomicIMax(handle, address, vdata_val, true, info);
|
||||
case AtomicOp::Umax:
|
||||
return ir.BufferAtomicIMax(handle, address, vdata_val, false, info);
|
||||
case AtomicOp::And:
|
||||
return ir.BufferAtomicAnd(handle, address, vdata_val, info);
|
||||
case AtomicOp::Or:
|
||||
return ir.BufferAtomicOr(handle, address, vdata_val, info);
|
||||
case AtomicOp::Xor:
|
||||
return ir.BufferAtomicXor(handle, address, vdata_val, info);
|
||||
case AtomicOp::Inc:
|
||||
return ir.BufferAtomicInc(handle, address, vdata_val, info);
|
||||
case AtomicOp::Dec:
|
||||
return ir.BufferAtomicDec(handle, address, vdata_val, info);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
|
||||
if (mubuf.glc) {
|
||||
ir.SetVectorReg(vdata, IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue