mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-07 09:36:21 +00:00
shader_recompiler: Implement data share append and consume operations (#814)
* shader_recompiler: Add more format swap modes * texture_cache: Handle stencil texture reads * emulator: Support loading font library * readme: Add thanks section * shader_recompiler: Constant buffers as integers * shader_recompiler: Typed buffers as integers * shader_recompiler: Separate thread bit scalars * We can assume guest shader never mixes them with normal sgprs. This helps avoid errors where SSA could view an sgpr write as dominating a thread bit read, due to how control flow is structurized, even though it's not possible in actual control flow * shader_recompiler: Implement data append/consume operations * clang format * buffer_cache: Simplify invalidation scheme * video_core: Remove some invalidation remnants * adjust
This commit is contained in:
parent
649527a235
commit
13743b27fc
34 changed files with 512 additions and 272 deletions
|
@ -43,6 +43,10 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
|||
return DS_MIN_U32(inst, false, true);
|
||||
case Opcode::DS_MAX_RTN_U32:
|
||||
return DS_MAX_U32(inst, false, true);
|
||||
case Opcode::DS_APPEND:
|
||||
return DS_APPEND(inst);
|
||||
case Opcode::DS_CONSUME:
|
||||
return DS_CONSUME(inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
@ -192,4 +196,18 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) {
|
|||
ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
|
||||
}
|
||||
|
||||
// Translates DS_APPEND: adds the instruction's `offset0` field to the value of M0,
// passes that sum to ir.DataAppend, and writes the returned value to inst.dst[0].
// NOTE(review): the local names suggest the sum is a GDS offset and the result is
// the counter value before the append -- confirm against the IR emitter.
void Translator::DS_APPEND(const GcnInst& inst) {
|
||||
const u32 inst_offset = inst.control.ds.offset0;
|
||||
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
|
||||
const IR::U32 prev = ir.DataAppend(gds_offset);
|
||||
SetDst(inst.dst[0], prev);
|
||||
}
|
||||
|
||||
// Translates DS_CONSUME: adds the instruction's `offset0` field to the value of M0,
// passes that sum to ir.DataConsume, and writes the returned value to inst.dst[0].
// NOTE(review): the local names suggest the sum is a GDS offset and the result is
// the counter value before the consume -- confirm against the IR emitter.
void Translator::DS_CONSUME(const GcnInst& inst) {
|
||||
const u32 inst_offset = inst.control.ds.offset0;
|
||||
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
|
||||
const IR::U32 prev = ir.DataConsume(gds_offset);
|
||||
SetDst(inst.dst[0], prev);
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -31,6 +31,12 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||
case MrtSwizzle::Alt:
|
||||
static constexpr std::array<u32, 4> AltSwizzle = {2, 1, 0, 3};
|
||||
return AltSwizzle[comp];
|
||||
case MrtSwizzle::Reverse:
|
||||
static constexpr std::array<u32, 4> RevSwizzle = {3, 2, 1, 0};
|
||||
return RevSwizzle[comp];
|
||||
case MrtSwizzle::ReverseAlt:
|
||||
static constexpr std::array<u32, 4> AltRevSwizzle = {3, 0, 1, 2};
|
||||
return AltRevSwizzle[comp];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
|
|
@ -73,9 +73,13 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
|||
case Opcode::S_SUB_I32:
|
||||
return S_SUB_U32(inst);
|
||||
case Opcode::S_MIN_U32:
|
||||
return S_MIN_U32(inst);
|
||||
return S_MIN_U32(false, inst);
|
||||
case Opcode::S_MIN_I32:
|
||||
return S_MIN_U32(true, inst);
|
||||
case Opcode::S_MAX_U32:
|
||||
return S_MAX_U32(inst);
|
||||
return S_MAX_U32(false, inst);
|
||||
case Opcode::S_MAX_I32:
|
||||
return S_MAX_U32(true, inst);
|
||||
case Opcode::S_WQM_B64:
|
||||
break;
|
||||
default:
|
||||
|
@ -533,18 +537,18 @@ void Translator::S_ADDC_U32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
|
||||
}
|
||||
|
||||
void Translator::S_MAX_U32(const GcnInst& inst) {
|
||||
void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result = ir.UMax(src0, src1);
|
||||
const IR::U32 result = ir.IMax(src0, src1, is_signed);
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.IEqual(result, src0));
|
||||
}
|
||||
|
||||
void Translator::S_MIN_U32(const GcnInst& inst) {
|
||||
void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 result = ir.UMin(src0, src1);
|
||||
const IR::U32 result = ir.IMin(src0, src1, is_signed);
|
||||
SetDst(inst.dst[0], result);
|
||||
ir.SetScc(ir.IEqual(result, src0));
|
||||
}
|
||||
|
|
|
@ -101,8 +101,8 @@ public:
|
|||
void S_ADDC_U32(const GcnInst& inst);
|
||||
void S_MULK_I32(const GcnInst& inst);
|
||||
void S_ADDK_I32(const GcnInst& inst);
|
||||
void S_MAX_U32(const GcnInst& inst);
|
||||
void S_MIN_U32(const GcnInst& inst);
|
||||
void S_MAX_U32(bool is_signed, const GcnInst& inst);
|
||||
void S_MIN_U32(bool is_signed, const GcnInst& inst);
|
||||
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
|
@ -173,7 +173,7 @@ public:
|
|||
void V_BCNT_U32_B32(const GcnInst& inst);
|
||||
void V_COS_F32(const GcnInst& inst);
|
||||
void V_MAX3_F32(const GcnInst& inst);
|
||||
void V_MAX3_U32(const GcnInst& inst);
|
||||
void V_MAX3_U32(bool is_signed, const GcnInst& inst);
|
||||
void V_CVT_I32_F32(const GcnInst& inst);
|
||||
void V_MIN_I32(const GcnInst& inst);
|
||||
void V_MUL_LO_U32(const GcnInst& inst);
|
||||
|
@ -217,6 +217,8 @@ public:
|
|||
void V_READFIRSTLANE_B32(const GcnInst& inst);
|
||||
void V_READLANE_B32(const GcnInst& inst);
|
||||
void V_WRITELANE_B32(const GcnInst& inst);
|
||||
void DS_APPEND(const GcnInst& inst);
|
||||
void DS_CONSUME(const GcnInst& inst);
|
||||
void S_BARRIER();
|
||||
|
||||
// MIMG
|
||||
|
|
|
@ -227,7 +227,9 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
|||
case Opcode::V_MAX3_F32:
|
||||
return V_MAX3_F32(inst);
|
||||
case Opcode::V_MAX3_U32:
|
||||
return V_MAX3_U32(inst);
|
||||
return V_MAX3_U32(false, inst);
|
||||
case Opcode::V_MAX3_I32:
|
||||
return V_MAX_U32(true, inst);
|
||||
case Opcode::V_TRUNC_F32:
|
||||
return V_TRUNC_F32(inst);
|
||||
case Opcode::V_CEIL_F32:
|
||||
|
@ -831,11 +833,11 @@ void Translator::V_MAX3_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_MAX3_U32(const GcnInst& inst) {
|
||||
void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||
SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2)));
|
||||
SetDst(inst.dst[0], ir.IMax(src0, ir.IMax(src1, src2, is_signed), is_signed));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
|
||||
|
@ -967,14 +969,29 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
if (!is_low) {
|
||||
ASSERT(src0.IsImmediate() && src0.U32() == ~0U && src1.IsImmediate() && src1.U32() == 0U);
|
||||
return;
|
||||
// v_mbcnt_hi_u32_b32 v2, -1, 0
|
||||
if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 &&
|
||||
inst.src[1].field == OperandField::ConstZero) {
|
||||
return;
|
||||
}
|
||||
// v_mbcnt_hi_u32_b32 vX, exec_hi, 0
|
||||
if (inst.src[0].field == OperandField::ExecHi &&
|
||||
inst.src[1].field == OperandField::ConstZero) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// v_mbcnt_lo_u32_b32 v2, -1, vX
|
||||
// used combined with above to fetch lane id in non-compute stages
|
||||
if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) {
|
||||
SetDst(inst.dst[0], ir.LaneId());
|
||||
}
|
||||
// v_mbcnt_lo_u32_b32 v20, exec_lo, vX
|
||||
// used combined in above for append buffer indexing.
|
||||
if (inst.src[0].field == OperandField::ExecLo) {
|
||||
SetDst(inst.dst[0], ir.Imm32(0));
|
||||
}
|
||||
}
|
||||
ASSERT(src0.IsImmediate() && src0.U32() == ~0U);
|
||||
SetDst(inst.dst[0], ir.LaneId());
|
||||
}
|
||||
|
||||
void Translator::V_BFM_B32(const GcnInst& inst) {
|
||||
|
|
|
@ -147,10 +147,6 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
|||
|
||||
void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
if (mimg.da) {
|
||||
LOG_WARNING(Render_Vulkan, "Image instruction declares an array");
|
||||
}
|
||||
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
IR::VectorReg dest_reg{inst.dst[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
|
@ -388,11 +384,11 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
|||
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
if (num_dwords == 1) {
|
||||
ir.SetVectorReg(dst_reg, IR::F32{value});
|
||||
ir.SetVectorReg(dst_reg, IR::U32{value});
|
||||
return;
|
||||
}
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
|
||||
ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -456,21 +452,18 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
|||
const IR::VectorReg src_reg{inst.src[1].code};
|
||||
switch (num_dwords) {
|
||||
case 1:
|
||||
value = ir.GetVectorReg<IR::F32>(src_reg);
|
||||
value = ir.GetVectorReg(src_reg);
|
||||
break;
|
||||
case 2:
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg),
|
||||
ir.GetVectorReg<IR::F32>(src_reg + 1));
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
|
||||
break;
|
||||
case 3:
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg<IR::F32>(src_reg),
|
||||
ir.GetVectorReg<IR::F32>(src_reg + 1),
|
||||
ir.GetVectorReg<IR::F32>(src_reg + 2));
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
|
||||
ir.GetVectorReg(src_reg + 2));
|
||||
break;
|
||||
case 4:
|
||||
value = ir.CompositeConstruct(
|
||||
ir.GetVectorReg<IR::F32>(src_reg), ir.GetVectorReg<IR::F32>(src_reg + 1),
|
||||
ir.GetVectorReg<IR::F32>(src_reg + 2), ir.GetVectorReg<IR::F32>(src_reg + 3));
|
||||
value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
|
||||
ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
|
||||
break;
|
||||
}
|
||||
const IR::Value handle =
|
||||
|
@ -518,6 +511,15 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
|||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::VectorReg vdata{inst.src[1].code};
|
||||
const IR::ScalarReg srsrc{inst.src[2].code * 4};
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
if (mubuf.idxen && mubuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
if (mubuf.idxen || mubuf.offen) {
|
||||
return ir.GetVectorReg(vaddr);
|
||||
}
|
||||
return {};
|
||||
}();
|
||||
const IR::U32 soffset{GetSrc(inst.src[3])};
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
|
||||
|
||||
|
@ -527,7 +529,6 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
|||
info.offset_enable.Assign(mubuf.offen);
|
||||
|
||||
IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
|
||||
const IR::U32 address = ir.GetVectorReg(vaddr);
|
||||
const IR::Value handle =
|
||||
ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1),
|
||||
ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue