mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-05-18 01:14:56 +00:00
video_core: Preliminary storage image support and more (#188)
* vk_rasterizer: Clear depth buffer when DB_RENDER_CONTROL says so
* video_core: Preliminary storage image support, more opcodes
* renderer_vulkan: a fix for vertex buffers merging
* renderer_vulkan: a heuristic for blend override when alpha out is masked

---------

Co-authored-by: psucien <bad_cast@protonmail.com>
This commit is contained in:
parent
23f11a3fda
commit
7b1a317b09
30 changed files with 429 additions and 101 deletions
|
@ -5,6 +5,19 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
||||
const u8 offset0 = inst.control.ds.offset0;
|
||||
const u8 offset1 = inst.control.ds.offset1;
|
||||
const IR::U32 src{GetSrc(inst.src[1])};
|
||||
ASSERT(offset1 & 0x80);
|
||||
const IR::U32 lane_id = ir.LaneId();
|
||||
const IR::U32 id_in_group = ir.BitwiseAnd(lane_id, ir.Imm32(0b11));
|
||||
const IR::U32 base = ir.ShiftLeftLogical(id_in_group, ir.Imm32(1));
|
||||
const IR::U32 index =
|
||||
ir.IAdd(lane_id, ir.BitFieldExtract(ir.Imm32(offset0), base, ir.Imm32(2)));
|
||||
SetDst(inst.dst[0], ir.QuadShuffle(src, index));
|
||||
}
|
||||
|
||||
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) {
|
||||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
|
|
|
@ -75,9 +75,17 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
|||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
// SGPR we have a special IR opcode for SGPRs that act as thread masks.
|
||||
ASSERT(inst.src[0].field == OperandField::VccLo);
|
||||
const IR::U1 exec{ir.GetExec()};
|
||||
const IR::U1 vcc{ir.GetVcc()};
|
||||
const IR::U1 src = [&] {
|
||||
switch (inst.src[0].field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.GetVcc();
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
|
||||
// Mark destination SGPR as an EXEC context. This means we will use 1-bit
|
||||
// IR instruction whenever it's loaded.
|
||||
|
@ -96,7 +104,7 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
// Update EXEC.
|
||||
ir.SetExec(ir.LogicalAnd(exec, vcc));
|
||||
ir.SetExec(ir.LogicalAnd(exec, src));
|
||||
}
|
||||
|
||||
void Translator::S_MOV_B64(const GcnInst& inst) {
|
||||
|
@ -258,4 +266,11 @@ void Translator::S_LSHL_B32(const GcnInst& inst) {
|
|||
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
|
||||
}
|
||||
|
||||
void Translator::S_BFM_B32(const GcnInst& inst) {
|
||||
const IR::U32 src0{ir.BitwiseAnd(GetSrc(inst.src[0]), ir.Imm32(0x1F))};
|
||||
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
|
||||
const IR::U32 mask{ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1u), src0), ir.Imm32(1))};
|
||||
SetDst(inst.dst[0], ir.ShiftLeftLogical(mask, src1));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -306,6 +306,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::IMAGE_SAMPLE:
|
||||
translator.IMAGE_SAMPLE(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_STORE:
|
||||
translator.IMAGE_STORE(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_LOAD_MIP:
|
||||
translator.IMAGE_LOAD_MIP(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_I32:
|
||||
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_I32:
|
||||
translator.V_CMP_U32(ConditionOp::EQ, true, false, inst);
|
||||
break;
|
||||
|
@ -331,28 +340,31 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
translator.V_CMP_U32(ConditionOp::TRU, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NEQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, inst);
|
||||
translator.V_CMP_F32(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_F_F32:
|
||||
translator.V_CMP_F32(ConditionOp::F, inst);
|
||||
translator.V_CMP_F32(ConditionOp::F, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, inst);
|
||||
translator.V_CMP_F32(ConditionOp::LT, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::EQ, inst);
|
||||
translator.V_CMP_F32(ConditionOp::EQ, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, inst);
|
||||
translator.V_CMP_F32(ConditionOp::LE, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, inst);
|
||||
translator.V_CMP_F32(ConditionOp::GT, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LG_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, inst);
|
||||
translator.V_CMP_F32(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, inst);
|
||||
translator.V_CMP_F32(ConditionOp::GE, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_NLE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, false, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LG_U32:
|
||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||
|
@ -378,6 +390,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CNDMASK_B32:
|
||||
translator.V_CNDMASK_B32(inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
|
||||
translator.BUFFER_LOAD_FORMAT(3, true, inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
|
||||
translator.BUFFER_LOAD_FORMAT(4, true, inst);
|
||||
break;
|
||||
|
@ -414,6 +429,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_MIN_F32:
|
||||
translator.V_MIN_F32(inst);
|
||||
break;
|
||||
case Opcode::V_MIN_I32:
|
||||
translator.V_MIN_I32(inst);
|
||||
break;
|
||||
case Opcode::V_MIN3_F32:
|
||||
translator.V_MIN3_F32(inst);
|
||||
break;
|
||||
|
@ -435,6 +453,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CVT_U32_F32:
|
||||
translator.V_CVT_U32_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_I32_F32:
|
||||
translator.V_CVT_I32_F32(inst);
|
||||
break;
|
||||
case Opcode::V_SUBREV_F32:
|
||||
translator.V_SUBREV_F32(inst);
|
||||
break;
|
||||
|
@ -447,12 +468,61 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_SUBREV_I32:
|
||||
translator.V_SUBREV_I32(inst);
|
||||
break;
|
||||
|
||||
case Opcode::V_CMPX_F_F32:
|
||||
translator.V_CMP_F32(ConditionOp::F, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_EQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::EQ, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_GT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LG_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_GE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NGE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NLG_F32:
|
||||
translator.V_CMP_F32(ConditionOp::EQ, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NGT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NLE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NEQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_NLT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_TRU_F32:
|
||||
translator.V_CMP_F32(ConditionOp::TRU, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LE_U32:
|
||||
translator.V_CMP_U32(ConditionOp::LE, false, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GT_I32:
|
||||
translator.V_CMP_U32(ConditionOp::GT, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LT_I32:
|
||||
translator.V_CMP_U32(ConditionOp::LT, true, false, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_LT_I32:
|
||||
translator.V_CMP_U32(ConditionOp::LT, true, true, inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_F_U32:
|
||||
translator.V_CMP_U32(ConditionOp::F, false, true, inst);
|
||||
break;
|
||||
|
@ -540,6 +610,18 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_BCNT_U32_B32:
|
||||
translator.V_BCNT_U32_B32(inst);
|
||||
break;
|
||||
case Opcode::V_MAX3_F32:
|
||||
translator.V_MAX3_F32(inst);
|
||||
break;
|
||||
case Opcode::DS_SWIZZLE_B32:
|
||||
translator.DS_SWIZZLE_B32(inst);
|
||||
break;
|
||||
case Opcode::V_MUL_LO_U32:
|
||||
translator.V_MUL_LO_U32(inst);
|
||||
break;
|
||||
case Opcode::S_BFM_B32:
|
||||
translator.S_BFM_B32(inst);
|
||||
break;
|
||||
case Opcode::S_NOP:
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
|
|
|
@ -49,6 +49,7 @@ public:
|
|||
void S_CSELECT_B64(const GcnInst& inst);
|
||||
void S_BFE_U32(const GcnInst& inst);
|
||||
void S_LSHL_B32(const GcnInst& inst);
|
||||
void S_BFM_B32(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
@ -75,7 +76,7 @@ public:
|
|||
void V_SUB_F32(const GcnInst& inst);
|
||||
void V_RCP_F32(const GcnInst& inst);
|
||||
void V_FMA_F32(const GcnInst& inst);
|
||||
void V_CMP_F32(ConditionOp op, const GcnInst& inst);
|
||||
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
|
||||
void V_MAX_F32(const GcnInst& inst);
|
||||
void V_RSQ_F32(const GcnInst& inst);
|
||||
void V_SIN_F32(const GcnInst& inst);
|
||||
|
@ -106,6 +107,10 @@ public:
|
|||
void V_RNDNE_F32(const GcnInst& inst);
|
||||
void V_BCNT_U32_B32(const GcnInst& inst);
|
||||
void V_COS_F32(const GcnInst& inst);
|
||||
void V_MAX3_F32(const GcnInst& inst);
|
||||
void V_CVT_I32_F32(const GcnInst& inst);
|
||||
void V_MIN_I32(const GcnInst& inst);
|
||||
void V_MUL_LO_U32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
|
@ -115,12 +120,15 @@ public:
|
|||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
|
||||
// Data share
|
||||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
|
||||
|
||||
// MIMG
|
||||
void IMAGE_GET_RESINFO(const GcnInst& inst);
|
||||
void IMAGE_SAMPLE(const GcnInst& inst);
|
||||
void IMAGE_STORE(const GcnInst& inst);
|
||||
void IMAGE_LOAD_MIP(const GcnInst& inst);
|
||||
|
||||
// Export
|
||||
void EXP(const GcnInst& inst);
|
||||
|
|
|
@ -20,7 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) {
|
|||
|
||||
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1]));
|
||||
const IR::Value vec_f32 =
|
||||
ir.CompositeConstruct(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true));
|
||||
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
|
||||
}
|
||||
|
||||
|
@ -143,7 +144,7 @@ void Translator::V_FMA_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||
}
|
||||
|
||||
void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
|
||||
void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::U1 result = [&] {
|
||||
|
@ -166,6 +167,9 @@ void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
|
|||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
if (set_exec) {
|
||||
ir.SetExec(result);
|
||||
}
|
||||
|
||||
switch (inst.dst[1].field) {
|
||||
case OperandField::VccLo:
|
||||
|
@ -382,4 +386,28 @@ void Translator::V_COS_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPCos(src0));
|
||||
}
|
||||
|
||||
void Translator::V_MAX3_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
|
||||
}
|
||||
|
||||
void Translator::V_MIN_I32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.SMin(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_MUL_LO_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.IMul(src0, src1));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -107,6 +107,48 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_LOAD_MIP(const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
IR::VectorReg dest_reg{inst.dst[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
|
||||
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
|
||||
const IR::Value body =
|
||||
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
|
||||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||
|
||||
IR::TextureInstInfo info{};
|
||||
info.explicit_lod.Assign(1);
|
||||
const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
|
||||
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
if (((mimg.dmask >> i) & 1) == 0) {
|
||||
continue;
|
||||
}
|
||||
IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
|
||||
ir.SetVectorReg(dest_reg++, value);
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::IMAGE_STORE(const GcnInst& inst) {
|
||||
const auto& mimg = inst.control.mimg;
|
||||
IR::VectorReg addr_reg{inst.src[0].code};
|
||||
IR::VectorReg data_reg{inst.dst[0].code};
|
||||
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||
|
||||
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
|
||||
const IR::Value body =
|
||||
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
|
||||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||
|
||||
ASSERT(mimg.dmask == 0xF);
|
||||
const IR::Value value = ir.CompositeConstruct(
|
||||
ir.GetVectorReg<IR::F32>(data_reg), ir.GetVectorReg<IR::F32>(data_reg + 1),
|
||||
ir.GetVectorReg<IR::F32>(data_reg + 2), ir.GetVectorReg<IR::F32>(data_reg + 3));
|
||||
ir.ImageWrite(handle, body, value, {});
|
||||
}
|
||||
|
||||
void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue