Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-05-17 17:05:02 +00:00)
shader_recompiler: Better branch detection + more opcodes

Parent: f624f7749c
Commit: 02a50265f8
31 changed files with 772 additions and 120 deletions
@@ -38,8 +38,145 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
}

void Translator::S_ANDN2_B64(const GcnInst& inst) {
    // TODO: Actually implement this.
    ir.SetScc(ir.GetVcc());
    // TODO: What if this is used for something other than EXEC masking?
    const auto get_src = [&](const InstOperand& operand) {
        switch (operand.field) {
        case OperandField::VccLo:
            return ir.GetVcc();
        case OperandField::ExecLo:
            return ir.GetExec();
        case OperandField::ScalarGPR:
            return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
        default:
            UNREACHABLE();
        }
    };

    const IR::U1 src0{get_src(inst.src[0])};
    const IR::U1 src1{get_src(inst.src[1])};
    const IR::U1 result{ir.LogicalAnd(src0, ir.LogicalNot(src1))};
    SetDst(inst.dst[0], result);
    ir.SetScc(result);
}

void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
    // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs).
    // However, here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
    // SGPR we have a special IR opcode for SGPRs that act as thread masks.
    const IR::U1 exec{ir.GetExec()};

    // Mark the destination SGPR as an EXEC context. This means we will use 1-bit
    // IR instructions whenever it is loaded.
    ASSERT(inst.dst[0].field == OperandField::ScalarGPR);
    const u32 reg = inst.dst[0].code;
    exec_contexts[reg] = true;
    ir.SetThreadBitScalarReg(IR::ScalarReg(reg), exec);

    // Update EXEC.
    ASSERT(inst.src[0].field == OperandField::VccLo);
    ir.SetExec(ir.LogicalAnd(exec, ir.GetVcc()));
}
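For reference, the 64-bit behaviour being flattened here can be sketched with plain integer masks. This is only an illustration of the GCN semantics described in the comments; WaveState and s_and_saveexec_b64 are hypothetical names, not the emulator's IR API:

    #include <cstdint>

    // Hypothetical per-wavefront state; illustrative only.
    struct WaveState {
        uint64_t exec = ~0ull; // EXEC thread mask
        uint64_t vcc = 0;      // VCC thread mask
        bool scc = false;      // scalar condition code
    };

    // S_AND_SAVEEXEC_B64 sdst, ssrc: save EXEC into sdst, then AND the source into EXEC.
    // The translation above restricts ssrc to VCC and flattens both masks to one bit.
    uint64_t s_and_saveexec_b64(WaveState& w, uint64_t ssrc) {
        const uint64_t saved = w.exec; // value written to the destination SGPR pair
        w.exec = ssrc & w.exec;
        w.scc = (w.exec != 0); // the ISA also updates SCC; the IR above leaves it untouched
        return saved;
    }

The translator expresses the same idea with 1-bit values and records the destination SGPR in exec_contexts so that later reads of it go through the thread-mask path.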

void Translator::S_MOV_B64(const GcnInst& inst) {
    // TODO: Using VCC as EXEC context.
    if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) {
        return;
    }
    const IR::U1 src0{GetSrc(inst.src[0])};
    if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) {
        // Exec context push
        exec_contexts[inst.dst[0].code] = true;
    } else if (inst.dst[0].field == OperandField::ExecLo &&
               inst.src[0].field == OperandField::ScalarGPR) {
        // Exec context pop
        exec_contexts[inst.src[0].code] = false;
    } else if (inst.src[0].field != OperandField::ConstZero) {
        UNREACHABLE();
    }
    SetDst(inst.dst[0], src0);
}
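The "push"/"pop" comments refer to the common s_mov_b64 save/restore pattern around divergent branches. A minimal sketch of the bookkeeping, under the assumption of a fixed register count (on_s_mov_b64 and kNumScalarRegs are illustrative names, not project code):

    #include <array>
    #include <cstddef>

    constexpr std::size_t kNumScalarRegs = 104; // illustrative count

    // True for SGPRs that currently hold a saved EXEC mask, so later reads of
    // them are emitted as 1-bit thread-mask operations instead of 32-bit loads.
    std::array<bool, kNumScalarRegs> exec_contexts{};

    // s_mov_b64 sdst, exec -> sdst now carries an EXEC mask ("push").
    // s_mov_b64 exec, ssrc -> the mask has been restored; clear the flag ("pop").
    void on_s_mov_b64(bool dst_is_exec, unsigned dst_reg, bool src_is_exec, unsigned src_reg) {
        if (!dst_is_exec && src_is_exec) {
            exec_contexts[dst_reg] = true;
        } else if (dst_is_exec && !src_is_exec) {
            exec_contexts[src_reg] = false;
        }
    }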

void Translator::S_OR_B64(bool negate, const GcnInst& inst) {
    const auto get_src = [&](const InstOperand& operand) {
        switch (operand.field) {
        case OperandField::VccLo:
            return ir.GetVcc();
        case OperandField::ScalarGPR:
            return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
        default:
            UNREACHABLE();
        }
    };

    const IR::U1 src0{get_src(inst.src[0])};
    const IR::U1 src1{get_src(inst.src[1])};
    IR::U1 result = ir.LogicalOr(src0, src1);
    if (negate) {
        result = ir.LogicalNot(result);
    }
    ASSERT(inst.dst[0].field == OperandField::VccLo);
    ir.SetVcc(result);
    ir.SetScc(result);
}

void Translator::S_AND_B64(const GcnInst& inst) {
    const auto get_src = [&](const InstOperand& operand) {
        switch (operand.field) {
        case OperandField::VccLo:
            return ir.GetVcc();
        case OperandField::ExecLo:
            return ir.GetExec();
        case OperandField::ScalarGPR:
            return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
        default:
            UNREACHABLE();
        }
    };
    const IR::U1 src0{get_src(inst.src[0])};
    const IR::U1 src1{get_src(inst.src[1])};
    const IR::U1 result = ir.LogicalAnd(src0, src1);
    ASSERT(inst.dst[0].field == OperandField::VccLo);
    ir.SetVcc(result);
    ir.SetScc(result);
}

void Translator::S_ADD_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.IAdd(src0, src1));
    // TODO: Overflow flag
}

void Translator::S_AND_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 result{ir.BitwiseAnd(src0, src1)};
    SetDst(inst.dst[0], result);
    ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}

void Translator::S_LSHR_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 result{ir.ShiftRightLogical(src0, src1)};
    SetDst(inst.dst[0], result);
    ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}

void Translator::S_CSELECT_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], IR::U32{ir.Select(ir.GetScc(), src0, src1)});
}

void Translator::S_BFE_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
    const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
    const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
    SetDst(inst.dst[0], result);
    ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
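S_BFE_U32 packs both the offset and the field width into the second source, which is why the translation masks src1 with 0x1F and then extracts seven bits starting at bit 16. A plain-integer sketch of that layout (s_bfe_u32 is a hypothetical helper, and the handling of zero or oversized widths is an assumption about edge cases):

    #include <cstdint>

    // src1[4:0]   = bit offset into src0
    // src1[22:16] = field width in bits
    uint32_t s_bfe_u32(uint32_t src0, uint32_t src1) {
        const uint32_t offset = src1 & 0x1Fu;
        const uint32_t width = (src1 >> 16) & 0x7Fu;
        if (width == 0) {
            return 0;
        }
        if (width >= 32) {
            return src0 >> offset; // field covers the rest of the register
        }
        return (src0 >> offset) & ((1u << width) - 1u);
    }
    // SCC is then set to (result != 0), matching the ir.SetScc call above.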

} // namespace Shader::Gcn
@@ -9,11 +9,15 @@

namespace Shader::Gcn {

std::array<bool, IR::NumScalarRegs> Translator::exec_contexts{};

Translator::Translator(IR::Block* block_, Info& info_)
    : ir{*block_, block_->begin()}, info{info_} {}

void Translator::EmitPrologue() {
    exec_contexts.fill(false);
    ir.Prologue();
    ir.SetExec(ir.Imm1(true));

    // Initialize user data.
    IR::ScalarReg dst_sreg = IR::ScalarReg::S0;

@@ -54,10 +58,16 @@ void Translator::EmitPrologue() {
    }
}

IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
    IR::U32F32 value{};
IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
    // Input modifiers work on float values.
    force_flt |= operand.input_modifier.abs | operand.input_modifier.neg;

    IR::U1U32F32 value{};
    switch (operand.field) {
    case OperandField::ScalarGPR:
        if (exec_contexts[operand.code]) {
            value = ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
        }
        if (operand.type == ScalarType::Float32 || force_flt) {
            value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
        } else {

@@ -114,9 +124,15 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
    case OperandField::ConstFloatNeg_2_0:
        value = ir.Imm32(-2.0f);
        break;
    case OperandField::ExecLo:
        value = ir.GetExec();
        break;
    case OperandField::VccLo:
        value = ir.GetVccLo();
        break;
    case OperandField::VccHi:
        value = ir.GetVccHi();
        break;
    default:
        UNREACHABLE();
    }

@@ -130,8 +146,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
    return value;
}

void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
    IR::U32F32 result = value;
void Translator::SetDst(const InstOperand& operand, const IR::U1U32F32& value) {
    IR::U1U32F32 result = value;
    if (operand.output_modifier.multiplier != 0.f) {
        result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
    }

@@ -140,14 +156,20 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
    }
    switch (operand.field) {
    case OperandField::ScalarGPR:
        if (value.Type() == IR::Type::U1) {
            return ir.SetThreadBitScalarReg(IR::ScalarReg(operand.code), result);
        }
        return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
    case OperandField::VectorGPR:
        return ir.SetVectorReg(IR::VectorReg(operand.code), result);
    case OperandField::ExecLo:
        return ir.SetExec(result);
    case OperandField::VccLo:
        return ir.SetVccLo(result);
    case OperandField::VccHi:
        return ir.SetVccHi(result);
    case OperandField::M0:
        break; // Ignore for now
        break;
    default:
        UNREACHABLE();
    }

@@ -279,11 +301,32 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
        case Opcode::IMAGE_SAMPLE:
            translator.IMAGE_SAMPLE(inst);
            break;
        case Opcode::V_CMP_EQ_U32:
            translator.V_CMP_EQ_U32(inst);
        case Opcode::V_CMP_EQ_I32:
            translator.V_CMP_U32(ConditionOp::EQ, true, false, inst);
            break;
        case Opcode::V_CMPX_GT_U32:
            translator.V_CMPX_GT_U32(inst);
        case Opcode::V_CMP_NE_U32:
            translator.V_CMP_U32(ConditionOp::LG, false, false, inst);
            break;
        case Opcode::V_CMP_EQ_U32:
            translator.V_CMP_U32(ConditionOp::EQ, false, false, inst);
            break;
        case Opcode::V_CMP_F_U32:
            translator.V_CMP_U32(ConditionOp::F, false, false, inst);
            break;
        case Opcode::V_CMP_LT_U32:
            translator.V_CMP_U32(ConditionOp::LT, false, false, inst);
            break;
        case Opcode::V_CMP_GT_U32:
            translator.V_CMP_U32(ConditionOp::GT, false, false, inst);
            break;
        case Opcode::V_CMP_GE_U32:
            translator.V_CMP_U32(ConditionOp::GE, false, false, inst);
            break;
        case Opcode::V_CMP_TRU_U32:
            translator.V_CMP_U32(ConditionOp::TRU, false, false, inst);
            break;
        case Opcode::V_CMP_NEQ_F32:
            translator.V_CMP_F32(ConditionOp::LG, inst);
            break;
        case Opcode::V_CMP_F_F32:
            translator.V_CMP_F32(ConditionOp::F, inst);

@@ -309,6 +352,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
        case Opcode::S_CMP_LG_U32:
            translator.S_CMP(ConditionOp::LG, false, inst);
            break;
        case Opcode::S_CMP_EQ_I32:
            translator.S_CMP(ConditionOp::EQ, true, inst);
            break;
        case Opcode::V_CNDMASK_B32:
            translator.V_CNDMASK_B32(inst);
            break;

@@ -348,13 +394,125 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
        case Opcode::V_MIN3_F32:
            translator.V_MIN3_F32(inst);
            break;
        case Opcode::S_NOP:
        case Opcode::V_MADMK_F32:
            translator.V_MADMK_F32(inst);
            break;
        case Opcode::V_CUBEMA_F32:
            translator.V_CUBEMA_F32(inst);
            break;
        case Opcode::V_CUBESC_F32:
            translator.V_CUBESC_F32(inst);
            break;
        case Opcode::V_CUBETC_F32:
            translator.V_CUBETC_F32(inst);
            break;
        case Opcode::V_CUBEID_F32:
            translator.V_CUBEID_F32(inst);
            break;
        case Opcode::V_CVT_U32_F32:
            translator.V_CVT_U32_F32(inst);
            break;
        case Opcode::V_SUBREV_F32:
            translator.V_SUBREV_F32(inst);
            break;
        case Opcode::S_AND_SAVEEXEC_B64:
            translator.S_AND_SAVEEXEC_B64(inst);
            break;
        case Opcode::S_MOV_B64:
            translator.S_MOV_B64(inst);
            break;
        case Opcode::V_SUBREV_I32:
            translator.V_SUBREV_I32(inst);
            break;
        case Opcode::V_CMP_LE_U32:
            translator.V_CMP_U32(ConditionOp::LE, false, false, inst);
            break;
        case Opcode::V_CMP_GT_I32:
            translator.V_CMP_U32(ConditionOp::GT, true, false, inst);
            break;
        case Opcode::V_CMPX_F_U32:
            translator.V_CMP_U32(ConditionOp::F, false, true, inst);
            break;
        case Opcode::V_CMPX_LT_U32:
            translator.V_CMP_U32(ConditionOp::LT, false, true, inst);
            break;
        case Opcode::V_CMPX_EQ_U32:
            translator.V_CMP_U32(ConditionOp::EQ, false, true, inst);
            break;
        case Opcode::V_CMPX_LE_U32:
            translator.V_CMP_U32(ConditionOp::LE, false, true, inst);
            break;
        case Opcode::V_CMPX_GT_U32:
            translator.V_CMP_U32(ConditionOp::GT, false, true, inst);
            break;
        case Opcode::V_CMPX_NE_U32:
            translator.V_CMP_U32(ConditionOp::LG, false, true, inst);
            break;
        case Opcode::V_CMPX_GE_U32:
            translator.V_CMP_U32(ConditionOp::GE, false, true, inst);
            break;
        case Opcode::V_CMPX_TRU_U32:
            translator.V_CMP_U32(ConditionOp::TRU, false, true, inst);
            break;
        case Opcode::S_OR_B64:
            translator.S_OR_B64(false, inst);
            break;
        case Opcode::S_NOR_B64:
            translator.S_OR_B64(true, inst);
            break;
        case Opcode::S_AND_B64:
            translator.S_AND_B64(inst);
            break;
        case Opcode::V_LSHRREV_B32:
            translator.V_LSHRREV_B32(inst);
            break;
        case Opcode::S_ADD_I32:
            translator.S_ADD_I32(inst);
            break;
        case Opcode::V_MUL_LO_I32:
            translator.V_MUL_LO_I32(inst);
            break;
        case Opcode::V_SAD_U32:
            translator.V_SAD_U32(inst);
            break;
        case Opcode::V_BFE_U32:
            translator.V_BFE_U32(inst);
            break;
        case Opcode::V_MAD_I32_I24:
            translator.V_MAD_I32_I24(inst);
            break;
        case Opcode::V_MUL_I32_I24:
            translator.V_MUL_I32_I24(inst);
            break;
        case Opcode::V_SUB_I32:
            translator.V_SUB_I32(inst);
            break;
        case Opcode::V_LSHR_B32:
            translator.V_LSHR_B32(inst);
            break;
        case Opcode::V_ASHRREV_I32:
            translator.V_ASHRREV_I32(inst);
            break;
        case Opcode::V_MAD_U32_U24:
            translator.V_MAD_U32_U24(inst);
            break;
        case Opcode::S_AND_B32:
            translator.S_AND_B32(inst);
            break;
        case Opcode::S_LSHR_B32:
            translator.S_LSHR_B32(inst);
            break;
        case Opcode::S_CSELECT_B32:
            translator.S_CSELECT_B32(inst);
            break;
        case Opcode::S_BFE_U32:
            translator.S_BFE_U32(inst);
            break;
        case Opcode::S_NOP:
        case Opcode::S_CBRANCH_EXECZ:
        case Opcode::S_CBRANCH_SCC0:
        case Opcode::S_CBRANCH_SCC1:
        case Opcode::S_BRANCH:
        case Opcode::S_MOV_B64:
        case Opcode::S_WQM_B64:
        case Opcode::V_INTERP_P1_F32:
        case Opcode::S_ENDPGM:
@@ -23,6 +23,7 @@ enum class ConditionOp : u32 {
    GE,
    LT,
    LE,
    TRU,
};

class Translator {

@@ -37,6 +38,15 @@
    void S_MUL_I32(const GcnInst& inst);
    void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
    void S_ANDN2_B64(const GcnInst& inst);
    void S_AND_SAVEEXEC_B64(const GcnInst& inst);
    void S_MOV_B64(const GcnInst& inst);
    void S_OR_B64(bool negate, const GcnInst& inst);
    void S_AND_B64(const GcnInst& inst);
    void S_ADD_I32(const GcnInst& inst);
    void S_AND_B32(const GcnInst& inst);
    void S_LSHR_B32(const GcnInst& inst);
    void S_CSELECT_B32(const GcnInst& inst);
    void S_BFE_U32(const GcnInst& inst);

    // Scalar Memory
    void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);

@@ -48,7 +58,6 @@
    void V_MAC_F32(const GcnInst& inst);
    void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
    void V_MUL_F32(const GcnInst& inst);
    void V_CMP_EQ_U32(const GcnInst& inst);
    void V_CNDMASK_B32(const GcnInst& inst);
    void V_AND_B32(const GcnInst& inst);
    void V_LSHLREV_B32(const GcnInst& inst);

@@ -63,7 +72,6 @@
    void V_FLOOR_F32(const GcnInst& inst);
    void V_SUB_F32(const GcnInst& inst);
    void V_RCP_F32(const GcnInst& inst);
    void V_CMPX_GT_U32(const GcnInst& inst);
    void V_FMA_F32(const GcnInst& inst);
    void V_CMP_F32(ConditionOp op, const GcnInst& inst);
    void V_MAX_F32(const GcnInst& inst);

@@ -74,6 +82,25 @@
    void V_SQRT_F32(const GcnInst& inst);
    void V_MIN_F32(const GcnInst& inst);
    void V_MIN3_F32(const GcnInst& inst);
    void V_MADMK_F32(const GcnInst& inst);
    void V_CUBEMA_F32(const GcnInst& inst);
    void V_CUBESC_F32(const GcnInst& inst);
    void V_CUBETC_F32(const GcnInst& inst);
    void V_CUBEID_F32(const GcnInst& inst);
    void V_CVT_U32_F32(const GcnInst& inst);
    void V_SUBREV_F32(const GcnInst& inst);
    void V_SUBREV_I32(const GcnInst& inst);
    void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst);
    void V_LSHRREV_B32(const GcnInst& inst);
    void V_MUL_LO_I32(const GcnInst& inst);
    void V_SAD_U32(const GcnInst& inst);
    void V_BFE_U32(const GcnInst& inst);
    void V_MAD_I32_I24(const GcnInst& inst);
    void V_MUL_I32_I24(const GcnInst& inst);
    void V_SUB_I32(const GcnInst& inst);
    void V_LSHR_B32(const GcnInst& inst);
    void V_ASHRREV_I32(const GcnInst& inst);
    void V_MAD_U32_U24(const GcnInst& inst);

    // Vector Memory
    void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);

@@ -94,12 +121,13 @@
    void EXP(const GcnInst& inst);

private:
    IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false);
    void SetDst(const InstOperand& operand, const IR::U32F32& value);
    IR::U1U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false);
    void SetDst(const InstOperand& operand, const IR::U1U32F32& value);

private:
    IR::IREmitter ir;
    Info& info;
    static std::array<bool, IR::NumScalarRegs> exec_contexts;
};

void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
@@ -29,17 +29,6 @@ void Translator::V_MUL_F32(const GcnInst& inst) {
    ir.SetVectorReg(dst_reg, ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
}

void Translator::V_CMP_EQ_U32(const GcnInst& inst) {
    const IR::U1 result = ir.IEqual(GetSrc(inst.src[0]), GetSrc(inst.src[1]));
    if (inst.dst[1].field == OperandField::VccLo) {
        return ir.SetVcc(result);
    } else if (inst.dst[1].field == OperandField::ScalarGPR) {
        const IR::ScalarReg dst_reg{inst.dst[1].code};
        return ir.SetScalarReg(dst_reg, IR::U32{ir.Select(result, ir.Imm32(1U), ir.Imm32(0U))});
    }
    UNREACHABLE();
}

void Translator::V_CNDMASK_B32(const GcnInst& inst) {
    const IR::VectorReg dst_reg{inst.dst[0].code};
    const IR::ScalarReg flag_reg{inst.src[2].code};

@@ -70,9 +59,9 @@ void Translator::V_AND_B32(const GcnInst& inst) {

void Translator::V_LSHLREV_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::VectorReg dst_reg{inst.dst[0].code};
    ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, src0));
    ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
}
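The replacement lines read both operands through GetSrc and mask the shift amount, matching how the *REV shifts behave: src0 carries the shift count (only its low five bits are used) and src1 carries the value. A plain-integer sketch (v_lshlrev_b32 is a hypothetical helper, not project code):

    #include <cstdint>

    // Operands are reversed relative to V_LSHL_B32: src0 = shift amount, src1 = value.
    uint32_t v_lshlrev_b32(uint32_t src0, uint32_t src1) {
        return src1 << (src0 & 0x1Fu);
    }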

void Translator::V_ADD_I32(const GcnInst& inst) {

@@ -148,14 +137,6 @@ void Translator::V_RCP_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.FPRecip(src0));
}

void Translator::V_CMPX_GT_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U1 result = ir.IGreaterThan(src0, src1, false);
    ir.SetVcc(result);
    ir.SetExec(result);
}

void Translator::V_FMA_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    const IR::F32 src1{GetSrc(inst.src[1], true)};

@@ -182,6 +163,8 @@ void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
            return ir.FPLessThanEqual(src0, src1);
        case ConditionOp::GE:
            return ir.FPGreaterThanEqual(src0, src1);
        default:
            UNREACHABLE();
        }
    }();
    ir.SetVcc(result);

@@ -231,4 +214,147 @@ void Translator::V_MIN3_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
}

void Translator::V_MADMK_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    const IR::F32 src1{GetSrc(inst.src[1], true)};
    const IR::F32 k{GetSrc(inst.src[2], true)};
    SetDst(inst.dst[0], ir.FPFma(src0, k, src1));
}

void Translator::V_CUBEMA_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.Imm32(1.f));
}

void Translator::V_CUBESC_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], GetSrc(inst.src[0], true));
}

void Translator::V_CUBETC_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], GetSrc(inst.src[1], true));
}

void Translator::V_CUBEID_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], GetSrc(inst.src[2], true));
}

void Translator::V_CVT_U32_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    SetDst(inst.dst[0], ir.ConvertFToU(32, src0));
}

void Translator::V_SUBREV_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    const IR::F32 src1{GetSrc(inst.src[1], true)};
    SetDst(inst.dst[0], ir.FPSub(src1, src0));
}

void Translator::V_SUBREV_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ISub(src1, src0));
    // TODO: Carry-out
}

void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U1 result = [&] {
        switch (op) {
        case ConditionOp::F:
            return ir.Imm1(false);
        case ConditionOp::TRU:
            return ir.Imm1(true);
        case ConditionOp::EQ:
            return ir.IEqual(src0, src1);
        case ConditionOp::LG:
            return ir.INotEqual(src0, src1);
        case ConditionOp::GT:
            return ir.IGreaterThan(src0, src1, is_signed);
        case ConditionOp::LT:
            return ir.ILessThan(src0, src1, is_signed);
        case ConditionOp::LE:
            return ir.ILessThanEqual(src0, src1, is_signed);
        case ConditionOp::GE:
            return ir.IGreaterThanEqual(src0, src1, is_signed);
        default:
            UNREACHABLE();
        }
    }();
    if (set_exec) {
        ir.SetExec(result);
    }
    switch (inst.dst[1].field) {
    case OperandField::VccLo:
        return ir.SetVcc(result);
    case OperandField::ScalarGPR:
        return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), result);
    default:
        UNREACHABLE();
    }
}
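The shared V_CMP_U32 helper replaces the earlier per-opcode V_CMP/V_CMPX translators; the comparison itself reduces to a small table over the two operands. A standalone sketch of that table (the enum ordering and eval_condition are illustrative, not the project's definitions; when set_exec is true the same result is also written to EXEC, which is what the V_CMPX_* forms do):

    #include <cstdint>

    enum class ConditionOp { F, EQ, LG, GT, GE, LT, LE, TRU };

    bool eval_condition(ConditionOp op, uint32_t src0, uint32_t src1, bool is_signed) {
        const int32_t s0 = static_cast<int32_t>(src0);
        const int32_t s1 = static_cast<int32_t>(src1);
        switch (op) {
        case ConditionOp::F:
            return false; // V_CMP_F_*: always false
        case ConditionOp::TRU:
            return true;  // V_CMP_TRU_*: always true
        case ConditionOp::EQ:
            return src0 == src1;
        case ConditionOp::LG:
            return src0 != src1;
        case ConditionOp::GT:
            return is_signed ? s0 > s1 : src0 > src1;
        case ConditionOp::LT:
            return is_signed ? s0 < s1 : src0 < src1;
        case ConditionOp::LE:
            return is_signed ? s0 <= s1 : src0 <= src1;
        case ConditionOp::GE:
            return is_signed ? s0 >= s1 : src0 >= src1;
        }
        return false;
    }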

void Translator::V_LSHRREV_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftRightLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
}

void Translator::V_MUL_LO_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.IMul(src0, src1));
}

void Translator::V_SAD_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    const IR::U32 src2{GetSrc(inst.src[2])};
    const IR::U32 max{ir.IMax(src0, src1, false)};
    const IR::U32 min{ir.IMin(src0, src1, false)};
    SetDst(inst.dst[0], ir.IAdd(ir.ISub(max, min), src2));
}
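V_SAD_U32 accumulates the absolute difference of two unsigned values, and the max/min pair above is how that is expressed without a signed subtraction. As a plain-integer sketch (v_sad_u32 is a hypothetical helper):

    #include <algorithm>
    #include <cstdint>

    // |src0 - src1| + src2, using max - min so the difference never wraps.
    uint32_t v_sad_u32(uint32_t src0, uint32_t src1, uint32_t src2) {
        return (std::max(src0, src1) - std::min(src0, src1)) + src2;
    }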

void Translator::V_BFE_U32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
    const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
    SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2));
}

void Translator::V_MAD_I32_I24(const GcnInst& inst) {
    const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), true)};
    const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), true)};
    const IR::U32 src2{GetSrc(inst.src[2])};
    SetDst(inst.dst[0], ir.IAdd(ir.IMul(src0, src1), src2));
}

void Translator::V_MUL_I32_I24(const GcnInst& inst) {
    const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), true)};
    const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), true)};
    SetDst(inst.dst[0], ir.IMul(src0, src1));
}
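The I24 forms only use the low 24 bits of each multiplicand, treated as signed, which is what the signed BitFieldExtract over bits [23:0] expresses. A minimal sketch of that semantics (sign_extend_i24 and v_mad_i32_i24 are hypothetical helpers; the 64-bit intermediate just avoids signed overflow before keeping the low 32 bits):

    #include <cstdint>

    int32_t sign_extend_i24(uint32_t v) {
        return static_cast<int32_t>(v << 8) >> 8; // replicate bit 23 into the top byte
    }

    int32_t v_mad_i32_i24(uint32_t src0, uint32_t src1, uint32_t src2) {
        const int64_t prod = int64_t{sign_extend_i24(src0)} * sign_extend_i24(src1);
        return static_cast<int32_t>(prod + static_cast<int32_t>(src2)); // low 32 bits
    }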

void Translator::V_SUB_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ISub(src0, src1));
}

void Translator::V_LSHR_B32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftRightLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
}

void Translator::V_ASHRREV_I32(const GcnInst& inst) {
    const IR::U32 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
    SetDst(inst.dst[0], ir.ShiftRightArithmetic(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
}

void Translator::V_MAD_U32_U24(const GcnInst& inst) {
    // TODO:
    V_MAD_I32_I24(inst);
}

} // namespace Shader::Gcn
@@ -8,7 +8,6 @@ namespace Shader::Gcn {
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
    const IR::VectorReg dst_reg{inst.dst[0].code};
    auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
    attr.semantic = inst.control.vintrp.attr;
    const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
    ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
}