shader_recompiler: Better branch detection + more opcodes

This commit is contained in:
raphaelthegreat 2024-06-01 20:25:31 +03:00
parent f624f7749c
commit 02a50265f8
31 changed files with 772 additions and 120 deletions

View file

@ -9,11 +9,15 @@
namespace Shader::Gcn {
std::array<bool, IR::NumScalarRegs> Translator::exec_contexts{};
Translator::Translator(IR::Block* block_, Info& info_)
: ir{*block_, block_->begin()}, info{info_} {}
void Translator::EmitPrologue() {
exec_contexts.fill(false);
ir.Prologue();
ir.SetExec(ir.Imm1(true));
// Initialize user data.
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
@ -54,10 +58,16 @@ void Translator::EmitPrologue() {
}
}
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
IR::U32F32 value{};
IR::U1U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
// Input modifiers work on float values.
force_flt |= operand.input_modifier.abs | operand.input_modifier.neg;
IR::U1U32F32 value{};
switch (operand.field) {
case OperandField::ScalarGPR:
if (exec_contexts[operand.code]) {
value = ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code));
}
if (operand.type == ScalarType::Float32 || force_flt) {
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
} else {
@ -114,9 +124,15 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
case OperandField::ConstFloatNeg_2_0:
value = ir.Imm32(-2.0f);
break;
case OperandField::ExecLo:
value = ir.GetExec();
break;
case OperandField::VccLo:
value = ir.GetVccLo();
break;
case OperandField::VccHi:
value = ir.GetVccHi();
break;
default:
UNREACHABLE();
}
@ -130,8 +146,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
return value;
}
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
IR::U32F32 result = value;
void Translator::SetDst(const InstOperand& operand, const IR::U1U32F32& value) {
IR::U1U32F32 result = value;
if (operand.output_modifier.multiplier != 0.f) {
result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
}
@ -140,14 +156,20 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
}
switch (operand.field) {
case OperandField::ScalarGPR:
if (value.Type() == IR::Type::U1) {
return ir.SetThreadBitScalarReg(IR::ScalarReg(operand.code), result);
}
return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
case OperandField::VectorGPR:
return ir.SetVectorReg(IR::VectorReg(operand.code), result);
case OperandField::ExecLo:
return ir.SetExec(result);
case OperandField::VccLo:
return ir.SetVccLo(result);
case OperandField::VccHi:
return ir.SetVccHi(result);
case OperandField::M0:
break; // Ignore for now
break;
default:
UNREACHABLE();
}
@ -279,11 +301,32 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_SAMPLE:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::V_CMP_EQ_U32:
translator.V_CMP_EQ_U32(inst);
case Opcode::V_CMP_EQ_I32:
translator.V_CMP_U32(ConditionOp::EQ, true, false, inst);
break;
case Opcode::V_CMPX_GT_U32:
translator.V_CMPX_GT_U32(inst);
case Opcode::V_CMP_NE_U32:
translator.V_CMP_U32(ConditionOp::LG, false, false, inst);
break;
case Opcode::V_CMP_EQ_U32:
translator.V_CMP_U32(ConditionOp::EQ, false, false, inst);
break;
case Opcode::V_CMP_F_U32:
translator.V_CMP_U32(ConditionOp::F, false, false, inst);
break;
case Opcode::V_CMP_LT_U32:
translator.V_CMP_U32(ConditionOp::LT, false, false, inst);
break;
case Opcode::V_CMP_GT_U32:
translator.V_CMP_U32(ConditionOp::GT, false, false, inst);
break;
case Opcode::V_CMP_GE_U32:
translator.V_CMP_U32(ConditionOp::GE, false, false, inst);
break;
case Opcode::V_CMP_TRU_U32:
translator.V_CMP_U32(ConditionOp::TRU, false, false, inst);
break;
case Opcode::V_CMP_NEQ_F32:
translator.V_CMP_F32(ConditionOp::LG, inst);
break;
case Opcode::V_CMP_F_F32:
translator.V_CMP_F32(ConditionOp::F, inst);
@ -309,6 +352,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst);
break;
case Opcode::S_CMP_EQ_I32:
translator.S_CMP(ConditionOp::EQ, true, inst);
break;
case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst);
break;
@ -348,13 +394,125 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_MIN3_F32:
translator.V_MIN3_F32(inst);
break;
case Opcode::S_NOP:
case Opcode::V_MADMK_F32:
translator.V_MADMK_F32(inst);
break;
case Opcode::V_CUBEMA_F32:
translator.V_CUBEMA_F32(inst);
break;
case Opcode::V_CUBESC_F32:
translator.V_CUBESC_F32(inst);
break;
case Opcode::V_CUBETC_F32:
translator.V_CUBETC_F32(inst);
break;
case Opcode::V_CUBEID_F32:
translator.V_CUBEID_F32(inst);
break;
case Opcode::V_CVT_U32_F32:
translator.V_CVT_U32_F32(inst);
break;
case Opcode::V_SUBREV_F32:
translator.V_SUBREV_F32(inst);
break;
case Opcode::S_AND_SAVEEXEC_B64:
translator.S_AND_SAVEEXEC_B64(inst);
break;
case Opcode::S_MOV_B64:
translator.S_MOV_B64(inst);
break;
case Opcode::V_SUBREV_I32:
translator.V_SUBREV_I32(inst);
break;
case Opcode::V_CMP_LE_U32:
translator.V_CMP_U32(ConditionOp::LE, false, false, inst);
break;
case Opcode::V_CMP_GT_I32:
translator.V_CMP_U32(ConditionOp::GT, true, false, inst);
break;
case Opcode::V_CMPX_F_U32:
translator.V_CMP_U32(ConditionOp::F, false, true, inst);
break;
case Opcode::V_CMPX_LT_U32:
translator.V_CMP_U32(ConditionOp::LT, false, true, inst);
break;
case Opcode::V_CMPX_EQ_U32:
translator.V_CMP_U32(ConditionOp::EQ, false, true, inst);
break;
case Opcode::V_CMPX_LE_U32:
translator.V_CMP_U32(ConditionOp::LE, false, true, inst);
break;
case Opcode::V_CMPX_GT_U32:
translator.V_CMP_U32(ConditionOp::GT, false, true, inst);
break;
case Opcode::V_CMPX_NE_U32:
translator.V_CMP_U32(ConditionOp::LG, false, true, inst);
break;
case Opcode::V_CMPX_GE_U32:
translator.V_CMP_U32(ConditionOp::GE, false, true, inst);
break;
case Opcode::V_CMPX_TRU_U32:
translator.V_CMP_U32(ConditionOp::TRU, false, true, inst);
break;
case Opcode::S_OR_B64:
translator.S_OR_B64(false, inst);
break;
case Opcode::S_NOR_B64:
translator.S_OR_B64(true, inst);
break;
case Opcode::S_AND_B64:
translator.S_AND_B64(inst);
break;
case Opcode::V_LSHRREV_B32:
translator.V_LSHRREV_B32(inst);
break;
case Opcode::S_ADD_I32:
translator.S_ADD_I32(inst);
break;
case Opcode::V_MUL_LO_I32:
translator.V_MUL_LO_I32(inst);
break;
case Opcode::V_SAD_U32:
translator.V_SAD_U32(inst);
break;
case Opcode::V_BFE_U32:
translator.V_BFE_U32(inst);
break;
case Opcode::V_MAD_I32_I24:
translator.V_MAD_I32_I24(inst);
break;
case Opcode::V_MUL_I32_I24:
translator.V_MUL_I32_I24(inst);
break;
case Opcode::V_SUB_I32:
translator.V_SUB_I32(inst);
break;
case Opcode::V_LSHR_B32:
translator.V_LSHR_B32(inst);
break;
case Opcode::V_ASHRREV_I32:
translator.V_ASHRREV_I32(inst);
break;
case Opcode::V_MAD_U32_U24:
translator.V_MAD_U32_U24(inst);
break;
case Opcode::S_AND_B32:
translator.S_AND_B32(inst);
break;
case Opcode::S_LSHR_B32:
translator.S_LSHR_B32(inst);
break;
case Opcode::S_CSELECT_B32:
translator.S_CSELECT_B32(inst);
break;
case Opcode::S_BFE_U32:
translator.S_BFE_U32(inst);
break;
case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:
case Opcode::S_CBRANCH_SCC1:
case Opcode::S_BRANCH:
case Opcode::S_MOV_B64:
case Opcode::S_WQM_B64:
case Opcode::V_INTERP_P1_F32:
case Opcode::S_ENDPGM: