Merge branch 'main' into shader_recompiler/format

georgemoralis 2024-08-29 10:18:12 +03:00 committed by GitHub
commit 18e95ae4c0
138 changed files with 24513 additions and 1242 deletions


@@ -21,8 +21,13 @@ struct Compare {
}
};
static IR::Condition MakeCondition(Opcode opcode) {
switch (opcode) {
static IR::Condition MakeCondition(const GcnInst& inst) {
if (inst.IsCmpx()) {
ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32);
return IR::Condition::Execnz;
}
switch (inst.opcode) {
case Opcode::S_CBRANCH_SCC0:
return IR::Condition::Scc0;
case Opcode::S_CBRANCH_SCC1:
@@ -37,7 +42,6 @@ static IR::Condition MakeCondition(Opcode opcode) {
return IR::Condition::Execnz;
case Opcode::S_AND_SAVEEXEC_B64:
case Opcode::S_ANDN2_B64:
case Opcode::V_CMPX_NE_U32:
return IR::Condition::Execnz;
default:
return IR::Condition::True;
@@ -94,7 +98,8 @@ void CFG::EmitDivergenceLabels() {
// While this instruction does not save EXEC, it is often paired with SAVEEXEC
// to mask the threads that did not pass the condition of the initial branch.
inst.opcode == Opcode::S_ANDN2_B64 || inst.opcode == Opcode::V_CMPX_NE_U32;
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) ||
inst.opcode == Opcode::V_CMPX_NE_U32;
};
const auto is_close_scope = [](const GcnInst& inst) {
// Closing an EXEC scope can be either a branch instruction
@@ -104,7 +109,8 @@ void CFG::EmitDivergenceLabels() {
// Sometimes the compiler may insert instructions between the SAVEEXEC and the branch.
// Those instructions need to be wrapped in the condition as well, so allow a branch
// to act as the end-of-scope instruction.
inst.opcode == Opcode::S_CBRANCH_EXECZ || inst.opcode == Opcode::S_ANDN2_B64;
inst.opcode == Opcode::S_CBRANCH_EXECZ ||
(inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo);
};
// Since we will be adding new labels, avoid iterating those as well.
@@ -171,7 +177,7 @@ void CFG::EmitBlocks() {
block->begin_index = GetIndex(start);
block->end_index = end_index;
block->end_inst = end_inst;
block->cond = MakeCondition(end_inst.opcode);
block->cond = MakeCondition(end_inst);
blocks.insert(*block);
}
}
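For context on the divergence heuristics above, here is a rough wave-level model of the EXEC-scope pattern the open/close checks look for. This is an illustrative sketch only: the helper names are invented here, inactive lanes are ignored, and real shaders interleave other instructions between these steps.

// Illustrative model of a 64-lane wave's EXEC handling (not emulator code).
#include <cstdint>

struct Wave {
    uint64_t exec; // active-lane mask
};

// s_and_saveexec_b64 s[0:1], vcc -- opens a scope: saves EXEC, then masks it by the condition.
static uint64_t OpenScope(Wave& w, uint64_t vcc) {
    const uint64_t saved = w.exec;
    w.exec &= vcc;
    return saved; // the saved mask lives in an SGPR pair such as s[0:1]
}

// v_cmpx_ne_u32 -- a CMPX compare writes its result to EXEC as well, so it also opens a scope.
static void CmpxOpenScope(Wave& w, uint64_t cmp_result) {
    w.exec = cmp_result;
}

// s_andn2_b64 exec, s[0:1], exec -- closes/flips the scope to the lanes that were masked off.
static void CloseScope(Wave& w, uint64_t saved) {
    w.exec = saved & ~w.exec;
}

// s_cbranch_execz -- the branch form of a close: skip the region once no lanes remain active.
static bool ShouldSkip(const Wave& w) {
    return w.exec == 0;
}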


@@ -47,4 +47,18 @@ bool GcnInst::IsConditionalBranch() const {
return false;
}
bool GcnInst::IsCmpx() const {
if ((opcode >= Opcode::V_CMPX_F_F32 && opcode <= Opcode::V_CMPX_T_F32) ||
(opcode >= Opcode::V_CMPX_F_F64 && opcode <= Opcode::V_CMPX_T_F64) ||
(opcode >= Opcode::V_CMPSX_F_F32 && opcode <= Opcode::V_CMPSX_T_F32) ||
(opcode >= Opcode::V_CMPSX_F_F64 && opcode <= Opcode::V_CMPSX_T_F64) ||
(opcode >= Opcode::V_CMPX_F_I32 && opcode <= Opcode::V_CMPX_CLASS_F32) ||
(opcode >= Opcode::V_CMPX_F_I64 && opcode <= Opcode::V_CMPX_CLASS_F64) ||
(opcode >= Opcode::V_CMPX_F_U32 && opcode <= Opcode::V_CMPX_T_U32) ||
(opcode >= Opcode::V_CMPX_F_U64 && opcode <= Opcode::V_CMPX_T_U64)) {
return true;
}
return false;
}
} // namespace Shader::Gcn


@@ -203,6 +203,7 @@ struct GcnInst {
bool IsUnconditionalBranch() const;
bool IsConditionalBranch() const;
bool IsFork() const;
bool IsCmpx() const;
};
} // namespace Shader::Gcn


@@ -3,6 +3,7 @@
#include <algorithm>
#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <utility>


@@ -6,96 +6,150 @@
namespace Shader::Gcn {
void Translator::EmitScalarAlu(const GcnInst& inst) {
switch (inst.encoding) {
case InstEncoding::SOPC: {
EmitSOPC(inst);
break;
}
case InstEncoding::SOPK: {
EmitSOPK(inst);
break;
}
default:
switch (inst.opcode) {
case Opcode::S_MOV_B32:
return S_MOV(inst);
case Opcode::S_MUL_I32:
return S_MUL_I32(inst);
case Opcode::S_AND_SAVEEXEC_B64:
return S_AND_SAVEEXEC_B64(inst);
case Opcode::S_MOV_B64:
return S_MOV_B64(inst);
case Opcode::S_OR_B64:
return S_OR_B64(NegateMode::None, false, inst);
case Opcode::S_NOR_B64:
return S_OR_B64(NegateMode::Result, false, inst);
case Opcode::S_XOR_B64:
return S_OR_B64(NegateMode::None, true, inst);
case Opcode::S_ORN2_B64:
return S_OR_B64(NegateMode::Src1, false, inst);
case Opcode::S_AND_B64:
return S_AND_B64(NegateMode::None, inst);
case Opcode::S_NAND_B64:
return S_AND_B64(NegateMode::Result, inst);
case Opcode::S_ANDN2_B64:
return S_AND_B64(NegateMode::Src1, inst);
case Opcode::S_NOT_B64:
return S_NOT_B64(inst);
case Opcode::S_ADD_I32:
return S_ADD_I32(inst);
case Opcode::S_AND_B32:
return S_AND_B32(inst);
case Opcode::S_ASHR_I32:
return S_ASHR_I32(inst);
case Opcode::S_OR_B32:
return S_OR_B32(inst);
case Opcode::S_LSHL_B32:
return S_LSHL_B32(inst);
case Opcode::S_LSHR_B32:
return S_LSHR_B32(inst);
case Opcode::S_CSELECT_B32:
return S_CSELECT_B32(inst);
case Opcode::S_CSELECT_B64:
return S_CSELECT_B64(inst);
case Opcode::S_BFE_U32:
return S_BFE_U32(inst);
case Opcode::S_BFM_B32:
return S_BFM_B32(inst);
case Opcode::S_BREV_B32:
return S_BREV_B32(inst);
case Opcode::S_ADD_U32:
return S_ADD_U32(inst);
case Opcode::S_ADDC_U32:
return S_ADDC_U32(inst);
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
return S_SUB_U32(inst);
case Opcode::S_MIN_U32:
return S_MIN_U32(inst);
case Opcode::S_MAX_U32:
return S_MAX_U32(inst);
case Opcode::S_WQM_B64:
break;
default:
LogMissingOpcode(inst);
}
break;
}
}
void Translator::EmitSOPC(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
return S_MOVK(inst);
case Opcode::S_MOV_B32:
return S_MOV(inst);
case Opcode::S_MUL_I32:
return S_MUL_I32(inst);
case Opcode::S_AND_SAVEEXEC_B64:
return S_AND_SAVEEXEC_B64(inst);
case Opcode::S_MOV_B64:
return S_MOV_B64(inst);
case Opcode::S_CMP_LT_U32:
return S_CMP(ConditionOp::LT, false, inst);
case Opcode::S_CMP_LE_U32:
return S_CMP(ConditionOp::LE, false, inst);
case Opcode::S_CMP_LG_U32:
return S_CMP(ConditionOp::LG, false, inst);
case Opcode::S_CMP_LT_I32:
return S_CMP(ConditionOp::LT, true, inst);
case Opcode::S_CMP_EQ_I32:
return S_CMP(ConditionOp::EQ, true, inst);
case Opcode::S_CMP_LG_I32:
return S_CMP(ConditionOp::LG, true, inst);
case Opcode::S_CMP_GT_I32:
return S_CMP(ConditionOp::GT, true, inst);
case Opcode::S_CMP_LE_I32:
return S_CMP(ConditionOp::LE, true, inst);
case Opcode::S_CMP_GE_I32:
return S_CMP(ConditionOp::GE, true, inst);
case Opcode::S_CMP_EQ_I32:
return S_CMP(ConditionOp::EQ, true, inst);
case Opcode::S_CMP_LT_I32:
return S_CMP(ConditionOp::LT, true, inst);
case Opcode::S_CMP_LE_I32:
return S_CMP(ConditionOp::LE, true, inst);
case Opcode::S_CMP_EQ_U32:
return S_CMP(ConditionOp::EQ, false, inst);
case Opcode::S_CMP_GE_U32:
return S_CMP(ConditionOp::GE, false, inst);
case Opcode::S_CMP_LG_U32:
return S_CMP(ConditionOp::LG, false, inst);
case Opcode::S_CMP_GT_U32:
return S_CMP(ConditionOp::GT, false, inst);
case Opcode::S_OR_B64:
return S_OR_B64(NegateMode::None, false, inst);
case Opcode::S_NOR_B64:
return S_OR_B64(NegateMode::Result, false, inst);
case Opcode::S_XOR_B64:
return S_OR_B64(NegateMode::None, true, inst);
case Opcode::S_ORN2_B64:
return S_OR_B64(NegateMode::Src1, false, inst);
case Opcode::S_AND_B64:
return S_AND_B64(NegateMode::None, inst);
case Opcode::S_NAND_B64:
return S_AND_B64(NegateMode::Result, inst);
case Opcode::S_ANDN2_B64:
return S_AND_B64(NegateMode::Src1, inst);
case Opcode::S_NOT_B64:
return S_NOT_B64(inst);
case Opcode::S_ADD_I32:
return S_ADD_I32(inst);
case Opcode::S_AND_B32:
return S_AND_B32(inst);
case Opcode::S_ASHR_I32:
return S_ASHR_I32(inst);
case Opcode::S_OR_B32:
return S_OR_B32(inst);
case Opcode::S_LSHL_B32:
return S_LSHL_B32(inst);
case Opcode::S_LSHR_B32:
return S_LSHR_B32(inst);
case Opcode::S_CSELECT_B32:
return S_CSELECT_B32(inst);
case Opcode::S_CSELECT_B64:
return S_CSELECT_B64(inst);
case Opcode::S_BFE_U32:
return S_BFE_U32(inst);
case Opcode::S_BFM_B32:
return S_BFM_B32(inst);
case Opcode::S_BREV_B32:
return S_BREV_B32(inst);
case Opcode::S_ADD_U32:
return S_ADD_U32(inst);
case Opcode::S_ADDC_U32:
return S_ADDC_U32(inst);
case Opcode::S_CMP_GE_U32:
return S_CMP(ConditionOp::GE, false, inst);
case Opcode::S_CMP_LT_U32:
return S_CMP(ConditionOp::LT, false, inst);
case Opcode::S_CMP_LE_U32:
return S_CMP(ConditionOp::LE, false, inst);
default:
LogMissingOpcode(inst);
}
}
void Translator::EmitSOPK(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
return S_MOVK(inst);
case Opcode::S_CMPK_EQ_I32:
return S_CMPK(ConditionOp::EQ, true, inst);
case Opcode::S_CMPK_LG_I32:
return S_CMPK(ConditionOp::LG, true, inst);
case Opcode::S_CMPK_GT_I32:
return S_CMPK(ConditionOp::GT, true, inst);
case Opcode::S_CMPK_GE_I32:
return S_CMPK(ConditionOp::GE, true, inst);
case Opcode::S_CMPK_LT_I32:
return S_CMPK(ConditionOp::LT, true, inst);
case Opcode::S_CMPK_LE_I32:
return S_CMPK(ConditionOp::LE, true, inst);
case Opcode::S_CMPK_EQ_U32:
return S_CMPK(ConditionOp::EQ, false, inst);
case Opcode::S_CMPK_LG_U32:
return S_CMPK(ConditionOp::LG, false, inst);
case Opcode::S_CMPK_GT_U32:
return S_CMPK(ConditionOp::GT, false, inst);
case Opcode::S_CMPK_GE_U32:
return S_CMPK(ConditionOp::GE, false, inst);
case Opcode::S_CMPK_LT_U32:
return S_CMPK(ConditionOp::LT, false, inst);
case Opcode::S_CMPK_LE_U32:
return S_CMPK(ConditionOp::LE, false, inst);
case Opcode::S_ADDK_I32:
return S_ADDK_I32(inst);
case Opcode::S_MULK_I32:
return S_MULK_I32(inst);
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
return S_SUB_U32(inst);
case Opcode::S_MIN_U32:
return S_MIN_U32(inst);
case Opcode::S_MAX_U32:
return S_MAX_U32(inst);
case Opcode::S_WQM_B64:
break;
default:
LogMissingOpcode(inst);
}
@@ -152,6 +206,31 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
ir.SetScc(result);
}
void Translator::S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst) {
const s32 simm16 = inst.control.sopk.simm;
const IR::U32 lhs = GetSrc(inst.dst[0]);
const IR::U32 rhs = ir.Imm32(simm16);
const IR::U1 result = [&] {
switch (cond) {
case ConditionOp::EQ:
return ir.IEqual(lhs, rhs);
case ConditionOp::LG:
return ir.INotEqual(lhs, rhs);
case ConditionOp::GT:
return ir.IGreaterThan(lhs, rhs, is_signed);
case ConditionOp::GE:
return ir.IGreaterThanEqual(lhs, rhs, is_signed);
case ConditionOp::LT:
return ir.ILessThan(lhs, rhs, is_signed);
case ConditionOp::LE:
return ir.ILessThanEqual(lhs, rhs, is_signed);
default:
UNREACHABLE();
}
}();
ir.SetScc(result);
}
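As a plain-C++ illustration of what the helper above computes, S_CMPK compares the destination SGPR with the sign-extended 16-bit immediate and stores the result in SCC. The sketch below is an assumption-level model with invented names, not code from this commit.

// Minimal scalar model of S_CMPK_GT_I32 (illustrative only).
#include <cstdint>

static bool ScalarCmpkGtI32(int32_t sdst, int16_t simm16) {
    // SCC = (SDST > sign_extend(SIMM16))
    return sdst > static_cast<int32_t>(simm16);
}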
void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs).
// However, here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination


@@ -69,6 +69,10 @@ public:
void EmitScalarAlu(const GcnInst& inst);
void EmitVectorAlu(const GcnInst& inst);
// Instruction encodings
void EmitSOPC(const GcnInst& inst);
void EmitSOPK(const GcnInst& inst);
// Scalar ALU
void S_MOVK(const GcnInst& inst);
void S_MOV(const GcnInst& inst);
@@ -98,6 +102,7 @@ public:
void S_ADDK_I32(const GcnInst& inst);
void S_MAX_U32(const GcnInst& inst);
void S_MIN_U32(const GcnInst& inst);
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);
// Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@@ -116,6 +121,7 @@ public:
void V_AND_B32(const GcnInst& inst);
void V_LSHLREV_B32(const GcnInst& inst);
void V_LSHL_B32(const GcnInst& inst);
void V_LSHL_B64(const GcnInst& inst);
void V_ADD_I32(const GcnInst& inst);
void V_ADDC_U32(const GcnInst& inst);
void V_CVT_F32_I32(const GcnInst& inst);


@@ -11,6 +11,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_LSHLREV_B32(inst);
case Opcode::V_LSHL_B32:
return V_LSHL_B32(inst);
case Opcode::V_LSHL_B64:
return V_LSHL_B64(inst);
case Opcode::V_BFREV_B32:
return V_BFREV_B32(inst);
case Opcode::V_BFE_U32:
@@ -280,6 +282,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_CMP_U32(ConditionOp::GT, true, false, inst);
case Opcode::V_CMP_LT_I32:
return V_CMP_U32(ConditionOp::LT, true, false, inst);
case Opcode::V_CMPX_GT_I32:
return V_CMP_U32(ConditionOp::GT, true, true, inst);
case Opcode::V_CMPX_LT_I32:
return V_CMP_U32(ConditionOp::LT, true, true, inst);
case Opcode::V_CMPX_F_U32:
@@ -305,7 +309,6 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_MBCNT_U32_B32(true, inst);
case Opcode::V_MBCNT_HI_U32_B32:
return V_MBCNT_U32_B32(false, inst);
case Opcode::V_NOP:
return;
default:
@@ -389,6 +392,16 @@ void Translator::V_LSHL_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
}
void Translator::V_LSHL_B64(const GcnInst& inst) {
const IR::U64 src0{GetSrc64(inst.src[0])};
const IR::U64 src1{GetSrc64(inst.src[1])};
const IR::VectorReg dst_reg{inst.dst[0].code};
ASSERT_MSG(src0.IsImmediate() && src0.U64() == 0 && src1.IsImmediate() && src1.U64() == 0,
"V_LSHL_B64 with non-zero src0 or src1 is not supported");
ir.SetVectorReg(dst_reg, ir.Imm32(0));
ir.SetVectorReg(dst_reg + 1, ir.Imm32(0));
}
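The translation above only handles the degenerate case where both sources are zero immediates. For reference, the full instruction performs a 64-bit logical shift left, roughly as sketched below; this is assumed semantics (shift amount taken from the low 6 bits), not code from this commit.

// Assumed general semantics of V_LSHL_B64 (illustrative only).
#include <cstdint>

static uint64_t LshlB64(uint64_t src0, uint64_t src1) {
    return src0 << (src1 & 0x3F); // shift amount limited to the low 6 bits
}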
void Translator::V_ADD_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};


@@ -94,6 +94,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
case Opcode::TBUFFER_STORE_FORMAT_X:
return BUFFER_STORE_FORMAT(1, true, true, inst);
case Opcode::TBUFFER_STORE_FORMAT_XY:
return BUFFER_STORE_FORMAT(2, true, true, inst);
case Opcode::TBUFFER_STORE_FORMAT_XYZ:
return BUFFER_STORE_FORMAT(3, true, true, inst);
@@ -109,6 +111,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
// Buffer atomic operations
case Opcode::BUFFER_ATOMIC_ADD:
return BUFFER_ATOMIC(AtomicOp::Add, inst);
case Opcode::BUFFER_ATOMIC_SWAP:
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
default:
LogMissingOpcode(inst);
}
@@ -474,7 +478,7 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
const IR::Value original_val = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.BufferAtomicExchange(handle, address, vdata_val, info);
return ir.BufferAtomicSwap(handle, address, vdata_val, info);
case AtomicOp::Add:
return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
case AtomicOp::Smin: