mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-08 01:56:21 +00:00
video_core: Implement basic compute shaders and more instructions
This commit is contained in:
parent
10bceb1643
commit
58de7ff55a
58 changed files with 1234 additions and 293 deletions
|
@ -42,7 +42,7 @@ static IR::Condition MakeCondition(Opcode opcode) {
|
|||
|
||||
CFG::CFG(ObjectPool<Block>& block_pool_, std::span<const GcnInst> inst_list_)
|
||||
: block_pool{block_pool_}, inst_list{inst_list_} {
|
||||
index_to_pc.resize(inst_list.size());
|
||||
index_to_pc.resize(inst_list.size() + 1);
|
||||
EmitLabels();
|
||||
EmitBlocks();
|
||||
LinkBlocks();
|
||||
|
@ -78,6 +78,7 @@ void CFG::EmitLabels() {
|
|||
}
|
||||
pc += inst.length;
|
||||
}
|
||||
index_to_pc[inst_list.size()] = pc;
|
||||
|
||||
// Sort labels to make sure block insertion is correct.
|
||||
std::ranges::sort(labels);
|
||||
|
@ -90,7 +91,7 @@ void CFG::EmitBlocks() {
|
|||
}
|
||||
const auto it_index = std::ranges::lower_bound(index_to_pc, label);
|
||||
ASSERT(it_index != index_to_pc.end() || label > index_to_pc.back());
|
||||
return std::distance(index_to_pc.begin(), std::prev(it_index));
|
||||
return std::distance(index_to_pc.begin(), it_index);
|
||||
};
|
||||
|
||||
for (auto it = labels.begin(); it != labels.end(); it++) {
|
||||
|
@ -102,7 +103,7 @@ void CFG::EmitBlocks() {
|
|||
return;
|
||||
}
|
||||
const Label end = *next_it;
|
||||
const size_t end_index = get_index(end);
|
||||
const size_t end_index = get_index(end) - 1;
|
||||
const auto& end_inst = inst_list[end_index];
|
||||
|
||||
// Insert block between the labels using the last instruction
|
||||
|
@ -146,9 +147,15 @@ void CFG::LinkBlocks() {
|
|||
block.branch_true = get_block(target_pc);
|
||||
block.branch_false = get_block(block.end);
|
||||
block.end_class = EndClass::Branch;
|
||||
} else if (end_inst.opcode == Opcode::S_ENDPGM) {
|
||||
const auto& prev_inst = inst_list[block.end_index - 1];
|
||||
if (prev_inst.opcode == Opcode::EXP && prev_inst.control.exp.en == 0) {
|
||||
block.end_class = EndClass::Kill;
|
||||
} else {
|
||||
block.end_class = EndClass::Exit;
|
||||
}
|
||||
} else {
|
||||
// Exit blocks don't link to anything.
|
||||
block.end_class = EndClass::Exit;
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -187,12 +194,12 @@ std::string CFG::Dot() const {
|
|||
fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", node_uid);
|
||||
++node_uid;
|
||||
break;
|
||||
// case EndClass::Kill:
|
||||
// dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
||||
// dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
|
||||
// node_uid);
|
||||
// ++node_uid;
|
||||
// break;
|
||||
case EndClass::Kill:
|
||||
dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
|
||||
dot +=
|
||||
fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", node_uid);
|
||||
++node_uid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dot += "\t\tlabel = \"main\";\n\t}\n";
|
||||
|
|
|
@ -21,6 +21,7 @@ using Hook =
|
|||
/// Classifies the way a control-flow-graph block terminates.
enum class EndClass {
    /// The block's last instruction is a conditional or unconditional branch.
    Branch,
    /// The block's last instruction exits the program.
    Exit,
    /// The block's last instruction discards the invocation.
    Kill,
};
|
||||
|
||||
/// A block represents a linear range of instructions.
|
||||
|
|
|
@ -684,7 +684,7 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
|
|||
outputMod.clamp = static_cast<bool>(control.clmp);
|
||||
switch (control.omod) {
|
||||
case 0:
|
||||
outputMod.multiplier = std::numeric_limits<float>::quiet_NaN();
|
||||
outputMod.multiplier = 0.f;
|
||||
break;
|
||||
case 1:
|
||||
outputMod.multiplier = 2.0f;
|
||||
|
|
|
@ -33,7 +33,7 @@ struct InputModifiers {
|
|||
/// These are applied before storing an operand register.
|
||||
struct OutputModifiers {
|
||||
bool clamp = false;
|
||||
float multiplier = std::numeric_limits<float>::quiet_NaN();
|
||||
float multiplier = 0.f;
|
||||
};
|
||||
|
||||
struct InstOperand {
|
||||
|
|
|
@ -409,9 +409,9 @@ private:
|
|||
case EndClass::Exit:
|
||||
root.insert(ip, *pool.Create(Return{}, &root_stmt));
|
||||
break;
|
||||
// case EndClass::Kill:
|
||||
// root.insert(ip, *pool.Create(Kill{}, &root_stmt));
|
||||
// break;
|
||||
case EndClass::Kill:
|
||||
root.insert(ip, *pool.Create(Kill{}, &root_stmt));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -606,8 +606,7 @@ public:
|
|||
Visit(root_stmt, nullptr, nullptr);
|
||||
|
||||
IR::Block& first_block{*syntax_list.front().data.block};
|
||||
IR::IREmitter ir(first_block, first_block.begin());
|
||||
ir.Prologue();
|
||||
Translator{&first_block, info}.EmitPrologue();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -767,7 +766,7 @@ private:
|
|||
case StatementType::Kill: {
|
||||
ensure_block();
|
||||
IR::Block* demote_block{MergeBlock(parent, stmt)};
|
||||
// IR::IREmitter{*current_block}.DemoteToHelperInvocation();
|
||||
IR::IREmitter{*current_block}.Discard();
|
||||
current_block->AddBranch(demote_block);
|
||||
current_block = demote_block;
|
||||
|
||||
|
|
|
@ -30,9 +30,16 @@ void Translator::S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst) {
|
|||
return ir.ILessThan(lhs, rhs, is_signed);
|
||||
case ConditionOp::LE:
|
||||
return ir.ILessThanEqual(lhs, rhs, is_signed);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
// ir.SetScc(result);
|
||||
ir.SetScc(result);
|
||||
}
|
||||
|
||||
/// Placeholder translation of S_ANDN2_B64.
/// No and-not is computed yet; the current VCC value is simply forwarded into SCC.
void Translator::S_ANDN2_B64(const GcnInst& inst) {
    // TODO: Actually implement this.
    const auto vcc = ir.GetVcc();
    ir.SetScc(vcc);
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -34,13 +34,11 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
|||
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
|
||||
const auto& smrd = inst.control.smrd;
|
||||
const IR::ScalarReg sbase{inst.src[0].code * 2};
|
||||
const IR::U32 offset =
|
||||
smrd.imm ? ir.Imm32(smrd.offset * 4)
|
||||
: IR::U32{ir.ShiftLeftLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)),
|
||||
ir.Imm32(2))};
|
||||
const IR::U32 dword_offset =
|
||||
smrd.imm ? ir.Imm32(smrd.offset) : ir.GetScalarReg(IR::ScalarReg(smrd.offset));
|
||||
const IR::Value vsharp = ir.GetScalarReg(sbase);
|
||||
const IR::ScalarReg dst_reg{inst.dst[0].code};
|
||||
Load(ir, num_dwords, vsharp, dst_reg, offset);
|
||||
Load(ir, num_dwords, vsharp, dst_reg, dword_offset);
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -9,7 +9,18 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} {
|
||||
Translator::Translator(IR::Block* block_, Info& info_)
|
||||
: ir{*block_, block_->begin()}, info{info_} {}
|
||||
|
||||
void Translator::EmitPrologue() {
|
||||
ir.Prologue();
|
||||
|
||||
// Initialize user data.
|
||||
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
|
||||
for (u32 i = 0; i < info.num_user_data; i++) {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetUserData(dst_sreg));
|
||||
}
|
||||
|
||||
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
||||
switch (info.stage) {
|
||||
case Stage::Vertex:
|
||||
|
@ -29,69 +40,108 @@ Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*bloc
|
|||
}
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace));
|
||||
break;
|
||||
case Stage::Compute:
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
||||
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0));
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1));
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Unknown shader stage");
|
||||
}
|
||||
|
||||
// Initialize user data.
|
||||
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetUserData(dst_sreg));
|
||||
}
|
||||
}
|
||||
|
||||
IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
|
||||
IR::U32F32 value{};
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR:
|
||||
if (operand.type == ScalarType::Float32 || force_flt) {
|
||||
return ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||
value = ir.GetScalarReg<IR::F32>(IR::ScalarReg(operand.code));
|
||||
} else {
|
||||
return ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
|
||||
value = ir.GetScalarReg<IR::U32>(IR::ScalarReg(operand.code));
|
||||
}
|
||||
break;
|
||||
case OperandField::VectorGPR:
|
||||
if (operand.type == ScalarType::Float32 || force_flt) {
|
||||
return ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
|
||||
value = ir.GetVectorReg<IR::F32>(IR::VectorReg(operand.code));
|
||||
} else {
|
||||
return ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
|
||||
value = ir.GetVectorReg<IR::U32>(IR::VectorReg(operand.code));
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstZero:
|
||||
if (force_flt) {
|
||||
return ir.Imm32(0.f);
|
||||
value = ir.Imm32(0.f);
|
||||
} else {
|
||||
return ir.Imm32(0U);
|
||||
value = ir.Imm32(0U);
|
||||
}
|
||||
break;
|
||||
case OperandField::SignedConstIntPos:
|
||||
ASSERT(!force_flt);
|
||||
return ir.Imm32(operand.code - SignedConstIntPosMin + 1);
|
||||
value = ir.Imm32(operand.code - SignedConstIntPosMin + 1);
|
||||
break;
|
||||
case OperandField::SignedConstIntNeg:
|
||||
ASSERT(!force_flt);
|
||||
return ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
|
||||
value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
|
||||
break;
|
||||
case OperandField::LiteralConst:
|
||||
ASSERT(!force_flt);
|
||||
return ir.Imm32(operand.code);
|
||||
if (force_flt) {
|
||||
value = ir.Imm32(std::bit_cast<float>(operand.code));
|
||||
} else {
|
||||
value = ir.Imm32(operand.code);
|
||||
}
|
||||
break;
|
||||
case OperandField::ConstFloatPos_1_0:
|
||||
return ir.Imm32(1.f);
|
||||
value = ir.Imm32(1.f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_0_5:
|
||||
return ir.Imm32(0.5f);
|
||||
value = ir.Imm32(0.5f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_2_0:
|
||||
return ir.Imm32(2.0f);
|
||||
value = ir.Imm32(2.0f);
|
||||
break;
|
||||
case OperandField::ConstFloatPos_4_0:
|
||||
return ir.Imm32(4.0f);
|
||||
value = ir.Imm32(4.0f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_0_5:
|
||||
return ir.Imm32(-0.5f);
|
||||
value = ir.Imm32(-0.5f);
|
||||
break;
|
||||
case OperandField::ConstFloatNeg_1_0:
|
||||
return ir.Imm32(-1.0f);
|
||||
value = ir.Imm32(-1.0f);
|
||||
break;
|
||||
case OperandField::VccLo:
|
||||
value = ir.GetVccLo();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
if (operand.input_modifier.abs) {
|
||||
value = ir.FPAbs(value);
|
||||
}
|
||||
if (operand.input_modifier.neg) {
|
||||
value = ir.FPNeg(value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
|
||||
IR::U32F32 result = value;
|
||||
if (operand.output_modifier.multiplier != 0.f) {
|
||||
result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
|
||||
}
|
||||
if (operand.output_modifier.clamp) {
|
||||
result = ir.FPSaturate(value);
|
||||
}
|
||||
switch (operand.field) {
|
||||
case OperandField::ScalarGPR:
|
||||
return ir.SetScalarReg(IR::ScalarReg(operand.code), value);
|
||||
return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
|
||||
case OperandField::VectorGPR:
|
||||
return ir.SetVectorReg(IR::VectorReg(operand.code), value);
|
||||
return ir.SetVectorReg(IR::VectorReg(operand.code), result);
|
||||
case OperandField::VccLo:
|
||||
return ir.SetVccLo(result);
|
||||
case OperandField::VccHi:
|
||||
case OperandField::M0:
|
||||
break; // Ignore for now
|
||||
|
@ -168,6 +218,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CVT_F32_U32:
|
||||
translator.V_CVT_F32_U32(inst);
|
||||
break;
|
||||
case Opcode::V_RCP_F32:
|
||||
translator.V_RCP_F32(inst);
|
||||
break;
|
||||
case Opcode::S_SWAPPC_B64:
|
||||
ASSERT(info.stage == Stage::Vertex);
|
||||
translator.EmitFetch(inst);
|
||||
|
@ -198,18 +251,81 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
|
|||
case Opcode::V_CVT_PKRTZ_F16_F32:
|
||||
translator.V_CVT_PKRTZ_F16_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FRACT_F32:
|
||||
translator.V_FRACT_F32(inst);
|
||||
break;
|
||||
case Opcode::V_ADD_F32:
|
||||
translator.V_ADD_F32(inst);
|
||||
break;
|
||||
case Opcode::V_CVT_OFF_F32_I4:
|
||||
translator.V_CVT_OFF_F32_I4(inst);
|
||||
break;
|
||||
case Opcode::V_MED3_F32:
|
||||
translator.V_MED3_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FLOOR_F32:
|
||||
translator.V_FLOOR_F32(inst);
|
||||
break;
|
||||
case Opcode::V_SUB_F32:
|
||||
translator.V_SUB_F32(inst);
|
||||
break;
|
||||
case Opcode::V_FMA_F32:
|
||||
case Opcode::V_MADAK_F32: // Yes these can share the opcode
|
||||
translator.V_FMA_F32(inst);
|
||||
break;
|
||||
case Opcode::IMAGE_SAMPLE:
|
||||
translator.IMAGE_SAMPLE(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_U32:
|
||||
translator.V_CMP_EQ_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CMPX_GT_U32:
|
||||
translator.V_CMPX_GT_U32(inst);
|
||||
break;
|
||||
case Opcode::V_CMP_F_F32:
|
||||
translator.V_CMP_F32(ConditionOp::F, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LT, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_EQ_F32:
|
||||
translator.V_CMP_F32(ConditionOp::EQ, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LE, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GT_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GT, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_LG_F32:
|
||||
translator.V_CMP_F32(ConditionOp::LG, inst);
|
||||
break;
|
||||
case Opcode::V_CMP_GE_F32:
|
||||
translator.V_CMP_F32(ConditionOp::GE, inst);
|
||||
break;
|
||||
case Opcode::S_CMP_LG_U32:
|
||||
translator.S_CMP(ConditionOp::LG, false, inst);
|
||||
break;
|
||||
case Opcode::V_CNDMASK_B32:
|
||||
translator.V_CNDMASK_B32(inst);
|
||||
break;
|
||||
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
|
||||
translator.TBUFFER_LOAD_FORMAT_XYZW(inst);
|
||||
translator.BUFFER_LOAD_FORMAT(4, true, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_LOAD_FORMAT_X:
|
||||
translator.BUFFER_LOAD_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::BUFFER_STORE_FORMAT_X:
|
||||
translator.BUFFER_STORE_FORMAT(1, false, inst);
|
||||
break;
|
||||
case Opcode::V_MAX_F32:
|
||||
translator.V_MAX_F32(inst);
|
||||
break;
|
||||
case Opcode::S_ANDN2_B64:
|
||||
translator.S_ANDN2_B64(inst);
|
||||
break;
|
||||
case Opcode::S_CBRANCH_EXECZ:
|
||||
case Opcode::S_CBRANCH_SCC0:
|
||||
case Opcode::S_MOV_B64:
|
||||
case Opcode::S_WQM_B64:
|
||||
case Opcode::V_INTERP_P1_F32:
|
||||
|
|
|
@ -16,6 +16,7 @@ struct Info;
|
|||
namespace Shader::Gcn {
|
||||
|
||||
enum class ConditionOp : u32 {
|
||||
F,
|
||||
EQ,
|
||||
LG,
|
||||
GT,
|
||||
|
@ -28,12 +29,14 @@ class Translator {
|
|||
public:
|
||||
explicit Translator(IR::Block* block_, Info& info);
|
||||
|
||||
void EmitPrologue();
|
||||
void EmitFetch(const GcnInst& inst);
|
||||
|
||||
// Scalar ALU
|
||||
void S_MOV(const GcnInst& inst);
|
||||
void S_MUL_I32(const GcnInst& inst);
|
||||
void S_CMP(ConditionOp cond, bool is_signed, const GcnInst& inst);
|
||||
void S_ANDN2_B64(const GcnInst& inst);
|
||||
|
||||
// Scalar Memory
|
||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||
|
@ -53,9 +56,21 @@ public:
|
|||
void V_CVT_F32_I32(const GcnInst& inst);
|
||||
void V_CVT_F32_U32(const GcnInst& inst);
|
||||
void V_MAD_F32(const GcnInst& inst);
|
||||
void V_FRACT_F32(const GcnInst& inst);
|
||||
void V_ADD_F32(const GcnInst& inst);
|
||||
void V_CVT_OFF_F32_I4(const GcnInst& inst);
|
||||
void V_MED3_F32(const GcnInst& inst);
|
||||
void V_FLOOR_F32(const GcnInst& inst);
|
||||
void V_SUB_F32(const GcnInst& inst);
|
||||
void V_RCP_F32(const GcnInst& inst);
|
||||
void V_CMPX_GT_U32(const GcnInst& inst);
|
||||
void V_FMA_F32(const GcnInst& inst);
|
||||
void V_CMP_F32(ConditionOp op, const GcnInst& inst);
|
||||
void V_MAX_F32(const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst);
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
|
||||
|
||||
// Vector interpolation
|
||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||
|
@ -76,7 +91,6 @@ private:
|
|||
void SetDst(const InstOperand& operand, const IR::U32F32& value);
|
||||
|
||||
private:
|
||||
IR::Block* block;
|
||||
IR::IREmitter ir;
|
||||
Info& info;
|
||||
};
|
||||
|
|
|
@ -102,4 +102,95 @@ void Translator::V_MAD_F32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||
}
|
||||
|
||||
void Translator::V_FRACT_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, ir.Fract(src0));
|
||||
}
|
||||
|
||||
void Translator::V_ADD_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPAdd(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(
|
||||
dst_reg,
|
||||
ir.FPMul(ir.ConvertUToF(32, 32, ir.ISub(ir.BitwiseAnd(src0, ir.Imm32(0xF)), ir.Imm32(8))),
|
||||
ir.Imm32(1.f / 16.f)));
|
||||
}
|
||||
|
||||
void Translator::V_MED3_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
const IR::F32 src2{GetSrc(inst.src[2])};
|
||||
const IR::F32 mmx = ir.FPMin(ir.FPMax(src0, src1), src2);
|
||||
SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx));
|
||||
}
|
||||
|
||||
void Translator::V_FLOOR_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
ir.SetVectorReg(dst_reg, ir.FPFloor(src0));
|
||||
}
|
||||
|
||||
void Translator::V_SUB_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
const IR::F32 src1{GetSrc(inst.src[1])};
|
||||
SetDst(inst.dst[0], ir.FPSub(src0, src1));
|
||||
}
|
||||
|
||||
void Translator::V_RCP_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0])};
|
||||
SetDst(inst.dst[0], ir.FPRecip(src0));
|
||||
}
|
||||
|
||||
void Translator::V_CMPX_GT_U32(const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U1 result = ir.IGreaterThan(src0, src1, false);
|
||||
ir.SetVcc(result);
|
||||
ir.SetExec(result);
|
||||
}
|
||||
|
||||
void Translator::V_FMA_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
const IR::F32 src2{GetSrc(inst.src[2], true)};
|
||||
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
|
||||
}
|
||||
|
||||
/// Translates the V_CMP_*_F32 family: compares src0 against src1 as floats using the
/// relation selected by `op` and stores the boolean outcome in VCC.
///
/// @param op   Comparison to perform. F always yields false; EQ, LG, GT, LT, LE and GE map
///             to the corresponding IR float comparison. Any other value is a translator bug.
/// @param inst Decoded instruction providing the two source operands.
void Translator::V_CMP_F32(ConditionOp op, const GcnInst& inst) {
    const IR::F32 src0{GetSrc(inst.src[0], true)};
    const IR::F32 src1{GetSrc(inst.src[1], true)};
    const IR::U1 result = [&] {
        switch (op) {
        case ConditionOp::F:
            return ir.Imm1(false);
        case ConditionOp::EQ:
            return ir.FPEqual(src0, src1);
        case ConditionOp::LG:
            return ir.FPNotEqual(src0, src1);
        case ConditionOp::GT:
            return ir.FPGreaterThan(src0, src1);
        case ConditionOp::LT:
            return ir.FPLessThan(src0, src1);
        case ConditionOp::LE:
            return ir.FPLessThanEqual(src0, src1);
        case ConditionOp::GE:
            return ir.FPGreaterThanEqual(src0, src1);
        default:
            // Without this, an unhandled condition would flow off the end of a
            // value-returning lambda (undefined behaviour). Mirrors S_CMP.
            UNREACHABLE();
        }
    }();
    ir.SetVcc(result);
}
|
||||
|
||||
void Translator::V_MAX_F32(const GcnInst& inst) {
|
||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||
const IR::F32 src1{GetSrc(inst.src[1], true)};
|
||||
SetDst(inst.dst[0], ir.FPMax(src0, src1));
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -107,7 +107,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void Translator::TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst) {
|
||||
void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) {
|
||||
const auto& mtbuf = inst.control.mtbuf;
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
|
@ -127,15 +127,68 @@ void Translator::TBUFFER_LOAD_FORMAT_XYZW(const GcnInst& inst) {
|
|||
info.index_enable.Assign(mtbuf.idxen);
|
||||
info.offset_enable.Assign(mtbuf.offen);
|
||||
info.inst_offset.Assign(mtbuf.offset);
|
||||
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
|
||||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
info.is_typed.Assign(1);
|
||||
info.is_typed.Assign(is_typed);
|
||||
if (is_typed) {
|
||||
info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
|
||||
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
|
||||
}
|
||||
|
||||
const IR::Value value = ir.LoadBuffer(4, ir.GetScalarReg(sharp), address, info);
|
||||
const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info);
|
||||
const IR::VectorReg dst_reg{inst.src[1].code};
|
||||
for (u32 i = 0; i < 4; i++) {
|
||||
if (num_dwords == 1) {
|
||||
ir.SetVectorReg(dst_reg, IR::F32{value});
|
||||
return;
|
||||
}
|
||||
for (u32 i = 0; i < num_dwords; i++) {
|
||||
ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates a buffer format store: gathers `num_dwords` consecutive vector registers
/// starting at src[1] and emits a StoreBuffer to the resource whose descriptor base is
/// scalar register src[2] * 4.
///
/// @param num_dwords Number of dwords to store; must be in [1, 4].
/// @param is_typed   When true, the data/number format fields of the instruction are
///                   forwarded in the buffer info.
/// @param inst       Decoded instruction (control.mtbuf supplies idxen/offen/offset/dfmt/nfmt).
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst) {
    const auto& mtbuf = inst.control.mtbuf;
    const IR::VectorReg vaddr{inst.src[0].code};
    const IR::ScalarReg sharp{inst.src[2].code * 4};

    // The address operand is two registers when both index and offset are enabled,
    // a single register when only one is, and empty otherwise.
    const IR::Value address = [&]() -> IR::Value {
        if (mtbuf.idxen && mtbuf.offen) {
            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
        }
        if (mtbuf.idxen || mtbuf.offen) {
            return ir.GetVectorReg(vaddr);
        }
        return {};
    }();
    const IR::Value soffset{GetSrc(inst.src[3])};
    ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");

    IR::BufferInstInfo info{};
    info.index_enable.Assign(mtbuf.idxen);
    info.offset_enable.Assign(mtbuf.offen);
    info.inst_offset.Assign(mtbuf.offset);
    info.is_typed.Assign(is_typed);
    if (is_typed) {
        info.dmft.Assign(static_cast<AmdGpu::DataFormat>(mtbuf.dfmt));
        info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
    }

    IR::Value value{};
    const IR::VectorReg src_reg{inst.src[1].code};
    switch (num_dwords) {
    case 1:
        value = ir.GetVectorReg(src_reg);
        break;
    case 2:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1));
        break;
    case 3:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
                                      ir.GetVectorReg(src_reg + 2));
        break;
    case 4:
        value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1),
                                      ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3));
        break;
    default:
        // Previously an out-of-range count fell through and stored an empty value.
        UNREACHABLE();
    }
    ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue