shader_recompiler: Small instruction parsing refactor/bugfixes (#340)

* translator: Implemtn f32 to f16 convert

* shader_recompiler: Add bit instructions

* shader_recompiler: More data share instructions

* shader_recompiler: Remove exec contexts, fix S_MOV_B64

* shader_recompiler: Split instruction parsing into categories

* shader_recompiler: Better BFS search

* shader_recompiler: Constant propagation pass for cmp_class_f32

* shader_recompiler: Partial readfirstlane implementation

* shader_recompiler: Stub readlane/writelane only for non-compute

* hack: Fix swizzle on RDR

* Will properly fix this when merging this

* clang format

* address_space: Bump user area size to full

* shader_recompiler: V_INTERP_MOV_F32

* Should work the same as spirv will emit flat decoration on demand

* kernel: Add MAP_OP_MAP_FLEXIBLE

* image_view: Attempt to apply storage swizzle on format

* vk_scheduler: Barrier attachments on renderpass end

* clang format

* liverpool: cs state backup

* shader_recompiler: More instructions and formats

* vector_alu: Proper V_MBCNT_U32_B32

* shader_recompiler: Port some dark souls things

* file_system: Implement sceKernelRename

* more formats

* clang format

* resource_tracking_pass: Back to assert

* translate: Tracedata

* kernel: Remove tracy lock

* Solves random crashes in Dark Souls

* code: Review comments
This commit is contained in:
TheTurtle 2024-07-31 00:32:40 +03:00 committed by GitHub
parent ac6dc20c3b
commit a7c9bfa5c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 1349 additions and 904 deletions

View file

@ -1479,7 +1479,7 @@ constexpr std::array<InstFormat, 455> InstructionFormatVOP3 = {{
{InstClass::VectorFpGraph32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
ScalarType::Float32},
// 337 = V_MIN3_F32
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
{InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
ScalarType::Float32},
// 338 = V_MIN3_I32
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32,
@ -1488,7 +1488,7 @@ constexpr std::array<InstFormat, 455> InstructionFormatVOP3 = {{
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 340 = V_MAX3_F32
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
{InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
ScalarType::Float32},
// 341 = V_MAX3_I32
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32,
@ -1497,7 +1497,7 @@ constexpr std::array<InstFormat, 455> InstructionFormatVOP3 = {{
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Uint32,
ScalarType::Uint32},
// 343 = V_MED3_F32
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
{InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
ScalarType::Float32},
// 344 = V_MED3_I32
{InstClass::VectorIntArith32, InstCategory::VectorALU, 3, 1, ScalarType::Sint32,
@ -2779,11 +2779,9 @@ constexpr std::array<InstFormat, 256> InstructionFormatDS = {{
// 60 = DS_READ_U16
{InstClass::DsIdxRd, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32},
// 61 = DS_CONSUME
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32},
// 62 = DS_APPEND
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
ScalarType::Undefined},
{InstClass::DsAppendCon, InstCategory::DataShare, 3, 1, ScalarType::Uint32, ScalarType::Uint32},
// 63 = DS_ORDERED_COUNT
{InstClass::GdsOrdCnt, InstCategory::DataShare, 3, 1, ScalarType::Undefined,
ScalarType::Undefined},

View file

@ -76,11 +76,11 @@ struct SMRD {
};
struct InstControlSOPK {
BitField<0, 16, u32> simm;
s16 simm;
};
struct InstControlSOPP {
BitField<0, 16, u32> simm;
s16 simm;
};
struct InstControlVOP3 {

View file

@ -600,13 +600,13 @@ public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Info& info_)
Info& info_, const Profile& profile_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} {
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} {
Visit(root_stmt, nullptr, nullptr);
IR::Block& first_block{*syntax_list.front().data.block};
Translator{&first_block, info}.EmitPrologue();
Translator{&first_block, info, profile}.EmitPrologue();
}
private:
@ -635,7 +635,7 @@ private:
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
info);
info, profile);
}
break;
}
@ -815,16 +815,18 @@ private:
const Block dummy_flow_block{.is_dummy = true};
std::span<const GcnInst> inst_list;
Info& info;
const Profile& profile;
};
} // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
CFG& cfg, Info& info) {
CFG& cfg, Info& info, const Profile& profile) {
ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
TranslatePass{inst_pool, block_pool, stmt_pool, root,
syntax_list, cfg.inst_list, info, profile};
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
return syntax_list;
}

View file

@ -11,12 +11,13 @@
namespace Shader {
struct Info;
}
struct Profile;
} // namespace Shader
namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg,
Info& info);
Info& info, const Profile& profile);
} // namespace Shader::Gcn

View file

@ -5,6 +5,31 @@
namespace Shader::Gcn {
void Translator::EmitDataShare(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::DS_SWIZZLE_B32:
return DS_SWIZZLE_B32(inst);
case Opcode::DS_READ_B32:
return DS_READ(32, false, false, inst);
case Opcode::DS_READ_B64:
return DS_READ(64, false, false, inst);
case Opcode::DS_READ2_B32:
return DS_READ(32, false, true, inst);
case Opcode::DS_READ2_B64:
return DS_READ(64, false, true, inst);
case Opcode::DS_WRITE_B32:
return DS_WRITE(32, false, false, inst);
case Opcode::DS_WRITE_B64:
return DS_WRITE(64, false, false, inst);
case Opcode::DS_WRITE2_B32:
return DS_WRITE(32, false, true, inst);
case Opcode::DS_WRITE2_B64:
return DS_WRITE(64, false, true, inst);
default:
LogMissingOpcode(inst);
}
}
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
const u8 offset0 = inst.control.ds.offset0;
const u8 offset1 = inst.control.ds.offset1;
@ -20,14 +45,25 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) {
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) {
// Pair loads are either 32 or 64-bit. We assume 32-bit for now.
ASSERT(bit_size == 32);
// Pair loads are either 32 or 64-bit
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)});
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
if (bit_size == 32) {
ir.SetVectorReg(dst_reg++, IR::U32{data0});
} else {
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
}
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)});
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
if (bit_size == 32) {
ir.SetVectorReg(dst_reg++, IR::U32{data1});
} else {
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)});
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
}
} else if (bit_size == 64) {
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
@ -43,11 +79,22 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
const IR::VectorReg data0{inst.src[1].code};
const IR::VectorReg data1{inst.src[2].code};
if (is_pair) {
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
if (bit_size == 32) {
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
} else {
ir.WriteShared(
64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
addr0);
}
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
if (bit_size == 32) {
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
} else {
ir.WriteShared(
64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
addr1);
}
} else if (bit_size == 64) {
const IR::Value data =
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
@ -62,7 +109,18 @@ void Translator::S_BARRIER() {
}
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
UNREACHABLE();
ASSERT(info.stage != Stage::Compute);
SetDst(inst.dst[0], GetSrc(inst.src[0]));
}
void Translator::V_READLANE_B32(const GcnInst& inst) {
ASSERT(info.stage != Stage::Compute);
SetDst(inst.dst[0], GetSrc(inst.src[0]));
}
void Translator::V_WRITELANE_B32(const GcnInst& inst) {
ASSERT(info.stage != Stage::Compute);
SetDst(inst.dst[0], GetSrc(inst.src[0]));
}
} // namespace Shader::Gcn

View file

@ -6,7 +6,7 @@
namespace Shader::Gcn {
void Translator::EXP(const GcnInst& inst) {
void Translator::EmitExport(const GcnInst& inst) {
if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation");
ir.Discard(ir.LogicalNot(ir.GetExec()));

View file

@ -5,8 +5,102 @@
namespace Shader::Gcn {
void Translator::EmitScalarAlu(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
return S_MOVK(inst);
case Opcode::S_MOV_B32:
return S_MOV(inst);
case Opcode::S_MUL_I32:
return S_MUL_I32(inst);
case Opcode::S_AND_SAVEEXEC_B64:
return S_AND_SAVEEXEC_B64(inst);
case Opcode::S_MOV_B64:
return S_MOV_B64(inst);
case Opcode::S_CMP_LT_U32:
return S_CMP(ConditionOp::LT, false, inst);
case Opcode::S_CMP_LE_U32:
return S_CMP(ConditionOp::LE, false, inst);
case Opcode::S_CMP_LG_U32:
return S_CMP(ConditionOp::LG, false, inst);
case Opcode::S_CMP_LT_I32:
return S_CMP(ConditionOp::LT, true, inst);
case Opcode::S_CMP_LG_I32:
return S_CMP(ConditionOp::LG, true, inst);
case Opcode::S_CMP_GT_I32:
return S_CMP(ConditionOp::GT, true, inst);
case Opcode::S_CMP_GE_I32:
return S_CMP(ConditionOp::GE, true, inst);
case Opcode::S_CMP_EQ_I32:
return S_CMP(ConditionOp::EQ, true, inst);
case Opcode::S_CMP_EQ_U32:
return S_CMP(ConditionOp::EQ, false, inst);
case Opcode::S_CMP_GE_U32:
return S_CMP(ConditionOp::GE, false, inst);
case Opcode::S_CMP_GT_U32:
return S_CMP(ConditionOp::GT, false, inst);
case Opcode::S_OR_B64:
return S_OR_B64(NegateMode::None, false, inst);
case Opcode::S_NOR_B64:
return S_OR_B64(NegateMode::Result, false, inst);
case Opcode::S_XOR_B64:
return S_OR_B64(NegateMode::None, true, inst);
case Opcode::S_ORN2_B64:
return S_OR_B64(NegateMode::Src1, false, inst);
case Opcode::S_AND_B64:
return S_AND_B64(NegateMode::None, inst);
case Opcode::S_NAND_B64:
return S_AND_B64(NegateMode::Result, inst);
case Opcode::S_ANDN2_B64:
return S_AND_B64(NegateMode::Src1, inst);
case Opcode::S_NOT_B64:
return S_NOT_B64(inst);
case Opcode::S_ADD_I32:
return S_ADD_I32(inst);
case Opcode::S_AND_B32:
return S_AND_B32(inst);
case Opcode::S_ASHR_I32:
return S_ASHR_I32(inst);
case Opcode::S_OR_B32:
return S_OR_B32(inst);
case Opcode::S_LSHL_B32:
return S_LSHL_B32(inst);
case Opcode::S_LSHR_B32:
return S_LSHR_B32(inst);
case Opcode::S_CSELECT_B32:
return S_CSELECT_B32(inst);
case Opcode::S_CSELECT_B64:
return S_CSELECT_B64(inst);
case Opcode::S_BFE_U32:
return S_BFE_U32(inst);
case Opcode::S_BFM_B32:
return S_BFM_B32(inst);
case Opcode::S_BREV_B32:
return S_BREV_B32(inst);
case Opcode::S_ADD_U32:
return S_ADD_U32(inst);
case Opcode::S_ADDC_U32:
return S_ADDC_U32(inst);
case Opcode::S_ADDK_I32:
return S_ADDK_I32(inst);
case Opcode::S_MULK_I32:
return S_MULK_I32(inst);
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
return S_SUB_U32(inst);
case Opcode::S_MIN_U32:
return S_MIN_U32(inst);
case Opcode::S_MAX_U32:
return S_MAX_U32(inst);
case Opcode::S_WQM_B64:
break;
default:
LogMissingOpcode(inst);
}
}
void Translator::S_MOVK(const GcnInst& inst) {
const auto simm16 = inst.control.sopk.simm.Value();
const auto simm16 = inst.control.sopk.simm;
if (simm16 & (1 << 15)) {
// TODO: need to verify the case of imm sign extension
UNREACHABLE();
@ -14,6 +108,16 @@ void Translator::S_MOVK(const GcnInst& inst) {
SetDst(inst.dst[0], ir.Imm32(simm16));
}
void Translator::S_ADDK_I32(const GcnInst& inst) {
const s32 simm16 = inst.control.sopk.simm;
SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
}
void Translator::S_MULK_I32(const GcnInst& inst) {
const s32 simm16 = inst.control.sopk.simm;
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
}
void Translator::S_MOV(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc(inst.src[0]));
}
@ -62,15 +166,10 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
}
}();
// Mark destination SPGR as an EXEC context. This means we will use 1-bit
// IR instruction whenever it's loaded.
switch (inst.dst[0].field) {
case OperandField::ScalarGPR: {
const u32 reg = inst.dst[0].code;
exec_contexts[reg] = true;
ir.SetThreadBitScalarReg(IR::ScalarReg(reg), exec);
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), exec);
break;
}
case OperandField::VccLo:
ir.SetVcc(exec);
break;
@ -79,27 +178,37 @@ void Translator::S_AND_SAVEEXEC_B64(const GcnInst& inst) {
}
// Update EXEC.
ir.SetExec(ir.LogicalAnd(exec, src));
const IR::U1 result = ir.LogicalAnd(exec, src);
ir.SetExec(result);
ir.SetScc(result);
}
void Translator::S_MOV_B64(const GcnInst& inst) {
// TODO: Using VCC as EXEC context.
if (inst.src[0].field == OperandField::VccLo || inst.dst[0].field == OperandField::VccLo) {
return;
}
if (inst.dst[0].field == OperandField::ScalarGPR && inst.src[0].field == OperandField::ExecLo) {
// Exec context push
exec_contexts[inst.dst[0].code] = true;
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), ir.GetExec());
} else if (inst.dst[0].field == OperandField::ExecLo &&
inst.src[0].field == OperandField::ScalarGPR) {
// Exec context pop
exec_contexts[inst.src[0].code] = false;
ir.SetExec(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)));
} else if (inst.dst[0].field == OperandField::ExecLo &&
inst.src[0].field == OperandField::ConstZero) {
ir.SetExec(ir.Imm1(false));
} else {
const IR::U1 src = [&] {
switch (inst.src[0].field) {
case OperandField::VccLo:
return ir.GetVcc();
case OperandField::ExecLo:
return ir.GetExec();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
case OperandField::ConstZero:
return ir.Imm1(false);
default:
UNREACHABLE();
}
}();
switch (inst.dst[0].field) {
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[0].code), src);
break;
case OperandField::ExecLo:
ir.SetExec(src);
break;
case OperandField::VccLo:
ir.SetVcc(src);
break;
default:
UNREACHABLE();
}
}
@ -338,4 +447,20 @@ void Translator::S_ADDC_U32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
}
void Translator::S_MAX_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 result = ir.UMax(src0, src1);
SetDst(inst.dst[0], result);
ir.SetScc(ir.IEqual(result, src0));
}
void Translator::S_MIN_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 result = ir.UMin(src0, src1);
SetDst(inst.dst[0], result);
ir.SetScc(ir.IEqual(result, src0));
}
} // namespace Shader::Gcn

View file

@ -7,6 +7,29 @@ namespace Shader::Gcn {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
void Translator::EmitScalarMemory(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_LOAD_DWORDX4:
return S_LOAD_DWORD(4, inst);
case Opcode::S_LOAD_DWORDX8:
return S_LOAD_DWORD(8, inst);
case Opcode::S_LOAD_DWORDX16:
return S_LOAD_DWORD(16, inst);
case Opcode::S_BUFFER_LOAD_DWORD:
return S_BUFFER_LOAD_DWORD(1, inst);
case Opcode::S_BUFFER_LOAD_DWORDX2:
return S_BUFFER_LOAD_DWORD(2, inst);
case Opcode::S_BUFFER_LOAD_DWORDX4:
return S_BUFFER_LOAD_DWORD(4, inst);
case Opcode::S_BUFFER_LOAD_DWORDX8:
return S_BUFFER_LOAD_DWORD(8, inst);
case Opcode::S_BUFFER_LOAD_DWORDX16:
return S_BUFFER_LOAD_DWORD(16, inst);
default:
LogMissingOpcode(inst);
}
}
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const u32 dword_offset = [&] -> u32 {

View file

@ -16,13 +16,10 @@
namespace Shader::Gcn {
std::array<bool, IR::NumScalarRegs> Translator::exec_contexts{};
Translator::Translator(IR::Block* block_, Info& info_)
: ir{*block_, block_->begin()}, info{info_} {}
Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_)
: ir{*block_, block_->begin()}, info{info_}, profile{profile_} {}
void Translator::EmitPrologue() {
exec_contexts.fill(false);
ir.Prologue();
ir.SetExec(ir.Imm1(true));
@ -97,7 +94,7 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
}
break;
case OperandField::ConstZero:
if (force_flt) {
if (is_float) {
value = ir.Imm32(0.f);
} else {
value = ir.Imm32(0U);
@ -112,14 +109,14 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
value = ir.Imm32(-s32(operand.code) + SignedConstIntNegMin - 1);
break;
case OperandField::LiteralConst:
if (force_flt) {
if (is_float) {
value = ir.Imm32(std::bit_cast<float>(operand.code));
} else {
value = ir.Imm32(operand.code);
}
break;
case OperandField::ConstFloatPos_1_0:
if (force_flt) {
if (is_float) {
value = ir.Imm32(1.f);
} else {
value = ir.Imm32(std::bit_cast<u32>(1.f));
@ -138,7 +135,11 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
value = ir.Imm32(-0.5f);
break;
case OperandField::ConstFloatNeg_1_0:
value = ir.Imm32(-1.0f);
if (is_float) {
value = ir.Imm32(-1.0f);
} else {
value = ir.Imm32(std::bit_cast<u32>(-1.0f));
}
break;
case OperandField::ConstFloatNeg_2_0:
value = ir.Imm32(-2.0f);
@ -160,6 +161,8 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) {
value = ir.GetVccHi();
}
break;
case OperandField::M0:
return m0_value;
default:
UNREACHABLE();
}
@ -336,6 +339,7 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
case OperandField::VccHi:
return ir.SetVccHi(result);
case OperandField::M0:
m0_value = result;
break;
default:
UNREACHABLE();
@ -458,712 +462,84 @@ void Translator::EmitFetch(const GcnInst& inst) {
}
}
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::S_BARRIER:
return S_BARRIER();
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
return;
case Opcode::S_GETPC_B64:
return S_GETPC_B64(pc, inst);
case Opcode::S_WAITCNT:
case Opcode::S_NOP:
case Opcode::S_ENDPGM:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:
case Opcode::S_CBRANCH_SCC1:
case Opcode::S_CBRANCH_VCCNZ:
case Opcode::S_CBRANCH_VCCZ:
case Opcode::S_BRANCH:
return;
default:
UNREACHABLE();
}
}
void Translator::LogMissingOpcode(const GcnInst& inst) {
const u32 opcode = u32(inst.opcode);
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})",
magic_enum::enum_name(inst.opcode), u32(inst.opcode),
magic_enum::enum_name(inst.category));
info.translation_failed = true;
}
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
const Profile& profile) {
if (inst_list.empty()) {
return;
}
Translator translator{block, info};
Translator translator{block, info, profile};
for (const auto& inst : inst_list) {
block_base += inst.length;
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
translator.S_MOVK(inst);
break;
case Opcode::S_MOV_B32:
translator.S_MOV(inst);
break;
case Opcode::S_MUL_I32:
translator.S_MUL_I32(inst);
break;
case Opcode::V_MAD_F32:
translator.V_MAD_F32(inst);
break;
case Opcode::V_MOV_B32:
translator.V_MOV(inst);
break;
case Opcode::V_MAC_F32:
translator.V_MAC_F32(inst);
break;
case Opcode::V_MUL_F32:
translator.V_MUL_F32(inst);
break;
case Opcode::V_AND_B32:
translator.V_AND_B32(inst);
break;
case Opcode::V_OR_B32:
translator.V_OR_B32(false, inst);
break;
case Opcode::V_XOR_B32:
translator.V_OR_B32(true, inst);
break;
case Opcode::V_LSHLREV_B32:
translator.V_LSHLREV_B32(inst);
break;
case Opcode::V_ADD_I32:
translator.V_ADD_I32(inst);
break;
case Opcode::V_ADDC_U32:
translator.V_ADDC_U32(inst);
break;
case Opcode::V_CVT_F32_I32:
translator.V_CVT_F32_I32(inst);
break;
case Opcode::V_CVT_F32_U32:
translator.V_CVT_F32_U32(inst);
break;
case Opcode::V_RCP_F32:
translator.V_RCP_F32(inst);
break;
case Opcode::S_SWAPPC_B64:
pc += inst.length;
// Special case for emitting fetch shader.
if (inst.opcode == Opcode::S_SWAPPC_B64) {
ASSERT(info.stage == Stage::Vertex);
translator.EmitFetch(inst);
break;
case Opcode::S_WAITCNT:
break;
case Opcode::S_LOAD_DWORDX4:
translator.S_LOAD_DWORD(4, inst);
break;
case Opcode::S_LOAD_DWORDX8:
translator.S_LOAD_DWORD(8, inst);
break;
case Opcode::S_LOAD_DWORDX16:
translator.S_LOAD_DWORD(16, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORD:
translator.S_BUFFER_LOAD_DWORD(1, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORDX2:
translator.S_BUFFER_LOAD_DWORD(2, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORDX4:
translator.S_BUFFER_LOAD_DWORD(4, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORDX8:
translator.S_BUFFER_LOAD_DWORD(8, inst);
break;
case Opcode::S_BUFFER_LOAD_DWORDX16:
translator.S_BUFFER_LOAD_DWORD(16, inst);
break;
case Opcode::EXP:
translator.EXP(inst);
break;
case Opcode::V_INTERP_P2_F32:
translator.V_INTERP_P2_F32(inst);
break;
case Opcode::V_CVT_PKRTZ_F16_F32:
translator.V_CVT_PKRTZ_F16_F32(inst);
break;
case Opcode::V_CVT_F32_F16:
translator.V_CVT_F32_F16(inst);
break;
case Opcode::V_CVT_F32_UBYTE0:
translator.V_CVT_F32_UBYTE(0, inst);
break;
case Opcode::V_CVT_F32_UBYTE1:
translator.V_CVT_F32_UBYTE(1, inst);
break;
case Opcode::V_CVT_F32_UBYTE2:
translator.V_CVT_F32_UBYTE(2, inst);
break;
case Opcode::V_CVT_F32_UBYTE3:
translator.V_CVT_F32_UBYTE(3, inst);
break;
case Opcode::V_BFREV_B32:
translator.V_BFREV_B32(inst);
break;
case Opcode::V_LDEXP_F32:
translator.V_LDEXP_F32(inst);
break;
case Opcode::V_FRACT_F32:
translator.V_FRACT_F32(inst);
break;
case Opcode::V_ADD_F32:
translator.V_ADD_F32(inst);
break;
case Opcode::V_CVT_OFF_F32_I4:
translator.V_CVT_OFF_F32_I4(inst);
break;
case Opcode::V_MED3_F32:
translator.V_MED3_F32(inst);
break;
case Opcode::V_FLOOR_F32:
translator.V_FLOOR_F32(inst);
break;
case Opcode::V_SUB_F32:
translator.V_SUB_F32(inst);
break;
case Opcode::V_FMA_F32:
case Opcode::V_MADAK_F32: // Yes these can share the opcode
translator.V_FMA_F32(inst);
break;
case Opcode::IMAGE_SAMPLE_LZ_O:
case Opcode::IMAGE_SAMPLE_O:
case Opcode::IMAGE_SAMPLE_C:
case Opcode::IMAGE_SAMPLE_C_LZ:
case Opcode::IMAGE_SAMPLE_LZ:
case Opcode::IMAGE_SAMPLE:
case Opcode::IMAGE_SAMPLE_L:
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
case Opcode::IMAGE_SAMPLE_C_LZ_O:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::IMAGE_ATOMIC_ADD:
translator.IMAGE_ATOMIC(AtomicOp::Add, inst);
break;
case Opcode::IMAGE_ATOMIC_AND:
translator.IMAGE_ATOMIC(AtomicOp::And, inst);
break;
case Opcode::IMAGE_ATOMIC_OR:
translator.IMAGE_ATOMIC(AtomicOp::Or, inst);
break;
case Opcode::IMAGE_ATOMIC_XOR:
translator.IMAGE_ATOMIC(AtomicOp::Xor, inst);
break;
case Opcode::IMAGE_ATOMIC_UMAX:
translator.IMAGE_ATOMIC(AtomicOp::Umax, inst);
break;
case Opcode::IMAGE_ATOMIC_SMAX:
translator.IMAGE_ATOMIC(AtomicOp::Smax, inst);
break;
case Opcode::IMAGE_ATOMIC_UMIN:
translator.IMAGE_ATOMIC(AtomicOp::Umin, inst);
break;
case Opcode::IMAGE_ATOMIC_SMIN:
translator.IMAGE_ATOMIC(AtomicOp::Smin, inst);
break;
case Opcode::IMAGE_ATOMIC_INC:
translator.IMAGE_ATOMIC(AtomicOp::Inc, inst);
break;
case Opcode::IMAGE_ATOMIC_DEC:
translator.IMAGE_ATOMIC(AtomicOp::Dec, inst);
break;
case Opcode::IMAGE_GET_LOD:
translator.IMAGE_GET_LOD(inst);
break;
case Opcode::IMAGE_GATHER4_C:
case Opcode::IMAGE_GATHER4_LZ:
case Opcode::IMAGE_GATHER4_LZ_O:
translator.IMAGE_GATHER(inst);
break;
case Opcode::IMAGE_STORE:
translator.IMAGE_STORE(inst);
break;
case Opcode::IMAGE_LOAD_MIP:
translator.IMAGE_LOAD(true, inst);
break;
case Opcode::IMAGE_LOAD:
translator.IMAGE_LOAD(false, inst);
break;
case Opcode::V_MAD_U64_U32:
translator.V_MAD_U64_U32(inst);
break;
case Opcode::V_CMP_GE_I32:
translator.V_CMP_U32(ConditionOp::GE, true, false, inst);
break;
case Opcode::V_CMP_EQ_I32:
translator.V_CMP_U32(ConditionOp::EQ, true, false, inst);
break;
case Opcode::V_CMP_LE_I32:
translator.V_CMP_U32(ConditionOp::LE, true, false, inst);
break;
case Opcode::V_CMP_NE_I32:
translator.V_CMP_U32(ConditionOp::LG, true, false, inst);
break;
case Opcode::V_CMP_NE_U32:
translator.V_CMP_U32(ConditionOp::LG, false, false, inst);
break;
case Opcode::V_CMP_EQ_U32:
translator.V_CMP_U32(ConditionOp::EQ, false, false, inst);
break;
case Opcode::V_CMP_F_U32:
translator.V_CMP_U32(ConditionOp::F, false, false, inst);
break;
case Opcode::V_CMP_LT_U32:
translator.V_CMP_U32(ConditionOp::LT, false, false, inst);
break;
case Opcode::V_CMP_GT_U32:
translator.V_CMP_U32(ConditionOp::GT, false, false, inst);
break;
case Opcode::V_CMP_GE_U32:
translator.V_CMP_U32(ConditionOp::GE, false, false, inst);
break;
case Opcode::V_CMP_TRU_U32:
translator.V_CMP_U32(ConditionOp::TRU, false, false, inst);
break;
case Opcode::V_CMP_NEQ_F32:
translator.V_CMP_F32(ConditionOp::LG, false, inst);
break;
case Opcode::V_CMP_F_F32:
translator.V_CMP_F32(ConditionOp::F, false, inst);
break;
case Opcode::V_CMP_LT_F32:
translator.V_CMP_F32(ConditionOp::LT, false, inst);
break;
case Opcode::V_CMP_EQ_F32:
translator.V_CMP_F32(ConditionOp::EQ, false, inst);
break;
case Opcode::V_CMP_LE_F32:
translator.V_CMP_F32(ConditionOp::LE, false, inst);
break;
case Opcode::V_CMP_GT_F32:
translator.V_CMP_F32(ConditionOp::GT, false, inst);
break;
case Opcode::V_CMP_LG_F32:
translator.V_CMP_F32(ConditionOp::LG, false, inst);
break;
case Opcode::V_CMP_GE_F32:
translator.V_CMP_F32(ConditionOp::GE, false, inst);
break;
case Opcode::V_CMP_NLE_F32:
translator.V_CMP_F32(ConditionOp::GT, false, inst);
break;
case Opcode::V_CMP_NLT_F32:
translator.V_CMP_F32(ConditionOp::GE, false, inst);
break;
case Opcode::V_CMP_NGT_F32:
translator.V_CMP_F32(ConditionOp::LE, false, inst);
break;
case Opcode::V_CMP_NGE_F32:
translator.V_CMP_F32(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LT_U32:
translator.S_CMP(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LE_U32:
translator.S_CMP(ConditionOp::LE, false, inst);
break;
case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst);
break;
case Opcode::S_CMP_LT_I32:
translator.S_CMP(ConditionOp::LT, true, inst);
break;
case Opcode::S_CMP_LG_I32:
translator.S_CMP(ConditionOp::LG, true, inst);
break;
case Opcode::S_CMP_GT_I32:
translator.S_CMP(ConditionOp::GT, true, inst);
break;
case Opcode::S_CMP_GE_I32:
translator.S_CMP(ConditionOp::GE, true, inst);
break;
case Opcode::S_CMP_EQ_I32:
translator.S_CMP(ConditionOp::EQ, true, inst);
break;
case Opcode::S_CMP_EQ_U32:
translator.S_CMP(ConditionOp::EQ, false, inst);
break;
case Opcode::S_LSHL_B32:
translator.S_LSHL_B32(inst);
break;
case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_X:
translator.BUFFER_LOAD_FORMAT(1, true, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XY:
translator.BUFFER_LOAD_FORMAT(2, true, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, true, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
translator.BUFFER_LOAD_FORMAT(4, true, true, inst);
break;
case Opcode::BUFFER_LOAD_FORMAT_X:
translator.BUFFER_LOAD_FORMAT(1, false, true, inst);
break;
case Opcode::BUFFER_LOAD_FORMAT_XY:
translator.BUFFER_LOAD_FORMAT(2, false, true, inst);
break;
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, false, true, inst);
break;
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
translator.BUFFER_LOAD_FORMAT(4, false, true, inst);
break;
case Opcode::BUFFER_LOAD_DWORD:
translator.BUFFER_LOAD_FORMAT(1, false, false, inst);
break;
case Opcode::BUFFER_LOAD_DWORDX2:
translator.BUFFER_LOAD_FORMAT(2, false, false, inst);
break;
case Opcode::BUFFER_LOAD_DWORDX3:
translator.BUFFER_LOAD_FORMAT(3, false, false, inst);
break;
case Opcode::BUFFER_LOAD_DWORDX4:
translator.BUFFER_LOAD_FORMAT(4, false, false, inst);
break;
case Opcode::BUFFER_STORE_FORMAT_X:
case Opcode::BUFFER_STORE_DWORD:
translator.BUFFER_STORE_FORMAT(1, false, inst);
break;
case Opcode::BUFFER_STORE_DWORDX2:
translator.BUFFER_STORE_FORMAT(2, false, inst);
break;
case Opcode::BUFFER_STORE_DWORDX3:
translator.BUFFER_STORE_FORMAT(3, false, inst);
break;
case Opcode::BUFFER_STORE_FORMAT_XYZW:
case Opcode::BUFFER_STORE_DWORDX4:
translator.BUFFER_STORE_FORMAT(4, false, inst);
break;
case Opcode::V_MAX_F32:
translator.V_MAX_F32(inst);
break;
case Opcode::V_MAX_I32:
translator.V_MAX_U32(true, inst);
break;
case Opcode::V_MAX_U32:
translator.V_MAX_U32(false, inst);
break;
case Opcode::V_NOT_B32:
translator.V_NOT_B32(inst);
break;
case Opcode::V_RSQ_F32:
translator.V_RSQ_F32(inst);
break;
case Opcode::S_ANDN2_B64:
translator.S_AND_B64(NegateMode::Src1, inst);
break;
case Opcode::S_ORN2_B64:
translator.S_OR_B64(NegateMode::Src1, false, inst);
break;
case Opcode::V_SIN_F32:
translator.V_SIN_F32(inst);
break;
case Opcode::V_COS_F32:
translator.V_COS_F32(inst);
break;
case Opcode::V_LOG_F32:
translator.V_LOG_F32(inst);
break;
case Opcode::V_EXP_F32:
translator.V_EXP_F32(inst);
break;
case Opcode::V_SQRT_F32:
translator.V_SQRT_F32(inst);
break;
case Opcode::V_MIN_F32:
translator.V_MIN_F32(inst);
break;
case Opcode::V_MIN_I32:
translator.V_MIN_I32(inst);
break;
case Opcode::V_MIN3_F32:
translator.V_MIN3_F32(inst);
break;
case Opcode::V_MIN_LEGACY_F32:
translator.V_MIN_F32(inst, true);
break;
case Opcode::V_MADMK_F32:
translator.V_MADMK_F32(inst);
break;
case Opcode::V_CUBEMA_F32:
translator.V_CUBEMA_F32(inst);
break;
case Opcode::V_CUBESC_F32:
translator.V_CUBESC_F32(inst);
break;
case Opcode::V_CUBETC_F32:
translator.V_CUBETC_F32(inst);
break;
case Opcode::V_CUBEID_F32:
translator.V_CUBEID_F32(inst);
break;
case Opcode::V_CVT_U32_F32:
translator.V_CVT_U32_F32(inst);
break;
case Opcode::V_CVT_I32_F32:
translator.V_CVT_I32_F32(inst);
break;
case Opcode::V_CVT_FLR_I32_F32:
translator.V_CVT_FLR_I32_F32(inst);
break;
case Opcode::V_SUBREV_F32:
translator.V_SUBREV_F32(inst);
break;
case Opcode::S_AND_SAVEEXEC_B64:
translator.S_AND_SAVEEXEC_B64(inst);
break;
case Opcode::S_MOV_B64:
translator.S_MOV_B64(inst);
break;
case Opcode::V_SUBREV_I32:
translator.V_SUBREV_I32(inst);
break;
continue;
}
case Opcode::V_CMPX_F_F32:
translator.V_CMP_F32(ConditionOp::F, true, inst);
// Emit instructions for each category.
switch (inst.category) {
case InstCategory::DataShare:
translator.EmitDataShare(inst);
break;
case Opcode::V_CMPX_LT_F32:
translator.V_CMP_F32(ConditionOp::LT, true, inst);
case InstCategory::VectorInterpolation:
translator.EmitVectorInterpolation(inst);
break;
case Opcode::V_CMPX_EQ_F32:
translator.V_CMP_F32(ConditionOp::EQ, true, inst);
case InstCategory::ScalarMemory:
translator.EmitScalarMemory(inst);
break;
case Opcode::V_CMPX_LE_F32:
translator.V_CMP_F32(ConditionOp::LE, true, inst);
case InstCategory::VectorMemory:
translator.EmitVectorMemory(inst);
break;
case Opcode::V_CMPX_GT_F32:
translator.V_CMP_F32(ConditionOp::GT, true, inst);
case InstCategory::Export:
translator.EmitExport(inst);
break;
case Opcode::V_CMPX_LG_F32:
translator.V_CMP_F32(ConditionOp::LG, true, inst);
case InstCategory::FlowControl:
translator.EmitFlowControl(pc, inst);
break;
case Opcode::V_CMPX_GE_F32:
translator.V_CMP_F32(ConditionOp::GE, true, inst);
case InstCategory::ScalarALU:
translator.EmitScalarAlu(inst);
break;
case Opcode::V_CMPX_NGE_F32:
translator.V_CMP_F32(ConditionOp::LT, true, inst);
case InstCategory::VectorALU:
translator.EmitVectorAlu(inst);
break;
case Opcode::V_CMPX_NLG_F32:
translator.V_CMP_F32(ConditionOp::EQ, true, inst);
break;
case Opcode::V_CMPX_NGT_F32:
translator.V_CMP_F32(ConditionOp::LE, true, inst);
break;
case Opcode::V_CMPX_NLE_F32:
translator.V_CMP_F32(ConditionOp::GT, true, inst);
break;
case Opcode::V_CMPX_NEQ_F32:
translator.V_CMP_F32(ConditionOp::LG, true, inst);
break;
case Opcode::V_CMPX_NLT_F32:
translator.V_CMP_F32(ConditionOp::GE, true, inst);
break;
case Opcode::V_CMPX_TRU_F32:
translator.V_CMP_F32(ConditionOp::TRU, true, inst);
break;
case Opcode::V_CMP_LE_U32:
translator.V_CMP_U32(ConditionOp::LE, false, false, inst);
break;
case Opcode::V_CMP_GT_I32:
translator.V_CMP_U32(ConditionOp::GT, true, false, inst);
break;
case Opcode::V_CMP_LT_I32:
translator.V_CMP_U32(ConditionOp::LT, true, false, inst);
break;
case Opcode::V_CMPX_LT_I32:
translator.V_CMP_U32(ConditionOp::LT, true, true, inst);
break;
case Opcode::V_CMPX_F_U32:
translator.V_CMP_U32(ConditionOp::F, false, true, inst);
break;
case Opcode::V_CMPX_LT_U32:
translator.V_CMP_U32(ConditionOp::LT, false, true, inst);
break;
case Opcode::V_CMPX_EQ_U32:
translator.V_CMP_U32(ConditionOp::EQ, false, true, inst);
break;
case Opcode::V_CMPX_LE_U32:
translator.V_CMP_U32(ConditionOp::LE, false, true, inst);
break;
case Opcode::V_CMPX_GT_U32:
translator.V_CMP_U32(ConditionOp::GT, false, true, inst);
break;
case Opcode::V_CMPX_NE_U32:
translator.V_CMP_U32(ConditionOp::LG, false, true, inst);
break;
case Opcode::V_CMPX_GE_U32:
translator.V_CMP_U32(ConditionOp::GE, false, true, inst);
break;
case Opcode::V_CMPX_TRU_U32:
translator.V_CMP_U32(ConditionOp::TRU, false, true, inst);
break;
case Opcode::S_OR_B64:
translator.S_OR_B64(NegateMode::None, false, inst);
break;
case Opcode::S_NOR_B64:
translator.S_OR_B64(NegateMode::Result, false, inst);
break;
case Opcode::S_XOR_B64:
translator.S_OR_B64(NegateMode::None, true, inst);
break;
case Opcode::S_AND_B64:
translator.S_AND_B64(NegateMode::None, inst);
break;
case Opcode::S_NOT_B64:
translator.S_NOT_B64(inst);
break;
case Opcode::S_NAND_B64:
translator.S_AND_B64(NegateMode::Result, inst);
break;
case Opcode::V_LSHRREV_B32:
translator.V_LSHRREV_B32(inst);
break;
case Opcode::S_ADD_I32:
translator.S_ADD_I32(inst);
break;
case Opcode::V_MUL_HI_U32:
translator.V_MUL_HI_U32(false, inst);
break;
case Opcode::V_MUL_LO_I32:
translator.V_MUL_LO_U32(inst);
break;
case Opcode::V_SAD_U32:
translator.V_SAD_U32(inst);
break;
case Opcode::V_BFE_U32:
translator.V_BFE_U32(false, inst);
break;
case Opcode::V_BFE_I32:
translator.V_BFE_U32(true, inst);
break;
case Opcode::V_MAD_I32_I24:
translator.V_MAD_I32_I24(inst);
break;
case Opcode::V_MUL_I32_I24:
case Opcode::V_MUL_U32_U24:
translator.V_MUL_I32_I24(inst);
break;
case Opcode::V_SUB_I32:
translator.V_SUB_I32(inst);
break;
case Opcode::V_LSHR_B32:
translator.V_LSHR_B32(inst);
break;
case Opcode::V_ASHRREV_I32:
translator.V_ASHRREV_I32(inst);
break;
case Opcode::V_MAD_U32_U24:
translator.V_MAD_U32_U24(inst);
break;
case Opcode::S_AND_B32:
translator.S_AND_B32(inst);
break;
case Opcode::S_ASHR_I32:
translator.S_ASHR_I32(inst);
break;
case Opcode::S_OR_B32:
translator.S_OR_B32(inst);
break;
case Opcode::S_LSHR_B32:
translator.S_LSHR_B32(inst);
break;
case Opcode::S_CSELECT_B32:
translator.S_CSELECT_B32(inst);
break;
case Opcode::S_CSELECT_B64:
translator.S_CSELECT_B64(inst);
break;
case Opcode::S_BFE_U32:
translator.S_BFE_U32(inst);
break;
case Opcode::V_RNDNE_F32:
translator.V_RNDNE_F32(inst);
break;
case Opcode::V_BCNT_U32_B32:
translator.V_BCNT_U32_B32(inst);
break;
case Opcode::V_MAX3_F32:
translator.V_MAX3_F32(inst);
break;
case Opcode::DS_SWIZZLE_B32:
translator.DS_SWIZZLE_B32(inst);
break;
case Opcode::V_MUL_LO_U32:
translator.V_MUL_LO_U32(inst);
break;
case Opcode::S_BFM_B32:
translator.S_BFM_B32(inst);
break;
case Opcode::V_MIN_U32:
translator.V_MIN_U32(inst);
break;
case Opcode::V_CMP_NE_U64:
translator.V_CMP_NE_U64(inst);
break;
case Opcode::V_CMP_CLASS_F32:
translator.V_CMP_CLASS_F32(inst);
break;
case Opcode::V_TRUNC_F32:
translator.V_TRUNC_F32(inst);
break;
case Opcode::V_CEIL_F32:
translator.V_CEIL_F32(inst);
break;
case Opcode::V_BFI_B32:
translator.V_BFI_B32(inst);
break;
case Opcode::S_BREV_B32:
translator.S_BREV_B32(inst);
break;
case Opcode::S_ADD_U32:
translator.S_ADD_U32(inst);
break;
case Opcode::S_ADDC_U32:
translator.S_ADDC_U32(inst);
break;
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
translator.S_SUB_U32(inst);
break;
// TODO: Separate implementation for legacy variants.
case Opcode::V_MUL_LEGACY_F32:
translator.V_MUL_F32(inst);
break;
case Opcode::V_MAC_LEGACY_F32:
translator.V_MAC_F32(inst);
break;
case Opcode::V_MAD_LEGACY_F32:
translator.V_MAD_F32(inst);
break;
case Opcode::V_MAX_LEGACY_F32:
translator.V_MAX_F32(inst, true);
break;
case Opcode::V_RSQ_LEGACY_F32:
case Opcode::V_RSQ_CLAMP_F32:
translator.V_RSQ_F32(inst);
break;
case Opcode::V_RCP_IFLAG_F32:
translator.V_RCP_F32(inst);
break;
case Opcode::IMAGE_GET_RESINFO:
translator.IMAGE_GET_RESINFO(inst);
break;
case Opcode::S_BARRIER:
translator.S_BARRIER();
break;
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
break;
case Opcode::DS_READ_B32:
translator.DS_READ(32, false, false, inst);
break;
case Opcode::DS_READ2_B32:
translator.DS_READ(32, false, true, inst);
break;
case Opcode::DS_WRITE_B32:
translator.DS_WRITE(32, false, false, inst);
break;
case Opcode::DS_WRITE2_B32:
translator.DS_WRITE(32, false, true, inst);
break;
case Opcode::V_READFIRSTLANE_B32:
translator.V_READFIRSTLANE_B32(inst);
break;
case Opcode::S_GETPC_B64:
translator.S_GETPC_B64(block_base, inst);
break;
case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:
case Opcode::S_CBRANCH_SCC1:
case Opcode::S_CBRANCH_VCCNZ:
case Opcode::S_CBRANCH_VCCZ:
case Opcode::S_BRANCH:
case Opcode::S_WQM_B64:
case Opcode::V_INTERP_P1_F32:
case Opcode::S_ENDPGM:
case InstCategory::DebugProfile:
break;
default:
const u32 opcode = u32(inst.opcode);
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({})",
magic_enum::enum_name(inst.opcode), opcode);
info.translation_failed = true;
UNREACHABLE();
}
}
}

View file

@ -11,7 +11,8 @@
namespace Shader {
struct Info;
}
struct Profile;
} // namespace Shader
namespace Shader::Gcn {
@ -24,6 +25,7 @@ enum class ConditionOp : u32 {
LT,
LE,
TRU,
U,
};
enum class AtomicOp : u32 {
@ -53,10 +55,19 @@ enum class NegateMode : u32 {
class Translator {
public:
explicit Translator(IR::Block* block_, Info& info);
explicit Translator(IR::Block* block_, Info& info, const Profile& profile);
// Instruction categories
void EmitPrologue();
void EmitFetch(const GcnInst& inst);
void EmitDataShare(const GcnInst& inst);
void EmitVectorInterpolation(const GcnInst& inst);
void EmitScalarMemory(const GcnInst& inst);
void EmitVectorMemory(const GcnInst& inst);
void EmitExport(const GcnInst& inst);
void EmitFlowControl(u32 pc, const GcnInst& inst);
void EmitScalarAlu(const GcnInst& inst);
void EmitVectorAlu(const GcnInst& inst);
// Scalar ALU
void S_MOVK(const GcnInst& inst);
@ -83,6 +94,10 @@ public:
void S_SUB_U32(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_ADDC_U32(const GcnInst& inst);
void S_MULK_I32(const GcnInst& inst);
void S_ADDK_I32(const GcnInst& inst);
void S_MAX_U32(const GcnInst& inst);
void S_MIN_U32(const GcnInst& inst);
// Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@ -94,11 +109,13 @@ public:
void V_MAC_F32(const GcnInst& inst);
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
void V_CVT_F32_F16(const GcnInst& inst);
void V_CVT_F16_F32(const GcnInst& inst);
void V_MUL_F32(const GcnInst& inst);
void V_CNDMASK_B32(const GcnInst& inst);
void V_OR_B32(bool is_xor, const GcnInst& inst);
void V_AND_B32(const GcnInst& inst);
void V_LSHLREV_B32(const GcnInst& inst);
void V_LSHL_B32(const GcnInst& inst);
void V_ADD_I32(const GcnInst& inst);
void V_ADDC_U32(const GcnInst& inst);
void V_CVT_F32_I32(const GcnInst& inst);
@ -122,6 +139,7 @@ public:
void V_SQRT_F32(const GcnInst& inst);
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
void V_MIN3_F32(const GcnInst& inst);
void V_MIN3_I32(const GcnInst& inst);
void V_MADMK_F32(const GcnInst& inst);
void V_CUBEMA_F32(const GcnInst& inst);
void V_CUBESC_F32(const GcnInst& inst);
@ -146,6 +164,7 @@ public:
void V_BCNT_U32_B32(const GcnInst& inst);
void V_COS_F32(const GcnInst& inst);
void V_MAX3_F32(const GcnInst& inst);
void V_MAX3_U32(const GcnInst& inst);
void V_CVT_I32_F32(const GcnInst& inst);
void V_MIN_I32(const GcnInst& inst);
void V_MUL_LO_U32(const GcnInst& inst);
@ -160,6 +179,8 @@ public:
void V_LDEXP_F32(const GcnInst& inst);
void V_CVT_FLR_I32_F32(const GcnInst& inst);
void V_CMP_CLASS_F32(const GcnInst& inst);
void V_FFBL_B32(const GcnInst& inst);
void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
@ -167,12 +188,15 @@ public:
// Vector interpolation
void V_INTERP_P2_F32(const GcnInst& inst);
void V_INTERP_MOV_F32(const GcnInst& inst);
// Data share
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void V_READFIRSTLANE_B32(const GcnInst& inst);
void V_READLANE_B32(const GcnInst& inst);
void V_WRITELANE_B32(const GcnInst& inst);
void S_BARRIER();
// MIMG
@ -184,9 +208,6 @@ public:
void IMAGE_GET_LOD(const GcnInst& inst);
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
// Export
void EXP(const GcnInst& inst);
private:
template <typename T = IR::U32F32>
[[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false);
@ -195,12 +216,17 @@ private:
void SetDst(const InstOperand& operand, const IR::U32F32& value);
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
void LogMissingOpcode(const GcnInst& inst);
private:
IR::IREmitter ir;
Info& info;
static std::array<bool, IR::NumScalarRegs> exec_contexts;
const Profile& profile;
IR::U32 m0_value;
bool opcode_missing = false;
};
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
const Profile& profile);
} // namespace Shader::Gcn

View file

@ -2,9 +2,311 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/profile.h"
namespace Shader::Gcn {
void Translator::EmitVectorAlu(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::V_LSHLREV_B32:
return V_LSHLREV_B32(inst);
case Opcode::V_LSHL_B32:
return V_LSHL_B32(inst);
case Opcode::V_BFREV_B32:
return V_BFREV_B32(inst);
case Opcode::V_BFE_U32:
return V_BFE_U32(false, inst);
case Opcode::V_BFE_I32:
return V_BFE_U32(true, inst);
case Opcode::V_BFI_B32:
return V_BFI_B32(inst);
case Opcode::V_LSHR_B32:
return V_LSHR_B32(inst);
case Opcode::V_ASHRREV_I32:
return V_ASHRREV_I32(inst);
case Opcode::V_LSHRREV_B32:
return V_LSHRREV_B32(inst);
case Opcode::V_NOT_B32:
return V_NOT_B32(inst);
case Opcode::V_AND_B32:
return V_AND_B32(inst);
case Opcode::V_OR_B32:
return V_OR_B32(false, inst);
case Opcode::V_XOR_B32:
return V_OR_B32(true, inst);
case Opcode::V_FFBL_B32:
return V_FFBL_B32(inst);
case Opcode::V_MOV_B32:
return V_MOV(inst);
case Opcode::V_ADD_I32:
return V_ADD_I32(inst);
case Opcode::V_ADDC_U32:
return V_ADDC_U32(inst);
case Opcode::V_CVT_F32_I32:
return V_CVT_F32_I32(inst);
case Opcode::V_CVT_F32_U32:
return V_CVT_F32_U32(inst);
case Opcode::V_CVT_PKRTZ_F16_F32:
return V_CVT_PKRTZ_F16_F32(inst);
case Opcode::V_CVT_F32_F16:
return V_CVT_F32_F16(inst);
case Opcode::V_CVT_F16_F32:
return V_CVT_F16_F32(inst);
case Opcode::V_CVT_F32_UBYTE0:
return V_CVT_F32_UBYTE(0, inst);
case Opcode::V_CVT_F32_UBYTE1:
return V_CVT_F32_UBYTE(1, inst);
case Opcode::V_CVT_F32_UBYTE2:
return V_CVT_F32_UBYTE(2, inst);
case Opcode::V_CVT_F32_UBYTE3:
return V_CVT_F32_UBYTE(3, inst);
case Opcode::V_CVT_OFF_F32_I4:
return V_CVT_OFF_F32_I4(inst);
case Opcode::V_MAD_U64_U32:
return V_MAD_U64_U32(inst);
case Opcode::V_CMP_GE_I32:
return V_CMP_U32(ConditionOp::GE, true, false, inst);
case Opcode::V_CMP_EQ_I32:
return V_CMP_U32(ConditionOp::EQ, true, false, inst);
case Opcode::V_CMP_LE_I32:
return V_CMP_U32(ConditionOp::LE, true, false, inst);
case Opcode::V_CMP_NE_I32:
return V_CMP_U32(ConditionOp::LG, true, false, inst);
case Opcode::V_CMP_NE_U32:
return V_CMP_U32(ConditionOp::LG, false, false, inst);
case Opcode::V_CMP_EQ_U32:
return V_CMP_U32(ConditionOp::EQ, false, false, inst);
case Opcode::V_CMP_F_U32:
return V_CMP_U32(ConditionOp::F, false, false, inst);
case Opcode::V_CMP_LT_U32:
return V_CMP_U32(ConditionOp::LT, false, false, inst);
case Opcode::V_CMP_GT_U32:
return V_CMP_U32(ConditionOp::GT, false, false, inst);
case Opcode::V_CMP_GE_U32:
return V_CMP_U32(ConditionOp::GE, false, false, inst);
case Opcode::V_CMP_TRU_U32:
return V_CMP_U32(ConditionOp::TRU, false, false, inst);
case Opcode::V_CMP_NEQ_F32:
return V_CMP_F32(ConditionOp::LG, false, inst);
case Opcode::V_CMP_F_F32:
return V_CMP_F32(ConditionOp::F, false, inst);
case Opcode::V_CMP_LT_F32:
return V_CMP_F32(ConditionOp::LT, false, inst);
case Opcode::V_CMP_EQ_F32:
return V_CMP_F32(ConditionOp::EQ, false, inst);
case Opcode::V_CMP_LE_F32:
return V_CMP_F32(ConditionOp::LE, false, inst);
case Opcode::V_CMP_GT_F32:
return V_CMP_F32(ConditionOp::GT, false, inst);
case Opcode::V_CMP_LG_F32:
return V_CMP_F32(ConditionOp::LG, false, inst);
case Opcode::V_CMP_GE_F32:
return V_CMP_F32(ConditionOp::GE, false, inst);
case Opcode::V_CMP_NLE_F32:
return V_CMP_F32(ConditionOp::GT, false, inst);
case Opcode::V_CMP_NLT_F32:
return V_CMP_F32(ConditionOp::GE, false, inst);
case Opcode::V_CMP_NGT_F32:
return V_CMP_F32(ConditionOp::LE, false, inst);
case Opcode::V_CMP_NGE_F32:
return V_CMP_F32(ConditionOp::LT, false, inst);
case Opcode::V_CMP_U_F32:
return V_CMP_F32(ConditionOp::U, false, inst);
case Opcode::V_CNDMASK_B32:
return V_CNDMASK_B32(inst);
case Opcode::V_MAX_I32:
return V_MAX_U32(true, inst);
case Opcode::V_MAX_U32:
return V_MAX_U32(false, inst);
case Opcode::V_MIN_I32:
return V_MIN_I32(inst);
case Opcode::V_CUBEMA_F32:
return V_CUBEMA_F32(inst);
case Opcode::V_CUBESC_F32:
return V_CUBESC_F32(inst);
case Opcode::V_CUBETC_F32:
return V_CUBETC_F32(inst);
case Opcode::V_CUBEID_F32:
return V_CUBEID_F32(inst);
case Opcode::V_CVT_U32_F32:
return V_CVT_U32_F32(inst);
case Opcode::V_CVT_I32_F32:
return V_CVT_I32_F32(inst);
case Opcode::V_CVT_FLR_I32_F32:
return V_CVT_FLR_I32_F32(inst);
case Opcode::V_SUBREV_I32:
return V_SUBREV_I32(inst);
case Opcode::V_MUL_HI_U32:
return V_MUL_HI_U32(false, inst);
case Opcode::V_MUL_LO_I32:
return V_MUL_LO_U32(inst);
case Opcode::V_SAD_U32:
return V_SAD_U32(inst);
case Opcode::V_SUB_I32:
return V_SUB_I32(inst);
case Opcode::V_MAD_I32_I24:
return V_MAD_I32_I24(inst);
case Opcode::V_MUL_I32_I24:
case Opcode::V_MUL_U32_U24:
return V_MUL_I32_I24(inst);
case Opcode::V_MAD_U32_U24:
return V_MAD_U32_U24(inst);
case Opcode::V_BCNT_U32_B32:
return V_BCNT_U32_B32(inst);
case Opcode::V_MUL_LO_U32:
return V_MUL_LO_U32(inst);
case Opcode::V_MIN_U32:
return V_MIN_U32(inst);
case Opcode::V_CMP_NE_U64:
return V_CMP_NE_U64(inst);
case Opcode::V_READFIRSTLANE_B32:
return V_READFIRSTLANE_B32(inst);
case Opcode::V_READLANE_B32:
return V_READLANE_B32(inst);
case Opcode::V_WRITELANE_B32:
return V_WRITELANE_B32(inst);
case Opcode::V_MAD_F32:
return V_MAD_F32(inst);
case Opcode::V_MAC_F32:
return V_MAC_F32(inst);
case Opcode::V_MUL_F32:
return V_MUL_F32(inst);
case Opcode::V_RCP_F32:
return V_RCP_F32(inst);
case Opcode::V_LDEXP_F32:
return V_LDEXP_F32(inst);
case Opcode::V_FRACT_F32:
return V_FRACT_F32(inst);
case Opcode::V_ADD_F32:
return V_ADD_F32(inst);
case Opcode::V_MED3_F32:
return V_MED3_F32(inst);
case Opcode::V_FLOOR_F32:
return V_FLOOR_F32(inst);
case Opcode::V_SUB_F32:
return V_SUB_F32(inst);
case Opcode::V_FMA_F32:
case Opcode::V_MADAK_F32:
return V_FMA_F32(inst);
case Opcode::V_MAX_F32:
return V_MAX_F32(inst);
case Opcode::V_RSQ_F32:
return V_RSQ_F32(inst);
case Opcode::V_SIN_F32:
return V_SIN_F32(inst);
case Opcode::V_COS_F32:
return V_COS_F32(inst);
case Opcode::V_LOG_F32:
return V_LOG_F32(inst);
case Opcode::V_EXP_F32:
return V_EXP_F32(inst);
case Opcode::V_SQRT_F32:
return V_SQRT_F32(inst);
case Opcode::V_MIN_F32:
return V_MIN_F32(inst, false);
case Opcode::V_MIN3_F32:
return V_MIN3_F32(inst);
case Opcode::V_MIN3_I32:
return V_MIN3_I32(inst);
case Opcode::V_MIN_LEGACY_F32:
return V_MIN_F32(inst, true);
case Opcode::V_MADMK_F32:
return V_MADMK_F32(inst);
case Opcode::V_SUBREV_F32:
return V_SUBREV_F32(inst);
case Opcode::V_RNDNE_F32:
return V_RNDNE_F32(inst);
case Opcode::V_MAX3_F32:
return V_MAX3_F32(inst);
case Opcode::V_MAX3_U32:
return V_MAX3_U32(inst);
case Opcode::V_TRUNC_F32:
return V_TRUNC_F32(inst);
case Opcode::V_CEIL_F32:
return V_CEIL_F32(inst);
case Opcode::V_MUL_LEGACY_F32:
return V_MUL_F32(inst);
case Opcode::V_MAC_LEGACY_F32:
return V_MAC_F32(inst);
case Opcode::V_MAD_LEGACY_F32:
return V_MAD_F32(inst);
case Opcode::V_MAX_LEGACY_F32:
return V_MAX_F32(inst, true);
case Opcode::V_RSQ_LEGACY_F32:
case Opcode::V_RSQ_CLAMP_F32:
return V_RSQ_F32(inst);
case Opcode::V_RCP_IFLAG_F32:
return V_RCP_F32(inst);
case Opcode::V_CMPX_F_F32:
return V_CMP_F32(ConditionOp::F, true, inst);
case Opcode::V_CMPX_LT_F32:
return V_CMP_F32(ConditionOp::LT, true, inst);
case Opcode::V_CMPX_EQ_F32:
return V_CMP_F32(ConditionOp::EQ, true, inst);
case Opcode::V_CMPX_LE_F32:
return V_CMP_F32(ConditionOp::LE, true, inst);
case Opcode::V_CMPX_GT_F32:
return V_CMP_F32(ConditionOp::GT, true, inst);
case Opcode::V_CMPX_LG_F32:
return V_CMP_F32(ConditionOp::LG, true, inst);
case Opcode::V_CMPX_GE_F32:
return V_CMP_F32(ConditionOp::GE, true, inst);
case Opcode::V_CMPX_NGE_F32:
return V_CMP_F32(ConditionOp::LT, true, inst);
case Opcode::V_CMPX_NLG_F32:
return V_CMP_F32(ConditionOp::EQ, true, inst);
case Opcode::V_CMPX_NGT_F32:
return V_CMP_F32(ConditionOp::LE, true, inst);
case Opcode::V_CMPX_NLE_F32:
return V_CMP_F32(ConditionOp::GT, true, inst);
case Opcode::V_CMPX_NEQ_F32:
return V_CMP_F32(ConditionOp::LG, true, inst);
case Opcode::V_CMPX_NLT_F32:
return V_CMP_F32(ConditionOp::GE, true, inst);
case Opcode::V_CMPX_TRU_F32:
return V_CMP_F32(ConditionOp::TRU, true, inst);
case Opcode::V_CMP_CLASS_F32:
return V_CMP_CLASS_F32(inst);
case Opcode::V_CMP_LE_U32:
return V_CMP_U32(ConditionOp::LE, false, false, inst);
case Opcode::V_CMP_GT_I32:
return V_CMP_U32(ConditionOp::GT, true, false, inst);
case Opcode::V_CMP_LT_I32:
return V_CMP_U32(ConditionOp::LT, true, false, inst);
case Opcode::V_CMPX_LT_I32:
return V_CMP_U32(ConditionOp::LT, true, true, inst);
case Opcode::V_CMPX_F_U32:
return V_CMP_U32(ConditionOp::F, false, true, inst);
case Opcode::V_CMPX_LT_U32:
return V_CMP_U32(ConditionOp::LT, false, true, inst);
case Opcode::V_CMPX_EQ_U32:
return V_CMP_U32(ConditionOp::EQ, false, true, inst);
case Opcode::V_CMPX_LE_U32:
return V_CMP_U32(ConditionOp::LE, false, true, inst);
case Opcode::V_CMPX_GT_U32:
return V_CMP_U32(ConditionOp::GT, false, true, inst);
case Opcode::V_CMPX_NE_U32:
return V_CMP_U32(ConditionOp::LG, false, true, inst);
case Opcode::V_CMPX_GE_U32:
return V_CMP_U32(ConditionOp::GE, false, true, inst);
case Opcode::V_CMPX_TRU_U32:
return V_CMP_U32(ConditionOp::TRU, false, true, inst);
case Opcode::V_CMPX_LG_I32:
return V_CMP_U32(ConditionOp::LG, true, true, inst);
case Opcode::V_MBCNT_LO_U32_B32:
return V_MBCNT_U32_B32(true, inst);
case Opcode::V_MBCNT_HI_U32_B32:
return V_MBCNT_U32_B32(false, inst);
default:
LogMissingOpcode(inst);
}
}
void Translator::V_MOV(const GcnInst& inst) {
SetDst(inst.dst[0], GetSrc(inst.src[0]));
}
@ -32,6 +334,12 @@ void Translator::V_CVT_F32_F16(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
}
void Translator::V_CVT_F16_F32(const GcnInst& inst) {
const IR::F32 src0 = GetSrc(inst.src[0], true);
const IR::F16 src0fp16 = ir.FPConvert(16, src0);
SetDst(inst.dst[0], ir.UConvert(32, ir.BitCast<IR::U16>(src0fp16)));
}
void Translator::V_MUL_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true)));
}
@ -85,6 +393,12 @@ void Translator::V_LSHLREV_B32(const GcnInst& inst) {
ir.SetVectorReg(dst_reg, ir.ShiftLeftLogical(src1, ir.BitwiseAnd(src0, ir.Imm32(0x1F))));
}
void Translator::V_LSHL_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.ShiftLeftLogical(src0, ir.BitwiseAnd(src1, ir.Imm32(0x1F))));
}
void Translator::V_ADD_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
@ -208,6 +522,8 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
return ir.FPLessThanEqual(src0, src1);
case ConditionOp::GE:
return ir.FPGreaterThanEqual(src0, src1);
case ConditionOp::U:
return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1)));
default:
UNREACHABLE();
}
@ -278,6 +594,13 @@ void Translator::V_MIN3_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
}
void Translator::V_MIN3_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.SMin(src0, ir.SMin(src1, src2)));
}
void Translator::V_MADMK_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
@ -320,12 +643,13 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) {
}
void Translator::V_MAD_U64_U32(const GcnInst& inst) {
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
const auto src2 = GetSrc64<IR::U64>(inst.src[2]);
const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
// const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
const IR::U64 mul_result =
ir.PackUint2x32(ir.CompositeConstruct(ir.IMul(src0, src1), ir.Imm32(0U)));
const IR::U64 sum_result = ir.IAdd(mul_result, src2);
SetDst64(inst.dst[0], sum_result);
@ -463,6 +787,13 @@ void Translator::V_MAX3_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
}
void Translator::V_MAX3_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2)));
}
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
@ -561,38 +892,58 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
}
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
constexpr u32 SIGNALING_NAN = 1 << 0;
constexpr u32 QUIET_NAN = 1 << 1;
constexpr u32 NEGATIVE_INFINITY = 1 << 2;
constexpr u32 NEGATIVE_NORMAL = 1 << 3;
constexpr u32 NEGATIVE_DENORM = 1 << 4;
constexpr u32 NEGATIVE_ZERO = 1 << 5;
constexpr u32 POSITIVE_ZERO = 1 << 6;
constexpr u32 POSITIVE_DENORM = 1 << 7;
constexpr u32 POSITIVE_NORMAL = 1 << 8;
constexpr u32 POSITIVE_INFINITY = 1 << 9;
const IR::F32F64 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
IR::U1 value;
if (src1.IsImmediate()) {
const u32 class_mask = src1.U32();
IR::U1 value;
if ((class_mask & (SIGNALING_NAN | QUIET_NAN)) == (SIGNALING_NAN | QUIET_NAN)) {
const auto class_mask = static_cast<IR::FloatClassFunc>(src1.U32());
if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
value = ir.FPIsNan(src0);
} else if ((class_mask & (POSITIVE_INFINITY | NEGATIVE_INFINITY)) ==
(POSITIVE_INFINITY | NEGATIVE_INFINITY)) {
} else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
value = ir.FPIsInf(src0);
} else {
UNREACHABLE();
}
if (inst.dst[1].field == OperandField::VccLo) {
return ir.SetVcc(value);
} else {
UNREACHABLE();
}
} else {
// We don't know the type yet, delay its resolution.
value = ir.FPCmpClass32(src0, src1);
}
switch (inst.dst[1].field) {
case OperandField::VccLo:
return ir.SetVcc(value);
default:
UNREACHABLE();
}
}
void Translator::V_FFBL_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
SetDst(inst.dst[0], ir.FindILsb(src0));
}
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 lane_id = ir.LaneId();
const auto [warp_half, mask_shift] = [&]() -> std::pair<IR::U32, IR::U32> {
if (profile.subgroup_size == 32) {
const IR::U32 warp_half = ir.BitwiseAnd(ir.WarpId(), ir.Imm32(1));
return std::make_pair(warp_half, lane_id);
}
const IR::U32 warp_half = ir.ShiftRightLogical(lane_id, ir.Imm32(5));
const IR::U32 mask_shift = ir.BitwiseAnd(lane_id, ir.Imm32(0x1F));
return std::make_pair(warp_half, mask_shift);
}();
const IR::U32 thread_mask = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), mask_shift), ir.Imm32(1));
const IR::U1 is_odd_warp = ir.INotEqual(warp_half, ir.Imm32(0));
const IR::U32 mask = IR::U32{ir.Select(is_odd_warp, is_low ? ir.Imm32(~0U) : thread_mask,
is_low ? thread_mask : ir.Imm32(0))};
const IR::U32 masked_value = ir.BitwiseAnd(src0, mask);
const IR::U32 result = ir.IAdd(src1, ir.BitCount(masked_value));
SetDst(inst.dst[0], result);
}
} // namespace Shader::Gcn

View file

@ -12,4 +12,24 @@ void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
}
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
}
void Translator::EmitVectorInterpolation(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::V_INTERP_P1_F32:
return;
case Opcode::V_INTERP_P2_F32:
return V_INTERP_P2_F32(inst);
case Opcode::V_INTERP_MOV_F32:
return V_INTERP_MOV_F32(inst);
default:
LogMissingOpcode(inst);
}
}
} // namespace Shader::Gcn

View file

@ -5,9 +5,96 @@
namespace Shader::Gcn {
void Translator::EmitVectorMemory(const GcnInst& inst) {
switch (inst.opcode) {
case Opcode::IMAGE_SAMPLE_LZ_O:
case Opcode::IMAGE_SAMPLE_O:
case Opcode::IMAGE_SAMPLE_C:
case Opcode::IMAGE_SAMPLE_C_LZ:
case Opcode::IMAGE_SAMPLE_LZ:
case Opcode::IMAGE_SAMPLE:
case Opcode::IMAGE_SAMPLE_L:
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
case Opcode::IMAGE_SAMPLE_C_LZ_O:
return IMAGE_SAMPLE(inst);
case Opcode::IMAGE_GATHER4_C:
case Opcode::IMAGE_GATHER4_LZ:
case Opcode::IMAGE_GATHER4_LZ_O:
return IMAGE_GATHER(inst);
case Opcode::IMAGE_ATOMIC_ADD:
return IMAGE_ATOMIC(AtomicOp::Add, inst);
case Opcode::IMAGE_ATOMIC_AND:
return IMAGE_ATOMIC(AtomicOp::And, inst);
case Opcode::IMAGE_ATOMIC_OR:
return IMAGE_ATOMIC(AtomicOp::Or, inst);
case Opcode::IMAGE_ATOMIC_XOR:
return IMAGE_ATOMIC(AtomicOp::Xor, inst);
case Opcode::IMAGE_ATOMIC_UMAX:
return IMAGE_ATOMIC(AtomicOp::Umax, inst);
case Opcode::IMAGE_ATOMIC_SMAX:
return IMAGE_ATOMIC(AtomicOp::Smax, inst);
case Opcode::IMAGE_ATOMIC_UMIN:
return IMAGE_ATOMIC(AtomicOp::Umin, inst);
case Opcode::IMAGE_ATOMIC_SMIN:
return IMAGE_ATOMIC(AtomicOp::Smin, inst);
case Opcode::IMAGE_ATOMIC_INC:
return IMAGE_ATOMIC(AtomicOp::Inc, inst);
case Opcode::IMAGE_ATOMIC_DEC:
return IMAGE_ATOMIC(AtomicOp::Dec, inst);
case Opcode::IMAGE_GET_LOD:
return IMAGE_GET_LOD(inst);
case Opcode::IMAGE_STORE:
return IMAGE_STORE(inst);
case Opcode::IMAGE_LOAD_MIP:
return IMAGE_LOAD(true, inst);
case Opcode::IMAGE_LOAD:
return IMAGE_LOAD(false, inst);
case Opcode::IMAGE_GET_RESINFO:
return IMAGE_GET_RESINFO(inst);
case Opcode::TBUFFER_LOAD_FORMAT_X:
return BUFFER_LOAD_FORMAT(1, true, true, inst);
case Opcode::TBUFFER_LOAD_FORMAT_XY:
return BUFFER_LOAD_FORMAT(2, true, true, inst);
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
return BUFFER_LOAD_FORMAT(3, true, true, inst);
case Opcode::TBUFFER_LOAD_FORMAT_XYZW:
return BUFFER_LOAD_FORMAT(4, true, true, inst);
case Opcode::BUFFER_LOAD_FORMAT_X:
return BUFFER_LOAD_FORMAT(1, false, true, inst);
case Opcode::BUFFER_LOAD_FORMAT_XY:
return BUFFER_LOAD_FORMAT(2, false, true, inst);
case Opcode::BUFFER_LOAD_FORMAT_XYZ:
return BUFFER_LOAD_FORMAT(3, false, true, inst);
case Opcode::BUFFER_LOAD_FORMAT_XYZW:
return BUFFER_LOAD_FORMAT(4, false, true, inst);
case Opcode::BUFFER_LOAD_DWORD:
return BUFFER_LOAD_FORMAT(1, false, false, inst);
case Opcode::BUFFER_LOAD_DWORDX2:
return BUFFER_LOAD_FORMAT(2, false, false, inst);
case Opcode::BUFFER_LOAD_DWORDX3:
return BUFFER_LOAD_FORMAT(3, false, false, inst);
case Opcode::BUFFER_LOAD_DWORDX4:
return BUFFER_LOAD_FORMAT(4, false, false, inst);
case Opcode::BUFFER_STORE_FORMAT_X:
case Opcode::BUFFER_STORE_DWORD:
return BUFFER_STORE_FORMAT(1, false, inst);
case Opcode::BUFFER_STORE_DWORDX2:
return BUFFER_STORE_FORMAT(2, false, inst);
case Opcode::BUFFER_STORE_DWORDX3:
return BUFFER_STORE_FORMAT(3, false, inst);
case Opcode::BUFFER_STORE_FORMAT_XYZW:
case Opcode::BUFFER_STORE_DWORDX4:
return BUFFER_STORE_FORMAT(4, false, inst);
default:
LogMissingOpcode(inst);
}
}
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const auto flags = ImageResFlags(inst.control.mimg.dmask);
const bool has_mips = flags.test(ImageResComponent::MipCount);
const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
@ -157,7 +244,7 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
info.force_level0.Assign(flags.test(MimgModifier::Level0));
info.explicit_lod.Assign(explicit_lod);
// info.explicit_lod.Assign(explicit_lod);
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
// Issue IR instruction, leaving unknown fields blank to patch later.