Implement V_MOVREL variants (#745)

* shader_recompiler: Implement V_MOVRELS_B32, V_MOVRELD_B32,
V_MOVRELSD_B32

Generates a long chain of OpSelects that hardcodes reading from or writing to
each possible VGPR, depending on the value of m0.

Future work is to do range analysis to put an upper bound on m0 and
check fewer registers.

* fix runtime info after rebase
This commit is contained in:
baggins183 2024-09-06 13:47:47 -07:00 committed by GitHub
parent 416e23fe76
commit bb29224daf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 110 additions and 6 deletions

View file

@ -217,6 +217,10 @@ U32 IREmitter::GetVccHi() {
return Inst<U32>(Opcode::GetVccHi);
}
// Emits a GetM0 IR instruction and returns its U32 result.
U32 IREmitter::GetM0() {
return Inst<U32>(Opcode::GetM0);
}
// Emits a SetScc IR instruction with the 1-bit `value` as its only operand.
void IREmitter::SetScc(const U1& value) {
Inst(Opcode::SetScc, value);
}
@ -241,6 +245,10 @@ void IREmitter::SetVccHi(const U32& value) {
Inst(Opcode::SetVccHi, value);
}
// Emits a SetM0 IR instruction with the U32 `value` as its only operand.
void IREmitter::SetM0(const U32& value) {
Inst(Opcode::SetM0, value);
}
// Emits a GetAttribute IR instruction for `attribute` and returns its F32 result.
// The component index `comp` is passed as an operand wrapped by Imm32.
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
}

View file

@ -67,12 +67,14 @@ public:
[[nodiscard]] U1 GetVcc();
[[nodiscard]] U32 GetVccLo();
[[nodiscard]] U32 GetVccHi();
// Emits Opcode::GetM0 and returns the resulting U32 value.
[[nodiscard]] U32 GetM0();
void SetScc(const U1& value);
void SetExec(const U1& value);
void SetVcc(const U1& value);
void SetSccLo(const U32& value);
void SetVccLo(const U32& value);
void SetVccHi(const U32& value);
// Emits Opcode::SetM0 with `value` as its operand.
void SetM0(const U32& value);
[[nodiscard]] U1 Condition(IR::Condition cond);

View file

@ -60,12 +60,14 @@ OPCODE(GetExec, U1, Void,
OPCODE(GetVcc, U1, Void, )
OPCODE(GetVccLo, U32, Void, )
OPCODE(GetVccHi, U32, Void, )
OPCODE(GetM0, U32, Void, )
OPCODE(SetScc, Void, U1, )
OPCODE(SetExec, Void, U1, )
OPCODE(SetVcc, Void, U1, )
OPCODE(SetSccLo, Void, U32, )
OPCODE(SetVccLo, Void, U32, )
OPCODE(SetVccHi, Void, U32, )
OPCODE(SetM0, Void, U32, )
// Undefined
OPCODE(UndefU1, U1, )

View file

@ -33,6 +33,7 @@ struct ExecFlagTag : FlagTag {};
struct VccFlagTag : FlagTag {};
struct VccLoTag : FlagTag {};
struct VccHiTag : FlagTag {};
// Tag type identifying the M0 register as a variable of this pass; it selects
// the M0 overloads of Def/SetDef/UndefOpcode and is written/read via
// WriteVariable/ReadVariable when SetM0/GetM0 instructions are visited.
struct M0Tag : FlagTag {};
struct GotoVariable : FlagTag {
GotoVariable() = default;
@ -44,7 +45,7 @@ struct GotoVariable : FlagTag {
};
// Closed set of variable kinds handled by this pass: scalar and vector
// registers, goto variables, and the tag types for SCC, EXEC, VCC, VCC lo/hi
// and M0. M0Tag is the last alternative so GetM0/SetM0 can be resolved the
// same way as the other tagged variables.
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
                             VccFlagTag, VccLoTag, VccHiTag, M0Tag>;
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
struct DefTable {
@ -103,6 +104,12 @@ struct DefTable {
// Records `value` as the VCC flag definition for `block`, replacing any
// value previously stored for that block.
void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) {
vcc_flag.insert_or_assign(block, value);
}
// Returns the M0 definition stored for `block`.
// NOTE: operator[] on the unordered_map inserts a default-constructed
// IR::Value when `block` has no entry yet, so a lookup may grow m0_flag.
const IR::Value& Def(IR::Block* block, M0Tag) {
return m0_flag[block];
}
// Records `value` as the M0 definition for `block`, replacing any value
// previously stored for that block.
void SetDef(IR::Block* block, M0Tag, const IR::Value& value) {
m0_flag.insert_or_assign(block, value);
}
std::unordered_map<u32, ValueMap> goto_vars;
ValueMap scc_flag;
@ -111,6 +118,7 @@ struct DefTable {
ValueMap scc_lo_flag;
ValueMap vcc_lo_flag;
ValueMap vcc_hi_flag;
ValueMap m0_flag;
};
IR::Opcode UndefOpcode(IR::ScalarReg) noexcept {
@ -129,6 +137,10 @@ IR::Opcode UndefOpcode(const VccHiTag) noexcept {
return IR::Opcode::UndefU32;
}
// Undefined-value opcode for M0: UndefU32, matching the U32 type of
// GetM0/SetM0. Being an exact match for M0Tag, this overload is chosen over
// the generic FlagTag one.
IR::Opcode UndefOpcode(const M0Tag) noexcept {
return IR::Opcode::UndefU32;
}
// Fallback for any FlagTag-derived tag without a more specific overload:
// the undefined value is a 1-bit UndefU1.
IR::Opcode UndefOpcode(const FlagTag) noexcept {
return IR::Opcode::UndefU1;
}
@ -330,6 +342,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::SetVccHi:
pass.WriteVariable(VccHiTag{}, block, inst.Arg(0));
break;
case IR::Opcode::SetM0:
pass.WriteVariable(M0Tag{}, block, inst.Arg(0));
break;
case IR::Opcode::GetThreadBitScalarReg:
case IR::Opcode::GetScalarRegister: {
const IR::ScalarReg reg{inst.Arg(0).ScalarReg()};
@ -362,6 +377,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
case IR::Opcode::GetVccHi:
inst.ReplaceUsesWith(pass.ReadVariable(VccHiTag{}, block));
break;
case IR::Opcode::GetM0:
inst.ReplaceUsesWith(pass.ReadVariable(M0Tag{}, block));
break;
default:
break;
}