shader_recompiler: Implement S_BCNT1_I32_B64 and S_FF1_I32_B64 (#1889)

* shader_recompiler: Implement S_BCNT1_I32_B64

* shader_recompiler: Implement S_FF1_I32_B64

* shader_recompiler: Implement IEqual for 64-bit.

* shader_recompiler: Fix immediate type in S_FF1_I32_B32
This commit is contained in:
squidbus 2024-12-27 06:46:07 -08:00 committed by GitHub
parent 1c5947d93b
commit b1f74660df
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 68 additions and 12 deletions

View file

@ -304,10 +304,12 @@ Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id
Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
Id EmitBitReverse32(EmitContext& ctx, Id value); Id EmitBitReverse32(EmitContext& ctx, Id value);
Id EmitBitCount32(EmitContext& ctx, Id value); Id EmitBitCount32(EmitContext& ctx, Id value);
Id EmitBitCount64(EmitContext& ctx, Id value);
Id EmitBitwiseNot32(EmitContext& ctx, Id value); Id EmitBitwiseNot32(EmitContext& ctx, Id value);
Id EmitFindSMsb32(EmitContext& ctx, Id value); Id EmitFindSMsb32(EmitContext& ctx, Id value);
Id EmitFindUMsb32(EmitContext& ctx, Id value); Id EmitFindUMsb32(EmitContext& ctx, Id value);
Id EmitFindILsb32(EmitContext& ctx, Id value); Id EmitFindILsb32(EmitContext& ctx, Id value);
Id EmitFindILsb64(EmitContext& ctx, Id value);
Id EmitSMin32(EmitContext& ctx, Id a, Id b); Id EmitSMin32(EmitContext& ctx, Id a, Id b);
Id EmitUMin32(EmitContext& ctx, Id a, Id b); Id EmitUMin32(EmitContext& ctx, Id a, Id b);
Id EmitSMax32(EmitContext& ctx, Id a, Id b); Id EmitSMax32(EmitContext& ctx, Id a, Id b);
@ -318,7 +320,8 @@ Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);

View file

@ -201,6 +201,10 @@ Id EmitBitCount32(EmitContext& ctx, Id value) {
return ctx.OpBitCount(ctx.U32[1], value); return ctx.OpBitCount(ctx.U32[1], value);
} }
/// Emits a population count of a 64-bit value, producing a 32-bit result.
///
/// The IR opcode BitCount64 is declared as U64 -> U32, so the returned Id must have
/// type U32. Vulkan additionally restricts OpBitCount to 32-bit operands, so the value
/// is split into two 32-bit halves whose counts are summed.
///
/// @param ctx   SPIR-V emission context.
/// @param value Id of the 64-bit unsigned operand.
/// @return Id of a U32 holding the number of set bits in @p value.
Id EmitBitCount64(EmitContext& ctx, Id value) {
    // Reinterpret the 64-bit scalar as a uvec2; component 0 holds the low-order bits.
    const Id halves{ctx.OpBitcast(ctx.U32[2], value)};
    const Id lo{ctx.OpCompositeExtract(ctx.U32[1], halves, 0U)};
    const Id hi{ctx.OpCompositeExtract(ctx.U32[1], halves, 1U)};
    const Id lo_count{ctx.OpBitCount(ctx.U32[1], lo)};
    const Id hi_count{ctx.OpBitCount(ctx.U32[1], hi)};
    return ctx.OpIAdd(ctx.U32[1], lo_count, hi_count);
}
Id EmitBitwiseNot32(EmitContext& ctx, Id value) { Id EmitBitwiseNot32(EmitContext& ctx, Id value) {
return ctx.OpNot(ctx.U32[1], value); return ctx.OpNot(ctx.U32[1], value);
} }
@ -217,6 +221,10 @@ Id EmitFindILsb32(EmitContext& ctx, Id value) {
return ctx.OpFindILsb(ctx.U32[1], value); return ctx.OpFindILsb(ctx.U32[1], value);
} }
/// Emits a find-lowest-set-bit of a 64-bit value, producing a 32-bit result.
///
/// The IR opcode FindILsb64 is declared as U64 -> U32, so the returned Id must have
/// type U32. The GLSL.std.450 FindILsb instruction is limited to 32-bit components, so
/// the value is split into two 32-bit halves and the answer is selected from them.
///
/// @param ctx   SPIR-V emission context.
/// @param value Id of the 64-bit unsigned operand.
/// @return Id of a U32 holding the index (0..63) of the least significant set bit, or
///         0xFFFFFFFF (-1) when @p value is zero, mirroring the 32-bit FindILsb result.
Id EmitFindILsb64(EmitContext& ctx, Id value) {
    // Reinterpret the 64-bit scalar as a uvec2; component 0 holds the low-order bits.
    const Id halves{ctx.OpBitcast(ctx.U32[2], value)};
    const Id lo{ctx.OpCompositeExtract(ctx.U32[1], halves, 0U)};
    const Id hi{ctx.OpCompositeExtract(ctx.U32[1], halves, 1U)};
    const Id zero{ctx.ConstU32(0U)};

    const Id lo_lsb{ctx.OpFindILsb(ctx.U32[1], lo)};
    const Id hi_lsb{ctx.OpFindILsb(ctx.U32[1], hi)};

    // Only bias the high-half index by 32 when the high half actually has a set bit;
    // otherwise keep FindILsb's -1 so an all-zero input still yields -1.
    const Id hi_has_bit{ctx.OpINotEqual(ctx.U1[1], hi, zero)};
    const Id hi_biased{ctx.OpIAdd(ctx.U32[1], hi_lsb, ctx.ConstU32(32U))};
    const Id hi_result{ctx.OpSelect(ctx.U32[1], hi_has_bit, hi_biased, hi_lsb)};

    // A set bit in the low half always wins over anything in the high half.
    const Id lo_has_bit{ctx.OpINotEqual(ctx.U1[1], lo, zero)};
    return ctx.OpSelect(ctx.U32[1], lo_has_bit, lo_lsb, hi_result);
}
Id EmitSMin32(EmitContext& ctx, Id a, Id b) { Id EmitSMin32(EmitContext& ctx, Id a, Id b) {
return ctx.OpSMin(ctx.U32[1], a, b); return ctx.OpSMin(ctx.U32[1], a, b);
} }
@ -277,7 +285,11 @@ Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpULessThan(ctx.U1[1], lhs, rhs); return ctx.OpULessThan(ctx.U1[1], lhs, rhs);
} }
Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpIEqual(ctx.U1[1], lhs, rhs);
}
Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpIEqual(ctx.U1[1], lhs, rhs); return ctx.OpIEqual(ctx.U1[1], lhs, rhs);
} }

View file

@ -100,8 +100,12 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_BREV_B32(inst); return S_BREV_B32(inst);
case Opcode::S_BCNT1_I32_B32: case Opcode::S_BCNT1_I32_B32:
return S_BCNT1_I32_B32(inst); return S_BCNT1_I32_B32(inst);
case Opcode::S_BCNT1_I32_B64:
return S_BCNT1_I32_B64(inst);
case Opcode::S_FF1_I32_B32: case Opcode::S_FF1_I32_B32:
return S_FF1_I32_B32(inst); return S_FF1_I32_B32(inst);
case Opcode::S_FF1_I32_B64:
return S_FF1_I32_B64(inst);
case Opcode::S_AND_SAVEEXEC_B64: case Opcode::S_AND_SAVEEXEC_B64:
return S_SAVEEXEC_B64(NegateMode::None, false, inst); return S_SAVEEXEC_B64(NegateMode::None, false, inst);
case Opcode::S_ORN2_SAVEEXEC_B64: case Opcode::S_ORN2_SAVEEXEC_B64:
@ -585,12 +589,25 @@ void Translator::S_BCNT1_I32_B32(const GcnInst& inst) {
ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
} }
void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
const IR::U32 result = ir.BitCount(GetSrc64(inst.src[0]));
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
void Translator::S_FF1_I32_B32(const GcnInst& inst) { void Translator::S_FF1_I32_B32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))}; const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))};
SetDst(inst.dst[0], result); SetDst(inst.dst[0], result);
} }
/// Translates S_FF1_I32_B64: writes the index of the lowest set bit of the 64-bit source
/// operand to the destination, or -1 when the source is zero.
void Translator::S_FF1_I32_B64(const GcnInst& inst) {
    const IR::U64 src0{GetSrc64(inst.src[0])};

    // A zero source is handled explicitly rather than relying on FindILsb's own result.
    const IR::U1 src_is_zero{ir.IEqual(src0, ir.Imm64(u64(0)))};
    const IR::U32 lowest_set_bit{ir.FindILsb(src0)};

    const IR::U32 result{ir.Select(src_is_zero, ir.Imm32(-1), lowest_set_bit)};
    SetDst(inst.dst[0], result);
}
void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) { void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination

View file

@ -111,7 +111,9 @@ public:
void S_NOT_B64(const GcnInst& inst); void S_NOT_B64(const GcnInst& inst);
void S_BREV_B32(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst);
void S_BCNT1_I32_B32(const GcnInst& inst); void S_BCNT1_I32_B32(const GcnInst& inst);
void S_BCNT1_I32_B64(const GcnInst& inst);
void S_FF1_I32_B32(const GcnInst& inst); void S_FF1_I32_B32(const GcnInst& inst);
void S_FF1_I32_B64(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
void S_ABS_I32(const GcnInst& inst); void S_ABS_I32(const GcnInst& inst);

View file

@ -1273,8 +1273,15 @@ U32 IREmitter::BitReverse(const U32& value) {
return Inst<U32>(Opcode::BitReverse32, value); return Inst<U32>(Opcode::BitReverse32, value);
} }
U32 IREmitter::BitCount(const U32& value) { U32 IREmitter::BitCount(const U32U64& value) {
return Inst<U32>(Opcode::BitCount32, value); switch (value.Type()) {
case Type::U32:
return Inst<U32>(Opcode::BitCount32, value);
case Type::U64:
return Inst<U32>(Opcode::BitCount64, value);
default:
ThrowInvalidType(value.Type());
}
} }
U32 IREmitter::BitwiseNot(const U32& value) { U32 IREmitter::BitwiseNot(const U32& value) {
@ -1289,8 +1296,15 @@ U32 IREmitter::FindUMsb(const U32& value) {
return Inst<U32>(Opcode::FindUMsb32, value); return Inst<U32>(Opcode::FindUMsb32, value);
} }
U32 IREmitter::FindILsb(const U32& value) { U32 IREmitter::FindILsb(const U32U64& value) {
return Inst<U32>(Opcode::FindILsb32, value); switch (value.Type()) {
case Type::U32:
return Inst<U32>(Opcode::FindILsb32, value);
case Type::U64:
return Inst<U32>(Opcode::FindILsb64, value);
default:
ThrowInvalidType(value.Type());
}
} }
U32 IREmitter::SMin(const U32& a, const U32& b) { U32 IREmitter::SMin(const U32& a, const U32& b) {
@ -1345,7 +1359,9 @@ U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
} }
switch (lhs.Type()) { switch (lhs.Type()) {
case Type::U32: case Type::U32:
return Inst<U1>(Opcode::IEqual, lhs, rhs); return Inst<U1>(Opcode::IEqual32, lhs, rhs);
case Type::U64:
return Inst<U1>(Opcode::IEqual64, lhs, rhs);
default: default:
ThrowInvalidType(lhs.Type()); ThrowInvalidType(lhs.Type());
} }

View file

@ -229,12 +229,12 @@ public:
[[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
bool is_signed = false); bool is_signed = false);
[[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U32 BitReverse(const U32& value);
[[nodiscard]] U32 BitCount(const U32& value); [[nodiscard]] U32 BitCount(const U32U64& value);
[[nodiscard]] U32 BitwiseNot(const U32& value); [[nodiscard]] U32 BitwiseNot(const U32& value);
[[nodiscard]] U32 FindSMsb(const U32& value); [[nodiscard]] U32 FindSMsb(const U32& value);
[[nodiscard]] U32 FindUMsb(const U32& value); [[nodiscard]] U32 FindUMsb(const U32& value);
[[nodiscard]] U32 FindILsb(const U32& value); [[nodiscard]] U32 FindILsb(const U32U64& value);
[[nodiscard]] U32 SMin(const U32& a, const U32& b); [[nodiscard]] U32 SMin(const U32& a, const U32& b);
[[nodiscard]] U32 UMin(const U32& a, const U32& b); [[nodiscard]] U32 UMin(const U32& a, const U32& b);
[[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);

View file

@ -284,11 +284,13 @@ OPCODE(BitFieldSExtract, U32, U32,
OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
OPCODE(BitReverse32, U32, U32, ) OPCODE(BitReverse32, U32, U32, )
OPCODE(BitCount32, U32, U32, ) OPCODE(BitCount32, U32, U32, )
OPCODE(BitCount64, U32, U64, )
OPCODE(BitwiseNot32, U32, U32, ) OPCODE(BitwiseNot32, U32, U32, )
OPCODE(FindSMsb32, U32, U32, ) OPCODE(FindSMsb32, U32, U32, )
OPCODE(FindUMsb32, U32, U32, ) OPCODE(FindUMsb32, U32, U32, )
OPCODE(FindILsb32, U32, U32, ) OPCODE(FindILsb32, U32, U32, )
OPCODE(FindILsb64, U32, U64, )
OPCODE(SMin32, U32, U32, U32, ) OPCODE(SMin32, U32, U32, U32, )
OPCODE(UMin32, U32, U32, U32, ) OPCODE(UMin32, U32, U32, U32, )
OPCODE(SMax32, U32, U32, U32, ) OPCODE(SMax32, U32, U32, U32, )
@ -299,7 +301,8 @@ OPCODE(SLessThan32, U1, U32,
OPCODE(SLessThan64, U1, U64, U64, ) OPCODE(SLessThan64, U1, U64, U64, )
OPCODE(ULessThan32, U1, U32, U32, ) OPCODE(ULessThan32, U1, U32, U32, )
OPCODE(ULessThan64, U1, U64, U64, ) OPCODE(ULessThan64, U1, U64, U64, )
OPCODE(IEqual, U1, U32, U32, ) OPCODE(IEqual32, U1, U32, U32, )
OPCODE(IEqual64, U1, U64, U64, )
OPCODE(SLessThanEqual, U1, U32, U32, ) OPCODE(SLessThanEqual, U1, U32, U32, )
OPCODE(ULessThanEqual, U1, U32, U32, ) OPCODE(ULessThanEqual, U1, U32, U32, )
OPCODE(SGreaterThan, U1, U32, U32, ) OPCODE(SGreaterThan, U1, U32, U32, )

View file

@ -391,9 +391,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::UGreaterThanEqual: case IR::Opcode::UGreaterThanEqual:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; });
return; return;
case IR::Opcode::IEqual: case IR::Opcode::IEqual32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; });
return; return;
case IR::Opcode::IEqual64:
FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a == b; });
return;
case IR::Opcode::INotEqual: case IR::Opcode::INotEqual:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; });
return; return;

View file

@ -249,7 +249,7 @@ std::pair<const IR::Inst*, bool> TryDisableAnisoLod0(const IR::Inst* inst) {
// Select should be based on zero check // Select should be based on zero check
const auto* prod0 = inst->Arg(0).InstRecursive(); const auto* prod0 = inst->Arg(0).InstRecursive();
if (prod0->GetOpcode() != IR::Opcode::IEqual || if (prod0->GetOpcode() != IR::Opcode::IEqual32 ||
!(prod0->Arg(1).IsImmediate() && prod0->Arg(1).U32() == 0u)) { !(prod0->Arg(1).IsImmediate() && prod0->Arg(1).U32() == 0u)) {
return not_found; return not_found;
} }