v_rcp_legacy_f32 (#3040)

This commit is contained in:
Marcin Mikołajczyk 2025-06-05 01:55:47 +02:00 committed by GitHub
parent 23710f397e
commit 4d1a1ce9c2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 17 additions and 0 deletions

View file

@ -204,6 +204,7 @@ public:
void V_EXP_F32(const GcnInst& inst); void V_EXP_F32(const GcnInst& inst);
void V_LOG_F32(const GcnInst& inst); void V_LOG_F32(const GcnInst& inst);
void V_RCP_F32(const GcnInst& inst); void V_RCP_F32(const GcnInst& inst);
void V_RCP_LEGACY_F32(const GcnInst& inst);
void V_RCP_F64(const GcnInst& inst); void V_RCP_F64(const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst); void V_RSQ_F32(const GcnInst& inst);
void V_SQRT_F32(const GcnInst& inst); void V_SQRT_F32(const GcnInst& inst);

View file

@ -158,6 +158,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_LOG_F32(inst); return V_LOG_F32(inst);
case Opcode::V_RCP_F32: case Opcode::V_RCP_F32:
return V_RCP_F32(inst); return V_RCP_F32(inst);
case Opcode::V_RCP_LEGACY_F32:
return V_RCP_LEGACY_F32(inst);
case Opcode::V_RCP_F64: case Opcode::V_RCP_F64:
return V_RCP_F64(inst); return V_RCP_F64(inst);
case Opcode::V_RCP_IFLAG_F32: case Opcode::V_RCP_IFLAG_F32:
@ -798,6 +800,20 @@ void Translator::V_RCP_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPRecip(src0)); SetDst(inst.dst[0], ir.FPRecip(src0));
} }
// Translates V_RCP_LEGACY_F32: emits the reciprocal of src[0] and, where that
// reciprocal is infinite, writes a zero carrying the same sign instead of the
// infinity. Any non-infinite reciprocal is passed through unchanged.
void Translator::V_RCP_LEGACY_F32(const GcnInst& inst) {
    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
    const auto result = ir.FPRecip(src0);
    const auto inf = ir.FPIsInf(result);
    // Derive the sign with a floating-point comparison. A float-to-unsigned
    // *numeric* conversion followed by a shift does not expose the IEEE sign
    // bit, and converting +/-infinity to an integer is out of range anyway.
    // This value is only consumed when `result` is infinite, where
    // "less than zero" is exactly "is negative infinity".
    const auto negative = ir.FPLessThan(result, ir.Imm32(0.0f));
    const IR::F32 inf_result{ir.Select(negative, ir.Imm32(-0.0f), ir.Imm32(0.0f))};
    const IR::F32 val{ir.Select(inf, inf_result, result)};
    SetDst(inst.dst[0], val);
}
void Translator::V_RCP_F64(const GcnInst& inst) { void Translator::V_RCP_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])}; const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
SetDst64(inst.dst[0], ir.FPRecip(src0)); SetDst64(inst.dst[0], ir.FPRecip(src0));