64 bits OP, impl V_ADDC_U32 & V_MAD_U64_U32 (#310)

* impl V_ADDC_U32 & V_MAD_U64_U32

* shader recompiler: add 64 bits version to get register / GetSrc

* fix V_ADDC_U32 carry

* shader recompiler: removed automatic conversion to force_flt in GetSRc

* shader recompiler: auto cast between u32 and u64 during ssa pass

* shader recompiler: fix SetVectorReg64 & standardize switches-case

* shader translate: fix overflow detection in V_ADD_I32

use vcc lo instead of vcc thread bit

* shader recompiler: more 64-bit work

- removed bit_size parameter from Get[Scalar/Vector]Register
- add BitwiseOr64
- add SetDst64 as a replacement for SetScalarReg64 & SetVectorReg64
- add GetSrc64 for 64-bit value

* shader recompiler: add V_MAD_U64_U32 vcc output

- add V_MAD_U64_U32 vcc output
- ILessThan for 64-bits

* shader recompiler: removed unnecessary changes & missing consts

* shader_recompiler: Add s64 type in constant propagation
This commit is contained in:
Vinicius Rangel 2024-07-27 11:23:59 -03:00 committed by GitHub
parent d84b4adc83
commit 680192a0c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 361 additions and 40 deletions

View file

@ -67,7 +67,8 @@ void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, is_xor ? ir.BitwiseXor(src0, src1) : ir.BitwiseOr(src0, src1));
ir.SetVectorReg(dst_reg,
is_xor ? ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1)));
}
void Translator::V_AND_B32(const GcnInst& inst) {
@ -92,6 +93,30 @@ void Translator::V_ADD_I32(const GcnInst& inst) {
// TODO: Carry
}
void Translator::V_ADDC_U32(const GcnInst& inst) {
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
IR::U32 scarry;
if (inst.src_count == 3) { // VOP3
IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))};
scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))};
} else { // VOP2
scarry = ir.GetVccLo();
}
const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(dst_reg, result);
const IR::U1 less_src0 = ir.ILessThan(result, src0, false);
const IR::U1 less_src1 = ir.ILessThan(result, src1, false);
const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
ir.SetVcc(did_overflow);
}
void Translator::V_CVT_F32_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::VectorReg dst_reg{inst.dst[0].code};
@ -294,6 +319,23 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) {
// TODO: Carry-out
}
void Translator::V_MAD_U64_U32(const GcnInst& inst) {
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
const auto src2 = GetSrc64<IR::U64>(inst.src[2]);
const IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1));
const IR::U64 sum_result = ir.IAdd(mul_result, src2);
SetDst64(inst.dst[0], sum_result);
const IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false);
const IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false);
const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
ir.SetVcc(did_overflow);
}
void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};