shader: FMUL, select, RRO, and MUFU fixes

2021-02-22 22:59:16 -03:00 · 2021-02-22 22:59:16 -03:00 · e44752ddc8
commit e44752ddc8
parent 18a766b362
18 changed files with 507 additions and 119 deletions
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@ -361,19 +361,21 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
    }
 }

-UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) {
+Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {  // Builds the Select* instruction matching the operand type.
    if (true_value.Type() != false_value.Type()) {  // Both candidate values must share one type.
        throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
    }
    switch (true_value.Type()) {  // One opcode per handled type: U8, U16, U32, U64 and F32.
    case Type::U8:
-        return Inst<UAny>(Opcode::Select8, condition, true_value, false_value);
+        return Inst(Opcode::SelectU8, condition, true_value, false_value);
    case Type::U16:
-        return Inst<UAny>(Opcode::Select16, condition, true_value, false_value);
+        return Inst(Opcode::SelectU16, condition, true_value, false_value);
    case Type::U32:
-        return Inst<UAny>(Opcode::Select32, condition, true_value, false_value);
+        return Inst(Opcode::SelectU32, condition, true_value, false_value);
    case Type::U64:
-        return Inst<UAny>(Opcode::Select64, condition, true_value, false_value);
+        return Inst(Opcode::SelectU64, condition, true_value, false_value);
+    case Type::F32:
+        return Inst(Opcode::SelectF32, condition, true_value, false_value);
    default:  // NOTE(review): opcodes.inc declares SelectF16, but there is no Type::F16 case here;
        throw InvalidArgument("Invalid type {}", true_value.Type());  // F16 operands therefore reach this throw - confirm that is intended.
    }
@ -503,12 +505,16 @@ F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
    return result;
 }

-F32 IREmitter::FPCosNotReduced(const F32& value) {
-    return Inst<F32>(Opcode::FPCosNotReduced, value);
+F32 IREmitter::FPCos(const F32& value) {  // Wraps Opcode::FPCos for a single F32 operand.
+    return Inst<F32>(Opcode::FPCos, value);  // The Inst<F32> result is returned unchanged.
 }

-F32 IREmitter::FPExp2NotReduced(const F32& value) {
-    return Inst<F32>(Opcode::FPExp2NotReduced, value);
+F32 IREmitter::FPSin(const F32& value) {  // Wraps Opcode::FPSin for a single F32 operand.
+    return Inst<F32>(Opcode::FPSin, value);  // The Inst<F32> result is returned unchanged.
+}
+
+F32 IREmitter::FPExp2(const F32& value) {  // Wraps Opcode::FPExp2 for a single F32 operand.
+    return Inst<F32>(Opcode::FPExp2, value);  // The Inst<F32> result is returned unchanged.
 }

 F32 IREmitter::FPLog2(const F32& value) {
@ -517,9 +523,9 @@ F32 IREmitter::FPLog2(const F32& value) {

 F32F64 IREmitter::FPRecip(const F32F64& value) {
    switch (value.Type()) {
-    case Type::U32:
+    case Type::F32:
        return Inst<F32>(Opcode::FPRecip32, value);
-    case Type::U64:
+    case Type::F64:
        return Inst<F64>(Opcode::FPRecip64, value);
    default:
        ThrowInvalidType(value.Type());
@ -528,19 +534,15 @@ F32F64 IREmitter::FPRecip(const F32F64& value) {

 F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {  // Picks FPRecipSqrt32 or FPRecipSqrt64 from the operand type.
    switch (value.Type()) {
-    case Type::U32:
+    case Type::F32:  // F32 operand -> 32-bit opcode, result typed F32.
        return Inst<F32>(Opcode::FPRecipSqrt32, value);
-    case Type::U64:
+    case Type::F64:  // F64 operand -> 64-bit opcode, result typed F64.
        return Inst<F64>(Opcode::FPRecipSqrt64, value);
    default:  // Every other type is handed to ThrowInvalidType.
        ThrowInvalidType(value.Type());  // NOTE(review): no return follows; presumably ThrowInvalidType never returns - confirm.
    }
 }

-F32 IREmitter::FPSinNotReduced(const F32& value) {
-    return Inst<F32>(Opcode::FPSinNotReduced, value);
-}
-
 F32 IREmitter::FPSqrt(const F32& value) {  // Wraps Opcode::FPSqrt for a single F32 operand.
    return Inst<F32>(Opcode::FPSqrt, value);  // The Inst<F32> result is returned unchanged.
 }
@ -610,6 +612,114 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
    }
 }

+U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {  // Builds an FP*Equal instruction on lhs/rhs; result is U1.
+    if (lhs.Type() != rhs.Type()) {  // Operands must share one type.
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {  // Opcode width (16/32/64) follows the shared operand type.
+    case Type::F16:  // ordered ? FPOrd* : FPUnord* (presumably NaN handling - confirm).
+        return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs);
+    default:  // Other types go to ThrowInvalidType (presumably noreturn - confirm).
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {  // Builds an FP*NotEqual instruction on lhs/rhs; result is U1.
+    if (lhs.Type() != rhs.Type()) {  // Operands must share one type.
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {  // Opcode width (16/32/64) follows the shared operand type.
+    case Type::F16:  // ordered ? FPOrd* : FPUnord* (presumably NaN handling - confirm).
+        return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs);
+    default:  // Other types go to ThrowInvalidType (presumably noreturn - confirm).
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {  // Builds an FP*LessThan instruction on lhs/rhs; result is U1.
+    if (lhs.Type() != rhs.Type()) {  // Operands must share one type.
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {  // Opcode width (16/32/64) follows the shared operand type.
+    case Type::F16:  // ordered ? FPOrd* : FPUnord* (presumably NaN handling - confirm).
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs);
+    default:  // Other types go to ThrowInvalidType (presumably noreturn - confirm).
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {  // Builds an FP*GreaterThan instruction on lhs/rhs; result is U1.
+    if (lhs.Type() != rhs.Type()) {  // Operands must share one type.
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {  // Opcode width (16/32/64) follows the shared operand type.
+    case Type::F16:  // ordered ? FPOrd* : FPUnord* (presumably NaN handling - confirm).
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs,
+                        rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs,
+                        rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs,
+                        rhs);
+    default:  // Other types go to ThrowInvalidType (presumably noreturn - confirm).
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {  // Builds an FP*LessThanEqual instruction on lhs/rhs; result is U1.
+    if (lhs.Type() != rhs.Type()) {  // Operands must share one type.
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {  // Opcode width (16/32/64) follows the shared operand type.
+    case Type::F16:  // ordered ? FPOrd* : FPUnord* (presumably NaN handling - confirm).
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
+                        lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
+                        lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
+                        lhs, rhs);
+    default:  // Other types go to ThrowInvalidType (presumably noreturn - confirm).
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
+U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {  // Builds an FP*GreaterThanEqual instruction on lhs/rhs; result is U1.
+    if (lhs.Type() != rhs.Type()) {  // Operands must share one type.
+        throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {  // Opcode width (16/32/64) follows the shared operand type.
+    case Type::F16:  // ordered ? FPOrd* : FPUnord* (presumably NaN handling - confirm).
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
+                                : Opcode::FPUnordGreaterThanEqual16,
+                        lhs, rhs);
+    case Type::F32:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
+                                : Opcode::FPUnordGreaterThanEqual32,
+                        lhs, rhs);
+    case Type::F64:
+        return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
+                                : Opcode::FPUnordGreaterThanEqual64,
+                        lhs, rhs);
+    default:  // Other types go to ThrowInvalidType (presumably noreturn - confirm).
+        ThrowInvalidType(lhs.Type());
+    }
+}
+
 U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
    if (a.Type() != b.Type()) {
        throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@ -98,7 +98,8 @@ public:
                                           const Value& e4);
    [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);

-    [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value);
+    [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
+                               const Value& false_value);

    template <typename Dest, typename Source>
    [[nodiscard]] Dest BitCast(const Source& value);
@ -121,12 +122,12 @@ public:
    [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
    [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);

-    [[nodiscard]] F32 FPCosNotReduced(const F32& value);
-    [[nodiscard]] F32 FPExp2NotReduced(const F32& value);
+    [[nodiscard]] F32 FPCos(const F32& value);
+    [[nodiscard]] F32 FPSin(const F32& value);
+    [[nodiscard]] F32 FPExp2(const F32& value);
    [[nodiscard]] F32 FPLog2(const F32& value);
    [[nodiscard]] F32F64 FPRecip(const F32F64& value);
    [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
-    [[nodiscard]] F32 FPSinNotReduced(const F32& value);
    [[nodiscard]] F32 FPSqrt(const F32& value);
    [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
    [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
@ -134,6 +135,15 @@ public:
    [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
    [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});

+    [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
+    [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
+    [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
+    [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
+    [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
+                                     bool ordered = true);
+    [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
+                                        bool ordered = true);
+
    [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
    [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
    [[nodiscard]] U32 IMul(const U32& a, const U32& b);
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@ -103,10 +103,12 @@ OPCODE(CompositeExtractF64x3,                               F64,            F64x
 OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                            )

 // Select operations
-OPCODE(Select8,                                             U8,             U1,             U8,             U8,                             )
-OPCODE(Select16,                                            U16,            U1,             U16,            U16,                            )
-OPCODE(Select32,                                            U32,            U1,             U32,            U32,                            )
-OPCODE(Select64,                                            U64,            U1,             U64,            U64,                            )
+OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                             )
+OPCODE(SelectU16,                                           U16,            U1,             U16,            U16,                            )
+OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                            )
+OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                            )
+OPCODE(SelectF16,                                           F16,            U1,             F16,            F16,                            )
+OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                            )

 // Bitwise conversions
 OPCODE(BitCastU16F16,                                       U16,            F16,                                                            )
@ -156,11 +158,8 @@ OPCODE(FPRecipSqrt32,                                       F32,            F32,
 OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                            )
 OPCODE(FPSqrt,                                              F32,            F32,                                                            )
 OPCODE(FPSin,                                               F32,            F32,                                                            )
-OPCODE(FPSinNotReduced,                                     F32,            F32,                                                            )
 OPCODE(FPExp2,                                              F32,            F32,                                                            )
-OPCODE(FPExp2NotReduced,                                    F32,            F32,                                                            )
 OPCODE(FPCos,                                               F32,            F32,                                                            )
-OPCODE(FPCosNotReduced,                                     F32,            F32,                                                            )
 OPCODE(FPLog2,                                              F32,            F32,                                                            )
 OPCODE(FPSaturate16,                                        F16,            F16,                                                            )
 OPCODE(FPSaturate32,                                        F32,            F32,                                                            )
@ -178,6 +177,43 @@ OPCODE(FPTrunc16,                                           F16,            F16,
 OPCODE(FPTrunc32,                                           F32,            F32,                                                            )
 OPCODE(FPTrunc64,                                           F64,            F64,                                                            )

+OPCODE(FPOrdEqual16,                                        U1,             F16,            F16,                                            )
+OPCODE(FPOrdEqual32,                                        U1,             F32,            F32,                                            )
+OPCODE(FPOrdEqual64,                                        U1,             F64,            F64,                                            )
+OPCODE(FPUnordEqual16,                                      U1,             F16,            F16,                                            )
+OPCODE(FPUnordEqual32,                                      U1,             F32,            F32,                                            )
+OPCODE(FPUnordEqual64,                                      U1,             F64,            F64,                                            )
+OPCODE(FPOrdNotEqual16,                                     U1,             F16,            F16,                                            )
+OPCODE(FPOrdNotEqual32,                                     U1,             F32,            F32,                                            )
+OPCODE(FPOrdNotEqual64,                                     U1,             F64,            F64,                                            )
+OPCODE(FPUnordNotEqual16,                                   U1,             F16,            F16,                                            )
+OPCODE(FPUnordNotEqual32,                                   U1,             F32,            F32,                                            )
+OPCODE(FPUnordNotEqual64,                                   U1,             F64,            F64,                                            )
+OPCODE(FPOrdLessThan16,                                     U1,             F16,            F16,                                            )
+OPCODE(FPOrdLessThan32,                                     U1,             F32,            F32,                                            )
+OPCODE(FPOrdLessThan64,                                     U1,             F64,            F64,                                            )
+OPCODE(FPUnordLessThan16,                                   U1,             F16,            F16,                                            )
+OPCODE(FPUnordLessThan32,                                   U1,             F32,            F32,                                            )
+OPCODE(FPUnordLessThan64,                                   U1,             F64,            F64,                                            )
+OPCODE(FPOrdGreaterThan16,                                  U1,             F16,            F16,                                            )
+OPCODE(FPOrdGreaterThan32,                                  U1,             F32,            F32,                                            )
+OPCODE(FPOrdGreaterThan64,                                  U1,             F64,            F64,                                            )
+OPCODE(FPUnordGreaterThan16,                                U1,             F16,            F16,                                            )
+OPCODE(FPUnordGreaterThan32,                                U1,             F32,            F32,                                            )
+OPCODE(FPUnordGreaterThan64,                                U1,             F64,            F64,                                            )
+OPCODE(FPOrdLessThanEqual16,                                U1,             F16,            F16,                                            )
+OPCODE(FPOrdLessThanEqual32,                                U1,             F32,            F32,                                            )
+OPCODE(FPOrdLessThanEqual64,                                U1,             F64,            F64,                                            )
+OPCODE(FPUnordLessThanEqual16,                              U1,             F16,            F16,                                            )
+OPCODE(FPUnordLessThanEqual32,                              U1,             F32,            F32,                                            )
+OPCODE(FPUnordLessThanEqual64,                              U1,             F64,            F64,                                            )
+OPCODE(FPOrdGreaterThanEqual16,                             U1,             F16,            F16,                                            )
+OPCODE(FPOrdGreaterThanEqual32,                             U1,             F32,            F32,                                            )
+OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,            F64,                                            )
+OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                            )
+OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                            )
+OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                            )
+
 // Integer operations
 OPCODE(IAdd32,                                              U32,            U32,            U32,                                            )
 OPCODE(IAdd64,                                              U64,            U64,            U64,                                            )