shader: Implement HADD2

2021-03-03 03:07:19 -03:00 · 2021-03-03 03:07:19 -03:00 · 4006929c98
commit 4006929c98
parent 980cafdc27
12 changed files with 400 additions and 42 deletions
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
 }

 Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
-    const auto read = [&](Opcode opcode, size_t limit) -> Value {
+    const auto read{[&](Opcode opcode, size_t limit) -> Value {
        if (element >= limit) {
            throw InvalidArgument("Out of bounds element {}", element);
        }
        return Inst(opcode, vector, Value{static_cast<u32>(element)});
-    };
+    }};
    switch (vector.Type()) {
    case Type::U32x2:
        return read(Opcode::CompositeExtractU32x2, 2);
@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
    }
 }

+Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
+    const auto insert{[&](Opcode opcode, size_t limit) -> Value {
+        if (element >= limit) { // limit is the component count passed for this vector type
+            throw InvalidArgument("Out of bounds element {}", element);
+        }
+        return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
+    }};
+    switch (vector.Type()) { // select the insert opcode matching the composite type
+    case Type::U32x2:
+        return insert(Opcode::CompositeInsertU32x2, 2);
+    case Type::U32x3:
+        return insert(Opcode::CompositeInsertU32x3, 3);
+    case Type::U32x4:
+        return insert(Opcode::CompositeInsertU32x4, 4);
+    case Type::F16x2:
+        return insert(Opcode::CompositeInsertF16x2, 2);
+    case Type::F16x3:
+        return insert(Opcode::CompositeInsertF16x3, 3);
+    case Type::F16x4:
+        return insert(Opcode::CompositeInsertF16x4, 4);
+    case Type::F32x2:
+        return insert(Opcode::CompositeInsertF32x2, 2);
+    case Type::F32x3:
+        return insert(Opcode::CompositeInsertF32x3, 3);
+    case Type::F32x4:
+        return insert(Opcode::CompositeInsertF32x4, 4);
+    case Type::F64x2:
+        return insert(Opcode::CompositeInsertF64x2, 2);
+    case Type::F64x3:
+        return insert(Opcode::CompositeInsertF64x3, 3);
+    case Type::F64x4:
+        return insert(Opcode::CompositeInsertF64x4, 4);
+    default: // not one of the composite types handled above
+        ThrowInvalidType(vector.Type());
+    }
+}
+
 Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
    if (true_value.Type() != false_value.Type()) {
        throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) {
 }

 Value IREmitter::UnpackFloat2x16(const U32& value) {
-    return Inst<Value>(Opcode::UnpackFloat2x16, value);
+    return Inst(Opcode::UnpackFloat2x16, value);
 }

 F64 IREmitter::PackDouble2x32(const Value& vector) {
@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
    }
 }

-U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
+U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
    switch (result_bitsize) {
    case 32:
        switch (value.Type()) {
@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }

+F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
+    switch (result_bitsize) { // outer switch: destination width; inner switch: source type
+    case 16:
+        switch (value.Type()) {
+        case Type::F16:
+            // Already 16-bit, nothing to do
+            return value;
+        case Type::F32:
+            return Inst<F16>(Opcode::ConvertF16F32, value);
+        case Type::F64:
+            throw LogicError("Illegal conversion from F64 to F16");
+        default:
+            break;
+        }
+        break;
+    case 32:
+        switch (value.Type()) {
+        case Type::F16:
+            return Inst<F32>(Opcode::ConvertF32F16, value);
+        case Type::F32:
+            // Already 32-bit, nothing to do
+            return value;
+        case Type::F64:
+            return Inst<F32>(Opcode::ConvertF32F64, value); // opcode result type is F32
+        default:
+            break;
+        }
+        break;
+    case 64:
+        switch (value.Type()) {
+        case Type::F16:
+            throw LogicError("Illegal conversion from F16 to F64");
+        case Type::F32:
+            return Inst<F64>(Opcode::ConvertF64F32, value); // widen: F64 result, F32 operand
+        case Type::F64:
+            // Already 64-bit, nothing to do
+            return value;
+        default:
+            break;
+        }
+        break;
+    }
+    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
+}
+
 } // namespace Shader::IR
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@ -97,6 +97,7 @@ public:
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
                                           const Value& e4);
    [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
+    [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);

    [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
                               const Value& false_value);
@ -186,7 +187,8 @@ public:
    [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
    [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);

-    [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
+    [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);

 private:
    IR::Block::iterator insertion_point;
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4,                             U32x4,          U32,
 OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                            )
 OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                            )
 OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                            )
+OPCODE(CompositeInsertU32x2,                                U32x2,          U32x2,          U32,            U32,                            )
+OPCODE(CompositeInsertU32x3,                                U32x3,          U32x3,          U32,            U32,                            )
+OPCODE(CompositeInsertU32x4,                                U32x4,          U32x4,          U32,            U32,                            )
 OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                            )
 OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                            )
 OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,            )
 OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                            )
 OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                            )
 OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                            )
+OPCODE(CompositeInsertF16x2,                                F16x2,          F16x2,          F16,            U32,                            )
+OPCODE(CompositeInsertF16x3,                                F16x3,          F16x3,          F16,            U32,                            )
+OPCODE(CompositeInsertF16x4,                                F16x4,          F16x4,          F16,            U32,                            )
 OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                            )
 OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                            )
 OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,            )
 OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                            )
 OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                            )
 OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                            )
+OPCODE(CompositeInsertF32x2,                                F32x2,          F32x2,          F32,            U32,                            )
+OPCODE(CompositeInsertF32x3,                                F32x3,          F32x3,          F32,            U32,                            )
+OPCODE(CompositeInsertF32x4,                                F32x4,          F32x4,          F32,            U32,                            )
 OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                            )
 OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                            )
 OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,            )
 OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                            )
 OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                            )
 OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                            )
+OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                            )
+OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                            )
+OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                            )

 // Select operations
 OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                             )
@ -277,6 +289,9 @@ OPCODE(ConvertU32F64,                                       U32,            F64,
 OPCODE(ConvertU64F16,                                       U64,            F16,                                                            )
 OPCODE(ConvertU64F32,                                       U64,            F32,                                                            )
 OPCODE(ConvertU64F64,                                       U64,            F64,                                                            )
-
 OPCODE(ConvertU64U32,                                       U64,            U32,                                                            )
 OPCODE(ConvertU32U64,                                       U32,            U64,                                                            )
+OPCODE(ConvertF16F32,                                       F16,            F32,                                                            )
+OPCODE(ConvertF32F16,                                       F32,            F16,                                                            )
+OPCODE(ConvertF32F64,                                       F32,            F64,                                                            )
+OPCODE(ConvertF64F32,                                       F64,            F32,                                                            )