Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused inst.s). Fix Vfma_V slow path not using StandardFPSCRValue(). (#1775)

* Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused inst.s).

Add Vfma_S & Vfms_S Fma fast paths.
Add Vfnma_S inst. with Fma/Sse fast paths and slow path.
Add Vfnms_S Sse fast path.

Add Tests for affected inst.s.

Nits.

* InternalVersion = 1775

* Nits.

* Fix Vfma_V slow path not using StandardFPSCRValue().

* Nit: Fix Vfma_V order.

* Add Vfms_V Sse fast path and slow path.

* Add Vfma_V and Vfms_V Test.
This commit is contained in:
LDj3SNuD 2020-12-17 20:43:41 +01:00 committed by GitHub
parent b5c215111d
commit 8a33e884f8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 292 additions and 221 deletions

View file

@ -820,15 +820,15 @@ namespace ARMeilleure.Instructions
});
}
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
Debug.Assert((op.Size & 1) == 0);
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{
return context.AddIntrinsic(inst, d, n, m);
return context.AddIntrinsic(inst32, d, n, m);
});
}
@ -927,7 +927,13 @@ namespace ARMeilleure.Instructions
});
}
public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
public static void EmitScalarTernaryOpF32(
ArmEmitterContext context,
Intrinsic inst32pt1,
Intrinsic inst64pt1,
Intrinsic inst32pt2,
Intrinsic inst64pt2,
bool isNegD = false)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
@ -939,6 +945,18 @@ namespace ARMeilleure.Instructions
EmitScalarTernaryOpSimd32(context, (d, n, m) =>
{
Operand res = context.AddIntrinsic(inst1, n, m);
if (isNegD)
{
Operand mask = doubleSize
? X86GetScalar(context, -0d)
: X86GetScalar(context, -0f);
d = doubleSize
? context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d)
: context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
}
return context.AddIntrinsic(inst2, d, res);
});
}