Add Rshrn_V & Shrn_V Sse opt.. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614)

* Update CountLeadingZeros().

* Remove obsolete Tests.

* Follow-up.

* Follow-up.

* Follow-up.

* Add Mla_V, Mls_V & Mul_V Tests.

* Update PackageReferences.

* Remove EmitLd/Stvectmp2().

* Remove Dup. Nits.

* Remove EmitLd/Stvectmp2() & Dup; nits.

* Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn().

* Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits.

* Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt..

Remove Tmp stuff; remove Dup.
Nits.

* Add Mla/Mls/Mul_V Sse opt.. Add "Part" opt..

Remove EmitLd/Stvectmp2(), remove Dup.
Nits.

* Nits.

* Nits.

* Nit.

* Add "Part" opt.. Nit.

* Nit.

* Nit.

* Add Cmhi_V & Cmhs_V Sse opt..
This commit is contained in:
LDj3SNuD 2019-03-13 09:23:52 +01:00 committed by jduncanator
parent a0aecd1ff8
commit 1bef70c068
14 changed files with 707 additions and 484 deletions

View file

@ -21,26 +21,24 @@ namespace ChocolArm64.Instructions
if (op.Size == 1 && op.Opc == 0)
{
//Double -> Single.
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
Type[] typesCvt = new Type[] { typeof(Vector128<float>), typeof(Vector128<double>) };
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitLdvec(op.Rn);
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<double>) };
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), types));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), typesCvt));
context.EmitStvec(op.Rd);
}
else if (op.Size == 0 && op.Opc == 1)
{
//Single -> Double.
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
Type[] typesCvt = new Type[] { typeof(Vector128<double>), typeof(Vector128<float>) };
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
context.EmitLdvec(op.Rn);
Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<float>) };
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), types));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), typesCvt));
context.EmitStvec(op.Rd);
}
@ -80,14 +78,14 @@ namespace ChocolArm64.Instructions
{
Type[] typesCvt = new Type[] { typeof(Vector128<float>) };
string nameMov = op.RegisterSize == RegisterSize.Simd128
? nameof(Sse.MoveHighToLow)
: nameof(Sse.MoveLowToHigh);
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitCall(typeof(Sse).GetMethod(nameMov));
if (op.RegisterSize == RegisterSize.Simd128)
{
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow)));
}
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt));
@ -249,12 +247,12 @@ namespace ChocolArm64.Instructions
public static void Fcvtzs_S(ILEmitterCtx context)
{
EmitScalarFcvtzs(context);
EmitFcvtz(context, signed: true, scalar: true);
}
public static void Fcvtzs_V(ILEmitterCtx context)
{
EmitVectorFcvtzs(context);
EmitFcvtz(context, signed: true, scalar: false);
}
public static void Fcvtzu_Gp(ILEmitterCtx context)
@ -269,12 +267,12 @@ namespace ChocolArm64.Instructions
public static void Fcvtzu_S(ILEmitterCtx context)
{
EmitScalarFcvtzu(context);
EmitFcvtz(context, signed: false, scalar: true);
}
public static void Fcvtzu_V(ILEmitterCtx context)
{
EmitVectorFcvtzu(context);
EmitFcvtz(context, signed: false, scalar: false);
}
public static void Scvtf_Gp(ILEmitterCtx context)
@ -415,11 +413,6 @@ namespace ChocolArm64.Instructions
int bytes = op.GetBitsCount() >> 3;
int elems = !scalar ? bytes >> sizeI : 1;
if (scalar && (sizeF == 0))
{
EmitVectorZeroLowerTmp(context);
}
for (int index = 0; index < elems; index++)
{
EmitVectorExtractF(context, op.Rn, index, sizeF);
@ -441,13 +434,62 @@ namespace ChocolArm64.Instructions
: nameof(VectorHelper.SatF64ToU64));
}
EmitVectorInsertTmp(context, index, sizeI);
if (scalar)
{
EmitVectorZeroAll(context, op.Rd);
}
EmitVectorInsert(context, op.Rd, index, sizeI);
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
private static void EmitFcvtz(ILEmitterCtx context, bool signed, bool scalar)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
int sizeI = sizeF + 2;
int fBits = GetFBits(context);
int bytes = op.GetBitsCount() >> 3;
int elems = !scalar ? bytes >> sizeI : 1;
for (int index = 0; index < elems; index++)
{
EmitVectorExtractF(context, op.Rn, index, sizeF);
EmitF2iFBitsMul(context, sizeF, fBits);
if (sizeF == 0)
{
VectorHelper.EmitCall(context, signed
? nameof(VectorHelper.SatF32ToS32)
: nameof(VectorHelper.SatF32ToU32));
context.Emit(OpCodes.Conv_U8);
}
else /* if (sizeF == 1) */
{
VectorHelper.EmitCall(context, signed
? nameof(VectorHelper.SatF64ToS64)
: nameof(VectorHelper.SatF64ToU64));
}
if (scalar)
{
EmitVectorZeroAll(context, op.Rd);
}
EmitVectorInsert(context, op.Rd, index, sizeI);
}
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
@ -555,105 +597,6 @@ namespace ChocolArm64.Instructions
}
}
private static void EmitScalarFcvtzs(ILEmitterCtx context)
{
EmitScalarFcvtz(context, true);
}
private static void EmitScalarFcvtzu(ILEmitterCtx context)
{
EmitScalarFcvtz(context, false);
}
private static void EmitScalarFcvtz(ILEmitterCtx context, bool signed)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
int sizeI = sizeF + 2;
int fBits = GetFBits(context);
EmitVectorExtractF(context, op.Rn, 0, sizeF);
EmitF2iFBitsMul(context, sizeF, fBits);
if (sizeF == 0)
{
VectorHelper.EmitCall(context, signed
? nameof(VectorHelper.SatF32ToS32)
: nameof(VectorHelper.SatF32ToU32));
}
else /* if (sizeF == 1) */
{
VectorHelper.EmitCall(context, signed
? nameof(VectorHelper.SatF64ToS64)
: nameof(VectorHelper.SatF64ToU64));
}
if (sizeF == 0)
{
context.Emit(OpCodes.Conv_U8);
}
EmitScalarSet(context, op.Rd, sizeI);
}
private static void EmitVectorFcvtzs(ILEmitterCtx context)
{
EmitVectorFcvtz(context, true);
}
private static void EmitVectorFcvtzu(ILEmitterCtx context)
{
EmitVectorFcvtz(context, false);
}
private static void EmitVectorFcvtz(ILEmitterCtx context, bool signed)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
int sizeI = sizeF + 2;
int fBits = GetFBits(context);
int bytes = op.GetBitsCount() >> 3;
int elems = bytes >> sizeI;
for (int index = 0; index < elems; index++)
{
EmitVectorExtractF(context, op.Rn, index, sizeF);
EmitF2iFBitsMul(context, sizeF, fBits);
if (sizeF == 0)
{
VectorHelper.EmitCall(context, signed
? nameof(VectorHelper.SatF32ToS32)
: nameof(VectorHelper.SatF32ToU32));
}
else /* if (sizeF == 1) */
{
VectorHelper.EmitCall(context, signed
? nameof(VectorHelper.SatF64ToS64)
: nameof(VectorHelper.SatF64ToU64));
}
if (sizeF == 0)
{
context.Emit(OpCodes.Conv_U8);
}
EmitVectorInsert(context, op.Rd, index, sizeI);
}
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
private static int GetFBits(ILEmitterCtx context)
{
if (context.CurrOp is OpCodeSimdShImm64 op)