CPU (A64): Add Pmull_V Inst. with Clmul fast path for the "1/2D -> 1Q" variant & Sse fast path and slow path for both the "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test. (#1817)
* Add Pmull_V Sse fast path only, both "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test. * Add Clmul fast path for the 128 bits variant. * Small optimisation (save 60 instructions) for the Sse fast path about the 128 bits variant. * Add slow path, both variants. Fix V128 Shl/Shr when shift = 0. * A32: Add Vmull_I P64 variant (slow path); not tested. * A32: Add Vmull_I_P8_P64 Test and fix P64 variant.
This commit is contained in:
parent
a03ab0c4a0
commit
430ba6da65
11 changed files with 264 additions and 25 deletions
|
@ -100,6 +100,13 @@ namespace Ryujinx.Tests.Cpu
|
|||
#endregion
|
||||
|
||||
#region "ValueSource (Types)"
|
||||
private static ulong[] _8B1D_()
|
||||
{
|
||||
return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
|
||||
0x8080808080808080ul, 0x7FFFFFFFFFFFFFFFul,
|
||||
0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul };
|
||||
}
|
||||
|
||||
private static ulong[] _8B4H2S1D_()
|
||||
{
|
||||
return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
|
||||
|
@ -530,6 +537,36 @@ namespace Ryujinx.Tests.Cpu
|
|||
CompareAgainstUnicorn();
|
||||
}
|
||||
|
||||
[Test, Pairwise, Description("VMULL.<P8, P64> <Qd>, <Dn>, <Dm>")]
|
||||
public void Vmull_I_P8_P64([Values(0u, 1u)] uint rd,
|
||||
[Values(0u, 1u)] uint rn,
|
||||
[Values(0u, 1u)] uint rm,
|
||||
[ValueSource(nameof(_8B1D_))] [Random(RndCnt)] ulong d0,
|
||||
[ValueSource(nameof(_8B1D_))] [Random(RndCnt)] ulong d1,
|
||||
[Values(0u/*, 2u*/)] uint size) // <P8, P64>
|
||||
{
|
||||
/*if (size == 2u)
|
||||
{
|
||||
Assert.Ignore("Ryujinx.Tests.Unicorn.UnicornException : Invalid instruction (UC_ERR_INSN_INVALID)");
|
||||
}*/
|
||||
|
||||
uint opcode = 0xf2800e00u; // VMULL.P8 Q0, D0, D0
|
||||
|
||||
rd >>= 1; rd <<= 1;
|
||||
|
||||
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
|
||||
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
|
||||
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
|
||||
|
||||
opcode |= (size & 0x3) << 20;
|
||||
|
||||
V128 v0 = MakeVectorE0E1(d0, d1);
|
||||
|
||||
SingleOpcode(opcode, v0: v0);
|
||||
|
||||
CompareAgainstUnicorn();
|
||||
}
|
||||
|
||||
[Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, <Vn>")]
|
||||
public void Vshl([Values(0u)] uint rd,
|
||||
[Values(1u, 0u)] uint rn,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue