CPU (A64): Add Pmull_V Inst. with Clmul fast path for the "1/2D -> 1Q" variant & Sse fast path and slow path for both the "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test. (#1817)
* Add Pmull_V Sse fast path only, for both the "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test.
* Add Clmul fast path for the 128-bit variant.
* Small optimisation (saves 60 instructions) to the Sse fast path for the 128-bit variant.
* Add slow path for both variants. Fix V128 Shl/Shr when shift = 0.
* A32: Add Vmull_I P64 variant (slow path); not tested.
* A32: Add Vmull_I_P8_P64 Test and fix P64 variant.
This commit is contained in:
parent
a03ab0c4a0
commit
430ba6da65
11 changed files with 264 additions and 25 deletions
|
@ -60,6 +60,13 @@ namespace Ryujinx.Tests.Cpu
|
|||
0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul };
|
||||
}
|
||||
|
||||
// Fixed set of 64-bit corner-case patterns, consumed through
// [ValueSource(nameof(_8B1D_))] by the Pmull_V test to seed vector halves.
private static ulong[] _8B1D_()
{
    ulong[] cornerCases =
    {
        0x0000000000000000ul, // every bit clear
        0x7F7F7F7F7F7F7F7Ful, // every byte is 0x7F
        0x8080808080808080ul, // every byte is 0x80
        0x7FFFFFFFFFFFFFFFul, // every bit set except bit 63
        0x8000000000000000ul, // only bit 63 set
        0xFFFFFFFFFFFFFFFFul  // every bit set
    };

    return cornerCases;
}
|
||||
|
||||
private static ulong[] _8B4H2S_()
|
||||
{
|
||||
return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
|
||||
|
@ -1977,6 +1984,33 @@ namespace Ryujinx.Tests.Cpu
|
|||
CompareAgainstUnicorn();
|
||||
}
|
||||
|
||||
[Test, Pairwise, Description("PMULL{2} <Vd>.<Ta>, <Vn>.<Tb>, <Vm>.<Tb>")]
public void Pmull_V([Values(0u)] uint rd,
                    [Values(1u, 0u)] uint rn,
                    [Values(2u, 0u)] uint rm,
                    [ValueSource(nameof(_8B1D_))] [Random(RndCnt)] ulong z0,
                    [ValueSource(nameof(_8B1D_))] [Random(RndCnt)] ulong z1,
                    [ValueSource(nameof(_8B1D_))] [Random(RndCnt)] ulong a0,
                    [ValueSource(nameof(_8B1D_))] [Random(RndCnt)] ulong a1,
                    [ValueSource(nameof(_8B1D_))] [Random(RndCnt)] ulong b0,
                    [ValueSource(nameof(_8B1D_))] [Random(RndCnt)] ulong b1,
                    [Values(0b00u, 0b11u)] uint size, // Q0: <8B, 1D> => <8H, 1Q>
                    [Values(0b0u, 0b1u)] uint q) // Q1: <16B, 2D> => <8H, 1Q>
{
    // Template encoding with all variable fields zeroed: PMULL V0.8H, V0.8B, V0.8B
    const uint baseEncoding = 0x0E20E000;

    // Register numbers are masked to five bits and placed at bit 0 (rd),
    // bit 5 (rn) and bit 16 (rm).
    uint registerFields = ((rd & 31) << 0) | ((rn & 31) << 5) | ((rm & 31) << 16);

    // Two-bit size selector at bit 22, one-bit Q selector at bit 30.
    uint sizeField = (size & 3) << 22;
    uint qField = (q & 1) << 30;

    uint encoding = baseEncoding | registerFields | sizeField | qField;

    // Initial contents of V0, V1 and V2, each assembled from a pair of 64-bit halves.
    V128 initialV0 = MakeVectorE0E1(z0, z1);
    V128 initialV1 = MakeVectorE0E1(a0, a1);
    V128 initialV2 = MakeVectorE0E1(b0, b1);

    // Hand the instruction and the starting register state to the harness.
    SingleOpcode(encoding, v0: initialV0, v1: initialV1, v2: initialV2);

    // NOTE(review): presumably validates the resulting state against the
    // Unicorn reference emulator - confirm in the test base class.
    CompareAgainstUnicorn();
}
|
||||
|
||||
[Test, Pairwise, Description("RADDHN{2} <Vd>.<Tb>, <Vn>.<Ta>, <Vm>.<Ta>")]
|
||||
public void Raddhn_V_8H8B_4S4H_2D2S([Values(0u)] uint rd,
|
||||
[Values(1u, 0u)] uint rn,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue