Implement VCNT instruction (#1963)

* Implement VCNT based on AArch64 CNT

Add tests

* Update PTC version

* Address LDj's comments

* Explicit size in encoding
* Tighter tests
* Replace SoftFallback with IR helper

Co-authored-by: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>

* Reduce one BitwiseAnd from IR fallback

Based on popcount64b from https://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation

* Rename parameter and add assert

Co-authored-by: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>

Co-authored-by: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
This commit is contained in:
mageven 2021-02-22 20:56:13 +05:30 committed by GitHub
parent dc0adb533d
commit 9bda7b4699
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 81 additions and 11 deletions

View file

@ -154,6 +154,15 @@ namespace Ryujinx.Tests.Cpu
yield return rnd2;
}
}
// Yields 256 values: each 8-bit pattern from 0x00 to 0xFF, copied into all
// eight byte lanes of a 64-bit word.
private static IEnumerable<ulong> _GenPopCnt8B_()
{
    // Multiplying a single byte by this constant replicates it into every byte lane.
    const ulong ByteLanes = 0x0101010101010101ul;

    for (ulong pattern = 0ul; pattern < 256ul; pattern++)
    {
        yield return pattern * ByteLanes;
    }
}
#endregion
private const int RndCnt = 2;
@ -217,6 +226,34 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
[Test, Pairwise, Description("VCNT.8 D0, D0 | VCNT.8 Q0, Q0")]
public void Vcnt([Values(0u, 1u)] uint rd,
                 [Values(0u, 1u)] uint rm,
                 [ValueSource(nameof(_GenPopCnt8B_))] [Random(RndCnt)] ulong d0,
                 [Values] bool q)
{
    const uint BaseOpcode = 0xf3b00500u; // VCNT.8 D0, D0
    const uint QFlag = 1u << 6;          // Set when the test runs the Q-register form.

    // In the Q form, bit 0 of both register indices is cleared so they are even
    // (presumably because a Q register is addressed by an even D-register index).
    uint vd = q ? rd & ~1u : rd;
    uint vm = q ? rm & ~1u : rm;

    // Low four bits of each index go into the register field; bit 4 goes into a separate single-bit field.
    uint destBits = ((vd & 0xf) << 12) | ((vd & 0x10) << 18);
    uint srcBits = ((vm & 0xf) << 0) | ((vm & 0x10) << 1);

    uint opcode = BaseOpcode | (q ? QFlag : 0u) | destBits | srcBits;

    // The upper half is the bitwise complement of the lower half; a second
    // generator for it would be expensive.
    V128 v0 = MakeVectorE0E1(d0, ~d0);

    SingleOpcode(opcode, v0: v0);

    CompareAgainstUnicorn();
}
#endif
}
}