Improve CPU initial translation speeds (#50)

* Add background translation to the CPU

* Do not use a separate thread for translation; implement two-tier translation

* Remove unnecessary usings

* Lower MinCallCountForReJit

* Remove unused variable
This commit is contained in:
gdkchan 2018-03-04 14:09:59 -03:00 committed by GitHub
parent ee9df32e3e
commit 3edb66f389
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 319 additions and 107 deletions

View file

@ -67,14 +67,15 @@ namespace ChocolArm64.Translation
public long VecOutputs;
}
private const int MaxOptGraphLength = 55;
private const int MaxOptGraphLength = 40;
public ALocalAlloc(AILBlock[] Graph, AILBlock Root)
{
IntPaths = new Dictionary<AILBlock, PathIo>();
VecPaths = new Dictionary<AILBlock, PathIo>();
if (Graph.Length < MaxOptGraphLength)
if (Graph.Length > 1 &&
Graph.Length < MaxOptGraphLength)
{
InitializeOptimal(Graph, Root);
}
@ -179,10 +180,8 @@ namespace ChocolArm64.Translation
{
//This is WAY faster than InitializeOptimal, but results in
//unneeded loads and stores, so the resulting code will be slower.
long IntInputs = 0;
long IntOutputs = 0;
long VecInputs = 0;
long VecOutputs = 0;
long IntInputs = 0, IntOutputs = 0;
long VecInputs = 0, VecOutputs = 0;
foreach (AILBlock Block in Graph)
{
@ -196,8 +195,11 @@ namespace ChocolArm64.Translation
//in those cases, if we attempt to write an output register that was
//not written, we will just be writing zero and messing up the old register value.
//So we just need to ensure that all outputs are loaded.
IntInputs |= IntOutputs;
VecInputs |= VecOutputs;
if (Graph.Length > 1)
{
IntInputs |= IntOutputs;
VecInputs |= VecOutputs;
}
foreach (AILBlock Block in Graph)
{