Small OpenGL Renderer refactoring (#177)
* Call OpenGL functions directly, remove the pfifo thread, some refactoring * Fix PerformanceStatistics calculating the wrong host fps, remove wait event on PFIFO as this wasn't exactly what was causing the freezes (may replace with an exception later) * Organized the Gpu folder a bit more, renamed a few things, address PR feedback * Make PerformanceStatistics thread safe * Remove unused constant * Use unlimited update rate for better perf
This commit is contained in:
parent
69697957e6
commit
e7559f128f
58 changed files with 518 additions and 633 deletions
11
Ryujinx.HLE/Gpu/Engines/INvGpuEngine.cs
Normal file
11
Ryujinx.HLE/Gpu/Engines/INvGpuEngine.cs
Normal file
|
@ -0,0 +1,11 @@
|
|||
using Ryujinx.HLE.Gpu.Memory;
|
||||
|
||||
namespace Ryujinx.HLE.Gpu.Engines
|
||||
{
|
||||
/// <summary>
/// Contract shared by the emulated GPU engines in this namespace.
/// </summary>
interface INvGpuEngine
{
    /// <summary>
    /// Register file of the engine. Implementations index it by method/register number.
    /// </summary>
    int[] Registers { get; }

    /// <summary>
    /// Hands one push buffer entry to the engine for processing.
    /// </summary>
    /// <param name="Vmm">GPU virtual memory manager passed through to the method handler.</param>
    /// <param name="PBEntry">Entry carrying the method number and its arguments.</param>
    void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry);
}
|
||||
}
|
434
Ryujinx.HLE/Gpu/Engines/MacroInterpreter.cs
Normal file
434
Ryujinx.HLE/Gpu/Engines/MacroInterpreter.cs
Normal file
|
@ -0,0 +1,434 @@
|
|||
using Ryujinx.HLE.Gpu.Exceptions;
|
||||
using Ryujinx.HLE.Gpu.Memory;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.HLE.Gpu.Engines
|
||||
{
|
||||
/// <summary>
/// Interprets macro programs and forwards the method calls they emit to an engine.
/// A macro is a sequence of 32-bit opcodes; the low three bits select either an ALU
/// operation (0..6) or a branch (7), and bit 7 marks the exit instruction.
/// </summary>
class MacroInterpreter
{
    //Maximum number of sends to method 0x585 allowed within one Execute call, see Send.
    private const int MaxCallCountPerRun = 500;

    //Number of sends to method 0x585 since the last Reset.
    private int CallCount;

    //What to do with the ALU result, encoded in opcode bits 4..6.
    private enum AssignmentOperation
    {
        IgnoreAndFetch                  = 0,
        Move                            = 1,
        MoveAndSetMaddr                 = 2,
        FetchAndSend                    = 3,
        MoveAndSend                     = 4,
        FetchAndSetMaddr                = 5,
        MoveAndSetMaddrThenFetchAndSend = 6,
        MoveAndSetMaddrThenSendHigh     = 7
    }

    //ALU operation, encoded in opcode bits 0..2 (the value 7 is the branch instruction).
    private enum AluOperation
    {
        AluReg                = 0,
        AddImmediate          = 1,
        BitfieldReplace       = 2,
        BitfieldExtractLslImm = 3,
        BitfieldExtractLslReg = 4,
        ReadImmediate         = 5
    }

    //Register-register ALU sub operation, encoded in opcode bits 17..21.
    private enum AluRegOperation
    {
        Add                = 0,
        AddWithCarry       = 1,
        Subtract           = 2,
        SubtractWithBorrow = 3,
        BitwiseExclusiveOr = 8,
        BitwiseOr          = 9,
        BitwiseAnd         = 10,
        BitwiseAndNot      = 11,
        BitwiseNotAnd      = 12
    }

    private NvGpuFifo    PFifo;
    private INvGpuEngine Engine;

    /// <summary>Queue of macro parameters, consumed by the fetch operations.</summary>
    public Queue<int> Fifo { get; private set; }

    //General purpose registers. Register 0 always reads as zero (see GetGprValue).
    private int[] Gprs;

    //Current method address and the increment applied to it after each send.
    private int MethAddr;
    private int MethIncr;

    //Carry flag produced by the add/subtract operations.
    private bool Carry;

    //Opcode being executed, and the opcode already fetched for the next step.
    private int OpCode;
    private int PipeOp;

    //Index into the macro code of the next word to fetch.
    private int Pc;

    /// <summary>
    /// Creates an interpreter bound to a FIFO (used to pull missing parameters)
    /// and to the engine that receives the generated method calls.
    /// </summary>
    public MacroInterpreter(NvGpuFifo PFifo, INvGpuEngine Engine)
    {
        this.PFifo  = PFifo;
        this.Engine = Engine;

        Fifo = new Queue<int>();

        Gprs = new int[8];
    }

    /// <summary>
    /// Runs the macro that starts at <paramref name="Position"/> inside <paramref name="Mme"/>.
    /// </summary>
    /// <param name="Vmm">Memory manager forwarded to the engine on every send.</param>
    /// <param name="Mme">Macro code memory.</param>
    /// <param name="Position">Index of the first opcode of the macro.</param>
    /// <param name="Param">First macro parameter, placed in register 1.</param>
    public void Execute(NvGpuVmm Vmm, int[] Mme, int Position, int Param)
    {
        Reset();

        Gprs[1] = Param;

        Pc = Position;

        //Prime the pipe with the first opcode.
        FetchOpCode(Mme);

        while (Step(Vmm, Mme));

        //Due to the delay slot, we still need to execute
        //one more instruction before we actually exit.
        Step(Vmm, Mme);
    }

    /// <summary>Clears registers, method state, carry and the call counter.</summary>
    private void Reset()
    {
        for (int Index = 0; Index < Gprs.Length; Index++)
        {
            Gprs[Index] = 0;
        }

        MethAddr = 0;
        MethIncr = 0;

        Carry = false;

        CallCount = 0;
    }

    /// <summary>
    /// Executes the opcode currently waiting in the pipe.
    /// </summary>
    /// <returns>
    /// False when the executed opcode has the exit bit (0x80) set, true otherwise.
    /// A taken branch always returns true.
    /// </returns>
    private bool Step(NvGpuVmm Vmm, int[] Mme)
    {
        //Sampled before the fetch, so this is the address of the opcode executed below.
        int BaseAddr = Pc - 1;

        FetchOpCode(Mme);

        if ((OpCode & 7) < 7)
        {
            //Operation produces a value.
            AssignmentOperation AsgOp = (AssignmentOperation)((OpCode >> 4) & 7);

            int Result = GetAluResult();

            switch (AsgOp)
            {
                //Fetch parameter and ignore result.
                case AssignmentOperation.IgnoreAndFetch:
                {
                    SetDstGpr(FetchParam());

                    break;
                }

                //Move result.
                case AssignmentOperation.Move:
                {
                    SetDstGpr(Result);

                    break;
                }

                //Move result and use as Method Address.
                case AssignmentOperation.MoveAndSetMaddr:
                {
                    SetDstGpr(Result);

                    SetMethAddr(Result);

                    break;
                }

                //Fetch parameter and send result.
                case AssignmentOperation.FetchAndSend:
                {
                    SetDstGpr(FetchParam());

                    Send(Vmm, Result);

                    break;
                }

                //Move and send result.
                case AssignmentOperation.MoveAndSend:
                {
                    SetDstGpr(Result);

                    Send(Vmm, Result);

                    break;
                }

                //Fetch parameter and use result as Method Address.
                case AssignmentOperation.FetchAndSetMaddr:
                {
                    SetDstGpr(FetchParam());

                    SetMethAddr(Result);

                    break;
                }

                //Move result and use as Method Address, then fetch and send parameter.
                case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
                {
                    SetDstGpr(Result);

                    SetMethAddr(Result);

                    Send(Vmm, FetchParam());

                    break;
                }

                //Move result and use as Method Address, then send bits 17:12 of result.
                case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
                {
                    SetDstGpr(Result);

                    SetMethAddr(Result);

                    Send(Vmm, (Result >> 12) & 0x3f);

                    break;
                }
            }
        }
        else
        {
            //Branch.
            bool OnNotZero = ((OpCode >> 4) & 1) != 0;

            bool Taken = OnNotZero
                ? GetGprA() != 0
                : GetGprA() == 0;

            if (Taken)
            {
                Pc = BaseAddr + GetImm();

                bool NoDelays = (OpCode & 0x20) != 0;

                if (NoDelays)
                {
                    //Refill the pipe from the branch target, which drops
                    //the opcode that was sitting in the delay slot.
                    FetchOpCode(Mme);
                }

                return true;
            }
        }

        bool Exit = (OpCode & 0x80) != 0;

        return !Exit;
    }

    /// <summary>
    /// Advances the one-deep pipeline: the previously fetched word becomes the
    /// current opcode and the word at Pc is fetched for the next step.
    /// </summary>
    private void FetchOpCode(int[] Mme)
    {
        OpCode = PipeOp;

        PipeOp = Mme[Pc++];
    }

    /// <summary>
    /// Computes the value produced by the current opcode.
    /// </summary>
    /// <exception cref="ArgumentException">The ALU operation field holds an unsupported value.</exception>
    private int GetAluResult()
    {
        AluOperation Op = (AluOperation)(OpCode & 7);

        switch (Op)
        {
            case AluOperation.AluReg:
            {
                AluRegOperation AluOp = (AluRegOperation)((OpCode >> 17) & 0x1f);

                return GetAluResult(AluOp, GetGprA(), GetGprB());
            }

            case AluOperation.AddImmediate:
            {
                return GetGprA() + GetImm();
            }

            case AluOperation.BitfieldReplace:
            case AluOperation.BitfieldExtractLslImm:
            case AluOperation.BitfieldExtractLslReg:
            {
                int BfSrcBit = (OpCode >> 17) & 0x1f;
                int BfSize   = (OpCode >> 22) & 0x1f;
                int BfDstBit = (OpCode >> 27) & 0x1f;

                int BfMask = (1 << BfSize) - 1;

                int Dst = GetGprA();
                int Src = GetGprB();

                switch (Op)
                {
                    case AluOperation.BitfieldReplace:
                    {
                        Src = (int)((uint)Src >> BfSrcBit) & BfMask;

                        Dst &= ~(BfMask << BfDstBit);

                        Dst |= Src << BfDstBit;

                        return Dst;
                    }

                    case AluOperation.BitfieldExtractLslImm:
                    {
                        //Here register A supplies the source bit position.
                        Src = (int)((uint)Src >> Dst) & BfMask;

                        return Src << BfDstBit;
                    }

                    case AluOperation.BitfieldExtractLslReg:
                    {
                        //Here register A supplies the left shift amount.
                        Src = (int)((uint)Src >> BfSrcBit) & BfMask;

                        return Src << Dst;
                    }
                }

                break;
            }

            case AluOperation.ReadImmediate:
            {
                return Read(GetGprA() + GetImm());
            }
        }

        throw new ArgumentException(nameof(OpCode));
    }

    /// <summary>
    /// Evaluates a register-register ALU operation and updates the carry flag
    /// for the add/subtract variants.
    /// </summary>
    /// <remarks>
    /// Operands are treated as unsigned 32-bit values for carry purposes. They are
    /// zero-extended through uint before widening: a direct int to ulong cast
    /// sign-extends, which would corrupt the carry whenever bit 31 is set.
    /// For subtraction the carry is set when no borrow occurred.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException">The operation is not supported.</exception>
    private int GetAluResult(AluRegOperation AluOp, int A, int B)
    {
        switch (AluOp)
        {
            case AluRegOperation.Add:
            {
                ulong Result = (ulong)(uint)A + (uint)B;

                Carry = Result > 0xffffffff;

                return (int)Result;
            }

            case AluRegOperation.AddWithCarry:
            {
                ulong Result = (ulong)(uint)A + (uint)B + (Carry ? 1UL : 0UL);

                Carry = Result > 0xffffffff;

                return (int)Result;
            }

            case AluRegOperation.Subtract:
            {
                ulong Result = (ulong)(uint)A - (uint)B;

                Carry = Result < 0x100000000;

                return (int)Result;
            }

            case AluRegOperation.SubtractWithBorrow:
            {
                ulong Result = (ulong)(uint)A - (uint)B - (Carry ? 0UL : 1UL);

                Carry = Result < 0x100000000;

                return (int)Result;
            }

            case AluRegOperation.BitwiseExclusiveOr: return   A ^  B;
            case AluRegOperation.BitwiseOr:          return   A |  B;
            case AluRegOperation.BitwiseAnd:         return   A &  B;
            case AluRegOperation.BitwiseAndNot:      return   A & ~B;
            case AluRegOperation.BitwiseNotAnd:      return ~(A &  B);
        }

        throw new ArgumentOutOfRangeException(nameof(AluOp));
    }

    /// <summary>Returns the signed immediate stored in opcode bits 14..31.</summary>
    private int GetImm()
    {
        //Note: The immediate is signed, the sign-extension is intended here.
        return OpCode >> 14;
    }

    /// <summary>Splits a value into method address (bits 0..11) and increment (bits 12..17).</summary>
    private void SetMethAddr(int Value)
    {
        MethAddr = (Value >>  0) & 0xfff;
        MethIncr = (Value >> 12) & 0x3f;
    }

    /// <summary>Writes the destination register selected by opcode bits 8..10.</summary>
    private void SetDstGpr(int Value)
    {
        Gprs[(OpCode >> 8) & 7] = Value;
    }

    /// <summary>Reads source register A, selected by opcode bits 11..13.</summary>
    private int GetGprA()
    {
        return GetGprValue((OpCode >> 11) & 7);
    }

    /// <summary>Reads source register B, selected by opcode bits 14..16.</summary>
    private int GetGprB()
    {
        return GetGprValue((OpCode >> 14) & 7);
    }

    /// <summary>Reads a register; register 0 always yields zero regardless of its contents.</summary>
    private int GetGprValue(int Index)
    {
        return Index != 0 ? Gprs[Index] : 0;
    }

    /// <summary>
    /// Takes the next macro parameter from the FIFO.
    /// </summary>
    /// <returns>The parameter, or 0 when the PFIFO has nothing more to process.</returns>
    private int FetchParam()
    {
        int Value;

        //If we don't have any parameters in the FIFO,
        //keep running the PFIFO engine until it writes the parameters.
        while (!Fifo.TryDequeue(out Value))
        {
            if (!PFifo.Step())
            {
                return 0;
            }
        }

        return Value;
    }

    /// <summary>Reads an engine register by index.</summary>
    private int Read(int Reg)
    {
        return Engine.Registers[Reg];
    }

    /// <summary>
    /// Calls the current method on the engine with a single argument, then
    /// advances the method address by the current increment.
    /// </summary>
    private void Send(NvGpuVmm Vmm, int Value)
    {
        //This is an artificial limit that prevents excessive calls
        //to VertexEndGl since that triggers rendering, and in the
        //case that something is bugged and causes an absurd amount of
        //draw calls, this prevents the system from freezing (and throws instead).
        if (MethAddr == 0x585 && ++CallCount > MaxCallCountPerRun)
        {
            GpuExceptionHelper.ThrowCallCoundExceeded();
        }

        NvGpuPBEntry PBEntry = new NvGpuPBEntry(MethAddr, 0, Value);

        Engine.CallMethod(Vmm, PBEntry);

        MethAddr += MethIncr;
    }
}
|
||||
}
|
11
Ryujinx.HLE/Gpu/Engines/NvGpuEngine.cs
Normal file
11
Ryujinx.HLE/Gpu/Engines/NvGpuEngine.cs
Normal file
|
@ -0,0 +1,11 @@
|
|||
namespace Ryujinx.HLE.Gpu.Engines
|
||||
{
|
||||
/// <summary>
/// Numeric identifiers of the GPU engines known to the emulator.
/// NOTE(review): the values look like hardware class ids - confirm against the code that binds engines.
/// </summary>
enum NvGpuEngine
{
    _2d     = 0x902d,
    _3d     = 0xb197,
    Compute = 0xb1c0,
    Kepler  = 0xa140,
    Dma     = 0xb0b5
}
|
||||
}
|
170
Ryujinx.HLE/Gpu/Engines/NvGpuEngine2d.cs
Normal file
170
Ryujinx.HLE/Gpu/Engines/NvGpuEngine2d.cs
Normal file
|
@ -0,0 +1,170 @@
|
|||
using Ryujinx.Graphics.Gal;
|
||||
using Ryujinx.HLE.Gpu.Memory;
|
||||
using Ryujinx.HLE.Gpu.Texture;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.HLE.Gpu.Engines
|
||||
{
|
||||
/// <summary>
/// Emulated 2D engine. Method 0xb5 triggers a texture copy; every other
/// method is stored into the register file.
/// </summary>
class NvGpuEngine2d : INvGpuEngine
{
    //Values of the CopyOperation register. The register is not consulted by TextureCopy yet.
    private enum CopyOperation
    {
        SrcCopyAnd,
        RopAnd,
        Blend,
        SrcCopy,
        Rop,
        SrcCopyPremult,
        BlendPremult
    }

    /// <summary>Register file, indexed by method number.</summary>
    public int[] Registers { get; private set; }

    private NvGpu Gpu;

    //Methods with a dedicated handler; anything not listed here is a plain register write.
    private Dictionary<int, NvGpuMethod> Methods;

    /// <summary>Creates the engine and registers its method handlers.</summary>
    public NvGpuEngine2d(NvGpu Gpu)
    {
        this.Gpu = Gpu;

        Registers = new int[0xe00];

        Methods = new Dictionary<int, NvGpuMethod>();

        //Registers the same handler for Count methods, Stride apart, starting at Meth.
        void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
        {
            while (Count-- > 0)
            {
                Methods.Add(Meth, Method);

                Meth += Stride;
            }
        }

        AddMethod(0xb5, 1, 1, TextureCopy);
    }

    /// <summary>
    /// Runs the handler registered for the entry's method, or stores the
    /// entry's last argument into the matching register when there is none.
    /// </summary>
    public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
    {
        if (Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Method))
        {
            Method(Vmm, PBEntry);
        }
        else
        {
            WriteRegister(PBEntry);
        }
    }

    /// <summary>
    /// Copies the source surface described by the Src* registers into the
    /// destination surface described by the Dst* registers.
    /// </summary>
    private void TextureCopy(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
    {
        //Note: The CopyOperation, SrcLinear and DstPitch registers are currently not consulted.
        int SrcWidth  = ReadRegister(NvGpuEngine2dReg.SrcWidth);
        int SrcHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight);

        bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0;
        int  DstWidth  = ReadRegister(NvGpuEngine2dReg.DstWidth);
        int  DstHeight = ReadRegister(NvGpuEngine2dReg.DstHeight);
        int  DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions);

        TextureSwizzle DstSwizzle = DstLinear
            ? TextureSwizzle.Pitch
            : TextureSwizzle.BlockLinear;

        int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf);

        long SrcAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress);
        long DstAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.DstAddress);

        //Frame buffers are tracked by the physical address of the source.
        long Key = Vmm.GetPhysicalAddress(SrcAddress);

        bool IsFbTexture = Gpu.Engine3d.IsFrameBufferPosition(Key);

        if (IsFbTexture && DstLinear)
        {
            DstSwizzle = TextureSwizzle.BlockLinear;
        }

        //NOTE(review): DstBlockHeight is passed for two consecutive arguments,
        //confirm against the TextureInfo constructor that this is intended.
        TextureInfo DstTexture = new TextureInfo(
            DstAddress,
            DstWidth,
            DstHeight,
            DstBlockHeight,
            DstBlockHeight,
            DstSwizzle,
            GalTextureFormat.A8B8G8R8);

        if (IsFbTexture)
        {
            //TODO: Change this when the correct frame buffer resolution is used.
            //Currently, the frame buffer size is hardcoded to 1280x720.
            SrcWidth  = 1280;
            SrcHeight = 720;

            Gpu.Renderer.FrameBuffer.GetBufferData(Key, (byte[] Buffer) =>
            {
                CopyTexture(
                    Vmm,
                    DstTexture,
                    Buffer,
                    SrcWidth,
                    SrcHeight);
            });
        }
        else
        {
            //4 bytes per pixel. Widen first so that large surfaces
            //can not overflow the 32-bit multiplication.
            long Size = (long)SrcWidth * SrcHeight * 4;

            byte[] Buffer = Vmm.ReadBytes(SrcAddress, Size);

            CopyTexture(
                Vmm,
                DstTexture,
                Buffer,
                SrcWidth,
                SrcHeight);
        }
    }

    /// <summary>Writes the pixel data into the destination texture.</summary>
    private void CopyTexture(
        NvGpuVmm    Vmm,
        TextureInfo Texture,
        byte[]      Buffer,
        int         Width,
        int         Height)
    {
        TextureWriter.Write(Vmm, Texture, Buffer, Width, Height);
    }

    /// <summary>
    /// Builds a 64-bit value from two consecutive registers: the first one
    /// holds the high 32 bits, the second one the low 32 bits.
    /// </summary>
    private long MakeInt64From2xInt32(NvGpuEngine2dReg Reg)
    {
        return
            (long)Registers[(int)Reg + 0] << 32 |
            (uint)Registers[(int)Reg + 1];
    }

    /// <summary>
    /// Stores the last argument of the entry into the register addressed by
    /// its method number. Entries without arguments are ignored.
    /// </summary>
    private void WriteRegister(NvGpuPBEntry PBEntry)
    {
        int ArgsCount = PBEntry.Arguments.Count;

        if (ArgsCount > 0)
        {
            Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1];
        }
    }

    /// <summary>Reads a register by name.</summary>
    private int ReadRegister(NvGpuEngine2dReg Reg)
    {
        return Registers[(int)Reg];
    }

    /// <summary>Writes a register by name.</summary>
    private void WriteRegister(NvGpuEngine2dReg Reg, int Value)
    {
        Registers[(int)Reg] = Value;
    }
}
|
||||
}
|
25
Ryujinx.HLE/Gpu/Engines/NvGpuEngine2dReg.cs
Normal file
25
Ryujinx.HLE/Gpu/Engines/NvGpuEngine2dReg.cs
Normal file
|
@ -0,0 +1,25 @@
|
|||
namespace Ryujinx.HLE.Gpu.Engines
|
||||
{
|
||||
/// <summary>
/// Register numbers of the 2D engine. The values are indices into the
/// engine's register array.
/// </summary>
enum NvGpuEngine2dReg
{
    //Destination surface.
    DstFormat          = 0x80,
    DstLinear          = 0x81,
    DstBlockDimensions = 0x82,
    DstDepth           = 0x83,
    DstLayer           = 0x84,
    DstPitch           = 0x85,
    DstWidth           = 0x86,
    DstHeight          = 0x87,
    DstAddress         = 0x88, //First of two registers: high word, then low word.

    //Source surface.
    SrcFormat          = 0x8c,
    SrcLinear          = 0x8d,
    SrcBlockDimensions = 0x8e,
    SrcDepth           = 0x8f,
    SrcLayer           = 0x90,
    SrcPitch           = 0x91,
    SrcWidth           = 0x92,
    SrcHeight          = 0x93,
    SrcAddress         = 0x94, //First of two registers: high word, then low word.

    CopyOperation      = 0xab
}
|
||||
}
|
556
Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
Normal file
556
Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
Normal file
|
@ -0,0 +1,556 @@
|
|||
using Ryujinx.Graphics.Gal;
|
||||
using Ryujinx.HLE.Gpu.Memory;
|
||||
using Ryujinx.HLE.Gpu.Texture;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.HLE.Gpu.Engines
|
||||
{
|
||||
class NvGpuEngine3d : INvGpuEngine
|
||||
{
|
||||
public int[] Registers { get; private set; }
|
||||
|
||||
private NvGpu Gpu;
|
||||
|
||||
private Dictionary<int, NvGpuMethod> Methods;
|
||||
|
||||
private struct ConstBuffer
|
||||
{
|
||||
public bool Enabled;
|
||||
public long Position;
|
||||
public int Size;
|
||||
}
|
||||
|
||||
private ConstBuffer[][] ConstBuffers;
|
||||
|
||||
private HashSet<long> FrameBuffers;
|
||||
|
||||
public NvGpuEngine3d(NvGpu Gpu)
|
||||
{
|
||||
this.Gpu = Gpu;
|
||||
|
||||
Registers = new int[0xe00];
|
||||
|
||||
Methods = new Dictionary<int, NvGpuMethod>();
|
||||
|
||||
void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
|
||||
{
|
||||
while (Count-- > 0)
|
||||
{
|
||||
Methods.Add(Meth, Method);
|
||||
|
||||
Meth += Stride;
|
||||
}
|
||||
}
|
||||
|
||||
AddMethod(0x585, 1, 1, VertexEndGl);
|
||||
AddMethod(0x674, 1, 1, ClearBuffers);
|
||||
AddMethod(0x6c3, 1, 1, QueryControl);
|
||||
AddMethod(0x8e4, 16, 1, CbData);
|
||||
AddMethod(0x904, 5, 8, CbBind);
|
||||
|
||||
ConstBuffers = new ConstBuffer[6][];
|
||||
|
||||
for (int Index = 0; Index < ConstBuffers.Length; Index++)
|
||||
{
|
||||
ConstBuffers[Index] = new ConstBuffer[18];
|
||||
}
|
||||
|
||||
FrameBuffers = new HashSet<long>();
|
||||
}
|
||||
|
||||
public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
|
||||
{
|
||||
if (Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Method))
|
||||
{
|
||||
Method(Vmm, PBEntry);
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteRegister(PBEntry);
|
||||
}
|
||||
}
|
||||
|
||||
private void VertexEndGl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
|
||||
{
|
||||
SetFrameBuffer(Vmm, 0);
|
||||
|
||||
long[] Keys = UploadShaders(Vmm);
|
||||
|
||||
Gpu.Renderer.Shader.BindProgram();
|
||||
|
||||
SetAlphaBlending();
|
||||
|
||||
UploadTextures(Vmm, Keys);
|
||||
UploadUniforms(Vmm);
|
||||
UploadVertexArrays(Vmm);
|
||||
}
|
||||
|
||||
private void ClearBuffers(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
|
||||
{
|
||||
int Arg0 = PBEntry.Arguments[0];
|
||||
|
||||
int FbIndex = (Arg0 >> 6) & 0xf;
|
||||
|
||||
int Layer = (Arg0 >> 10) & 0x3ff;
|
||||
|
||||
GalClearBufferFlags Flags = (GalClearBufferFlags)(Arg0 & 0x3f);
|
||||
|
||||
SetFrameBuffer(Vmm, 0);
|
||||
|
||||
//TODO: Enable this once the frame buffer problems are fixed.
|
||||
//Gpu.Renderer.ClearBuffers(Layer, Flags);
|
||||
}
|
||||
|
||||
private void SetFrameBuffer(NvGpuVmm Vmm, int FbIndex)
|
||||
{
|
||||
long VA = MakeInt64From2xInt32(NvGpuEngine3dReg.FrameBufferNAddress + FbIndex * 0x10);
|
||||
|
||||
long PA = Vmm.GetPhysicalAddress(VA);
|
||||
|
||||
FrameBuffers.Add(PA);
|
||||
|
||||
int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10);
|
||||
int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10);
|
||||
|
||||
//Note: Using the Width/Height results seems to give incorrect results.
|
||||
//Maybe the size of all frame buffers is hardcoded to screen size? This seems unlikely.
|
||||
Gpu.Renderer.FrameBuffer.Create(PA, 1280, 720);
|
||||
Gpu.Renderer.FrameBuffer.Bind(PA);
|
||||
}
|
||||
|
||||
private long[] UploadShaders(NvGpuVmm Vmm)
|
||||
{
|
||||
long[] Keys = new long[5];
|
||||
|
||||
long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
|
||||
|
||||
for (int Index = 0; Index < 6; Index++)
|
||||
{
|
||||
int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10);
|
||||
int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10);
|
||||
|
||||
//Note: Vertex Program (B) is always enabled.
|
||||
bool Enable = (Control & 1) != 0 || Index == 1;
|
||||
|
||||
if (!Enable)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
long Key = BasePosition + (uint)Offset;
|
||||
|
||||
GalShaderType ShaderType = GetTypeFromProgram(Index);
|
||||
|
||||
Keys[(int)ShaderType] = Key;
|
||||
|
||||
Gpu.Renderer.Shader.Create(Vmm, Key, ShaderType);
|
||||
Gpu.Renderer.Shader.Bind(Key);
|
||||
}
|
||||
|
||||
int RawSX = ReadRegister(NvGpuEngine3dReg.ViewportScaleX);
|
||||
int RawSY = ReadRegister(NvGpuEngine3dReg.ViewportScaleY);
|
||||
|
||||
float SX = BitConverter.Int32BitsToSingle(RawSX);
|
||||
float SY = BitConverter.Int32BitsToSingle(RawSY);
|
||||
|
||||
float SignX = MathF.Sign(SX);
|
||||
float SignY = MathF.Sign(SY);
|
||||
|
||||
Gpu.Renderer.Shader.SetFlip(SignX, SignY);
|
||||
|
||||
return Keys;
|
||||
}
|
||||
|
||||
private static GalShaderType GetTypeFromProgram(int Program)
|
||||
{
|
||||
switch (Program)
|
||||
{
|
||||
case 0:
|
||||
case 1: return GalShaderType.Vertex;
|
||||
case 2: return GalShaderType.TessControl;
|
||||
case 3: return GalShaderType.TessEvaluation;
|
||||
case 4: return GalShaderType.Geometry;
|
||||
case 5: return GalShaderType.Fragment;
|
||||
}
|
||||
|
||||
throw new ArgumentOutOfRangeException(nameof(Program));
|
||||
}
|
||||
|
||||
private void SetAlphaBlending()
|
||||
{
|
||||
//TODO: Support independent blend properly.
|
||||
bool Enable = (ReadRegister(NvGpuEngine3dReg.IBlendNEnable) & 1) != 0;
|
||||
|
||||
if (Enable)
|
||||
{
|
||||
Gpu.Renderer.Blend.Enable();
|
||||
}
|
||||
else
|
||||
{
|
||||
Gpu.Renderer.Blend.Disable();
|
||||
}
|
||||
|
||||
if (!Enable)
|
||||
{
|
||||
//If blend is not enabled, then the other values have no effect.
|
||||
//Note that if it is disabled, the register may contain invalid values.
|
||||
return;
|
||||
}
|
||||
|
||||
bool BlendSeparateAlpha = (ReadRegister(NvGpuEngine3dReg.IBlendNSeparateAlpha) & 1) != 0;
|
||||
|
||||
GalBlendEquation EquationRgb = (GalBlendEquation)ReadRegister(NvGpuEngine3dReg.IBlendNEquationRgb);
|
||||
|
||||
GalBlendFactor FuncSrcRgb = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncSrcRgb);
|
||||
GalBlendFactor FuncDstRgb = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncDstRgb);
|
||||
|
||||
if (BlendSeparateAlpha)
|
||||
{
|
||||
GalBlendEquation EquationAlpha = (GalBlendEquation)ReadRegister(NvGpuEngine3dReg.IBlendNEquationAlpha);
|
||||
|
||||
GalBlendFactor FuncSrcAlpha = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncSrcAlpha);
|
||||
GalBlendFactor FuncDstAlpha = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncDstAlpha);
|
||||
|
||||
Gpu.Renderer.Blend.SetSeparate(
|
||||
EquationRgb,
|
||||
EquationAlpha,
|
||||
FuncSrcRgb,
|
||||
FuncDstRgb,
|
||||
FuncSrcAlpha,
|
||||
FuncDstAlpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
Gpu.Renderer.Blend.Set(EquationRgb, FuncSrcRgb, FuncDstRgb);
|
||||
}
|
||||
}
|
||||
|
||||
private void UploadTextures(NvGpuVmm Vmm, long[] Keys)
|
||||
{
|
||||
long BaseShPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
|
||||
|
||||
int TextureCbIndex = ReadRegister(NvGpuEngine3dReg.TextureCbIndex);
|
||||
|
||||
//Note: On the emulator renderer, Texture Unit 0 is
|
||||
//reserved for drawing the frame buffer.
|
||||
int TexIndex = 1;
|
||||
|
||||
for (int Index = 0; Index < Keys.Length; Index++)
|
||||
{
|
||||
foreach (ShaderDeclInfo DeclInfo in Gpu.Renderer.Shader.GetTextureUsage(Keys[Index]))
|
||||
{
|
||||
long Position = ConstBuffers[Index][TextureCbIndex].Position;
|
||||
|
||||
UploadTexture(Vmm, Position, TexIndex, DeclInfo.Index);
|
||||
|
||||
Gpu.Renderer.Shader.EnsureTextureBinding(DeclInfo.Name, TexIndex);
|
||||
|
||||
TexIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void UploadTexture(NvGpuVmm Vmm, long BasePosition, int TexIndex, int HndIndex)
|
||||
{
|
||||
long Position = BasePosition + HndIndex * 4;
|
||||
|
||||
int TextureHandle = Vmm.ReadInt32(Position);
|
||||
|
||||
if (TextureHandle == 0)
|
||||
{
|
||||
//TODO: Is this correct?
|
||||
//Some games like puyo puyo will have 0 handles.
|
||||
//It may be just normal behaviour or a bug caused by sync issues.
|
||||
//The game does initialize the value properly after through.
|
||||
return;
|
||||
}
|
||||
|
||||
int TicIndex = (TextureHandle >> 0) & 0xfffff;
|
||||
int TscIndex = (TextureHandle >> 20) & 0xfff;
|
||||
|
||||
long TicPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexHeaderPoolOffset);
|
||||
long TscPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexSamplerPoolOffset);
|
||||
|
||||
TicPosition += TicIndex * 0x20;
|
||||
TscPosition += TscIndex * 0x20;
|
||||
|
||||
GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition);
|
||||
|
||||
long TextureAddress = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff;
|
||||
|
||||
long Key = TextureAddress;
|
||||
|
||||
TextureAddress = Vmm.GetPhysicalAddress(TextureAddress);
|
||||
|
||||
if (IsFrameBufferPosition(TextureAddress))
|
||||
{
|
||||
//This texture is a frame buffer texture,
|
||||
//we shouldn't read anything from memory and bind
|
||||
//the frame buffer texture instead, since we're not
|
||||
//really writing anything to memory.
|
||||
Gpu.Renderer.FrameBuffer.BindTexture(TextureAddress, TexIndex);
|
||||
}
|
||||
else
|
||||
{
|
||||
GalTexture NewTexture = TextureFactory.MakeTexture(Vmm, TicPosition);
|
||||
|
||||
long Size = (uint)TextureHelper.GetTextureSize(NewTexture);
|
||||
|
||||
bool HasCachedTexture = false;
|
||||
|
||||
if (Gpu.Renderer.Texture.TryGetCachedTexture(Key, Size, out GalTexture Texture))
|
||||
{
|
||||
if (NewTexture.Equals(Texture) && !Vmm.IsRegionModified(Key, Size, NvGpuBufferType.Texture))
|
||||
{
|
||||
Gpu.Renderer.Texture.Bind(Key, TexIndex);
|
||||
|
||||
HasCachedTexture = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!HasCachedTexture)
|
||||
{
|
||||
byte[] Data = TextureFactory.GetTextureData(Vmm, TicPosition);
|
||||
|
||||
Gpu.Renderer.Texture.Create(Key, Data, NewTexture);
|
||||
}
|
||||
|
||||
Gpu.Renderer.Texture.Bind(Key, TexIndex);
|
||||
}
|
||||
|
||||
Gpu.Renderer.Texture.SetSampler(Sampler);
|
||||
}
|
||||
|
||||
private void UploadUniforms(NvGpuVmm Vmm)
|
||||
{
|
||||
long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
|
||||
|
||||
for (int Index = 0; Index < 5; Index++)
|
||||
{
|
||||
int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + (Index + 1) * 0x10);
|
||||
int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + (Index + 1) * 0x10);
|
||||
|
||||
//Note: Vertex Program (B) is always enabled.
|
||||
bool Enable = (Control & 1) != 0 || Index == 0;
|
||||
|
||||
if (!Enable)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int Cbuf = 0; Cbuf < ConstBuffers[Index].Length; Cbuf++)
|
||||
{
|
||||
ConstBuffer Cb = ConstBuffers[Index][Cbuf];
|
||||
|
||||
if (Cb.Enabled)
|
||||
{
|
||||
byte[] Data = Vmm.ReadBytes(Cb.Position, (uint)Cb.Size);
|
||||
|
||||
Gpu.Renderer.Shader.SetConstBuffer(BasePosition + (uint)Offset, Cbuf, Data);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void UploadVertexArrays(NvGpuVmm Vmm)
|
||||
{
|
||||
long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress);
|
||||
|
||||
int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat);
|
||||
int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst);
|
||||
int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount);
|
||||
|
||||
GalIndexFormat IndexFormat = (GalIndexFormat)IndexEntryFmt;
|
||||
|
||||
int IndexEntrySize = 1 << IndexEntryFmt;
|
||||
|
||||
if (IndexEntrySize > 4)
|
||||
{
|
||||
throw new InvalidOperationException();
|
||||
}
|
||||
|
||||
if (IndexCount != 0)
|
||||
{
|
||||
int IbSize = IndexCount * IndexEntrySize;
|
||||
|
||||
bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IndexPosition, (uint)IbSize);
|
||||
|
||||
if (!IboCached || Vmm.IsRegionModified(IndexPosition, (uint)IbSize, NvGpuBufferType.Index))
|
||||
{
|
||||
byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize);
|
||||
|
||||
Gpu.Renderer.Rasterizer.CreateIbo(IndexPosition, Data);
|
||||
}
|
||||
|
||||
Gpu.Renderer.Rasterizer.SetIndexArray(IndexPosition, IbSize, IndexFormat);
|
||||
}
|
||||
|
||||
List<GalVertexAttrib>[] Attribs = new List<GalVertexAttrib>[32];
|
||||
|
||||
for (int Attr = 0; Attr < 16; Attr++)
|
||||
{
|
||||
int Packed = ReadRegister(NvGpuEngine3dReg.VertexAttribNFormat + Attr);
|
||||
|
||||
int ArrayIndex = Packed & 0x1f;
|
||||
|
||||
if (Attribs[ArrayIndex] == null)
|
||||
{
|
||||
Attribs[ArrayIndex] = new List<GalVertexAttrib>();
|
||||
}
|
||||
|
||||
Attribs[ArrayIndex].Add(new GalVertexAttrib(
|
||||
Attr,
|
||||
((Packed >> 6) & 0x1) != 0,
|
||||
(Packed >> 7) & 0x3fff,
|
||||
(GalVertexAttribSize)((Packed >> 21) & 0x3f),
|
||||
(GalVertexAttribType)((Packed >> 27) & 0x7),
|
||||
((Packed >> 31) & 0x1) != 0));
|
||||
}
|
||||
|
||||
int VertexFirst = ReadRegister(NvGpuEngine3dReg.VertexArrayFirst);
|
||||
int VertexCount = ReadRegister(NvGpuEngine3dReg.VertexArrayCount);
|
||||
|
||||
int PrimCtrl = ReadRegister(NvGpuEngine3dReg.VertexBeginGl);
|
||||
|
||||
for (int Index = 0; Index < 32; Index++)
|
||||
{
|
||||
if (Attribs[Index] == null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
int Control = ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + Index * 4);
|
||||
|
||||
bool Enable = (Control & 0x1000) != 0;
|
||||
|
||||
long VertexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + Index * 4);
|
||||
long VertexEndPos = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNEndAddr + Index * 2);
|
||||
|
||||
if (!Enable)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
int Stride = Control & 0xfff;
|
||||
|
||||
long VbSize = 0;
|
||||
|
||||
if (IndexCount != 0)
|
||||
{
|
||||
VbSize = (VertexEndPos - VertexPosition) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
VbSize = VertexCount * Stride;
|
||||
}
|
||||
|
||||
bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VertexPosition, VbSize);
|
||||
|
||||
if (!VboCached || Vmm.IsRegionModified(VertexPosition, VbSize, NvGpuBufferType.Vertex))
|
||||
{
|
||||
byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize);
|
||||
|
||||
Gpu.Renderer.Rasterizer.CreateVbo(VertexPosition, Data);
|
||||
}
|
||||
|
||||
Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VertexPosition, Attribs[Index].ToArray());
|
||||
}
|
||||
|
||||
GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);
|
||||
|
||||
if (IndexCount != 0)
|
||||
{
|
||||
Gpu.Renderer.Rasterizer.DrawElements(IndexPosition, IndexFirst, PrimType);
|
||||
}
|
||||
else
|
||||
{
|
||||
Gpu.Renderer.Rasterizer.DrawArrays(VertexFirst, VertexCount, PrimType);
|
||||
}
|
||||
}
|
||||
|
||||
private void QueryControl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
|
||||
{
|
||||
long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.QueryAddress);
|
||||
|
||||
int Seq = Registers[(int)NvGpuEngine3dReg.QuerySequence];
|
||||
int Ctrl = Registers[(int)NvGpuEngine3dReg.QueryControl];
|
||||
|
||||
int Mode = Ctrl & 3;
|
||||
|
||||
if (Mode == 0)
|
||||
{
|
||||
//Write mode.
|
||||
Vmm.WriteInt32(Position, Seq);
|
||||
}
|
||||
|
||||
WriteRegister(PBEntry);
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes every argument of the entry as consecutive 32-bit words into the
/// constant buffer, starting at the current constant buffer offset, and then
/// stores the advanced offset back into the ConstBufferOffset register.
/// </summary>
/// <param name="Vmm">GPU virtual memory that receives the words.</param>
/// <param name="PBEntry">Push buffer entry carrying the data words.</param>
private void CbData(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    long BaseAddress = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);

    int Cursor = ReadRegister(NvGpuEngine3dReg.ConstBufferOffset);

    //Each argument is one 32-bit word, so the cursor moves 4 bytes per write.
    for (int ArgIndex = 0; ArgIndex < PBEntry.Arguments.Count; ArgIndex++, Cursor += 4)
    {
        Vmm.WriteInt32(BaseAddress + Cursor, PBEntry.Arguments[ArgIndex]);
    }

    WriteRegister(NvGpuEngine3dReg.ConstBufferOffset, Cursor);
}
|
||||
|
||||
/// <summary>
/// Records a constant buffer binding. The stage is derived from the method
/// number, the slot and the enable flag from the first argument, and the
/// address and size from the current constant buffer registers.
/// </summary>
/// <param name="Vmm">Unused by this handler.</param>
/// <param name="PBEntry">Push buffer entry carrying the bind word.</param>
private void CbBind(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    //Bind methods start at 0x904 and are spaced 8 methods apart per stage.
    int Stage = (PBEntry.Method - 0x904) >> 3;

    int BindWord = PBEntry.Arguments[0];

    //Bit 0 is the enable flag, bits 4..8 select the slot.
    bool IsEnabled = (BindWord & 1) != 0;
    int  Slot      = (BindWord >> 4) & 0x1f;

    long Address = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);

    ConstBuffers[Stage][Slot].Position = Address;
    ConstBuffers[Stage][Slot].Enabled  = IsEnabled;
    ConstBuffers[Stage][Slot].Size     = ReadRegister(NvGpuEngine3dReg.ConstBufferSize);
}
|
||||
|
||||
/// <summary>
/// Combines two consecutive registers into one 64-bit value: the register at
/// <paramref name="Reg"/> supplies the upper 32 bits and the following
/// register supplies the lower 32 bits (taken as unsigned).
/// </summary>
/// <param name="Reg">First (high word) register of the pair.</param>
/// <returns>The combined 64-bit value.</returns>
private long MakeInt64From2xInt32(NvGpuEngine3dReg Reg)
{
    int FirstIndex = (int)Reg;

    long HighPart = (long)Registers[FirstIndex] << 32;
    long LowPart  = (uint)Registers[FirstIndex + 1];

    return HighPart | LowPart;
}
|
||||
|
||||
/// <summary>
/// Stores the last argument of the entry into the register addressed by the
/// entry's method number. Entries without arguments leave the registers
/// untouched.
/// </summary>
/// <param name="PBEntry">Push buffer entry to store.</param>
private void WriteRegister(NvGpuPBEntry PBEntry)
{
    int LastIndex = PBEntry.Arguments.Count - 1;

    if (LastIndex < 0)
    {
        return;
    }

    Registers[PBEntry.Method] = PBEntry.Arguments[LastIndex];
}
|
||||
|
||||
/// <summary>Returns the current value of the given register.</summary>
/// <param name="Reg">Register to read.</param>
private int ReadRegister(NvGpuEngine3dReg Reg) => Registers[(int)Reg];
|
||||
|
||||
/// <summary>Overwrites the given register with <paramref name="Value"/>.</summary>
/// <param name="Reg">Register to write.</param>
/// <param name="Value">New register value.</param>
private void WriteRegister(NvGpuEngine3dReg Reg, int Value) => Registers[(int)Reg] = Value;
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="Position"/> is present in the set of
/// positions tracked in <c>FrameBuffers</c>.
/// </summary>
/// <param name="Position">Position to look up.</param>
/// <returns>True when the position is tracked, false otherwise.</returns>
public bool IsFrameBufferPosition(long Position) => FrameBuffers.Contains(Position);
|
||||
}
|
||||
}
|
61
Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs
Normal file
61
Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs
Normal file
|
@ -0,0 +1,61 @@
|
|||
namespace Ryujinx.HLE.Gpu.Engines
{
    /// <summary>
    /// Register numbers of the 3D engine. The values are used directly as
    /// indices into the engine's register array.
    /// </summary>
    /// <remarks>
    /// Address registers are read as a pair: the named register holds the
    /// high word and the next one holds the low word. Names containing "N"
    /// are presumably the first element of an indexed group (the vertex
    /// array address and end address registers are indexed that way by the
    /// 3D engine) - confirm for the remaining groups.
    /// </remarks>
    enum NvGpuEngine3dReg
    {
        //Frame buffers.
        FrameBufferNAddress  = 0x200,
        FrameBufferNWidth    = 0x202,
        FrameBufferNHeight   = 0x203,
        FrameBufferNFormat   = 0x204,

        //Viewport transform.
        ViewportScaleX       = 0x280,
        ViewportScaleY       = 0x281,
        ViewportScaleZ       = 0x282,
        ViewportTranslateX   = 0x283,
        ViewportTranslateY   = 0x284,
        ViewportTranslateZ   = 0x285,

        //Non-indexed draw range and vertex attributes.
        VertexArrayFirst     = 0x35d,
        VertexArrayCount     = 0x35e,
        VertexAttribNFormat  = 0x458,

        //Blending.
        IBlendEnable         = 0x4b9,
        BlendSeparateAlpha   = 0x4cf,
        BlendEquationRgb     = 0x4d0,
        BlendFuncSrcRgb      = 0x4d1,
        BlendFuncDstRgb      = 0x4d2,
        BlendEquationAlpha   = 0x4d3,
        BlendFuncSrcAlpha    = 0x4d4,
        BlendFuncDstAlpha    = 0x4d6,
        BlendEnableMaster    = 0x4d7,
        IBlendNEnable        = 0x4d8,

        VertexArrayElemBase  = 0x50d,

        //Texture pools.
        TexHeaderPoolOffset  = 0x55d,
        TexSamplerPoolOffset = 0x557,

        ShaderAddress        = 0x582,
        VertexBeginGl        = 0x586,

        //Index buffer.
        IndexArrayAddress    = 0x5f2,
        IndexArrayEndAddr    = 0x5f4,
        IndexArrayFormat     = 0x5f6,
        IndexBatchFirst      = 0x5f7,
        IndexBatchCount      = 0x5f8,

        //Queries.
        QueryAddress         = 0x6c0,
        QuerySequence        = 0x6c2,
        QueryControl         = 0x6c3,

        //Vertex arrays.
        VertexArrayNControl  = 0x700,
        VertexArrayNAddress  = 0x701,
        VertexArrayNDivisor  = 0x703,

        //Independent blending.
        IBlendNSeparateAlpha = 0x780,
        IBlendNEquationRgb   = 0x781,
        IBlendNFuncSrcRgb    = 0x782,
        IBlendNFuncDstRgb    = 0x783,
        IBlendNEquationAlpha = 0x784,
        IBlendNFuncSrcAlpha  = 0x785,
        IBlendNFuncDstAlpha  = 0x786,

        VertexArrayNEndAddr  = 0x7c0,

        //Shader programs.
        ShaderNControl       = 0x800,
        ShaderNOffset        = 0x801,
        ShaderNMaxGprs       = 0x803,
        ShaderNType          = 0x804,

        //Constant buffers.
        ConstBufferSize      = 0x8e0,
        ConstBufferAddress   = 0x8e1,
        ConstBufferOffset    = 0x8e3,

        TextureCbIndex       = 0x982
    }
}
|
143
Ryujinx.HLE/Gpu/Engines/NvGpuEngineDma.cs
Normal file
143
Ryujinx.HLE/Gpu/Engines/NvGpuEngineDma.cs
Normal file
|
@ -0,0 +1,143 @@
|
|||
using Ryujinx.HLE.Gpu.Memory;
|
||||
using Ryujinx.HLE.Gpu.Texture;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.HLE.Gpu.Engines
{
    /// <summary>
    /// DMA engine. Method 0xc0 runs a byte-wise memory copy configured through
    /// the engine registers; every other method just stores its last argument
    /// into the register with the same number.
    /// </summary>
    class NvGpuEngineDma : INvGpuEngine
    {
        public int[] Registers { get; private set; }

        private NvGpu Gpu;

        //Method number -> handler. Methods not listed here are plain register writes.
        private Dictionary<int, NvGpuMethod> Methods;

        /// <summary>
        /// Creates the engine, allocates its register file and registers the
        /// method handlers.
        /// </summary>
        /// <param name="Gpu">Owning GPU instance.</param>
        public NvGpuEngineDma(NvGpu Gpu)
        {
            this.Gpu = Gpu;

            Registers = new int[0x1d6];

            Methods = new Dictionary<int, NvGpuMethod>();

            //Registers the same handler for Count method numbers,
            //starting at Meth and advancing by Stride each time.
            void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
            {
                while (Count-- > 0)
                {
                    Methods.Add(Meth, Method);

                    Meth += Stride;
                }
            }

            AddMethod(0xc0, 1, 1, Execute);
        }

        /// <summary>
        /// Dispatches an entry to its registered handler, or stores it as a
        /// register write when no handler exists for the method number.
        /// </summary>
        /// <param name="Vmm">GPU virtual memory passed through to the handler.</param>
        /// <param name="PBEntry">Push buffer entry to process.</param>
        public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
        {
            if (Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Method))
            {
                Method(Vmm, PBEntry);
            }
            else
            {
                WriteRegister(PBEntry);
            }
        }

        /// <summary>
        /// Copies DstSizeX by DstSizeY bytes from the source address to the
        /// destination address, one byte at a time. Each side maps (X, Y) to a
        /// byte offset through its own swizzle, which is linear or block
        /// linear depending on bits 7 (source) and 8 (destination) of the
        /// control word.
        /// </summary>
        /// <param name="Vmm">GPU virtual memory that is read and written.</param>
        /// <param name="PBEntry">Entry whose first argument is the control word.</param>
        private void Execute(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
        {
            int Control = PBEntry.Arguments[0];

            bool SrcLinear = ((Control >> 7) & 1) != 0;
            bool DstLinear = ((Control >> 8) & 1) != 0;

            long SrcAddress = MakeInt64From2xInt32(NvGpuEngineDmaReg.SrcAddress);
            long DstAddress = MakeInt64From2xInt32(NvGpuEngineDmaReg.DstAddress);

            int SrcPitch = ReadRegister(NvGpuEngineDmaReg.SrcPitch);
            int DstPitch = ReadRegister(NvGpuEngineDmaReg.DstPitch);

            int DstBlkDim = ReadRegister(NvGpuEngineDmaReg.DstBlkDim);
            int DstSizeX  = ReadRegister(NvGpuEngineDmaReg.DstSizeX);
            int DstSizeY  = ReadRegister(NvGpuEngineDmaReg.DstSizeY);

            int SrcBlkDim = ReadRegister(NvGpuEngineDmaReg.SrcBlkDim);
            int SrcSizeX  = ReadRegister(NvGpuEngineDmaReg.SrcSizeX);

            //NOTE(review): the position registers (SrcPosXY, SrcPosZ, DstPosXY,
            //DstPosZ) and the SrcSizeY, SrcSizeZ and DstSizeZ registers are not
            //applied by this copy; it always starts at (0, 0) on both sides.
            //The reads of those registers were dropped because their values
            //were never used.

            //Bits 4..7 of the block dimensions hold log2 of the block height.
            int SrcBlockHeight = 1 << ((SrcBlkDim >> 4) & 0xf);
            int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf);

            ISwizzle SrcSwizzle = SrcLinear
                ? (ISwizzle)new LinearSwizzle(SrcPitch, 1)
                : new BlockLinearSwizzle(SrcSizeX, 1, SrcBlockHeight);

            ISwizzle DstSwizzle = DstLinear
                ? (ISwizzle)new LinearSwizzle(DstPitch, 1)
                : new BlockLinearSwizzle(DstSizeX, 1, DstBlockHeight);

            for (int Y = 0; Y < DstSizeY; Y++)
            {
                for (int X = 0; X < DstSizeX; X++)
                {
                    long SrcOffset = SrcAddress + (uint)SrcSwizzle.GetSwizzleOffset(X, Y);
                    long DstOffset = DstAddress + (uint)DstSwizzle.GetSwizzleOffset(X, Y);

                    Vmm.WriteByte(DstOffset, Vmm.ReadByte(SrcOffset));
                }
            }
        }

        /// <summary>
        /// Combines two consecutive registers into one 64-bit value: the
        /// register at <paramref name="Reg"/> is the upper 32 bits, the next
        /// register is the lower 32 bits (taken as unsigned).
        /// </summary>
        /// <param name="Reg">First (high word) register of the pair.</param>
        /// <returns>The combined 64-bit value.</returns>
        private long MakeInt64From2xInt32(NvGpuEngineDmaReg Reg)
        {
            return
                (long)Registers[(int)Reg + 0] << 32 |
                (uint)Registers[(int)Reg + 1];
        }

        /// <summary>
        /// Stores the last argument of the entry into the register addressed
        /// by the entry's method number; entries without arguments are ignored.
        /// </summary>
        /// <param name="PBEntry">Push buffer entry to store.</param>
        private void WriteRegister(NvGpuPBEntry PBEntry)
        {
            int ArgsCount = PBEntry.Arguments.Count;

            if (ArgsCount > 0)
            {
                Registers[PBEntry.Method] = PBEntry.Arguments[ArgsCount - 1];
            }
        }

        /// <summary>Returns the current value of the given register.</summary>
        /// <param name="Reg">Register to read.</param>
        private int ReadRegister(NvGpuEngineDmaReg Reg)
        {
            return Registers[(int)Reg];
        }

        /// <summary>Overwrites the given register with <paramref name="Value"/>.</summary>
        /// <param name="Reg">Register to write.</param>
        /// <param name="Value">New register value.</param>
        private void WriteRegister(NvGpuEngineDmaReg Reg, int Value)
        {
            Registers[(int)Reg] = Value;
        }
    }
}
|
22
Ryujinx.HLE/Gpu/Engines/NvGpuEngineDmaReg.cs
Normal file
22
Ryujinx.HLE/Gpu/Engines/NvGpuEngineDmaReg.cs
Normal file
|
@ -0,0 +1,22 @@
|
|||
namespace Ryujinx.HLE.Gpu.Engines
{
    /// <summary>
    /// Register numbers of the DMA engine. The values are used directly as
    /// indices into the engine's register array.
    /// </summary>
    enum NvGpuEngineDmaReg
    {
        //Addresses span two registers each: high word first, low word next.
        SrcAddress = 0x100,
        DstAddress = 0x102,

        SrcPitch   = 0x104,
        DstPitch   = 0x105,

        //Destination layout.
        DstBlkDim  = 0x1c3,
        DstSizeX   = 0x1c4,
        DstSizeY   = 0x1c5,
        DstSizeZ   = 0x1c6,
        DstPosZ    = 0x1c7,
        DstPosXY   = 0x1c8,

        //Source layout.
        SrcBlkDim  = 0x1ca,
        SrcSizeX   = 0x1cb,
        SrcSizeY   = 0x1cc,
        SrcSizeZ   = 0x1cd,
        SrcPosZ    = 0x1ce,
        SrcPosXY   = 0x1cf
    }
}
|
183
Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs
Normal file
183
Ryujinx.HLE/Gpu/Engines/NvGpuFifo.cs
Normal file
|
@ -0,0 +1,183 @@
|
|||
using Ryujinx.HLE.Gpu.Memory;
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace Ryujinx.HLE.Gpu.Engines
{
    /// <summary>
    /// Command FIFO. Push buffer entries are queued with
    /// <see cref="PushBuffer"/> and later drained with
    /// <see cref="DispatchCalls"/> or <see cref="Step"/>, which route each
    /// entry either to the FIFO's own methods (below 0x80) or to the engine
    /// bound to the entry's sub channel.
    /// </summary>
    class NvGpuFifo
    {
        private const int MacrosCount    = 0x80;
        private const int MacroIndexMask = MacrosCount - 1;

        //Note: The size of the macro memory is unknown, so this is a guess:
        //256 * 256 32-bit words (256 KiB). Increase if needed.
        private const int MmeWords = 256 * 256;

        private NvGpu Gpu;

        private ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry)> BufferQueue;

        //Engine bound to each sub channel.
        private NvGpuEngine[] SubChannels;

        /// <summary>
        /// A macro bound to a start position in macro memory, together with
        /// the interpreter that runs it. A default (never bound) instance has
        /// no interpreter, and both operations then do nothing.
        /// </summary>
        private struct CachedMacro
        {
            public int Position { get; private set; }

            private MacroInterpreter Interpreter;

            public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, int Position)
            {
                this.Position = Position;

                Interpreter = new MacroInterpreter(PFifo, Engine);
            }

            //Queues one parameter for the next execution of the macro.
            public void PushParam(int Param)
            {
                Interpreter?.Fifo.Enqueue(Param);
            }

            //Runs the macro from its bound position in the given macro memory.
            public void Execute(NvGpuVmm Vmm, int[] Mme, int Param)
            {
                Interpreter?.Execute(Vmm, Mme, Position, Param);
            }
        }

        //Write cursor into Mme for uploaded macro code.
        private int CurrMacroPosition;

        //Slot in Macros that the next BindMacro targets.
        private int CurrMacroBindIndex;

        private CachedMacro[] Macros;

        //Macro code memory.
        private int[] Mme;

        /// <summary>Creates an empty FIFO for the given GPU.</summary>
        /// <param name="Gpu">GPU whose engines receive the dispatched methods.</param>
        public NvGpuFifo(NvGpu Gpu)
        {
            this.Gpu = Gpu;

            BufferQueue = new ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry)>();
            SubChannels = new NvGpuEngine[8];
            Macros      = new CachedMacro[MacrosCount];
            Mme         = new int[MmeWords];
        }

        /// <summary>Queues every entry of <paramref name="Buffer"/>, in order.</summary>
        /// <param name="Vmm">GPU virtual memory the entries will be executed against.</param>
        /// <param name="Buffer">Entries to queue.</param>
        public void PushBuffer(NvGpuVmm Vmm, NvGpuPBEntry[] Buffer)
        {
            for (int Index = 0; Index < Buffer.Length; Index++)
            {
                BufferQueue.Enqueue((Vmm, Buffer[Index]));
            }
        }

        /// <summary>Processes queued entries until the queue is empty.</summary>
        public void DispatchCalls()
        {
            while (Step())
            {
            }
        }

        /// <summary>Processes at most one queued entry.</summary>
        /// <returns>True when an entry was processed, false when the queue was empty.</returns>
        public bool Step()
        {
            if (!BufferQueue.TryDequeue(out (NvGpuVmm Vmm, NvGpuPBEntry PBEntry) Pending))
            {
                return false;
            }

            CallMethod(Pending.Vmm, Pending.PBEntry);

            return true;
        }

        /// <summary>
        /// Routes one entry: methods at or above 0x80 go to the engine bound
        /// to the entry's sub channel, lower ones are handled by the FIFO.
        /// </summary>
        private void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
        {
            if (PBEntry.Method >= 0x80)
            {
                switch (SubChannels[PBEntry.SubChannel])
                {
                    case NvGpuEngine._2d: Call2dMethod (Vmm, PBEntry); break;
                    case NvGpuEngine._3d: Call3dMethod (Vmm, PBEntry); break;
                    case NvGpuEngine.Dma: CallDmaMethod(Vmm, PBEntry); break;
                }

                return;
            }

            switch ((NvGpuFifoMeth)PBEntry.Method)
            {
                case NvGpuFifoMeth.BindChannel:
                {
                    NvGpuEngine BoundEngine = (NvGpuEngine)PBEntry.Arguments[0];

                    SubChannels[PBEntry.SubChannel] = BoundEngine;

                    break;
                }

                case NvGpuFifoMeth.SetMacroUploadAddress:
                    CurrMacroPosition = PBEntry.Arguments[0];
                    break;

                case NvGpuFifoMeth.SendMacroCodeData:
                {
                    //Append the code words at the upload cursor.
                    foreach (int CodeWord in PBEntry.Arguments)
                    {
                        Mme[CurrMacroPosition++] = CodeWord;
                    }

                    break;
                }

                case NvGpuFifoMeth.SetMacroBindingIndex:
                    CurrMacroBindIndex = PBEntry.Arguments[0];
                    break;

                case NvGpuFifoMeth.BindMacro:
                {
                    int StartPosition = PBEntry.Arguments[0];

                    Macros[CurrMacroBindIndex] = new CachedMacro(this, Gpu.Engine3d, StartPosition);

                    break;
                }
            }
        }

        //Forwards the entry to the 2D engine.
        private void Call2dMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) => Gpu.Engine2d.CallMethod(Vmm, PBEntry);

        /// <summary>
        /// Handles an entry on a 3D sub channel. Methods below 0xe00 go to the
        /// 3D engine. Higher ones address a macro slot: odd method numbers
        /// push parameters, even ones execute the macro with the first
        /// argument.
        /// </summary>
        private void Call3dMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
        {
            if (PBEntry.Method < 0xe00)
            {
                Gpu.Engine3d.CallMethod(Vmm, PBEntry);

                return;
            }

            int Slot = (PBEntry.Method >> 1) & MacroIndexMask;

            bool IsParamPush = (PBEntry.Method & 1) != 0;

            if (!IsParamPush)
            {
                Macros[Slot].Execute(Vmm, Mme, PBEntry.Arguments[0]);

                return;
            }

            foreach (int Param in PBEntry.Arguments)
            {
                Macros[Slot].PushParam(Param);
            }
        }

        //Forwards the entry to the DMA engine.
        private void CallDmaMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) => Gpu.EngineDma.CallMethod(Vmm, PBEntry);
    }
}
|
11
Ryujinx.HLE/Gpu/Engines/NvGpuFifoMeth.cs
Normal file
11
Ryujinx.HLE/Gpu/Engines/NvGpuFifoMeth.cs
Normal file
|
@ -0,0 +1,11 @@
|
|||
namespace Ryujinx.HLE.Gpu.Engines
{
    /// <summary>
    /// Method numbers handled by the FIFO itself rather than by an engine.
    /// </summary>
    enum NvGpuFifoMeth
    {
        BindChannel           = 0,

        //Macro upload and binding.
        SetMacroUploadAddress = 0x45,
        SendMacroCodeData     = 0x46,
        SetMacroBindingIndex  = 0x47,
        BindMacro             = 0x48
    }
}
|
6
Ryujinx.HLE/Gpu/Engines/NvGpuMethod.cs
Normal file
6
Ryujinx.HLE/Gpu/Engines/NvGpuMethod.cs
Normal file
|
@ -0,0 +1,6 @@
|
|||
using Ryujinx.HLE.Gpu.Memory;
|
||||
|
||||
namespace Ryujinx.HLE.Gpu.Engines
{
    /// <summary>
    /// Signature of a handler that an engine invokes for a push buffer entry.
    /// </summary>
    /// <param name="Vmm">GPU virtual memory the handler may read or write.</param>
    /// <param name="PBEntry">Push buffer entry being handled.</param>
    delegate void NvGpuMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry);
}
|
Loading…
Add table
Add a link
Reference in a new issue