Small OpenGL Renderer refactoring (#177)

* Call OpenGL functions directly, remove the pfifo thread, some refactoring

* Fix PerformanceStatistics calculating the wrong host fps, remove wait event on PFIFO as this wasn't exactly what was causing the freezes (may replace with an exception later)

* Organized the Gpu folder a bit more, renamed a few things, address PR feedback

* Make PerformanceStatistics thread safe

* Remove unused constant

* Use unlimited update rate for better perf
This commit is contained in:
gdkchan 2018-06-23 21:39:25 -03:00 committed by GitHub
parent 69697957e6
commit e7559f128f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
58 changed files with 518 additions and 633 deletions

View file

@ -0,0 +1,11 @@
using Ryujinx.HLE.Gpu.Memory;
namespace Ryujinx.HLE.Gpu.Engines
{
//Common surface of a GPU engine, as consumed by the FIFO dispatcher
//and by the macro interpreter.
interface INvGpuEngine
{
//Register file of the engine, indexed by method/register number.
int[] Registers { get; }
//Processes a single push buffer entry addressed to this engine.
void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry);
}
}

View file

@ -0,0 +1,434 @@
using Ryujinx.HLE.Gpu.Exceptions;
using Ryujinx.HLE.Gpu.Memory;
using System;
using System.Collections.Generic;
namespace Ryujinx.HLE.Gpu.Engines
{
class MacroInterpreter
{
private const int MaxCallCountPerRun = 500;
private int CallCount;
//Selects what a value-producing instruction does with its ALU result
//and whether it pulls a parameter from the FIFO.
//Decoded from bits 6:4 of the opcode (see Step).
private enum AssignmentOperation
{
IgnoreAndFetch = 0,
Move = 1,
MoveAndSetMaddr = 2,
FetchAndSend = 3,
MoveAndSend = 4,
FetchAndSetMaddr = 5,
MoveAndSetMaddrThenFetchAndSend = 6,
MoveAndSetMaddrThenSendHigh = 7
}
//Top level operation of an instruction, decoded from bits 2:0 of the opcode.
//Value 7 is not listed here because Step treats it as a branch,
//and value 6 has no member, so GetAluResult throws for it.
private enum AluOperation
{
AluReg = 0,
AddImmediate = 1,
BitfieldReplace = 2,
BitfieldExtractLslImm = 3,
BitfieldExtractLslReg = 4,
ReadImmediate = 5
}
//Register/register sub-operation used when the operation is AluReg.
//Decoded from bits 21:17 of the opcode (see GetAluResult).
//Note that values 4 to 7 are not defined.
private enum AluRegOperation
{
Add = 0,
AddWithCarry = 1,
Subtract = 2,
SubtractWithBorrow = 3,
BitwiseExclusiveOr = 8,
BitwiseOr = 9,
BitwiseAnd = 10,
BitwiseAndNot = 11,
BitwiseNotAnd = 12
}
private NvGpuFifo PFifo;
private INvGpuEngine Engine;
public Queue<int> Fifo { get; private set; }
private int[] Gprs;
private int MethAddr;
private int MethIncr;
private bool Carry;
private int OpCode;
private int PipeOp;
private int Pc;
//Creates an interpreter bound to the FIFO it pulls missing parameters from
//and to the engine that receives the methods sent by the macro.
public MacroInterpreter(NvGpuFifo PFifo, INvGpuEngine Engine)
{
this.PFifo = PFifo;
this.Engine = Engine;
//Parameters pushed for the macro, consumed by FetchParam.
Fifo = new Queue<int>();
//8 general purpose registers (register 0 always reads as zero, see GetGprValue).
Gprs = new int[8];
}
//Runs the macro stored in Mme, starting at word index Position.
//Param is the first macro argument and is preloaded into GPR 1.
public void Execute(NvGpuVmm Vmm, int[] Mme, int Position, int Param)
{
Reset();
Gprs[1] = Param;
Pc = Position;
//Prime the one instruction pipeline, after this PipeOp holds the first opcode.
FetchOpCode(Mme);
//Keep executing until an instruction requests exit.
while (Step(Vmm, Mme));
//Due to the delay slot, we still need to execute
//one more instruction before we actually exit.
Step(Vmm, Mme);
}
//Puts the interpreter back into its initial state before a macro run:
//all general purpose registers zeroed, no method address/increment,
//carry flag cleared and the draw call counter restarted.
private void Reset()
{
    Array.Clear(Gprs, 0, Gprs.Length);

    CallCount = 0;
    Carry     = false;
    MethIncr  = 0;
    MethAddr  = 0;
}
//Executes the next pipelined instruction.
//Returns true while execution should continue, and false once the
//executed instruction has the exit bit (bit 7) set and no branch was taken.
private bool Step(NvGpuVmm Vmm, int[] Mme)
{
//Pc points one word past the instruction held in PipeOp, which is the one
//that is about to execute, so Pc - 1 is the address of that instruction.
int BaseAddr = Pc - 1;
FetchOpCode(Mme);
if ((OpCode & 7) < 7)
{
//Operation produces a value.
AssignmentOperation AsgOp = (AssignmentOperation)((OpCode >> 4) & 7);
int Result = GetAluResult();
switch (AsgOp)
{
//Fetch parameter and ignore result.
case AssignmentOperation.IgnoreAndFetch:
{
SetDstGpr(FetchParam());
break;
}
//Move result.
case AssignmentOperation.Move:
{
SetDstGpr(Result);
break;
}
//Move result and use as Method Address.
case AssignmentOperation.MoveAndSetMaddr:
{
SetDstGpr(Result);
SetMethAddr(Result);
break;
}
//Fetch parameter and send result.
case AssignmentOperation.FetchAndSend:
{
SetDstGpr(FetchParam());
Send(Vmm, Result);
break;
}
//Move and send result.
case AssignmentOperation.MoveAndSend:
{
SetDstGpr(Result);
Send(Vmm, Result);
break;
}
//Fetch parameter and use result as Method Address.
case AssignmentOperation.FetchAndSetMaddr:
{
SetDstGpr(FetchParam());
SetMethAddr(Result);
break;
}
//Move result and use as Method Address, then fetch and send parameter.
case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
{
SetDstGpr(Result);
SetMethAddr(Result);
Send(Vmm, FetchParam());
break;
}
//Move result and use as Method Address, then send bits 17:12 of result.
case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
{
SetDstGpr(Result);
SetMethAddr(Result);
Send(Vmm, (Result >> 12) & 0x3f);
break;
}
}
}
else
{
//Branch.
//Bit 4 selects between "branch if not zero" and "branch if zero" on GPR A.
bool OnNotZero = ((OpCode >> 4) & 1) != 0;
bool Taken = OnNotZero
? GetGprA() != 0
: GetGprA() == 0;
if (Taken)
{
//The branch target is relative to the address of the branch itself.
Pc = BaseAddr + GetImm();
bool NoDelays = (OpCode & 0x20) != 0;
if (NoDelays)
{
//Fetching here replaces the pipelined instruction with the one at
//the target, so the instruction in the delay slot never executes.
FetchOpCode(Mme);
}
//A taken branch always continues, the exit bit is not checked.
return true;
}
}
bool Exit = (OpCode & 0x80) != 0;
return !Exit;
}
//Advances the one instruction pipeline: the previously fetched word becomes
//the current opcode, and the word at Pc is fetched for the next step.
//Throws IndexOutOfRangeException if Pc is outside of Mme.
private void FetchOpCode(int[] Mme)
{
OpCode = PipeOp;
PipeOp = Mme[Pc++];
}
//Computes the value produced by the current opcode.
//Throws ArgumentException when bits 2:0 of the opcode do not
//match any supported AluOperation.
private int GetAluResult()
{
AluOperation Op = (AluOperation)(OpCode & 7);
switch (Op)
{
case AluOperation.AluReg:
{
AluRegOperation AluOp = (AluRegOperation)((OpCode >> 17) & 0x1f);
return GetAluResult(AluOp, GetGprA(), GetGprB());
}
case AluOperation.AddImmediate:
{
return GetGprA() + GetImm();
}
case AluOperation.BitfieldReplace:
case AluOperation.BitfieldExtractLslImm:
case AluOperation.BitfieldExtractLslReg:
{
//All bitfield operations share the same field layout on the opcode:
//bits 21:17 source bit, bits 26:22 size, bits 31:27 destination bit.
int BfSrcBit = (OpCode >> 17) & 0x1f;
int BfSize = (OpCode >> 22) & 0x1f;
int BfDstBit = (OpCode >> 27) & 0x1f;
int BfMask = (1 << BfSize) - 1;
int Dst = GetGprA();
int Src = GetGprB();
switch (Op)
{
case AluOperation.BitfieldReplace:
{
//Insert BfSize bits of Src (from BfSrcBit) into Dst at BfDstBit.
Src = (int)((uint)Src >> BfSrcBit) & BfMask;
Dst &= ~(BfMask << BfDstBit);
Dst |= Src << BfDstBit;
return Dst;
}
case AluOperation.BitfieldExtractLslImm:
{
//Here GPR A (Dst) is the right shift amount, and the
//extracted field is shifted left by the immediate BfDstBit.
Src = (int)((uint)Src >> Dst) & BfMask;
return Src << BfDstBit;
}
case AluOperation.BitfieldExtractLslReg:
{
//Here the right shift amount is the immediate BfSrcBit, and the
//extracted field is shifted left by GPR A (Dst).
Src = (int)((uint)Src >> BfSrcBit) & BfMask;
return Src << Dst;
}
}
break;
}
case AluOperation.ReadImmediate:
{
//Reads the engine register at index GPR A + immediate.
return Read(GetGprA() + GetImm());
}
}
throw new ArgumentException(nameof(OpCode));
}
//Evaluates a register/register ALU operation on the 32-bit values A and B.
//The add and subtract variants also update the Carry flag:
//on additions it is set when the unsigned sum does not fit in 32 bits,
//on subtractions it is set when no borrow happened.
//The bitwise variants leave Carry untouched.
//Throws ArgumentOutOfRangeException for undefined operations.
private int GetAluResult(AluRegOperation AluOp, int A, int B)
{
    //The operands must be zero-extended to 64 bits. Casting a negative int
    //directly to ulong sign-extends it, which sets the upper 32 bits and
    //makes the carry/borrow checks below give the wrong answer for any
    //operand with bit 31 set.
    ulong OpA = (uint)A;
    ulong OpB = (uint)B;

    switch (AluOp)
    {
        case AluRegOperation.Add:
        {
            ulong Result = OpA + OpB;

            Carry = Result > 0xffffffff;

            return (int)Result;
        }

        case AluRegOperation.AddWithCarry:
        {
            ulong Result = OpA + OpB + (Carry ? 1UL : 0UL);

            Carry = Result > 0xffffffff;

            return (int)Result;
        }

        case AluRegOperation.Subtract:
        {
            //A borrow wraps the 64-bit result around, setting the upper bits.
            ulong Result = OpA - OpB;

            Carry = Result < 0x100000000;

            return (int)Result;
        }

        case AluRegOperation.SubtractWithBorrow:
        {
            //A clear carry means a pending borrow from the previous operation.
            ulong Result = OpA - OpB - (Carry ? 0UL : 1UL);

            Carry = Result < 0x100000000;

            return (int)Result;
        }

        case AluRegOperation.BitwiseExclusiveOr: return A ^ B;
        case AluRegOperation.BitwiseOr:          return A | B;
        case AluRegOperation.BitwiseAnd:         return A & B;
        case AluRegOperation.BitwiseAndNot:      return A & ~B;
        case AluRegOperation.BitwiseNotAnd:      return ~(A & B);
    }

    throw new ArgumentOutOfRangeException(nameof(AluOp));
}
//Immediate operand of the current opcode (bits 31:14).
//The arithmetic shift is deliberate, the immediate is a signed value
//and has to be sign-extended.
private int GetImm() => OpCode >> 14;
//Unpacks a method address word: bits 11:0 hold the method address,
//and bits 17:12 the increment applied to it after each Send.
private void SetMethAddr(int Value)
{
    MethIncr = (Value >> 12) & 0x3f;
    MethAddr = Value & 0xfff;
}
//Writes Value to the destination register selected by bits 10:8 of the opcode.
private void SetDstGpr(int Value)
{
    int DstIndex = (OpCode >> 8) & 7;

    Gprs[DstIndex] = Value;
}
//Value of source register A, selected by bits 13:11 of the opcode.
private int GetGprA() => GetGprValue((OpCode >> 11) & 7);
//Value of source register B, selected by bits 16:14 of the opcode.
private int GetGprB() => GetGprValue((OpCode >> 14) & 7);
//Reads a general purpose register. Register 0 always reads as zero,
//regardless of what was written to it.
private int GetGprValue(int Index)
{
    if (Index == 0)
    {
        return 0;
    }

    return Gprs[Index];
}
//Takes the next macro parameter from the FIFO.
//When the FIFO is empty, the PFIFO engine is stepped until it pushes
//a parameter. If PFIFO has nothing left to process, 0 is returned.
private int FetchParam()
{
    while (true)
    {
        if (Fifo.TryDequeue(out int Param))
        {
            return Param;
        }

        bool Stepped = PFifo.Step();

        if (!Stepped)
        {
            return 0;
        }
    }
}
//Reads the register with index Reg from the bound engine.
private int Read(int Reg) => Engine.Registers[Reg];
//Calls the method at the current method address on the bound engine,
//using Value as argument, then advances the address by the increment.
private void Send(NvGpuVmm Vmm, int Value)
{
    //Method 0x585 is VertexEndGl, which triggers rendering. The amount of
    //calls per macro run is artificially limited, so that a bug causing an
    //absurd amount of draw calls throws instead of freezing the system.
    if (MethAddr == 0x585)
    {
        CallCount++;

        if (CallCount > MaxCallCountPerRun)
        {
            GpuExceptionHelper.ThrowCallCoundExceeded();
        }
    }

    Engine.CallMethod(Vmm, new NvGpuPBEntry(MethAddr, 0, Value));

    MethAddr += MethIncr;
}
}
}

View file

@ -0,0 +1,11 @@
namespace Ryujinx.HLE.Gpu.Engines
{
//Identifiers of the GPU engines that can be bound to a sub channel.
//The values are the ones received as argument of the BindChannel method.
enum NvGpuEngine
{
_2d = 0x902d,
_3d = 0xb197,
Compute = 0xb1c0,
Kepler = 0xa140,
Dma = 0xb0b5
}
}

View file

@ -0,0 +1,170 @@
using Ryujinx.Graphics.Gal;
using Ryujinx.HLE.Gpu.Memory;
using Ryujinx.HLE.Gpu.Texture;
using System.Collections.Generic;
namespace Ryujinx.HLE.Gpu.Engines
{
class NvGpuEngine2d : INvGpuEngine
{
//Values of the CopyOperation register (implicitly numbered from 0).
private enum CopyOperation
{
SrcCopyAnd,
RopAnd,
Blend,
SrcCopy,
Rop,
SrcCopyPremult,
BlendPremult
}
public int[] Registers { get; private set; }
private NvGpu Gpu;
private Dictionary<int, NvGpuMethod> Methods;
//Creates the 2D engine, allocating its register file and
//registering the methods that have a dedicated handler.
public NvGpuEngine2d(NvGpu Gpu)
{
this.Gpu = Gpu;
Registers = new int[0xe00];
Methods = new Dictionary<int, NvGpuMethod>();
//Registers the same handler for Count method numbers,
//starting at Meth and spaced by Stride.
void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
{
while (Count-- > 0)
{
Methods.Add(Meth, Method);
Meth += Stride;
}
}
AddMethod(0xb5, 1, 1, TextureCopy);
}
//Dispatches a push buffer entry: methods with a registered handler are
//executed, anything else is treated as a plain register write.
public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    if (!Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Handler))
    {
        WriteRegister(PBEntry);

        return;
    }

    Handler(Vmm, PBEntry);
}
//Copies the source surface described by the Src* registers into the
//destination surface described by the Dst* registers.
//When the source is a known frame buffer, the pixel data is taken from
//the renderer instead of guest memory.
//Note: The CopyOperation, SrcLinear and DstPitch registers are
//currently not taken into account by the copy.
private void TextureCopy(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    int SrcWidth  = ReadRegister(NvGpuEngine2dReg.SrcWidth);
    int SrcHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight);

    bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0;
    int  DstWidth  = ReadRegister(NvGpuEngine2dReg.DstWidth);
    int  DstHeight = ReadRegister(NvGpuEngine2dReg.DstHeight);
    int  DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions);

    TextureSwizzle DstSwizzle = DstLinear
        ? TextureSwizzle.Pitch
        : TextureSwizzle.BlockLinear;

    //The block height is stored as a log2 value on bits 7:4.
    int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf);

    long SrcAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress);
    long DstAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.DstAddress);

    //Frame buffers are tracked by physical address.
    long Key = Vmm.GetPhysicalAddress(SrcAddress);

    bool IsFbTexture = Gpu.Engine3d.IsFrameBufferPosition(Key);

    if (IsFbTexture && DstLinear)
    {
        DstSwizzle = TextureSwizzle.BlockLinear;
    }

    //NOTE(review): DstBlockHeight is passed for two consecutive arguments,
    //while DstPitch is never used. Confirm against the TextureInfo
    //constructor whether the first one was meant to be the pitch.
    TextureInfo DstTexture = new TextureInfo(
        DstAddress,
        DstWidth,
        DstHeight,
        DstBlockHeight,
        DstBlockHeight,
        DstSwizzle,
        GalTextureFormat.A8B8G8R8);

    if (IsFbTexture)
    {
        //TODO: Change this when the correct frame buffer resolution is used.
        //Currently, the frame buffer size is hardcoded to 1280x720.
        SrcWidth  = 1280;
        SrcHeight = 720;

        Gpu.Renderer.FrameBuffer.GetBufferData(Key, (byte[] Buffer) =>
        {
            CopyTexture(
                Vmm,
                DstTexture,
                Buffer,
                SrcWidth,
                SrcHeight);
        });
    }
    else
    {
        //Widen before multiplying, the product of two guest supplied
        //32-bit values (times 4 bytes per pixel) can overflow an int.
        long Size = (long)SrcWidth * SrcHeight * 4;

        byte[] Buffer = Vmm.ReadBytes(SrcAddress, Size);

        CopyTexture(
            Vmm,
            DstTexture,
            Buffer,
            SrcWidth,
            SrcHeight);
    }
}
//Writes a Width x Height block of pixel data from Buffer
//into the guest memory backing Texture.
private void CopyTexture(NvGpuVmm Vmm, TextureInfo Texture, byte[] Buffer, int Width, int Height)
    => TextureWriter.Write(Vmm, Texture, Buffer, Width, Height);
//Builds a 64-bit value from two consecutive registers:
//the register at Reg holds the high word, and the next one the low word.
private long MakeInt64From2xInt32(NvGpuEngine2dReg Reg)
{
    int Base = (int)Reg;

    long High = (long)Registers[Base] << 32;
    long Low  = (uint)Registers[Base + 1];

    return High | Low;
}
//Stores the last argument of the entry on the register with the same
//number as the method. Entries without arguments are ignored.
private void WriteRegister(NvGpuPBEntry PBEntry)
{
    int LastIndex = PBEntry.Arguments.Count - 1;

    if (LastIndex < 0)
    {
        return;
    }

    Registers[PBEntry.Method] = PBEntry.Arguments[LastIndex];
}
//Current value of the given 2D engine register.
private int ReadRegister(NvGpuEngine2dReg Reg) => Registers[(int)Reg];
//Overwrites the given 2D engine register with Value.
private void WriteRegister(NvGpuEngine2dReg Reg, int Value) => Registers[(int)Reg] = Value;
}
}

View file

@ -0,0 +1,25 @@
namespace Ryujinx.HLE.Gpu.Engines
{
//Indices of the 2D engine registers on the engine register file.
//The address registers span two consecutive entries (high word first),
//which is why there is a gap after DstAddress and SrcAddress.
enum NvGpuEngine2dReg
{
//Destination surface.
DstFormat = 0x80,
DstLinear = 0x81,
DstBlockDimensions = 0x82,
DstDepth = 0x83,
DstLayer = 0x84,
DstPitch = 0x85,
DstWidth = 0x86,
DstHeight = 0x87,
DstAddress = 0x88,
//Source surface.
SrcFormat = 0x8c,
SrcLinear = 0x8d,
SrcBlockDimensions = 0x8e,
SrcDepth = 0x8f,
SrcLayer = 0x90,
SrcPitch = 0x91,
SrcWidth = 0x92,
SrcHeight = 0x93,
SrcAddress = 0x94,
CopyOperation = 0xab
}
}

View file

@ -0,0 +1,556 @@
using Ryujinx.Graphics.Gal;
using Ryujinx.HLE.Gpu.Memory;
using Ryujinx.HLE.Gpu.Texture;
using System;
using System.Collections.Generic;
namespace Ryujinx.HLE.Gpu.Engines
{
class NvGpuEngine3d : INvGpuEngine
{
public int[] Registers { get; private set; }
private NvGpu Gpu;
private Dictionary<int, NvGpuMethod> Methods;
//State of a constant buffer binding, as set by the CbBind method.
private struct ConstBuffer
{
//Whether the binding is enabled (bit 0 of the bind argument).
public bool Enabled;
//Address taken from the ConstBufferAddress registers at bind time.
public long Position;
//Value of the ConstBufferSize register at bind time.
public int Size;
}
private ConstBuffer[][] ConstBuffers;
private HashSet<long> FrameBuffers;
//Creates the 3D engine, allocating its register file, registering the
//methods that have a dedicated handler and creating the binding tables.
public NvGpuEngine3d(NvGpu Gpu)
{
this.Gpu = Gpu;
Registers = new int[0xe00];
Methods = new Dictionary<int, NvGpuMethod>();
//Registers the same handler for Count method numbers,
//starting at Meth and spaced by Stride.
void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
{
while (Count-- > 0)
{
Methods.Add(Meth, Method);
Meth += Stride;
}
}
AddMethod(0x585, 1, 1, VertexEndGl);
AddMethod(0x674, 1, 1, ClearBuffers);
AddMethod(0x6c3, 1, 1, QueryControl);
AddMethod(0x8e4, 16, 1, CbData);
//One bind method for each of the 5 stages, 8 methods apart (see CbBind).
AddMethod(0x904, 5, 8, CbBind);
//6 tables with 18 constant buffer bindings each.
ConstBuffers = new ConstBuffer[6][];
for (int Index = 0; Index < ConstBuffers.Length; Index++)
{
ConstBuffers[Index] = new ConstBuffer[18];
}
//Physical addresses of all the frame buffers seen so far.
FrameBuffers = new HashSet<long>();
}
//Dispatches a push buffer entry: methods with a registered handler are
//executed, anything else is treated as a plain register write.
public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    if (!Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Handler))
    {
        WriteRegister(PBEntry);

        return;
    }

    Handler(Vmm, PBEntry);
}
//Handler of method 0x585. Performs a draw using the current register state:
//binds frame buffer 0, uploads and binds the shaders, sets the blend state,
//uploads textures, uniforms and vertex data, and issues the draw call
//(the draw itself is done at the end of UploadVertexArrays).
private void VertexEndGl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
SetFrameBuffer(Vmm, 0);
//Keys holds the address of the shader bound to each stage.
long[] Keys = UploadShaders(Vmm);
Gpu.Renderer.Shader.BindProgram();
SetAlphaBlending();
UploadTextures(Vmm, Keys);
UploadUniforms(Vmm);
UploadVertexArrays(Vmm);
}
//Handler of method 0x674. Currently this only creates and binds
//frame buffer 0, the clear itself is disabled (see the TODO below).
private void ClearBuffers(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
//The argument is decoded as: bits 5:0 clear flags,
//bits 9:6 frame buffer index, bits 19:10 layer.
int Arg0 = PBEntry.Arguments[0];
int FbIndex = (Arg0 >> 6) & 0xf;
int Layer = (Arg0 >> 10) & 0x3ff;
GalClearBufferFlags Flags = (GalClearBufferFlags)(Arg0 & 0x3f);
//NOTE(review): FbIndex is decoded but frame buffer 0 is always used,
//this looks related to the frame buffer problems mentioned below.
SetFrameBuffer(Vmm, 0);
//TODO: Enable this once the frame buffer problems are fixed.
//Gpu.Renderer.ClearBuffers(Layer, Flags);
}
//Creates (with a fixed 1280x720 size) and binds the frame buffer FbIndex
//on the renderer, and records its physical address as a frame buffer.
private void SetFrameBuffer(NvGpuVmm Vmm, int FbIndex)
{
//The registers of each frame buffer are 0x10 entries apart.
long VA = MakeInt64From2xInt32(NvGpuEngine3dReg.FrameBufferNAddress + FbIndex * 0x10);
long PA = Vmm.GetPhysicalAddress(VA);
FrameBuffers.Add(PA);
//Width and Height are read but currently not used, see the note below.
int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10);
int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10);
//Note: Using the Width/Height results seems to give incorrect results.
//Maybe the size of all frame buffers is hardcoded to screen size? This seems unlikely.
Gpu.Renderer.FrameBuffer.Create(PA, 1280, 720);
Gpu.Renderer.FrameBuffer.Bind(PA);
}
//Creates and binds the shader of every enabled program, and updates the
//flip state of the renderer from the sign of the viewport scale.
//Returns the shader addresses (keys), indexed by GalShaderType.
private long[] UploadShaders(NvGpuVmm Vmm)
{
long[] Keys = new long[5];
long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
//There are 6 programs, with registers 0x10 entries apart. Programs 0 and 1
//both map to the vertex stage, so when both are enabled the key of
//program 1 is the one that ends up on Keys.
for (int Index = 0; Index < 6; Index++)
{
int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10);
int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10);
//Note: Vertex Program (B) is always enabled.
bool Enable = (Control & 1) != 0 || Index == 1;
if (!Enable)
{
continue;
}
//The offset is unsigned and relative to the shader base address.
long Key = BasePosition + (uint)Offset;
GalShaderType ShaderType = GetTypeFromProgram(Index);
Keys[(int)ShaderType] = Key;
Gpu.Renderer.Shader.Create(Vmm, Key, ShaderType);
Gpu.Renderer.Shader.Bind(Key);
}
//The viewport scale registers hold the raw bits of float values.
int RawSX = ReadRegister(NvGpuEngine3dReg.ViewportScaleX);
int RawSY = ReadRegister(NvGpuEngine3dReg.ViewportScaleY);
float SX = BitConverter.Int32BitsToSingle(RawSX);
float SY = BitConverter.Int32BitsToSingle(RawSY);
float SignX = MathF.Sign(SX);
float SignY = MathF.Sign(SY);
Gpu.Renderer.Shader.SetFlip(SignX, SignY);
return Keys;
}
//Maps a shader program index (0 to 5) to the stage it runs on.
//Throws ArgumentOutOfRangeException for any other index.
private static GalShaderType GetTypeFromProgram(int Program)
{
    switch (Program)
    {
        //Both vertex programs run on the vertex stage.
        case 0:  return GalShaderType.Vertex;
        case 1:  return GalShaderType.Vertex;
        case 2:  return GalShaderType.TessControl;
        case 3:  return GalShaderType.TessEvaluation;
        case 4:  return GalShaderType.Geometry;
        case 5:  return GalShaderType.Fragment;
        default: throw new ArgumentOutOfRangeException(nameof(Program));
    }
}
//Applies the blend state from the registers to the renderer.
private void SetAlphaBlending()
{
    //TODO: Support independent blend properly.
    bool Enable = (ReadRegister(NvGpuEngine3dReg.IBlendNEnable) & 1) != 0;

    if (!Enable)
    {
        Gpu.Renderer.Blend.Disable();

        //If blend is not enabled, then the other values have no effect.
        //Note that if it is disabled, the register may contain invalid values.
        return;
    }

    Gpu.Renderer.Blend.Enable();

    bool BlendSeparateAlpha = (ReadRegister(NvGpuEngine3dReg.IBlendNSeparateAlpha) & 1) != 0;

    GalBlendEquation EquationRgb = (GalBlendEquation)ReadRegister(NvGpuEngine3dReg.IBlendNEquationRgb);

    GalBlendFactor FuncSrcRgb = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncSrcRgb);
    GalBlendFactor FuncDstRgb = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncDstRgb);

    if (!BlendSeparateAlpha)
    {
        //The same equation and factors are used for color and alpha.
        Gpu.Renderer.Blend.Set(EquationRgb, FuncSrcRgb, FuncDstRgb);

        return;
    }

    GalBlendEquation EquationAlpha = (GalBlendEquation)ReadRegister(NvGpuEngine3dReg.IBlendNEquationAlpha);

    GalBlendFactor FuncSrcAlpha = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncSrcAlpha);
    GalBlendFactor FuncDstAlpha = (GalBlendFactor)ReadRegister(NvGpuEngine3dReg.IBlendNFuncDstAlpha);

    Gpu.Renderer.Blend.SetSeparate(
        EquationRgb,
        EquationAlpha,
        FuncSrcRgb,
        FuncDstRgb,
        FuncSrcAlpha,
        FuncDstAlpha);
}
//Uploads and binds every texture used by the shaders in Keys
//(one key per stage), assigning them consecutive texture units.
private void UploadTextures(NvGpuVmm Vmm, long[] Keys)
{
long BaseShPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
//Index of the constant buffer that holds the texture handles.
int TextureCbIndex = ReadRegister(NvGpuEngine3dReg.TextureCbIndex);
//Note: On the emulator renderer, Texture Unit 0 is
//reserved for drawing the frame buffer.
int TexIndex = 1;
for (int Index = 0; Index < Keys.Length; Index++)
{
foreach (ShaderDeclInfo DeclInfo in Gpu.Renderer.Shader.GetTextureUsage(Keys[Index]))
{
//The texture handle is read from the constant buffer bound to this stage.
long Position = ConstBuffers[Index][TextureCbIndex].Position;
UploadTexture(Vmm, Position, TexIndex, DeclInfo.Index);
Gpu.Renderer.Shader.EnsureTextureBinding(DeclInfo.Name, TexIndex);
TexIndex++;
}
}
}
//Binds the texture referenced by handle number HndIndex, on the handle
//table located at BasePosition, to the texture unit TexIndex.
//Frame buffer textures are bound directly from the renderer, anything
//else is read from guest memory, unless a valid cached copy exists.
private void UploadTexture(NvGpuVmm Vmm, long BasePosition, int TexIndex, int HndIndex)
{
    //Each handle is 4 bytes long.
    long Position = BasePosition + HndIndex * 4;

    int TextureHandle = Vmm.ReadInt32(Position);

    if (TextureHandle == 0)
    {
        //TODO: Is this correct?
        //Some games like puyo puyo will have 0 handles.
        //It may be just normal behaviour or a bug caused by sync issues.
        //The game does initialize the value properly afterwards though.
        return;
    }

    //The handle packs the texture header index (bits 19:0)
    //and the sampler index (bits 31:20).
    int TicIndex = (TextureHandle >>  0) & 0xfffff;
    int TscIndex = (TextureHandle >> 20) & 0xfff;

    long TicPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexHeaderPoolOffset);
    long TscPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.TexSamplerPoolOffset);

    //Entries on both pools are 0x20 bytes long.
    TicPosition += TicIndex * 0x20;
    TscPosition += TscIndex * 0x20;

    GalTextureSampler Sampler = TextureFactory.MakeSampler(Gpu, Vmm, TscPosition);

    long TextureAddress = Vmm.ReadInt64(TicPosition + 4) & 0xffffffffffff;

    //The (virtual) texture address is used as key on the texture cache.
    long Key = TextureAddress;

    TextureAddress = Vmm.GetPhysicalAddress(TextureAddress);

    if (IsFrameBufferPosition(TextureAddress))
    {
        //This texture is a frame buffer texture,
        //we shouldn't read anything from memory and bind
        //the frame buffer texture instead, since we're not
        //really writing anything to memory.
        Gpu.Renderer.FrameBuffer.BindTexture(TextureAddress, TexIndex);
    }
    else
    {
        GalTexture NewTexture = TextureFactory.MakeTexture(Vmm, TicPosition);

        long Size = (uint)TextureHelper.GetTextureSize(NewTexture);

        //The cached texture can only be reused when it still matches the
        //texture description and the backing memory was not modified.
        bool HasCachedTexture =
            Gpu.Renderer.Texture.TryGetCachedTexture(Key, Size, out GalTexture Texture) &&
            NewTexture.Equals(Texture) &&
            !Vmm.IsRegionModified(Key, Size, NvGpuBufferType.Texture);

        if (!HasCachedTexture)
        {
            byte[] Data = TextureFactory.GetTextureData(Vmm, TicPosition);

            Gpu.Renderer.Texture.Create(Key, Data, NewTexture);
        }

        //Bind exactly once, no matter if the texture was cached or just created.
        Gpu.Renderer.Texture.Bind(Key, TexIndex);
    }

    Gpu.Renderer.Texture.SetSampler(Sampler);
}
//Sends the data of all the enabled constant buffers
//to the shader of each enabled stage.
private void UploadUniforms(NvGpuVmm Vmm)
{
long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);
//Index is the stage here, the registers of program 0 are skipped
//(hence the Index + 1), so stage 0 uses the registers of program 1.
for (int Index = 0; Index < 5; Index++)
{
int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + (Index + 1) * 0x10);
int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + (Index + 1) * 0x10);
//Note: Vertex Program (B) is always enabled.
bool Enable = (Control & 1) != 0 || Index == 0;
if (!Enable)
{
continue;
}
for (int Cbuf = 0; Cbuf < ConstBuffers[Index].Length; Cbuf++)
{
ConstBuffer Cb = ConstBuffers[Index][Cbuf];
if (Cb.Enabled)
{
byte[] Data = Vmm.ReadBytes(Cb.Position, (uint)Cb.Size);
//The shader is identified by its address (same key used by UploadShaders).
Gpu.Renderer.Shader.SetConstBuffer(BasePosition + (uint)Offset, Cbuf, Data);
}
}
}
}
//Uploads the index buffer (if any) and the enabled vertex buffers,
//configures the vertex attributes and issues the draw call.
//Throws InvalidOperationException when the index format
//gives an index size greater than 4 bytes.
private void UploadVertexArrays(NvGpuVmm Vmm)
{
long IndexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.IndexArrayAddress);
int IndexEntryFmt = ReadRegister(NvGpuEngine3dReg.IndexArrayFormat);
int IndexFirst = ReadRegister(NvGpuEngine3dReg.IndexBatchFirst);
int IndexCount = ReadRegister(NvGpuEngine3dReg.IndexBatchCount);
GalIndexFormat IndexFormat = (GalIndexFormat)IndexEntryFmt;
//The format register value is the log2 of the index size in bytes.
int IndexEntrySize = 1 << IndexEntryFmt;
if (IndexEntrySize > 4)
{
throw new InvalidOperationException();
}
//An index count of zero means that this is a non-indexed draw.
if (IndexCount != 0)
{
int IbSize = IndexCount * IndexEntrySize;
bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IndexPosition, (uint)IbSize);
//Only upload the indices again if they are not cached or were modified.
if (!IboCached || Vmm.IsRegionModified(IndexPosition, (uint)IbSize, NvGpuBufferType.Index))
{
byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize);
Gpu.Renderer.Rasterizer.CreateIbo(IndexPosition, Data);
}
Gpu.Renderer.Rasterizer.SetIndexArray(IndexPosition, IbSize, IndexFormat);
}
//Group the 16 vertex attributes by the vertex array they read
//from (bits 4:0 of the packed attribute format).
List<GalVertexAttrib>[] Attribs = new List<GalVertexAttrib>[32];
for (int Attr = 0; Attr < 16; Attr++)
{
int Packed = ReadRegister(NvGpuEngine3dReg.VertexAttribNFormat + Attr);
int ArrayIndex = Packed & 0x1f;
if (Attribs[ArrayIndex] == null)
{
Attribs[ArrayIndex] = new List<GalVertexAttrib>();
}
//The remaining fields are bit 6, bits 20:7, bits 26:21 (size),
//bits 29:27 (type) and bit 31.
Attribs[ArrayIndex].Add(new GalVertexAttrib(
Attr,
((Packed >> 6) & 0x1) != 0,
(Packed >> 7) & 0x3fff,
(GalVertexAttribSize)((Packed >> 21) & 0x3f),
(GalVertexAttribType)((Packed >> 27) & 0x7),
((Packed >> 31) & 0x1) != 0));
}
int VertexFirst = ReadRegister(NvGpuEngine3dReg.VertexArrayFirst);
int VertexCount = ReadRegister(NvGpuEngine3dReg.VertexArrayCount);
int PrimCtrl = ReadRegister(NvGpuEngine3dReg.VertexBeginGl);
for (int Index = 0; Index < 32; Index++)
{
//Vertex arrays that are not used by any attribute are skipped.
if (Attribs[Index] == null)
{
continue;
}
//The control and address registers of each array are 4 entries apart,
//and the end address registers are 2 entries apart.
int Control = ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + Index * 4);
bool Enable = (Control & 0x1000) != 0;
long VertexPosition = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNAddress + Index * 4);
long VertexEndPos = MakeInt64From2xInt32(NvGpuEngine3dReg.VertexArrayNEndAddr + Index * 2);
if (!Enable)
{
continue;
}
int Stride = Control & 0xfff;
long VbSize = 0;
//On indexed draws the vertex count is unknown, so the whole array
//(up to the inclusive end address) is used instead.
if (IndexCount != 0)
{
VbSize = (VertexEndPos - VertexPosition) + 1;
}
else
{
VbSize = VertexCount * Stride;
}
bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VertexPosition, VbSize);
//Only upload the vertices again if they are not cached or were modified.
if (!VboCached || Vmm.IsRegionModified(VertexPosition, VbSize, NvGpuBufferType.Vertex))
{
byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize);
Gpu.Renderer.Rasterizer.CreateVbo(VertexPosition, Data);
}
Gpu.Renderer.Rasterizer.SetVertexArray(Index, Stride, VertexPosition, Attribs[Index].ToArray());
}
//The primitive type is on the lower 16 bits of the begin register.
GalPrimitiveType PrimType = (GalPrimitiveType)(PrimCtrl & 0xffff);
if (IndexCount != 0)
{
Gpu.Renderer.Rasterizer.DrawElements(IndexPosition, IndexFirst, PrimType);
}
else
{
Gpu.Renderer.Rasterizer.DrawArrays(VertexFirst, VertexCount, PrimType);
}
}
//Handler of method 0x6c3. When the mode is 0, the query sequence
//value is written to guest memory at the query address.
private void QueryControl(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.QueryAddress);
int Seq = Registers[(int)NvGpuEngine3dReg.QuerySequence];
//Note that this reads the control value before the entry being processed
//is written to the register, which only happens at the end of this method.
int Ctrl = Registers[(int)NvGpuEngine3dReg.QueryControl];
int Mode = Ctrl & 3;
if (Mode == 0)
{
//Write mode.
Vmm.WriteInt32(Position, Seq);
}
WriteRegister(PBEntry);
}
//Handler of methods 0x8e4 to 0x8f3. Writes the arguments, as consecutive
//32-bit values, to the constant buffer at ConstBufferAddress, starting at
//ConstBufferOffset, and then advances the offset register accordingly.
private void CbData(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);
int Offset = ReadRegister(NvGpuEngine3dReg.ConstBufferOffset);
foreach (int Arg in PBEntry.Arguments)
{
Vmm.WriteInt32(Position + Offset, Arg);
Offset += 4;
}
WriteRegister(NvGpuEngine3dReg.ConstBufferOffset, Offset);
}
//Handler of the constant buffer bind methods (0x904 + Stage * 8).
//Records the current ConstBufferAddress/ConstBufferSize as the
//binding selected by the argument, for the stage selected by the method.
private void CbBind(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
int Stage = (PBEntry.Method - 0x904) >> 3;
//Bit 0 of the argument is the enable flag, bits 8:4 the binding index.
int Index = PBEntry.Arguments[0];
bool Enabled = (Index & 1) != 0;
Index = (Index >> 4) & 0x1f;
//NOTE(review): The mask allows indices up to 31, but each stage only
//has 18 bindings, so an index of 18 or more throws IndexOutOfRangeException.
long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);
ConstBuffers[Stage][Index].Position = Position;
ConstBuffers[Stage][Index].Enabled = Enabled;
ConstBuffers[Stage][Index].Size = ReadRegister(NvGpuEngine3dReg.ConstBufferSize);
}
//Builds a 64-bit value from two consecutive registers:
//the register at Reg holds the high word, and the next one the low word.
private long MakeInt64From2xInt32(NvGpuEngine3dReg Reg)
{
    int Base = (int)Reg;

    long High = (long)Registers[Base] << 32;
    long Low  = (uint)Registers[Base + 1];

    return High | Low;
}
//Stores the last argument of the entry on the register with the same
//number as the method. Entries without arguments are ignored.
private void WriteRegister(NvGpuPBEntry PBEntry)
{
    int LastIndex = PBEntry.Arguments.Count - 1;

    if (LastIndex < 0)
    {
        return;
    }

    Registers[PBEntry.Method] = PBEntry.Arguments[LastIndex];
}
//Current value of the given 3D engine register.
private int ReadRegister(NvGpuEngine3dReg Reg) => Registers[(int)Reg];
//Overwrites the given 3D engine register with Value.
private void WriteRegister(NvGpuEngine3dReg Reg, int Value) => Registers[(int)Reg] = Value;
//Tells whether Position is the physical address of a frame buffer
//that was previously set up by SetFrameBuffer.
public bool IsFrameBufferPosition(long Position) => FrameBuffers.Contains(Position);
}
}

View file

@ -0,0 +1,61 @@
namespace Ryujinx.HLE.Gpu.Engines
{
//Indices of the 3D engine registers on the engine register file.
//Members with an "N" on the name are the first element of a register array,
//the users add an index multiplied by the array stride to them
//(for example, FrameBufferNAddress + FbIndex * 0x10).
//Address registers span two consecutive entries (high word first).
enum NvGpuEngine3dReg
{
FrameBufferNAddress = 0x200,
FrameBufferNWidth = 0x202,
FrameBufferNHeight = 0x203,
FrameBufferNFormat = 0x204,
ViewportScaleX = 0x280,
ViewportScaleY = 0x281,
ViewportScaleZ = 0x282,
ViewportTranslateX = 0x283,
ViewportTranslateY = 0x284,
ViewportTranslateZ = 0x285,
VertexArrayFirst = 0x35d,
VertexArrayCount = 0x35e,
VertexAttribNFormat = 0x458,
IBlendEnable = 0x4b9,
BlendSeparateAlpha = 0x4cf,
BlendEquationRgb = 0x4d0,
BlendFuncSrcRgb = 0x4d1,
BlendFuncDstRgb = 0x4d2,
BlendEquationAlpha = 0x4d3,
BlendFuncSrcAlpha = 0x4d4,
BlendFuncDstAlpha = 0x4d6,
BlendEnableMaster = 0x4d7,
IBlendNEnable = 0x4d8,
VertexArrayElemBase = 0x50d,
TexHeaderPoolOffset = 0x55d,
TexSamplerPoolOffset = 0x557,
ShaderAddress = 0x582,
VertexBeginGl = 0x586,
IndexArrayAddress = 0x5f2,
IndexArrayEndAddr = 0x5f4,
IndexArrayFormat = 0x5f6,
IndexBatchFirst = 0x5f7,
IndexBatchCount = 0x5f8,
QueryAddress = 0x6c0,
QuerySequence = 0x6c2,
QueryControl = 0x6c3,
VertexArrayNControl = 0x700,
VertexArrayNAddress = 0x701,
VertexArrayNDivisor = 0x703,
IBlendNSeparateAlpha = 0x780,
IBlendNEquationRgb = 0x781,
IBlendNFuncSrcRgb = 0x782,
IBlendNFuncDstRgb = 0x783,
IBlendNEquationAlpha = 0x784,
IBlendNFuncSrcAlpha = 0x785,
IBlendNFuncDstAlpha = 0x786,
VertexArrayNEndAddr = 0x7c0,
ShaderNControl = 0x800,
ShaderNOffset = 0x801,
ShaderNMaxGprs = 0x803,
ShaderNType = 0x804,
ConstBufferSize = 0x8e0,
ConstBufferAddress = 0x8e1,
ConstBufferOffset = 0x8e3,
TextureCbIndex = 0x982
}
}

View file

@ -0,0 +1,143 @@
using Ryujinx.HLE.Gpu.Memory;
using Ryujinx.HLE.Gpu.Texture;
using System.Collections.Generic;
namespace Ryujinx.HLE.Gpu.Engines
{
class NvGpuEngineDma : INvGpuEngine
{
public int[] Registers { get; private set; }
private NvGpu Gpu;
private Dictionary<int, NvGpuMethod> Methods;
//Creates the DMA engine, allocating its register file and
//registering the methods that have a dedicated handler.
public NvGpuEngineDma(NvGpu Gpu)
{
this.Gpu = Gpu;
//NOTE(review): Any method number of 0x1d6 or more that falls through to
//WriteRegister throws IndexOutOfRangeException, as the register file only
//has 0x1d6 entries. Confirm that this is the full range of the engine.
Registers = new int[0x1d6];
Methods = new Dictionary<int, NvGpuMethod>();
//Registers the same handler for Count method numbers,
//starting at Meth and spaced by Stride.
void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
{
while (Count-- > 0)
{
Methods.Add(Meth, Method);
Meth += Stride;
}
}
AddMethod(0xc0, 1, 1, Execute);
}
//Dispatches a push buffer entry: methods with a registered handler are
//executed, anything else is treated as a plain register write.
public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    if (!Methods.TryGetValue(PBEntry.Method, out NvGpuMethod Handler))
    {
        WriteRegister(PBEntry);

        return;
    }

    Handler(Vmm, PBEntry);
}
//Handler of method 0xc0. Copies a DstSizeX x DstSizeY region, one byte at
//a time, from the source to the destination address, converting between
//the linear and block linear layouts as selected by the control argument.
//Note: The Z sizes and the X/Y/Z positions are read but currently not
//taken into account by the copy.
private void Execute(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
int Control = PBEntry.Arguments[0];
//Bit 7 selects a linear source, and bit 8 a linear destination.
bool SrcLinear = ((Control >> 7) & 1) != 0;
bool DstLinear = ((Control >> 8) & 1) != 0;
long SrcAddress = MakeInt64From2xInt32(NvGpuEngineDmaReg.SrcAddress);
long DstAddress = MakeInt64From2xInt32(NvGpuEngineDmaReg.DstAddress);
int SrcPitch = ReadRegister(NvGpuEngineDmaReg.SrcPitch);
int DstPitch = ReadRegister(NvGpuEngineDmaReg.DstPitch);
int DstBlkDim = ReadRegister(NvGpuEngineDmaReg.DstBlkDim);
int DstSizeX = ReadRegister(NvGpuEngineDmaReg.DstSizeX);
int DstSizeY = ReadRegister(NvGpuEngineDmaReg.DstSizeY);
int DstSizeZ = ReadRegister(NvGpuEngineDmaReg.DstSizeZ);
int DstPosXY = ReadRegister(NvGpuEngineDmaReg.DstPosXY);
int DstPosZ = ReadRegister(NvGpuEngineDmaReg.DstPosZ);
int SrcBlkDim = ReadRegister(NvGpuEngineDmaReg.SrcBlkDim);
int SrcSizeX = ReadRegister(NvGpuEngineDmaReg.SrcSizeX);
int SrcSizeY = ReadRegister(NvGpuEngineDmaReg.SrcSizeY);
int SrcSizeZ = ReadRegister(NvGpuEngineDmaReg.SrcSizeZ);
int SrcPosXY = ReadRegister(NvGpuEngineDmaReg.SrcPosXY);
int SrcPosZ = ReadRegister(NvGpuEngineDmaReg.SrcPosZ);
//The X position is on the lower 16 bits, and the Y position on the upper 16 bits.
int DstPosX = (DstPosXY >> 0) & 0xffff;
int DstPosY = (DstPosXY >> 16) & 0xffff;
int SrcPosX = (SrcPosXY >> 0) & 0xffff;
int SrcPosY = (SrcPosXY >> 16) & 0xffff;
//The block height is stored as a log2 value on bits 7:4.
int SrcBlockHeight = 1 << ((SrcBlkDim >> 4) & 0xf);
int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf);
//Both swizzles are created with 1 as second argument,
//presumably the size in bytes of each element (TODO confirm).
ISwizzle SrcSwizzle;
if (SrcLinear)
{
SrcSwizzle = new LinearSwizzle(SrcPitch, 1);
}
else
{
SrcSwizzle = new BlockLinearSwizzle(SrcSizeX, 1, SrcBlockHeight);
}
ISwizzle DstSwizzle;
if (DstLinear)
{
DstSwizzle = new LinearSwizzle(DstPitch, 1);
}
else
{
DstSwizzle = new BlockLinearSwizzle(DstSizeX, 1, DstBlockHeight);
}
//The destination size drives the copy on both sides.
for (int Y = 0; Y < DstSizeY; Y++)
for (int X = 0; X < DstSizeX; X++)
{
long SrcOffset = SrcAddress + (uint)SrcSwizzle.GetSwizzleOffset(X, Y);
long DstOffset = DstAddress + (uint)DstSwizzle.GetSwizzleOffset(X, Y);
Vmm.WriteByte(DstOffset, Vmm.ReadByte(SrcOffset));
}
}
//Builds a 64-bit value from two consecutive registers:
//the register at Reg holds the high word, and the next one the low word.
private long MakeInt64From2xInt32(NvGpuEngineDmaReg Reg)
{
    int Base = (int)Reg;

    long High = (long)Registers[Base] << 32;
    long Low  = (uint)Registers[Base + 1];

    return High | Low;
}
//Stores the last argument of the entry on the register with the same
//number as the method. Entries without arguments are ignored.
private void WriteRegister(NvGpuPBEntry PBEntry)
{
    int LastIndex = PBEntry.Arguments.Count - 1;

    if (LastIndex < 0)
    {
        return;
    }

    Registers[PBEntry.Method] = PBEntry.Arguments[LastIndex];
}
//Current value of the given DMA engine register.
private int ReadRegister(NvGpuEngineDmaReg Reg) => Registers[(int)Reg];
//Overwrites the given DMA engine register with Value.
private void WriteRegister(NvGpuEngineDmaReg Reg, int Value) => Registers[(int)Reg] = Value;
}
}

View file

@ -0,0 +1,22 @@
namespace Ryujinx.HLE.Gpu.Engines
{
//Indices of the DMA engine registers on the engine register file.
//The address registers span two consecutive entries (high word first).
enum NvGpuEngineDmaReg
{
SrcAddress = 0x100,
DstAddress = 0x102,
SrcPitch = 0x104,
DstPitch = 0x105,
//Destination surface.
DstBlkDim = 0x1c3,
DstSizeX = 0x1c4,
DstSizeY = 0x1c5,
DstSizeZ = 0x1c6,
DstPosZ = 0x1c7,
DstPosXY = 0x1c8,
//Source surface.
SrcBlkDim = 0x1ca,
SrcSizeX = 0x1cb,
SrcSizeY = 0x1cc,
SrcSizeZ = 0x1cd,
SrcPosZ = 0x1ce,
SrcPosXY = 0x1cf
}
}

View file

@ -0,0 +1,183 @@
using Ryujinx.HLE.Gpu.Memory;
using System.Collections.Concurrent;
namespace Ryujinx.HLE.Gpu.Engines
{
class NvGpuFifo
{
private const int MacrosCount = 0x80;
private const int MacroIndexMask = MacrosCount - 1;
//Note: The size of the macro memory is unknown, we just make
//a guess here and use 256kb as the size. Increase if needed.
private const int MmeWords = 256 * 256;
private NvGpu Gpu;
private ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry)> BufferQueue;
private NvGpuEngine[] SubChannels;
//A macro bound to a macro index, with its own interpreter instance.
//The default value of the struct (used for the indices that never had a
//macro bound) has a null interpreter, which turns PushParam and Execute
//into no-ops thanks to the null-conditional calls below.
private struct CachedMacro
{
//Index of the first word of the macro code inside of the macro memory.
public int Position { get; private set; }
private MacroInterpreter Interpreter;
public CachedMacro(NvGpuFifo PFifo, INvGpuEngine Engine, int Position)
{
this.Position = Position;
Interpreter = new MacroInterpreter(PFifo, Engine);
}
//Queues a parameter to be consumed by the macro.
public void PushParam(int Param)
{
Interpreter?.Fifo.Enqueue(Param);
}
//Runs the macro, with Param as the first argument.
public void Execute(NvGpuVmm Vmm, int[] Mme, int Param)
{
Interpreter?.Execute(Vmm, Mme, Position, Param);
}
}
private int CurrMacroPosition;
private int CurrMacroBindIndex;
private CachedMacro[] Macros;
private int[] Mme;
//Creates the FIFO with an empty queue, no engine bound to any of the
//8 sub channels, no macros bound, and a zeroed macro memory.
public NvGpuFifo(NvGpu Gpu)
{
this.Gpu = Gpu;
BufferQueue = new ConcurrentQueue<(NvGpuVmm, NvGpuPBEntry)>();
SubChannels = new NvGpuEngine[8];
Macros = new CachedMacro[MacrosCount];
Mme = new int[MmeWords];
}
//Queues all the entries of a push buffer, in order, for later processing.
//Each entry is queued along with the memory manager it should use.
public void PushBuffer(NvGpuVmm Vmm, NvGpuPBEntry[] Buffer)
{
    for (int Index = 0; Index < Buffer.Length; Index++)
    {
        BufferQueue.Enqueue((Vmm, Buffer[Index]));
    }
}
//Processes queued entries until the queue is empty.
public void DispatchCalls()
{
    bool Processed;

    do
    {
        Processed = Step();
    }
    while (Processed);
}
//Processes a single queued entry, if there is one.
//Returns true if an entry was processed, or false if the queue was empty.
public bool Step()
{
    bool HasEntry = BufferQueue.TryDequeue(out (NvGpuVmm Vmm, NvGpuPBEntry PBEntry) Entry);

    if (HasEntry)
    {
        CallMethod(Entry.Vmm, Entry.PBEntry);
    }

    return HasEntry;
}
//Processes a single entry. Methods below 0x80 are handled by the FIFO
//itself, the others are forwarded to the engine bound to the sub channel.
private void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
if (PBEntry.Method < 0x80)
{
//FIFO methods that are not listed here are ignored.
switch ((NvGpuFifoMeth)PBEntry.Method)
{
case NvGpuFifoMeth.BindChannel:
{
//Binds the engine given by the argument to the sub channel of the entry.
NvGpuEngine Engine = (NvGpuEngine)PBEntry.Arguments[0];
SubChannels[PBEntry.SubChannel] = Engine;
break;
}
case NvGpuFifoMeth.SetMacroUploadAddress:
{
CurrMacroPosition = PBEntry.Arguments[0];
break;
}
case NvGpuFifoMeth.SendMacroCodeData:
{
//The code is written at the upload position, which
//advances by one word for each argument.
foreach (int Arg in PBEntry.Arguments)
{
Mme[CurrMacroPosition++] = Arg;
}
break;
}
case NvGpuFifoMeth.SetMacroBindingIndex:
{
CurrMacroBindIndex = PBEntry.Arguments[0];
break;
}
case NvGpuFifoMeth.BindMacro:
{
//Binds the macro starting at the position given by the argument
//to the current binding index. Macros always target the 3D engine.
int Position = PBEntry.Arguments[0];
Macros[CurrMacroBindIndex] = new CachedMacro(this, Gpu.Engine3d, Position);
break;
}
}
}
else
{
//Entries for sub channels without a supported engine bound are ignored.
switch (SubChannels[PBEntry.SubChannel])
{
case NvGpuEngine._2d: Call2dMethod (Vmm, PBEntry); break;
case NvGpuEngine._3d: Call3dMethod (Vmm, PBEntry); break;
case NvGpuEngine.Dma: CallDmaMethod(Vmm, PBEntry); break;
}
}
}
//Forwards the entry to the 2D engine.
private void Call2dMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) => Gpu.Engine2d.CallMethod(Vmm, PBEntry);
//Forwards the entry to the 3D engine. Methods from 0xe00 onwards are not
//engine methods, they are used to call the macros bound to the FIFO.
private void Call3dMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
if (PBEntry.Method < 0xe00)
{
Gpu.Engine3d.CallMethod(Vmm, PBEntry);
}
else
{
//Each macro takes two method numbers: the even one starts the macro,
//and the odd one pushes extra parameters for it.
int MacroIndex = (PBEntry.Method >> 1) & MacroIndexMask;
if ((PBEntry.Method & 1) != 0)
{
foreach (int Arg in PBEntry.Arguments)
{
Macros[MacroIndex].PushParam(Arg);
}
}
else
{
Macros[MacroIndex].Execute(Vmm, Mme, PBEntry.Arguments[0]);
}
}
}
//Forwards the entry to the DMA engine.
private void CallDmaMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry) => Gpu.EngineDma.CallMethod(Vmm, PBEntry);
}
}

View file

@ -0,0 +1,11 @@
namespace Ryujinx.HLE.Gpu.Engines
{
//Numbers of the methods that are handled by the FIFO itself,
//rather than being forwarded to an engine.
enum NvGpuFifoMeth
{
BindChannel = 0,
SetMacroUploadAddress = 0x45,
SendMacroCodeData = 0x46,
SetMacroBindingIndex = 0x47,
BindMacro = 0x48
}
}

View file

@ -0,0 +1,6 @@
using Ryujinx.HLE.Gpu.Memory;
namespace Ryujinx.HLE.Gpu.Engines
{
//Signature of the engine method handlers. Vmm is the memory manager to be
//used by the method, and PBEntry the push buffer entry that triggered it.
delegate void NvGpuMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry);
}