Add support for Vertex Program A and other small shader improvements (#192)

* Add WIP support for Vertex Program A, add the FADD_I32 shader instruction, small fix on FFMA_I encoding, nits
* Add separate subroutines for program A/B, and copy attributes to a temp
* Move finalization code to main
* Add new line after flip uniform on the shader
* Handle possible case where VPB uses an output attribute written by VPA but not available on the vbo
* Address PR feedback
2018-06-27 23:55:08 -03:00 · 2018-06-27 23:55:08 -03:00 · e6eeb6f09f
commit e6eeb6f09f
parent 900a84ae0a
9 changed files with 303 additions and 78 deletions
--- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
+++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs
@@ -126,7 +126,33 @@ namespace Ryujinx.HLE.Gpu.Engines

            long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);

-            for (int Index = 0; Index < 6; Index++)
+            int Index = 1;
+
+            int VpAControl = ReadRegister(NvGpuEngine3dReg.ShaderNControl);
+
+            bool VpAEnable = (VpAControl & 1) != 0;
+
+            if (VpAEnable)
+            {
+                //Note: The maxwell supports 2 vertex programs, usually
+                //only VP B is used, but in some cases VP A is also used.
+                //In this case, it seems to function as an extra vertex
+                //shader stage.
+                //The graphics abstraction layer has a special overload for this
+                //case, which should merge the two shaders into one vertex shader.
+                int VpAOffset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset);
+                int VpBOffset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + 0x10);
+
+                long VpAPos = BasePosition + (uint)VpAOffset;
+                long VpBPos = BasePosition + (uint)VpBOffset;
+
+                Gpu.Renderer.Shader.Create(Vmm, VpAPos, VpBPos, GalShaderType.Vertex);
+                Gpu.Renderer.Shader.Bind(VpBPos);
+
+                Index = 2;
+            }
+
+            for (; Index < 6; Index++)
            {
                int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10);
                int Offset  = ReadRegister(NvGpuEngine3dReg.ShaderNOffset  + Index * 0x10);