Move solution and projects to src

This commit is contained in:
TSR Berry 2023-04-08 01:22:00 +02:00 committed by Mary
parent cd124bda58
commit cee7121058
3466 changed files with 55 additions and 55 deletions

View file

@ -0,0 +1,9 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
/// <summary>
/// Bit depths known to the decoder; each member's numeric value equals its bit count.
/// </summary>
internal enum BitDepth
{
    /// <summary>8 bits.</summary>
    Bits8 = 8,

    /// <summary>10 bits.</summary>
    Bits10 = 10,

    /// <summary>12 bits.</summary>
    Bits12 = 12
}
}

View file

@ -0,0 +1,56 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
/// <summary>
/// Codec result codes. Values are consecutive, starting at zero for <see cref="CodecOk"/>.
/// </summary>
internal enum CodecErr
{
    /// <summary>Operation completed without error.</summary>
    CodecOk = 0,

    /// <summary>Unspecified error.</summary>
    CodecError = 1,

    /// <summary>Memory operation failed.</summary>
    CodecMemError = 2,

    /// <summary>ABI version mismatch.</summary>
    CodecAbiMismatch = 3,

    /// <summary>Algorithm does not have required capability.</summary>
    CodecIncapable = 4,

    /// <summary>
    /// The given bitstream is not supported.
    /// The bitstream was unable to be parsed at the highest level and the decoder
    /// is unable to proceed. This error SHOULD be treated as fatal to the stream.
    /// </summary>
    CodecUnsupBitstream = 5,

    /// <summary>
    /// Encoded bitstream uses an unsupported feature.
    /// The decoder does not implement a feature required by the encoder. This
    /// return code should only be used for features that prevent future pictures
    /// from being properly decoded. This error MAY be treated as fatal to the
    /// stream or MAY be treated as fatal to the current GOP.
    /// </summary>
    CodecUnsupFeature = 6,

    /// <summary>
    /// The coded data for this stream is corrupt or incomplete.
    /// There was a problem decoding the current frame. This return code should
    /// only be used for failures that prevent future pictures from being properly
    /// decoded. This error MAY be treated as fatal to the stream or MAY be treated
    /// as fatal to the current GOP. If decoding is continued for the current GOP,
    /// artifacts may be present.
    /// </summary>
    CodecCorruptFrame = 7,

    /// <summary>An application-supplied parameter is not valid.</summary>
    CodecInvalidParam = 8,

    /// <summary>An iterator reached the end of list.</summary>
    CodecListEnd = 9
}
}

View file

@ -0,0 +1,58 @@
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
/// <summary>
/// Integer helpers for clamping, rounding and bit counting.
/// </summary>
internal static class BitUtils
{
    /// <summary>Clamps <paramref name="val"/> to the range [0, 255].</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static byte ClipPixel(int val)
    {
        if (val > 255)
        {
            return 255;
        }

        if (val < 0)
        {
            return 0;
        }

        return (byte)val;
    }

    /// <summary>
    /// Clamps <paramref name="val"/> to [0, max], where max is 1023 for a bit depth of 10,
    /// 4095 for a bit depth of 12 and 255 for every other value of <paramref name="bd"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ushort ClipPixelHighbd(int val, int bd)
    {
        int upperBound = bd switch
        {
            10 => 1023,
            12 => 4095,
            _ => 255
        };

        return (ushort)Math.Clamp(val, 0, upperBound);
    }

    /// <summary>
    /// Adds half of 2^<paramref name="n"/> to <paramref name="value"/> and shifts right by
    /// <paramref name="n"/> bits.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int RoundPowerOfTwo(int value, int n)
    {
        int half = 1 << (n - 1);

        return (value + half) >> n;
    }

    /// <summary>64-bit variant of <see cref="RoundPowerOfTwo(int, int)"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static long RoundPowerOfTwo(long value, int n)
    {
        long half = 1L << (n - 1);

        return (value + half) >> n;
    }

    /// <summary>
    /// Rounds <paramref name="value"/> up to the next multiple of 2^<paramref name="n"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int AlignPowerOfTwo(int value, int n)
    {
        int mask = (1 << n) - 1;

        return (value + mask) & ~mask;
    }

    /// <summary>
    /// Index of the most significant set bit of <paramref name="n"/>, which must be non-zero.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int GetMsb(uint n)
    {
        Debug.Assert(n != 0);

        int leadingZeros = BitOperations.LeadingZeroCount(n);

        return leadingZeros ^ 31;
    }

    /// <summary>
    /// Position of the highest set bit of <paramref name="numValues"/> plus one, or 0 when it is zero.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int GetUnsignedBits(uint numValues)
    {
        if (numValues == 0)
        {
            return 0;
        }

        return GetMsb(numValues) + 1;
    }
}
}

View file

@ -0,0 +1,94 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
/// <summary>
/// Allocator for unmanaged buffers that keeps up to <see cref="PoolEntries"/> blocks around for reuse.
/// A freed block stays cached in its slot and is handed out again by the next request of exactly
/// the same byte size; a cached block is released when its slot is needed for a different size.
/// Blocks requested while every slot is busy are tracked separately so that
/// <see cref="Free{T}"/> and <see cref="Dispose"/> can still release them.
/// </summary>
internal class MemoryAllocator : IDisposable
{
    private const int PoolEntries = 10;

    private struct PoolItem
    {
        public IntPtr Pointer;
        public int Length;
        public bool InUse;
    }

    private readonly PoolItem[] _pool = new PoolItem[PoolEntries];

    // Blocks handed out while all pool slots were in use. Without this record they could never
    // be matched by Free and would leak.
    private readonly System.Collections.Generic.HashSet<IntPtr> _overflow =
        new System.Collections.Generic.HashSet<IntPtr>();

    /// <summary>
    /// Returns a buffer of <paramref name="length"/> elements of <typeparamref name="T"/>.
    /// The memory is not cleared; a reused block still holds its previous contents.
    /// </summary>
    /// <param name="length">Number of elements.</param>
    /// <exception cref="OverflowException">The size in bytes does not fit in an <see cref="int"/>.</exception>
    public ArrayPtr<T> Allocate<T>(int length) where T : unmanaged
    {
        // Checked so that an overflowing size cannot silently produce an undersized block.
        int lengthInBytes = checked(Unsafe.SizeOf<T>() * length);

        // First choice: a cached block of exactly the requested size. The pointer check keeps
        // never-used slots (Length == 0, Pointer == 0) from matching a zero-byte request.
        for (int i = 0; i < PoolEntries; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (!item.InUse && item.Pointer != IntPtr.Zero && item.Length == lengthInBytes)
            {
                item.InUse = true;

                return new ArrayPtr<T>(item.Pointer, length);
            }
        }

        IntPtr ptr = Marshal.AllocHGlobal(lengthInBytes);
        bool pooled = false;

        // Record the new block in the first idle slot, releasing whatever that slot cached.
        for (int i = 0; i < PoolEntries; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (!item.InUse)
            {
                if (item.Pointer != IntPtr.Zero)
                {
                    Marshal.FreeHGlobal(item.Pointer);
                }

                item.Pointer = ptr;
                item.Length = lengthInBytes;
                item.InUse = true;
                pooled = true;

                break;
            }
        }

        if (!pooled)
        {
            _overflow.Add(ptr);
        }

        return new ArrayPtr<T>(ptr, length);
    }

    /// <summary>
    /// Returns <paramref name="arr"/> to the allocator. A pooled block is only marked idle (its
    /// memory stays cached); a block that did not fit in the pool is released immediately.
    /// Pointers this allocator does not know are ignored.
    /// </summary>
    public unsafe void Free<T>(ArrayPtr<T> arr) where T : unmanaged
    {
        IntPtr ptr = (IntPtr)arr.ToPointer();

        if (ptr == IntPtr.Zero)
        {
            return;
        }

        for (int i = 0; i < PoolEntries; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (item.Pointer == ptr)
            {
                item.InUse = false;

                return;
            }
        }

        if (_overflow.Remove(ptr))
        {
            Marshal.FreeHGlobal(ptr);
        }
    }

    /// <summary>
    /// Releases every block still owned by the allocator and resets all slots.
    /// </summary>
    public void Dispose()
    {
        for (int i = 0; i < PoolEntries; i++)
        {
            ref PoolItem item = ref _pool[i];

            if (item.Pointer != IntPtr.Zero)
            {
                Marshal.FreeHGlobal(item.Pointer);
            }

            // Clear Length/InUse as well so a disposed slot can never be mistaken for a cached block.
            item = default;
        }

        foreach (IntPtr ptr in _overflow)
        {
            Marshal.FreeHGlobal(ptr);
        }

        _overflow.Clear();
    }
}
}

View file

@ -0,0 +1,23 @@
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Common
{
/// <summary>
/// Copy and fill helpers for unmanaged values.
/// </summary>
internal static class MemoryUtil
{
    /// <summary>
    /// Copies <paramref name="length"/> elements from <paramref name="source"/> to <paramref name="dest"/>.
    /// </summary>
    public static unsafe void Copy<T>(T* dest, T* source, int length) where T : unmanaged
    {
        ReadOnlySpan<T> from = new ReadOnlySpan<T>(source, length);
        Span<T> to = new Span<T>(dest, length);

        from.CopyTo(to);
    }

    /// <summary>Copies the single value <paramref name="source"/> into <paramref name="dest"/>.</summary>
    public static void Copy<T>(ref T dest, ref T source) where T : unmanaged
    {
        dest = source;
    }

    /// <summary>
    /// Sets <paramref name="length"/> elements starting at <paramref name="ptr"/> to <paramref name="value"/>.
    /// </summary>
    public static unsafe void Fill<T>(T* ptr, T value, int length) where T : unmanaged
    {
        Span<T> target = new Span<T>(ptr, length);

        target.Fill(value);
    }
}
}

View file

@ -0,0 +1,69 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
/// <summary>
/// Compile-time constants shared by the VP9 decoder.
/// </summary>
internal static class Constants
{
    public const int Vp9InterpExtend = 4;

    public const int MaxMbPlane = 3;

    // Reference frame identifiers.
    public const int None = -1;
    public const int IntraFrame = 0;
    public const int LastFrame = 1;
    public const int GoldenFrame = 2;
    public const int AltRefFrame = 3;
    public const int MaxRefFrames = 4;

    // Mode-info (mi) unit geometry.
    public const int MiSizeLog2 = 3;
    public const int MiBlockSizeLog2 = 6 - MiSizeLog2; // 64 = 2^6

    public const int MiSize = 1 << MiSizeLog2; // Pixels per mi-unit
    public const int MiBlockSize = 1 << MiBlockSizeLog2; // Mi-units per max block
    public const int MiMask = MiBlockSize - 1;

    public const int PartitionPloffset = 4; // Number of probability models per block size

    public const int MaxMvRefCandidates = 2;

    public const int CompInterContexts = 5;
    public const int RefContexts = 5;

    // Interpolation filter identifiers.
    public const int EightTap = 0;
    public const int EightTapSmooth = 1;
    public const int EightTapSharp = 2;
    public const int SwitchableFilters = 3; // Number of switchable filters
    public const int Bilinear = 3;
    public const int Switchable = 4; // Should be the last one

    // Frame
    public const int RefsPerFrame = 3;

    public const int NumPingPongBuffers = 2;

    // Motion vectors.
    public const int Class0Bits = 1; // Bits at integer precision for class 0
    public const int Class0Size = 1 << Class0Bits;

    public const int MvInUseBits = 14;
    public const int MvUpp = (1 << MvInUseBits) - 1;
    public const int MvLow = -(1 << MvInUseBits);

    // Coefficient token alphabet
    public const int ZeroToken = 0; // 0 Extra Bits 0+0
    public const int OneToken = 1; // 1 Extra Bits 0+1
    public const int TwoToken = 2; // 2 Extra Bits 0+1

    public const int PivotNode = 2;

    // Smallest value in each extra-bits coefficient category.
    public const int Cat1MinVal = 5;
    public const int Cat2MinVal = 7;
    public const int Cat3MinVal = 11;
    public const int Cat4MinVal = 19;
    public const int Cat5MinVal = 35;
    public const int Cat6MinVal = 67;

    public const int EobModelToken = 3;

    // Segmentation.
    public const int SegmentAbsData = 1;
    public const int MaxSegments = 8;
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,181 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
using System;
using Vp9MvRef = Ryujinx.Graphics.Video.Vp9MvRef;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
/// <summary>
/// Software (non hardware-accelerated) VP9 frame decoder.
/// </summary>
public sealed class Decoder : IVp9Decoder
{
    public bool IsHardwareAccelerated => false;

    private readonly MemoryAllocator _allocator = new MemoryAllocator();

    public ISurface CreateSurface(int width, int height) => new Surface(width, height);

    // Maps the interpolation filter value found in the picture info to the decoder's own
    // filter constants. Only used when the filter is not Constants.Switchable.
    private static ReadOnlySpan<byte> LiteralToFilter => new byte[]
    {
        Constants.EightTapSmooth,
        Constants.EightTap,
        Constants.EightTapSharp,
        Constants.Bilinear
    };

    /// <summary>
    /// Decodes one frame from <paramref name="bitstream"/> into <paramref name="output"/>.
    /// </summary>
    /// <param name="pictureInfo">Frame parameters; its entropy probabilities and backward update counts are referenced in place.</param>
    /// <param name="output">Destination surface; also supplies the frame width and height.</param>
    /// <param name="bitstream">Compressed frame data.</param>
    /// <param name="mvsIn">Motion vectors loaded into the previous-frame buffer before decoding.</param>
    /// <param name="mvsOut">Receives the current frame's motion vectors after a successful decode.</param>
    /// <returns>True on success, false if decoding raised an <see cref="InternalErrorException"/>.</returns>
    /// <exception cref="ArgumentException">A motion vector span is larger than the decoder's buffer.</exception>
    public unsafe bool Decode(
        ref Vp9PictureInfo pictureInfo,
        ISurface output,
        ReadOnlySpan<byte> bitstream,
        ReadOnlySpan<Vp9MvRef> mvsIn,
        Span<Vp9MvRef> mvsOut)
    {
        Vp9Common cm = new Vp9Common();

        cm.FrameType = pictureInfo.IsKeyFrame ? FrameType.KeyFrame : FrameType.InterFrame;
        cm.IntraOnly = pictureInfo.IntraOnly;

        cm.Width = output.Width;
        cm.Height = output.Height;
        cm.SubsamplingX = 1;
        cm.SubsamplingY = 1;

        cm.UsePrevFrameMvs = pictureInfo.UsePrevInFindMvRefs;

        cm.RefFrameSignBias = pictureInfo.RefFrameSignBias;

        cm.BaseQindex = pictureInfo.BaseQIndex;
        cm.YDcDeltaQ = pictureInfo.YDcDeltaQ;
        cm.UvAcDeltaQ = pictureInfo.UvAcDeltaQ;
        cm.UvDcDeltaQ = pictureInfo.UvDcDeltaQ;

        cm.Mb.Lossless = pictureInfo.Lossless;
        cm.Mb.Bd = 8;

        cm.TxMode = (TxMode)pictureInfo.TransformMode;

        cm.AllowHighPrecisionMv = pictureInfo.AllowHighPrecisionMv;

        cm.InterpFilter = (byte)pictureInfo.InterpFilter;

        if (cm.InterpFilter != Constants.Switchable)
        {
            cm.InterpFilter = LiteralToFilter[cm.InterpFilter];
        }

        cm.ReferenceMode = (ReferenceMode)pictureInfo.ReferenceMode;

        cm.CompFixedRef = pictureInfo.CompFixedRef;
        cm.CompVarRef = pictureInfo.CompVarRef;

        cm.Log2TileCols = pictureInfo.Log2TileCols;
        cm.Log2TileRows = pictureInfo.Log2TileRows;

        cm.Seg.Enabled = pictureInfo.SegmentEnabled;
        cm.Seg.UpdateMap = pictureInfo.SegmentMapUpdate;
        cm.Seg.TemporalUpdate = pictureInfo.SegmentMapTemporalUpdate;
        cm.Seg.AbsDelta = (byte)pictureInfo.SegmentAbsDelta;
        cm.Seg.FeatureMask = pictureInfo.SegmentFeatureEnable;
        cm.Seg.FeatureData = pictureInfo.SegmentFeatureData;

        cm.Lf.ModeRefDeltaEnabled = pictureInfo.ModeRefDeltaEnabled;
        cm.Lf.RefDeltas = pictureInfo.RefDeltas;
        cm.Lf.ModeDeltas = pictureInfo.ModeDeltas;

        cm.Fc = new Ptr<Vp9EntropyProbs>(ref pictureInfo.Entropy);
        cm.Counts = new Ptr<Vp9BackwardUpdates>(ref pictureInfo.BackwardUpdateCounts);

        cm.FrameRefs[0].Buf = (Surface)pictureInfo.LastReference;
        cm.FrameRefs[1].Buf = (Surface)pictureInfo.GoldenReference;
        cm.FrameRefs[2].Buf = (Surface)pictureInfo.AltReference;
        cm.Mb.CurBuf = (Surface)output;

        cm.Mb.SetupBlockPlanes(1, 1);

        int tileCols = 1 << pictureInfo.Log2TileCols;
        int tileRows = 1 << pictureInfo.Log2TileRows;

        // Videos usually have only 4 tile columns, so more threads won't make a difference for those.
        // Try not to take all CPU cores for video decoding.
        int maxThreads = Math.Min(4, Environment.ProcessorCount / 2);

        cm.AllocTileWorkerData(_allocator, tileCols, tileRows, maxThreads);
        cm.AllocContextBuffers(_allocator, output.Width, output.Height);

        // Everything from here on runs under try/finally: the buffers allocated above must go
        // back to the allocator on every exit path, including a failed decode. Otherwise their
        // pool slots would stay marked as in use for the lifetime of the decoder.
        try
        {
            cm.InitContextBuffers();
            cm.SetupSegmentationDequant();
            cm.SetupScaleFactors();

            SetMvs(ref cm, mvsIn);

            fixed (byte* dataPtr = bitstream)
            {
                try
                {
                    // The multi-threaded path is only taken for a single tile row with several columns.
                    if (maxThreads > 1 && tileRows == 1 && tileCols > 1)
                    {
                        DecodeFrame.DecodeTilesMt(ref cm, new ArrayPtr<byte>(dataPtr, bitstream.Length), maxThreads);
                    }
                    else
                    {
                        DecodeFrame.DecodeTiles(ref cm, new ArrayPtr<byte>(dataPtr, bitstream.Length));
                    }
                }
                catch (InternalErrorException)
                {
                    return false;
                }
            }

            GetMvs(ref cm, mvsOut);

            return true;
        }
        finally
        {
            cm.FreeTileWorkerData(_allocator);
            cm.FreeContextBuffers(_allocator);
        }
    }

    /// <summary>
    /// Copies <paramref name="mvs"/> into the previous-frame motion vector buffer of <paramref name="cm"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="mvs"/> has more entries than the buffer.</exception>
    private static void SetMvs(ref Vp9Common cm, ReadOnlySpan<Vp9MvRef> mvs)
    {
        if (mvs.Length > cm.PrevFrameMvs.Length)
        {
            throw new ArgumentException($"Size mismatch, expected: {cm.PrevFrameMvs.Length}, but got: {mvs.Length}.");
        }

        for (int i = 0; i < mvs.Length; i++)
        {
            ref var mv = ref cm.PrevFrameMvs[i];

            mv.Mv[0].Row = mvs[i].Mvs[0].Row;
            mv.Mv[0].Col = mvs[i].Mvs[0].Col;
            mv.Mv[1].Row = mvs[i].Mvs[1].Row;
            mv.Mv[1].Col = mvs[i].Mvs[1].Col;

            mv.RefFrame[0] = (sbyte)mvs[i].RefFrames[0];
            mv.RefFrame[1] = (sbyte)mvs[i].RefFrames[1];
        }
    }

    /// <summary>
    /// Copies the current-frame motion vectors of <paramref name="cm"/> into <paramref name="mvs"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="mvs"/> has more entries than the buffer.</exception>
    private static void GetMvs(ref Vp9Common cm, Span<Vp9MvRef> mvs)
    {
        if (mvs.Length > cm.CurFrameMvs.Length)
        {
            throw new ArgumentException($"Size mismatch, expected: {cm.CurFrameMvs.Length}, but got: {mvs.Length}.");
        }

        for (int i = 0; i < mvs.Length; i++)
        {
            ref var mv = ref cm.CurFrameMvs[i];

            mvs[i].Mvs[0].Row = mv.Mv[0].Row;
            mvs[i].Mvs[0].Col = mv.Mv[0].Col;
            mvs[i].Mvs[1].Row = mv.Mv[1].Row;
            mvs[i].Mvs[1].Col = mv.Mv[1].Col;

            mvs[i].RefFrames[0] = mv.RefFrame[0];
            mvs[i].RefFrames[1] = mv.RefFrame[1];
        }
    }

    /// <summary>Releases the unmanaged buffers held by the decoder's allocator.</summary>
    public void Dispose() => _allocator.Dispose();
}
}

View file

@ -0,0 +1,325 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class Detokenize
{
private const int EobContextNode = 0;
private const int ZeroContextNode = 1;
private const int OneContextNode = 2;
/// <summary>
/// Derives the context for scan position <paramref name="c"/>: the rounded-up average of the
/// cached token values at the two neighbor positions listed for <paramref name="c"/>.
/// </summary>
private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
{
    const int MaxNeighbors = 2;

    int pairStart = MaxNeighbors * c;
    int first = tokenCache[neighbors[pairStart]];
    int second = tokenCache[neighbors[pairStart + 1]];

    return (first + second + 1) >> 1;
}
/// <summary>
/// Reads <paramref name="n"/> bits, most significant first, decoding bit i with probability
/// <c>probs[i]</c>, and returns them packed into an integer.
/// </summary>
private static int ReadCoeff(
    ref Reader r,
    ReadOnlySpan<byte> probs,
    int n,
    ref ulong value,
    ref int count,
    ref uint range)
{
    int result = 0;

    for (int bit = 0; bit < n; bit++)
    {
        result <<= 1;
        result |= r.ReadBool(probs[bit], ref value, ref count, ref range);
    }

    return result;
}
// Decodes the coefficient tokens of one transform block.
// Each decoded, dequantized coefficient is written to dqcoeff at the position given by scan.
// ctx is the context used for the first token; later contexts are derived from tokenCache via nb.
// dq[0] dequantizes the first scan position and dq[1] every following one.
// Returns the number of scan positions consumed (at most 16 << (2 * txSize)).
private static int DecodeCoefs(
ref MacroBlockD xd,
PlaneType type,
Span<int> dqcoeff,
TxSize txSize,
ref Array2<short> dq,
int ctx,
ReadOnlySpan<short> scan,
ReadOnlySpan<short> nb,
ref Reader r)
{
ref Vp9BackwardUpdates counts = ref xd.Counts.Value;
int maxEob = 16 << ((int)txSize << 1);
ref Vp9EntropyProbs fc = ref xd.Fc.Value;
// Selects the probability/count tables: 1 for inter blocks, 0 otherwise.
int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;
int band, c = 0;
ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr];
Span<byte> tokenCache = stackalloc byte[32 * 32];
ReadOnlySpan<byte> bandTranslate = Luts.get_band_translate(txSize);
// Dequantized values are shifted right by one bit for 32x32 transforms only.
int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0;
int v;
short dqv = dq[0];
// The category 6 probability table and its bit count depend on the bit depth.
ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12)
? Luts.Vp9Cat6ProbHigh12
: (xd.Bd == 10) ? Luts.Vp9Cat6ProbHigh12.Slice(2) : Luts.Vp9Cat6Prob;
int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14;
// Keep value, range, and count as locals. The compiler produces better
// results with the locals than using r directly.
ulong value = r.Value;
uint range = r.Range;
int count = r.Count;
while (c < maxEob)
{
int val = -1;
band = bandTranslate[0];
bandTranslate = bandTranslate.Slice(1);
ref Array3<byte> prob = ref coefProbs[band][ctx];
// Counters are only updated when a counts structure is attached.
if (!xd.Counts.IsNull)
{
++counts.EobBranch[(int)txSize][(int)type][refr][band][ctx];
}
// A zero bit here means end of block: no more coefficients follow.
if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0)
{
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.EobModelToken];
}
break;
}
// Consume a run of zero tokens; each one advances the scan position.
while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0)
{
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.ZeroToken];
}
dqv = dq[1];
tokenCache[scan[c]] = 0;
++c;
if (c >= maxEob)
{
// Write the local reader state back before leaving.
r.Value = value;
r.Range = range;
r.Count = count;
return c; // Zero tokens at the end (no eob token)
}
ctx = GetCoefContext(nb, tokenCache, c);
band = bandTranslate[0];
bandTranslate = bandTranslate.Slice(1);
prob = ref coefProbs[band][ctx];
}
// Non-zero coefficient: a set bit means the magnitude is 2 or more.
if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
{
ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken];
}
if (r.ReadBool(p[0], ref value, ref count, ref range) != 0)
{
// Category tokens: the magnitude is the category minimum plus extra bits.
if (r.ReadBool(p[3], ref value, ref count, ref range) != 0)
{
tokenCache[scan[c]] = 5;
if (r.ReadBool(p[5], ref value, ref count, ref range) != 0)
{
if (r.ReadBool(p[7], ref value, ref count, ref range) != 0)
{
val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
}
else
{
val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
}
}
else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0)
{
val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
}
else
{
val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
}
}
else
{
tokenCache[scan[c]] = 4;
if (r.ReadBool(p[4], ref value, ref count, ref range) != 0)
{
val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
}
else
{
val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
}
}
// Val may use 18-bits
// The product is therefore computed in 64 bits before shifting.
v = (int)(((long)val * dqv) >> dqShift);
}
else
{
// Magnitude 2, 3 or 4.
if (r.ReadBool(p[1], ref value, ref count, ref range) != 0)
{
tokenCache[scan[c]] = 3;
v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> dqShift;
}
else
{
tokenCache[scan[c]] = 2;
v = (2 * dqv) >> dqShift;
}
}
}
else
{
// Magnitude 1.
if (!xd.Counts.IsNull)
{
++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.OneToken];
}
tokenCache[scan[c]] = 1;
v = dqv >> dqShift;
}
// The sign is read with probability 128; a set bit negates the value.
dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd);
++c;
ctx = GetCoefContext(nb, tokenCache, c);
dqv = dq[1];
}
// Write the local reader state back before returning.
r.Value = value;
r.Range = range;
r.Count = count;
return c;
}
/// <summary>
/// Computes by how many bits (8 per block) the above/left context words must be shifted when a
/// transform of <paramref name="txSizeInBlocks"/> blocks placed at (<paramref name="x"/>,
/// <paramref name="y"/>) extends past <c>xd.MaxBlocksWide</c> / <c>xd.MaxBlocksHigh</c>.
/// An output is left untouched when the corresponding limit is zero or not exceeded.
/// </summary>
private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
{
    bool exceedsWidth = xd.MaxBlocksWide != 0 && txSizeInBlocks + x > xd.MaxBlocksWide;

    if (exceedsWidth)
    {
        ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8;
    }

    bool exceedsHeight = xd.MaxBlocksHigh != 0 && txSizeInBlocks + y > xd.MaxBlocksHigh;

    if (exceedsHeight)
    {
        ctxShiftL = (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y)) * 8;
    }
}
/// <summary>
/// Maps a plane index to its plane type: value 0 for plane 0 (and below), value 1 for any other plane.
/// </summary>
private static PlaneType GetPlaneType(int plane)
{
    if (plane > 0)
    {
        return (PlaneType)1;
    }

    return (PlaneType)0;
}
/// <summary>
/// Decodes the coefficient tokens of one transform block of <paramref name="plane"/> and updates
/// the above/left entropy contexts at (<paramref name="x"/>, <paramref name="y"/>).
/// </summary>
/// <returns>The value returned by <c>DecodeCoefs</c>, or 0 for an invalid transform size.</returns>
public static int DecodeBlockTokens(
    ref TileWorkerData twd,
    int plane,
    Luts.ScanOrder sc,
    int x,
    int y,
    TxSize txSize,
    int segId)
{
    ref Reader r = ref twd.BitReader;
    ref MacroBlockD xd = ref twd.Xd;
    ref MacroBlockDPlane pd = ref xd.Plane[plane];
    ref Array2<short> dequant = ref pd.SegDequant[segId];

    Span<sbyte> above = pd.AboveContext.AsSpan().Slice(x);
    Span<sbyte> left = pd.LeftContext.AsSpan().Slice(y);

    PlaneType planeType = GetPlaneType(plane);

    int shiftAbove = 0;
    int shiftLeft = 0;
    int ctx;
    int eob;

    // For every size the pattern is the same: the context is the number of neighbours (above,
    // left) whose context entries are non-zero; afterwards both entries are set to ones when
    // any coefficient was decoded, shifted by the amounts GetCtxShift produced.
    switch (txSize)
    {
        case TxSize.Tx4x4:
        {
            ctx = (above[0] != 0 ? 1 : 0) + (left[0] != 0 ? 1 : 0);
            eob = DecodeCoefs(ref xd, planeType, pd.DqCoeff.AsSpan(), txSize, ref dequant, ctx, sc.Scan, sc.Neighbors, ref r);

            sbyte flag = (sbyte)(eob > 0 ? 1 : 0);
            left[0] = flag;
            above[0] = flag;
            break;
        }

        case TxSize.Tx8x8:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx8x8);

            // Two context entries are handled at once as a 16-bit word.
            Span<ushort> above16 = MemoryMarshal.Cast<sbyte, ushort>(above);
            Span<ushort> left16 = MemoryMarshal.Cast<sbyte, ushort>(left);

            ctx = (above16[0] != 0 ? 1 : 0) + (left16[0] != 0 ? 1 : 0);
            eob = DecodeCoefs(ref xd, planeType, pd.DqCoeff.AsSpan(), txSize, ref dequant, ctx, sc.Scan, sc.Neighbors, ref r);

            above16[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> shiftAbove);
            left16[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> shiftLeft);
            break;
        }

        case TxSize.Tx16x16:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx16x16);

            // Four context entries are handled at once as a 32-bit word.
            Span<uint> above32 = MemoryMarshal.Cast<sbyte, uint>(above);
            Span<uint> left32 = MemoryMarshal.Cast<sbyte, uint>(left);

            ctx = (above32[0] != 0 ? 1 : 0) + (left32[0] != 0 ? 1 : 0);
            eob = DecodeCoefs(ref xd, planeType, pd.DqCoeff.AsSpan(), txSize, ref dequant, ctx, sc.Scan, sc.Neighbors, ref r);

            above32[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> shiftAbove);
            left32[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> shiftLeft);
            break;
        }

        case TxSize.Tx32x32:
        {
            GetCtxShift(ref xd, ref shiftAbove, ref shiftLeft, x, y, 1 << (int)TxSize.Tx32x32);

            // NOTE: Casting to ulong here is safe because the default memory
            // alignment is at least 8 bytes and the Tx32x32 is aligned on 8 byte
            // boundaries.
            Span<ulong> above64 = MemoryMarshal.Cast<sbyte, ulong>(above);
            Span<ulong> left64 = MemoryMarshal.Cast<sbyte, ulong>(left);

            ctx = (above64[0] != 0 ? 1 : 0) + (left64[0] != 0 ? 1 : 0);
            eob = DecodeCoefs(ref xd, planeType, pd.DqCoeff.AsSpan(), txSize, ref dequant, ctx, sc.Scan, sc.Neighbors, ref r);

            above64[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> shiftAbove;
            left64[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> shiftLeft;
            break;
        }

        default:
        {
            Debug.Assert(false, "Invalid transform size.");
            eob = 0;
            break;
        }
    }

    return eob;
}
}
}

View file

@ -0,0 +1,943 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
internal static class Convolve
{
private const bool UseIntrinsics = true;
// Computes four 8-tap dot products at once.
// Each vsrcN holds 8 source samples widened to 16 bits; vfilter holds the 8 filter taps.
// Returns a vector whose lane N (lane 0 = lowest) is the sum of vsrcN[k] * vfilter[k] over all k.
// zero must be an all-zero vector; it is used as the second operand of the horizontal adds.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<int> MultiplyAddAdjacent(
Vector128<short> vsrc0,
Vector128<short> vsrc1,
Vector128<short> vsrc2,
Vector128<short> vsrc3,
Vector128<short> vfilter,
Vector128<int> zero)
{
// Multiply the 16-bit lanes and add adjacent products: four partial sums per source.
// < sumN, sumN, sumN, sumN >
Vector128<int> sum0 = Sse2.MultiplyAddAdjacent(vsrc0, vfilter);
Vector128<int> sum1 = Sse2.MultiplyAddAdjacent(vsrc1, vfilter);
Vector128<int> sum2 = Sse2.MultiplyAddAdjacent(vsrc2, vfilter);
Vector128<int> sum3 = Sse2.MultiplyAddAdjacent(vsrc3, vfilter);
// First horizontal add: four partial sums become two.
// < 0, 0, sumN, sumN >
sum0 = Ssse3.HorizontalAdd(sum0, zero);
sum1 = Ssse3.HorizontalAdd(sum1, zero);
sum2 = Ssse3.HorizontalAdd(sum2, zero);
sum3 = Ssse3.HorizontalAdd(sum3, zero);
// Second horizontal add: the complete sum ends up in the lowest lane.
// < 0, 0, 0, sumN >
sum0 = Ssse3.HorizontalAdd(sum0, zero);
sum1 = Ssse3.HorizontalAdd(sum1, zero);
sum2 = Ssse3.HorizontalAdd(sum2, zero);
sum3 = Ssse3.HorizontalAdd(sum3, zero);
// < 0, 0, sum1, sum0 >
Vector128<int> sum01 = Sse2.UnpackLow(sum0, sum1);
// < 0, 0, sum3, sum2 >
Vector128<int> sum23 = Sse2.UnpackLow(sum2, sum3);
// Combine the low halves of both pairs into one vector.
// < sum3, sum2, sum1, sum0 >
return Sse.MoveLowToHigh(sum01.AsSingle(), sum23.AsSingle()).AsInt32();
}
/// <summary>
/// Adds <paramref name="const64"/> to every lane of <paramref name="value"/> and arithmetically
/// shifts each lane right by <c>FilterBits</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<int> RoundShift(Vector128<int> value, Vector128<int> const64)
{
    Vector128<int> biased = Sse2.Add(value, const64);

    return Sse2.ShiftRightArithmetic(biased, FilterBits);
}
/// <summary>
/// Narrows the four 32-bit lanes of <paramref name="value"/> to bytes with unsigned saturation.
/// The results occupy the four lowest bytes of the returned vector; <paramref name="zero"/>
/// supplies the second operand of both pack steps.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> PackUnsignedSaturate(Vector128<int> value, Vector128<int> zero)
{
    // 32-bit -> 16-bit, then 16-bit -> 8-bit.
    Vector128<ushort> words = Sse41.PackUnsignedSaturate(value, zero);
    Vector128<byte> bytes = Sse2.PackUnsignedSaturate(words.AsInt16(), zero.AsInt16());

    return bytes;
}
// Vectorized horizontal 8-tap filter for the case where the filter phase is the same for
// every output pixel: a single filter, selected by x0Q4 & SubpelMask, is used for the whole
// block. The visible caller (ConvolveHoriz) only takes this path when the step equals
// 1 << SubpelBits.
// Four output pixels are produced and stored per inner iteration.
// NOTE(review): the 4-byte store looks like it assumes w is a multiple of 4 - confirm with callers.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveHorizSse41(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] xFilters,
int x0Q4,
int w,
int h)
{
Vector128<int> zero = Vector128<int>.Zero;
// Rounding constant passed to RoundShift, which adds it before shifting by FilterBits.
Vector128<int> const64 = Vector128.Create(64);
ulong x, y;
// Step back so that the filter taps start SubpelTaps / 2 - 1 samples before the output position.
src -= SubpelTaps / 2 - 1;
fixed (Array8<short>* xFilter = xFilters)
{
// Each filter is 8 shorts, hence the * 8 when indexing the table as a short pointer.
Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + (uint)(x0Q4 & SubpelMask) * 8);
for (y = 0; y < (uint)h; ++y)
{
// Integer part of the start position.
ulong srcOffset = (uint)x0Q4 >> SubpelBits;
for (x = 0; x < (uint)w; x += 4)
{
// Load 8 source bytes for each of 4 consecutive output pixels and widen them to 16 bits.
Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(&src[srcOffset + x]);
Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 1]);
Vector128<short> vsrc2 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 2]);
Vector128<short> vsrc3 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 3]);
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
// Round, saturate to bytes and store the 4 results with one 32-bit write.
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
}
src += srcStride;
dst += dstStride;
}
}
}
/// <summary>
/// Horizontal 8-tap filter. The source position starts at <paramref name="x0Q4"/> and advances by
/// <paramref name="xStepQ4"/> per output pixel; its bits above <c>SubpelBits</c> select the source
/// sample and its low bits (<c>SubpelMask</c>) select the filter from <paramref name="xFilters"/>.
/// Uses the SSE4.1 kernel when available and the step equals <c>1 &lt;&lt; SubpelBits</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h)
{
    if (Sse41.IsSupported && UseIntrinsics && xStepQ4 == 1 << SubpelBits)
    {
        ConvolveHorizSse41(src, srcStride, dst, dstStride, xFilters, x0Q4, w, h);

        return;
    }

    // The taps start SubpelTaps / 2 - 1 samples before the output position.
    byte* rowSrc = src - (SubpelTaps / 2 - 1);
    byte* rowDst = dst;

    for (int row = 0; row < h; row++)
    {
        int posQ4 = x0Q4;

        for (int col = 0; col < w; col++)
        {
            byte* taps = rowSrc + (posQ4 >> SubpelBits);
            ref Array8<short> kernel = ref xFilters[posQ4 & SubpelMask];

            int acc = 0;

            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += taps[tap] * kernel[tap];
            }

            rowDst[col] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));
            posQ4 += xStepQ4;
        }

        rowSrc += srcStride;
        rowDst += dstStride;
    }
}
/// <summary>
/// Horizontal 8-tap filter whose result is averaged (rounding up) with the value already in
/// <paramref name="dst"/>.
/// </summary>
private static unsafe void ConvolveAvgHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h)
{
    // The taps start SubpelTaps / 2 - 1 samples before the output position.
    byte* rowSrc = src - (SubpelTaps / 2 - 1);
    byte* rowDst = dst;

    for (int row = 0; row < h; row++)
    {
        int posQ4 = x0Q4;

        for (int col = 0; col < w; col++)
        {
            byte* taps = rowSrc + (posQ4 >> SubpelBits);
            ref Array8<short> kernel = ref xFilters[posQ4 & SubpelMask];

            int acc = 0;

            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += taps[tap] * kernel[tap];
            }

            byte filtered = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));

            rowDst[col] = (byte)BitUtils.RoundPowerOfTwo(rowDst[col] + filtered, 1);
            posQ4 += xStepQ4;
        }

        rowSrc += srcStride;
        rowDst += dstStride;
    }
}
// Vectorized vertical 8-tap filter for the case where the filter phase is the same for every
// output row: a single filter, selected by y0Q4 & SubpelMask, is used for the whole block.
// The visible caller (ConvolveVert) only takes this path when the step equals 1 << SubpelBits.
// Four horizontally adjacent output pixels are produced and stored per inner iteration.
// NOTE(review): the 4-byte gather/store looks like it assumes w is a multiple of 4 - confirm with callers.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveVertAvx2(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] yFilters,
int y0Q4,
int w,
int h)
{
Vector128<int> zero = Vector128<int>.Zero;
// Rounding constant passed to RoundShift, which adds it before shifting by FilterBits.
Vector128<int> const64 = Vector128.Create(64);
// Byte offsets of 8 consecutive rows, used as gather indices (scale 1).
Vector256<int> indices = Vector256.Create(
0,
srcStride,
srcStride * 2,
srcStride * 3,
srcStride * 4,
srcStride * 5,
srcStride * 6,
srcStride * 7);
ulong x, y;
// Step back so that the filter taps start SubpelTaps / 2 - 1 rows above the output position.
src -= srcStride * (SubpelTaps / 2 - 1);
fixed (Array8<short>* yFilter = yFilters)
{
// Each filter is 8 shorts, hence the * 8 when indexing the table as a short pointer.
Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + (uint)(y0Q4 & SubpelMask) * 8);
// Integer part of the start row.
ulong srcBaseY = (uint)y0Q4 >> SubpelBits;
for (y = 0; y < (uint)h; ++y)
{
ulong srcOffset = (srcBaseY + y) * (uint)srcStride;
for (x = 0; x < (uint)w; x += 4)
{
// Gather 4 bytes (4 columns) from each of the 8 rows: lower half = rows 0-3, upper half = rows 4-7.
Vector256<int> vsrc = Avx2.GatherVector256((uint*)&src[srcOffset + x], indices, 1).AsInt32();
Vector128<int> vsrcL = vsrc.GetLower();
Vector128<int> vsrcH = vsrc.GetUpper();
// Three rounds of byte interleaving transpose the 8x4 block, so that each column's
// 8 vertical samples become contiguous.
Vector128<byte> vsrcUnpck11 = Sse2.UnpackLow(vsrcL.AsByte(), vsrcH.AsByte());
Vector128<byte> vsrcUnpck12 = Sse2.UnpackHigh(vsrcL.AsByte(), vsrcH.AsByte());
Vector128<byte> vsrcUnpck21 = Sse2.UnpackLow(vsrcUnpck11, vsrcUnpck12);
Vector128<byte> vsrcUnpck22 = Sse2.UnpackHigh(vsrcUnpck11, vsrcUnpck12);
// vsrc01 = columns 0 and 1, vsrc23 = columns 2 and 3 (8 bytes each).
Vector128<byte> vsrc01 = Sse2.UnpackLow(vsrcUnpck21, vsrcUnpck22);
Vector128<byte> vsrc23 = Sse2.UnpackHigh(vsrcUnpck21, vsrcUnpck22);
// Move the upper column of each pair into the low 8 bytes so it can be widened.
Vector128<byte> vsrc11 = Sse.MoveHighToLow(vsrc01.AsSingle(), vsrc01.AsSingle()).AsByte();
Vector128<byte> vsrc33 = Sse.MoveHighToLow(vsrc23.AsSingle(), vsrc23.AsSingle()).AsByte();
// Widen the low 8 bytes of each vector to 16 bits: one vector of taps per column.
Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(vsrc01);
Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(vsrc11);
Vector128<short> vsrc2 = Sse41.ConvertToVector128Int16(vsrc23);
Vector128<short> vsrc3 = Sse41.ConvertToVector128Int16(vsrc33);
Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
// Round, saturate to bytes and store the 4 results with one 32-bit write.
Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
}
dst += dstStride;
}
}
}
/// <summary>
/// Vertical 8-tap filter. The source position starts at <paramref name="y0Q4"/> and advances by
/// <paramref name="yStepQ4"/> per output row; its bits above <c>SubpelBits</c> select the source
/// row and its low bits (<c>SubpelMask</c>) select the filter from <paramref name="yFilters"/>.
/// Uses the AVX2 kernel when available and the step equals <c>1 &lt;&lt; SubpelBits</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    if (Avx2.IsSupported && UseIntrinsics && yStepQ4 == 1 << SubpelBits)
    {
        ConvolveVertAvx2(src, srcStride, dst, dstStride, yFilters, y0Q4, w, h);

        return;
    }

    // The taps start SubpelTaps / 2 - 1 rows above the output position.
    byte* colSrc = src - srcStride * (SubpelTaps / 2 - 1);
    byte* colDst = dst;

    // Column-major traversal: every column is filtered top to bottom.
    for (int col = 0; col < w; col++)
    {
        int posQ4 = y0Q4;

        for (int row = 0; row < h; row++)
        {
            byte* taps = colSrc + (posQ4 >> SubpelBits) * srcStride;
            ref Array8<short> kernel = ref yFilters[posQ4 & SubpelMask];

            int acc = 0;

            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += taps[tap * srcStride] * kernel[tap];
            }

            colDst[row * dstStride] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));
            posQ4 += yStepQ4;
        }

        colSrc++;
        colDst++;
    }
}
/// <summary>
/// Vertical 8-tap filter whose result is averaged (rounding up) with the value already in
/// <paramref name="dst"/>.
/// </summary>
private static unsafe void ConvolveAvgVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    // The taps start SubpelTaps / 2 - 1 rows above the output position.
    byte* colSrc = src - srcStride * (SubpelTaps / 2 - 1);
    byte* colDst = dst;

    // Column-major traversal: every column is filtered top to bottom.
    for (int col = 0; col < w; col++)
    {
        int posQ4 = y0Q4;

        for (int row = 0; row < h; row++)
        {
            byte* taps = colSrc + (posQ4 >> SubpelBits) * srcStride;
            ref Array8<short> kernel = ref yFilters[posQ4 & SubpelMask];

            int acc = 0;

            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += taps[tap * srcStride] * kernel[tap];
            }

            byte filtered = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));

            colDst[row * dstStride] = (byte)BitUtils.RoundPowerOfTwo(colDst[row * dstStride] + filtered, 1);
            posQ4 += yStepQ4;
        }

        colSrc++;
        colDst++;
    }
}
/// <summary>
/// Horizontal-only 8-tap convolution. <paramref name="y0Q4"/> and <paramref name="yStepQ4"/>
/// are accepted but not used.
/// </summary>
public static unsafe void Convolve8Horiz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
    => ConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
/// <summary>
/// Horizontal-only 8-tap convolution averaged with the existing destination.
/// <paramref name="y0Q4"/> and <paramref name="yStepQ4"/> are accepted but not used.
/// </summary>
public static unsafe void Convolve8AvgHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
    => ConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
/// <summary>
/// Vertical-only 8-tap convolution. <paramref name="x0Q4"/> and <paramref name="xStepQ4"/>
/// are accepted but not used.
/// </summary>
public static unsafe void Convolve8Vert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
    => ConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
/// <summary>
/// Vertical-only 8-tap convolution averaged with the existing destination.
/// <paramref name="x0Q4"/> and <paramref name="xStepQ4"/> are accepted but not used.
/// </summary>
public static unsafe void Convolve8AvgVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
    => ConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
/// <summary>
/// Two-dimensional 8-tap convolution: a horizontal pass into a stack buffer followed by a
/// vertical pass from that buffer into <paramref name="dst"/>.
/// </summary>
[SkipLocalsInit]
public static unsafe void Convolve8(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    // Note: the fixed size of the intermediate buffer places limits on the parameters.
    // 2D filtering proceeds in two steps:
    //   (1) Interpolate horizontally into the intermediate buffer.
    //   (2) Interpolate the intermediate buffer vertically to derive the sub-pixel result.
    // Deriving the maximum number of rows in the intermediate buffer (135):
    //   - The smallest scaling factor is x1/2 ==> yStepQ4 = 32 (normative).
    //   - The largest block size is 64x64 pixels.
    //   - 64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
    //     original frame (in 1/16th pixel units).
    //   - Must round up because the block may be located at a sub-pixel position.
    //   - An additional SubpelTaps rows are required for the 8-tap filter tails.
    //   - ((64 - 1) * 32 + 15) >> 4 + 8 = 135.
    // When called from the frame scaling function, the smallest scaling factor is x1/4
    // ==> yStepQ4 = 64. Since w and h are at most 16 there, the buffer is still big enough.
    const int TempStride = 64;
    const int TempRows = 135;

    Debug.Assert(w <= 64);
    Debug.Assert(h <= 64);
    Debug.Assert(yStepQ4 <= 32 || (yStepQ4 <= 64 && h <= 32));
    Debug.Assert(xStepQ4 <= 64);

    byte* temp = stackalloc byte[TempStride * TempRows];

    // Rows of filter tail above the first output row.
    int leadRows = SubpelTaps / 2 - 1;
    int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;

    ConvolveHoriz(src - srcStride * leadRows, srcStride, temp, TempStride, filter, x0Q4, xStepQ4, w, intermediateHeight);
    ConvolveVert(temp + TempStride * leadRows, TempStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
}
/// <summary>
/// Two-dimensional 8-tap convolution whose result is averaged with the existing contents of
/// <paramref name="dst"/>. The filtered block is first produced in a 64x64 stack buffer.
/// </summary>
public static unsafe void Convolve8Avg(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    // The fixed size of the intermediate buffer places limits on the parameters.
    const int TempStride = 64;

    byte* temp = stackalloc byte[TempStride * 64];

    Debug.Assert(w <= 64);
    Debug.Assert(h <= 64);

    Convolve8(src, srcStride, temp, TempStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
    ConvolveAvg(temp, TempStride, dst, dstStride, null, 0, 0, 0, 0, w, h);
}
/// <summary>
/// Copies a <paramref name="w"/> x <paramref name="h"/> block from <paramref name="src"/> to
/// <paramref name="dst"/> row by row. The filter and position/step parameters are not used.
/// </summary>
public static unsafe void ConvolveCopy(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    byte* from = src;
    byte* to = dst;

    for (int row = 0; row < h; row++)
    {
        MemoryUtil.Copy(to, from, w);

        from += srcStride;
        to += dstStride;
    }
}
/// <summary>
/// Replaces every pixel of the <paramref name="w"/> x <paramref name="h"/> block in
/// <paramref name="dst"/> with the rounded-up average of itself and the corresponding pixel of
/// <paramref name="src"/>. The filter and position/step parameters are not used.
/// </summary>
public static unsafe void ConvolveAvg(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    byte* rowSrc = src;
    byte* rowDst = dst;

    for (int row = 0; row < h; row++)
    {
        for (int col = 0; col < w; col++)
        {
            int pairSum = rowDst[col] + rowSrc[col];

            rowDst[col] = (byte)BitUtils.RoundPowerOfTwo(pairSum, 1);
        }

        rowSrc += srcStride;
        rowDst += dstStride;
    }
}
/// <summary>
/// Forwards every argument, unchanged and in the same order, to <c>Convolve8Horiz</c>.
/// </summary>
public static unsafe void ScaledHoriz(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
// Pure pass-through: no scaling-specific work is done here.
Convolve8Horiz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
/// <summary>
/// Forwards every argument, unchanged and in the same order, to <c>Convolve8Vert</c>.
/// </summary>
public static unsafe void ScaledVert(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
// Pure pass-through: no scaling-specific work is done here.
Convolve8Vert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
/// <summary>
/// Forwards every argument, unchanged and in the same order, to <c>Convolve8</c>.
/// </summary>
public static unsafe void Scaled2D(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
// Pure pass-through: no scaling-specific work is done here.
Convolve8(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
/// <summary>
/// Forwards every argument, unchanged and in the same order, to <c>Convolve8AvgHoriz</c>.
/// </summary>
public static unsafe void ScaledAvgHoriz(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
// Pure pass-through: no scaling-specific work is done here.
Convolve8AvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
/// <summary>
/// Forwards every argument, unchanged and in the same order, to <c>Convolve8AvgVert</c>.
/// </summary>
public static unsafe void ScaledAvgVert(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
// Pure pass-through: no scaling-specific work is done here.
Convolve8AvgVert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
/// <summary>
/// Forwards every argument, unchanged and in the same order, to <c>Convolve8Avg</c>.
/// </summary>
public static unsafe void ScaledAvg2D(
byte* src,
int srcStride,
byte* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h)
{
// Pure pass-through: no scaling-specific work is done here.
Convolve8Avg(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
}
/// <summary>
/// Horizontal <c>SubpelTaps</c>-tap filter over 16-bit samples.
/// </summary>
/// <remarks>
/// For each output pixel the source position advances by <paramref name="xStepQ4"/> starting
/// from <paramref name="x0Q4"/>. The bits above <c>SubpelBits</c> select the source sample and
/// the low bits (<c>SubpelMask</c>) select the kernel from <paramref name="xFilters"/>. The
/// tap sum is rounded by <c>FilterBits</c> and clipped with <c>BitUtils.ClipPixelHighbd</c>
/// for bit depth <paramref name="bd"/>.
/// </remarks>
private static unsafe void HighbdConvolveHoriz(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h,
    int bd)
{
    // Shift left so that tap index 0 addresses the first sample the kernel reads.
    ushort* srcRow = src - (SubpelTaps / 2 - 1);
    ushort* dstRow = dst;

    for (int row = 0; row < h; row++)
    {
        int posQ4 = x0Q4;

        for (int col = 0; col < w; col++)
        {
            ushort* window = srcRow + (posQ4 >> SubpelBits);
            ref Array8<short> kernel = ref xFilters[posQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap] * kernel[tap];
            }

            dstRow[col] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);
            posQ4 += xStepQ4;
        }

        srcRow += srcStride;
        dstRow += dstStride;
    }
}
/// <summary>
/// Horizontal <c>SubpelTaps</c>-tap filter over 16-bit samples whose result is averaged with
/// the value already stored in <paramref name="dst"/>.
/// </summary>
/// <remarks>
/// The filtered value is computed as in the non-averaging horizontal pass (round by
/// <c>FilterBits</c>, clip for <paramref name="bd"/>). It is then added to the existing
/// destination sample and the sum is passed through <c>BitUtils.RoundPowerOfTwo(..., 1)</c>.
/// </remarks>
private static unsafe void HighbdConvolveAvgHoriz(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h,
    int bd)
{
    // Shift left so that tap index 0 addresses the first sample the kernel reads.
    ushort* srcRow = src - (SubpelTaps / 2 - 1);
    ushort* dstRow = dst;

    for (int row = 0; row < h; row++)
    {
        int posQ4 = x0Q4;

        for (int col = 0; col < w; col++)
        {
            ushort* window = srcRow + (posQ4 >> SubpelBits);
            ref Array8<short> kernel = ref xFilters[posQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap] * kernel[tap];
            }

            dstRow[col] = (ushort)BitUtils.RoundPowerOfTwo(
                dstRow[col] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd),
                1);
            posQ4 += xStepQ4;
        }

        srcRow += srcStride;
        dstRow += dstStride;
    }
}
/// <summary>
/// Vertical <c>SubpelTaps</c>-tap filter over 16-bit samples, processed one column at a time.
/// </summary>
/// <remarks>
/// For each output row the source position advances by <paramref name="yStepQ4"/> starting
/// from <paramref name="y0Q4"/>. The bits above <c>SubpelBits</c> select the source row and the
/// low bits (<c>SubpelMask</c>) select the kernel from <paramref name="yFilters"/>. The tap
/// sum is rounded by <c>FilterBits</c> and clipped for bit depth <paramref name="bd"/>.
/// </remarks>
private static unsafe void HighbdConvolveVert(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    // Move up so that tap index 0 addresses the first row the kernel reads.
    ushort* srcCol = src - srcStride * (SubpelTaps / 2 - 1);
    ushort* dstCol = dst;

    for (int col = 0; col < w; col++)
    {
        int posQ4 = y0Q4;

        for (int row = 0; row < h; row++)
        {
            ushort* window = srcCol + (posQ4 >> SubpelBits) * srcStride;
            ref Array8<short> kernel = ref yFilters[posQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap * srcStride] * kernel[tap];
            }

            dstCol[row * dstStride] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);
            posQ4 += yStepQ4;
        }

        srcCol++;
        dstCol++;
    }
}
/// <summary>
/// Vertical <c>SubpelTaps</c>-tap filter over 16-bit samples whose result is averaged with
/// the value already stored in <paramref name="dst"/>, processed one column at a time.
/// </summary>
/// <remarks>
/// The filtered value (rounded by <c>FilterBits</c>, clipped for <paramref name="bd"/>) is
/// added to the existing destination sample and the sum is passed through
/// <c>BitUtils.RoundPowerOfTwo(..., 1)</c>.
/// </remarks>
private static unsafe void HighConvolveAvgVert(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    // Move up so that tap index 0 addresses the first row the kernel reads.
    ushort* srcCol = src - srcStride * (SubpelTaps / 2 - 1);
    ushort* dstCol = dst;

    for (int col = 0; col < w; col++)
    {
        int posQ4 = y0Q4;

        for (int row = 0; row < h; row++)
        {
            ushort* window = srcCol + (posQ4 >> SubpelBits) * srcStride;
            ref Array8<short> kernel = ref yFilters[posQ4 & SubpelMask];

            int acc = 0;
            for (int tap = 0; tap < SubpelTaps; tap++)
            {
                acc += window[tap * srcStride] * kernel[tap];
            }

            int dstOffset = row * dstStride;
            dstCol[dstOffset] = (ushort)BitUtils.RoundPowerOfTwo(
                dstCol[dstOffset] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd),
                1);
            posQ4 += yStepQ4;
        }

        srcCol++;
        dstCol++;
    }
}
/// <summary>
/// Two-pass (horizontal, then vertical) filter over 16-bit samples using a fixed-size
/// intermediate buffer on the stack.
/// </summary>
/// <remarks>
/// The intermediate buffer is 64 samples wide and 135 rows tall, which limits the parameters:
/// <paramref name="w"/> and <paramref name="h"/> at most 64, both steps at most 32
/// (checked with debug assertions only).
/// </remarks>
private static unsafe void HighbdConvolve(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    // Note: Fixed size intermediate buffer, temp, places limits on parameters.
    // 2d filtering proceeds in 2 steps:
    //   (1) Interpolate horizontally into an intermediate buffer, temp.
    //   (2) Interpolate temp vertically to derive the sub-pixel result.
    // Deriving the maximum number of rows in the temp buffer (135):
    // --Smallest scaling factor is x1/2 ==> yStepQ4 = 32 (Normative).
    // --Largest block size is 64x64 pixels.
    // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
    //   original frame (in 1/16th pixel units).
    // --Must round-up because block may be located at sub-pixel position.
    // --Require an additional SubpelTaps rows for the 8-tap filter tails.
    // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
    const int TempStride = 64;
    const int TempRows = 135;

    Debug.Assert(w <= 64);
    Debug.Assert(h <= 64);
    Debug.Assert(yStepQ4 <= 32);
    Debug.Assert(xStepQ4 <= 32);

    ushort* temp = stackalloc ushort[TempStride * TempRows];

    // Rows of context the vertical kernel needs above the first output row.
    int leadRows = SubpelTaps / 2 - 1;
    int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;

    // Pass 1: start leadRows above src so the vertical pass has its leading context.
    HighbdConvolveHoriz(src - srcStride * leadRows, srcStride, temp, TempStride, filter, x0Q4, xStepQ4, w, intermediateHeight, bd);

    // Pass 2: skip the leading context rows again when reading back from temp.
    HighbdConvolveVert(temp + TempStride * leadRows, TempStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
}
/// <summary>
/// Public entry point that forwards to <c>HighbdConvolveHoriz</c>.
/// </summary>
/// <remarks>
/// <paramref name="y0Q4"/> and <paramref name="yStepQ4"/> are accepted but not passed on.
/// </remarks>
public static unsafe void HighbdConvolve8Horiz(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
// Only the horizontal position/step are relevant to the horizontal pass.
HighbdConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd);
}
/// <summary>
/// Public entry point that forwards to <c>HighbdConvolveAvgHoriz</c>.
/// </summary>
/// <remarks>
/// <paramref name="y0Q4"/> and <paramref name="yStepQ4"/> are accepted but not passed on.
/// </remarks>
public static unsafe void HighbdConvolve8AvgHoriz(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
// Only the horizontal position/step are relevant to the horizontal pass.
HighbdConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd);
}
/// <summary>
/// Public entry point that forwards to <c>HighbdConvolveVert</c>.
/// </summary>
/// <remarks>
/// <paramref name="x0Q4"/> and <paramref name="xStepQ4"/> are accepted but not passed on.
/// </remarks>
public static unsafe void HighbdConvolve8Vert(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
// Only the vertical position/step are relevant to the vertical pass.
HighbdConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
}
/// <summary>
/// Public entry point that forwards to <c>HighConvolveAvgVert</c>.
/// </summary>
/// <remarks>
/// <paramref name="x0Q4"/> and <paramref name="xStepQ4"/> are accepted but not passed on.
/// </remarks>
public static unsafe void HighbdConvolve8AvgVert(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
// NOTE(review): the callee is spelled "HighConvolveAvgVert" (no "bd"), unlike its
// sibling HighbdConvolve* helpers; the spelling here matches its declaration.
HighConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
}
/// <summary>
/// Public entry point that forwards every argument, unchanged and in the same order, to
/// <c>HighbdConvolve</c>.
/// </summary>
public static unsafe void HighbdConvolve8(
ushort* src,
int srcStride,
ushort* dst,
int dstStride,
Array8<short>[] filter,
int x0Q4,
int xStepQ4,
int y0Q4,
int yStepQ4,
int w,
int h,
int bd)
{
// Pure pass-through to the private two-pass implementation.
HighbdConvolve(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd);
}
/// <summary>
/// Runs <c>HighbdConvolve8</c> into a 64x64 scratch buffer on the stack and then blends that
/// scratch buffer into <paramref name="dst"/> with <c>HighbdConvolveAvg</c>.
/// </summary>
/// <remarks>
/// The scratch buffer is fixed at 64x64 samples, so <paramref name="w"/> and
/// <paramref name="h"/> must not exceed 64 (checked with debug assertions only).
/// </remarks>
public static unsafe void HighbdConvolve8Avg(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    const int ScratchStride = 64;
    const int ScratchRows = 64;

    Debug.Assert(w <= ScratchStride);
    Debug.Assert(h <= ScratchRows);

    // Fixed size intermediate buffer places limits on parameters.
    ushort* scratch = stackalloc ushort[ScratchStride * ScratchRows];

    // Step 1: filtered prediction goes to the scratch buffer.
    HighbdConvolve8(src, srcStride, scratch, ScratchStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd);

    // Step 2: average the scratch buffer into dst; the averaging pass takes no filter/step inputs.
    HighbdConvolveAvg(scratch, ScratchStride, dst, dstStride, null, 0, 0, 0, 0, w, h, bd);
}
/// <summary>
/// Row-by-row block copy for 16-bit samples: calls <c>MemoryUtil.Copy(dst, src, w)</c> once
/// for each of the <paramref name="h"/> rows, stepping the two pointers by their strides.
/// </summary>
/// <remarks>
/// <paramref name="filter"/>, the four Q4 position/step arguments and <paramref name="bd"/>
/// are not read; they are present only so the signature matches the other convolve functions.
/// </remarks>
public static unsafe void HighbdConvolveCopy(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    for (int row = 0; row < h; row++)
    {
        MemoryUtil.Copy(dst, src, w);

        src += srcStride;
        dst += dstStride;
    }
}
/// <summary>
/// Replaces every 16-bit sample of the <paramref name="w"/> x <paramref name="h"/> destination
/// block with <c>BitUtils.RoundPowerOfTwo(dst + src, 1)</c>.
/// </summary>
/// <remarks>
/// <paramref name="filter"/>, the four Q4 position/step arguments and <paramref name="bd"/>
/// are not read.
/// </remarks>
public static unsafe void HighbdConvolveAvg(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h,
    int bd)
{
    ushort* srcRow = src;
    ushort* dstRow = dst;

    for (int row = 0; row < h; row++)
    {
        for (int col = 0; col < w; col++)
        {
            int pairSum = dstRow[col] + srcRow[col];
            dstRow[col] = (ushort)BitUtils.RoundPowerOfTwo(pairSum, 1);
        }

        srcRow += srcStride;
        dstRow += dstStride;
    }
}
}
}

View file

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
    /// <summary>
    /// Integer constants describing the sub-pixel interpolation filter layout.
    /// </summary>
    internal static class Filter
    {
        /// <summary>Number of coefficients in one filter kernel.</summary>
        public const int SubpelTaps = 8;

        /// <summary>Bit count used when rounding a filter sum back down.</summary>
        public const int FilterBits = 7;

        /// <summary>Number of fractional bits in a sub-pixel position.</summary>
        public const int SubpelBits = 4;

        /// <summary>Number of distinct fractional positions (<c>1 &lt;&lt; SubpelBits</c>).</summary>
        public const int SubpelShifts = 1 << SubpelBits;

        /// <summary>Mask that isolates the fractional bits of a sub-pixel position.</summary>
        public const int SubpelMask = SubpelShifts - 1;
    }
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,73 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
    /// <summary>
    /// Helpers that turn symbol counts into 8-bit probabilities and blend them with
    /// previously used probabilities.
    /// </summary>
    internal static class Prob
    {
        public const int MaxProb = 255;

        private const int ModeMvCountSat = 20;

        // MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
        private static readonly uint[] CountToUpdateFactor =
        {
            0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
            70, 76, 83, 89, 96, 102, 108, 115, 121, 128
        };

        /// <summary>
        /// Computes <c>num / den</c> scaled to 256 with rounding, then applies the clamp
        /// bit-trick below. <paramref name="den"/> must be non-zero.
        /// </summary>
        private static byte GetProb(uint num, uint den)
        {
            Debug.Assert(den != 0);

            ulong scaled = (ulong)num * 256 + (den >> 1);
            int p = (int)(scaled / den);

            // (p > 255) ? 255 : (p < 1) ? 1 : p;
            int overflowBits = (255 - p) >> 23;
            int zeroFix = p == 0 ? 1 : 0;

            return (byte)(p | overflowBits | zeroFix);
        }

        /// <summary>
        /// Blends two probabilities: <paramref name="prob1"/> weighted by <c>256 - factor</c> and
        /// <paramref name="prob2"/> weighted by <paramref name="factor"/>, rounded by 8 bits.
        /// </summary>
        /// <remarks>This function assumes prob1 and prob2 are already within [1,255] range.</remarks>
        public static byte WeightedProb(int prob1, int prob2, int factor)
        {
            int blended = prob1 * (256 - factor) + prob2 * factor;

            return (byte)BitUtils.RoundPowerOfTwo(blended, 8);
        }

        /// <summary>
        /// Merges <paramref name="preProb"/> with the probability implied by the two counts.
        /// Returns <paramref name="preProb"/> untouched when both counts are zero.
        /// </summary>
        public static byte ModeMvMergeProbs(byte preProb, uint ct0, uint ct1)
        {
            uint total = ct0 + ct1;

            if (total == 0)
            {
                return preProb;
            }

            // The update weight grows with the number of observations, saturating at ModeMvCountSat.
            uint saturated = Math.Min(total, ModeMvCountSat);
            uint factor = CountToUpdateFactor[(int)saturated];
            byte observed = GetProb(ct0, total);

            return WeightedProb(preProb, observed, (int)factor);
        }

        /// <summary>
        /// Recursive worker for <see cref="TreeMergeProbs"/>. Visits the node pair stored at
        /// <c>tree[i]</c> / <c>tree[i + 1]</c>, writes the merged probability for that node to
        /// <c>probs[i / 2]</c>, and returns the total count found below the node.
        /// </summary>
        private static uint TreeMergeProbsImpl(
            uint i,
            sbyte[] tree,
            ReadOnlySpan<byte> preProbs,
            ReadOnlySpan<uint> counts,
            Span<byte> probs)
        {
            // A non-positive entry is a leaf whose negation indexes counts; a positive entry is a child node.
            int left = tree[i];
            uint leftTotal = (left <= 0)
                ? counts[-left]
                : TreeMergeProbsImpl((uint)left, tree, preProbs, counts, probs);

            int right = tree[i + 1];
            uint rightTotal = (right <= 0)
                ? counts[-right]
                : TreeMergeProbsImpl((uint)right, tree, preProbs, counts, probs);

            int node = (int)(i >> 1);
            probs[node] = ModeMvMergeProbs(preProbs[node], leftTotal, rightTotal);

            return leftTotal + rightTotal;
        }

        /// <summary>
        /// Fills <paramref name="probs"/> with merged probabilities for every internal node of
        /// <paramref name="tree"/>, starting from the root at index 0.
        /// </summary>
        public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs)
        {
            TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
        }
    }
}

View file

@ -0,0 +1,237 @@
using Ryujinx.Common.Memory;
using System;
using System.Buffers.Binary;
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
/// <summary>
/// Bit reader that decodes probability-coded bits from a byte buffer.
/// <see cref="Value"/> holds the buffered bits, <see cref="Range"/> the current interval
/// size, and <see cref="Count"/> the number of buffered bits minus 8 (see the comment in
/// <see cref="HasError"/>).
/// </summary>
internal struct Reader
{
// Indexed by the current range in Read/ReadBool; the entry is the left shift applied to
// range and value (and subtracted from count) after each decoded bit.
private static readonly byte[] Norm = new byte[]
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
// Width of Value in bits (64).
private const int BdValueSize = sizeof(ulong) * 8;
// This is meant to be a large, positive constant that can still be efficiently
// loaded as an immediate (on platforms like ARM, for example).
// Even relatively modest values like 100 would work fine.
private const int LotsOfBits = 0x40000000;
public ulong Value;
public uint Range;
public int Count;
// Remaining, not yet consumed input; advanced by Fill and moved back by FindEnd.
private ArrayPtr<byte> _buffer;
/// <summary>
/// Resets the decoder state over <paramref name="buffer"/> and reads the first (marker) bit.
/// </summary>
/// <returns>
/// <c>true</c> when <paramref name="size"/> is non-zero but the buffer is null, or when the
/// marker bit is set; <c>false</c> otherwise. Presumably callers treat <c>true</c> as a
/// failure - confirm against call sites.
/// </returns>
public bool Init(ArrayPtr<byte> buffer, int size)
{
if (size != 0 && buffer.IsNull)
{
return true;
}
else
{
// NOTE(review): buffer[0] is dereferenced even when size == 0; confirm callers never
// pass a null buffer together with a zero size.
_buffer = new ArrayPtr<byte>(ref buffer[0], size);
Value = 0;
Count = -8;
Range = 255;
Fill();
return ReadBit() != 0; // Marker bit
}
}
/// <summary>
/// Tops up <see cref="Value"/> from the input and advances <c>_buffer</c> by the number of
/// bytes consumed.
/// </summary>
private void Fill()
{
ReadOnlySpan<byte> buffer = _buffer.AsSpan();
ReadOnlySpan<byte> bufferStart = buffer;
ulong value = Value;
int count = Count;
ulong bytesLeft = (ulong)buffer.Length;
ulong bitsLeft = bytesLeft * 8;
// Bit position in 'value' at which the next input byte would be placed.
int shift = BdValueSize - 8 - (count + 8);
if (bitsLeft > BdValueSize)
{
// Fast path: more than 64 bits remain, so read 8 bytes at once (big endian) and keep
// only as many whole bytes as fit.
int bits = (shift & unchecked((int)0xfffffff8)) + 8;
ulong nv;
ulong bigEndianValues = BinaryPrimitives.ReadUInt64BigEndian(buffer);
nv = bigEndianValues >> (BdValueSize - bits);
count += bits;
buffer = buffer.Slice(bits >> 3);
value = Value | (nv << (shift & 0x7));
}
else
{
// Slow path near the end of the input: consume one byte at a time.
int bitsOver = shift + 8 - (int)bitsLeft;
int loopEnd = 0;
if (bitsOver >= 0)
{
// The remaining input cannot fill 'value'; mark that by inflating count.
count += LotsOfBits;
loopEnd = bitsOver;
}
if (bitsOver < 0 || bitsLeft != 0)
{
while (shift >= loopEnd)
{
count += 8;
value |= (ulong)buffer[0] << shift;
buffer = buffer.Slice(1);
shift -= 8;
}
}
}
// NOTE: Variable 'buffer' may not relate to '_buffer' after decryption,
// so we increase '_buffer' by the amount that 'buffer' moved, rather than
// assign 'buffer' to '_buffer'.
_buffer = _buffer.Slice(bufferStart.Length - buffer.Length);
Value = value;
Count = count;
}
/// <summary>
/// Reports whether <see cref="Count"/> lies strictly between <c>BdValueSize</c> and
/// <c>LotsOfBits</c>; see the explanation inside the method.
/// </summary>
public bool HasError()
{
// Check if we have reached the end of the buffer.
//
// Variable 'count' stores the number of bits in the 'value' buffer, minus
// 8. The top byte is part of the algorithm, and the remainder is buffered
// to be shifted into it. So if count == 8, the top 16 bits of 'value' are
// occupied, 8 for the algorithm and 8 in the buffer.
//
// When reading a byte from the user's buffer, count is filled with 8 and
// one byte is filled into the value buffer. When we reach the end of the
// data, count is additionally filled with LotsOfBits. So when
// count == LotsOfBits - 1, the user's data has been exhausted.
//
// 1 if we have tried to decode bits after the end of stream was encountered.
// 0 No error.
return Count > BdValueSize && Count < LotsOfBits;
}
/// <summary>
/// Decodes one bit using probability <paramref name="prob"/> (on a 256 scale) and updates
/// <see cref="Value"/>, <see cref="Count"/> and <see cref="Range"/>.
/// </summary>
/// <returns>The decoded bit, 0 or 1.</returns>
public int Read(int prob)
{
uint bit = 0;
ulong value;
ulong bigsplit;
int count;
uint range;
// Split point of the current range for the given probability.
uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;
if (Count < 0)
{
Fill();
}
value = Value;
count = Count;
// The split is compared against the top byte of 'value'.
bigsplit = (ulong)split << (BdValueSize - 8);
range = split;
if (value >= bigsplit)
{
range = Range - split;
value -= bigsplit;
bit = 1;
}
{
// Renormalize using the Norm table.
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
}
Value = value;
Count = count;
Range = range;
return (int)bit;
}
/// <summary>Decodes one bit with probability 128.</summary>
public int ReadBit()
{
return Read(128); // vpx_prob_half
}
/// <summary>
/// Reads <paramref name="bits"/> bits with <see cref="ReadBit"/>, most significant bit
/// first, and returns them as an integer.
/// </summary>
public int ReadLiteral(int bits)
{
int literal = 0, bit;
for (bit = bits - 1; bit >= 0; bit--)
{
literal |= ReadBit() << bit;
}
return literal;
}
/// <summary>
/// Walks <paramref name="tree"/> from index 0, decoding one bit per step with the
/// probability at <c>probs[i &gt;&gt; 1]</c>, until a non-positive entry is reached.
/// </summary>
/// <returns>The negation of the non-positive entry that ended the walk.</returns>
public int ReadTree(ReadOnlySpan<sbyte> tree, ReadOnlySpan<byte> probs)
{
sbyte i = 0;
while ((i = tree[i + Read(probs[i >> 1])]) > 0)
{
continue;
}
return -i;
}
/// <summary>
/// Variant of <see cref="Read"/> that works on caller-held copies of the decoder state.
/// The struct fields are touched only around the call to <c>Fill</c>; on return the updated
/// state is in the <c>ref</c> arguments. Presumably the caller writes it back - verify
/// against call sites.
/// </summary>
/// <returns>The decoded bit, 0 or 1.</returns>
public int ReadBool(int prob, ref ulong value, ref int count, ref uint range)
{
uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8;
ulong bigsplit = (ulong)split << (BdValueSize - 8);
if (count < 0)
{
// Fill works on the fields, so sync the caller's state in and back out.
Value = value;
Count = count;
Fill();
value = Value;
count = Count;
}
if (value >= bigsplit)
{
range = range - split;
value = value - bigsplit;
{
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
}
return 1;
}
range = split;
{
int shift = Norm[range];
range <<= shift;
value <<= shift;
count -= shift;
}
return 0;
}
/// <summary>
/// Moves <c>_buffer</c> back by one byte for every 8 bits removed from <see cref="Count"/>
/// while <c>8 &lt; Count &lt; BdValueSize</c>, then returns the resulting position.
/// </summary>
public ArrayPtr<byte> FindEnd()
{
// Find the end of the coded buffer
while (Count > 8 && Count < BdValueSize)
{
Count -= 8;
_buffer = _buffer.Slice(-1);
}
return _buffer;
}
}
}

View file

@ -0,0 +1,54 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
/// <summary>
/// Fixed-point constants shared by the transform code: the scaling/rounding parameters and
/// tables of pre-scaled cosine and sine values.
/// </summary>
internal static class TxfmCommon
{
// Constants used by all idct/dct functions
// Number of fractional bits in the trigonometric constants below (16384 == 1 << 14).
public const int DctConstBits = 14;
// Half of 1 << DctConstBits.
public const int DctConstRounding = 1 << (DctConstBits - 1);
public const int UnitQuantShift = 2;
public const int UnitQuantFactor = 1 << UnitQuantShift;
// Constants:
// for (int i = 1; i < 32; ++i)
// Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
// Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
public const short CosPi1_64 = 16364;
public const short CosPi2_64 = 16305;
public const short CosPi3_64 = 16207;
public const short CosPi4_64 = 16069;
public const short CosPi5_64 = 15893;
public const short CosPi6_64 = 15679;
public const short CosPi7_64 = 15426;
public const short CosPi8_64 = 15137;
public const short CosPi9_64 = 14811;
public const short CosPi10_64 = 14449;
public const short CosPi11_64 = 14053;
public const short CosPi12_64 = 13623;
public const short CosPi13_64 = 13160;
public const short CosPi14_64 = 12665;
public const short CosPi15_64 = 12140;
public const short CosPi16_64 = 11585;
public const short CosPi17_64 = 11003;
public const short CosPi18_64 = 10394;
public const short CosPi19_64 = 9760;
public const short CosPi20_64 = 9102;
public const short CosPi21_64 = 8423;
public const short CosPi22_64 = 7723;
public const short CosPi23_64 = 7005;
public const short CosPi24_64 = 6270;
public const short CosPi25_64 = 5520;
public const short CosPi26_64 = 4756;
public const short CosPi27_64 = 3981;
public const short CosPi28_64 = 3196;
public const short CosPi29_64 = 2404;
public const short CosPi30_64 = 1606;
public const short CosPi31_64 = 804;
// 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
public const short SinPi1_9 = 5283;
public const short SinPi2_9 = 9929;
public const short SinPi3_9 = 13377;
public const short SinPi4_9 = 15212;
}
}

View file

@ -0,0 +1,536 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
    /// <summary>
    /// Inverse-transform front end. The <c>Iht*</c> methods run a row pass followed by a column
    /// pass using the 1-D transforms selected by the transform type, and add the rounded result
    /// to the destination pixels. The <c>Idct*</c>/<c>Iwht*</c> methods pick one of the
    /// statically imported <c>InvTxfm</c> implementations based on the end-of-block position.
    /// The <c>Highbd*</c> methods are the 16-bit-pixel counterparts and take the bit depth.
    /// </summary>
    internal static class Idct
    {
        private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output);

        private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd);

        /// <summary>Pair of 1-D transforms applied to the columns and rows of a block.</summary>
        private struct Transform2D
        {
            public Transform1D Cols, Rows; // Vertical and horizontal

            public Transform2D(Transform1D cols, Transform1D rows)
            {
                Cols = cols;
                Rows = rows;
            }
        }

        /// <summary>High bit depth counterpart of <see cref="Transform2D"/>.</summary>
        private struct HighbdTransform2D
        {
            public HighbdTransform1D Cols, Rows; // Vertical and horizontal

            public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows)
            {
                Cols = cols;
                Rows = rows;
            }
        }

        private static readonly Transform2D[] Iht4 = new Transform2D[]
        {
            new Transform2D(Idct4, Idct4),   // DCT_DCT = 0
            new Transform2D(Iadst4, Idct4),  // ADST_DCT = 1
            new Transform2D(Idct4, Iadst4),  // DCT_ADST = 2
            new Transform2D(Iadst4, Iadst4)  // ADST_ADST = 3
        };

        /// <summary>
        /// 4x4 inverse hybrid transform of <paramref name="input"/>, added to <paramref name="dest"/>.
        /// </summary>
        /// <param name="input">16 coefficients, row by row.</param>
        /// <param name="dest">Destination pixels, updated in place.</param>
        /// <param name="stride">Distance between destination rows, in elements.</param>
        /// <param name="txType">Index into the 4x4 transform table (0..3).</param>
        public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
        {
            int i, j;
            Span<int> output = stackalloc int[4 * 4];
            Span<int> outptr = output;
            Span<int> tempIn = stackalloc int[4];
            Span<int> tempOut = stackalloc int[4];
            Transform2D ht = Iht4[txType];

            // Inverse transform row vectors
            for (i = 0; i < 4; ++i)
            {
                ht.Rows(input, outptr);
                input = input.Slice(4);
                outptr = outptr.Slice(4);
            }

            // Inverse transform column vectors
            for (i = 0; i < 4; ++i)
            {
                for (j = 0; j < 4; ++j)
                {
                    tempIn[j] = output[j * 4 + i];
                }

                ht.Cols(tempIn, tempOut);
                for (j = 0; j < 4; ++j)
                {
                    dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4));
                }
            }
        }

        private static readonly Transform2D[] Iht8 = new Transform2D[]
        {
            new Transform2D(Idct8, Idct8),   // DCT_DCT = 0
            new Transform2D(Iadst8, Idct8),  // ADST_DCT = 1
            new Transform2D(Idct8, Iadst8),  // DCT_ADST = 2
            new Transform2D(Iadst8, Iadst8)  // ADST_ADST = 3
        };

        /// <summary>
        /// 8x8 inverse hybrid transform of <paramref name="input"/>, added to <paramref name="dest"/>.
        /// </summary>
        /// <param name="input">64 coefficients, row by row.</param>
        /// <param name="dest">Destination pixels, updated in place.</param>
        /// <param name="stride">Distance between destination rows, in elements.</param>
        /// <param name="txType">Index into the 8x8 transform table (0..3).</param>
        public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
        {
            int i, j;
            Span<int> output = stackalloc int[8 * 8];
            Span<int> outptr = output;
            Span<int> tempIn = stackalloc int[8];
            Span<int> tempOut = stackalloc int[8];
            Transform2D ht = Iht8[txType];

            // Inverse transform row vectors
            for (i = 0; i < 8; ++i)
            {
                ht.Rows(input, outptr);
                input = input.Slice(8);
                outptr = outptr.Slice(8);
            }

            // Inverse transform column vectors
            for (i = 0; i < 8; ++i)
            {
                for (j = 0; j < 8; ++j)
                {
                    tempIn[j] = output[j * 8 + i];
                }

                ht.Cols(tempIn, tempOut);
                for (j = 0; j < 8; ++j)
                {
                    dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5));
                }
            }
        }

        private static readonly Transform2D[] Iht16 = new Transform2D[]
        {
            new Transform2D(Idct16, Idct16),   // DCT_DCT = 0
            new Transform2D(Iadst16, Idct16),  // ADST_DCT = 1
            new Transform2D(Idct16, Iadst16),  // DCT_ADST = 2
            new Transform2D(Iadst16, Iadst16)  // ADST_ADST = 3
        };

        /// <summary>
        /// 16x16 inverse hybrid transform of <paramref name="input"/>, added to <paramref name="dest"/>.
        /// </summary>
        /// <param name="input">256 coefficients, row by row.</param>
        /// <param name="dest">Destination pixels, updated in place.</param>
        /// <param name="stride">Distance between destination rows, in elements.</param>
        /// <param name="txType">Index into the 16x16 transform table (0..3).</param>
        public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
        {
            int i, j;
            Span<int> output = stackalloc int[16 * 16];
            Span<int> outptr = output;
            Span<int> tempIn = stackalloc int[16];
            Span<int> tempOut = stackalloc int[16];
            Transform2D ht = Iht16[txType];

            // Rows
            for (i = 0; i < 16; ++i)
            {
                ht.Rows(input, outptr);
                input = input.Slice(16);
                outptr = outptr.Slice(16);
            }

            // Columns
            for (i = 0; i < 16; ++i)
            {
                for (j = 0; j < 16; ++j)
                {
                    tempIn[j] = output[j * 16 + i];
                }

                ht.Cols(tempIn, tempOut);
                for (j = 0; j < 16; ++j)
                {
                    dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6));
                }
            }
        }

        // Idct

        /// <summary>Selects the full or DC-only 4x4 inverse DCT based on <paramref name="eob"/>.</summary>
        public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (eob > 1)
            {
                Idct4x416Add(input, dest, stride);
            }
            else
            {
                Idct4x41Add(input, dest, stride);
            }
        }

        /// <summary>Selects the full or DC-only 4x4 inverse WHT based on <paramref name="eob"/>.</summary>
        public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (eob > 1)
            {
                Iwht4x416Add(input, dest, stride);
            }
            else
            {
                Iwht4x41Add(input, dest, stride);
            }
        }

        /// <summary>Selects an 8x8 inverse DCT variant based on <paramref name="eob"/>.</summary>
        public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            // If dc is 1, then input[0] is the reconstructed value, do not need
            // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

            // The calculation can be simplified if there are not many non-zero dct
            // coefficients. Use eobs to decide what to do.
            if (eob == 1)
            {
                // DC only DCT coefficient
                Idct8x81Add(input, dest, stride);
            }
            else if (eob <= 12)
            {
                Idct8x812Add(input, dest, stride);
            }
            else
            {
                Idct8x864Add(input, dest, stride);
            }
        }

        /// <summary>Selects a 16x16 inverse DCT variant based on <paramref name="eob"/>.</summary>
        public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            /* The calculation can be simplified if there are not many non-zero dct
             * coefficients. Use eobs to separate different cases. */
            if (eob == 1) /* DC only DCT coefficient. */
            {
                Idct16x161Add(input, dest, stride);
            }
            else if (eob <= 10)
            {
                Idct16x1610Add(input, dest, stride);
            }
            else if (eob <= 38)
            {
                Idct16x1638Add(input, dest, stride);
            }
            else
            {
                Idct16x16256Add(input, dest, stride);
            }
        }

        /// <summary>Selects a 32x32 inverse DCT variant based on <paramref name="eob"/>.</summary>
        public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (eob == 1)
            {
                Idct32x321Add(input, dest, stride);
            }
            else if (eob <= 34)
            {
                // Non-zero coeff only in upper-left 8x8
                Idct32x3234Add(input, dest, stride);
            }
            else if (eob <= 135)
            {
                // Non-zero coeff only in upper-left 16x16
                Idct32x32135Add(input, dest, stride);
            }
            else
            {
                Idct32x321024Add(input, dest, stride);
            }
        }

        // Iht

        /// <summary>
        /// 4x4 inverse transform: the DCT path for <c>TxType.DctDct</c>, the hybrid path otherwise.
        /// </summary>
        public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (txType == TxType.DctDct)
            {
                Idct4x4Add(input, dest, stride, eob);
            }
            else
            {
                Iht4x416Add(input, dest, stride, (int)txType);
            }
        }

        /// <summary>
        /// 8x8 inverse transform: the DCT path for <c>TxType.DctDct</c>, the hybrid path otherwise.
        /// </summary>
        public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (txType == TxType.DctDct)
            {
                Idct8x8Add(input, dest, stride, eob);
            }
            else
            {
                Iht8x864Add(input, dest, stride, (int)txType);
            }
        }

        /// <summary>
        /// 16x16 inverse transform: the DCT path for <c>TxType.DctDct</c>, the hybrid path otherwise.
        /// </summary>
        public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest,
            int stride, int eob)
        {
            if (txType == TxType.DctDct)
            {
                Idct16x16Add(input, dest, stride, eob);
            }
            else
            {
                Iht16x16256Add(input, dest, stride, (int)txType);
            }
        }

        private static readonly HighbdTransform2D[] HighbdIht4 = new HighbdTransform2D[]
        {
            new HighbdTransform2D(HighbdIdct4, HighbdIdct4),   // DCT_DCT = 0
            new HighbdTransform2D(HighbdIadst4, HighbdIdct4),  // ADST_DCT = 1
            new HighbdTransform2D(HighbdIdct4, HighbdIadst4),  // DCT_ADST = 2
            new HighbdTransform2D(HighbdIadst4, HighbdIadst4)  // ADST_ADST = 3
        };

        /// <summary>
        /// High bit depth 4x4 inverse hybrid transform, added to <paramref name="dest"/>.
        /// </summary>
        /// <param name="input">16 coefficients, row by row.</param>
        /// <param name="dest">Destination pixels, updated in place.</param>
        /// <param name="stride">Distance between destination rows, in elements.</param>
        /// <param name="txType">Index into the 4x4 transform table (0..3).</param>
        /// <param name="bd">Bit depth passed through to the 1-D transforms and the clip.</param>
        public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
        {
            int i, j;
            Span<int> output = stackalloc int[4 * 4];
            Span<int> outptr = output;
            Span<int> tempIn = stackalloc int[4];
            Span<int> tempOut = stackalloc int[4];
            HighbdTransform2D ht = HighbdIht4[txType];

            // Inverse transform row vectors.
            for (i = 0; i < 4; ++i)
            {
                ht.Rows(input, outptr, bd);
                input = input.Slice(4);
                outptr = outptr.Slice(4);
            }

            // Inverse transform column vectors.
            for (i = 0; i < 4; ++i)
            {
                for (j = 0; j < 4; ++j)
                {
                    tempIn[j] = output[j * 4 + i];
                }

                ht.Cols(tempIn, tempOut, bd);
                for (j = 0; j < 4; ++j)
                {
                    dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4), bd);
                }
            }
        }

        private static readonly HighbdTransform2D[] HighIht8 = new HighbdTransform2D[]
        {
            new HighbdTransform2D(HighbdIdct8, HighbdIdct8),   // DCT_DCT = 0
            new HighbdTransform2D(HighbdIadst8, HighbdIdct8),  // ADST_DCT = 1
            new HighbdTransform2D(HighbdIdct8, HighbdIadst8),  // DCT_ADST = 2
            new HighbdTransform2D(HighbdIadst8, HighbdIadst8)  // ADST_ADST = 3
        };

        /// <summary>
        /// High bit depth 8x8 inverse hybrid transform, added to <paramref name="dest"/>.
        /// </summary>
        /// <param name="input">64 coefficients, row by row.</param>
        /// <param name="dest">Destination pixels, updated in place.</param>
        /// <param name="stride">Distance between destination rows, in elements.</param>
        /// <param name="txType">Index into the 8x8 transform table (0..3).</param>
        /// <param name="bd">Bit depth passed through to the 1-D transforms and the clip.</param>
        public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
        {
            int i, j;
            Span<int> output = stackalloc int[8 * 8];
            Span<int> outptr = output;
            Span<int> tempIn = stackalloc int[8];
            Span<int> tempOut = stackalloc int[8];
            HighbdTransform2D ht = HighIht8[txType];

            // Inverse transform row vectors.
            for (i = 0; i < 8; ++i)
            {
                ht.Rows(input, outptr, bd);
                input = input.Slice(8);

                // Advance the cursor relative to itself. Slicing 'output' here would pin the
                // cursor to row 1 and leave rows 2..7 of the intermediate unwritten.
                outptr = outptr.Slice(8);
            }

            // Inverse transform column vectors.
            for (i = 0; i < 8; ++i)
            {
                for (j = 0; j < 8; ++j)
                {
                    tempIn[j] = output[j * 8 + i];
                }

                ht.Cols(tempIn, tempOut, bd);
                for (j = 0; j < 8; ++j)
                {
                    dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd);
                }
            }
        }

        private static readonly HighbdTransform2D[] HighIht16 = new HighbdTransform2D[]
        {
            new HighbdTransform2D(HighbdIdct16, HighbdIdct16),   // DCT_DCT = 0
            new HighbdTransform2D(HighbdIadst16, HighbdIdct16),  // ADST_DCT = 1
            new HighbdTransform2D(HighbdIdct16, HighbdIadst16),  // DCT_ADST = 2
            new HighbdTransform2D(HighbdIadst16, HighbdIadst16)  // ADST_ADST = 3
        };

        /// <summary>
        /// High bit depth 16x16 inverse hybrid transform, added to <paramref name="dest"/>.
        /// </summary>
        /// <param name="input">256 coefficients, row by row.</param>
        /// <param name="dest">Destination pixels, updated in place.</param>
        /// <param name="stride">Distance between destination rows, in elements.</param>
        /// <param name="txType">Index into the 16x16 transform table (0..3).</param>
        /// <param name="bd">Bit depth passed through to the 1-D transforms and the clip.</param>
        public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
        {
            int i, j;
            Span<int> output = stackalloc int[16 * 16];
            Span<int> outptr = output;
            Span<int> tempIn = stackalloc int[16];
            Span<int> tempOut = stackalloc int[16];
            HighbdTransform2D ht = HighIht16[txType];

            // Rows
            for (i = 0; i < 16; ++i)
            {
                ht.Rows(input, outptr, bd);
                input = input.Slice(16);

                // Advance the cursor relative to itself. Slicing 'output' here would pin the
                // cursor to row 1 and leave rows 2..15 of the intermediate unwritten.
                outptr = outptr.Slice(16);
            }

            // Columns
            for (i = 0; i < 16; ++i)
            {
                for (j = 0; j < 16; ++j)
                {
                    tempIn[j] = output[j * 16 + i];
                }

                ht.Cols(tempIn, tempOut, bd);
                for (j = 0; j < 16; ++j)
                {
                    dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
                }
            }
        }

        // Idct

        /// <summary>Selects the full or DC-only high bit depth 4x4 inverse DCT based on <paramref name="eob"/>.</summary>
        public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (eob > 1)
            {
                HighbdIdct4x416Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIdct4x41Add(input, dest, stride, bd);
            }
        }

        /// <summary>Selects the full or DC-only high bit depth 4x4 inverse WHT based on <paramref name="eob"/>.</summary>
        public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (eob > 1)
            {
                HighbdIwht4x416Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIwht4x41Add(input, dest, stride, bd);
            }
        }

        /// <summary>Selects a high bit depth 8x8 inverse DCT variant based on <paramref name="eob"/>.</summary>
        public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            // If dc is 1, then input[0] is the reconstructed value, do not need
            // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

            // The calculation can be simplified if there are not many non-zero dct
            // coefficients. Use eobs to decide what to do.
            if (eob == 1)
            {
                // DC only DCT coefficient
                vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd);
            }
            else if (eob <= 12)
            {
                HighbdIdct8x812Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIdct8x864Add(input, dest, stride, bd);
            }
        }

        /// <summary>Selects a high bit depth 16x16 inverse DCT variant based on <paramref name="eob"/>.</summary>
        public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            // The calculation can be simplified if there are not many non-zero dct
            // coefficients. Use eobs to separate different cases.
            if (eob == 1)
            {
                // DC only DCT coefficient.
                HighbdIdct16x161Add(input, dest, stride, bd);
            }
            else if (eob <= 10)
            {
                HighbdIdct16x1610Add(input, dest, stride, bd);
            }
            else if (eob <= 38)
            {
                HighbdIdct16x1638Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIdct16x16256Add(input, dest, stride, bd);
            }
        }

        /// <summary>Selects a high bit depth 32x32 inverse DCT variant based on <paramref name="eob"/>.</summary>
        public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (eob == 1)
            {
                HighbdIdct32x321Add(input, dest, stride, bd);
            }
            else if (eob <= 34)
            {
                // Non-zero coeff only in upper-left 8x8
                HighbdIdct32x3234Add(input, dest, stride, bd);
            }
            else if (eob <= 135)
            {
                // Non-zero coeff only in upper-left 16x16
                HighbdIdct32x32135Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIdct32x321024Add(input, dest, stride, bd);
            }
        }

        // Iht

        /// <summary>
        /// High bit depth 4x4 inverse transform: the DCT path for <c>TxType.DctDct</c>, the hybrid path otherwise.
        /// </summary>
        public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (txType == TxType.DctDct)
            {
                HighbdIdct4x4Add(input, dest, stride, eob, bd);
            }
            else
            {
                HighbdIht4x416Add(input, dest, stride, (int)txType, bd);
            }
        }

        /// <summary>
        /// High bit depth 8x8 inverse transform: the DCT path for <c>TxType.DctDct</c>, the hybrid path otherwise.
        /// </summary>
        public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (txType == TxType.DctDct)
            {
                HighbdIdct8x8Add(input, dest, stride, eob, bd);
            }
            else
            {
                HighbdIht8x864Add(input, dest, stride, (int)txType, bd);
            }
        }

        /// <summary>
        /// High bit depth 16x16 inverse transform: the DCT path for <c>TxType.DctDct</c>, the hybrid path otherwise.
        /// </summary>
        public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (txType == TxType.DctDct)
            {
                HighbdIdct16x16Add(input, dest, stride, eob, bd);
            }
            else
            {
                HighbdIht16x16256Add(input, dest, stride, (int)txType, bd);
            }
        }
    }
}

View file

@ -0,0 +1,15 @@
using System;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
    /// <summary>
    /// Exception type used by the VP9 decoder to report an internal error.
    /// </summary>
    internal class InternalErrorException : Exception
    {
        /// <summary>
        /// Creates the exception with the given message.
        /// </summary>
        /// <param name="message">Text describing the error.</param>
        public InternalErrorException(string message) : base(message)
        {
        }

        /// <summary>
        /// Creates the exception with the given message and the exception that caused it.
        /// </summary>
        /// <param name="message">Text describing the error.</param>
        /// <param name="innerException">Underlying exception, exposed through <see cref="Exception.InnerException"/>.</param>
        public InternalErrorException(string message, Exception innerException) : base(message, innerException)
        {
        }
    }
}

View file

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.Nvdec.Vp9
{
    /// <summary>
    /// Holds the most recently reported decoder error code.
    /// </summary>
    internal struct InternalErrorInfo
    {
        /// <summary>Error code stored by the last call to <see cref="InternalError"/>.</summary>
        public CodecErr ErrorCode;

        /// <summary>
        /// Stores <paramref name="error"/> in <see cref="ErrorCode"/> and then throws.
        /// This method never returns normally.
        /// </summary>
        /// <param name="error">Error code to record.</param>
        /// <param name="message">Message carried by the thrown exception.</param>
        /// <exception cref="InternalErrorException">Always thrown.</exception>
        public void InternalError(CodecErr error, string message)
        {
            // Record first so the code is available to whoever catches the exception.
            this.ErrorCode = error;

            InternalErrorException failure = new InternalErrorException(message);
            throw failure;
        }
    }
}

View file

@ -0,0 +1,418 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class LoopFilter
{
public const int MaxLoopFilter = 63;
public const int MaxRefLfDeltas = 4;
public const int MaxModeLfDeltas = 2;
// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
//
// In the case of TX_16X16 -> ( in low order byte first we end up with
// a mask that looks like this
//
// 10101010
// 10101010
// 10101010
// 10101010
// 10101010
// 10101010
// 10101010
// 10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
private static readonly ulong[] Left64X64TxformMask = new ulong[]
{
0xffffffffffffffffUL, // TX_4X4
0xffffffffffffffffUL, // TX_8x8
0x5555555555555555UL, // TX_16x16
0x1111111111111111UL, // TX_32x32
};
// 64 bit masks for above transform size, indexed by TxSize. Each 1 represents
// a position where we should apply a loop filter across the top border of an
// 8x8 block boundary.
//
// In the case of TX_32x32 (low order byte first) we end up with
// a mask that looks like this:
//
// 11111111
// 00000000
// 00000000
// 00000000
// 11111111
// 00000000
// 00000000
// 00000000
//
// A loopfilter should be applied to every fourth row vertically.
private static readonly ulong[] Above64X64TxformMask = new ulong[]
{
0xffffffffffffffffUL, // TX_4X4
0xffffffffffffffffUL, // TX_8x8
0x00ff00ff00ff00ffUL, // TX_16x16
0x000000ff000000ffUL, // TX_32x32
};
// 64 bit masks for prediction sizes (left), indexed by BlockSize. Each 1 marks
// the left border of an 8x8 block. These are aligned to the right most
// appropriate bit, and then shifted into place.
//
// In the case of BLOCK_16X32 (low order byte first) we end up with
// a mask that looks like this:
//
// 10000000
// 10000000
// 10000000
// 10000000
// 00000000
// 00000000
// 00000000
// 00000000
private static readonly ulong[] LeftPredictionMask = new ulong[]
{
0x0000000000000001UL, // BLOCK_4X4,
0x0000000000000001UL, // BLOCK_4X8,
0x0000000000000001UL, // BLOCK_8X4,
0x0000000000000001UL, // BLOCK_8X8,
0x0000000000000101UL, // BLOCK_8X16,
0x0000000000000001UL, // BLOCK_16X8,
0x0000000000000101UL, // BLOCK_16X16,
0x0000000001010101UL, // BLOCK_16X32,
0x0000000000000101UL, // BLOCK_32X16,
0x0000000001010101UL, // BLOCK_32X32,
0x0101010101010101UL, // BLOCK_32X64,
0x0000000001010101UL, // BLOCK_64X32,
0x0101010101010101UL, // BLOCK_64X64
};
// 64 bit above mask to shift and set for each prediction size, indexed by
// BlockSize. Each 1 marks the top border of an 8x8 block in the first row
// covered by the block.
private static readonly ulong[] AbovePredictionMask = new ulong[]
{
0x0000000000000001UL, // BLOCK_4X4
0x0000000000000001UL, // BLOCK_4X8
0x0000000000000001UL, // BLOCK_8X4
0x0000000000000001UL, // BLOCK_8X8
0x0000000000000001UL, // BLOCK_8X16,
0x0000000000000003UL, // BLOCK_16X8
0x0000000000000003UL, // BLOCK_16X16
0x0000000000000003UL, // BLOCK_16X32,
0x000000000000000fUL, // BLOCK_32X16,
0x000000000000000fUL, // BLOCK_32X32,
0x000000000000000fUL, // BLOCK_32X64,
0x00000000000000ffUL, // BLOCK_64X32,
0x00000000000000ffUL, // BLOCK_64X64
};
// 64 bit mask to shift and set for each prediction size, indexed by BlockSize.
// A bit is set for each 8x8 block that the given block size would cover if it
// were placed at the top-left corner of the 64x64 block.
private static readonly ulong[] SizeMask = new ulong[]
{
0x0000000000000001UL, // BLOCK_4X4
0x0000000000000001UL, // BLOCK_4X8
0x0000000000000001UL, // BLOCK_8X4
0x0000000000000001UL, // BLOCK_8X8
0x0000000000000101UL, // BLOCK_8X16,
0x0000000000000003UL, // BLOCK_16X8
0x0000000000000303UL, // BLOCK_16X16
0x0000000003030303UL, // BLOCK_16X32,
0x0000000000000f0fUL, // BLOCK_32X16,
0x000000000f0f0f0fUL, // BLOCK_32X32,
0x0f0f0f0f0f0f0f0fUL, // BLOCK_32X64,
0x00000000ffffffffUL, // BLOCK_64X32,
0xffffffffffffffffUL, // BLOCK_64X64
};
// These are used for masking the left and above borders. The values are the
// same as the TX_32x32 entries of the left/above 64 bit transform masks.
private const ulong LeftBorder = 0x1111111111111111UL;
private const ulong AboveBorder = 0x000000ff000000ffUL;
// 16 bit masks for uv transform sizes, indexed by TxSize: the left-border
// mask first, then the above-border mask.
private static readonly ushort[] Left64X64TxformMaskUv = new ushort[]
{
0xffff, // TX_4X4
0xffff, // TX_8x8
0x5555, // TX_16x16
0x1111, // TX_32x32
};
private static readonly ushort[] Above64X64TxformMaskUv = new ushort[]
{
0xffff, // TX_4X4
0xffff, // TX_8x8
0x0f0f, // TX_16x16
0x000f, // TX_32x32
};
// 16 bit left mask to shift and set for each uv prediction size, indexed by
// BlockSize.
private static readonly ushort[] LeftPredictionMaskUv = new ushort[]
{
0x0001, // BLOCK_4X4,
0x0001, // BLOCK_4X8,
0x0001, // BLOCK_8X4,
0x0001, // BLOCK_8X8,
0x0001, // BLOCK_8X16,
0x0001, // BLOCK_16X8,
0x0001, // BLOCK_16X16,
0x0011, // BLOCK_16X32,
0x0001, // BLOCK_32X16,
0x0011, // BLOCK_32X32,
0x1111, // BLOCK_32X64
0x0011, // BLOCK_64X32,
0x1111, // BLOCK_64X64
};
// 16 bit above mask to shift and set for each uv prediction size, indexed by
// BlockSize.
private static readonly ushort[] AbovePredictionMaskUv = new ushort[]
{
0x0001, // BLOCK_4X4
0x0001, // BLOCK_4X8
0x0001, // BLOCK_8X4
0x0001, // BLOCK_8X8
0x0001, // BLOCK_8X16,
0x0001, // BLOCK_16X8
0x0001, // BLOCK_16X16
0x0001, // BLOCK_16X32,
0x0003, // BLOCK_32X16,
0x0003, // BLOCK_32X32,
0x0003, // BLOCK_32X64,
0x000f, // BLOCK_64X32,
0x000f, // BLOCK_64X64
};
// 16 bit mask to shift and set for each uv prediction size, indexed by
// BlockSize (the uv counterpart of the 64 bit luma size mask).
private static readonly ushort[] SizeMaskUv = new ushort[]
{
0x0001, // BLOCK_4X4
0x0001, // BLOCK_4X8
0x0001, // BLOCK_8X4
0x0001, // BLOCK_8X8
0x0001, // BLOCK_8X16,
0x0001, // BLOCK_16X8
0x0001, // BLOCK_16X16
0x0011, // BLOCK_16X32,
0x0003, // BLOCK_32X16,
0x0033, // BLOCK_32X32,
0x3333, // BLOCK_32X64,
0x00ff, // BLOCK_64X32,
0xffff, // BLOCK_64X64
};
// 16 bit uv counterparts of the left and above border masks. The values are
// the same as the TX_32x32 entries of the uv transform masks.
private const ushort LeftBorderUv = 0x1111;
private const ushort AboveBorderUv = 0x000f;
// Maps a prediction mode (cast to int) to the last index of the per-segment
// filter level table read by GetFilterLevel: 0 for intra modes and ZEROMV,
// 1 for the other inter modes.
private static readonly int[] ModeLfLut = new int[]
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
1, 1, 0, 1 // INTER_MODES (ZEROMV == 0)
};
// Looks up the filter level of a block from its segment id, its first
// reference frame and the mode class given by ModeLfLut.
private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi)
{
    int modeClass = ModeLfLut[(int)mi.Mode];

    return lfiN.Lvl[mi.SegmentId][mi.RefFrame[0]][modeClass];
}
// Returns a reference to the loop filter mask entry that contains the given
// mode info position: mask entries are addressed in units of 8 mode info
// rows/columns, LfmStride entries per row.
private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol)
{
    int maskCol = miCol >> 3;
    int maskRow = miRow >> 3;

    return ref lf.Lfm[maskCol + (maskRow * lf.LfmStride)];
}
// 8x8 blocks in a superblock, indexed as [row][column]. A "1" represents the
// first block in a 16x16 or greater area, i.e. the positions where both the
// row and the column are even. BuildMask only updates the uv masks for those
// positions.
private static readonly byte[][] FirstBlockIn16x16 = new byte[][]
{
new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }
};
// Builds the loop filter bit masks for the block at (miRow, miCol) inside its
// 64x64 region, and records the block's filter level in the region's level
// map (bh rows of bw entries, 8 entries per row).
// Nothing is written when the block's filter level is 0.
public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh)
{
    BlockSize blockSize = mi.SbType;
    TxSize txSizeY = mi.TxSize;
    int filterLevel = GetFilterLevel(ref cm.LfInfo, ref mi);
    TxSize txSizeUv = Luts.UvTxsizeLookup[(int)blockSize][(int)txSizeY][1][1];
    ref LoopFilterMask mask = ref GetLfm(ref cm.Lf, miRow, miCol);

    // Mask words selected by the luma / chroma transform size of this block.
    ref ulong yLeft = ref mask.LeftY[(int)txSizeY];
    ref ulong yAbove = ref mask.AboveY[(int)txSizeY];
    ref ulong yInt4X4 = ref mask.Int4x4Y;
    ref ushort uvLeft = ref mask.LeftUv[(int)txSizeUv];
    ref ushort uvAbove = ref mask.AboveUv[(int)txSizeUv];
    ref ushort uvInt4X4 = ref mask.Int4x4Uv;

    // Position of the block inside the 64x64 region and the matching bit
    // offsets in the 64 bit (luma) and 16 bit (chroma) masks.
    int sbRow = miRow & 7;
    int sbCol = miCol & 7;
    int yShift = sbCol + (sbRow << 3);
    int uvShift = (sbCol >> 1) + ((sbRow >> 1) << 2);
    bool hasUv = FirstBlockIn16x16[sbRow][sbCol] != 0;

    if (filterLevel == 0)
    {
        return;
    }

    for (int row = 0, offset = yShift; row < bh; row++, offset += 8)
    {
        MemoryMarshal.CreateSpan(ref mask.LflY[offset], 64 - offset).Slice(0, bw).Fill((byte)filterLevel);
    }

    // Set the prediction block edges. For instance, for a 32x16 block:
    //   above = 1111      left = 1000
    //           0000             1000
    // NOTE: the low bit is the left most position, so ( 1000 ) is stored as
    // 1, not 8...
    //
    // U and V use the 16 bit scale.
    yAbove |= AbovePredictionMask[(int)blockSize] << yShift;
    yLeft |= LeftPredictionMask[(int)blockSize] << yShift;

    if (hasUv)
    {
        uvAbove |= (ushort)(AbovePredictionMaskUv[(int)blockSize] << uvShift);
        uvLeft |= (ushort)(LeftPredictionMaskUv[(int)blockSize] << uvShift);
    }

    // A block without coefficients that is not intra gets no filtering on
    // its transform edges.
    bool skippedInter = mi.Skip != 0 && mi.IsInterBlock();
    if (skippedInter)
    {
        return;
    }

    // The transform size masks describe a full 64x64 prediction block:
    // restrict them to the area of this block, then shift them into place.
    ulong areaY = SizeMask[(int)blockSize];

    yAbove |= (areaY & Above64X64TxformMask[(int)txSizeY]) << yShift;
    yLeft |= (areaY & Left64X64TxformMask[(int)txSizeY]) << yShift;

    if (hasUv)
    {
        uvAbove |= (ushort)((SizeMaskUv[(int)blockSize] & Above64X64TxformMaskUv[(int)txSizeUv]) << uvShift);
        uvLeft |= (ushort)((SizeMaskUv[(int)blockSize] & Left64X64TxformMaskUv[(int)txSizeUv]) << uvShift);
    }

    // Internal 4x4 boundaries (those inside an 8x8) only depend on the
    // transform size, not on the prediction block size.
    if (txSizeY == TxSize.Tx4x4)
    {
        yInt4X4 |= areaY << yShift;
    }

    if (hasUv && txSizeUv == TxSize.Tx4x4)
    {
        uvInt4X4 |= (ushort)((SizeMaskUv[(int)blockSize] & 0xffff) << uvShift);
    }
}
// Overwrites the loop filter mask entries of the frame with default
// LoopFilterMask values. Does nothing when the frame filter level is 0.
public static unsafe void ResetLfm(ref Vp9Common cm)
{
    if (cm.Lf.FilterLevel == 0)
    {
        return;
    }

    // One row of LfmStride mask entries per 8 mode info rows, rounded up.
    int maskRows = (cm.MiRows + (Constants.MiBlockSize - 1)) >> 3;

    MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), maskRows * cm.Lf.LfmStride);
}
// Regenerates the Lim / Mblim thresholds of every filter level
// (0..MaxLoopFilter) for the given sharpness level.
private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl)
{
    // The level is shifted right by 0 (sharpness 0), 1 (sharpness 1..4)
    // or 2 (sharpness above 4) bits.
    int shift = (sharpnessLvl > 0 ? 1 : 0) + (sharpnessLvl > 4 ? 1 : 0);

    for (int level = 0; level <= MaxLoopFilter; level++)
    {
        int insideLimit = level >> shift;

        if (sharpnessLvl > 0)
        {
            insideLimit = Math.Min(insideLimit, 9 - sharpnessLvl);
        }

        // The inside limit is never allowed to drop below 1.
        insideLimit = Math.Max(insideLimit, 1);

        lfi.Lfthr[level].Lim.AsSpan().Fill((byte)insideLimit);
        lfi.Lfthr[level].Mblim.AsSpan().Fill((byte)(2 * (level + 2) + insideLimit));
    }
}
// Fills the per-segment / per-reference / per-mode filter level table of the
// frame from the default filter level, the segmentation data and the
// reference / mode deltas. Also refreshes the sharpness-dependent thresholds
// when the sharpness level changed since the last call.
public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl)
{
    // Multiplier applied to the reference / mode deltas:
    // 1 when the default level is between 0 and 31,
    // 2 when it is between 32 and 63.
    int scale = 1 << (defaultFiltLvl >> 5);
    ref LoopFilterInfoN info = ref cm.LfInfo;
    ref Types.LoopFilter filter = ref cm.Lf;
    ref Segmentation segmentation = ref cm.Seg;

    // Update limits if sharpness has changed.
    if (filter.LastSharpnessLevel != filter.SharpnessLevel)
    {
        UpdateSharpness(ref info, filter.SharpnessLevel);
        filter.LastSharpnessLevel = filter.SharpnessLevel;
    }

    for (int segId = 0; segId < Constants.MaxSegments; segId++)
    {
        int segLevel = defaultFiltLvl;

        if (segmentation.IsSegFeatureActive(segId, SegLvlFeatures.SegLvlAltLf) != 0)
        {
            // The segment value either replaces the default level or is
            // added to it, depending on the segmentation delta mode.
            int data = segmentation.GetSegData(segId, SegLvlFeatures.SegLvlAltLf);
            int adjusted = segmentation.AbsDelta == Constants.SegmentAbsData ? data : defaultFiltLvl + data;

            segLevel = Math.Clamp(adjusted, 0, MaxLoopFilter);
        }

        if (filter.ModeRefDeltaEnabled)
        {
            int intraLevel = segLevel + filter.RefDeltas[Constants.IntraFrame] * scale;
            info.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLevel, 0, MaxLoopFilter);

            for (int refFrame = Constants.LastFrame; refFrame < Constants.MaxRefFrames; ++refFrame)
            {
                int refLevel = segLevel + filter.RefDeltas[refFrame] * scale;

                for (int mode = 0; mode < MaxModeLfDeltas; ++mode)
                {
                    int interLevel = refLevel + filter.ModeDeltas[mode] * scale;
                    info.Lvl[segId][refFrame][mode] = (byte)Math.Clamp(interLevel, 0, MaxLoopFilter);
                }
            }
        }
        else
        {
            // We could get rid of this if we assume that deltas are set to
            // zero when not in use; encoder always uses deltas.
            MemoryMarshal.Cast<Array2<byte>, byte>(info.Lvl[segId].AsSpan()).Fill((byte)segLevel);
        }
    }
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,389 @@
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class PredCommon
{
// Derives a context (0..4) from whether the above and left neighbours are
// available, whether they have a second reference, and whether their first
// reference equals cm.CompFixedRef.
public static int GetReferenceModeContext(ref Vp9Common cm, ref MacroBlockD xd)
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    bool hasAbove = !xd.AboveMi.IsNull;
    bool hasLeft = !xd.LeftMi.IsNull;
    int ctx;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        ref ModeInfo above = ref xd.AboveMi.Value;
        ref ModeInfo left = ref xd.LeftMi.Value;
        bool aboveComp = above.HasSecondRef();
        bool leftComp = left.HasSecondRef();

        if (aboveComp && leftComp)
        {
            // Both edges use comp pred (4)
            ctx = 4;
        }
        else if (aboveComp)
        {
            // Only the above edge uses comp pred (2/3)
            ctx = left.RefFrame[0] == cm.CompFixedRef || !left.IsInterBlock() ? 3 : 2;
        }
        else if (leftComp)
        {
            // Only the left edge uses comp pred (2/3)
            ctx = above.RefFrame[0] == cm.CompFixedRef || !above.IsInterBlock() ? 3 : 2;
        }
        else
        {
            // Neither edge uses comp pred (0/1): 1 when exactly one of the
            // two first references is the fixed reference.
            bool aboveFixed = above.RefFrame[0] == cm.CompFixedRef;
            bool leftFixed = left.RefFrame[0] == cm.CompFixedRef;

            ctx = aboveFixed != leftFixed ? 1 : 0;
        }
    }
    else if (hasAbove || hasLeft)
    {
        // One edge available
        ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

        if (edge.HasSecondRef())
        {
            // Edge uses comp pred (3)
            ctx = 3;
        }
        else
        {
            // Edge does not use comp pred (0/1)
            ctx = edge.RefFrame[0] == cm.CompFixedRef ? 1 : 0;
        }
    }
    else
    {
        // No edges available (1)
        ctx = 1;
    }

    Debug.Assert(ctx >= 0 && ctx < Constants.CompInterContexts);

    return ctx;
}
// Returns a context number (0..4) for the given MB prediction signal, derived
// from how the references of the above and left neighbours compare with
// cm.CompVarRef and cm.CompFixedRef.
public static int GetPredContextCompRefP(ref Vp9Common cm, ref MacroBlockD xd)
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    bool hasAbove = !xd.AboveMi.IsNull;
    bool hasLeft = !xd.LeftMi.IsNull;
    int fixRefIdx = cm.RefFrameSignBias[cm.CompFixedRef];
    int varRefIdx = fixRefIdx == 0 ? 1 : 0;
    int predContext;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        ref ModeInfo above = ref xd.AboveMi.Value;
        ref ModeInfo left = ref xd.LeftMi.Value;
        bool aboveIntra = !above.IsInterBlock();
        bool leftIntra = !left.IsInterBlock();

        if (aboveIntra && leftIntra)
        {
            // Intra/Intra (2)
            predContext = 2;
        }
        else if (aboveIntra || leftIntra)
        {
            // Intra/Inter: look at the inter edge only. A single pred edge
            // is compared through RefFrame[0], a comp pred edge through its
            // variable reference (1/3 in both cases).
            ref ModeInfo interEdge = ref aboveIntra ? ref left : ref above;
            int refIdx = interEdge.HasSecondRef() ? varRefIdx : 0;

            predContext = interEdge.RefFrame[refIdx] != cm.CompVarRef[1] ? 3 : 1;
        }
        else
        {
            // Inter/Inter
            bool leftSingle = !left.HasSecondRef();
            bool aboveSingle = !above.HasSecondRef();
            sbyte vrfa = above.RefFrame[aboveSingle ? 0 : varRefIdx];
            sbyte vrfl = left.RefFrame[leftSingle ? 0 : varRefIdx];

            if (vrfa == vrfl && cm.CompVarRef[1] == vrfa)
            {
                predContext = 0;
            }
            else if (leftSingle && aboveSingle)
            {
                // Single/Single
                bool fixedAndVar0 =
                    (vrfa == cm.CompFixedRef && vrfl == cm.CompVarRef[0]) ||
                    (vrfl == cm.CompFixedRef && vrfa == cm.CompVarRef[0]);

                if (fixedAndVar0)
                {
                    predContext = 4;
                }
                else
                {
                    predContext = vrfa == vrfl ? 3 : 1;
                }
            }
            else if (leftSingle || aboveSingle)
            {
                // Single/Comp
                sbyte vrfc = leftSingle ? vrfa : vrfl;
                sbyte rfs = aboveSingle ? vrfa : vrfl;

                if (vrfc == cm.CompVarRef[1] && rfs != cm.CompVarRef[1])
                {
                    predContext = 1;
                }
                else if (rfs == cm.CompVarRef[1] && vrfc != cm.CompVarRef[1])
                {
                    predContext = 2;
                }
                else
                {
                    predContext = 4;
                }
            }
            else
            {
                // Comp/Comp
                predContext = vrfa == vrfl ? 4 : 2;
            }
        }
    }
    else if (hasAbove || hasLeft)
    {
        // One edge available
        ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

        if (!edge.IsInterBlock())
        {
            predContext = 2;
        }
        else if (edge.HasSecondRef())
        {
            predContext = edge.RefFrame[varRefIdx] != cm.CompVarRef[1] ? 4 : 0;
        }
        else
        {
            predContext = edge.RefFrame[0] != cm.CompVarRef[1] ? 3 : 0;
        }
    }
    else
    {
        // No edges available (2)
        predContext = 2;
    }

    Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

    return predContext;
}
// Derives a context (0..4) from how the above and left neighbours use
// Constants.LastFrame as a reference.
public static int GetPredContextSingleRefP1(ref MacroBlockD xd)
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    bool hasAbove = !xd.AboveMi.IsNull;
    bool hasLeft = !xd.LeftMi.IsNull;
    int predContext;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        ref ModeInfo above = ref xd.AboveMi.Value;
        ref ModeInfo left = ref xd.LeftMi.Value;
        bool aboveIntra = !above.IsInterBlock();
        bool leftIntra = !left.IsInterBlock();

        if (aboveIntra && leftIntra)
        {
            // Intra/Intra
            predContext = 2;
        }
        else if (aboveIntra || leftIntra)
        {
            // Intra/Inter or Inter/Intra: only the inter edge matters.
            ref ModeInfo interEdge = ref aboveIntra ? ref left : ref above;

            if (interEdge.HasSecondRef())
            {
                bool anyLast = interEdge.RefFrame[0] == Constants.LastFrame ||
                               interEdge.RefFrame[1] == Constants.LastFrame;

                predContext = anyLast ? 2 : 1;
            }
            else
            {
                predContext = interEdge.RefFrame[0] == Constants.LastFrame ? 4 : 0;
            }
        }
        else
        {
            // Inter/Inter
            bool aboveComp = above.HasSecondRef();
            bool leftComp = left.HasSecondRef();
            sbyte above0 = above.RefFrame[0];
            sbyte above1 = above.RefFrame[1];
            sbyte left0 = left.RefFrame[0];
            sbyte left1 = left.RefFrame[1];

            if (aboveComp && leftComp)
            {
                bool anyLast = above0 == Constants.LastFrame || above1 == Constants.LastFrame ||
                               left0 == Constants.LastFrame || left1 == Constants.LastFrame;

                predContext = anyLast ? 2 : 1;
            }
            else if (aboveComp || leftComp)
            {
                // rfs is the reference of the single pred edge, crf1/crf2
                // are the references of the comp pred edge.
                sbyte rfs = !aboveComp ? above0 : left0;
                sbyte crf1 = aboveComp ? above0 : left0;
                sbyte crf2 = aboveComp ? above1 : left1;
                int compUsesLast = crf1 == Constants.LastFrame || crf2 == Constants.LastFrame ? 1 : 0;

                predContext = rfs == Constants.LastFrame ? 3 + compUsesLast : compUsesLast;
            }
            else
            {
                predContext = (above0 == Constants.LastFrame ? 2 : 0) + (left0 == Constants.LastFrame ? 2 : 0);
            }
        }
    }
    else if (hasAbove || hasLeft)
    {
        // One edge available
        ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

        if (!edge.IsInterBlock())
        {
            // Intra
            predContext = 2;
        }
        else if (edge.HasSecondRef())
        {
            // Inter, comp pred
            bool anyLast = edge.RefFrame[0] == Constants.LastFrame ||
                           edge.RefFrame[1] == Constants.LastFrame;

            predContext = anyLast ? 2 : 1;
        }
        else
        {
            // Inter, single pred
            predContext = edge.RefFrame[0] == Constants.LastFrame ? 4 : 0;
        }
    }
    else
    {
        // No edges available
        predContext = 2;
    }

    Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

    return predContext;
}
// Derives a context (0..4) from how the above and left neighbours use
// Constants.GoldenFrame (and, in some cases, Constants.LastFrame /
// Constants.AltRefFrame) as a reference.
public static int GetPredContextSingleRefP2(ref MacroBlockD xd)
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    bool hasAbove = !xd.AboveMi.IsNull;
    bool hasLeft = !xd.LeftMi.IsNull;
    int predContext;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        ref ModeInfo above = ref xd.AboveMi.Value;
        ref ModeInfo left = ref xd.LeftMi.Value;
        bool aboveIntra = !above.IsInterBlock();
        bool leftIntra = !left.IsInterBlock();

        if (aboveIntra && leftIntra)
        {
            // Intra/Intra
            predContext = 2;
        }
        else if (aboveIntra || leftIntra)
        {
            // Intra/Inter or Inter/Intra: only the inter edge matters.
            ref ModeInfo interEdge = ref aboveIntra ? ref left : ref above;

            if (interEdge.HasSecondRef())
            {
                bool anyGolden = interEdge.RefFrame[0] == Constants.GoldenFrame ||
                                 interEdge.RefFrame[1] == Constants.GoldenFrame;

                predContext = anyGolden ? 3 : 1;
            }
            else if (interEdge.RefFrame[0] == Constants.LastFrame)
            {
                predContext = 3;
            }
            else
            {
                predContext = interEdge.RefFrame[0] == Constants.GoldenFrame ? 4 : 0;
            }
        }
        else
        {
            // Inter/Inter
            bool aboveComp = above.HasSecondRef();
            bool leftComp = left.HasSecondRef();
            sbyte above0 = above.RefFrame[0];
            sbyte above1 = above.RefFrame[1];
            sbyte left0 = left.RefFrame[0];
            sbyte left1 = left.RefFrame[1];

            if (aboveComp && leftComp)
            {
                if (above0 == left0 && above1 == left1)
                {
                    bool anyGolden = above0 == Constants.GoldenFrame || above1 == Constants.GoldenFrame ||
                                     left0 == Constants.GoldenFrame || left1 == Constants.GoldenFrame;

                    predContext = anyGolden ? 3 : 0;
                }
                else
                {
                    predContext = 2;
                }
            }
            else if (aboveComp || leftComp)
            {
                // rfs is the reference of the single pred edge, crf1/crf2
                // are the references of the comp pred edge.
                sbyte rfs = !aboveComp ? above0 : left0;
                sbyte crf1 = aboveComp ? above0 : left0;
                sbyte crf2 = aboveComp ? above1 : left1;
                bool compUsesGolden = crf1 == Constants.GoldenFrame || crf2 == Constants.GoldenFrame;

                if (rfs == Constants.GoldenFrame)
                {
                    predContext = compUsesGolden ? 4 : 3;
                }
                else if (rfs == Constants.AltRefFrame)
                {
                    predContext = compUsesGolden ? 1 : 0;
                }
                else
                {
                    predContext = compUsesGolden ? 3 : 1;
                }
            }
            else if (above0 == Constants.LastFrame && left0 == Constants.LastFrame)
            {
                predContext = 3;
            }
            else if (above0 == Constants.LastFrame || left0 == Constants.LastFrame)
            {
                // Exactly one edge uses the last frame: look at the other one.
                sbyte edge0 = (above0 == Constants.LastFrame) ? left0 : above0;

                predContext = edge0 == Constants.GoldenFrame ? 4 : 0;
            }
            else
            {
                predContext = (above0 == Constants.GoldenFrame ? 2 : 0) + (left0 == Constants.GoldenFrame ? 2 : 0);
            }
        }
    }
    else if (hasAbove || hasLeft)
    {
        // One edge available
        ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

        if (!edge.IsInterBlock() || (edge.RefFrame[0] == Constants.LastFrame && !edge.HasSecondRef()))
        {
            predContext = 2;
        }
        else if (!edge.HasSecondRef())
        {
            predContext = edge.RefFrame[0] == Constants.GoldenFrame ? 4 : 0;
        }
        else
        {
            bool anyGolden = edge.RefFrame[0] == Constants.GoldenFrame ||
                             edge.RefFrame[1] == Constants.GoldenFrame;

            predContext = anyGolden ? 3 : 0;
        }
    }
    else
    {
        // No edges available (2)
        predContext = 2;
    }

    Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

    return predContext;
}
}
}

View file

@ -0,0 +1,203 @@
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class QuantCommon
{
// Bounds of the quantizer index. MaxQ is the upper clamp bound used for every
// lookup into the 256-entry quantizer tables below.
public const int MinQ = 0;
public const int MaxQ = 255;
// DC quantizer values for 8 bit content, indexed by quantizer index
// (256 entries, 0..MaxQ). Read by DcQuant.
private static readonly short[] DcQlookup = new short[]
{
4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53,
54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65,
66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76,
77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88,
90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110,
111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134,
136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164,
166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202,
205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247,
250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300,
304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364,
369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441,
447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549,
559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736,
755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139,
1184, 1232, 1282, 1336,
};
// DC quantizer values for 10 bit content, indexed by quantizer index
// (256 entries, 0..MaxQ). Read by DcQuant.
private static readonly short[] DcQlookup10 = new short[]
{
4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37,
40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82,
86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132,
136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182,
185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230,
233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276,
280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321,
324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387,
394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466,
472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567,
576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687,
698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831,
844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001,
1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202,
1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436,
1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088,
2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675,
2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823,
3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
};
// DC quantizer values for 12 bit content, indexed by quantizer index
// (256 entries, 0..MaxQ). Read by DcQuant.
private static readonly short[] DcQlookup12 = new short[]
{
4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91,
103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237,
251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405,
421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580,
596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752,
768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919,
934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080,
1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419,
1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692,
1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957,
1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334,
2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746,
2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226,
3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788,
3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153,
5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984,
6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966,
7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214,
8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031,
10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118,
13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
19718, 20521, 21387,
};
// AC quantizer values for 8 bit content, indexed by quantizer index
// (256 entries, 0..MaxQ). Read by AcQuant.
private static readonly short[] AcQlookup = new short[]
{
4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144,
146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179,
182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223,
227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280,
285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353,
359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448,
456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571,
582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729,
743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933,
951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196,
1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537,
1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
};
// AC quantizer values for 10 bit content, indexed by quantizer index
// (256 entries, 0..MaxQ). Read by AcQuant.
private static readonly short[] AcQlookup10 = new short[]
{
4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149,
154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208,
213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267,
271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324,
328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379,
384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466,
474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571,
579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713,
725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889,
905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118,
1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411,
1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791,
1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283,
2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731,
3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784,
4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148,
6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
};
// AC quantizer values for 12 bit content, indexed by quantizer index
// (256 entries, 0..MaxQ). Read by AcQuant.
private static readonly short[] AcQlookup12 = new short[]
{
4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99,
112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263,
280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456,
475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660,
679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865,
884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067,
1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264,
1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693,
1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052,
2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411,
2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943,
2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555,
3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310,
4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256,
5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867,
8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660,
9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885,
12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637,
14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062,
18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334,
22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
28143, 28687, 29247,
};
// Returns the DC quantizer value for the given bit depth at index
// qindex + delta, clamped to 0..MaxQ. For an unsupported bit depth a debug
// assertion fires and -1 is returned.
public static short DcQuant(int qindex, int delta, BitDepth bitDepth)
{
    short[] table;

    if (bitDepth == BitDepth.Bits8)
    {
        table = DcQlookup;
    }
    else if (bitDepth == BitDepth.Bits10)
    {
        table = DcQlookup10;
    }
    else if (bitDepth == BitDepth.Bits12)
    {
        table = DcQlookup12;
    }
    else
    {
        Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");

        return -1;
    }

    return table[Math.Clamp(qindex + delta, 0, MaxQ)];
}
// Returns the AC quantizer value for the given bit depth at index
// qindex + delta, clamped to 0..MaxQ. For an unsupported bit depth a debug
// assertion fires and -1 is returned.
public static short AcQuant(int qindex, int delta, BitDepth bitDepth)
{
    short[] table;

    if (bitDepth == BitDepth.Bits8)
    {
        table = AcQlookup;
    }
    else if (bitDepth == BitDepth.Bits10)
    {
        table = AcQlookup10;
    }
    else if (bitDepth == BitDepth.Bits12)
    {
        table = AcQlookup12;
    }
    else
    {
        Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");

        return -1;
    }

    return table[Math.Clamp(qindex + delta, 0, MaxQ)];
}
// Returns the quantizer index to use for a segment: baseQIndex when the
// segment has no alternate quantizer feature, otherwise the segment value
// (absolute, or added to baseQIndex) clamped to 0..MaxQ.
public static int GetQIndex(ref Segmentation seg, int segmentId, int baseQIndex)
{
    if (seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlAltQ) == 0)
    {
        return baseQIndex;
    }

    int segQIndex = seg.GetSegData(segmentId, SegLvlFeatures.SegLvlAltQ);

    if (seg.AbsDelta != Constants.SegmentAbsData)
    {
        // The segment data is a delta relative to the base index.
        segQIndex = baseQIndex + segQIndex;
    }

    return Math.Clamp(segQIndex, 0, MaxQ);
}
}
}

View file

@ -0,0 +1,234 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class ReconInter
{
// Forwards an inter prediction request to sf.InterPredict. The first two
// arguments passed on are flags: 1 when the corresponding sub-pixel offset is
// non-zero, 0 otherwise.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe void InterPredictor(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    int subpelX,
    int subpelY,
    ref ScaleFactors sf,
    int w,
    int h,
    int refr,
    Array8<short>[] kernel,
    int xs,
    int ys)
{
    int hasSubpelX = subpelX != 0 ? 1 : 0;
    int hasSubpelY = subpelY != 0 ? 1 : 0;

    sf.InterPredict(hasSubpelX, hasSubpelY, refr, src, srcStride, dst, dstStride, subpelX, subpelY, w, h, kernel, xs, ys);
}
// Forwards a high bit depth inter prediction request to
// sf.HighbdInterPredict. The first two arguments passed on are flags: 1 when
// the corresponding sub-pixel offset is non-zero, 0 otherwise.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe void HighbdInterPredictor(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    int subpelX,
    int subpelY,
    ref ScaleFactors sf,
    int w,
    int h,
    int refr,
    Array8<short>[] kernel,
    int xs,
    int ys,
    int bd)
{
    int hasSubpelX = subpelX != 0 ? 1 : 0;
    int hasSubpelY = subpelY != 0 ? 1 : 0;

    sf.HighbdInterPredict(hasSubpelX, hasSubpelY, refr, src, srcStride, dst, dstStride, subpelX, subpelY, w, h, kernel, xs, ys, bd);
}
// Divides value by 4, rounding to nearest with ties away from zero.
private static int RoundMvCompQ4(int value)
{
    int bias = value < 0 ? -2 : 2;

    return (value + bias) / 4;
}
// Averages (with rounding) the idx-th motion vector of all four sub-blocks of mi.
private static Mv MiMvPredQ4(ref ModeInfo mi, int idx)
{
    int rowSum = 0;
    int colSum = 0;

    for (int b = 0; b < 4; b++)
    {
        rowSum += mi.Bmi[b].Mv[idx].Row;
        colSum += mi.Bmi[b].Mv[idx].Col;
    }

    Mv average = new Mv();
    average.Row = (short)RoundMvCompQ4(rowSum);
    average.Col = (short)RoundMvCompQ4(colSum);

    return average;
}
// Divides value by 2, rounding to nearest with ties away from zero.
private static int RoundMvCompQ2(int value)
{
    int bias = value < 0 ? -1 : 1;

    return (value + bias) / 2;
}
// Averages (with rounding) the idx-th motion vector of sub-blocks block0 and block1.
private static Mv MiMvPredQ2(ref ModeInfo mi, int idx, int block0, int block1)
{
    int rowSum = mi.Bmi[block0].Mv[idx].Row + mi.Bmi[block1].Mv[idx].Row;
    int colSum = mi.Bmi[block0].Mv[idx].Col + mi.Bmi[block1].Mv[idx].Col;

    Mv average = new Mv();
    average.Row = (short)RoundMvCompQ2(rowSum);
    average.Col = (short)RoundMvCompQ2(colSum);

    return average;
}
// Scales srcMv by 1 << (1 - ss) per axis and clamps it against the block's edge
// distances (scaled the same way) widened by the interpolation border.
public static Mv ClampMvToUmvBorderSb(ref MacroBlockD xd, ref Mv srcMv, int bw, int bh, int ssX, int ssY)
{
    // If the MV points so far into the UMV border that no visible pixels
    // are used for reconstruction, the subpel part of the MV can be
    // discarded and the MV limited to 16 pixels with equivalent results.
    int spelLeft = (Constants.Vp9InterpExtend + bw) << SubpelBits;
    int spelTop = (Constants.Vp9InterpExtend + bh) << SubpelBits;
    int spelRight = spelLeft - SubpelShifts;
    int spelBottom = spelTop - SubpelShifts;

    // Per-axis multiplier: 2 when the plane is not subsampled, 1 when it is.
    int scaleX = 1 << (1 - ssX);
    int scaleY = 1 << (1 - ssY);

    Mv result = new Mv();
    result.Row = (short)(srcMv.Row * scaleY);
    result.Col = (short)(srcMv.Col * scaleX);

    Debug.Assert(ssX <= 1);
    Debug.Assert(ssY <= 1);

    int minCol = xd.MbToLeftEdge * scaleX - spelLeft;
    int maxCol = xd.MbToRightEdge * scaleX + spelRight;
    int minRow = xd.MbToTopEdge * scaleY - spelTop;
    int maxRow = xd.MbToBottomEdge * scaleY + spelBottom;

    result.ClampMv(minCol, maxCol, minRow, maxRow);

    return result;
}
// Picks the motion vector to use for a sub-block of a split block, averaging
// neighbouring sub-block vectors according to the plane's subsampling:
//   no subsampling   -> the sub-block's own vector
//   vertical only    -> average of block and block + 2
//   horizontal only  -> average of block and block + 1
//   both             -> average of all four sub-blocks
public static Mv AverageSplitMvs(ref MacroBlockDPlane pd, ref ModeInfo mi, int refr, int block)
{
    bool subsampledX = pd.SubsamplingX > 0;
    bool subsampledY = pd.SubsamplingY > 0;

    if (subsampledX && subsampledY)
    {
        return MiMvPredQ4(ref mi, refr);
    }

    if (subsampledX)
    {
        return MiMvPredQ2(ref mi, refr, block, block + 1);
    }

    if (subsampledY)
    {
        return MiMvPredQ2(ref mi, refr, block, block + 2);
    }

    return mi.Bmi[block].Mv[refr];
}
// Converts an (x, y) position into a linear buffer offset, first passing both
// coordinates through the scale factors when sf is not null.
private static int ScaledBufferOffset(int xOffset, int yOffset, int stride, Ptr<ScaleFactors> sf)
{
    int x = xOffset;
    int y = yOffset;

    if (!sf.IsNull)
    {
        x = sf.Value.ScaleValueX(xOffset);
        y = sf.Value.ScaleValueY(yOffset);
    }

    return y * stride + x;
}
// Points dst at the position inside src that corresponds to mode-info position
// (miRow, miCol) for a plane with the given subsampling, and records the stride.
private static void SetupPredPlanes(
    ref Buf2D dst,
    ArrayPtr<byte> src,
    int stride,
    int miRow,
    int miCol,
    Ptr<ScaleFactors> scale,
    int subsamplingX,
    int subsamplingY)
{
    int planeX = (Constants.MiSize * miCol) >> subsamplingX;
    int planeY = (Constants.MiSize * miRow) >> subsamplingY;
    int offset = ScaledBufferOffset(planeX, planeY, stride, scale);

    dst.Buf = src.Slice(offset);
    dst.Stride = stride;
}
// Sets the Dst buffer of every plane to the position of (miRow, miCol) inside the
// matching Y/U/V buffer of src. No scaling is applied (a null scale is passed).
public static void SetupDstPlanes(
    ref Array3<MacroBlockDPlane> planes,
    ref Surface src,
    int miRow,
    int miCol)
{
    Span<ArrayPtr<byte>> planeBuffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
    Span<int> planeStrides = stackalloc int[Constants.MaxMbPlane];

    planeBuffers[0] = src.YBuffer;
    planeBuffers[1] = src.UBuffer;
    planeBuffers[2] = src.VBuffer;

    // Both chroma planes share the UV stride.
    planeStrides[0] = src.Stride;
    planeStrides[1] = src.UvStride;
    planeStrides[2] = src.UvStride;

    for (int plane = 0; plane < Constants.MaxMbPlane; plane++)
    {
        ref MacroBlockDPlane pd = ref planes[plane];

        SetupPredPlanes(
            ref pd.Dst,
            planeBuffers[plane],
            planeStrides[plane],
            miRow,
            miCol,
            Ptr<ScaleFactors>.Null,
            pd.SubsamplingX,
            pd.SubsamplingY);
    }
}
// Sets Pre[idx] of every plane in xd to the (optionally scaled) position of
// (miRow, miCol) inside the matching Y/U/V buffer of src. Does nothing when any
// of the three source buffers is null.
public static void SetupPrePlanes(
    ref MacroBlockD xd,
    int idx,
    ref Surface src,
    int miRow,
    int miCol,
    Ptr<ScaleFactors> sf)
{
    if (src.YBuffer.IsNull || src.UBuffer.IsNull || src.VBuffer.IsNull)
    {
        return;
    }

    Span<ArrayPtr<byte>> planeBuffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
    Span<int> planeStrides = stackalloc int[Constants.MaxMbPlane];

    planeBuffers[0] = src.YBuffer;
    planeBuffers[1] = src.UBuffer;
    planeBuffers[2] = src.VBuffer;

    // Both chroma planes share the UV stride.
    planeStrides[0] = src.Stride;
    planeStrides[1] = src.UvStride;
    planeStrides[2] = src.UvStride;

    for (int plane = 0; plane < Constants.MaxMbPlane; plane++)
    {
        ref MacroBlockDPlane pd = ref xd.Plane[plane];

        SetupPredPlanes(
            ref pd.Pre[idx],
            planeBuffers[plane],
            planeStrides[plane],
            miRow,
            miCol,
            sf,
            pd.SubsamplingX,
            pd.SubsamplingY);
    }
}
}
}

View file

@ -0,0 +1,762 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.IntraPred;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class ReconIntra
{
// Transform type associated with each intra prediction mode; entries are listed
// in the mode order given by the trailing comments.
public static readonly TxType[] IntraModeToTxTypeLookup =
{
    TxType.DctDct,   // DC
    TxType.AdstDct,  // V
    TxType.DctAdst,  // H
    TxType.DctDct,   // D45
    TxType.AdstAdst, // D135
    TxType.AdstDct,  // D117
    TxType.DctAdst,  // D153
    TxType.DctAdst,  // D207
    TxType.AdstDct,  // D63
    TxType.AdstAdst  // TM
};
// Bit flags describing which neighbouring pixels a prediction mode reads.
// The flags start at bit 1; bit 0 is not used by any flag here.
private const int NeedLeft = 1 << 1;
private const int NeedAbove = 1 << 2;
private const int NeedAboveRight = 1 << 3;
// Per-mode combination of the Need* flags, indexed by (int)PredictionMode.
// The intra predictor builders test these bits to decide which edge buffers
// (left column, above row, above + above-right row) have to be prepared.
private static ReadOnlySpan<byte> ExtendModes => new byte[]
{
NeedAbove | NeedLeft, // DC
NeedAbove, // V
NeedLeft, // H
NeedAboveRight, // D45
NeedLeft | NeedAbove, // D135
NeedLeft | NeedAbove, // D117
NeedLeft | NeedAbove, // D153
NeedLeft, // D207
NeedAboveRight, // D63
NeedLeft | NeedAbove, // TM
};
// Signature shared by all 8-bit intra predictors: writes a predicted block to dst
// (rows dstStride apart) from the above row and left column edge buffers.
private unsafe delegate void IntraPredFn(byte* dst, int stride, byte* above, byte* left);

// 8-bit directional/TM predictors, indexed as [(int)mode][(int)txSize] with the
// four columns being the 4x4, 8x8, 16x16 and 32x32 variants. Row 0 (DC) is left
// null because DC prediction is dispatched through _dcPred instead.
// The table is never reassigned, so the field is readonly.
private static readonly unsafe IntraPredFn[][] _pred = new IntraPredFn[][]
{
    new IntraPredFn[]
    {
        null,
        null,
        null,
        null
    },
    new IntraPredFn[]
    {
        VPredictor4x4,
        VPredictor8x8,
        VPredictor16x16,
        VPredictor32x32
    },
    new IntraPredFn[]
    {
        HPredictor4x4,
        HPredictor8x8,
        HPredictor16x16,
        HPredictor32x32
    },
    new IntraPredFn[]
    {
        D45Predictor4x4,
        D45Predictor8x8,
        D45Predictor16x16,
        D45Predictor32x32
    },
    new IntraPredFn[]
    {
        D135Predictor4x4,
        D135Predictor8x8,
        D135Predictor16x16,
        D135Predictor32x32
    },
    new IntraPredFn[]
    {
        D117Predictor4x4,
        D117Predictor8x8,
        D117Predictor16x16,
        D117Predictor32x32
    },
    new IntraPredFn[]
    {
        D153Predictor4x4,
        D153Predictor8x8,
        D153Predictor16x16,
        D153Predictor32x32
    },
    new IntraPredFn[]
    {
        D207Predictor4x4,
        D207Predictor8x8,
        D207Predictor16x16,
        D207Predictor32x32
    },
    new IntraPredFn[]
    {
        D63Predictor4x4,
        D63Predictor8x8,
        D63Predictor16x16,
        D63Predictor32x32
    },
    new IntraPredFn[]
    {
        TMPredictor4x4,
        TMPredictor8x8,
        TMPredictor16x16,
        TMPredictor32x32
    }
};
// 8-bit DC predictors, indexed as [leftAvailable][upAvailable][(int)txSize]:
//   [0][0] neither edge available -> Dc128
//   [0][1] only the above row     -> DcTop
//   [1][0] only the left column   -> DcLeft
//   [1][1] both edges             -> Dc
// The table is never reassigned, so the field is readonly.
private static readonly unsafe IntraPredFn[][][] _dcPred = new IntraPredFn[][][]
{
    new IntraPredFn[][]
    {
        new IntraPredFn[]
        {
            Dc128Predictor4x4,
            Dc128Predictor8x8,
            Dc128Predictor16x16,
            Dc128Predictor32x32
        },
        new IntraPredFn[]
        {
            DcTopPredictor4x4,
            DcTopPredictor8x8,
            DcTopPredictor16x16,
            DcTopPredictor32x32
        }
    },
    new IntraPredFn[][]
    {
        new IntraPredFn[]
        {
            DcLeftPredictor4x4,
            DcLeftPredictor8x8,
            DcLeftPredictor16x16,
            DcLeftPredictor32x32
        },
        new IntraPredFn[]
        {
            DcPredictor4x4,
            DcPredictor8x8,
            DcPredictor16x16,
            DcPredictor32x32
        }
    }
};
// Signature shared by all high bit depth intra predictors: same as the 8-bit
// form but on ushort samples, with the bit depth passed as bd.
private unsafe delegate void IntraHighPredFn(ushort* dst, int stride, ushort* above, ushort* left, int bd);

// High bit depth directional/TM predictors, indexed as [(int)mode][(int)txSize]
// with the four columns being the 4x4, 8x8, 16x16 and 32x32 variants. Row 0 (DC)
// is left null because DC prediction is dispatched through _dcPredHigh instead.
// The table is never reassigned, so the field is readonly.
private static readonly unsafe IntraHighPredFn[][] _predHigh = new IntraHighPredFn[][]
{
    new IntraHighPredFn[]
    {
        null,
        null,
        null,
        null
    },
    new IntraHighPredFn[]
    {
        HighbdVPredictor4x4,
        HighbdVPredictor8x8,
        HighbdVPredictor16x16,
        HighbdVPredictor32x32
    },
    new IntraHighPredFn[]
    {
        HighbdHPredictor4x4,
        HighbdHPredictor8x8,
        HighbdHPredictor16x16,
        HighbdHPredictor32x32
    },
    new IntraHighPredFn[]
    {
        HighbdD45Predictor4x4,
        HighbdD45Predictor8x8,
        HighbdD45Predictor16x16,
        HighbdD45Predictor32x32
    },
    new IntraHighPredFn[]
    {
        HighbdD135Predictor4x4,
        HighbdD135Predictor8x8,
        HighbdD135Predictor16x16,
        HighbdD135Predictor32x32
    },
    new IntraHighPredFn[]
    {
        HighbdD117Predictor4x4,
        HighbdD117Predictor8x8,
        HighbdD117Predictor16x16,
        HighbdD117Predictor32x32
    },
    new IntraHighPredFn[]
    {
        HighbdD153Predictor4x4,
        HighbdD153Predictor8x8,
        HighbdD153Predictor16x16,
        HighbdD153Predictor32x32
    },
    new IntraHighPredFn[]
    {
        HighbdD207Predictor4x4,
        HighbdD207Predictor8x8,
        HighbdD207Predictor16x16,
        HighbdD207Predictor32x32
    },
    new IntraHighPredFn[]
    {
        HighbdD63Predictor4x4,
        HighbdD63Predictor8x8,
        HighbdD63Predictor16x16,
        HighbdD63Predictor32x32
    },
    new IntraHighPredFn[]
    {
        HighbdTMPredictor4x4,
        HighbdTMPredictor8x8,
        HighbdTMPredictor16x16,
        HighbdTMPredictor32x32
    }
};
// High bit depth DC predictors, indexed as [leftAvailable][upAvailable][(int)txSize]:
//   [0][0] neither edge available -> Dc128
//   [0][1] only the above row     -> DcTop
//   [1][0] only the left column   -> DcLeft
//   [1][1] both edges             -> Dc
// The table is never reassigned, so the field is readonly.
private static readonly unsafe IntraHighPredFn[][][] _dcPredHigh = new IntraHighPredFn[][][]
{
    new IntraHighPredFn[][]
    {
        new IntraHighPredFn[]
        {
            HighbdDc128Predictor4x4,
            HighbdDc128Predictor8x8,
            HighbdDc128Predictor16x16,
            HighbdDc128Predictor32x32
        },
        new IntraHighPredFn[]
        {
            HighbdDcTopPredictor4x4,
            HighbdDcTopPredictor8x8,
            HighbdDcTopPredictor16x16,
            HighbdDcTopPredictor32x32
        }
    },
    new IntraHighPredFn[][]
    {
        new IntraHighPredFn[]
        {
            HighbdDcLeftPredictor4x4,
            HighbdDcLeftPredictor8x8,
            HighbdDcLeftPredictor16x16,
            HighbdDcLeftPredictor32x32
        },
        new IntraHighPredFn[]
        {
            HighbdDcPredictor4x4,
            HighbdDcPredictor8x8,
            HighbdDcPredictor16x16,
            HighbdDcPredictor32x32
        }
    }
};
// Builds the edge buffers (left column, above row, above-right row) that the
// selected prediction mode needs for a high bit depth block, extending them at
// the frame border or substituting constants when a neighbour is unavailable,
// and then runs the matching predictor into dst8.
//
// ref8 / dst8 are reinterpreted as ushort*; refStride and dstStride are used to
// index those ushort pointers.
// NOTE(review): the strides are therefore presumably in samples, not bytes - confirm with callers.
// upAvailable / leftAvailable / rightAvailable are 0/1 flags; the first two are
// also used directly as indices into _dcPredHigh.
// x and y are added to the block origin derived from the MbTo*Edge distances to
// obtain the position inside the frame; plane 0 uses the luma frame size, any
// other plane the chroma (Uv) size.
private static unsafe void BuildIntraPredictorsHigh(
ref MacroBlockD xd,
byte* ref8,
int refStride,
byte* dst8,
int dstStride,
PredictionMode mode,
TxSize txSize,
int upAvailable,
int leftAvailable,
int rightAvailable,
int x,
int y,
int plane)
{
int i;
ushort* dst = (ushort*)dst8;
ushort* refr = (ushort*)ref8;
ushort* leftCol = stackalloc ushort[32];
ushort* aboveData = stackalloc ushort[64 + 16];
// 16 entries of headroom are kept in front of the row so aboveRow[-1] is addressable.
ushort* aboveRow = aboveData + 16;
// Pointer actually handed to the predictor; it is redirected to the reference
// frame itself on the fast path where no copy is needed.
ushort* constAboveRow = aboveRow;
// Block edge length in pixels.
int bs = 4 << (int)txSize;
int frameWidth, frameHeight;
int x0, y0;
ref MacroBlockDPlane pd = ref xd.Plane[plane];
int needLeft = ExtendModes[(int)mode] & NeedLeft;
int needAbove = ExtendModes[(int)mode] & NeedAbove;
int needAboveRight = ExtendModes[(int)mode] & NeedAboveRight;
// Mid-grey for the current bit depth (128 at 8 bits).
int baseVal = 128 << (xd.Bd - 8);
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1.
// Get current frame pointer, width and height.
if (plane == 0)
{
frameWidth = xd.CurBuf.Width;
frameHeight = xd.CurBuf.Height;
}
else
{
frameWidth = xd.CurBuf.UvWidth;
frameHeight = xd.CurBuf.UvHeight;
}
// Get block position in current frame.
// The edge distances are stored in 1/8 pixel units, hence the shift by 3
// (plus the plane's subsampling shift).
x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;
// NEED_LEFT
if (needLeft != 0)
{
if (leftAvailable != 0)
{
if (xd.MbToBottomEdge < 0)
{
/* slower path if the block needs border extension */
if (y0 + bs <= frameHeight)
{
for (i = 0; i < bs; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
}
else
{
// Copy the rows that lie inside the frame, then repeat the last one.
int extendBottom = frameHeight - y0;
for (i = 0; i < extendBottom; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
for (; i < bs; ++i)
{
leftCol[i] = refr[(extendBottom - 1) * refStride - 1];
}
}
}
else
{
/* faster path if the block does not need extension */
for (i = 0; i < bs; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
}
}
else
{
MemoryUtil.Fill(leftCol, (ushort)(baseVal + 1), bs);
}
}
// NEED_ABOVE
if (needAbove != 0)
{
if (upAvailable != 0)
{
ushort* aboveRef = refr - refStride;
if (xd.MbToRightEdge < 0)
{
/* slower path if the block needs border extension */
if (x0 + bs <= frameWidth)
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
}
else if (x0 <= frameWidth)
{
// Copy the in-frame part and replicate its last pixel over the rest.
int r = frameWidth - x0;
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + bs - frameWidth);
}
}
else
{
/* faster path if the block does not need extension */
if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
{
constAboveRow = aboveRef;
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
}
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
}
else
{
MemoryUtil.Fill(aboveRow, (ushort)(baseVal - 1), bs);
aboveRow[-1] = (ushort)(baseVal - 1);
}
}
// NEED_ABOVERIGHT
if (needAboveRight != 0)
{
if (upAvailable != 0)
{
ushort* aboveRef = refr - refStride;
if (xd.MbToRightEdge < 0)
{
/* slower path if the block needs border extension */
if (x0 + 2 * bs <= frameWidth)
{
if (rightAvailable != 0 && bs == 4)
{
MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
else if (x0 + bs <= frameWidth)
{
int r = frameWidth - x0;
if (rightAvailable != 0 && bs == 4)
{
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
else if (x0 <= frameWidth)
{
int r = frameWidth - x0;
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
}
else
{
/* faster path if the block does not need extension */
if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
{
constAboveRow = aboveRef;
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
if (bs == 4 && rightAvailable != 0)
{
MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
}
else
{
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
}
}
}
else
{
MemoryUtil.Fill(aboveRow, (ushort)(baseVal - 1), bs * 2);
aboveRow[-1] = (ushort)(baseVal - 1);
}
}
// Predict
// DC prediction picks its variant from the availability flags; every other
// mode is looked up by mode and transform size.
if (mode == PredictionMode.DcPred)
{
_dcPredHigh[leftAvailable][upAvailable][(int)txSize](dst, dstStride, constAboveRow, leftCol, xd.Bd);
}
else
{
_predHigh[(int)mode][(int)txSize](dst, dstStride, constAboveRow, leftCol, xd.Bd);
}
}
// Builds the edge buffers (left column, above row, above-right row) that the
// selected prediction mode needs for an 8-bit block, extending them at the frame
// border or substituting the constants 127 / 129 when a neighbour is
// unavailable, and then runs the matching predictor into dst.
//
// upAvailable / leftAvailable / rightAvailable are 0/1 flags; the first two are
// also used directly as indices into _dcPred.
// x and y are added to the block origin derived from the MbTo*Edge distances to
// obtain the position inside the frame; plane 0 uses the luma frame size, any
// other plane the chroma (Uv) size.
public static unsafe void BuildIntraPredictors(
ref MacroBlockD xd,
byte* refr,
int refStride,
byte* dst,
int dstStride,
PredictionMode mode,
TxSize txSize,
int upAvailable,
int leftAvailable,
int rightAvailable,
int x,
int y,
int plane)
{
int i;
byte* leftCol = stackalloc byte[32];
byte* aboveData = stackalloc byte[64 + 16];
// 16 bytes of headroom are kept in front of the row so aboveRow[-1] is addressable.
byte* aboveRow = aboveData + 16;
// Pointer actually handed to the predictor; it is redirected to the reference
// frame itself on the fast path where no copy is needed.
byte* constAboveRow = aboveRow;
// Block edge length in pixels.
int bs = 4 << (int)txSize;
int frameWidth, frameHeight;
int x0, y0;
ref MacroBlockDPlane pd = ref xd.Plane[plane];
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// ..
// Get current frame pointer, width and height.
if (plane == 0)
{
frameWidth = xd.CurBuf.Width;
frameHeight = xd.CurBuf.Height;
}
else
{
frameWidth = xd.CurBuf.UvWidth;
frameHeight = xd.CurBuf.UvHeight;
}
// Get block position in current frame.
// The edge distances are stored in 1/8 pixel units, hence the shift by 3
// (plus the plane's subsampling shift).
x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;
// NEED_LEFT
if ((ExtendModes[(int)mode] & NeedLeft) != 0)
{
if (leftAvailable != 0)
{
if (xd.MbToBottomEdge < 0)
{
/* Slower path if the block needs border extension */
if (y0 + bs <= frameHeight)
{
for (i = 0; i < bs; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
}
else
{
// Copy the rows that lie inside the frame, then repeat the last one.
int extendBottom = frameHeight - y0;
for (i = 0; i < extendBottom; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
for (; i < bs; ++i)
{
leftCol[i] = refr[(extendBottom - 1) * refStride - 1];
}
}
}
else
{
/* Faster path if the block does not need extension */
for (i = 0; i < bs; ++i)
{
leftCol[i] = refr[i * refStride - 1];
}
}
}
else
{
MemoryUtil.Fill(leftCol, (byte)129, bs);
}
}
// NEED_ABOVE
if ((ExtendModes[(int)mode] & NeedAbove) != 0)
{
if (upAvailable != 0)
{
byte* aboveRef = refr - refStride;
if (xd.MbToRightEdge < 0)
{
/* Slower path if the block needs border extension */
if (x0 + bs <= frameWidth)
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
}
else if (x0 <= frameWidth)
{
// Copy the in-frame part and replicate its last pixel over the rest.
int r = frameWidth - x0;
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + bs - frameWidth);
}
}
else
{
/* Faster path if the block does not need extension */
if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
{
constAboveRow = aboveRef;
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
}
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (byte)129;
}
else
{
MemoryUtil.Fill(aboveRow, (byte)127, bs);
aboveRow[-1] = 127;
}
}
// NEED_ABOVERIGHT
if ((ExtendModes[(int)mode] & NeedAboveRight) != 0)
{
if (upAvailable != 0)
{
byte* aboveRef = refr - refStride;
if (xd.MbToRightEdge < 0)
{
/* Slower path if the block needs border extension */
if (x0 + 2 * bs <= frameWidth)
{
if (rightAvailable != 0 && bs == 4)
{
MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
else if (x0 + bs <= frameWidth)
{
int r = frameWidth - x0;
if (rightAvailable != 0 && bs == 4)
{
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
else if (x0 <= frameWidth)
{
int r = frameWidth - x0;
MemoryUtil.Copy(aboveRow, aboveRef, r);
MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
}
}
else
{
/* Faster path if the block does not need extension */
if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
{
constAboveRow = aboveRef;
}
else
{
MemoryUtil.Copy(aboveRow, aboveRef, bs);
if (bs == 4 && rightAvailable != 0)
{
MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
}
else
{
MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
}
}
}
aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (byte)129;
}
else
{
MemoryUtil.Fill(aboveRow, (byte)127, bs * 2);
aboveRow[-1] = 127;
}
}
// Predict
// DC prediction picks its variant from the availability flags; every other
// mode is looked up by mode and transform size.
if (mode == PredictionMode.DcPred)
{
_dcPred[leftAvailable][upAvailable][(int)txSize](dst, dstStride, constAboveRow, leftCol);
}
else
{
_pred[(int)mode][(int)txSize](dst, dstStride, constAboveRow, leftCol);
}
}
// Entry point for intra prediction of one transform block. Derives the
// top/left/right availability flags and the pixel offset from the 4x4-unit
// offsets aoff/loff, then dispatches to the high bit depth or 8-bit builder
// depending on the current buffer.
public static unsafe void PredictIntraBlock(
    ref MacroBlockD xd,
    int bwlIn,
    TxSize txSize,
    PredictionMode mode,
    byte* refr,
    int refStride,
    byte* dst,
    int dstStride,
    int aoff,
    int loff,
    int plane)
{
    int blockWidth = 1 << bwlIn;
    int txWidth = 1 << (int)txSize;

    // A neighbour is usable when we are inside the block already (non-zero
    // offset) or the neighbouring mode info exists.
    bool topUsable = loff != 0 || !xd.AboveMi.IsNull;
    bool leftUsable = aoff != 0 || !xd.LeftMi.IsNull;
    bool rightUsable = (aoff + txWidth) < blockWidth;

    int haveTop = topUsable ? 1 : 0;
    int haveLeft = leftUsable ? 1 : 0;
    int haveRight = rightUsable ? 1 : 0;

    int x = aoff * 4;
    int y = loff * 4;

    if (xd.CurBuf.HighBd)
    {
        BuildIntraPredictorsHigh(ref xd, refr, refStride, dst, dstStride, mode, txSize, haveTop, haveLeft, haveRight, x, y, plane);
    }
    else
    {
        BuildIntraPredictors(ref xd, refr, refStride, dst, dstStride, mode, txSize, haveTop, haveLeft, haveRight, x, y, plane);
    }
}
}
}

View file

@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net7.0</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
<ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
</ItemGroup>
</Project>

View file

@ -0,0 +1,11 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
// Plain data holder pairing a byte buffer with a column index and a size.
// NOTE(review): presumably one entry per tile, with Col the tile column and Size
// the length of Data in bytes - confirm against the tile decoding code.
internal struct TileBuffer
{
public int Col;
public ArrayPtr<byte> Data;
public int Size;
}
}

View file

@ -0,0 +1,20 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
// Per-tile decoding state: the bit reader, backward-update counters, macroblock
// descriptor, dequantized coefficient scratch buffer and error info.
// NOTE(review): the exact meaning of DataEnd / BufStart / BufEnd is not visible
// here - confirm against the code that fills them.
internal struct TileWorkerData
{
public ArrayPtr<byte> DataEnd;
public int BufStart;
public int BufEnd;
public Reader BitReader;
public Vp9BackwardUpdates Counts;
public MacroBlockD Xd;
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
// 32 x 32 ints of coefficient storage.
public Array32<Array32<int>> Dqcoeff;
public InternalErrorInfo ErrorInfo;
}
}

View file

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Per-sub-block mode information. ModeInfo keeps four of these in Bmi and reads
// them for blocks smaller than 8x8.
internal struct BModeInfo
{
// Prediction mode of this sub-block.
public PredictionMode Mode;
public Array2<Mv> Mv; // First, second inter predictor motion vectors
}
}

View file

@ -0,0 +1,21 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Block sizes in ascending order, from 4x4 (0) up to 64x64 (12).
// NOTE(review): names presumably read as WidthxHeight in pixels - confirm.
internal enum BlockSize
{
Block4x4 = 0,
Block4x8 = 1,
Block8x4 = 2,
Block8x8 = 3,
Block8x16 = 4,
Block16x8 = 5,
Block16x16 = 6,
Block16x32 = 7,
Block32x16 = 8,
Block32x32 = 9,
Block32x64 = 10,
Block64x32 = 11,
Block64x64 = 12,
// Number of real block sizes above (0..12).
BlockSizes = 13,
// Sentinel for "no valid size"; shares the value of BlockSizes.
BlockInvalid = BlockSizes
}
}

View file

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// A view into a 2D byte buffer: a pointer to the first element of interest plus
// the stride used to step from one row to the next.
internal struct Buf2D
{
public ArrayPtr<byte> Buf;
public int Stride;
}
}

View file

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Kind of frame being decoded: a key frame (0) or an inter frame (1).
internal enum FrameType
{
KeyFrame = 0,
InterFrame = 1
}
}

View file

@ -0,0 +1,27 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Loop filter parameters: current and "last" filter/sharpness levels, the
// per-reference and per-mode delta tables (again with "last" copies), and the
// array of per-region filter masks with its stride.
// NOTE(review): the "Last*" fields presumably cache the previous frame's values
// so changes can be detected - confirm against the code that updates them.
internal struct LoopFilter
{
public int FilterLevel;
public int LastFiltLevel;
public int SharpnessLevel;
public int LastSharpnessLevel;
public bool ModeRefDeltaEnabled;
public bool ModeRefDeltaUpdate;
// 0 = Intra, Last, GF, ARF
public Array4<sbyte> RefDeltas;
public Array4<sbyte> LastRefDeltas;
// 0 = ZERO_MV, MV
public Array2<sbyte> ModeDeltas;
public Array2<sbyte> LastModeDeltas;
public ArrayPtr<LoopFilterMask> Lfm;
public int LfmStride;
}
}

View file

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Loop filter lookup data: 64 threshold sets and an 8 x 4 x 2 table of levels.
// NOTE(review): Lfthr is presumably indexed by filter level and Lvl by
// [segment][reference frame][mode] - confirm against the code that fills them.
internal struct LoopFilterInfoN
{
public Array64<LoopFilterThresh> Lfthr;
public Array8<Array4<Array2<byte>>> Lvl;
}
}

View file

@ -0,0 +1,24 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
// Each 1 bit represents a position in which we want to apply the loop filter.
// Left* entries (LeftY, LeftUv) refer to whether we apply a filter on the border
// to the left of the block. Above* entries (AboveY, AboveUv) refer to whether or
// not to apply a filter on the above border. Int4x4* entries (Int4x4Y, Int4x4Uv)
// refer to whether or not to apply borders on the 4x4 edges within the 8x8 block
// that each bit represents.
// Since each transform is accompanied by a potentially different type of
// loop filter there is a different entry in the array for each transform size.
// The luma masks are 64 bits wide and the chroma masks 16 bits wide.
internal struct LoopFilterMask
{
public Array4<ulong> LeftY;
public Array4<ulong> AboveY;
public ulong Int4x4Y;
public Array4<ushort> LeftUv;
public Array4<ushort> AboveUv;
public ushort Int4x4Uv;
// One byte per 8x8 block of the region (64 entries).
// NOTE(review): presumably the luma filter level of each block - confirm.
public Array64<byte> LflY;
}
}

View file

@ -0,0 +1,15 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Need to align this structure so when it is declared and
// passed it can be loaded into vector registers.
// NOTE(review): no explicit alignment attribute is applied here; each member is
// simply a 16-byte array - confirm whether alignment is guaranteed elsewhere.
internal struct LoopFilterThresh
{
// CS0649 ("field is never assigned to") is suppressed because nothing in this
// file assigns these fields directly.
#pragma warning disable CS0649
public Array16<byte> Mblim;
public Array16<byte> Lim;
public Array16<byte> HevThr;
#pragma warning restore CS0649
}
}

View file

@ -0,0 +1,179 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct MacroBlockD
{
public Array3<MacroBlockDPlane> Plane;
public byte BmodeBlocksWl;
public byte BmodeBlocksHl;
public Ptr<Vp9BackwardUpdates> Counts;
public TileInfo Tile;
public int MiStride;
// Grid of 8x8 cells is placed over the block.
// If some of them belong to the same mbtree-block
// they will just have same mi[i][j] value
public ArrayPtr<Ptr<ModeInfo>> Mi;
public Ptr<ModeInfo> LeftMi;
public Ptr<ModeInfo> AboveMi;
public uint MaxBlocksWide;
public uint MaxBlocksHigh;
public ArrayPtr<Array3<byte>> PartitionProbs;
/* Distance of MB away from frame edges */
public int MbToLeftEdge;
public int MbToRightEdge;
public int MbToTopEdge;
public int MbToBottomEdge;
public Ptr<Vp9EntropyProbs> Fc;
/* pointers to reference frames */
public Array2<Ptr<RefBuffer>> BlockRefs;
/* pointer to current frame */
public Surface CurBuf;
public Array3<ArrayPtr<sbyte>> AboveContext;
public Array3<Array16<sbyte>> LeftContext;
public ArrayPtr<sbyte> AboveSegContext;
public Array8<sbyte> LeftSegContext;
/* Bit depth: 8, 10, 12 */
public int Bd;
public bool Lossless;
public bool Corrupted;
public Ptr<InternalErrorInfo> ErrorInfo;
// Context for the predicted segment id flag: the sum of SegIdPredicted from the
// above and left neighbours, treating a missing neighbour as 0.
public int GetPredContextSegId()
{
    int context = 0;

    if (!AboveMi.IsNull)
    {
        context += AboveMi.Value.SegIdPredicted;
    }

    if (!LeftMi.IsNull)
    {
        context += LeftMi.Value.SegIdPredicted;
    }

    return context;
}
// Context for the skip flag: the sum of Skip from the above and left
// neighbours, treating a missing neighbour as 0.
public int GetSkipContext()
{
    int context = 0;

    if (!AboveMi.IsNull)
    {
        context += AboveMi.Value.Skip;
    }

    if (!LeftMi.IsNull)
    {
        context += LeftMi.Value.Skip;
    }

    return context;
}
// Context for the switchable interpolation filter. A missing neighbour counts as
// Constants.SwitchableFilters. If both neighbours agree, or only one of them
// carries a real filter, that filter is the context; otherwise the context is
// Constants.SwitchableFilters.
public int GetPredContextSwitchableInterp()
{
    // Note:
    // The mode info data structure has a one element border above and to the
    // left of the entries corresponding to real macroblocks.
    // The prediction flags in these dummy entries are initialized to 0.
    int leftType = Constants.SwitchableFilters;
    if (!LeftMi.IsNull)
    {
        leftType = LeftMi.Value.InterpFilter;
    }

    int aboveType = Constants.SwitchableFilters;
    if (!AboveMi.IsNull)
    {
        aboveType = AboveMi.Value.InterpFilter;
    }

    if (leftType == aboveType)
    {
        return leftType;
    }

    if (leftType == Constants.SwitchableFilters)
    {
        return aboveType;
    }

    if (aboveType == Constants.SwitchableFilters)
    {
        return leftType;
    }

    return Constants.SwitchableFilters;
}
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
// 0 - inter/inter, inter/--, --/inter, --/--
// 1 - intra/inter, inter/intra
// 2 - intra/--, --/intra
// 3 - intra/intra
public int GetIntraInterContext()
{
    bool hasAbove = !AboveMi.IsNull;
    bool hasLeft = !LeftMi.IsNull;

    if (hasAbove && hasLeft)
    {
        // Both edges available
        bool aboveIntra = !AboveMi.Value.IsInterBlock();
        bool leftIntra = !LeftMi.Value.IsInterBlock();

        if (leftIntra && aboveIntra)
        {
            return 3;
        }

        return (leftIntra || aboveIntra) ? 1 : 0;
    }

    if (hasAbove)
    {
        // Only the above edge available
        return AboveMi.Value.IsInterBlock() ? 0 : 2;
    }

    if (hasLeft)
    {
        // Only the left edge available
        return LeftMi.Value.IsInterBlock() ? 0 : 2;
    }

    return 0;
}
// Returns a context number for the given MB prediction signal
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real blocks.
// The prediction flags in these dummy entries are initialized to 0.
public int GetTxSizeContext()
{
    int maxTxSize = (int)Luts.MaxTxSizeLookup[(int)Mi[0].Value.SbType];

    bool hasAbove = !AboveMi.IsNull;
    bool hasLeft = !LeftMi.IsNull;

    // A neighbour contributes its own transform size only when it exists and is
    // not skipped; otherwise the maximum size for this block is assumed.
    int aboveCtx = maxTxSize;
    if (hasAbove && AboveMi.Value.Skip == 0)
    {
        aboveCtx = (int)AboveMi.Value.TxSize;
    }

    int leftCtx = maxTxSize;
    if (hasLeft && LeftMi.Value.Skip == 0)
    {
        leftCtx = (int)LeftMi.Value.TxSize;
    }

    // A missing neighbour mirrors the other one.
    if (!hasLeft)
    {
        leftCtx = aboveCtx;
    }

    if (!hasAbove)
    {
        aboveCtx = leftCtx;
    }

    return aboveCtx + leftCtx > maxTxSize ? 1 : 0;
}
// Stores the subsampling shifts on every plane: plane 0 is never subsampled,
// all other planes receive ssX / ssY.
public void SetupBlockPlanes(int ssX, int ssY)
{
    for (int plane = 0; plane < Constants.MaxMbPlane; plane++)
    {
        bool isLuma = plane == 0;

        Plane[plane].SubsamplingX = isLuma ? 0 : ssX;
        Plane[plane].SubsamplingY = isLuma ? 0 : ssY;
    }
}
// Points each plane's above/left entropy context at the entries that belong to
// mode-info position (miRow, miCol), taking the plane's subsampling into account.
public void SetSkipContext(int miRow, int miCol)
{
    int aboveIdx = miCol * 2;
    int leftIdx = (miRow * 2) & 15;

    for (int plane = 0; plane < Constants.MaxMbPlane; plane++)
    {
        ref MacroBlockDPlane pd = ref Plane[plane];

        int aboveStart = aboveIdx >> pd.SubsamplingX;
        int leftStart = leftIdx >> pd.SubsamplingY;

        pd.AboveContext = AboveContext[plane].Slice(aboveStart);
        // The left context lives in a 16-entry array; expose what remains from leftStart.
        pd.LeftContext = new ArrayPtr<sbyte>(ref LeftContext[plane][leftStart], 16 - leftStart);
    }
}
// Records the distance from the block to each frame edge (in 1/8 pixel units)
// and selects the above/left neighbour mode infos that may be used for prediction.
internal void SetMiRowCol(ref TileInfo tile, int miRow, int bh, int miCol, int bw, int miRows, int miCols)
{
    int rowsBelow = miRows - bh - miRow;
    int colsRight = miCols - bw - miCol;

    MbToTopEdge = -((miRow * Constants.MiSize) * 8);
    MbToBottomEdge = (rowsBelow * Constants.MiSize) * 8;
    MbToLeftEdge = -((miCol * Constants.MiSize) * 8);
    MbToRightEdge = (colsRight * Constants.MiSize) * 8;

    // Are edges available for intra prediction?
    if (miRow != 0)
    {
        AboveMi = Mi[-MiStride];
    }
    else
    {
        AboveMi = Ptr<ModeInfo>.Null;
    }

    // The left neighbour must lie inside the current tile.
    if (miCol > tile.MiColStart)
    {
        LeftMi = Mi[-1];
    }
    else
    {
        LeftMi = Ptr<ModeInfo>.Null;
    }
}
}
}

View file

@ -0,0 +1,21 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Per-plane state of a macroblock descriptor: coefficient storage, subsampling
// shifts, destination and prediction buffers, entropy contexts and dequantizers.
internal struct MacroBlockDPlane
{
public ArrayPtr<int> DqCoeff;
// Right-shift amounts applied to luma coordinates to obtain this plane's coordinates.
public int SubsamplingX;
public int SubsamplingY;
// Buffer the plane is reconstructed into.
public Buf2D Dst;
// Prediction source buffers, one per reference index.
public Array2<Buf2D> Pre;
public ArrayPtr<sbyte> AboveContext;
public ArrayPtr<sbyte> LeftContext;
// NOTE(review): presumably [segment][0 = DC, 1 = AC] dequantizer values - confirm.
public Array8<Array2<short>> SegDequant;
// Number of 4x4s in current block
public ushort N4W, N4H;
// Log2 of N4W, N4H
public byte N4Wl, N4Hl;
}
}

View file

@ -0,0 +1,66 @@
using Ryujinx.Common.Memory;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Mode information of one block: size, prediction modes, transform size, skip and
// segment flags, reference frames and motion vectors, plus per-sub-block data for
// blocks smaller than 8x8.
internal struct ModeInfo
{
    // Sub-block index table used by GetSubBlockMv: first index is the block
    // index, second index is 1 when searchCol is 0 and 0 otherwise.
    private static readonly int[][] IdxNColumnToSubblock =
    {
        new[] { 1, 2 },
        new[] { 1, 3 },
        new[] { 3, 2 },
        new[] { 3, 3 }
    };

    // Common for both Inter and Intra blocks
    public BlockSize SbType;
    public PredictionMode Mode;
    public TxSize TxSize;
    public sbyte Skip;
    public sbyte SegmentId;
    public sbyte SegIdPredicted; // Valid only when TemporalUpdate is enabled

    // Only for Intra blocks
    public PredictionMode UvMode;

    // Only for Inter blocks
    public byte InterpFilter;

    // if ref_frame[idx] is equal to AltRefFrame then
    // MacroBlockD.BlockRef[idx] is an altref
    public Array2<sbyte> RefFrame;

    public Array2<Mv> Mv;

    public Array4<BModeInfo> Bmi;

    // Luma prediction mode: the sub-block's mode for blocks below 8x8, the
    // block-level mode otherwise.
    public PredictionMode GetYMode(int block) => SbType < BlockSize.Block8x8 ? Bmi[block].Mode : Mode;

    // Transform size used for a chroma plane with the given subsampling.
    public TxSize GetUvTxSize(ref MacroBlockDPlane pd)
    {
        bool validCombination =
            SbType < BlockSize.Block8x8 ||
            Luts.SsSizeLookup[(int)SbType][pd.SubsamplingX][pd.SubsamplingY] != BlockSize.BlockInvalid;

        Debug.Assert(validCombination);

        return Luts.UvTxsizeLookup[(int)SbType][(int)TxSize][pd.SubsamplingX][pd.SubsamplingY];
    }

    // True when the first reference frame is not the intra frame.
    public bool IsInterBlock() => RefFrame[0] > Constants.IntraFrame;

    // True when a second (non-intra) reference frame is in use.
    public bool HasSecondRef() => RefFrame[1] > Constants.IntraFrame;

    // This function returns either the appropriate sub block or block's mv
    // on whether the block_size < 8x8 and we have check_sub_blocks set.
    public Mv GetSubBlockMv(int whichMv, int searchCol, int blockIdx)
    {
        if (blockIdx < 0 || SbType >= BlockSize.Block8x8)
        {
            return Mv[whichMv];
        }

        int column = searchCol == 0 ? 1 : 0;
        int subBlock = IdxNColumnToSubblock[blockIdx][column];

        return Bmi[subBlock].Mv[whichMv];
    }
}
}

View file

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
// Context classes describing the combination of two neighbouring blocks' modes.
// Values 0..6 are contiguous; InvalidCase is 9, so 7 and 8 are not named here.
internal enum MotionVectorContext
{
BothZero = 0,
ZeroPlusPredicted = 1,
BothPredicted = 2,
NewPlusNonIntra = 3,
BothNew = 4,
IntraPlusNonIntra = 5,
BothIntra = 6,
InvalidCase = 9
}
}

View file

@ -0,0 +1,189 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
internal struct Mv
{
public short Row;
public short Col;
// Lookup table of floor(log2(i)) for i in 1..1024 (entry 0 holds 0), 1025 entries
// in total. GetMvClass indexes it with (z >> 3) to derive the motion vector class.
private static ReadOnlySpan<byte> LogInBase2 => new byte[]
{
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
};
/// <summary>
/// Tells whether this motion vector is small enough for its components to use the
/// high-precision (1/8) representation.
/// </summary>
/// <returns>
/// <c>true</c> when the magnitudes of both <c>Row</c> and <c>Col</c> are below 64;
/// otherwise <c>false</c>.
/// </returns>
public bool UseMvHp()
{
    const int kMvRefThresh = 64; // Threshold for use of high-precision 1/8 mv

    // Widen to int before taking the absolute value: the 16-bit overload of Math.Abs
    // throws OverflowException for short.MinValue, while the int overload handles -32768.
    int rowMagnitude = Math.Abs((int)Row);
    int colMagnitude = Math.Abs((int)Col);

    return rowMagnitude < kMvRefThresh && colMagnitude < kMvRefThresh;
}
/// <summary>
/// Reports whether the given joint type has a non-zero vertical component.
/// </summary>
/// <param name="type">Joint type to test.</param>
/// <returns><c>true</c> for <c>MvJointHzvnz</c> and <c>MvJointHnzvnz</c>; otherwise <c>false</c>.</returns>
public static bool MvJointVertical(MvJointType type)
{
    switch (type)
    {
        case MvJointType.MvJointHzvnz:
        case MvJointType.MvJointHnzvnz:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Reports whether the given joint type has a non-zero horizontal component.
/// </summary>
/// <param name="type">Joint type to test.</param>
/// <returns><c>true</c> for <c>MvJointHnzvz</c> and <c>MvJointHnzvnz</c>; otherwise <c>false</c>.</returns>
public static bool MvJointHorizontal(MvJointType type)
{
    switch (type)
    {
        case MvJointType.MvJointHnzvz:
        case MvJointType.MvJointHnzvnz:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Returns the base magnitude of a motion vector class.
/// </summary>
/// <param name="c">Motion vector class.</param>
/// <returns>0 for class 0, otherwise <c>Constants.Class0Size</c> shifted left by <c>c + 2</c>.</returns>
private static int MvClassBase(MvClassType c)
{
    if (c == 0)
    {
        return 0;
    }

    int shift = (int)c + 2;

    return Constants.Class0Size << shift;
}
/// <summary>
/// Classifies a motion vector magnitude and optionally reports its offset inside the class.
/// </summary>
/// <param name="z">Magnitude minus one of the component (see the caller in IncMvComponent).</param>
/// <param name="offset">
/// When not null, receives <paramref name="z"/> minus the base value of the selected class.
/// </param>
/// <returns>The class selected for <paramref name="z"/>.</returns>
private static MvClassType GetMvClass(int z, Ptr<int> offset)
{
    MvClassType mvClass;

    if (z >= Constants.Class0Size * 4096)
    {
        // Everything at or above this bound falls into the last class.
        mvClass = MvClassType.MvClass10;
    }
    else
    {
        // The table is indexed by the magnitude with its low three bits dropped.
        mvClass = (MvClassType)LogInBase2[z >> 3];
    }

    if (offset.IsNull)
    {
        return mvClass;
    }

    offset.Value = z - MvClassBase(mvClass);

    return mvClass;
}
/// <summary>
/// Adds one motion vector component to the backward-update counters.
/// </summary>
/// <param name="v">Non-zero component value.</param>
/// <param name="counts">Counter set that is updated in place.</param>
/// <param name="comp">Index of the component counter group to update.</param>
/// <param name="incr">Increment applied to each touched counter.</param>
/// <param name="usehp">Multiplier applied to <paramref name="incr"/> for the high-precision counters.</param>
private static void IncMvComponent(int v, ref Vp9BackwardUpdates counts, int comp, int incr, int usehp)
{
    Debug.Assert(v != 0); /* Should not be zero */

    int sign = v < 0 ? 1 : 0;
    counts.Sign[comp][sign] += (uint)incr;

    int magnitudeMinusOne = (sign != 0 ? -v : v) - 1; /* Magnitude - 1 */
    int offset = 0;
    int mvClass = (int)GetMvClass(magnitudeMinusOne, new Ptr<int>(ref offset));
    counts.Classes[comp][mvClass] += (uint)incr;

    int intPart = offset >> 3;        /* Int mv data */
    int fracPart = (offset >> 1) & 3; /* Fractional pel mv data */
    int hpPart = offset & 1;          /* High precision mv data */

    if (mvClass != (int)MvClassType.MvClass0)
    {
        int bitCount = mvClass + Constants.Class0Bits - 1; // Number of bits

        for (int bit = 0; bit < bitCount; ++bit)
        {
            counts.Bits[comp][bit][(intPart >> bit) & 1] += (uint)incr;
        }

        counts.Fp[comp][fracPart] += (uint)incr;
        counts.Hp[comp][hpPart] += (uint)(usehp * incr);

        return;
    }

    counts.Class0[comp][intPart] += (uint)incr;
    counts.Class0Fp[comp][intPart][fracPart] += (uint)incr;
    counts.Class0Hp[comp][hpPart] += (uint)(usehp * incr);
}
/// <summary>
/// Derives the joint type of this motion vector from which of its components are zero.
/// </summary>
/// <returns>The joint type matching the zero/non-zero state of <c>Row</c> and <c>Col</c>.</returns>
private MvJointType GetMvJoint()
{
    bool hasVertical = Row != 0;
    bool hasHorizontal = Col != 0;

    if (hasVertical)
    {
        return hasHorizontal ? MvJointType.MvJointHnzvnz : MvJointType.MvJointHzvnz;
    }

    return hasHorizontal ? MvJointType.MvJointHnzvz : MvJointType.MvJointZero;
}
/// <summary>
/// Records this motion vector in the backward-update counters, if any are supplied.
/// </summary>
/// <param name="counts">Counter set to update; nothing happens when the pointer is null.</param>
internal void IncMv(Ptr<Vp9BackwardUpdates> counts)
{
    if (counts.IsNull)
    {
        return;
    }

    MvJointType joint = GetMvJoint();
    ++counts.Value.Joints[(int)joint];

    // Component 0 is the row, component 1 is the column.
    if (MvJointVertical(joint))
    {
        IncMvComponent(Row, ref counts.Value, 0, 1, 1);
    }

    if (MvJointHorizontal(joint))
    {
        IncMvComponent(Col, ref counts.Value, 1, 1, 1);
    }
}
/// <summary>
/// Restricts both components of this motion vector to the given inclusive ranges.
/// </summary>
/// <param name="minCol">Lowest allowed column value.</param>
/// <param name="maxCol">Highest allowed column value.</param>
/// <param name="minRow">Lowest allowed row value.</param>
/// <param name="maxRow">Highest allowed row value.</param>
public void ClampMv(int minCol, int maxCol, int minRow, int maxRow)
{
    int clampedCol = Math.Clamp(Col, minCol, maxCol);
    Col = (short)clampedCol;

    int clampedRow = Math.Clamp(Row, minRow, maxRow);
    Row = (short)clampedRow;
}
private const int MvBorder = 16 << 3; // Allow 16 pels in 1/8th pel units

/// <summary>
/// Clamps this motion vector to the block's edge distances extended by <c>MvBorder</c> on every side.
/// </summary>
/// <param name="xd">Macroblock descriptor supplying the distances to the four edges.</param>
public void ClampMvRef(ref MacroBlockD xd)
{
    int minCol = xd.MbToLeftEdge - MvBorder;
    int maxCol = xd.MbToRightEdge + MvBorder;
    int minRow = xd.MbToTopEdge - MvBorder;
    int maxRow = xd.MbToBottomEdge + MvBorder;

    ClampMv(minCol, maxCol, minRow, maxRow);
}
/// <summary>
/// Drops the lowest bit of each component unless high precision is both allowed and usable.
/// </summary>
/// <param name="allowHP">Whether high-precision motion vectors are permitted.</param>
public void LowerMvPrecision(bool allowHP)
{
    if (allowHP && UseMvHp())
    {
        return;
    }

    // Odd components are moved one step towards zero so that they become even.
    if ((Row & 1) != 0)
    {
        Row += (short)(Row > 0 ? -1 : 1);
    }

    if ((Col & 1) != 0)
    {
        Col += (short)(Col > 0 ? -1 : 1);
    }
}
}
}

View file

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Motion vector whose row and column components are stored as 32-bit integers.
/// </summary>
internal struct Mv32
{
// Vertical component.
public int Row;
// Horizontal component.
public int Col;
}
}

View file

@ -0,0 +1,17 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Magnitude class of a motion vector component. The comment beside each member gives the
/// integer-pel range covered by that class.
/// </summary>
internal enum MvClassType
{
MvClass0 = 0, /* (0, 2] integer pel */
MvClass1 = 1, /* (2, 4] integer pel */
MvClass2 = 2, /* (4, 8] integer pel */
MvClass3 = 3, /* (8, 16] integer pel */
MvClass4 = 4, /* (16, 32] integer pel */
MvClass5 = 5, /* (32, 64] integer pel */
MvClass6 = 6, /* (64, 128] integer pel */
MvClass7 = 7, /* (128, 256] integer pel */
MvClass8 = 8, /* (256, 512] integer pel */
MvClass9 = 9, /* (512, 1024] integer pel */
MvClass10 = 10, /* (1024,2048] integer pel */
}
}

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Describes which components of a motion vector are non-zero.
/// In the member names, "H" is the horizontal and "V" the vertical component,
/// "z" means zero and "nz" means non-zero.
/// </summary>
internal enum MvJointType
{
MvJointZero = 0, /* Zero vector */
MvJointHnzvz = 1, /* Vert zero, hor nonzero */
MvJointHzvnz = 2, /* Hor zero, vert nonzero */
MvJointHnzvnz = 3, /* Both components nonzero */
}
}

View file

@ -0,0 +1,10 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Holds two motion vectors together with two signed-byte reference frame values.
/// </summary>
internal struct MvRef
{
// Two motion vectors; presumably entry i pairs with RefFrame[i] - confirm against callers.
public Array2<Mv> Mv;
// Two reference frame values, stored as signed bytes.
public Array2<sbyte> RefFrame;
}
}

View file

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Ways a block can be partitioned. <c>PartitionTypes</c> is the number of real partition
/// kinds, and <c>PartitionInvalid</c> shares that value.
/// </summary>
internal enum PartitionType
{
    PartitionNone = 0,
    PartitionHorz = 1,
    PartitionVert = 2,
    PartitionSplit = 3,
    PartitionTypes = 4,
    PartitionInvalid = PartitionTypes,
}
}

View file

@ -0,0 +1,9 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Kind of image plane. <c>PlaneTypes</c> is the number of plane kinds.
/// </summary>
internal enum PlaneType
{
    Y = 0,
    Uv = 1,
    PlaneTypes = 2,
}
}

View file

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Row/column pair.
/// </summary>
internal struct Position
{
    public int Row;
    public int Col;

    /// <summary>
    /// Creates a position from its row and column.
    /// </summary>
    /// <param name="row">Value stored in <see cref="Row"/>.</param>
    /// <param name="col">Value stored in <see cref="Col"/>.</param>
    public Position(int row, int col)
    {
        (Row, Col) = (row, col);
    }
}
}

View file

@ -0,0 +1,21 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Block prediction modes. Values 0 to 9 are the spatial predictors described beside each
/// member; values 10 to 13 are the motion-vector based modes (presumably inter prediction -
/// confirm against the mode readers). <c>MbModeCount</c> is the number of modes.
/// </summary>
internal enum PredictionMode
{
DcPred = 0, // Average of above and left pixels
VPred = 1, // Vertical
HPred = 2, // Horizontal
D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi)
D135Pred = 4, // Directional 135 deg = 180 - 45
D117Pred = 5, // Directional 117 deg = 180 - 63
D153Pred = 6, // Directional 153 deg = 180 - 27
D207Pred = 7, // Directional 207 deg = 180 + 27
D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi)
TmPred = 9, // True-motion
NearestMv = 10,
NearMv = 11,
ZeroMv = 12,
NewMv = 13,
MbModeCount = 14
}
}

View file

@ -0,0 +1,8 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Pairs a reference frame surface with the scale factors used when predicting from it.
/// </summary>
internal struct RefBuffer
{
// Surface of the reference frame; its Width and Height feed the scale factor setup.
public Surface Buf;
// Scale factors for this reference relative to the current frame size.
public ScaleFactors Sf;
}
}

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Reference mode of a frame. <c>ReferenceModes</c> is the number of modes.
/// NOTE(review): the names suggest single reference, compound reference, and a per-block
/// choice between the two - confirm against the frame header reader.
/// </summary>
internal enum ReferenceMode
{
SingleReference = 0,
CompoundReference = 1,
ReferenceModeSelect = 2,
ReferenceModes = 3
}
}

View file

@ -0,0 +1,451 @@
using Ryujinx.Common.Memory;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Convolve;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Fixed-point scale factors between a reference frame and the current frame, together with
/// the convolve routine dispatch used for inter prediction.
/// </summary>
internal struct ScaleFactors
{
    private const int RefScaleShift = 14;
    private const int RefNoScale = 1 << RefScaleShift;
    private const int RefInvalidScale = -1;

    // Signature shared by the 8-bit convolve routines stored in the tables below.
    private unsafe delegate void ConvolveFn(
        byte* src,
        int srcStride,
        byte* dst,
        int dstStride,
        Array8<short>[] filter,
        int x0Q4,
        int xStepQ4,
        int y0Q4,
        int yStepQ4,
        int w,
        int h);

    // Signature shared by the high bit depth convolve routines; adds the bit depth.
    private unsafe delegate void HighbdConvolveFn(
        ushort* src,
        int srcStride,
        ushort* dst,
        int dstStride,
        Array8<short>[] filter,
        int x0Q4,
        int xStepQ4,
        int y0Q4,
        int yStepQ4,
        int w,
        int h,
        int bd);

    // Every table below is indexed as [horiz][vert][avg].

    // No scaling in either direction.
    private static readonly unsafe ConvolveFn[][][] PredictX16Y16 =
    {
        new[]
        {
            new ConvolveFn[] { ConvolveCopy, ConvolveAvg },
            new ConvolveFn[] { Convolve8Vert, Convolve8AvgVert },
        },
        new[]
        {
            new ConvolveFn[] { Convolve8Horiz, Convolve8AvgHoriz },
            new ConvolveFn[] { Convolve8, Convolve8Avg },
        },
    };

    // No scaling in the x direction, scaling in the y direction.
    private static readonly unsafe ConvolveFn[][][] PredictX16 =
    {
        new[]
        {
            new ConvolveFn[] { ScaledVert, ScaledAvgVert },
            new ConvolveFn[] { ScaledVert, ScaledAvgVert },
        },
        new[]
        {
            new ConvolveFn[] { Scaled2D, ScaledAvg2D },
            new ConvolveFn[] { Scaled2D, ScaledAvg2D },
        },
    };

    // No scaling in the y direction, scaling in the x direction.
    private static readonly unsafe ConvolveFn[][][] PredictY16 =
    {
        new[]
        {
            new ConvolveFn[] { ScaledHoriz, ScaledAvgHoriz },
            new ConvolveFn[] { Scaled2D, ScaledAvg2D },
        },
        new[]
        {
            new ConvolveFn[] { ScaledHoriz, ScaledAvgHoriz },
            new ConvolveFn[] { Scaled2D, ScaledAvg2D },
        },
    };

    // Scaling in both directions.
    private static readonly unsafe ConvolveFn[][][] Predict =
    {
        new[]
        {
            new ConvolveFn[] { Scaled2D, ScaledAvg2D },
            new ConvolveFn[] { Scaled2D, ScaledAvg2D },
        },
        new[]
        {
            new ConvolveFn[] { Scaled2D, ScaledAvg2D },
            new ConvolveFn[] { Scaled2D, ScaledAvg2D },
        },
    };

    // High bit depth: no scaling in either direction.
    private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16Y16 =
    {
        new[]
        {
            new HighbdConvolveFn[] { HighbdConvolveCopy, HighbdConvolveAvg },
            new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert },
        },
        new[]
        {
            new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        },
    };

    // High bit depth: no scaling in the x direction, scaling in the y direction.
    private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16 =
    {
        new[]
        {
            new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert },
            new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert },
        },
        new[]
        {
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        },
    };

    // High bit depth: no scaling in the y direction, scaling in the x direction.
    private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictY16 =
    {
        new[]
        {
            new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        },
        new[]
        {
            new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        },
    };

    // High bit depth: scaling in both directions.
    private static readonly unsafe HighbdConvolveFn[][][] HighbdPredict =
    {
        new[]
        {
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        },
        new[]
        {
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
            new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
        },
    };

    public int XScaleFP; // Horizontal fixed point scale factor
    public int YScaleFP; // Vertical fixed point scale factor
    public int XStepQ4;
    public int YStepQ4;

    /// <summary>
    /// Scales a horizontal value when scaling is active, otherwise returns it unchanged.
    /// </summary>
    public int ScaleValueX(int val)
    {
        if (!IsScaled())
        {
            return val;
        }

        return ScaledX(val);
    }

    /// <summary>
    /// Scales a vertical value when scaling is active, otherwise returns it unchanged.
    /// </summary>
    public int ScaleValueY(int val)
    {
        if (!IsScaled())
        {
            return val;
        }

        return ScaledY(val);
    }

    /// <summary>
    /// Runs the 8-bit convolve routine selected by the current step sizes and by the
    /// <paramref name="horiz"/>, <paramref name="vert"/> and <paramref name="avg"/> indices.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public unsafe void InterPredict(
        int horiz,
        int vert,
        int avg,
        byte* src,
        int srcStride,
        byte* dst,
        int dstStride,
        int subpelX,
        int subpelY,
        int w,
        int h,
        Array8<short>[] kernel,
        int xs,
        int ys)
    {
        // A step of 16 means the corresponding direction is not scaled.
        bool unscaledX = XStepQ4 == 16;
        bool unscaledY = YStepQ4 == 16;

        ConvolveFn[][][] table;

        if (unscaledX)
        {
            table = unscaledY ? PredictX16Y16 : PredictX16;
        }
        else
        {
            table = unscaledY ? PredictY16 : Predict;
        }

        table[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h);
    }

    /// <summary>
    /// Runs the high bit depth convolve routine selected by the current step sizes and by the
    /// <paramref name="horiz"/>, <paramref name="vert"/> and <paramref name="avg"/> indices.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public unsafe void HighbdInterPredict(
        int horiz,
        int vert,
        int avg,
        ushort* src,
        int srcStride,
        ushort* dst,
        int dstStride,
        int subpelX,
        int subpelY,
        int w,
        int h,
        Array8<short>[] kernel,
        int xs,
        int ys,
        int bd)
    {
        // A step of 16 means the corresponding direction is not scaled.
        bool unscaledX = XStepQ4 == 16;
        bool unscaledY = YStepQ4 == 16;

        HighbdConvolveFn[][][] table;

        if (unscaledX)
        {
            table = unscaledY ? HighbdPredictX16Y16 : HighbdPredictX16;
        }
        else
        {
            table = unscaledY ? HighbdPredictY16 : HighbdPredict;
        }

        table[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd);
    }

    // Multiplies by the horizontal factor in 64-bit, then drops the fractional bits.
    private int ScaledX(int val)
    {
        long product = (long)val * XScaleFP;

        return (int)(product >> RefScaleShift);
    }

    // Multiplies by the vertical factor in 64-bit, then drops the fractional bits.
    private int ScaledY(int val)
    {
        long product = (long)val * YScaleFP;

        return (int)(product >> RefScaleShift);
    }

    private static int GetFixedPointScaleFactor(int otherSize, int thisSize)
    {
        // Calculate scaling factor once for each reference frame
        // and use fixed point scaling factors in decoding and encoding routines.
        // Hardware implementations can calculate scale factor in device driver
        // and use multiplication and shifting on hardware instead of division.
        int numerator = otherSize << RefScaleShift;

        return numerator / thisSize;
    }

    /// <summary>
    /// Scales a motion vector and adds the sub-pixel part of the scaled block position.
    /// </summary>
    /// <param name="mv">Motion vector to scale.</param>
    /// <param name="x">Horizontal position, shifted left by <c>SubpelBits</c> before scaling.</param>
    /// <param name="y">Vertical position, shifted left by <c>SubpelBits</c> before scaling.</param>
    /// <returns>The scaled motion vector.</returns>
    public Mv32 ScaleMv(ref Mv mv, int x, int y)
    {
        int xOffQ4 = ScaledX(x << SubpelBits) & SubpelMask;
        int yOffQ4 = ScaledY(y << SubpelBits) & SubpelMask;

        Mv32 scaled = default;
        scaled.Row = ScaledY(mv.Row) + yOffQ4;
        scaled.Col = ScaledX(mv.Col) + xOffQ4;

        return scaled;
    }

    /// <summary>
    /// Returns <c>true</c> when neither scale factor is marked invalid.
    /// </summary>
    public bool IsValidScale()
    {
        return !(XScaleFP == RefInvalidScale || YScaleFP == RefInvalidScale);
    }

    /// <summary>
    /// Returns <c>true</c> when the factors are valid and at least one differs from the 1:1 factor.
    /// </summary>
    public bool IsScaled()
    {
        if (!IsValidScale())
        {
            return false;
        }

        return XScaleFP != RefNoScale || YScaleFP != RefNoScale;
    }

    /// <summary>
    /// Checks that a reference frame is at most twice as large and at least one sixteenth
    /// as large as the current frame in both dimensions.
    /// </summary>
    public static bool ValidRefFrameSize(int refWidth, int refHeight, int thisWidth, int thisHeight)
    {
        if (2 * thisWidth < refWidth || 2 * thisHeight < refHeight)
        {
            return false;
        }

        return thisWidth <= 16 * refWidth && thisHeight <= 16 * refHeight;
    }

    /// <summary>
    /// Computes the scale factors and step sizes for a reference of size
    /// <paramref name="otherW"/> x <paramref name="otherH"/> against a frame of size
    /// <paramref name="thisW"/> x <paramref name="thisH"/>. When the sizes are not a valid
    /// combination both factors are marked invalid and the step sizes are left untouched.
    /// </summary>
    public void SetupScaleFactorsForFrame(int otherW, int otherH, int thisW, int thisH)
    {
        if (ValidRefFrameSize(otherW, otherH, thisW, thisH))
        {
            XScaleFP = GetFixedPointScaleFactor(otherW, thisW);
            YScaleFP = GetFixedPointScaleFactor(otherH, thisH);
            XStepQ4 = ScaledX(16);
            YStepQ4 = ScaledY(16);
        }
        else
        {
            XScaleFP = RefInvalidScale;
            YScaleFP = RefInvalidScale;
        }
    }
}
}

View file

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Per-segment features. The numeric value of each member is used as a bit position in the
/// segment feature mask and as an index into the per-segment feature data.
/// </summary>
internal enum SegLvlFeatures
{
SegLvlAltQ = 0, // Use alternate Quantizer ....
SegLvlAltLf = 1, // Use alternate loop filter value...
SegLvlRefFrame = 2, // Optional Segment reference frame
SegLvlSkip = 3, // Optional Segment (0,0) + skip mode
SegLvlMax = 4 // Number of features supported
}
}

View file

@ -0,0 +1,71 @@
using Ryujinx.Common.Memory;
using System.Diagnostics;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Segmentation state: enable flags plus the per-segment feature mask and feature data.
/// </summary>
internal struct Segmentation
{
    // Indexed by SegLvlFeatures: non-zero when the feature data may be negative.
    private static readonly int[] SegFeatureDataSigned = { 1, 1, 0, 0 };

    // Indexed by SegLvlFeatures: largest magnitude allowed for the feature data.
    private static readonly int[] SegFeatureDataMax = { QuantCommon.MaxQ, Vp9.LoopFilter.MaxLoopFilter, 3, 0 };

    public bool Enabled;
    public bool UpdateMap;
    public byte UpdateData;
    public byte AbsDelta;
    public bool TemporalUpdate;
    public Array8<Array4<short>> FeatureData;
    public Array8<uint> FeatureMask;
    public int AqAvOffset;

    /// <summary>
    /// Picks the segment id prediction probability for the block's prediction context.
    /// </summary>
    public static byte GetPredProbSegId(ref Array3<byte> segPredProbs, ref MacroBlockD xd)
    {
        int context = xd.GetPredContextSegId();

        return segPredProbs[context];
    }

    /// <summary>
    /// Zeroes all feature data and masks and resets <see cref="AqAvOffset"/>.
    /// </summary>
    public void ClearAllSegFeatures()
    {
        // 8 segments with 4 features each, viewed as one flat span.
        MemoryMarshal.CreateSpan(ref FeatureData[0][0], 8 * 4).Clear();
        MemoryMarshal.CreateSpan(ref FeatureMask[0], 8).Clear();
        AqAvOffset = 0;
    }

    /// <summary>
    /// Sets the mask bit of a feature for one segment.
    /// </summary>
    internal void EnableSegFeature(int segmentId, SegLvlFeatures featureId)
    {
        uint featureBit = 1u << (int)featureId;

        FeatureMask[segmentId] |= featureBit;
    }

    /// <summary>
    /// Returns the largest magnitude allowed for the data of a feature.
    /// </summary>
    internal static int FeatureDataMax(SegLvlFeatures featureId) => SegFeatureDataMax[(int)featureId];

    /// <summary>
    /// Returns a non-zero value when the data of a feature may be negative.
    /// </summary>
    internal static int IsSegFeatureSigned(SegLvlFeatures featureId) => SegFeatureDataSigned[(int)featureId];

    /// <summary>
    /// Stores the data of a feature for one segment, truncated to 16 bits.
    /// </summary>
    internal void SetSegData(int segmentId, SegLvlFeatures featureId, int segData)
    {
        int featureIndex = (int)featureId;

        Debug.Assert(segData <= SegFeatureDataMax[featureIndex]);

        if (segData < 0)
        {
            Debug.Assert(SegFeatureDataSigned[featureIndex] != 0);
            Debug.Assert(-segData <= SegFeatureDataMax[featureIndex]);
        }

        FeatureData[segmentId][featureIndex] = (short)segData;
    }

    /// <summary>
    /// Returns 1 when segmentation is enabled and the feature's mask bit is set for the
    /// segment, otherwise 0.
    /// </summary>
    internal int IsSegFeatureActive(int segmentId, SegLvlFeatures featureId)
    {
        if (!Enabled)
        {
            return 0;
        }

        uint featureBit = 1u << (int)featureId;

        return (FeatureMask[segmentId] & featureBit) != 0 ? 1 : 0;
    }

    /// <summary>
    /// Reads the data of a feature for one segment.
    /// </summary>
    internal short GetSegData(int segmentId, SegLvlFeatures featureId) => FeatureData[segmentId][(int)featureId];
}
}

View file

@ -0,0 +1,82 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Frame buffer backed by a single unmanaged allocation that holds the Y, U and V planes,
/// each surrounded by a border.
/// </summary>
internal struct Surface : ISurface
{
    public ArrayPtr<byte> YBuffer;
    public ArrayPtr<byte> UBuffer;
    public ArrayPtr<byte> VBuffer;

    public unsafe Plane YPlane => new Plane((IntPtr)YBuffer.ToPointer(), YBuffer.Length);
    public unsafe Plane UPlane => new Plane((IntPtr)UBuffer.ToPointer(), UBuffer.Length);
    public unsafe Plane VPlane => new Plane((IntPtr)VBuffer.ToPointer(), VBuffer.Length);

    public FrameField Field => FrameField.Progressive;

    public int Width { get; }
    public int Height { get; }
    public int AlignedWidth { get; }
    public int AlignedHeight { get; }
    public int Stride { get; }
    public int UvWidth { get; }
    public int UvHeight { get; }
    public int UvAlignedWidth { get; }
    public int UvAlignedHeight { get; }
    public int UvStride { get; }

    public bool HighBd => false;

    // Start of the unmanaged allocation; released by Dispose.
    private readonly IntPtr _pointer;

    /// <summary>
    /// Allocates the planes for a frame of the given size.
    /// </summary>
    /// <param name="width">Frame width in pixels.</param>
    /// <param name="height">Frame height in pixels.</param>
    public Surface(int width, int height)
    {
        const int border = 32;
        const int ssX = 1;
        const int ssY = 1;
        const bool highbd = false;

        // Dimensions are rounded up to a multiple of 8, the luma stride to a multiple of 32.
        int alignedWidth = (width + 7) & ~7;
        int alignedHeight = (height + 7) & ~7;
        int yStride = (alignedWidth + 2 * border + 31) & ~31;
        int yPlaneSize = (alignedHeight + 2 * border) * yStride;

        int uvAlignedWidth = alignedWidth >> ssX;
        int uvAlignedHeight = alignedHeight >> ssY;
        int uvStride = yStride >> ssX;
        int uvBorderW = border >> ssX;
        int uvBorderH = border >> ssY;
        int uvPlaneSize = (uvAlignedHeight + 2 * uvBorderH) * uvStride;

        int bytesPerSample = highbd ? 2 : 1;
        int frameSize = bytesPerSample * (yPlaneSize + 2 * uvPlaneSize);

        IntPtr pointer = Marshal.AllocHGlobal(frameSize);

        _pointer = pointer;
        Width = width;
        Height = height;
        AlignedWidth = alignedWidth;
        AlignedHeight = alignedHeight;
        Stride = yStride;
        UvWidth = (width + ssX) >> ssX;
        UvHeight = (height + ssY) >> ssY;
        UvAlignedWidth = uvAlignedWidth;
        UvAlignedHeight = uvAlignedHeight;
        UvStride = uvStride;

        // Each buffer starts past the top border rows and the left border columns of its
        // plane; its length is the plane size minus that leading offset.
        int yOrigin = (border * yStride) + border;
        int uvOrigin = (uvBorderH * uvStride) + uvBorderW;

        YBuffer = new ArrayPtr<byte>(pointer + 0 + yOrigin, yPlaneSize - yOrigin);
        UBuffer = new ArrayPtr<byte>(pointer + yPlaneSize + uvOrigin, uvPlaneSize - uvOrigin);
        VBuffer = new ArrayPtr<byte>(pointer + (yPlaneSize + uvPlaneSize) + uvOrigin, uvPlaneSize - uvOrigin);
    }

    /// <summary>
    /// Releases the unmanaged allocation made by the constructor.
    /// </summary>
    public void Dispose() => Marshal.FreeHGlobal(_pointer);
}
}

View file

@ -0,0 +1,85 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Bounds of one tile, expressed in mode-info rows and columns.
/// </summary>
internal struct TileInfo
{
    private const int MinTileWidthB64 = 4;
    private const int MaxTileWidthB64 = 64;

    public int MiRowStart, MiRowEnd;
    public int MiColStart, MiColEnd;

    /// <summary>
    /// Aligns a mode-info count using <c>Constants.MiBlockSizeLog2</c>.
    /// </summary>
    public static int MiColsAlignedToSb(int nMis) => BitUtils.AlignPowerOfTwo(nMis, Constants.MiBlockSizeLog2);

    // Start offset of tile "idx" when "mis" units are split into (1 << log2) tiles,
    // capped at "mis".
    private static int GetTileOffset(int idx, int mis, int log2)
    {
        int sbCols = MiColsAlignedToSb(mis) >> Constants.MiBlockSizeLog2;
        int sbIndex = (idx * sbCols) >> log2;
        int offset = sbIndex << Constants.MiBlockSizeLog2;

        return offset < mis ? offset : mis;
    }

    /// <summary>
    /// Sets the row bounds for the tile row with the given index.
    /// </summary>
    public void SetRow(ref Vp9Common cm, int row)
    {
        int rows = cm.MiRows;
        int log2Rows = cm.Log2TileRows;

        MiRowStart = GetTileOffset(row, rows, log2Rows);
        MiRowEnd = GetTileOffset(row + 1, rows, log2Rows);
    }

    /// <summary>
    /// Sets the column bounds for the tile column with the given index.
    /// </summary>
    public void SetCol(ref Vp9Common cm, int col)
    {
        int cols = cm.MiCols;
        int log2Cols = cm.Log2TileCols;

        MiColStart = GetTileOffset(col, cols, log2Cols);
        MiColEnd = GetTileOffset(col + 1, cols, log2Cols);
    }

    /// <summary>
    /// Sets both the row and the column bounds of the tile.
    /// </summary>
    public void Init(ref Vp9Common cm, int row, int col)
    {
        SetRow(ref cm, row);
        SetCol(ref cm, col);
    }

    /// <summary>
    /// Checks that the position reached from (miRow, miCol) by the search offset lies inside
    /// the frame rows and inside the column range of this tile.
    /// </summary>
    public bool IsInside(int miCol, int miRow, int miRows, ref Position miPos)
    {
        int targetRow = miRow + miPos.Row;
        int targetCol = miCol + miPos.Col;

        return targetRow >= 0 &&
               targetCol >= MiColStart &&
               targetRow < miRows &&
               targetCol < MiColEnd;
    }

    // Smallest log2 tile column count for which a tile is no wider than MaxTileWidthB64.
    private static int GetMinLog2TileCols(int sb64Cols)
    {
        int minLog2 = 0;

        for (; (MaxTileWidthB64 << minLog2) < sb64Cols; ++minLog2)
        {
        }

        return minLog2;
    }

    // Largest log2 tile column count for which a tile is at least MinTileWidthB64 wide.
    private static int GetMaxLog2TileCols(int sb64Cols)
    {
        int maxLog2 = 0;

        while ((sb64Cols >> (maxLog2 + 1)) >= MinTileWidthB64)
        {
            ++maxLog2;
        }

        return maxLog2;
    }

    /// <summary>
    /// Computes the allowed range of the log2 tile column count for a frame width.
    /// </summary>
    /// <param name="miCols">Frame width in mode-info columns.</param>
    /// <param name="minLog2TileCols">Receives the lower bound.</param>
    /// <param name="maxLog2TileCols">Receives the upper bound.</param>
    public static void GetTileNBits(int miCols, ref int minLog2TileCols, ref int maxLog2TileCols)
    {
        int sb64Cols = MiColsAlignedToSb(miCols) >> Constants.MiBlockSizeLog2;

        minLog2TileCols = GetMinLog2TileCols(sb64Cols);
        maxLog2TileCols = GetMaxLog2TileCols(sb64Cols);

        Debug.Assert(minLog2TileCols <= maxLog2TileCols);
    }
}
}

View file

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Frame-level transform mode. <c>TxModes</c> is the number of modes.
/// </summary>
public enum TxMode
{
Only4X4 = 0, // Only 4x4 transform used
Allow8X8 = 1, // Allow block transform size up to 8x8
Allow16X16 = 2, // Allow block transform size up to 16x16
Allow32X32 = 3, // Allow block transform size up to 32x32
TxModeSelect = 4, // Transform specified for each block
TxModes = 5
}
}

View file

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Transform block size. <c>TxSizes</c> is the number of sizes.
/// </summary>
public enum TxSize
{
Tx4x4 = 0, // 4x4 transform
Tx8x8 = 1, // 8x8 transform
Tx16x16 = 2, // 16x16 transform
Tx32x32 = 3, // 32x32 transform
TxSizes = 4
}
}

View file

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Combination of transforms applied vertically and horizontally. <c>TxTypes</c> is the
/// number of combinations.
/// </summary>
internal enum TxType
{
DctDct = 0, // DCT in both horizontal and vertical
AdstDct = 1, // ADST in vertical, DCT in horizontal
DctAdst = 2, // DCT in vertical, ADST in horizontal
AdstAdst = 3, // ADST in both directions
TxTypes = 4
}
}

View file

@ -0,0 +1,331 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Vp9.Types
{
/// <summary>
/// Frame-level decoder state: frame dimensions, reference buffers, mode-info arrays,
/// segmentation and loop filter data, plus the helpers that allocate, initialise and free
/// the per-frame context buffers.
/// </summary>
internal struct Vp9Common
{
    public MacroBlockD Mb;

    public ArrayPtr<TileWorkerData> TileWorkerData;

    public InternalErrorInfo Error;

    public int Width;
    public int Height;

    public int SubsamplingX;
    public int SubsamplingY;

    public ArrayPtr<MvRef> PrevFrameMvs;
    public ArrayPtr<MvRef> CurFrameMvs;

    public Array3<RefBuffer> FrameRefs;

    public FrameType FrameType;

    // Flag signaling that the frame is encoded using only Intra modes.
    public bool IntraOnly;

    public bool AllowHighPrecisionMv;

    // MBs, MbRows/Cols is in 16-pixel units; MiRows/Cols is in
    // ModeInfo (8-pixel) units.
    public int MBs;
    public int MbRows, MiRows;
    public int MbCols, MiCols;
    public int MiStride;

    /* Profile settings */
    public TxMode TxMode;

    public int BaseQindex;
    public int YDcDeltaQ;
    public int UvDcDeltaQ;
    public int UvAcDeltaQ;
    public Array8<Array2<short>> YDequant;
    public Array8<Array2<short>> UvDequant;

    /* We allocate a ModeInfo struct for each macroblock, together with
       an extra row on top and column on the left to simplify prediction. */
    public ArrayPtr<ModeInfo> Mip; /* Base of allocated array */
    public ArrayPtr<ModeInfo> Mi; /* Corresponds to upper left visible macroblock */

    public ArrayPtr<Ptr<ModeInfo>> MiGridBase;
    public ArrayPtr<Ptr<ModeInfo>> MiGridVisible;

    // Whether to use previous frame's motion vectors for prediction.
    public bool UsePrevFrameMvs;

    // Persistent mb segment id map used in prediction.
    public int SegMapIdx;
    public int PrevSegMapIdx;

    public Array2<ArrayPtr<byte>> SegMapArray;
    public ArrayPtr<byte> LastFrameSegMap;
    public ArrayPtr<byte> CurrentFrameSegMap;

    public byte InterpFilter;

    public LoopFilterInfoN LfInfo;

    public Array4<sbyte> RefFrameSignBias; /* Two state 0, 1 */

    public LoopFilter Lf;
    public Segmentation Seg;

    // Context probabilities for reference frame prediction
    public sbyte CompFixedRef;
    public Array2<sbyte> CompVarRef;
    public ReferenceMode ReferenceMode;

    public Ptr<Vp9EntropyProbs> Fc;
    public Ptr<Vp9BackwardUpdates> Counts;

    public int Log2TileCols, Log2TileRows;

    public ArrayPtr<sbyte> AboveSegContext;
    public ArrayPtr<sbyte> AboveContext;

    /// <summary>
    /// Returns <c>true</c> for key frames and for frames flagged as intra only.
    /// </summary>
    public bool FrameIsIntraOnly()
    {
        if (FrameType == FrameType.KeyFrame)
        {
            return true;
        }

        return IntraOnly;
    }

    /// <summary>
    /// Returns <c>true</c> when at least one of the later references has a sign bias that
    /// differs from the sign bias at index 1.
    /// </summary>
    public bool CompoundReferenceAllowed()
    {
        sbyte firstBias = RefFrameSignBias[1];

        for (int refIndex = 2; refIndex <= Constants.RefsPerFrame; ++refIndex)
        {
            if (RefFrameSignBias[refIndex] != firstBias)
            {
                return true;
            }
        }

        return false;
    }

    // Len is in mi units; the result adds Constants.MiBlockSize to it.
    private static int CalcMiSize(int len)
    {
        return Constants.MiBlockSize + len;
    }

    /// <summary>
    /// Derives the mode-info and macroblock dimensions from a frame size in pixels.
    /// </summary>
    public void SetMbMi(int width, int height)
    {
        int alignedWidth = BitUtils.AlignPowerOfTwo(width, Constants.MiSizeLog2);
        int alignedHeight = BitUtils.AlignPowerOfTwo(height, Constants.MiSizeLog2);

        MiCols = alignedWidth >> Constants.MiSizeLog2;
        MiRows = alignedHeight >> Constants.MiSizeLog2;
        MiStride = CalcMiSize(MiCols);

        // A macroblock spans two mode-info units in each direction.
        MbCols = (MiCols + 1) >> 1;
        MbRows = (MiRows + 1) >> 1;
        MBs = MbRows * MbCols;
    }

    /// <summary>
    /// Allocates one worker data entry per tile, plus one per thread when more than one
    /// thread is used.
    /// </summary>
    public void AllocTileWorkerData(MemoryAllocator allocator, int tileCols, int tileRows, int maxThreads)
    {
        int perThreadEntries = maxThreads > 1 ? maxThreads : 0;

        TileWorkerData = allocator.Allocate<TileWorkerData>(tileCols * tileRows + perThreadEntries);
    }

    /// <summary>
    /// Frees the tile worker data.
    /// </summary>
    public void FreeTileWorkerData(MemoryAllocator allocator)
    {
        allocator.Free(TileWorkerData);
    }

    // Allocates both segmentation maps and points current/last at slots 0 and 1.
    private void AllocSegMap(MemoryAllocator allocator, int segMapSize)
    {
        for (int buffer = 0; buffer < Constants.NumPingPongBuffers; ++buffer)
        {
            SegMapArray[buffer] = allocator.Allocate<byte>(segMapSize);
        }

        // Init the index.
        SegMapIdx = 0;
        PrevSegMapIdx = 1;

        CurrentFrameSegMap = SegMapArray[SegMapIdx];
        LastFrameSegMap = SegMapArray[PrevSegMapIdx];
    }

    // Frees both segmentation maps and nulls every pointer to them.
    private void FreeSegMap(MemoryAllocator allocator)
    {
        for (int buffer = 0; buffer < Constants.NumPingPongBuffers; ++buffer)
        {
            allocator.Free(SegMapArray[buffer]);
            SegMapArray[buffer] = ArrayPtr<byte>.Null;
        }

        CurrentFrameSegMap = ArrayPtr<byte>.Null;
        LastFrameSegMap = ArrayPtr<byte>.Null;
    }

    // Allocates the mode-info array and the matching pointer grid.
    private void DecAllocMi(MemoryAllocator allocator, int miSize)
    {
        Mip = allocator.Allocate<ModeInfo>(miSize);
        MiGridBase = allocator.Allocate<Ptr<ModeInfo>>(miSize);
    }

    // Frees the mode-info array and the pointer grid, then nulls both.
    private void DecFreeMi(MemoryAllocator allocator)
    {
        allocator.Free(Mip);
        Mip = ArrayPtr<ModeInfo>.Null;

        allocator.Free(MiGridBase);
        MiGridBase = ArrayPtr<Ptr<ModeInfo>>.Null;
    }

    /// <summary>
    /// Frees the buffers allocated by <see cref="AllocContextBuffers"/> and nulls their pointers.
    /// </summary>
    public void FreeContextBuffers(MemoryAllocator allocator)
    {
        DecFreeMi(allocator);
        FreeSegMap(allocator);

        allocator.Free(AboveContext);
        AboveContext = ArrayPtr<sbyte>.Null;

        allocator.Free(AboveSegContext);
        AboveSegContext = ArrayPtr<sbyte>.Null;

        allocator.Free(Lf.Lfm);
        Lf.Lfm = ArrayPtr<LoopFilterMask>.Null;

        allocator.Free(CurFrameMvs);
        CurFrameMvs = ArrayPtr<MvRef>.Null;

        // The previous frame's vectors are only allocated when they are in use.
        if (UsePrevFrameMvs)
        {
            allocator.Free(PrevFrameMvs);
            PrevFrameMvs = ArrayPtr<MvRef>.Null;
        }
    }

    private void AllocLoopFilter(MemoryAllocator allocator)
    {
        // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
        // stride and rows are rounded up / truncated to a multiple of 8.
        Lf.LfmStride = (MiCols + (Constants.MiBlockSize - 1)) >> 3;

        int lfmRows = (MiRows + (Constants.MiBlockSize - 1)) >> 3;

        Lf.Lfm = allocator.Allocate<LoopFilterMask>(lfmRows * Lf.LfmStride);
    }

    /// <summary>
    /// Computes the frame dimensions in mode-info units and allocates every context buffer
    /// sized from them.
    /// </summary>
    public void AllocContextBuffers(MemoryAllocator allocator, int width, int height)
    {
        SetMbMi(width, height);

        int newMiSize = MiStride * CalcMiSize(MiRows);
        if (newMiSize != 0)
        {
            DecAllocMi(allocator, newMiSize);
        }

        int miCount = MiRows * MiCols;
        if (miCount != 0)
        {
            // Create the segmentation map structure and set to 0.
            AllocSegMap(allocator, miCount);
        }

        if (MiCols != 0)
        {
            int alignedCols = TileInfo.MiColsAlignedToSb(MiCols);

            AboveContext = allocator.Allocate<sbyte>(2 * alignedCols * Constants.MaxMbPlane);
            AboveSegContext = allocator.Allocate<sbyte>(alignedCols);
        }

        AllocLoopFilter(allocator);

        CurFrameMvs = allocator.Allocate<MvRef>(miCount);

        // Using the same size as the current frame is fine here,
        // as this is never true when we have a resolution change.
        if (UsePrevFrameMvs)
        {
            PrevFrameMvs = allocator.Allocate<MvRef>(miCount);
        }
    }

    // Points the visible arrays one row and one column into the allocations and clears the
    // first (MiRows + 1) rows of the pointer grid.
    private unsafe void DecSetupMi()
    {
        int visibleOffset = MiStride + 1;

        Mi = Mip.Slice(visibleOffset);
        MiGridVisible = MiGridBase.Slice(visibleOffset);

        MemoryUtil.Fill(MiGridBase.ToPointer(), Ptr<ModeInfo>.Null, MiStride * (MiRows + 1));
    }

    /// <summary>
    /// Sets up the mode-info arrays and zeroes the last frame's segmentation map, if present.
    /// </summary>
    public unsafe void InitContextBuffers()
    {
        DecSetupMi();

        if (LastFrameSegMap.IsNull)
        {
            return;
        }

        MemoryUtil.Fill(LastFrameSegMap.ToPointer(), (byte)0, MiRows * MiCols);
    }

    // Selects the key-frame partition probabilities for intra-only frames and the regular
    // ones otherwise.
    private void SetPartitionProbs(ref MacroBlockD xd)
    {
        if (FrameIsIntraOnly())
        {
            xd.PartitionProbs = new ArrayPtr<Array3<byte>>(ref Fc.Value.KfPartitionProb[0], 16);
        }
        else
        {
            xd.PartitionProbs = new ArrayPtr<Array3<byte>>(ref Fc.Value.PartitionProb[0], 16);
        }
    }

    /// <summary>
    /// Wires a macroblock descriptor to this frame's buffers, dequantisation tables,
    /// probabilities and error info.
    /// </summary>
    /// <param name="xd">Descriptor to initialise.</param>
    /// <param name="dqcoeff">Coefficient buffer assigned to every plane.</param>
    internal void InitMacroBlockD(ref MacroBlockD xd, ArrayPtr<int> dqcoeff)
    {
        for (int plane = 0; plane < Constants.MaxMbPlane; ++plane)
        {
            xd.Plane[plane].DqCoeff = dqcoeff;
            xd.AboveContext[plane] = AboveContext.Slice(plane * 2 * TileInfo.MiColsAlignedToSb(MiCols));

            // Plane 0 uses the Y tables, the remaining planes share the UV tables.
            if (plane != 0)
            {
                MemoryUtil.Copy(ref xd.Plane[plane].SegDequant, ref UvDequant);
            }
            else
            {
                MemoryUtil.Copy(ref xd.Plane[plane].SegDequant, ref YDequant);
            }

            xd.Fc = new Ptr<Vp9EntropyProbs>(ref Fc.Value);
        }

        xd.AboveSegContext = AboveSegContext;
        xd.MiStride = MiStride;
        xd.ErrorInfo = new Ptr<InternalErrorInfo>(ref Error);

        SetPartitionProbs(ref xd);
    }

    /// <summary>
    /// Builds the Y and UV dequantisation values, per segment when segmentation is enabled.
    /// </summary>
    public void SetupSegmentationDequant()
    {
        const BitDepth bitDepth = BitDepth.Bits8; // TODO: Configurable

        // Build y/uv dequant values based on segmentation.
        // When segmentation is disabled, only the first value is used. The
        // remaining are don't cares.
        bool segmented = Seg.Enabled;
        int segmentCount = segmented ? Constants.MaxSegments : 1;

        for (int segment = 0; segment < segmentCount; ++segment)
        {
            int qIndex = segmented ? QuantCommon.GetQIndex(ref Seg, segment, BaseQindex) : BaseQindex;

            YDequant[segment][0] = QuantCommon.DcQuant(qIndex, YDcDeltaQ, bitDepth);
            YDequant[segment][1] = QuantCommon.AcQuant(qIndex, 0, bitDepth);
            UvDequant[segment][0] = QuantCommon.DcQuant(qIndex, UvDcDeltaQ, bitDepth);
            UvDequant[segment][1] = QuantCommon.AcQuant(qIndex, UvAcDeltaQ, bitDepth);
        }
    }

    /// <summary>
    /// Recomputes the scale factors of every frame reference against the current frame size.
    /// </summary>
    public void SetupScaleFactors()
    {
        for (int refIndex = 0; refIndex < Constants.RefsPerFrame; ++refIndex)
        {
            ref RefBuffer reference = ref FrameRefs[refIndex];

            reference.Sf.SetupScaleFactorsForFrame(reference.Buf.Width, reference.Buf.Height, Width, Height);
        }
    }
}
}