Initial community commit

This commit is contained in:
Jef 2024-09-24 14:54:57 +02:00
parent 537bcbc862
commit fc06254474
16440 changed files with 4239995 additions and 2 deletions

View file

@ -0,0 +1,64 @@
## Target to build
# Base name of the static library; the archive file name is ${TARGET}.a
# (see ARTARGET below).
TARGET =libvppp
## TOOLS
# CC and LD both name the `ecc` driver. LD and OBJDUMP are defined but are
# not referenced by any rule in this makefile.
CC = ecc
LD = ecc
AR = ar
OBJDUMP = objdump
RM = rm -f
## Directories
# All paths hang off a hard-coded Windows-style TOPDIR.
# CURRENTDIR is defined but not referenced by any rule in this makefile.
TOPDIR =C:\DuckSoft
PRIVATEINCLUDE =${TOPDIR}\private\include
CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
CDXVINCLUDE =${TOPDIR}\private\corelibs\cdxv\include
VPPPINCLUDE =${TOPDIR}\private\corelibs\cdxv\vppp\include
CURRENTDIR =${TOPDIR}\private\corelibs\cdxv\vppp
LIBDIR =${TOPDIR}\private\corelibs\lib\mapca
## Compile Flags
# ALLFLAGS is the full command line passed to the compiler:
# tool flags + preprocessor defines + include paths + ${DEBUG}.
ALLINCLUDES =-I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${VPPPINCLUDE}
VP6DEFINES =-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
# -DMAPCA enables the `#ifdef MAPCA` code paths in the C sources.
ETIDEFINES =-DMAPCA
ALLDEFINES =${VP6DEFINES} ${ETIDEFINES}
# Despite its name, DEBUG currently carries the optimisation level (-O2).
DEBUG =-O2
CFLAGS =-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
-mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
-magen_interroutine_padding
ALLFLAGS =$(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}
## Files
# Object list; the source list is derived from it by replacing .o with .c.
OBJS = bsp\borders.o \
generic\clamp.o \
generic\deblock.o \
generic\DeInterlace.o \
generic\Dering.o \
generic\loopfilter.o \
generic\postproc.o \
generic\scale.o \
generic\simpledeblocker.o \
generic\doptsystemdependant.o
SRCS = $(OBJS:.o=.c)
ARTARGET = ${TARGET}.a
# archive
# Default goal: archive all objects, then move the archive into ${LIBDIR}.
# NOTE(review): the rule target is the literal word `ARTARGET`, not
# `${ARTARGET}`; no file of that name is ever produced, so this rule re-runs
# on every invocation. Confirm whether `${ARTARGET}:` was intended.
# NOTE(review): recipe lines in this copy carry no leading tab; most make
# implementations require one. Confirm against the original file.
ARTARGET:${OBJS}
${AR} -cr ${ARTARGET} ${OBJS}
mv ${ARTARGET} ${LIBDIR}
# Compile rule. `$*` is the target name without its suffix.
# NOTE(review): every object is listed as depending on every source, so a
# change to any one .c file makes all objects out of date.
${OBJS} : ${SRCS}
$(CC) $(ALLFLAGS) -c $*.c -o $*.o
# Remove the objects and any archive left in the current directory (an archive
# already moved to ${LIBDIR} is not touched).
# NOTE(review): `clean` is not declared phony; a file named `clean` would
# suppress it.
clean:
${RM} ${OBJS} ${ARTARGET}

View file

@ -0,0 +1,76 @@
/****************************************************************************
*
* Module Title : DeInterlace.c
*
* Description : De-Interlace routines.
*
***************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include <memory.h>
#include "type_aliases.h"
/****************************************************************************
 *
 *  ROUTINE       : CFastDeInterlace
 *
 *  INPUTS        : UINT8 *SrcPtr : Pointer to input image.
 *                  UINT8 *DstPtr : Pointer to output image.
 *                  INT32 Width   : Image width in pixels (bytes per row used).
 *                  INT32 Height  : Image height in rows.
 *                  INT32 Stride  : Distance in bytes between row starts,
 *                                  applied to both source and destination.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 3-tap filter vertically to remove interlacing
 *                  artifacts.
 *
 *  SPECIAL NOTES : This function uses a three tap filter [1, 2, 1] (with
 *                  rounding, divided by 4) to blur vertically in an
 *                  interlaced frame. The first and last rows have no
 *                  neighbour on one side and are copied unfiltered.
 *                  This function assumes:
 *                  1) SrcPtr & DstPtr buffers have the same geometry.
 *                  2) SrcPtr != DstPtr.
 *                  Images with Width <= 0 or Height <= 0 are left untouched;
 *                  a one-row image is copied as-is (no second row is
 *                  accessed).
 *
 ****************************************************************************/
void CFastDeInterlace
(
    UINT8 *SrcPtr,
    UINT8 *DstPtr,
    INT32 Width,
    INT32 Height,
    INT32 Stride
)
{
    INT32 i, j;
    UINT32 x0, x1, x2;
    UINT8 *PrevSrcPtr, *NextSrcPtr;
    UINT8 *CurrentSrcPtr = SrcPtr;
    UINT8 *CurrentDstPtr = DstPtr;

    // Nothing to filter or copy; also keeps a negative Width from being
    // converted into a huge memcpy length.
    if ( Width <= 0 || Height <= 0 )
        return;

    // Always copy the first line
    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );

    // A single-row image has no separate "last line": stop here instead of
    // stepping one row past the end of both buffers.
    if ( Height == 1 )
        return;

    // Interior rows: each output pixel is (above + 2*current + below + 2) / 4
    for ( i=1; i<Height-1; i++ )
    {
        PrevSrcPtr     = CurrentSrcPtr;
        CurrentSrcPtr += Stride;
        NextSrcPtr     = CurrentSrcPtr + Stride;
        CurrentDstPtr += Stride;

        for ( j=0; j<Width; j++ )
        {
            x0 = PrevSrcPtr[j];
            x1 = (CurrentSrcPtr[j]<<1);
            x2 = NextSrcPtr[j];
            CurrentDstPtr[j] = (UINT8)( (x0 + x1 + x2 + 2)>>2 );
        }
    }

    // Copy the last line
    CurrentSrcPtr += Stride;
    CurrentDstPtr += Stride;
    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
}

View file

@ -0,0 +1,303 @@
/****************************************************************************
*
* Module Title : borders.c
*
* Description : UMV border update and frame copy routines.
*
****************************************************************************/
#define STRICT /* Strict type checking */
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
#ifdef MAPCA
#include "eti/mm.h"
#include "eti_loopdir.h"
#endif
#ifdef MAPCA
/*
 * CopyYLeftRightBorder (compiled only when MAPCA is defined)
 *
 * For each of PlaneHeight rows, reads one byte from SrcPtr1 and one byte from
 * SrcPtr2, expands each with hmpv_bcopyrev_64_32() and stores the resulting
 * 64-bit value four times (32 bytes) starting at DestPtr1 and DestPtr2
 * respectively.
 *
 *   SrcPtr1 / SrcPtr2   : byte read per row for the first / second border.
 *   DestPtr1 / DestPtr2 : start of the first / second border area.
 *   PlaneHeight         : number of rows to process.
 *   PlaneStride         : byte distance between rows; the destinations advance
 *                         by PlaneStride>>3 64-bit words per row.
 *
 * NOTE(review): hmpv_bcopyrev_64_32() and loop_directives() are not defined in
 * this file; presumably the former replicates the byte across a 64-bit word
 * and the latter is a software-pipelining hint -- confirm against the vendor
 * documentation.
 * NOTE(review): the destinations are accessed through n64 pointers, so this
 * presumably requires DestPtr1/DestPtr2 to be 8-byte aligned and PlaneStride
 * to be a multiple of 8 -- confirm.
 * NOTE(review): the border width written here is fixed at 32 bytes, whereas
 * the non-MAPCA path in UpdateUMVBorder writes pbi->MVBorder bytes -- confirm
 * the border is always 32 on this target.
 */
void CopyYLeftRightBorder
(
UINT8 *restrict SrcPtr1,
UINT8 *restrict SrcPtr2,
UINT8 *restrict DestPtr1,
UINT8 *restrict DestPtr2,
UINT32 PlaneHeight,
UINT32 PlaneStride
)
{
// 64-bit views of the two destination borders
n64 *restrict DstPtr64_1 = (n64* restrict)DestPtr1;
n64 *restrict DstPtr64_2 = (n64* restrict)DestPtr2;
// Row stride expressed in 64-bit words
n32 PlaneStride64 = (PlaneStride>>3);
n32 Left, Right;
n64 Left64, Right64;
int i;
loop_directives ( ELD_SWP_IVDEP );
for ( i=0; i<PlaneHeight; i++ )
{
// Fetch the edge byte for each side of this row
Left = SrcPtr1[0];
Right = SrcPtr2[0];
// Expand each byte to a 64-bit store value
Left64 = hmpv_bcopyrev_64_32 ( Left, 0, 0 );
Right64 = hmpv_bcopyrev_64_32 ( Right, 0, 0 );
// Write 4 x 8 = 32 bytes into each border for this row
DstPtr64_1[0] = Left64;
DstPtr64_2[0] = Right64;
DstPtr64_1[1] = Left64;
DstPtr64_2[1] = Right64;
DstPtr64_1[2] = Left64;
DstPtr64_2[2] = Right64;
DstPtr64_1[3] = Left64;
DstPtr64_2[3] = Right64;
// Step to the next row (sources in bytes, destinations in 64-bit words)
SrcPtr1 += PlaneStride;
SrcPtr2 += PlaneStride;
DstPtr64_1 += PlaneStride64;
DstPtr64_2 += PlaneStride64;
}
}
/*
 * CopyUVLeftRightBorder (compiled only when MAPCA is defined)
 *
 * Same scheme as CopyYLeftRightBorder but with half the width: for each of
 * PlaneHeight rows, reads one byte from SrcPtr1 and one from SrcPtr2, expands
 * each with hmpv_bcopyrev_64_32() and stores the 64-bit result twice
 * (16 bytes) starting at DestPtr1 and DestPtr2 respectively.
 *
 *   SrcPtr1 / SrcPtr2   : byte read per row for the first / second border.
 *   DestPtr1 / DestPtr2 : start of the first / second border area.
 *   PlaneHeight         : number of rows to process.
 *   PlaneStride         : byte distance between rows; the destinations advance
 *                         by PlaneStride>>3 64-bit words per row.
 *
 * NOTE(review): hmpv_bcopyrev_64_32() and loop_directives() are not defined in
 * this file; presumably a byte-replicate intrinsic and a software-pipelining
 * hint -- confirm against the vendor documentation.
 * NOTE(review): presumably requires 8-byte aligned destinations and a
 * PlaneStride that is a multiple of 8 -- confirm.
 * NOTE(review): the width written is fixed at 16 bytes, whereas the non-MAPCA
 * path in UpdateUMVBorder writes pbi->MVBorder/2 bytes -- confirm.
 */
void CopyUVLeftRightBorder
(
UINT8 *restrict SrcPtr1,
UINT8 *restrict SrcPtr2,
UINT8 *restrict DestPtr1,
UINT8 *restrict DestPtr2,
UINT32 PlaneHeight,
UINT32 PlaneStride
)
{
// 64-bit views of the two destination borders
n64 *restrict DstPtr64_1 = (n64* restrict)DestPtr1;
n64 *restrict DstPtr64_2 = (n64* restrict)DestPtr2;
// Row stride expressed in 64-bit words
n32 PlaneStride64 = (PlaneStride>>3);
n32 Left, Right;
n64 Left64, Right64;
int i;
loop_directives ( ELD_SWP_IVDEP );
for ( i=0; i<PlaneHeight; i++ )
{
// Fetch the edge byte for each side of this row
Left = SrcPtr1[0];
Right = SrcPtr2[0];
// Expand each byte to a 64-bit store value
Left64 = hmpv_bcopyrev_64_32 ( Left, 0, 0 );
Right64 = hmpv_bcopyrev_64_32 ( Right, 0, 0 );
// Write 2 x 8 = 16 bytes into each border for this row
DstPtr64_1[0] = Left64;
DstPtr64_2[0] = Right64;
DstPtr64_1[1] = Left64;
DstPtr64_2[1] = Right64;
// Step to the next row (sources in bytes, destinations in 64-bit words)
SrcPtr1 += PlaneStride;
SrcPtr2 += PlaneStride;
DstPtr64_1 += PlaneStride64;
DstPtr64_2 += PlaneStride64;
}
}
#endif
/****************************************************************************
 *
 *  ROUTINE       : UpdateUMVBorder
 *
 *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
 *                  UINT8 *DestReconPtr    : Pointer to reconstructed image.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Extends the Y, U and V planes of the reconstructed image
 *                  into their surrounding border: the first/last pixel of
 *                  every row is replicated sideways, then the first/last
 *                  full-stride line is replicated upwards/downwards.
 *
 *  SPECIAL NOTES : The luma border is pbi->MVBorder pixels wide, the chroma
 *                  borders half of that. The side borders are filled first
 *                  so that the top/bottom line copies (a whole stride wide)
 *                  also fill the corners.
 *
 ****************************************************************************/
void UpdateUMVBorder ( POSTPROC_INSTANCE *pbi, UINT8 *DestReconPtr )
{
    INT32  Line;
    INT32  Plane;
    INT32  Rows;
    INT32  Pitch      = pbi->YStride;
    UINT32 Border     = pbi->MVBorder;
    UINT32 HalfBorder = Border/2;
    UINT8 *EdgeA, *EdgeB;       /* pixels / lines being replicated      */
    UINT8 *PadA,  *PadB;        /* border areas receiving the replicas  */

    /*-------------------- Luma --------------------*/
    Rows = pbi->VFragments * 8;

    // Sideways: first and last pixel of each row go into the left/right border
    EdgeA = DestReconPtr + pbi->ReconYDataOffset;
    EdgeB = EdgeA + 8 * pbi->HFragments - 1;
    PadA  = EdgeA - Border;
    PadB  = EdgeB + 1;

#ifdef MAPCA
    CopyYLeftRightBorder ( EdgeA, EdgeB, PadA, PadB, Rows, Pitch );
#else
    for ( Line=Rows; Line>0; Line-- )
    {
        memset ( PadA, EdgeA[0], Border );
        memset ( PadB, EdgeB[0], Border );

        EdgeA += Pitch;
        PadA  += Pitch;
        EdgeB += Pitch;
        PadB  += Pitch;
    }
#endif

    // Up/down: first and last full line go into every top/bottom border line
    EdgeA = DestReconPtr + Border * Pitch;
    EdgeB = EdgeA + (pbi->VFragments * 8 * Pitch) - Pitch;
    PadA  = DestReconPtr;
    PadB  = EdgeB + Pitch;

    for ( Line=(INT32)Border; Line>0; Line-- )
    {
        memcpy ( PadA, EdgeA, Pitch );
        memcpy ( PadB, EdgeB, Pitch );

        PadA += Pitch;
        PadB += Pitch;
    }

    /*-------------------- Chroma (U then V) --------------------*/
    Pitch = pbi->UVStride;
    Rows  = pbi->VFragments * 4;

    for ( Plane=0; Plane<2; Plane++ )
    {
        UINT8 *PlaneOrigin;

        if ( Plane == 0 )
            PlaneOrigin = DestReconPtr + pbi->ReconUDataOffset;
        else
            PlaneOrigin = DestReconPtr + pbi->ReconVDataOffset;

        // Sideways replication of the first/last pixel of each row
        EdgeA = PlaneOrigin;
        EdgeB = EdgeA + 4 * pbi->HFragments - 1;
        PadA  = EdgeA - HalfBorder;
        PadB  = EdgeB + 1;

#ifdef MAPCA
        CopyUVLeftRightBorder ( EdgeA, EdgeB, PadA, PadB, Rows, Pitch );
#else
        for ( Line=Rows; Line>0; Line-- )
        {
            memset ( PadA, EdgeA[0], HalfBorder );
            memset ( PadB, EdgeB[0], HalfBorder );

            EdgeA += Pitch;
            PadA  += Pitch;
            EdgeB += Pitch;
            PadB  += Pitch;
        }
#endif

        // Up/down replication of the first/last full line
        EdgeA = PlaneOrigin - HalfBorder;
        EdgeB = EdgeA + (pbi->VFragments * 4 * Pitch) - Pitch;
        PadA  = EdgeA - HalfBorder*Pitch;
        PadB  = EdgeB + Pitch;

        for ( Line=(INT32)HalfBorder; Line>0; Line-- )
        {
            memcpy ( PadA, EdgeA, Pitch );
            memcpy ( PadB, EdgeB, Pitch );

            PadA += Pitch;
            PadB += Pitch;
        }
    }
}
/****************************************************************************
 *
 *  ROUTINE       : CopyFrame
 *
 *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
 *                  YUV_BUFFER_CONFIG *b   : Pointer to source image.
 *                  UINT8 *DestReconPtr    : Pointer to destination image.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Copies the Y, U and V planes of the source image row by
 *                  row into the destination image, then refreshes the
 *                  destination's UMV borders.
 *
 *  SPECIAL NOTES : Source rows are stepped with the strides held in b,
 *                  destination rows with the strides held in pbi; only the
 *                  visible width of each row is copied.
 *
 ****************************************************************************/
void CopyFrame ( POSTPROC_INSTANCE *pbi, YUV_BUFFER_CONFIG *b, UINT8 *DestReconPtr )
{
    int line;
    unsigned char *from;
    unsigned char *to;

    /* Y plane */
    from = (unsigned char *) b->YBuffer;
    to   = DestReconPtr + pbi->ReconYDataOffset;
    line = 0;
    while ( line < b->YHeight )
    {
        memcpy ( to, from, b->YWidth );
        from += b->YStride;
        to   += pbi->YStride;
        line++;
    }

    /* U plane */
    from = (unsigned char *) b->UBuffer;
    to   = DestReconPtr + pbi->ReconUDataOffset;
    line = 0;
    while ( line < b->UVHeight )
    {
        memcpy ( to, from, b->UVWidth );
        from += b->UVStride;
        to   += pbi->UVStride;
        line++;
    }

    /* V plane */
    from = (unsigned char *) b->VBuffer;
    to   = DestReconPtr + pbi->ReconVDataOffset;
    line = 0;
    while ( line < b->UVHeight )
    {
        memcpy ( to, from, b->UVWidth );
        from += b->UVStride;
        to   += pbi->UVStride;
        line++;
    }

    /* Extend the freshly copied planes into the border area */
    UpdateUMVBorder ( pbi, DestReconPtr );
}

View file

@ -0,0 +1,75 @@
/****************************************************************************
*
* Module Title : clamp.c
*
* Description : Image pixel value clamping routines.
*
***************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
/****************************************************************************
 *
 *  ROUTINE       : ClampLevels_C
 *
 *  INPUTS        : POSTPROC_INSTANCE *pbi : Pointer to post-processor instance.
 *                  INT32 BlackClamp       : Number of levels to clamp up from 0.
 *                  INT32 WhiteClamp       : Number of levels to clamp down from 255.
 *                  UINT8 *Src             : Pointer to input image to be clamped.
 *                  UINT8 *Dst             : Pointer to clamped image.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Clamps the pixel values in the input image at each
 *                  end of the 8-bit range.
 *
 *  SPECIAL NOTES : BlackClamp/WhiteClamp are the number of levels to
 *                  clamp at either end of the range. In particular, it
 *                  should be noted that WhiteClamp is _not_ the level
 *                  to clamp to at the high end of the range: the highest
 *                  output level is 255-WhiteClamp.
 *                  Only the Y plane is processed.
 *                  Pixels are read from Src and written to Dst, as the
 *                  parameter descriptions state. (Earlier revisions read
 *                  from Dst and wrote to Src; for in-place use, Src == Dst,
 *                  the result is the same.)
 *
 ****************************************************************************/
void ClampLevels_C
(
    POSTPROC_INSTANCE *pbi,
    INT32 BlackClamp,
    INT32 WhiteClamp,
    UINT8 *Src,
    UINT8 *Dst
)
{
    int i;
    int row,col;
    unsigned char clamped[256];
    int width  = pbi->HFragments*8;
    int height = pbi->VFragments*8;
    UINT8 *SrcPtr  = Src + pbi->ReconYDataOffset;
    UINT8 *DestPtr = Dst + pbi->ReconYDataOffset;
    UINT32 LineLength = pbi->YStride;

    // set up clamping table so we can avoid ifs while clamping
    for ( i=0; i<256; i++ )
    {
        clamped[i] = (unsigned char)i;

        if ( i<BlackClamp )
            clamped[i] = (unsigned char)BlackClamp;

        // The white limit is applied last, so it wins if the two ranges overlap
        if ( i>(255-WhiteClamp) )
            clamped[i] = (unsigned char)(255-WhiteClamp);
    }

    // clamping is for Y only!
    for ( row=0 ; row<height; row++ )
    {
        // Map every source pixel through the table into the destination
        for ( col=0; col<width; col++ )
            DestPtr[col] = clamped[SrcPtr[col]];

        SrcPtr  += LineLength;
        DestPtr += LineLength;
    }
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,92 @@
/****************************************************************************
*
* Module Title : SystemDependant.c
*
* Description : Miscellaneous system dependant functions
*
****************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
/****************************************************************************
* Imports
****************************************************************************/
extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
extern void FilteringVert_12_C ( UINT32 QValue,UINT8 *Src, INT32 Pitch);
extern void FilteringHoriz_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
extern void FilteringVert_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
extern void FilteringHoriz_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch );
extern void HorizontalLine_1_2_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
extern void HorizontalLine_3_5_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
extern void HorizontalLine_4_5_Scale_C ( const unsigned char *source, unsigned int sourceWidth, unsigned char *dest, unsigned int destWidth );
extern void VerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_4_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
extern void VerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_3_5_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
extern void VerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_1_2_Scale_C ( unsigned char *dest, unsigned int destPitch, unsigned int destWidth );
extern void FilterHoriz_Simple_C ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterVert_Simple_C ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterHoriz_Generic ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterVert_Generic ( POSTPROC_INSTANCE *pbi, UINT8 *PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern INT32 *SetupBoundingValueArray_Generic ( POSTPROC_INSTANCE *pbi, INT32 FLimit );
extern INT32 *SetupDeblockValueArray_Generic ( POSTPROC_INSTANCE *pbi, INT32 FLimit );
extern void DeringBlockWeak_C ( POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
extern void DeringBlockStrong_C ( POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
extern void DeblockLoopFilteredBand_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void DeblockNonFilteredBand_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void DeblockNonFilteredBandNewFilter_C ( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void ClampLevels_C( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
extern void CFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
extern void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
/****************************************************************************
 *
 *  ROUTINE       : PostProcMachineSpecificConfig
 *
 *  INPUTS        : UINT32 Version : Codec version number (UNUSED)
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Points every post-processing function pointer at its
 *                  plain C implementation.
 *
 *  SPECIAL NOTES : No processor detection is performed here; the same
 *                  routines are installed regardless of Version.
 *
 ****************************************************************************/
void PostProcMachineSpecificConfig ( UINT32 Version )
{
    (void)Version;

    /* Loop filter and its look-up table builders */
    SetupBoundingValueArray         = SetupBoundingValueArray_Generic;
    SetupDeblockValueArray          = SetupDeblockValueArray_Generic;
    FilterHoriz                     = FilterHoriz_Generic;
    FilterVert                      = FilterVert_Generic;
    FilterHoriz_Simple              = FilterHoriz_Simple_C;
    FilterVert_Simple               = FilterVert_Simple_C;
    FilteringHoriz_8                = FilteringHoriz_8_C;
    FilteringVert_8                 = FilteringVert_8_C;
    FilteringHoriz_12               = FilteringHoriz_12_C;
    FilteringVert_12                = FilteringVert_12_C;

    /* De-ringing */
    DeringBlockWeak                 = DeringBlockWeak_C;
    DeringBlockStrong               = DeringBlockStrong_C;

    /* De-blocking */
    DeblockLoopFilteredBand         = DeblockLoopFilteredBand_C;
    DeblockNonFilteredBand          = DeblockNonFilteredBand_C;
    DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_C;

    /* Scaling: 1:2 */
    HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_C;
    VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_C;
    LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_C;

    /* Scaling: 3:5 */
    HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_C;
    VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_C;
    LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_C;

    /* Scaling: 4:5 */
    HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_C;
    VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_C;
    LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_C;

    /* Miscellaneous */
    FastDeInterlace                 = CFastDeInterlace;
    ClampLevels                     = ClampLevels_C;
    PlaneAddNoise                   = PlaneAddNoise_C;
}

View file

@ -0,0 +1,976 @@
/****************************************************************************
*
* Module Title : loopfilter.c
*
* Description : Loop filter functions.
*
****************************************************************************/
#define STRICT /* Strict type checking */
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
/****************************************************************************
* Macros
****************************************************************************/
#define Mod8(x) ( (x) & 7 )
/****************************************************************************
* Exports
****************************************************************************/
// Loop-filter limit tables. Each initialiser below lists 64 values, 8 per
// line. NOTE(review): Q_TABLE_SIZE is defined elsewhere; presumably 64 -- confirm.
//
// V1 table: read by ApplyReconLoopFilter, indexed by the frame Q index. A zero
// entry makes that routine return without filtering; the last 16 entries here
// are all zero.
UINT32 LoopFilterLimitValuesV1[Q_TABLE_SIZE] =
{
30, 25, 20, 20, 15, 15, 14, 14,
13, 13, 12, 12, 11, 11, 10, 10,
9, 9, 8, 8, 7, 7, 7, 7,
6, 6, 6, 6, 5, 5, 5, 5,
4, 4, 4, 4, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
// Table pointer read by SetupLoopFilter and the FilteringHoriz/Vert_8/12_C
// routines. It is not assigned anywhere in this part of the file.
// NOTE(review): presumably pointed at one of the Vp4/Vp5/Vp6 tables elsewhere
// before first use -- confirm.
UINT32 *LoopFilterLimitValuesV2;
// Same as the V1 table for the first 48 entries; the last 16 entries are
// 2 and 1 instead of 0.
UINT32 LoopFilterLimitValuesVp4[Q_TABLE_SIZE] =
{
30, 25, 20, 20, 15, 15, 14, 14,
13, 13, 12, 12, 11, 11, 10, 10,
9, 9, 8, 8, 7, 7, 7, 7,
6, 6, 6, 6, 5, 5, 5, 5,
4, 4, 4, 4, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1
};
// Vp5 table (its contents are identical to the Vp6 table that follows).
UINT32 LoopFilterLimitValuesVp5[Q_TABLE_SIZE] =
{
14, 14, 13, 13, 12, 12, 10, 10,
10, 10, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 7, 7, 7, 7,
7, 7, 6, 6, 6, 6, 6, 6,
5, 5, 5, 5, 4, 4, 4, 4,
4, 4, 4, 3, 3, 3, 3, 2
};
// Vp6 table (its contents are identical to the Vp5 table above).
UINT32 LoopFilterLimitValuesVp6[Q_TABLE_SIZE] =
{
14, 14, 13, 13, 12, 12, 10, 10,
10, 10, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 7, 7, 7, 7,
7, 7, 6, 6, 6, 6, 6, 6,
5, 5, 5, 5, 4, 4, 4, 4,
4, 4, 4, 3, 3, 3, 3, 2
};
/****************************************************************************
 *
 *  ROUTINE       : SetupBoundingValueArray_Generic
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
 *                  INT32 FLimit           : Value to use as limit.
 *
 *  OUTPUTS       : ppi->FiltBoundingValue is rebuilt.
 *
 *  RETURNS       : INT32 *: Pointer to the table entry for filter value 0
 *                  (element 256 of ppi->FiltBoundingValue), so the result
 *                  can be indexed with negative as well as positive values.
 *
 *  FUNCTION      : Set up the bounding value array.
 *
 *  SPECIAL NOTES : The table maps v to v for |v| < FLimit, ramps back
 *                  towards zero for FLimit <= |v| < 2*FLimit and is zero
 *                  everywhere else.
 *
 ****************************************************************************/
INT32 *SetupBoundingValueArray_Generic ( POSTPROC_INSTANCE *ppi, INT32 FLimit )
{
    INT32  Step;
    INT32 *Centre = &ppi->FiltBoundingValue[256];

    // Start from an all-zero table; only the +/-2*FLimit window is filled in
    memset ( ppi->FiltBoundingValue, 0, (512*sizeof(*ppi->FiltBoundingValue)) );

    for ( Step=0; Step<FLimit; Step++ )
    {
        // Rising part: identity on either side of zero
        Centre[ Step]        =  Step;
        Centre[-Step]        = -Step;

        // Falling part: back down to zero between FLimit and 2*FLimit
        Centre[ Step+FLimit] =  FLimit-Step;
        Centre[-Step-FLimit] =  Step-FLimit;
    }

    return Centre;
}
/****************************************************************************
 *
 *  ROUTINE       : SetupLoopFilter
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
 *
 *  OUTPUTS       : ppi->BoundingValuePtr is updated.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Looks up the filter limit for the current frame Q index
 *                  and builds the matching bounding value LUT.
 *
 *  SPECIAL NOTES : Versions >= 2 always use the generic C table builder;
 *                  older versions go through the SetupBoundingValueArray
 *                  function pointer.
 *
 ****************************************************************************/
void SetupLoopFilter ( POSTPROC_INSTANCE *ppi )
{
    INT32 Limit = LoopFilterLimitValuesV2[ppi->FrameQIndex];

    if ( ppi->Vp3VersionNo < 2 )
    {
        ppi->BoundingValuePtr = SetupBoundingValueArray ( ppi, Limit );
        return;
    }

    ppi->BoundingValuePtr = SetupBoundingValueArray_Generic ( ppi, Limit );
}
/****************************************************************************
 *
 *  ROUTINE       : FilterHoriz_Generic
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processor instance (unused).
 *                  UINT8 *PixelPtr         : Pointer to the first of the four pixels
 *                                            straddling the edge, on the first row.
 *                  INT32 LineLength        : Stride of input data.
 *                  INT32 *BoundingValuePtr : Pointer to array of bounding values.
 *
 *  OUTPUTS       : The two middle pixels of each of the 8 rows are modified
 *                  in place.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies the 4-tap loop-filter across vertical edge,
 *                  i.e. filter is applied horizontally.
 *
 *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1).
 *
 ****************************************************************************/
void FilterHoriz_Generic
(
    POSTPROC_INSTANCE *ppi,
    UINT8 *PixelPtr,
    INT32 LineLength,
    INT32 *BoundingValuePtr
)
{
    INT32  Row;
    INT32  Delta;
    UINT8 *Clip = &LimitVal_VP31[VAL_RANGE];

    (void)ppi;

    for ( Row=8; Row>0; Row--, PixelPtr+=LineLength )
    {
        // (1, -3, 3, -1) across the four pixels
        Delta = (PixelPtr[0] - PixelPtr[3]) + 3*(PixelPtr[2] - PixelPtr[1]);

        // Round, scale down by 8 and limit through the LUT
        Delta = BoundingValuePtr[(Delta + 4) >> 3];

        // Pull the two pixels next to the edge towards each other
        PixelPtr[1] = Clip[(INT32)PixelPtr[1] + Delta];
        PixelPtr[2] = Clip[(INT32)PixelPtr[2] - Delta];
    }
}
/****************************************************************************
 *
 *  ROUTINE       : FilterVert_Generic
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processor instance (unused).
 *                  UINT8 *PixelPtr         : Pointer to the first pixel of the row just
 *                                            below the edge.
 *                  INT32 LineLength        : Stride of input data.
 *                  INT32 *BoundingValuePtr : Pointer to array of bounding values.
 *
 *  OUTPUTS       : The rows immediately above and below the edge are
 *                  modified in place over 8 columns.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies the 4-tap loop-filter across horizontal edge,
 *                  i.e. filter is applied vertically.
 *
 *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1).
 *
 ****************************************************************************/
void FilterVert_Generic
(
    POSTPROC_INSTANCE *ppi,
    UINT8 *PixelPtr,
    INT32 LineLength,
    INT32 *BoundingValuePtr
)
{
    INT32  Col;
    INT32  Delta;
    UINT8 *Clip    = &LimitVal_VP31[VAL_RANGE];
    UINT8 *TwoUp   = PixelPtr - (2 * LineLength);
    UINT8 *OneUp   = PixelPtr - LineLength;
    UINT8 *OneDown = PixelPtr + LineLength;

    (void)ppi;

    for ( Col=0; Col<8; Col++ )
    {
        // (1, -3, 3, -1) down the four rows
        Delta = (INT32)TwoUp[Col]
              - ((INT32)OneUp[Col] * 3)
              + ((INT32)PixelPtr[Col] * 3)
              - (INT32)OneDown[Col];

        // Round, scale down by 8 and limit through the LUT
        Delta = BoundingValuePtr[(Delta + 4) >> 3];

        // Pull the two pixels next to the edge towards each other
        OneUp[Col]    = Clip[(INT32)OneUp[Col] + Delta];
        PixelPtr[Col] = Clip[(INT32)PixelPtr[Col] - Delta];
    }
}
/****************************************************************************
*
* ROUTINE : Bound
*
* INPUTS : UINT32 FLimit : Limit to use in computing bounding value.
* INT32 FiltVal : Value to have bounds applied to.
*
* OUTPUTS : None.
*
* RETURNS : INT32: The bounded filter value, carrying the sign of the input.
*
* FUNCTION : Computes a bounded FiltVal based on specified FLimit without
* branching: with a = abs(FiltVal), the magnitude returned is
* FLimit - abs(a - FLimit) when a < 2*FLimit, and 0 otherwise
* (i.e. a for a <= FLimit, 2*FLimit - a above that).
*
* SPECIAL NOTES : Relies on >> of a negative INT32 being an arithmetic
* (sign-extending) shift of a 32-bit value. Defined without
* INLINE when _WIN32_WCE is defined.
*
****************************************************************************/
#if defined (_WIN32_WCE)
INT32 Bound ( UINT32 FLimit, INT32 FiltVal )
#else
INLINE INT32 Bound ( UINT32 FLimit, INT32 FiltVal )
#endif
{
INT32 Clamp;
INT32 FiltSign;
INT32 NewSign;
// Magnitudes at or above twice the limit produce no correction at all
Clamp = 2 * FLimit;
// Next 3 lines are fast way to find abs...
FiltSign = (FiltVal >> 31); // Sign extension makes FiltSign all 0's or all 1's
FiltVal ^= FiltSign; // FiltVal is then 1's complement of value if -ve
FiltVal -= FiltSign; // FiltVal = abs FiltVal
FiltVal *= (FiltVal < Clamp); // the comparison yields 0 or 1: zero the value when it is >= 2 times limit
FiltVal -= FLimit; // subtract limit value
// Next 3 lines are fast way to find abs...
NewSign = (FiltVal >> 31); // Sign extension makes NewSign all 0's or all 1's
FiltVal ^= NewSign; // FiltVal is then 1's complement of value if -ve
FiltVal -= NewSign; // FiltVal = abs FiltVal
FiltVal = FLimit - FiltVal; // flimit - abs (filtVal - flimit)
// Restore the original sign: (x + s) ^ s is x when s is 0 and -x when s is all 1's
FiltVal += FiltSign;
FiltVal ^= FiltSign;
return FiltVal;
}
/****************************************************************************
 *
 *  ROUTINE       : FilteringHoriz_8_C
 *
 *  INPUTS        : UINT32 QValue : Current quantizer level (index into the
 *                                  loop filter limit table).
 *                  UINT8 *Src    : Pointer to the pixel just right of the
 *                                  edge, on the first row.
 *                  INT32 Pitch   : Pitch of input data.
 *
 *  OUTPUTS       : Src[-1] and Src[0] of each of the 8 rows are modified
 *                  in place.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies horizontal filter across vertical edge inside
 *                  block with Q-dependent limits.
 *
 *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1).
 *
 ****************************************************************************/
void FilteringHoriz_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
{
    INT32  Row;
    INT32  Adjust;
    UINT8 *Clip  = &LimitVal_VP31[VAL_RANGE];
    UINT32 Limit = LoopFilterLimitValuesV2[QValue];

    for ( Row=0; Row<8; Row++, Src+=Pitch )
    {
        // 4-tap filter (1, -3, 3, -1) ...
        Adjust = (Src[-2] - Src[1]) + 3*(Src[0] - Src[-1]);

        // ... with rounding, scaled down by 8
        Adjust = (Adjust + 4) >> 3;

        // Limit the correction according to the quantizer
        Adjust = Bound ( Limit, Adjust );

        Src[-1] = Clip[(INT32)Src[-1] + Adjust];
        Src[ 0] = Clip[(INT32)Src[ 0] - Adjust];
    }
}
/****************************************************************************
 *
 *  ROUTINE       : FilteringVert_8_C
 *
 *  INPUTS        : UINT32 QValue : Current quantizer level (index into the
 *                                  loop filter limit table).
 *                  UINT8 *Src    : Pointer to the first pixel of the row
 *                                  just below the edge.
 *                  INT32 Pitch   : Pitch of input data.
 *
 *  OUTPUTS       : The rows immediately above and below the edge are
 *                  modified in place over 8 columns.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies vertical filter across horizontal edge inside
 *                  block with Q-dependent limits.
 *
 *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1).
 *
 ****************************************************************************/
void FilteringVert_8_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
{
    INT32  Col;
    INT32  Adjust;
    UINT8 *Clip    = &LimitVal_VP31[VAL_RANGE];
    UINT32 Limit   = LoopFilterLimitValuesV2[QValue];
    UINT8 *TwoUp   = Src - (2 * Pitch);
    UINT8 *OneUp   = Src - Pitch;
    UINT8 *OneDown = Src + Pitch;

    for ( Col=0; Col<8; Col++ )
    {
        // 4-tap filter (1, -3, 3, -1) ...
        Adjust = (INT32)TwoUp[Col]
               - ((INT32)OneUp[Col] * 3)
               + ((INT32)Src[Col] * 3)
               - (INT32)OneDown[Col];

        // ... with rounding, scaled down by 8
        Adjust = (Adjust + 4) >> 3;

        // Limit the correction according to the quantizer
        Adjust = Bound ( Limit, Adjust );

        OneUp[Col] = Clip[(INT32)OneUp[Col] + Adjust];
        Src[Col]   = Clip[(INT32)Src[Col] - Adjust];
    }
}
/****************************************************************************
 *
 *  ROUTINE       : FilteringHoriz_12_C
 *
 *  INPUTS        : UINT32 QValue : Current quantizer level (index into the
 *                                  loop filter limit table).
 *                  UINT8 *Src    : Pointer to the pixel just right of the
 *                                  edge, on the first row.
 *                  INT32 Pitch   : Pitch of input data.
 *
 *  OUTPUTS       : Src[-1] and Src[0] of each of the 12 rows are modified
 *                  in place.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies horizontal filter across vertical edge inside
 *                  block with Q-dependent limits, over 12 rows.
 *
 *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1).
 *
 ****************************************************************************/
void FilteringHoriz_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
{
    INT32  Row;
    INT32  Adjust;
    UINT8 *Clip  = &LimitVal_VP31[VAL_RANGE];
    UINT32 Limit = LoopFilterLimitValuesV2[QValue];

    for ( Row=0; Row<12; Row++, Src+=Pitch )
    {
        // 4-tap filter (1, -3, 3, -1) ...
        Adjust = (Src[-2] - Src[1]) + 3*(Src[0] - Src[-1]);

        // ... with rounding, scaled down by 8
        Adjust = (Adjust + 4) >> 3;

        // Limit the correction according to the quantizer
        Adjust = Bound ( Limit, Adjust );

        Src[-1] = Clip[(INT32)Src[-1] + Adjust];
        Src[ 0] = Clip[(INT32)Src[ 0] - Adjust];
    }
}
/****************************************************************************
 *
 *  ROUTINE       : FilteringVert_12_C
 *
 *  INPUTS        : UINT32 QValue : Current quantizer level (index into the
 *                                  loop filter limit table).
 *                  UINT8 *Src    : Pointer to the first pixel of the row
 *                                  just below the edge.
 *                  INT32 Pitch   : Pitch of input data.
 *
 *  OUTPUTS       : The rows immediately above and below the edge are
 *                  modified in place over 12 columns.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies vertical filter across horizontal edge inside
 *                  block with Q-dependent limits, over 12 columns.
 *
 *  SPECIAL NOTES : 4-Tap filter used is (1, -3, 3, -1).
 *
 ****************************************************************************/
void FilteringVert_12_C ( UINT32 QValue, UINT8 *Src, INT32 Pitch )
{
    INT32  Col;
    INT32  Adjust;
    UINT8 *Clip    = &LimitVal_VP31[VAL_RANGE];
    UINT32 Limit   = LoopFilterLimitValuesV2[QValue];
    UINT8 *TwoUp   = Src - (2 * Pitch);
    UINT8 *OneUp   = Src - Pitch;
    UINT8 *OneDown = Src + Pitch;

    for ( Col=0; Col<12; Col++ )
    {
        // 4-tap filter (1, -3, 3, -1) ...
        Adjust = (INT32)TwoUp[Col]
               - ((INT32)OneUp[Col] * 3)
               + ((INT32)Src[Col] * 3)
               - (INT32)OneDown[Col];

        // ... with rounding, scaled down by 8
        Adjust = (Adjust + 4) >> 3;

        // Limit the correction according to the quantizer
        Adjust = Bound ( Limit, Adjust );

        OneUp[Col] = Clip[(INT32)OneUp[Col] + Adjust];
        Src[Col]   = Clip[(INT32)Src[Col] - Adjust];
    }
}
/****************************************************************************
*
* ROUTINE : ApplyReconLoopFilter
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
* INT32 FrameQIndex : Q index for the frame.
* UINT8 *LastFrameRecon : Pointer to last frame reconstruction buffer.
* UINT8 *PostProcessBuffer : Pointer to last post-processing buffer.
* UINT8 *FragInfo : Pointer to list of coded blocks.
* UINT32 FragInfoElementSize : Size of each element.
* UINT32 FragInfoCodedMask : Mask to get at whether fragment is coded.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Applies a loop filter to the edge pixels of coded blocks.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Applies the loop filter to the edges of every coded 8x8 fragment of the
// Y, U and V planes of LastFrameRecon.
//
//  ppi                 : post-processor instance; the per-frame arguments below
//                        are stored into it before any filtering takes place.
//  FrameQIndex         : index into LoopFilterLimitValuesV1; a limit of 0 means
//                        "no filtering" and the function returns early.
//  LastFrameRecon      : buffer holding the three planes that are filtered in place.
//  PostProcessBuffer   : only recorded in ppi here.
//  FragInfo / FragInfoElementSize / FragInfoCodedMask
//                      : recorded in ppi (presumably what blockCoded() reads;
//                        the macro is defined outside this file section).
//
// For each coded fragment, in raster order within a plane:
//   - the left edge is filtered unless the fragment is in the first column,
//   - the top edge is filtered unless the fragment is in the first row,
//   - the right edge is filtered only if a right neighbour exists and is NOT coded,
//   - the bottom edge is filtered only if a lower neighbour exists and is NOT coded.
//
// NOTE(review): the previous revision closed the per-plane loop before its
// "Last Row" section, so the last fragment row was filtered once only, for the
// V plane, and never for Y or U. The row is now handled per plane, as in
// LoopFilter(). This changes the output for the last row of Y and U; confirm
// that no caller depends on the old result.
void ApplyReconLoopFilter
(
    POSTPROC_INSTANCE *ppi,
    INT32 FrameQIndex,
    UINT8 *LastFrameRecon,
    UINT8 *PostProcessBuffer,
    UINT8 *FragInfo,
    UINT32 FragInfoElementSize,
    UINT32 FragInfoCodedMask
)
{
    int    j, m, n;
    UINT32 nextRow;
    UINT8 *rowStart = 0;
    INT32 *BoundingValuePtr;
    INT32  i = 0;
    INT32  FLimit = 0;
    int    FromFragment = 0;
    INT32  LineLength = 0;
    INT32  LineFragments = 0;
    int    FragsAcross = 0;
    int    FragsDown = 0;

    // variables passed in per frame
    ppi->FrameQIndex         = FrameQIndex;
    ppi->LastFrameRecon      = LastFrameRecon;
    ppi->PostProcessBuffer   = PostProcessBuffer;
    ppi->FragInfo            = FragInfo;
    ppi->FragInfoElementSize = FragInfoElementSize;
    ppi->FragInfoCodedMask   = FragInfoCodedMask;

    FLimit = LoopFilterLimitValuesV1[ppi->FrameQIndex];
    if ( FLimit == 0 )
        return;

    BoundingValuePtr = SetupBoundingValueArray ( ppi, FLimit );

    for ( j=0; j<3; j++ )
    {
        // Per-plane geometry: first fragment index, size in fragments,
        // line stride and address of the top-left sample.
        switch ( j )
        {
        case 0: // Y
            FromFragment  = 0;
            FragsAcross   = ppi->HFragments;
            FragsDown     = ppi->VFragments;
            LineLength    = ppi->YStride;
            LineFragments = ppi->HFragments;
            rowStart      = ppi->LastFrameRecon + ppi->ReconYDataOffset;
            break;
        case 1: // U
            FromFragment  = ppi->YPlaneFragments;
            FragsAcross   = ppi->HFragments >> 1;
            FragsDown     = ppi->VFragments >> 1;
            LineLength    = ppi->UVStride;
            LineFragments = ppi->HFragments / 2;
            rowStart      = ppi->LastFrameRecon + ppi->ReconUDataOffset;
            break;
        case 2: // V
            FromFragment  = ppi->YPlaneFragments + ppi->UVPlaneFragments;
            FragsAcross   = ppi->HFragments >> 1;
            FragsDown     = ppi->VFragments >> 1;
            LineLength    = ppi->UVStride;
            LineFragments = ppi->HFragments / 2;
            rowStart      = ppi->LastFrameRecon + ppi->ReconVDataOffset;
            break;
        }

        nextRow = 8*LineLength;     // distance between two fragment rows
        i = FromFragment;

        for ( m=0; m<FragsDown; m++ )
        {
            for ( n=0; n<FragsAcross; n++, i++ )
            {
                if ( !blockCoded ( i ) )
                    continue;

                // Left edge: always, except in the first column
                if ( n > 0 )
                    FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );

                // Top edge: always, except in the first row
                if ( m > 0 )
                    FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );

                // Right edge: only if the block to the right exists and is not coded
                // (a coded neighbour filters the shared edge as its own left edge)
                if ( n < FragsAcross-1 && !blockCoded ( i + 1 ) )
                    FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );

                // Bottom edge: only if the block below exists and is not coded
                if ( m < FragsDown-1 && !blockCoded ( i + LineFragments ) )
                    FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
            }

            rowStart += nextRow;
        }
    }
}
/****************************************************************************
*
* ROUTINE : LoopFilter
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
* INT32 FrameQIndex : Q index for the frame.
* UINT8 *LastFrameRecon : Pointer to last frame reconstruction buffer.
* UINT8 *PostProcessBuffer : Pointer to last post-processing buffer.
* UINT8 *FragInfo : Pointer to list of coded blocks.
* UINT32 FragInfoElementSize : Size of each element.
* UINT32 FragInfoCodedMask : Mask to get at whether fragment is coded.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Applies a loop filter to the edge pixels of coded blocks.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Applies the loop filter to the edges of every coded 8x8 fragment of the
// Y, U and V planes of LastFrameRecon.
//
//  ppi                 : post-processor instance; the per-frame arguments below
//                        are stored into it before any filtering takes place.
//  FrameQIndex         : index into LoopFilterLimitValuesV1; a limit of 0 means
//                        "no filtering" and the function returns early.
//  LastFrameRecon      : buffer holding the three planes that are filtered in place.
//  PostProcessBuffer   : only recorded in ppi here.
//  FragInfo / FragInfoElementSize / FragInfoCodedMask
//                      : recorded in ppi (presumably what blockCoded() reads;
//                        the macro is defined outside this file section).
//
// For each coded fragment, in raster order within a plane:
//   - the left edge is filtered unless the fragment is in the first column,
//   - the top edge is filtered unless the fragment is in the first row,
//   - the right edge is filtered only if a right neighbour exists and is NOT coded,
//   - the bottom edge is filtered only if a lower neighbour exists and is NOT coded.
//
// The former separate first/middle/last row and column sections are folded
// into one raster loop. For planes of at least 2x2 fragments the sequence of
// FilterHoriz/FilterVert calls is unchanged. Planes that are a single fragment
// wide or tall used to run both the "first" and the "last" section, advancing
// the fragment index twice and filtering outside the plane; they are now
// walked exactly once.
void LoopFilter
(
    POSTPROC_INSTANCE *ppi,
    INT32 FrameQIndex,
    UINT8 *LastFrameRecon,
    UINT8 *PostProcessBuffer,
    UINT8 *FragInfo,
    UINT32 FragInfoElementSize,
    UINT32 FragInfoCodedMask
)
{
    int    j, m, n;
    UINT32 nextRow;
    UINT8 *rowStart = 0;
    INT32 *BoundingValuePtr;
    INT32  i = 0;
    INT32  FLimit = 0;
    int    FromFragment = 0;
    INT32  LineLength = 0;
    INT32  LineFragments = 0;
    int    FragsDown = 0;
    int    FragsAcross = 0;

    // variables passed in per frame
    ppi->FrameQIndex         = FrameQIndex;
    ppi->LastFrameRecon      = LastFrameRecon;
    ppi->PostProcessBuffer   = PostProcessBuffer;
    ppi->FragInfo            = FragInfo;
    ppi->FragInfoElementSize = FragInfoElementSize;
    ppi->FragInfoCodedMask   = FragInfoCodedMask;

    FLimit = LoopFilterLimitValuesV1[ppi->FrameQIndex];
    if ( FLimit == 0 )
        return;

    BoundingValuePtr = SetupBoundingValueArray ( ppi, FLimit );

    for ( j=0; j<3; j++ )
    {
        // Per-plane geometry: first fragment index, size in fragments,
        // line stride and address of the top-left sample.
        switch ( j )
        {
        case 0: // Y
            FromFragment  = 0;
            FragsAcross   = ppi->HFragments;
            FragsDown     = ppi->VFragments;
            LineLength    = ppi->YStride;
            LineFragments = ppi->HFragments;
            rowStart      = ppi->LastFrameRecon + ppi->ReconYDataOffset;
            break;
        case 1: // U
            FromFragment  = ppi->YPlaneFragments;
            FragsAcross   = ppi->HFragments >> 1;
            FragsDown     = ppi->VFragments >> 1;
            LineLength    = ppi->UVStride;
            LineFragments = ppi->HFragments / 2;
            rowStart      = ppi->LastFrameRecon + ppi->ReconUDataOffset;
            break;
        case 2: // V
            FromFragment  = ppi->YPlaneFragments + ppi->UVPlaneFragments;
            FragsAcross   = ppi->HFragments >> 1;
            FragsDown     = ppi->VFragments >> 1;
            LineLength    = ppi->UVStride;
            LineFragments = ppi->HFragments / 2;
            rowStart      = ppi->LastFrameRecon + ppi->ReconVDataOffset;
            break;
        }

        nextRow = 8*LineLength;     // distance between two fragment rows
        i = FromFragment;

        for ( m=0; m<FragsDown; m++ )
        {
            for ( n=0; n<FragsAcross; n++, i++ )
            {
                if ( !blockCoded ( i ) )
                    continue;

                // Left edge: always, except in the first column
                if ( n > 0 )
                    FilterHoriz ( ppi, rowStart+8*n-2, LineLength, BoundingValuePtr );

                // Top edge: always, except in the first row
                if ( m > 0 )
                    FilterVert ( ppi, rowStart+8*n, LineLength, BoundingValuePtr );

                // Right edge: only if the block to the right exists and is not coded
                // (a coded neighbour filters the shared edge as its own left edge)
                if ( n < FragsAcross-1 && !blockCoded ( i + 1 ) )
                    FilterHoriz ( ppi, rowStart+8*n+6, LineLength, BoundingValuePtr );

                // Bottom edge: only if the block below exists and is not coded
                if ( m < FragsDown-1 && !blockCoded ( i + LineFragments ) )
                    FilterVert ( ppi, rowStart+8*n+nextRow, LineLength, BoundingValuePtr );
            }

            rowStart += nextRow;
        }
    }
}

View file

@ -0,0 +1,796 @@
/***************************************************************************
*
* Module Title : PostProc.c
*
* Description : Post Processing
*
***************************************************************************/
#define STRICT /* Strict type checking */
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
#include "duck_mem.h"
#include "stdlib.h"
#include <math.h>
#include <stddef.h>
/****************************************************************************
* Macros
****************************************************************************/
// Saturate a signed value to the 0..255 range of an 8-bit sample.
#define Clamp255(x) (unsigned char) ( (x) < 0 ? 0 : ( (x) <= 255 ? (x) : 255 ) )
// Round an address (or integer) X up to the next multiple of 32.
// The argument is parenthesised so that expressions such as "ptr + n" are
// evaluated before the cast, and the mask is derived from the integer type
// in use rather than spelled out as a fixed-width constant.
// _WIN64 covers every 64-bit Windows target (LLP64: unsigned long is 32 bits
// there, so the pointer must not go through unsigned long).
// TODO: benski> need better checks for other compilers
#if defined(_WIN64) || defined(_M_AMD64) || defined(__LP64__)
#define ROUNDUP32(X) ( ( ( (uintptr_t) (X) ) + 31 ) & ~( (uintptr_t) 31 ) )
#else //#elif //defined(_M_IX86)
#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 ) & ~31UL )
#endif
/****************************************************************************
* Imports
****************************************************************************/
extern void SimpleDeblockFrame(POSTPROC_INSTANCE *ppi, UINT8* SrcBuffer, UINT8* DestBuffer);
extern void UpdateUMVBorder( POSTPROC_INSTANCE *ppi, UINT8 * DestReconPtr);
extern void PostProcMachineSpecificConfig(UINT32 );
extern void DeringFrame(POSTPROC_INSTANCE *ppi, UINT8 *Src, UINT8 *Dst);
extern void DeringFrameInterlaced(POSTPROC_INSTANCE *ppi, UINT8 *Src, UINT8 *Dst);
extern void DeblockFrame(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
extern void DeblockFrameUsing7TapFilter(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
extern void DeblockFrameInterlaced(POSTPROC_INSTANCE *ppi, UINT8 *SourceBuffer, UINT8 *DestinationBuffer);
extern UINT32 DeringModifierV1[ Q_TABLE_SIZE ];
extern UINT32 DeringModifierV2[ Q_TABLE_SIZE ];
extern UINT32 *DCQuantScaleV2;
extern UINT32 *DCQuantScaleUV;
extern UINT32 *DCQuantScaleV1;
extern UINT32 LoopFilterLimitValuesVp4[Q_TABLE_SIZE];
extern UINT32 LoopFilterLimitValuesVp5[Q_TABLE_SIZE];
extern UINT32 LoopFilterLimitValuesVp6[Q_TABLE_SIZE];
extern UINT32 DeblockLimitValuesVp4[Q_TABLE_SIZE];
extern UINT32 DeblockLimitValuesVp5[Q_TABLE_SIZE];
extern UINT32 DeblockLimitValuesVp6[Q_TABLE_SIZE];
extern UINT32 *LoopFilterLimitValuesV2;
extern UINT32 *DeblockLimitValuesV2;
/****************************************************************************
* Exports
****************************************************************************/
// Saturation look-up table. InitPostProcessing() fills entry i with
// (i - VAL_RANGE) clamped to 0..255.
UINT8 LimitVal_VP31[VAL_RANGE * 3];

// Everything below is a function pointer used for run-time dispatch. None of
// them is assigned in this part of the file; presumably they are installed by
// PostProcMachineSpecificConfig() -- TODO confirm.

// Edge filters taking a Q value, a sample pointer and a pitch.
void (*FilteringVert_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
void (*FilteringHoriz_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
void (*FilteringVert_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
void (*FilteringHoriz_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch);

// Scaling kernels (judging by the names: 4:5, 3:5 and 1:2 ratios, working on
// vertical bands or single horizontal lines).
void (*VerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
void (*LastVerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
void (*VerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
void (*LastVerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
void (*HorizontalLine_1_2_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
void (*HorizontalLine_3_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
void (*HorizontalLine_4_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
void (*VerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
void (*LastVerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);

// "Simple" edge filters; same signature as FilterHoriz/FilterVert below.
void (*FilterHoriz_Simple)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
void (*FilterVert_Simple)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);

// De-ringing and de-blocking kernels.
void (*DeringBlockWeak)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
void (*DeringBlockStrong)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
void (*DeblockLoopFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
void (*DeblockNonFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
void (*DeblockNonFilteredBandNewFilter)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);

// Build a value array for a given filter limit; the returned INT32 pointer is
// what the loop filter routines hand to FilterHoriz/FilterVert as last argument.
INT32*(*SetupBoundingValueArray)(xPB_INST ppi, INT32 FLimit);
INT32*(*SetupDeblockValueArray)(xPB_INST ppi, INT32 FLimit);

// Edge filters called by the loop filter routines with
// (instance, sample pointer, line length, bounding value array).
void (*FilterHoriz)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
void (*FilterVert)(xPB_INST ppi, UINT8 * PixelPtr, INT32 LineLength, INT32*);

// Whole-frame / whole-plane helpers. PlaneAddNoise has the same signature as
// PlaneAddNoise_C() defined later in this file.
void (*ClampLevels)( POSTPROC_INSTANCE *ppi,INT32 BlackClamp, INT32 WhiteClamp, UINT8 *Src, UINT8 *Dst);
void (*FastDeInterlace)(UINT8 *SrcPtr, UINT8 *DstPtr, INT32 Width, INT32 Height, INT32 Stride);
void (*PlaneAddNoise)( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
/****************************************************************************
*
* ROUTINE : InitPostProcessing
*
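* INPUTS : UINT32 *DCQuantScaleV2p : Table pointer stored in DCQuantScaleV2.
* UINT32 *DCQuantScaleUVp : Table pointer stored in DCQuantScaleUV.
* UINT32 *DCQuantScaleV1p : Table pointer stored in DCQuantScaleV1; its first Q_TABLE_SIZE entries are also copied into DeringModifierV1.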
* UINT32 Version : Codec version number.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Initialise pointers to version specific data tables &
* set-up LUTs.
*
* SPECIAL NOTES : None
*
****************************************************************************/
// Sets up the module-wide tables: the 8-bit saturation LUT, the quantizer
// table pointers, the V1 de-ringing modifiers and the version dependent
// loop-filter / de-blocking limit tables. Finishes by letting the machine
// specific layer configure itself for the given codec version.
void InitPostProcessing
(
    UINT32 *DCQuantScaleV2p,
    UINT32 *DCQuantScaleUVp,
    UINT32 *DCQuantScaleV1p,
    UINT32 Version
)
{
    int Entry;

    // Saturation LUT: slot Entry holds (Entry - VAL_RANGE) clamped to 0..255
    for ( Entry = 0; Entry < VAL_RANGE*3; Entry++ )
    {
        int Centred = Entry - VAL_RANGE;
        LimitVal_VP31[Entry] = Clamp255 ( Centred );
    }

    // Remember the caller's quantizer tables
    DCQuantScaleV2 = DCQuantScaleV2p;
    DCQuantScaleUV = DCQuantScaleUVp;
    DCQuantScaleV1 = DCQuantScaleV1p;

    // The V1 de-ringing modifiers are a straight copy of the V1 DC quantizer table
    Entry = 0;
    while ( Entry < Q_TABLE_SIZE )
    {
        DeringModifierV1[Entry] = DCQuantScaleV1[Entry];
        Entry++;
    }

    // Limit tables: versions below 5 use the Vp4 set, version 5 the Vp5 set,
    // version 6 and anything newer the Vp6 set
    if ( Version < 5 )
    {
        LoopFilterLimitValuesV2 = LoopFilterLimitValuesVp4;
        DeblockLimitValuesV2    = DeblockLimitValuesVp4;
    }
    else if ( Version == 5 )
    {
        LoopFilterLimitValuesV2 = LoopFilterLimitValuesVp5;
        DeblockLimitValuesV2    = DeblockLimitValuesVp5;
    }
    else
    {
        LoopFilterLimitValuesV2 = LoopFilterLimitValuesVp6;
        DeblockLimitValuesV2    = DeblockLimitValuesVp6;
    }

    PostProcMachineSpecificConfig ( Version );
}
/****************************************************************************
*
* ROUTINE : DeInitPostProcessing
*
* INPUTS : None.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : De-initializes post-processing module.
*
* SPECIAL NOTES : Currently this function does nothing.
*
****************************************************************************/
// Counterpart of InitPostProcessing(). There is nothing to release at module
// level, so the body is intentionally empty.
void DeInitPostProcessing ( void )
{
}
/****************************************************************************
*
* ROUTINE : DeletePostProcBuffers
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : De-allocates buffers used by the post-processing module.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Releases every working buffer owned by the instance and clears both the raw
// allocation pointer (xxxAlloc, the value handed to duck_free) and the pointer
// derived from it. Safe to call repeatedly and on a freshly zeroed instance:
// pointers that are already 0 are skipped. A NULL instance is ignored.
// (The previous revision released IntermediateBufferAlloc in two identical,
// consecutive blocks; the redundant copy has been dropped.)
void DeletePostProcBuffers ( POSTPROC_INSTANCE *ppi )
{
    if ( !ppi )
        return;

    if ( ppi->IntermediateBufferAlloc )
        duck_free ( ppi->IntermediateBufferAlloc );
    ppi->IntermediateBufferAlloc = 0;
    ppi->IntermediateBuffer      = 0;

    if ( ppi->FiltBoundingValueAlloc )
        duck_free ( ppi->FiltBoundingValueAlloc );
    ppi->FiltBoundingValueAlloc = 0;
    ppi->FiltBoundingValue      = 0;

    if ( ppi->DeblockBoundingValueAlloc )
        duck_free ( ppi->DeblockBoundingValueAlloc );
    ppi->DeblockBoundingValueAlloc = 0;
    ppi->DeblockBoundingValue      = 0;

    if ( ppi->FragQIndexAlloc )
        duck_free ( ppi->FragQIndexAlloc );
    ppi->FragQIndexAlloc = 0;
    ppi->FragQIndex      = 0;

    if ( ppi->FragmentVariancesAlloc )
        duck_free ( ppi->FragmentVariancesAlloc );
    ppi->FragmentVariancesAlloc = 0;
    ppi->FragmentVariances      = 0;

    if ( ppi->FragDeblockingFlagAlloc )
        duck_free ( ppi->FragDeblockingFlagAlloc );
    ppi->FragDeblockingFlagAlloc = 0;
    ppi->FragDeblockingFlag      = 0;
}
/****************************************************************************
*
* ROUTINE : AllocatePostProcBuffers
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
*
* OUTPUTS : None.
*
* RETURNS : INT32: TRUE: Success, FALSE: Failure (Change to BOOL!!)
*
* FUNCTION : Allocates buffers used by the post-processing module.
*
* SPECIAL NOTES : Uses ROUNDUP32 to align allocated buffers to improve
* cache performance.
*
****************************************************************************/
// (Re)allocates all working buffers of the instance. Any buffers currently
// held are released first. Every allocation is over-sized by 32 bytes so that
// the pointer actually used (the one without the "Alloc" suffix) can be
// rounded up to a 32-byte boundary with ROUNDUP32.
// Returns TRUE when every buffer was obtained. If any allocation fails, all
// buffers obtained so far are released again and FALSE is returned.
INT32 AllocatePostProcBuffers ( POSTPROC_INSTANCE *ppi )
{
    // Start from a clean slate
    DeletePostProcBuffers ( ppi );

    // Scratch frame: Y stride times bordered height, times 3/2
    ppi->IntermediateBufferAlloc = (UINT8*)duck_malloc ( 32 + ppi->YStride *
        (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2) * 3/2 * sizeof(UINT8), DMEM_GENERAL);
    if ( !ppi->IntermediateBufferAlloc )
        goto AllocationFailed;
    ppi->IntermediateBuffer = (UINT8 *)ROUNDUP32 ( ppi->IntermediateBufferAlloc );

    // 512-entry value arrays for the loop filter and the de-blocker
    ppi->FiltBoundingValueAlloc = (INT32 *)duck_malloc(32+512*sizeof(INT32), DMEM_GENERAL);
    if ( !ppi->FiltBoundingValueAlloc )
        goto AllocationFailed;
    ppi->FiltBoundingValue = (INT32 *)ROUNDUP32 ( ppi->FiltBoundingValueAlloc );

    ppi->DeblockBoundingValueAlloc = (INT32 *)duck_malloc(32+512*sizeof(INT32), DMEM_GENERAL);
    if ( !ppi->DeblockBoundingValueAlloc )
        goto AllocationFailed;
    ppi->DeblockBoundingValue = (INT32 *)ROUNDUP32 ( ppi->DeblockBoundingValueAlloc );

    // Per-fragment arrays, one element per fragment of the frame
    ppi->FragQIndexAlloc = (INT32 *)duck_malloc(32+ppi->UnitFragments*sizeof(INT32), DMEM_GENERAL);
    if ( !ppi->FragQIndexAlloc )
        goto AllocationFailed;
    ppi->FragQIndex = (INT32 *)ROUNDUP32 ( ppi->FragQIndexAlloc );

    ppi->FragmentVariancesAlloc = (INT32 *)duck_malloc(32+ppi->UnitFragments*sizeof(INT32), DMEM_GENERAL);
    if ( !ppi->FragmentVariancesAlloc )
        goto AllocationFailed;
    ppi->FragmentVariances = (INT32 *)ROUNDUP32 ( ppi->FragmentVariancesAlloc );

    ppi->FragDeblockingFlagAlloc = (UINT8 *)duck_malloc(32+ppi->UnitFragments*sizeof(UINT8), DMEM_GENERAL);
    if ( !ppi->FragDeblockingFlagAlloc )
        goto AllocationFailed;
    ppi->FragDeblockingFlag = (UINT8 *)ROUNDUP32 ( ppi->FragDeblockingFlagAlloc );

    return TRUE;

AllocationFailed:
    // Hand back whatever was obtained before the failure
    DeletePostProcBuffers ( ppi );
    return FALSE;
}
/****************************************************************************
*
* ROUTINE : ChangePostProcConfiguration
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
* CONFIG_TYPE *ConfigurationInit : Pointer to the configuration that is copied into the instance.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Initializes the post-processor with the settings passed in and (re)allocates its working buffers.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Copies *ConfigurationInit into the instance, derives the fragment counts,
// strides, border width and plane offsets from it, and (re)allocates the
// working buffers for the new geometry.
//
// The function returns void, so an allocation failure cannot be reported
// here. In that case AllocatePostProcBuffers() has already released all
// buffers and left every buffer pointer at 0.
// NOTE(review): callers have no direct way to notice that failure.
//
// (The previous revision assigned ReconYDataOffset twice in a row with the
// same expression; the redundant statement has been removed.)
void ChangePostProcConfiguration ( POSTPROC_INSTANCE *ppi, CONFIG_TYPE *ConfigurationInit )
{
    // Private copy of the caller's settings
    memcpy ((void *)&ppi->Configuration, (void *)ConfigurationInit, sizeof(CONFIG_TYPE) );

    // Frame size in 8x8 fragments
    ppi->HFragments = (ppi->Configuration.VideoFrameWidth >> 3);
    ppi->VFragments = (ppi->Configuration.VideoFrameHeight>> 3);

    ppi->YStride  = ppi->Configuration.YStride;
    ppi->UVStride = ppi->Configuration.UVStride;

    // Fragment counts: each chroma plane has a quarter of the luma fragments
    ppi->YPlaneFragments  = ppi->HFragments * ppi->VFragments;
    ppi->UVPlaneFragments = ppi->YPlaneFragments / 4;
    ppi->UnitFragments    = ppi->YPlaneFragments + 2 * ppi->UVPlaneFragments;

    // Border width: half of the part of a luma line not covered by fragments
    ppi->MVBorder = (ppi->YStride - 8*ppi->HFragments)/2;

    // Offset of the first visible Y sample: skip MVBorder lines and MVBorder columns
    ppi->ReconYDataOffset = ppi->MVBorder * ppi->YStride + ppi->MVBorder;

    // U follows the bordered Y plane; its own border is half as wide
    ppi->ReconUDataOffset =
        (ppi->YStride * (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2))
        + ppi->MVBorder / 2 * ppi->UVStride + ppi->MVBorder/2;

    // V follows the bordered Y and U planes
    ppi->ReconVDataOffset =
        (ppi->YStride * (ppi->Configuration.VideoFrameHeight + ppi->MVBorder*2))
        + (ppi->UVStride * (ppi->Configuration.VideoFrameHeight/2 + ppi->MVBorder))
        + ppi->MVBorder/2 * ppi->UVStride +ppi->MVBorder/2;

    // Result deliberately discarded, see the note in the header comment
    (void) AllocatePostProcBuffers ( ppi );
}
/****************************************************************************
*
* ROUTINE : CreatePostProcInstance
*
* INPUTS : CONFIG_TYPE *ConfigurationInit : Pointer to configuration.
*
* OUTPUTS : None.
*
* RETURNS : POSTPROC_INSTANCE *: Pointer to allocated & configured
* post-processor instance.
*
* FUNCTION : Allocates space for and initializes a post-processor
* instance.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Allocates a post-processor instance, zeroes it, configures it from
// *ConfigurationInit and sets AddNoiseMode to 1.
// Returns the new instance, or 0 if the instance itself could not be allocated.
POSTPROC_INSTANCE *CreatePostProcInstance ( CONFIG_TYPE *ConfigurationInit )
{
    const int InstanceBytes = sizeof ( POSTPROC_INSTANCE );
    POSTPROC_INSTANCE *NewInstance =
        (POSTPROC_INSTANCE *) duck_malloc ( InstanceBytes, DMEM_GENERAL );

    if ( NewInstance )
    {
        // Every field, including all buffer pointers, starts out as 0
        memset ( (unsigned char *)NewInstance, 0, InstanceBytes );

        ChangePostProcConfiguration ( NewInstance, ConfigurationInit );

        NewInstance->AddNoiseMode = 1;
    }

    return NewInstance;
}
/****************************************************************************
*
* ROUTINE : DeletePostProcInstance
*
* INPUTS : POSTPROC_INSTANCE **ppi : Pointer-to-pointer to post-processor instance.
*
* OUTPUTS : POSTPROC_INSTANCE **ppi : Pointer-to-pointer to post-processor instance.
*
* RETURNS : void.
*
* FUNCTION : Deletes post-processor instance & de-allocates memory.
*
* SPECIAL NOTES : Pointer to post-processor instance is set to NULL
* on exit.
*
****************************************************************************/
// Destroys the instance *ppi points at: first its working buffers, then the
// instance itself. *ppi is reset to 0 afterwards. Does nothing if *ppi is
// already 0.
void DeletePostProcInstance ( POSTPROC_INSTANCE **ppi )
{
    POSTPROC_INSTANCE *Instance = *ppi;

    if ( !Instance )
        return;

    // Delete any other dynamically allocated temporary buffers
    DeletePostProcBuffers ( Instance );

    duck_free ( Instance );
    *ppi = 0;
}
/****************************************************************************
*
* ROUTINE : SetPPInterlacedMode
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
* int Interlaced : 0=Non-interlaced, 1=Interlaced.
*
* OUTPUTS : None.
*
* RETURNS : void.
*
* FUNCTION : Set post-processor's Interlaced Mode to specified value.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Records the interlaced flag in the instance's copy of the configuration.
void SetPPInterlacedMode ( POSTPROC_INSTANCE *ppi, int Interlaced )
{
    (*ppi).Configuration.Interlaced = Interlaced;
}
/****************************************************************************
*
* ROUTINE : SetDeInterlaceMode
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
* int DeInterlaceMode : Mode to use for de-interlacing.
*
* OUTPUTS : None.
*
* RETURNS : void.
*
* FUNCTION : Set post-processor's De-Interlace Mode to specified value.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Records the de-interlace mode in the instance.
void SetDeInterlaceMode ( POSTPROC_INSTANCE *ppi, int DeInterlaceMode )
{
    (*ppi).DeInterlaceMode = DeInterlaceMode;
}
/****************************************************************************
*
* ROUTINE : SetAddNoiseMode
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
* int AddNoiseMode : Mode to use for noise addition.
*
* OUTPUTS : None.
*
* RETURNS : void.
*
* FUNCTION : Set post-processor's Add-Noise Mode to specified value.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Records the add-noise mode in the instance.
void SetAddNoiseMode(POSTPROC_INSTANCE *ppi, int AddNoiseMode)
{
    (*ppi).AddNoiseMode = AddNoiseMode;
}
/****************************************************************************
*
* ROUTINE : UpdateFragQIndex
*
* INPUTS : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
*
* OUTPUTS : None.
*
* RETURNS : void.
*
* FUNCTION : Update the QIndex for each updated block.
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Stamps the current frame's Q index onto every fragment that blockCoded()
// reports as coded; the entries of all other fragments are left untouched.
void UpdateFragQIndex ( POSTPROC_INSTANCE *ppi )
{
    // Q index of the frame being processed, read once up front
    const UINT32 FrameQ = ppi->FrameQIndex;
    UINT32 Frag = 0;

    while ( Frag < ppi->UnitFragments )
    {
        if ( blockCoded ( Frag ) )
            ppi->FragQIndex[Frag] = FrameQ;

        Frag++;
    }
}
/****************************************************************************
*
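* ROUTINE : gaussian
*
* INPUTS : double sigma : Standard deviation of the distribution.
* double mu : Mean of the distribution.
* double x : Position at which the curve is evaluated.
*
* OUTPUTS : None.
*
* RETURNS : double: Height of the gaussian curve at position x.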
*
* FUNCTION : generate height of gaussian distribution curve with
* deviation sigma and mean mu at position x
*
* SPECIAL NOTES : None.
*
****************************************************************************/
double gaussian(double sigma, double mu, double x)
{
return 1 / ( sigma * sqrt(2.0*3.14159265)) *
(exp(-(x-mu)*(x-mu)/(2*sigma*sigma)));
}
/****************************************************************************
*
* ROUTINE : PlaneAddNoise_C
*
* INPUTS : UINT8 *Start starting address of buffer to add gaussian
* noise to
* UINT32 Width width of plane
* UINT32 Height height of plane
* INT32 Pitch distance between subsequent lines of frame
* INT32 q quantizer used to determine amount of noise
* to add
*
* OUTPUTS : None.
*
* RETURNS : void.
*
* FUNCTION : adds gaussian noise to a plane of pixels
*
* SPECIAL NOTES : None.
*
****************************************************************************/
// Adds gaussian-distributed noise to a plane of 8-bit samples, in place.
//
//  Start  : first sample of the plane.
//  Width  : samples per line to process.
//  Height : number of lines to process.
//  Pitch  : distance in samples between the starts of consecutive lines.
//  q      : quantizer; sets the noise spread via sigma = 1 + 0.8*(63-q)/63,
//           i.e. sigma runs from 1.8 (q = 0) down to 1.0 (q = 63).
//
// How it works:
//  1. A 256-entry table is built whose values follow a gaussian distribution
//     with the sigma above, so a uniformly random index yields a gaussian sample.
//  2. A 2048-entry line of noise is drawn from that table with rand().
//  3. For every line a random start offset (0..255) into the noise line is
//     chosen; each sample is first clamped so that adding the noise cannot
//     leave 0..255, then the noise value is added.
//
// Changes relative to the previous revision:
//  - The upper clamp was "255 - CharDist[0]". CharDist[0] is the most negative
//    noise value, so the limit lay above 255 and never applied; bright samples
//    wrapped around. Samples are now clamped to
//    [-smallest noise, 255 - largest noise].
//  - The noise line was read at offset + column without a bound, running off
//    the 2048-entry array once Width + offset exceeded 2048. The index now
//    wraps; columns that were in range before read the same value as before.
//  - The tables are explicitly signed, so negative noise values survive on
//    platforms where plain char is unsigned.
//  - Filling the distribution table stops at 256 entries, so no value of q
//    can make it run off the array.
//  - The line address is computed with signed arithmetic (ptrdiff_t).
//  - Locals that were computed but never read have been removed.
// The number and order of rand() calls are unchanged.
void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q)
{
    // Both sizes are powers of two; NOISE_ENTRIES-1 is used as a wrap mask.
    enum { DIST_ENTRIES = 256, NOISE_ENTRIES = 2048 };

    signed char CharDist[DIST_ENTRIES];
    signed char Rand[NOISE_ENTRIES];
    unsigned int i,j;
    int level;
    int filled   = 0;
    int minNoise = 0;       // most negative value present in CharDist
    int maxNoise = 0;       // most positive value present in CharDist
    int lowLimit, highLimit;
    double sigma;

    sigma = 1 + .8*(63-q) / 63.0;

    // set up a lookup table of 256 entries that matches
    // a gaussian distribution with sigma determined by q.
    for ( level = -32; level < 32 && filled < DIST_ENTRIES; level++ )
    {
        // number of table slots this noise level is entitled to
        int count = (int) (.5+256*gaussian(sigma,0,level));

        if ( count > DIST_ENTRIES - filled )
            count = DIST_ENTRIES - filled;
        if ( count <= 0 )
            continue;

        if ( level < minNoise )
            minNoise = level;
        if ( level > maxNoise )
            maxNoise = level;

        while ( count-- > 0 )
            CharDist[filled++] = (signed char) level;
    }

    // rounding may leave a few slots unassigned: those add no noise
    while ( filled < DIST_ENTRIES )
        CharDist[filled++] = 0;

    // generate a line of 2048 characters following our gaussian distribution
    for ( i = 0; i < NOISE_ENTRIES; i++ )
        Rand[i] = CharDist[rand() & 0xff];

    // sample range inside which adding any table value stays within 0..255
    lowLimit  = -minNoise;
    highLimit = 255 - maxNoise;

    for ( i = 0; i < Height; i++ )
    {
        UINT8 *Pos = Start + (ptrdiff_t) i * Pitch;
        const unsigned int Offset = (unsigned int) ( rand() & 0xff );   // per-line start in Rand

        for ( j = 0; j < Width; j++ )
        {
            int v = Pos[j];

            if ( v < lowLimit )
                v = lowLimit;
            if ( v > highLimit )
                v = highLimit;

            Pos[j] = (UINT8) ( v + Rand[(Offset + j) & (NOISE_ENTRIES - 1)] );
        }
    }
}
/****************************************************************************
 *
 *  ROUTINE       : DeInterlaceIntoPostProcessBuffer
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processor instance.
 *                  UINT8 *SrcFrame        : Frame buffer to read from.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void.
 *
 *  FUNCTION      : Copies the U and V planes of SrcFrame unchanged into
 *                  ppi->PostProcessBuffer, then runs FastDeInterlace on the
 *                  Y plane of SrcFrame, writing into ppi->PostProcessBuffer.
 *
 *  SPECIAL NOTES : File-local helper used only by PostProcess.
 *
 ****************************************************************************/
static void DeInterlaceIntoPostProcessBuffer ( POSTPROC_INSTANCE *ppi, UINT8 *SrcFrame )
{
    /* Number of bytes copied for each of the two chroma planes. */
    int ChromaPlaneBytes = ppi->VFragments*2*ppi->YStride;
    UINT8 *DstFrame = ppi->PostProcessBuffer;

    /* Chroma is passed through untouched... */
    memcpy ( DstFrame+ppi->ReconUDataOffset, SrcFrame+ppi->ReconUDataOffset, ChromaPlaneBytes );
    memcpy ( DstFrame+ppi->ReconVDataOffset, SrcFrame+ppi->ReconVDataOffset, ChromaPlaneBytes );

    /* ...only the luma plane goes through the de-interlacer. */
    FastDeInterlace ( SrcFrame+ppi->ReconYDataOffset,
                      DstFrame+ppi->ReconYDataOffset,
                      ppi->HFragments*8, ppi->VFragments*8, ppi->YStride );
}

/****************************************************************************
 *
 *  ROUTINE       : PostProcess
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi     : Pointer to post-processor instance.
 *                  INT32 Vp3VersionNo         : Encoder version used to code frame.
 *                  INT32 FrameType            : Encoding method: Keyframe or non-Keyframe.
 *                  INT32 PostProcessingLevel  : Level of post-processing to perform.
 *                  INT32 FrameQIndex          : Q-index used to code frame.
 *                  UINT8 *LastFrameRecon      : Reconstructed frame to be post-processed (source).
 *                  UINT8 *PostProcessBuffer   : Buffer that receives the post-processed frame.
 *                  UINT8 *FragInfo            : Pointer to list of coded blocks.
 *                  UINT32 FragInfoElementSize : Size of each FragInfo element.
 *                  UINT32 FragInfoCodedMask   : Mask to get at whether fragment is coded.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void.
 *
 *  FUNCTION      : Stores the per-frame parameters in the instance and then
 *                  dispatches on PostProcessingLevel:
 *                    8       - UpdateFragQIndex, then DeblockFrame (version < 2)
 *                              or SimpleDeblockFrame.
 *                    5, 6    - deblock + dering, plus PlaneAddNoise on the Y
 *                              plane when AddNoiseMode is set (progressive path).
 *                    7       - deblock + dering.
 *                    4       - deblock only.
 *                    1       - UpdateFragQIndex only.
 *                    0       - no filtering; de-interlace only.
 *                    default - deblock + dering.
 *                  For levels 4-7, encoder versions below 5 call
 *                  UpdateFragQIndex first; versions 5 and up with interlaced
 *                  content use the *Interlaced filter variants instead.
 *                  When the content is interlaced and DeInterlaceMode is set,
 *                  filtering is done into IntermediateBuffer and the result
 *                  is de-interlaced into PostProcessBuffer.
 *
 *  SPECIAL NOTES : At level 0 nothing is written to PostProcessBuffer unless
 *                  the content is interlaced and DeInterlaceMode is set.
 *                  At level 1 nothing is written to PostProcessBuffer here.
 *
 ****************************************************************************/
void PostProcess
(
    POSTPROC_INSTANCE *ppi,
    INT32 Vp3VersionNo,
    INT32 FrameType,
    INT32 PostProcessingLevel,
    INT32 FrameQIndex,
    UINT8 *LastFrameRecon,
    UINT8 *PostProcessBuffer,
    UINT8 *FragInfo,
    UINT32 FragInfoElementSize,
    UINT32 FragInfoCodedMask
)
{
    /* Latch this frame's parameters into the instance; the routines called
       below receive only ppi and read what they need from it. */
    ppi->Vp3VersionNo        = Vp3VersionNo;
    ppi->FrameType           = FrameType;
    ppi->PostProcessingLevel = PostProcessingLevel;
    ppi->FrameQIndex         = FrameQIndex;
    ppi->LastFrameRecon      = LastFrameRecon;
    ppi->PostProcessBuffer   = PostProcessBuffer;
    ppi->FragInfo            = FragInfo;
    ppi->FragInfoElementSize = FragInfoElementSize;
    ppi->FragInfoCodedMask   = FragInfoCodedMask;

    switch ( ppi->PostProcessingLevel )
    {
    case 8:
        /* On a slow machine, use a simpler and faster deblocking filter */
        UpdateFragQIndex ( ppi );

        if ( ppi->Vp3VersionNo < 2 )
        {
            DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
        }
        else if ( ppi->Configuration.Interlaced && ppi->DeInterlaceMode )
        {
            /* Deblock into the scratch frame, then de-interlace to the output. */
            SimpleDeblockFrame ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
            DeInterlaceIntoPostProcessBuffer ( ppi, ppi->IntermediateBuffer );
        }
        else
        {
            SimpleDeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
        }
        break;

    case 6:
    case 5:
        if ( ppi->Vp3VersionNo < 5 )
        {
            UpdateFragQIndex ( ppi );
        }
        else if ( ppi->Configuration.Interlaced )
        {
            if ( ppi->DeInterlaceMode )
            {
                /* Filter in the scratch frame, then de-interlace to the output. */
                DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
                UpdateUMVBorder ( ppi, ppi->IntermediateBuffer );
                DeringFrameInterlaced ( ppi, ppi->IntermediateBuffer, ppi->IntermediateBuffer );
                DeInterlaceIntoPostProcessBuffer ( ppi, ppi->IntermediateBuffer );
            }
            else
            {
                DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
                UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
                DeringFrameInterlaced ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
            }
            /* Interlaced frames are finished here; no noise is added. */
            break;
        }

        /* Progressive path (also taken after UpdateFragQIndex above). */
        DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
        UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
        DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );

        /* PlaneAddNoise is a function pointer; only call it if it is set. */
        if ( ppi->AddNoiseMode && PlaneAddNoise != 0 )
        {
            PlaneAddNoise ( ppi->PostProcessBuffer + ppi->ReconYDataOffset,
                            ppi->HFragments*8, ppi->VFragments*8, ppi->YStride, FrameQIndex );
        }
        break;

    case 7:
        if ( ppi->Vp3VersionNo < 5 )
        {
            UpdateFragQIndex ( ppi );
        }
        else if ( ppi->Configuration.Interlaced )
        {
            if ( ppi->DeInterlaceMode )
            {
                DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
                DeInterlaceIntoPostProcessBuffer ( ppi, ppi->IntermediateBuffer );
            }
            else
            {
                DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
            }
            /* Interlaced frames skip the progressive deblock/dering below. */
            break;
        }

        DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
        UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
        DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
        break;

    case 4:
        if ( ppi->Vp3VersionNo < 5 )
        {
            UpdateFragQIndex ( ppi );
        }
        else if ( ppi->Configuration.Interlaced )
        {
            if ( ppi->DeInterlaceMode )
            {
                DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->IntermediateBuffer );
                DeInterlaceIntoPostProcessBuffer ( ppi, ppi->IntermediateBuffer );
            }
            else
            {
                DeblockFrameInterlaced ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
            }
            break;
        }

        /* Deblock only at this level (a PlaneAddNoise call here was disabled
           in the original source and remains disabled). */
        DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
        break;

    case 1:
        UpdateFragQIndex ( ppi );
        break;

    case 0:
        /* No filtering: just de-interlace straight from the reconstruction. */
        if ( ppi->Configuration.Interlaced && ppi->DeInterlaceMode )
            DeInterlaceIntoPostProcessBuffer ( ppi, ppi->LastFrameRecon );
        break;

    default:
        DeblockFrame ( ppi, ppi->LastFrameRecon, ppi->PostProcessBuffer );
        UpdateUMVBorder ( ppi, ppi->PostProcessBuffer );
        DeringFrame ( ppi, ppi->PostProcessBuffer, ppi->PostProcessBuffer );
        break;
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,392 @@
/****************************************************************************
*
* Module Title : simpledeblocker.c
*
* Description : Simple deblocking filter.
*
***************************************************************************/
#define STRICT /* Strict type checking */
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
/****************************************************************************
* Macros
****************************************************************************/
#if ( defined(_MSC_VER) || defined(MAPCA) )
/* Local absolute-value macro for these toolchains.
   The argument is fully parenthesised so that expressions containing
   operators of lower precedence than '>' (e.g. abs(a & b)) are compared as
   a whole. The argument is evaluated more than once, so it must not have
   side effects. */
#define abs(x) ( ((x)>0) ? (x) : (-(x)) )
#endif
/****************************************************************************
* Imports
****************************************************************************/
extern UINT32 *DeblockLimitValuesV2;
/****************************************************************************
* Module Statics
****************************************************************************/
/* Deblocking filter limit, indexed by the frame's Q index (Q_TABLE_SIZE
   entries). SimpleDeblockFrame uses this table when the encoder version
   (Vp3VersionNo) is below 2; otherwise it uses DeblockLimitValuesV2.
   The values never increase with the index and are zero for the last 16
   entries. */
static const UINT32 DeblockLimitValuesV1[Q_TABLE_SIZE] =
{
30, 25, 20, 20, 15, 15, 14, 14,
13, 13, 12, 12, 11, 11, 10, 10,
9, 9, 8, 8, 7, 7, 7, 7,
6, 6, 6, 6, 5, 5, 5, 5,
4, 4, 4, 4, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0
};
/****************************************************************************
 *
 *  ROUTINE       : FilterHoriz_Simple2_C
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processing instance (NOT USED).
 *                  UINT8 *PixelPtr         : Pointer to the first of four horizontally
 *                                            adjacent pixels that straddle the edge
 *                                            (the edge lies between [1] and [2]).
 *                  INT32 LineLength        : Stride of the image being filtered.
 *                  INT32 *BoundingValuePtr : Pointer to array of bounding values,
 *                                            indexed with a signed offset.
 *
 *  OUTPUTS       : Pixels [0]..[3] of each of the 8 rows may be modified in place.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a loop filter to the vertical edge by applying
 *                  the filter horizontally to each of the 8 rows of the
 *                  block edge. Rows with no step across the edge
 *                  (PixelPtr[1] == PixelPtr[2]) are left untouched.
 *
 *  SPECIAL NOTES : The row pointer is advanced in the loop header so that a
 *                  skipped row still moves on to the next one. Previously the
 *                  advance sat at the bottom of the loop body and was bypassed
 *                  by 'continue', so the first skipped row caused all
 *                  following rows to be skipped as well.
 *
 ****************************************************************************/
void FilterHoriz_Simple2_C
(
    POSTPROC_INSTANCE *ppi,
    UINT8 *PixelPtr,
    INT32 LineLength,
    INT32 *BoundingValuePtr
)
{
    INT32 j;
    INT32 x,y,z;
    INT32 FiltVal;
    /* Point at the middle third of the table so it can take signed offsets. */
    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];

    (void) ppi;

    /* One iteration per row; the pointer steps down a row every iteration,
       including iterations that bail out early. */
    for ( j=0; j<8; j++, PixelPtr += LineLength )
    {
        /* Step across the edge itself. */
        y = PixelPtr[2]-PixelPtr[1];
        if ( !y )
            continue;

        /* Gradients on either side of the edge. */
        x = PixelPtr[1]-PixelPtr[0];
        z = PixelPtr[3]-PixelPtr[2];

        /* NOTE(review): the 4-tap form used by FilterHoriz_Simple_C
           (3*(P2-P1) + P0 - P3) works out to 2*y - x - z; this routine adds z
           rather than subtracting it -- confirm that is intended. */
        FiltVal = 2 * y + z - x;
        FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];

        /* Pull the two pixels adjacent to the edge towards each other. */
        PixelPtr[1] = LimitTable[(INT32)PixelPtr[1] + FiltVal];
        PixelPtr[2] = LimitTable[(INT32)PixelPtr[2] - FiltVal];

        /* The outer pixels get half the correction, and only when both sides
           of the edge are perfectly flat (x == 0 and z == 0). */
        FiltVal >>= 1;
        FiltVal *= ((x|z)==0);
        PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] + FiltVal];
        PixelPtr[3] = LimitTable[(INT32)PixelPtr[3] - FiltVal];
    }
}
/****************************************************************************
 *
 *  ROUTINE       : FilterVert_Simple2_C
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processing instance (NOT USED).
 *                  UINT8 *PixelPtr         : Pointer to the first pixel of the row
 *                                            immediately below the edge. Rows at
 *                                            -2, -1, 0 and +1 strides are accessed.
 *                  INT32 LineLength        : Stride of the image being filtered.
 *                  INT32 *BoundingValuePtr : Pointer to array of bounding values,
 *                                            indexed with a signed offset.
 *
 *  OUTPUTS       : The four rows around the edge may be modified in place.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a loop filter to the horizontal edge by applying
 *                  the filter vertically to each of the 8 columns of the
 *                  block edge.
 *
 *  SPECIAL NOTES : None.
 *
 ****************************************************************************/
void FilterVert_Simple2_C
(
    POSTPROC_INSTANCE *ppi,
    UINT8 *PixelPtr,
    INT32 LineLength,
    INT32 *BoundingValuePtr
)
{
    INT32 Col;
    INT32 Adjust;
    /* Point at the middle third of the table so it can take signed offsets. */
    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];
    /* The four rows that straddle the edge: two above, two below. */
    UINT8 *Above2 = PixelPtr - 2*LineLength;
    UINT8 *Above1 = PixelPtr - LineLength;
    UINT8 *Below0 = PixelPtr;
    UINT8 *Below1 = PixelPtr + LineLength;

    (void) ppi;

    for ( Col=0; Col<8; Col++ )
    {
        /* "Flat" means neither side of the edge changes by more than 1. */
        INT32 IsFlat = ( abs ( Above2[Col] - Above1[Col] ) <= 1 ) &&
                       ( abs ( Below0[Col] - Below1[Col] ) <= 1 );

        Adjust = ( (INT32)Below0[Col]*3 ) - ( (INT32)Above1[Col]*3 );

        /* Textured neighbourhood: fold the outer taps into the filter value. */
        if ( !IsFlat )
            Adjust += ( (INT32)Above2[Col] ) - ( (INT32)Below1[Col] );

        Adjust = BoundingValuePtr[(Adjust + 4) >> 3];

        Above1[Col] = LimitTable[(INT32)Above1[Col] + Adjust];
        Below0[Col] = LimitTable[(INT32)Below0[Col] - Adjust];

        /* Flat neighbourhood: also nudge the outer pixels by half as much. */
        if ( IsFlat )
        {
            Adjust >>= 1;
            Above2[Col] = LimitTable[(INT32)Above2[Col] + Adjust];
            Below1[Col] = LimitTable[(INT32)Below1[Col] - Adjust];
        }
    }
}
/****************************************************************************
 *
 *  ROUTINE       : FilterHoriz_Simple_C
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processing instance (NOT USED).
 *                  UINT8 *PixelPtr         : Pointer to the first of four horizontally
 *                                            adjacent pixels that straddle the edge
 *                                            (the edge lies between [1] and [2]).
 *                  INT32 LineLength        : Stride of the image being filtered.
 *                  INT32 *BoundingValuePtr : Pointer to array of bounding values,
 *                                            indexed with a signed offset.
 *
 *  OUTPUTS       : Pixels [0]..[3] of each of the 8 rows may be modified in place.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a loop filter to the vertical edge by applying
 *                  the filter horizontally to each of the 8 rows of the
 *                  block edge.
 *
 *  SPECIAL NOTES : None.
 *
 ****************************************************************************/
void FilterHoriz_Simple_C
(
    POSTPROC_INSTANCE *ppi,
    UINT8 *PixelPtr,
    INT32 LineLength,
    INT32 *BoundingValuePtr
)
{
    INT32 Row;
    INT32 Adjust;
    /* Point at the middle third of the table so it can take signed offsets. */
    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];

    (void) ppi;

    for ( Row=0; Row<8; Row++, PixelPtr += LineLength )
    {
        /* Snapshot the four taps before any of them is overwritten. */
        INT32 P0 = PixelPtr[0];
        INT32 P1 = PixelPtr[1];
        INT32 P2 = PixelPtr[2];
        INT32 P3 = PixelPtr[3];
        /* "Flat" means neither side of the edge changes by more than 1. */
        INT32 IsFlat = ( abs(P0 - P1) <= 1 ) && ( abs(P2 - P3) <= 1 );

        Adjust = (P2*3) - (P1*3);

        /* Textured neighbourhood: fold the outer taps into the filter value. */
        if ( !IsFlat )
            Adjust += P0 - P3;

        Adjust = BoundingValuePtr[(Adjust + 4) >> 3];

        PixelPtr[1] = LimitTable[P1 + Adjust];
        PixelPtr[2] = LimitTable[P2 - Adjust];

        /* Flat neighbourhood: also nudge the outer pixels by half as much. */
        if ( IsFlat )
        {
            Adjust >>= 1;
            PixelPtr[0] = LimitTable[P0 + Adjust];
            PixelPtr[3] = LimitTable[P3 - Adjust];
        }
    }
}
/****************************************************************************
 *
 *  ROUTINE       : FilterVert_Simple_C
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi  : Pointer to post-processing instance (NOT USED).
 *                  UINT8 *PixelPtr         : Pointer to the first pixel of the row
 *                                            immediately below the edge. Rows at
 *                                            -2, -1, 0 and +1 strides are accessed.
 *                  INT32 LineLength        : Stride of the image being filtered.
 *                  INT32 *BoundingValuePtr : Pointer to array of bounding values,
 *                                            indexed with a signed offset.
 *
 *  OUTPUTS       : The four rows around the edge may be modified in place.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a loop filter to the horizontal edge by applying
 *                  the filter vertically to each of the 8 columns of the
 *                  block edge.
 *
 *  SPECIAL NOTES : None.
 *
 ****************************************************************************/
void FilterVert_Simple_C
(
    POSTPROC_INSTANCE *ppi,
    UINT8 *PixelPtr,
    INT32 LineLength,
    INT32 *BoundingValuePtr
)
{
    INT32 Col;
    INT32 Delta;
    /* Signed row offsets of the four taps relative to PixelPtr. */
    const INT32 Up2   = -(2*LineLength);
    const INT32 Up1   = -LineLength;
    const INT32 Down1 = LineLength;
    /* Point at the middle third of the table so it can take signed offsets. */
    UINT8 *LimitTable = &LimitVal_VP31[VAL_RANGE];

    (void) ppi;

    for ( Col=0; Col<8; Col++, PixelPtr++ )
    {
        /* High variance: either side of the edge changes by more than 1. */
        INT32 Textured = ( abs(PixelPtr[Up2] - PixelPtr[Up1]) > 1 ) ||
                         ( abs(PixelPtr[0] - PixelPtr[Down1]) > 1 );

        Delta = ( (INT32)PixelPtr[0]*3 ) - ( (INT32)PixelPtr[Up1]*3 );
        if ( Textured )
            Delta += ( (INT32)PixelPtr[Up2] ) - ( (INT32)PixelPtr[Down1] );

        Delta = BoundingValuePtr[(Delta + 4) >> 3];

        /* Pull the two rows adjacent to the edge towards each other. */
        PixelPtr[Up1] = LimitTable[(INT32)PixelPtr[Up1] + Delta];
        PixelPtr[0]   = LimitTable[(INT32)PixelPtr[0] - Delta];

        if ( Textured )
            continue;

        /* Low variance only: adjust the outer rows by half the correction. */
        Delta >>= 1;
        PixelPtr[Up2]   = LimitTable[(INT32)PixelPtr[Up2] + Delta];
        PixelPtr[Down1] = LimitTable[(INT32)PixelPtr[Down1] - Delta];
    }
}
/****************************************************************************
 *
 *  ROUTINE       : SimpleDeblockFrame
 *
 *  INPUTS        : POSTPROC_INSTANCE *ppi : Pointer to post-processing instance.
 *                  UINT8 *SrcBuffer       : Pointer to image to be deblocked.
 *                  UINT8 *DestBuffer      : Pointer to image to hold deblocked image.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Simple deblocker. For each of the Y, U and V planes the
 *                  image is handled one 8-line band at a time: the band is
 *                  copied from SrcBuffer to DestBuffer and the block edges
 *                  inside it are then filtered in place in DestBuffer.
 *                  The left edge of the first block column and the top edge
 *                  of the first band are not filtered.
 *
 *  SPECIAL NOTES : The filter limit is looked up by ppi->FrameQIndex, from
 *                  DeblockLimitValuesV2 for encoder versions >= 2 and from
 *                  DeblockLimitValuesV1 otherwise.
 *
 ****************************************************************************/
void SimpleDeblockFrame ( POSTPROC_INSTANCE *ppi, UINT8 *SrcBuffer, UINT8 *DestBuffer )
{
    INT32 Plane, BandRow, BlockCol;
    INT32 BandOffset;
    INT32 BandBytes;
    INT32 Stride = 0;
    INT32 BlocksAcross = ppi->HFragments;
    INT32 BlocksDown = ppi->VFragments;
    INT32 QIndex = ppi->FrameQIndex;
    /* Encoder version specific limit table. */
    INT32 FLimit = ( ppi->Vp3VersionNo >= 2 ) ? DeblockLimitValuesV2[QIndex]
                                              : DeblockLimitValuesV1[QIndex];
    INT32 *BoundingValuePtr = SetupDeblockValueArray ( ppi, FLimit );

    for ( Plane=0; Plane<3; Plane++ )
    {
        if ( Plane == 0 )
        {
            /* Y plane: full fragment grid. */
            BlocksAcross = ppi->HFragments;
            BlocksDown = ppi->VFragments;
            Stride = ppi->YStride;
            BandOffset = ppi->ReconYDataOffset;
        }
        else
        {
            /* U (Plane 1) and V (Plane 2): half the fragments in each direction. */
            BlocksAcross = ppi->HFragments >> 1;
            BlocksDown = ppi->VFragments >> 1;
            Stride = ppi->UVStride;
            BandOffset = ( Plane == 1 ) ? ppi->ReconUDataOffset : ppi->ReconVDataOffset;
        }

        /* Bytes in one band of 8 image lines. */
        BandBytes = Stride * 8;

        /* The first band is always processed, hence do/while. Bands after the
           first additionally filter their top edge against the band above,
           which has already been copied into DestBuffer. */
        BandRow = 0;
        do
        {
            UINT8 *Band = &DestBuffer[BandOffset];

            memcpy ( Band, &SrcBuffer[BandOffset], BandBytes );

            /* First block column: it has no left edge, so top edge only. */
            if ( BandRow > 0 )
                FilterVert_Simple ( ppi, Band, Stride, BoundingValuePtr );

            /* Remaining block columns. */
            for ( BlockCol=1; BlockCol<BlocksAcross; BlockCol++ )
            {
                /* Left edge is always filtered; start two pixels left of it. */
                FilterHoriz_Simple ( ppi, Band + BlockCol*8 - 2, Stride, BoundingValuePtr );

                /* Top edge, except on the first band. */
                if ( BandRow > 0 )
                    FilterVert_Simple ( ppi, Band + BlockCol*8, Stride, BoundingValuePtr );
            }

            BandOffset += BandBytes;
            BandRow++;
        } while ( BandRow < BlocksDown );
    }
}

View file

@ -0,0 +1,136 @@
/****************************************************************************
*
* Module Title : postp.h
*
* Description : Post processor interface
*
****************************************************************************/
#ifndef POSTP_H
#define POSTP_H
#include "codec_common.h"
// YUV buffer configuration structure.
// Holds width, height, stride and a base pointer for the Y plane, and one
// shared set of width/height/stride plus separate base pointers for the U
// and V planes.
// NOTE(review): units are not stated here -- presumably pixels for
// width/height and bytes for stride; confirm against the users of this type.
typedef struct
{
int YWidth;
int YHeight;
int YStride;
int UVWidth;
int UVHeight;
int UVStride;
char * YBuffer;
char * UBuffer;
char * VBuffer;
} YUV_BUFFER_CONFIG;
// Scaling mode selector (four values, 0x0 - 0x3).
// How each mode is interpreted is decided by the code that consumes it,
// which is not part of this header.
typedef enum
{
MAINTAIN_ASPECT_RATIO = 0x0,
SCALE_TO_FIT = 0x1,
CENTER = 0x2,
OTHER = 0x3
} SCALE_MODE;
// blockCoded(i): reads the first byte of the i-th FragInfo element (elements
// are FragInfoElementSize bytes apart) and masks it with FragInfoCodedMask.
// The result is the masked value, not a normalised 0/1.
// The macro expands to expressions on 'ppi', so a POSTPROC_INSTANCE pointer
// with that exact name must be in scope wherever it is used.
// It is a macro so that callers are insulated from the FragInfo layout,
// which the original author expected to change.
#define blockCoded(i) (ppi->FragInfo[(i)*ppi->FragInfoElementSize]&ppi->FragInfoCodedMask)
// Post-processor instance: per-frame inputs (rewritten by PostProcess on
// every call), per-block working data, filter tables and frame geometry.
typedef struct
{
// per frame information passed in
INT32 Vp3VersionNo; // version of frame
INT32 FrameType; // key or non key
INT32 PostProcessingLevel; // level of post processing to perform
INT32 FrameQIndex; // q index value used on passed in frame
UINT8 *LastFrameRecon; // reconstruction buffer : passed in
UINT8 *PostProcessBuffer; // postprocessing buffer : passed in
// per block information passed in
UINT8 *FragInfo; // blocks coded : passed in
UINT32 FragInfoElementSize; // size of each element
UINT32 FragInfoCodedMask; // mask to get at whether fragment is coded
// per block info maintained by postprocessor
INT32 *FragQIndex; // block's q index : allocated and filled
INT32 *FragmentVariances; // block's pseudo variance : allocated and filled
UINT8 *FragDeblockingFlag; // whether to deblock block : allocated and filled
// filter specific vars
INT32 *BoundingValuePtr; // pointer to a filter
INT32 *FiltBoundingValue; // allocated (512 big)
// deblocker specific vars
INT32 *DeblockValuePtr; // pointer to a filter
INT32 *DeblockBoundingValue; // allocated (512 big)
// frame configuration
CONFIG_TYPE Configuration; // PostProcess reads Configuration.Interlaced to pick the interlaced filter paths
UINT32 ReconYDataOffset; // position within buffer of first y fragment
UINT32 ReconUDataOffset; // position within buffer of first u fragment
UINT32 ReconVDataOffset; // position within buffer of first v fragment
UINT32 YPlaneFragments; // number of y fragments
UINT32 UVPlaneFragments; // number of u and v fragments
UINT32 UnitFragments; // number of total fragments y+u+v
UINT32 HFragments; // number of horizontal fragments in y
UINT32 VFragments; // number of vertical fragments in y
INT32 YStride; // pitch of y in bytes
INT32 UVStride; // pitch of uv in bytes
// allocs so we can align our ptrs
// (presumably the raw allocations behind the same-named pointers above -- TODO confirm)
INT32 *FiltBoundingValueAlloc;
INT32 *DeblockBoundingValueAlloc;
INT32 *FragQIndexAlloc;
INT32 *FragmentVariancesAlloc;
UINT8 *FragDeblockingFlagAlloc;
UINT32 MVBorder; // NOTE(review): not referenced in the code visible here; meaning to be confirmed
UINT8 *IntermediateBufferAlloc; // presumably the raw allocation behind IntermediateBuffer -- TODO confirm
UINT8 *IntermediateBuffer; // scratch frame: PostProcess filters into it before de-interlacing to PostProcessBuffer
UINT32 DeInterlaceMode; // non-zero: PostProcess de-interlaces interlaced content
UINT32 AddNoiseMode; // non-zero: PostProcess calls PlaneAddNoise on the Y plane (levels 5 and 6)
} POSTPROC_INSTANCE;
// Clamping lookup table of 3*VAL_RANGE entries. The filter routines take
// &LimitVal_VP31[VAL_RANGE] as their base and index it with
// (pixel value +/- filter adjustment).
#define VAL_RANGE 256
extern UINT8 LimitVal_VP31[VAL_RANGE * 3];
// Shorthand pointer type used in the prototypes below.
typedef POSTPROC_INSTANCE * xPB_INST ;
// Everything below, except DMachineSpecificConfig, is a pointer to a
// function rather than a function. The pointers are defined and assigned
// outside this header (presumably by DMachineSpecificConfig, to select
// C or CPU-specific implementations -- TODO confirm).
// Loop-filter entry points taking a Q value.
extern void (*FilteringVert_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
extern void (*FilteringHoriz_12)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
extern void (*FilteringVert_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
extern void (*FilteringHoriz_8)(UINT32 QValue,UINT8 * Src, INT32 Pitch);
extern void (*CopyBlock) (unsigned char *src, unsigned char *dest, unsigned int srcstride);
// Scaler entry points.
extern void (*VerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
extern void (*LastVerticalBand_4_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
extern void (*VerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
extern void (*LastVerticalBand_3_5_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
extern void (*HorizontalLine_1_2_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
extern void (*HorizontalLine_3_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
extern void (*HorizontalLine_4_5_Scale)(const unsigned char * source,unsigned int sourceWidth,unsigned char * dest,unsigned int destWidth);
extern void (*VerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
extern void (*LastVerticalBand_1_2_Scale)(unsigned char * dest,unsigned int destPitch,unsigned int destWidth);
// Edge filters used by SimpleDeblockFrame; the last argument is the
// bounding-value table.
extern void (*FilterHoriz_Simple)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
extern void (*FilterVert_Simple)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
// Dering / deblock band routines.
extern void (*DeringBlockWeak)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
extern void (*DeringBlockStrong)(xPB_INST, const UINT8 *, UINT8 *, INT32, UINT32, UINT32 *);
extern void (*DeblockLoopFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
extern void (*DeblockNonFilteredBand)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
extern void (*DeblockNonFilteredBandNewFilter)(xPB_INST, UINT8 *, UINT8 *, UINT32, UINT32, UINT32, UINT32 *);
// Build a bounding-value table for filter limit FLimit and return a pointer
// to it (SimpleDeblockFrame passes the result straight to the edge filters).
extern INT32*(*SetupBoundingValueArray)(xPB_INST pbi, INT32 FLimit);
extern INT32*(*SetupDeblockValueArray)(xPB_INST pbi, INT32 FLimit);
extern void (*FilterHoriz)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
extern void (*FilterVert)(xPB_INST pbi, UINT8 * PixelPtr, INT32 LineLength, INT32*);
extern void (*ClampLevels)( POSTPROC_INSTANCE *pbi,INT32 BlackClamp, INT32 WhiteClamp, UINT8 *Src, UINT8 *Dst);
// PostProcess calls this with Y-plane source and destination pointers.
extern void (*FastDeInterlace)(UINT8 *SrcPtr, UINT8 *DstPtr, INT32 Width, INT32 Height, INT32 Stride);
// May be null: PostProcess checks the pointer against 0 before calling it.
extern void (*PlaneAddNoise)( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
// The one real function declared here (not a pointer).
extern void DMachineSpecificConfig(INT32 MmxEnabled, INT32 XmmEnabled, INT32 WmtEnabled);
#endif

View file

@ -0,0 +1,441 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>17.0</VCProjectVersion>
<ProjectGuid>{8F2BF92C-C4E1-45AE-BA45-2617B03B32AC}</ProjectGuid>
<RootNamespace>vppp</RootNamespace>
<WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v142</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v142</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v142</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v142</PlatformToolset>
<UseOfMfc>false</UseOfMfc>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
<IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<LibraryPath>$(LibraryPath)</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<IncludePath>$(IncludePath)</IncludePath>
<LibraryPath>$(LibraryPath)</LibraryPath>
<OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
<IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
<IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<LibraryPath>$(LibraryPath)</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<IncludePath>$(IncludePath)</IncludePath>
<LibraryPath>$(LibraryPath)</LibraryPath>
<OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
<IntDir>..\..\..\obj\vppp\$(PlatformShortName)_$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Label="Vcpkg">
<VcpkgEnableManifest>false</VcpkgEnableManifest>
</PropertyGroup>
<PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<VcpkgInstalledDir>
</VcpkgInstalledDir>
<VcpkgUseStatic>false</VcpkgUseStatic>
<VcpkgConfiguration>Debug</VcpkgConfiguration>
<VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
</PropertyGroup>
<PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<VcpkgInstalledDir>
</VcpkgInstalledDir>
<VcpkgUseStatic>false</VcpkgUseStatic>
<VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
</PropertyGroup>
<PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<VcpkgInstalledDir>
</VcpkgInstalledDir>
<VcpkgUseStatic>false</VcpkgUseStatic>
<VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
<VcpkgConfiguration>Debug</VcpkgConfiguration>
</PropertyGroup>
<PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<VcpkgInstalledDir>
</VcpkgInstalledDir>
<VcpkgUseStatic>false</VcpkgUseStatic>
<VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_LIB;_DEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<PrecompiledHeaderOutputFile>
</PrecompiledHeaderOutputFile>
<WarningLevel>Level3</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<CompileAs>Default</CompileAs>
<DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0409</Culture>
</ResourceCompile>
<Lib>
<SuppressStartupBanner>true</SuppressStartupBanner>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
</Lib>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_LIB;_DEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<PrecompiledHeaderOutputFile>
</PrecompiledHeaderOutputFile>
<WarningLevel>Level3</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<CompileAs>Default</CompileAs>
<DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0409</Culture>
</ResourceCompile>
<Lib>
<SuppressStartupBanner>true</SuppressStartupBanner>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
</Lib>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>true</OmitFramePointers>
<AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_LIB;NDEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>
</PrecompiledHeaderOutputFile>
<AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
<ObjectFileName>$(IntDir)</ObjectFileName>
<ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
<WarningLevel>Level3</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<DebugInformationFormat>None</DebugInformationFormat>
<CompileAs>Default</CompileAs>
<DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0409</Culture>
</ResourceCompile>
<Lib>
<SuppressStartupBanner>true</SuppressStartupBanner>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
</Lib>
</ItemDefinitionGroup>
<!-- Release|x64: same settings as Release|Win32, applied to the 64-bit platform. -->
<!-- NOTE(review): the win32\*.c sources listed in this project use MSVC __asm blocks (e.g. win32\DeInterlaceMmx.c), -->
<!-- and MSVC does not support inline assembly when targeting x64 - confirm that this configuration actually builds. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<!-- Optimise for speed: full optimisation, aggressive inlining, intrinsics, no frame pointers. -->
<Optimization>MaxSpeed</Optimization>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>true</OmitFramePointers>
<AdditionalIncludeDirectories>.\include;..\include;..\vp60\include;..\..\..\..\libvp6\include;..\..\..\..\libvp6\corelibs\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<!-- WIN32 is defined for the x64 platform as well; INLINE is mapped to the MSVC __inline keyword. -->
<PreprocessorDefinitions>_LIB;NDEBUG;INLINE=__inline;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<!-- Links against the DLL version of the C runtime (/MD). -->
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<BufferSecurityCheck>false</BufferSecurityCheck>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeaderOutputFile>
</PrecompiledHeaderOutputFile>
<AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
<ObjectFileName>$(IntDir)</ObjectFileName>
<ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
<WarningLevel>Level3</WarningLevel>
<SuppressStartupBanner>true</SuppressStartupBanner>
<!-- NOTE(review): debug information is disabled here, so the PDB name above is presumably unused in this configuration - confirm. -->
<DebugInformationFormat>None</DebugInformationFormat>
<CompileAs>Default</CompileAs>
<!-- C4799 is MSVC's "function has no EMMS instruction" warning. -->
<DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<!-- 0x0409 = English (United States) resource culture. -->
<Culture>0x0409</Culture>
</ResourceCompile>
<!-- Librarian settings: no extra library directories or dependencies beyond the inherited ones. -->
<Lib>
<SuppressStartupBanner>true</SuppressStartupBanner>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
</Lib>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="generic\borders.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="generic\clamp.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="generic\deblock.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="generic\DeInterlace.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="generic\dering.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<!-- generic\doptsystemdependant.c is excluded from the build in all four configurations referenced here -->
<!-- (Debug/Release x Win32/x64), so the Optimization and BasicRuntimeChecks overrides below have no effect -->
<!-- while the exclusions stand. NOTE(review): win32\doptsystemdependant.c, listed later in this ItemGroup -->
<!-- without an exclusion, is presumably the implementation used instead - confirm. -->
<ClCompile Include="generic\doptsystemdependant.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="generic\loopfilter.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="generic\postproc.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="generic\scale.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="generic\simpledeblocker.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\clamp_asm.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\deblockopt.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\deblockwmtopt.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\DeInterlaceMmx.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\DeInterlaceWmt.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\deringopt.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\deringwmtopt.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\doptsystemdependant.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\loopf_asm.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\newlooptest_asm.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\scaleopt.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
<ClCompile Include="win32\simpledeblock_asm.c">
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
</ClCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View file

@ -0,0 +1,79 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="generic">
<UniqueIdentifier>{adcd4975-46d4-4f20-8422-a898d3456999}</UniqueIdentifier>
</Filter>
<Filter Include="win32">
<UniqueIdentifier>{4fbef4da-8fe3-440e-858e-2fbabea42066}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="generic\borders.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\clamp.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\deblock.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\DeInterlace.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\dering.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\doptsystemdependant.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\loopfilter.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\postproc.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\scale.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="generic\simpledeblocker.c">
<Filter>generic</Filter>
</ClCompile>
<ClCompile Include="win32\clamp_asm.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\deblockopt.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\deblockwmtopt.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\DeInterlaceMmx.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\DeInterlaceWmt.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\deringopt.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\deringwmtopt.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\doptsystemdependant.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\loopf_asm.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\newlooptest_asm.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\scaleopt.c">
<Filter>win32</Filter>
</ClCompile>
<ClCompile Include="win32\simpledeblock_asm.c">
<Filter>win32</Filter>
</ClCompile>
</ItemGroup>
</Project>

View file

@ -0,0 +1,233 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 42;
objects = {
/* Begin PBXBuildFile section */
0C14238C0BB8155500FDDAB7 /* postproc.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C14238B0BB8155500FDDAB7 /* postproc.c */; };
0C14238E0BB8155F00FDDAB7 /* loopfilter.c in Sources */ = {isa = PBXBuildFile; fileRef = 0C14238D0BB8155F00FDDAB7 /* loopfilter.c */; };
0CF73A2D0BB78F6700DD0AFD /* scale.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A2C0BB78F6700DD0AFD /* scale.c */; };
0CF73A3A0BB78F8600DD0AFD /* simpledeblocker.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */; };
0CF73A3D0BB78F8600DD0AFD /* doptsystemdependant.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */; };
0CF73A3E0BB78F8600DD0AFD /* deblock.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A350BB78F8600DD0AFD /* deblock.c */; };
0CF73A3F0BB78F8600DD0AFD /* DeInterlace.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A360BB78F8600DD0AFD /* DeInterlace.c */; };
0CF73A400BB78F8600DD0AFD /* dering.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A370BB78F8600DD0AFD /* dering.c */; };
0CF73A410BB78F8600DD0AFD /* borders.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A380BB78F8600DD0AFD /* borders.c */; };
0CF73A420BB78F8600DD0AFD /* clamp.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CF73A390BB78F8600DD0AFD /* clamp.c */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
0C14238B0BB8155500FDDAB7 /* postproc.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = postproc.c; path = generic/postproc.c; sourceTree = "<group>"; };
0C14238D0BB8155F00FDDAB7 /* loopfilter.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = loopfilter.c; path = generic/loopfilter.c; sourceTree = "<group>"; };
0CF73A2C0BB78F6700DD0AFD /* scale.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = scale.c; path = generic/scale.c; sourceTree = "<group>"; };
0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = simpledeblocker.c; path = generic/simpledeblocker.c; sourceTree = "<group>"; };
0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = doptsystemdependant.c; path = generic/doptsystemdependant.c; sourceTree = "<group>"; };
0CF73A350BB78F8600DD0AFD /* deblock.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = deblock.c; path = generic/deblock.c; sourceTree = "<group>"; };
0CF73A360BB78F8600DD0AFD /* DeInterlace.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = DeInterlace.c; path = generic/DeInterlace.c; sourceTree = "<group>"; };
0CF73A370BB78F8600DD0AFD /* dering.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = dering.c; path = generic/dering.c; sourceTree = "<group>"; };
0CF73A380BB78F8600DD0AFD /* borders.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = borders.c; path = generic/borders.c; sourceTree = "<group>"; };
0CF73A390BB78F8600DD0AFD /* clamp.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = clamp.c; path = generic/clamp.c; sourceTree = "<group>"; };
D2AAC046055464E500DB518D /* libvppp.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvppp.a; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
D289987405E68DCB004EDB86 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
08FB7794FE84155DC02AAC07 /* vppp */ = {
isa = PBXGroup;
children = (
08FB7795FE84155DC02AAC07 /* Source */,
C6A0FF2B0290797F04C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */,
);
name = vppp;
sourceTree = "<group>";
};
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
0C14238B0BB8155500FDDAB7 /* postproc.c */,
0CF73A2C0BB78F6700DD0AFD /* scale.c */,
0CF73A310BB78F8600DD0AFD /* simpledeblocker.c */,
0CF73A340BB78F8600DD0AFD /* doptsystemdependant.c */,
0CF73A350BB78F8600DD0AFD /* deblock.c */,
0CF73A360BB78F8600DD0AFD /* DeInterlace.c */,
0CF73A370BB78F8600DD0AFD /* dering.c */,
0C14238D0BB8155F00FDDAB7 /* loopfilter.c */,
0CF73A380BB78F8600DD0AFD /* borders.c */,
0CF73A390BB78F8600DD0AFD /* clamp.c */,
);
name = Source;
sourceTree = "<group>";
};
1AB674ADFE9D54B511CA2CBB /* Products */ = {
isa = PBXGroup;
children = (
D2AAC046055464E500DB518D /* libvppp.a */,
);
name = Products;
sourceTree = "<group>";
};
C6A0FF2B0290797F04C91782 /* Documentation */ = {
isa = PBXGroup;
children = (
);
name = Documentation;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
D2AAC043055464E500DB518D /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXHeadersBuildPhase section */
/* Begin PBXNativeTarget section */
D2AAC045055464E500DB518D /* vppp */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vppp" */;
buildPhases = (
D2AAC043055464E500DB518D /* Headers */,
D2AAC044055464E500DB518D /* Sources */,
D289987405E68DCB004EDB86 /* Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = vppp;
productName = vppp;
productReference = D2AAC046055464E500DB518D /* libvppp.a */;
productType = "com.apple.product-type.library.static";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
08FB7793FE84155DC02AAC07 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vppp" */;
hasScannedForEncodings = 1;
mainGroup = 08FB7794FE84155DC02AAC07 /* vppp */;
projectDirPath = "";
targets = (
D2AAC045055464E500DB518D /* vppp */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
D2AAC044055464E500DB518D /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
0CF73A2D0BB78F6700DD0AFD /* scale.c in Sources */,
0CF73A3A0BB78F8600DD0AFD /* simpledeblocker.c in Sources */,
0CF73A3D0BB78F8600DD0AFD /* doptsystemdependant.c in Sources */,
0CF73A3E0BB78F8600DD0AFD /* deblock.c in Sources */,
0CF73A3F0BB78F8600DD0AFD /* DeInterlace.c in Sources */,
0CF73A400BB78F8600DD0AFD /* dering.c in Sources */,
0CF73A410BB78F8600DD0AFD /* borders.c in Sources */,
0CF73A420BB78F8600DD0AFD /* clamp.c in Sources */,
0C14238C0BB8155500FDDAB7 /* postproc.c in Sources */,
0C14238E0BB8155F00FDDAB7 /* loopfilter.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
1DEB91EC08733DB70010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
COPY_PHASE_STRIP = NO;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_FIX_AND_CONTINUE = YES;
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
INSTALL_PATH = /usr/local/lib;
PRODUCT_NAME = vppp;
ZERO_LINK = YES;
};
name = Debug;
};
1DEB91ED08733DB70010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ARCHS = (
ppc,
i386,
);
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
GCC_MODEL_TUNING = G5;
INSTALL_PATH = /usr/local/lib;
PRODUCT_NAME = vppp;
};
name = Release;
};
1DEB91F008733DB70010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
OBJROOT = build;
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
SYMROOT = ../../../lib/osx;
USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
};
name = Debug;
};
1DEB91F108733DB70010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
OBJROOT = build;
PREBINDING = NO;
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
SYMROOT = ../../../lib/osx;
USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vppp" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB91EC08733DB70010E9CD /* Debug */,
1DEB91ED08733DB70010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vppp" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1DEB91F008733DB70010E9CD /* Debug */,
1DEB91F108733DB70010E9CD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
}

View file

@ -0,0 +1,143 @@
/****************************************************************************
*
* Module Title : DeInterlaceMmx.c
*
* Description : DeInterlace Routines
*
***************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
/****************************************************************************
* Module constants.
****************************************************************************/
// Rounding term for the [1, 2, 1] vertical filter: four 16-bit lanes of 2,
// added to each sum by MmxFastDeInterlace before the arithmetic shift right by 2.
#if defined(_WIN32_WCE)
// NOTE(review): #pragma pack controls struct/union member packing; it presumably
// does not give this array 16-byte alignment on its own -- confirm on the WCE toolchain.
#pragma pack(16)
short four2s[] = { 2, 2, 2, 2 };
#pragma pack()
#else
// Explicitly 16-byte aligned on non-WCE builds.
__declspec(align(16)) short four2s[] = { 2, 2, 2, 2 };
#endif
/****************************************************************************
 *
 *  ROUTINE       : MmxFastDeInterlace
 *
 *  INPUTS        : UINT8 *SrcPtr : Pointer to input frame.
 *                  UINT8 *DstPtr : Pointer to output frame.
 *                  INT32 Width   : Width of frame in pixels.
 *                  INT32 Height  : Height of frame in pixels.
 *                  INT32 Stride  : Stride of images (applied to both
 *                                  SrcPtr and DstPtr).
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 3 tap filter vertically to remove interlacing
 *                  artifacts (MMX implementation).
 *
 *  SPECIAL NOTES : This function uses a three tap filter [1, 2, 1] to blur
 *                  vertically in an interlaced frame:
 *                      dst[y] = (src[y-1] + 2*src[y] + src[y+1] + 2) >> 2
 *                  The first and last rows are copied unfiltered.
 *                  This function assumes:
 *                  1) The buffers SrcPtr and DstPtr point to have the same
 *                     geometry,
 *                  2) SrcPtr and DstPtr can _not_ be the same.
 *                  3) The inner loop handles 8 pixels per iteration and
 *                     always runs at least once per row, so when Width is
 *                     not a multiple of 8 it reads and writes up to 7 bytes
 *                     past Width on each filtered row; rows must have room
 *                     for that.
 *                  A frame with non-positive Width or Height is ignored,
 *                  and a one-row frame is simply copied.
 *                  NOTE(review): no EMMS is executed here; presumably the
 *                  caller clears the MMX state before using the FPU -- confirm.
 *
 ****************************************************************************/
void MmxFastDeInterlace
(
    UINT8 *SrcPtr,
    UINT8 *DstPtr,
    INT32 Width,
    INT32 Height,
    INT32 Stride
)
{
    INT32 i;
    UINT8 *CurrentSrcPtr = SrcPtr;
    UINT8 *CurrentDstPtr = DstPtr;

    // Nothing to do for an empty frame; this also keeps a negative Width
    // from being converted to a huge memcpy length.
    if ( Width <= 0 || Height <= 0 )
        return;

    // Always copy the first line
    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );

    // A single-row frame has no separate last line: stop here rather than
    // copying from/to the non-existent row at offset Stride.
    if ( Height == 1 )
        return;

    // Filter rows 1 .. Height-2. On entry to each iteration CurrentSrcPtr
    // addresses row i-1 and CurrentDstPtr (after the increment) row i.
    for ( i=1; i<Height-1; i++ )
    {
        CurrentDstPtr += Stride;

        __asm
        {
            mov         esi, [CurrentSrcPtr]            // esi = source line -1
            mov         edi, [CurrentDstPtr]            // edi = destination line 0

            xor         ecx, ecx                        // ecx = column offset
            mov         edx, [Stride]

            lea         eax, [esi + edx]                // eax = source line 0
            lea         edx, [eax + edx]                // edx = source line 1

            mov         ebx, [Width]
            pxor        mm7, mm7                        // mm7 = 0, used to widen bytes to words

        MmxDeInterlaceLoop:

            movq        mm0, QWORD ptr [esi + ecx]      // line -1
            movq        mm1, QWORD ptr [eax + ecx]      // line  0

            movq        mm3, mm0                        // line -1
            punpcklbw   mm0, mm7                        // line -1 low

            movq        mm2, QWORD ptr [edx + ecx]      // line  1
            punpckhbw   mm3, mm7                        // line -1 high

            movq        mm4, mm1                        // line  0
            punpcklbw   mm1, mm7                        // line  0 low

            paddw       mm0, four2s                     // line -1 low + 2s
            paddw       mm3, four2s                     // line -1 high + 2s

            punpckhbw   mm4, mm7                        // line  0 high
            psllw       mm1, 1                          // line  0 * 2

            psllw       mm4, 1                          // line  0 * 2
            movq        mm5, mm2                        // line  1

            punpcklbw   mm2, mm7                        // line  1 low
            paddw       mm0, mm1                        // line -1 + line 0 * 2

            paddw       mm3, mm4                        // line -1 + line 0 * 2
            punpckhbw   mm5, mm7                        // line  1 high

            paddw       mm0, mm2                        // -1 + 0 * 2 + 1
            paddw       mm3, mm5                        // -1 + 0 * 2 + 1

            psraw       mm0, 2                          // >> 2
            psraw       mm3, 2                          // >> 2

            packuswb    mm0, mm3                        // back to 8 unsigned bytes

            movq        QWORD ptr [edi+ecx], mm0
            add         ecx, 8                          // next 8 pixels
            cmp         ecx, ebx
            jl          MmxDeInterlaceLoop
        }

        CurrentSrcPtr += Stride;

        /*
        Scalar sketch of the loop above (note: the MMX code additionally
        adds 2 before the shift):

        for(j=0;j<Width;j++)
        {
            x0 = PrevSrcPtr[j];
            x1 = (CurrentSrcPtr[j]<<1);
            x2 = NextSrcPtr[j];
            CurrentDstPtr[j] = (UINT8)( (x0 + x1 + x2)>>2 );
        }
        */
    }

    // copy the last line
    CurrentSrcPtr += Stride;
    CurrentDstPtr += Stride;
    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
}

View file

@ -0,0 +1,129 @@
/****************************************************************************
*
* Module Title : DeInterlaceWmt.c
*
* Description : DeInterlace
*
***************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
/****************************************************************************
* Module constants.
****************************************************************************/
// Rounding term for the [1, 2, 1] vertical filter in WmtFastDeInterlace:
// the value 2 in each of eight 16-bit lanes, added to the weighted sum of
// three lines before that sum is shifted right by 2.
#if defined(_WIN32_WCE)
// WmtFastDeInterlace returns immediately when _WIN32_WCE is defined, so the
// routine in this file never reads this copy of the table.
#pragma pack(16)
short Eight2s[] = { 2, 2, 2, 2, 2, 2, 2, 2 };
#pragma pack()
#else
// Read as a 128-bit memory operand of paddw, hence the 16-byte alignment.
__declspec(align(16)) short Eight2s[] = { 2, 2, 2, 2, 2, 2, 2, 2 };
#endif
/****************************************************************************
*
* ROUTINE : WmtFastDeInterlace
*
* INPUTS : UINT8 *SrcPtr : Pointer to input frame.
* UINT8 *DstPtr : Pointer to output frame.
* INT32 Width : Width of frame in pixels.
* INT32 Height : Height of frame in pixels.
* INT32 Stride : Stride of images (shared by source and destination).
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Applies a 3 tap filter vertically to remove interlacing
* artifacts.
*
* SPECIAL NOTES : This function uses a three tap filter [1, 2, 1] to blur
* vertically in an interlaced frame:
*     Dst[y][x] = ( Src[y-1][x] + 2*Src[y][x] + Src[y+1][x] + 2 ) >> 2
* The first and last lines are copied unfiltered. This function assumes:
* 1) Buffers SrcPtr and DstPtr have the same geometry,
* 2) SrcPtr and DstPtr can _not_ be the same.
* The SSE2 loop handles 8 pixels per iteration and always runs at least
* once per line, so when Width is not a multiple of 8 up to 7 bytes past
* Width are read and written on each filtered line.
* Frames with Width <= 0 or Height <= 0 are ignored, and a one-line frame
* is copied as-is; without these checks the final "last line" copy would
* address a row outside the frame.
* When _WIN32_WCE is defined the routine returns without touching DstPtr.
*
****************************************************************************/
void WmtFastDeInterlace
(
    UINT8 *SrcPtr,
    UINT8 *DstPtr,
    INT32 Width,
    INT32 Height,
    INT32 Stride
)
{
    INT32 i;
    UINT8 *CurrentSrcPtr = SrcPtr;
    UINT8 *CurrentDstPtr = DstPtr;
#if defined(_WIN32_WCE)
    return;
#else
    // Nothing sensible can be done with an empty frame (and a negative
    // Width must never reach memcpy as a huge unsigned size).
    if ( Width <= 0 || Height <= 0 )
        return;

    // Always copy the first line
    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );

    // A one-line frame has no separate last line to copy.
    if ( Height == 1 )
        return;

    // Filter lines 1 .. Height-2. Inside the loop CurrentSrcPtr addresses
    // the line ABOVE the one being produced and CurrentDstPtr the line
    // being produced.
    for ( i=1; i<Height-1; i++ )
    {
        CurrentDstPtr += Stride;
        __asm
        {
            mov         esi, [CurrentSrcPtr]            // esi = line -1
            mov         edi, [CurrentDstPtr]            // edi = output line
            xor         ecx, ecx                        // ecx = byte offset within the line
            mov         edx, [Stride]
            lea         eax, [esi + edx]                // eax = line 0
            lea         edx, [eax + edx]                // edx = line +1
            mov         ebx, [Width]
            pxor        xmm7, xmm7                      // zero, used to widen bytes to words
        WmtDeInterlaceLoop:
            movq        xmm0, QWORD ptr [esi + ecx]     // 8 pixels of line -1
            movq        xmm1, QWORD ptr [eax + ecx]     // 8 pixels of line 0
            punpcklbw   xmm0, xmm7                      // line -1 as words
            movq        xmm2, QWORD ptr [edx + ecx]     // 8 pixels of line +1
            punpcklbw   xmm1, xmm7                      // line 0 as words
            paddw       xmm0, Eight2s                   // line -1 + rounding (2)
            psllw       xmm1, 1                         // line 0 * 2
            punpcklbw   xmm2, xmm7                      // line +1 as words
            paddw       xmm0, xmm1                      // -1 + 0*2 + 2
            paddw       xmm0, xmm2                      // -1 + 0*2 + 1 + 2
            psraw       xmm0, 2                         // >> 2
            packuswb    xmm0, xmm7                      // back to 8 bytes
            movq        QWORD ptr [edi+ecx], xmm0
            add         ecx, 8
            cmp         ecx, ebx
            jl          WmtDeInterlaceLoop
        }
        CurrentSrcPtr += Stride;
        /*
        C reference for the loop above (x0/x1/x2 are the pixels of the
        previous, current and next source line):
        for(j=0;j<Width;j++)
        {
            x0 = PrevSrcPtr[j];
            x1 = (CurrentSrcPtr[j]<<1);
            x2 = NextSrcPtr[j];
            CurrentDstPtr[j] = (UINT8)( (x0 + x1 + x2 + 2)>>2 );
        }
        */
    }

    // copy the last line
    CurrentSrcPtr += Stride;
    CurrentDstPtr += Stride;
    memcpy ( CurrentDstPtr, CurrentSrcPtr, Width );
#endif
}

View file

@ -0,0 +1,170 @@
/****************************************************************************
*
* Module Title : clamp.c
*
* Description : Level clamping of the Y plane (SSE2 and MMX versions).
*
* AUTHOR : Jim Bankoski
*
*****************************************************************************
* Revision History
*
* 1.09 YWX 26-Sep-01 Changed the default bandHeight from 5 to 4
* 1.08 YWX 23-Jul-00 Changed horizontal scaling function names
* 1.07 JBB 04 Dec 00 Added new Center vs Scale Bits
* 1.06 YWX 01-Dec-00 Removed bi-cubic scale functions
* 1.05 YWX 18-Oct-00 Added 1-2 scale functions
* 1.04 YWX 11-Oct-00 Added ratio check to determine scaling or centering
* 1.03 YWX 09-Oct-00 Added functions that do differen scaling in horizontal
* and vertical directions
* 1.02 YWX 04-Oct-00 Added 3-5 scaling functions
* 1.01 YWX 03-Oct-00 Added a set of 4-5 scaling functions
* 1.00 JBB 15 Sep 00 New Configuration baseline.
*
*****************************************************************************
*/
/****************************************************************************
* Header Files
*****************************************************************************
*/
#include "postp.h"
#include <stdio.h>
/****************************************************************************
* Imported
*****************************************************************************
*/
/****************************************************************************
* Module constants.
*****************************************************************************
*/
/****************************************************************************
* Exported Global Variables
*****************************************************************************
*/
/****************************************************************************
* Module Static Variables
*****************************************************************************
*/
/*
 * ClampLevels_wmt
 *
 * SSE2 level clamp of the Y plane. Every source byte x goes through three
 * saturating steps (x - Black, + (Black + White), - White) and is written to
 * the same position in Dst. While Black + White fits in a byte this equals
 * min( max( x, Black ), 255 - White ).
 *
 * The plane is HFragments*8 bytes wide and VFragments*8 rows high, starts at
 * ReconYDataOffset in both buffers and uses YStride as the row pitch.
 * Rows are handled in 16-byte chunks with movdqa, so the row start addresses
 * have to be 16-byte aligned, and a row whose width is not a multiple of 16
 * has its last chunk extend past the nominal width.
 *
 * When _WIN32_WCE is defined the routine returns without touching Dst.
 */
void ClampLevels_wmt(
    POSTPROC_INSTANCE *pbi,
    INT32 BlackClamp,       // number of values to clamp from 0
    INT32 WhiteClamp,       // number of values to clamp from 255
    UINT8 *Src,             // reconstruction buffer : passed in
    UINT8 *Dst              // postprocessing buffer : passed in
)
{
#if defined(_WIN32_WCE)
    return;
#else
    // One copy of each clamp value per byte lane of an XMM register.
    __declspec(align(16)) unsigned char BlackBytes[16];
    __declspec(align(16)) unsigned char WhiteBytes[16];
    __declspec(align(16)) unsigned char SumBytes[16];
    int Lane;
    int RowsLeft;
    int RowBytes = pbi->HFragments * 8;
    UINT32 RowPitch = pbi->YStride;
    UINT8 *SrcRow = Src + pbi->ReconYDataOffset;
    UINT8 *DstRow = Dst + pbi->ReconYDataOffset;

    Lane = 16;
    while ( Lane-- > 0 )
    {
        BlackBytes[Lane] = (unsigned char)BlackClamp;
        WhiteBytes[Lane] = (unsigned char)WhiteClamp;
        SumBytes[Lane]   = (unsigned char)( BlackClamp + WhiteClamp );
    }

    // Only the Y plane is clamped.
    for ( RowsLeft = pbi->VFragments * 8; RowsLeft > 0; RowsLeft-- )
    {
        __asm
        {
            mov         esi, SrcRow
            mov         edi, DstRow
            mov         ecx, [RowBytes]
            xor         eax, eax                // eax = byte offset within the row
        ClampNext16:
            movdqa      xmm1, [esi+eax]         // load 16 pixels
            psubusb     xmm1, BlackBytes        // max( x - Black, 0 )
            paddusb     xmm1, SumBytes          // + Black + White, saturating at 255
            psubusb     xmm1, WhiteBytes        // - White
            movdqa      [edi+eax], xmm1         // store 16 pixels
            add         eax, 16
            cmp         eax, ecx
            jl          ClampNext16
        }
        SrcRow += RowPitch;
        DstRow += RowPitch;
    }
#endif
}
/*
 * ClampLevels_mmx
 *
 * MMX level clamp of the Y plane. Every source byte x goes through three
 * saturating steps (x - Black, + (Black + White), - White) and is written to
 * the same position in Dst. While Black + White fits in a byte this equals
 * min( max( x, Black ), 255 - White ).
 *
 * The plane is HFragments*8 bytes wide and VFragments*8 rows high, starts at
 * ReconYDataOffset in both buffers and uses YStride as the row pitch.
 * Rows are handled 8 bytes at a time.
 *
 * NOTE(review): MMX registers are used and no emms is issued here;
 * presumably the caller restores the FPU state - confirm.
 */
void ClampLevels_mmx(
    POSTPROC_INSTANCE *pbi,
    INT32 BlackClamp,       // number of values to clamp from 0
    INT32 WhiteClamp,       // number of values to clamp from 255
    UINT8 *Src,             // reconstruction buffer : passed in
    UINT8 *Dst              // postprocessing buffer : passed in
)
{
    // One copy of each clamp value per byte lane; only the first 8 bytes of
    // each array are filled and read.
#if defined(_WIN32_WCE)
#pragma pack(8)
    unsigned char BlackBytes[16];
    unsigned char WhiteBytes[16];
    unsigned char SumBytes[16];
#pragma pack()
#else
    __declspec(align(8)) unsigned char BlackBytes[16];
    __declspec(align(8)) unsigned char WhiteBytes[16];
    __declspec(align(8)) unsigned char SumBytes[16];
#endif
    int Lane;
    int RowsLeft;
    int RowBytes = pbi->HFragments * 8;
    UINT32 RowPitch = pbi->YStride;
    UINT8 *SrcRow = Src + pbi->ReconYDataOffset;
    UINT8 *DstRow = Dst + pbi->ReconYDataOffset;

    Lane = 8;
    while ( Lane-- > 0 )
    {
        BlackBytes[Lane] = (unsigned char)BlackClamp;
        WhiteBytes[Lane] = (unsigned char)WhiteClamp;
        SumBytes[Lane]   = (unsigned char)( BlackClamp + WhiteClamp );
    }

    // Only the Y plane is clamped.
    for ( RowsLeft = pbi->VFragments * 8; RowsLeft > 0; RowsLeft-- )
    {
        __asm
        {
            mov         esi, SrcRow
            mov         edi, DstRow
            mov         ecx, [RowBytes]
            xor         eax, eax                // eax = byte offset within the row
        ClampNext8:
            movq        mm1, [esi+eax]          // load 8 pixels
            psubusb     mm1, BlackBytes         // max( x - Black, 0 )
            paddusb     mm1, SumBytes           // + Black + White, saturating at 255
            psubusb     mm1, WhiteBytes         // - White
            movq        [edi+eax], mm1          // store 8 pixels
            add         eax, 8
            cmp         eax, ecx
            jl          ClampNext8
        }
        SrcRow += RowPitch;
        DstRow += RowPitch;
    }
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,748 @@
/****************************************************************************
*
* Module Title : DeRingingWmtOpt.c
*
* Description : Optimized functions for PostProcessor
*
***************************************************************************/
#define STRICT /* Strict type checking */
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
/****************************************************************************
* Macros
****************************************************************************/
#pragma warning(disable:4305)
#pragma warning(disable:4731)
/****************************************************************************
* Module Statics
****************************************************************************/
// Packed constants read as memory operands by the de-ringing assembly in
// this file. Nothing is defined when _WIN32_WCE is set; the routines that
// use them return immediately in that configuration.
#if defined(_WIN32_WCE)
#else
// 128 per word: total filter weight; the centre pixel gets 128 minus the sum
// of the four neighbour weights.
__declspec(align(16)) static unsigned short eight128s []= { 128, 128, 128, 128, 128, 128, 128, 128};
// 64 per word: rounding term added before the >>7; also shifted left by 8 to
// form the bias used to clamp negative sums to zero.
__declspec(align(16)) static unsigned short eight64s[] = { 64, 64, 64, 64, 64, 64, 64, 64};
// 64 per byte: added to (QValue + 32) to form the sharpen threshold.
__declspec(align(16)) static char eight64c [] = { 64, 64, 64,64,64,64,64,64};
// 32 per byte: added to QValue.
__declspec(align(16)) static char eight32c [] = { 32,32,32,32,32,32,32,32};
// 0x7f per byte: mask applied with pandn when building the high-clamp offset.
__declspec(align(16)) static char eight127c []= { 127, 127, 127, 127, 127, 127, 127, 127};
// 0x80 per byte (128 stored in a char): XORed into unsigned bytes so that the
// signed compare pcmpgtb orders them as unsigned values.
__declspec(align(16)) static char eight128c []= { 128, 128, 128, 128, 128, 128, 128, 128};
// 255 - 32 per byte: a saturating add followed by a saturating subtract of
// this value limits a byte to at most 32 (strong filter).
__declspec(align(16)) static unsigned char eight223c[] = { 223,223,223,223,223,223,223,223};
// 255 - 24 per byte: same trick, limiting a byte to at most 24 (weak filter).
__declspec(align(16)) static unsigned char eight231c[] = { 231,231,231,231,231,231,231,231};
#endif
/****************************************************************************
* Imports
****************************************************************************/
extern UINT32 SharpenModifier[];
/****************************************************************************
*
* ROUTINE : DeringBlockStrong_WMT
*
* INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (unused).
* const UINT8 *SrcPtr : Pointer to input image (top-left pixel of the block).
* UINT8 *DstPtr : Pointer to output image (top-left pixel of the block).
* const INT32 Pitch : Image stride, used for both source and destination.
* UINT32 FragQIndex : Q-index block encoded with.
* UINT32 *QuantScale : Array of quantization scale factors.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Filtering a block for de-ringing purpose.
*
* SPECIAL NOTES : Works on one 8x8 block. The source is read one row above
* the block, one row below it, and one column to its left and right.
*
* Pass 1 builds a weight ("modifier") per pixel and neighbour direction:
*     mod = min( max( QValue + 32 - abs(p - neighbour), 0 ), High )
* with High = min( 3 * QValue, 32 ). Where abs(p - neighbour) exceeds
* QValue + 32 + 64 the Sharpen value is OR-ed into the modifier.
* UDMod holds 9 rows of up/down weights; LRMod holds 8 rows of left weights
* followed by 8 rows of right weights, all as sign-extended 16-bit values.
*
* Pass 2 writes
*     ( au*pu + ad*pd + al*pl + ar*pr + (128 - (au+ad+al+ar))*p + 64 ) >> 7
* with negative sums clamped to zero.
*
* Only the low byte of QuantScale[FragQIndex] and of
* SharpenModifier[FragQIndex] takes part in the byte arithmetic.
* NOTE(review): MMX registers are used and no emms is issued here;
* presumably the caller restores the FPU state - confirm.
* When _WIN32_WCE is defined the routine returns without touching DstPtr.
*
****************************************************************************/
void DeringBlockStrong_WMT
(
    const POSTPROC_INSTANCE *pbi,
    const UINT8 *SrcPtr,
    UINT8 *DstPtr,
    const INT32 Pitch,
    UINT32 FragQIndex,
    UINT32 *QuantScale
)
{
#if defined(_WIN32_WCE)
    return;
#else
    __declspec(align(16)) short UDMod[72];      /* 9 rows x 8 up/down weights */
    __declspec(align(16)) short LRMod[128];     /* 8 rows x 8 left, then 8 rows x 8 right */
    unsigned int PlaneLineStep = Pitch;
    const unsigned char *Src = SrcPtr;
    unsigned char *Des = DstPtr;
    short *UDPointer = UDMod;
    short *LRPointer = LRMod;
    UINT32 QStep = QuantScale[FragQIndex];
    INT32 Sharpen = SharpenModifier[FragQIndex];
    (void) pbi;
    __asm
    {
        push        esi
        push        edi
        mov         esi, Src                    /* source pointer */
        mov         edi, UDPointer              /* up/down modifier pointer */
        push        ecx
        push        edx
        mov         ecx, PlaneLineStep          /* pitch */
        xor         edx, edx
        push        eax
        push        ebx
        mov         eax, QStep                  /* QValue */
        mov         ebx, Sharpen                /* sharpen */
        movd        mm0, eax                    /* QValue */
        movd        mm2, ebx                    /* sharpen */
        push        ebp
        punpcklbw   mm0, mm0                    /* replicate low byte of QValue (1 of 3) */
        sub         edx, ecx                    /* edx = -pitch */
        punpcklbw   mm2, mm2                    /* replicate low byte of sharpen (1 of 3) */
        pxor        mm7, mm7                    /* mm7 = 0 */
        punpcklbw   mm0, mm0                    /* replicate QValue (2 of 3) */
        mov         eax, LRPointer              /* left/right modifier pointer */
        punpcklbw   mm2, mm2                    /* replicate sharpen (2 of 3) */
        lea         ebx, [esi+ecx*8]            /* source row 8: end marker for the fill loop */
        punpcklbw   mm0, mm0                    /* QValue in all 8 bytes */
        movq        mm1, mm0                    /* make a copy */
        punpcklbw   mm2, mm2                    /* sharpen in all 8 bytes */
        paddb       mm1, mm0                    /* QValue * 2 */
        paddb       mm1, mm0                    /* High = 3 * QValue */
        paddusb     mm1, eight223c              /* clamping High to 32 ... */
        paddb       mm0, eight32c               /* 32 + QValue */
        psubusb     mm1, eight223c              /* ... High = min(3 * QValue, 32) */
        movq        mm3, eight127c              /* 7f 7f 7f 7f 7f 7f 7f 7f */
        pandn       mm1, mm3                    /* ClampHigh = 127 - High */

        /* mm0,mm1,mm2 stay live through the fill loop  */
        /* mm0---> QValue+32                            */
        /* mm1---> ClampHigh                            */
        /* mm2---> Sharpen                              */

    FillModLoop1:

        /* ---- up modifier for this row ---- */
        movq        mm3, QWORD PTR [esi]        /* read 8 pixels p */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movq        mm4, QWORD PTR [esi+edx]    /* pixels above, pu */
        movq        mm5, mm3                    /* make a copy of p */
        psubusb     mm3, mm4                    /* p-pu */
        psubusb     mm4, mm5                    /* pu-p */
        por         mm3, mm4                    /* abs(p-pu) */
        movq        mm6, mm0                    /* 32+QValue */
        movq        mm4, mm0                    /* 32+QValue */
        psubusb     mm6, mm3                    /* zero clamped TmpMod */
        movq        mm5, eight128c              /* 80 80 80 80 80 80 80 80 */
        paddb       mm4, eight64c               /* 32+QValue + 64 */
        pxor        mm4, mm5                    /* bias for signed compare */
        pxor        mm3, mm5                    /* bias for signed compare */
        pcmpgtb     mm3, mm4                    /* abs(p-pu) > 32+QValue+64 ? */
        pand        mm3, mm2                    /* use sharpen there */
        paddsb      mm6, mm1                    /* clamping to High ... */
        psubsb      mm6, mm1                    /* ... offset back */
        por         mm6, mm3                    /* mod value to be stored */

        /* ---- left modifier (up modifier is stored on the way) ---- */
        movq        mm3, QWORD PTR [esi]        /* read 8 pixels p */
        movq2dq     xmm0, mm6
        movq        mm4, QWORD PTR [esi-1]      /* pixels to the left, pl */
        punpcklbw   xmm7, xmm0                  /* mod bytes into the high half of each word */
        movq        mm5, mm3                    /* make a copy of p */
        psraw       xmm7, 8                     /* sign extended */
        psubusb     mm3, mm4                    /* p-pl */
        movdqa      [edi], xmm7                 /* write out UDMod row */
        psubusb     mm4, mm5                    /* pl-p */
        por         mm3, mm4                    /* abs(p-pl) */
        movq        mm6, mm0                    /* 32+QValue */
        movq        mm4, mm0                    /* 32+QValue */
        psubusb     mm6, mm3                    /* zero clamped TmpMod */
        movq        mm5, eight128c              /* 80 80 80 80 80 80 80 80 */
        paddb       mm4, eight64c               /* 32+QValue + 64 */
        pxor        mm4, mm5                    /* bias for signed compare */
        pxor        mm3, mm5                    /* bias for signed compare */
        pcmpgtb     mm3, mm4                    /* abs(p-pl) > 32+QValue+64 ? */
        pand        mm3, mm2                    /* use sharpen there */
        paddsb      mm6, mm1                    /* clamping to High ... */
        psubsb      mm6, mm1                    /* ... offset back */
        por         mm6, mm3                    /* mod value to be stored */

        /* ---- right modifier (left modifier is stored on the way) ---- */
        movq        mm3, QWORD PTR [esi]        /* read 8 pixels p */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movq        mm4, QWORD PTR [esi+1]      /* pixels to the right, pr */
        movq2dq     xmm0, mm6
        movq        mm5, mm3                    /* make a copy of p */
        punpcklbw   xmm7, xmm0                  /* mod bytes into the high half of each word */
        psubusb     mm3, mm4                    /* p-pr */
        psraw       xmm7, 8                     /* sign extended */
        psubusb     mm4, mm5                    /* pr-p */
        movdqa      [eax], xmm7                 /* write out left LRMod row */
        por         mm3, mm4                    /* abs(p-pr) */
        movq        mm6, mm0                    /* 32+QValue */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movq        mm4, mm0                    /* 32+QValue */
        psubusb     mm6, mm3                    /* zero clamped TmpMod */
        movq        mm5, eight128c              /* 80 80 80 80 80 80 80 80 */
        paddb       mm4, eight64c               /* 32+QValue + 64 */
        pxor        mm4, mm5                    /* bias for signed compare */
        pxor        mm3, mm5                    /* bias for signed compare */
        pcmpgtb     mm3, mm4                    /* abs(p-pr) > 32+QValue+64 ? */
        pand        mm3, mm2                    /* use sharpen there */
        paddsb      mm6, mm1                    /* clamping to High ... */
        psubsb      mm6, mm1                    /* ... offset back */
        por         mm6, mm3                    /* mod value to be stored */
        add         esi, ecx                    /* next source row */
        movq2dq     xmm0, mm6
        add         edi, 16                     /* next UDMod row */
        punpcklbw   xmm7, xmm0                  /* unpack the mod just moved to xmm0 (source was written as mm0, not a valid operand here) */
        add         eax, 16                     /* next LRMod row */
        psraw       xmm7, 8                     /* sign extended */
        cmp         esi, ebx
        movdqa      [eax+112], xmm7             /* write out right LRMod row: 128 bytes past the left row just written */
        jne         FillModLoop1

        /* ---- last UDMod row: source row 8 against row 7 ---- */
        movq        mm3, QWORD PTR [esi]        /* read 8 pixels p */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movq        mm4, QWORD PTR [esi+edx]    /* pixels above, pu */
        movq        mm5, mm3                    /* make a copy of p */
        psubusb     mm3, mm4                    /* p-pu */
        psubusb     mm4, mm5                    /* pu-p */
        por         mm3, mm4                    /* abs(p-pu) */
        movq        mm6, mm0                    /* 32+QValue */
        movq        mm4, mm0                    /* 32+QValue */
        psubusb     mm6, mm3                    /* zero clamped TmpMod */
        movq        mm5, eight128c              /* 80 80 80 80 80 80 80 80 */
        paddb       mm4, eight64c               /* 32+QValue + 64 */
        pxor        mm4, mm5                    /* bias for signed compare */
        pxor        mm3, mm5                    /* bias for signed compare */
        pcmpgtb     mm3, mm4                    /* abs(p-pu) > 32+QValue+64 ? */
        pand        mm3, mm2                    /* use sharpen there */
        paddsb      mm6, mm1                    /* clamping to High ... */
        psubsb      mm6, mm1                    /* ... offset back */
        por         mm6, mm3                    /* mod value to be stored */
        movq2dq     xmm6, mm6
        punpcklbw   xmm7, xmm6                  /* mod bytes into the high half of each word */
        psraw       xmm7, 8                     /* sign extended */
        movdqa      [edi], xmm7                 /* write out UDMod row 8 */

        /* ---- pass 2: apply the weights ---- */
        mov         esi, Src
        mov         edi, Des
        mov         eax, UDPointer
        mov         ebx, LRPointer
        mov         ebp, 8                      /* row counter; locals are not referenced by name after this */

    FilterLoop1:
        movq        xmm0, QWORD PTR [esi+edx]   /* pixels above, pu */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movdqa      xmm4, [eax]                 /* au */
        punpcklbw   xmm0, xmm7                  /* extended to shorts */
        movq        xmm2, QWORD PTR [esi+ecx]   /* pixels below, pd */
        pmullw      xmm0, xmm4                  /* pu*au */
        movdqa      xmm6, [eax+16]              /* ad */
        punpcklbw   xmm2, xmm7                  /* extended to shorts */
        movq        xmm1, QWORD PTR [esi-1]     /* pixels to the left, pl */
        pmullw      xmm2, xmm6                  /* ad*pd */
        movdqa      xmm3, [ebx]                 /* al */
        punpcklbw   xmm1, xmm7                  /* extended to shorts */
        movq        xmm5, QWORD PTR [esi+1]     /* pixels to the right, pr */
        pmullw      xmm1, xmm3                  /* al*pl */
        paddw       xmm4, xmm6                  /* au+ad */
        punpcklbw   xmm5, xmm7                  /* extended to shorts */
        movdqa      xmm6, [ebx+128]             /* ar */
        pmullw      xmm5, xmm6                  /* ar*pr */
        paddw       xmm0, xmm2                  /* au*pu + ad*pd */
        paddw       xmm4, xmm3                  /* au+ad+al */
        paddw       xmm0, xmm1                  /* au*pu+ad*pd+al*pl */
        paddw       xmm4, xmm6                  /* au+ad+al+ar */
        movq        xmm2, QWORD PTR [esi]       /* p */
        paddw       xmm0, xmm5                  /* au*pu+ad*pd+al*pl+ar*pr */

        /* xmm0 --- au*pu+ad*pd+al*pl+ar*pr */
        /* xmm4 --- au + ad + al + ar       */

        movdqa      xmm1, eight128s             /* 0080 0080 0080 0080 0080 0080 0080 0080 */
        punpcklbw   xmm2, xmm7                  /* extended to shorts */
        psubw       xmm1, xmm4                  /* 128-(au+ad+al+ar) */
        pmullw      xmm2, xmm1                  /* p*(128-(au+ad+al+ar)) */
        add         esi, ecx                    /* Src += Pitch */
        movdqa      xmm6, eight64s              /* 64, 64, 64, 64, 64, 64, 64, 64 */
        movdqa      xmm7, xmm6                  /* 64, 64, 64, 64, 64, 64, 64, 64 */
        add         eax, 16                     /* UDPointer += 8 */
        psllw       xmm7, 8                     /* {16384, .. } */
        paddw       xmm0, xmm2                  /* sum */
        add         edi, ecx                    /* Des += Pitch */
        paddw       xmm0, xmm6                  /* sum+B */
        add         ebx, 16                     /* LRPointer += 8 */
        paddw       xmm0, xmm7                  /* bias up ... */
        psubusw     xmm0, xmm7                  /* ... and back: negative sums become 0 */
        dec         ebp
        psrlw       xmm0, 7                     /* (sum+B)>>7 */
        packuswb    xmm0, xmm7                  /* pack to 8 bytes (only the low 8 are stored) */
        movq        QWORD PTR [edi+edx], xmm0   /* write the row; edi was already advanced */
        jnz         FilterLoop1

        pop         ebp
        pop         ebx
        pop         eax
        pop         edx
        pop         ecx
        pop         edi
        pop         esi
    }
#endif
}
/****************************************************************************
*
* ROUTINE : DeringBlockWeak_WMT
*
* INPUTS : const POSTPROC_INSTANCE *pbi : Pointer to post-processor instance (unused).
* const UINT8 *SrcPtr : Pointer to input image (top-left pixel of the block).
* UINT8 *DstPtr : Pointer to output image (top-left pixel of the block).
* const INT32 Pitch : Image stride, used for both source and destination.
* UINT32 FragQIndex : Q-index block encoded with.
* UINT32 *QuantScale : Array of quantization scale factors.
*
* OUTPUTS : None.
*
* RETURNS : void
*
* FUNCTION : Filtering a block for de-ringing purpose.
*
* SPECIAL NOTES : Works on one 8x8 block. The source is read one row above
* the block, one row below it, and one column to its left and right.
*
* Pass 1 builds a weight ("modifier") per pixel and neighbour direction:
*     mod = min( max( QValue + 32 - 2*abs(p - neighbour), 0 ), High )
* with High = min( 3 * QValue, 24 ). Where 2*abs(p - neighbour) exceeds
* QValue + 32 + 64 the Sharpen value is OR-ed into the modifier.
* UDMod holds 9 rows of up/down weights; LRMod holds 8 rows of left weights
* followed by 8 rows of right weights, all as sign-extended 16-bit values.
*
* Pass 2 writes
*     ( au*pu + ad*pd + al*pl + ar*pr + (128 - (au+ad+al+ar))*p + 64 ) >> 7
* with negative sums clamped to zero.
*
* Only the low byte of QuantScale[FragQIndex] and of
* SharpenModifier[FragQIndex] takes part in the byte arithmetic.
* NOTE(review): MMX registers are used and no emms is issued here;
* presumably the caller restores the FPU state - confirm.
* When _WIN32_WCE is defined the routine returns without touching DstPtr.
*
****************************************************************************/
void DeringBlockWeak_WMT
(
    const POSTPROC_INSTANCE *pbi,
    const UINT8 *SrcPtr,
    UINT8 *DstPtr,
    const INT32 Pitch,
    UINT32 FragQIndex,
    UINT32 *QuantScale
)
{
#if defined(_WIN32_WCE)
    return;
#else
    __declspec(align(16)) short UDMod[72];      /* 9 rows x 8 up/down weights */
    __declspec(align(16)) short LRMod[128];     /* 8 rows x 8 left, then 8 rows x 8 right */
    unsigned int PlaneLineStep = Pitch;
    const unsigned char *Src = SrcPtr;
    unsigned char *Des = DstPtr;
    short *UDPointer = UDMod;
    short *LRPointer = LRMod;
    UINT32 QStep = QuantScale[FragQIndex];
    INT32 Sharpen = SharpenModifier[FragQIndex];
    (void) pbi;
    __asm
    {
        push        esi
        push        edi
        mov         esi, Src                    /* source pointer */
        mov         edi, UDPointer              /* up/down modifier pointer */
        push        ecx
        push        edx
        mov         ecx, PlaneLineStep          /* pitch */
        xor         edx, edx
        push        eax
        push        ebx
        mov         eax, QStep                  /* QValue */
        mov         ebx, Sharpen                /* sharpen */
        movd        mm0, eax                    /* QValue */
        movd        mm2, ebx                    /* sharpen */
        push        ebp
        punpcklbw   mm0, mm0                    /* replicate low byte of QValue (1 of 3) */
        sub         edx, ecx                    /* edx = -pitch */
        punpcklbw   mm2, mm2                    /* replicate low byte of sharpen (1 of 3) */
        pxor        mm7, mm7                    /* mm7 = 0 */
        punpcklbw   mm0, mm0                    /* replicate QValue (2 of 3) */
        mov         eax, LRPointer              /* left/right modifier pointer */
        punpcklbw   mm2, mm2                    /* replicate sharpen (2 of 3) */
        lea         ebx, [esi+ecx*8]            /* source row 8: end marker for the fill loop */
        punpcklbw   mm0, mm0                    /* QValue in all 8 bytes */
        movq        mm1, mm0                    /* make a copy */
        punpcklbw   mm2, mm2                    /* sharpen in all 8 bytes */
        paddb       mm1, mm0                    /* QValue * 2 */
        paddb       mm1, mm0                    /* High = 3 * QValue */
        paddusb     mm1, eight231c              /* clamping High to 24 ... */
        paddb       mm0, eight32c               /* 32 + QValue */
        psubusb     mm1, eight231c              /* ... High = min(3 * QValue, 24) */
        movq        mm3, eight127c              /* 7f 7f 7f 7f 7f 7f 7f 7f */
        pandn       mm1, mm3                    /* ClampHigh = 127 - High */

        /* mm0,mm1,mm2 stay live through the fill loop  */
        /* mm0---> QValue+32                            */
        /* mm1---> ClampHigh                            */
        /* mm2---> Sharpen                              */

    FillModLoop1:

        /* ---- up modifier for this row ---- */
        movq        mm3, QWORD PTR [esi]        /* read 8 pixels p */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movq        mm4, QWORD PTR [esi+edx]    /* pixels above, pu */
        movq        mm5, mm3                    /* make a copy of p */
        psubusb     mm3, mm4                    /* p-pu */
        psubusb     mm4, mm5                    /* pu-p */
        por         mm3, mm4                    /* abs(p-pu) */
        movq        mm6, mm0                    /* 32+QValue */
        paddusb     mm3, mm3                    /* 2*abs(p-pu) */
        movq        mm4, mm0                    /* 32+QValue */
        psubusb     mm6, mm3                    /* zero clamped TmpMod */
        movq        mm5, eight128c              /* 80 80 80 80 80 80 80 80 */
        paddb       mm4, eight64c               /* 32+QValue + 64 */
        pxor        mm4, mm5                    /* bias for signed compare */
        pxor        mm3, mm5                    /* bias for signed compare */
        pcmpgtb     mm3, mm4                    /* 32+QValue - 2*abs(p-pu) < -64 ? */
        pand        mm3, mm2                    /* use sharpen there */
        paddsb      mm6, mm1                    /* clamping to High ... */
        psubsb      mm6, mm1                    /* ... offset back */
        por         mm6, mm3                    /* mod value to be stored */

        /* ---- left modifier (up modifier is stored on the way) ---- */
        movq        mm3, QWORD PTR [esi]        /* read 8 pixels p */
        movq2dq     xmm0, mm6
        movq        mm4, QWORD PTR [esi-1]      /* pixels to the left, pl */
        punpcklbw   xmm7, xmm0                  /* mod bytes into the high half of each word */
        movq        mm5, mm3                    /* make a copy of p */
        psraw       xmm7, 8                     /* sign extended */
        psubusb     mm3, mm4                    /* p-pl */
        movdqa      [edi], xmm7                 /* write out UDMod row */
        psubusb     mm4, mm5                    /* pl-p */
        por         mm3, mm4                    /* abs(p-pl) */
        movq        mm6, mm0                    /* 32+QValue */
        paddusb     mm3, mm3                    /* 2*abs(p-pl) */
        movq        mm4, mm0                    /* 32+QValue */
        psubusb     mm6, mm3                    /* zero clamped TmpMod */
        movq        mm5, eight128c              /* 80 80 80 80 80 80 80 80 */
        paddb       mm4, eight64c               /* 32+QValue + 64 */
        pxor        mm4, mm5                    /* bias for signed compare */
        pxor        mm3, mm5                    /* bias for signed compare */
        pcmpgtb     mm3, mm4                    /* 32+QValue - 2*abs(p-pl) < -64 ? */
        pand        mm3, mm2                    /* use sharpen there */
        paddsb      mm6, mm1                    /* clamping to High ... */
        psubsb      mm6, mm1                    /* ... offset back */
        por         mm6, mm3                    /* mod value to be stored */

        /* ---- right modifier (left modifier is stored on the way) ---- */
        movq        mm3, QWORD PTR [esi]        /* read 8 pixels p */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movq        mm4, QWORD PTR [esi+1]      /* pixels to the right, pr */
        movq2dq     xmm0, mm6
        movq        mm5, mm3                    /* make a copy of p */
        punpcklbw   xmm7, xmm0                  /* mod bytes into the high half of each word */
        psubusb     mm3, mm4                    /* p-pr */
        psraw       xmm7, 8                     /* sign extended */
        psubusb     mm4, mm5                    /* pr-p */
        movdqa      [eax], xmm7                 /* write out left LRMod row */
        por         mm3, mm4                    /* abs(p-pr) */
        movq        mm6, mm0                    /* 32+QValue */
        paddusb     mm3, mm3                    /* 2*abs(p-pr) */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movq        mm4, mm0                    /* 32+QValue */
        psubusb     mm6, mm3                    /* zero clamped TmpMod */
        movq        mm5, eight128c              /* 80 80 80 80 80 80 80 80 */
        paddb       mm4, eight64c               /* 32+QValue + 64 */
        pxor        mm4, mm5                    /* bias for signed compare */
        pxor        mm3, mm5                    /* bias for signed compare */
        pcmpgtb     mm3, mm4                    /* 32+QValue - 2*abs(p-pr) < -64 ? */
        pand        mm3, mm2                    /* use sharpen there */
        paddsb      mm6, mm1                    /* clamping to High ... */
        psubsb      mm6, mm1                    /* ... offset back */
        por         mm6, mm3                    /* mod value to be stored */
        add         esi, ecx                    /* next source row */
        movq2dq     xmm0, mm6
        add         edi, 16                     /* next UDMod row */
        punpcklbw   xmm7, xmm0                  /* unpack the mod just moved to xmm0 (source was written as mm0, not a valid operand here) */
        add         eax, 16                     /* next LRMod row */
        psraw       xmm7, 8                     /* sign extended */
        cmp         esi, ebx
        movdqa      [eax+112], xmm7             /* write out right LRMod row: 128 bytes past the left row just written */
        jne         FillModLoop1

        /* ---- last UDMod row: source row 8 against row 7 ---- */
        movq        mm3, QWORD PTR [esi]        /* read 8 pixels p */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movq        mm4, QWORD PTR [esi+edx]    /* pixels above, pu */
        movq        mm5, mm3                    /* make a copy of p */
        psubusb     mm3, mm4                    /* p-pu */
        psubusb     mm4, mm5                    /* pu-p */
        por         mm3, mm4                    /* abs(p-pu) */
        movq        mm6, mm0                    /* 32+QValue */
        paddusb     mm3, mm3                    /* 2*abs(p-pu) */
        movq        mm4, mm0                    /* 32+QValue */
        psubusb     mm6, mm3                    /* zero clamped TmpMod */
        movq        mm5, eight128c              /* 80 80 80 80 80 80 80 80 */
        paddb       mm4, eight64c               /* 32+QValue + 64 */
        pxor        mm4, mm5                    /* bias for signed compare */
        pxor        mm3, mm5                    /* bias for signed compare */
        pcmpgtb     mm3, mm4                    /* 32+QValue - 2*abs(p-pu) < -64 ? */
        pand        mm3, mm2                    /* use sharpen there */
        paddsb      mm6, mm1                    /* clamping to High ... */
        psubsb      mm6, mm1                    /* ... offset back */
        por         mm6, mm3                    /* mod value to be stored */
        movq2dq     xmm6, mm6
        punpcklbw   xmm7, xmm6                  /* mod bytes into the high half of each word */
        psraw       xmm7, 8                     /* sign extended */
        movdqa      [edi], xmm7                 /* write out UDMod row 8 */

        /* ---- pass 2: apply the weights ---- */
        mov         esi, Src
        mov         edi, Des
        mov         eax, UDPointer
        mov         ebx, LRPointer
        mov         ebp, 8                      /* row counter; locals are not referenced by name after this */

    FilterLoop1:
        movq        xmm0, QWORD PTR [esi+edx]   /* pixels above, pu */
        pxor        xmm7, xmm7                  /* clear xmm7 */
        movdqa      xmm4, [eax]                 /* au */
        punpcklbw   xmm0, xmm7                  /* extended to shorts */
        movq        xmm2, QWORD PTR [esi+ecx]   /* pixels below, pd */
        pmullw      xmm0, xmm4                  /* pu*au */
        movdqa      xmm6, [eax+16]              /* ad */
        punpcklbw   xmm2, xmm7                  /* extended to shorts */
        movq        xmm1, QWORD PTR [esi-1]     /* pixels to the left, pl */
        pmullw      xmm2, xmm6                  /* ad*pd */
        movdqa      xmm3, [ebx]                 /* al */
        punpcklbw   xmm1, xmm7                  /* extended to shorts */
        movq        xmm5, QWORD PTR [esi+1]     /* pixels to the right, pr */
        pmullw      xmm1, xmm3                  /* al*pl */
        paddw       xmm4, xmm6                  /* au+ad */
        punpcklbw   xmm5, xmm7                  /* extended to shorts */
        movdqa      xmm6, [ebx+128]             /* ar */
        pmullw      xmm5, xmm6                  /* ar*pr */
        paddw       xmm0, xmm2                  /* au*pu + ad*pd */
        paddw       xmm4, xmm3                  /* au+ad+al */
        paddw       xmm0, xmm1                  /* au*pu+ad*pd+al*pl */
        paddw       xmm4, xmm6                  /* au+ad+al+ar */
        movq        xmm2, QWORD PTR [esi]       /* p */
        paddw       xmm0, xmm5                  /* au*pu+ad*pd+al*pl+ar*pr */

        /* xmm0 --- au*pu+ad*pd+al*pl+ar*pr */
        /* xmm4 --- au + ad + al + ar       */

        movdqa      xmm1, eight128s             /* 0080 0080 0080 0080 0080 0080 0080 0080 */
        punpcklbw   xmm2, xmm7                  /* extended to shorts */
        psubw       xmm1, xmm4                  /* 128-(au+ad+al+ar) */
        pmullw      xmm2, xmm1                  /* p*(128-(au+ad+al+ar)) */
        add         esi, ecx                    /* Src += Pitch */
        movdqa      xmm6, eight64s              /* 64, 64, 64, 64, 64, 64, 64, 64 */
        movdqa      xmm7, xmm6                  /* 64, 64, 64, 64, 64, 64, 64, 64 */
        add         eax, 16                     /* UDPointer += 8 */
        psllw       xmm7, 8                     /* {16384, .. } */
        paddw       xmm0, xmm2                  /* sum */
        add         edi, ecx                    /* Des += Pitch */
        paddw       xmm0, xmm6                  /* sum+B */
        add         ebx, 16                     /* LRPointer += 8 */
        paddw       xmm0, xmm7                  /* bias up ... */
        psubusw     xmm0, xmm7                  /* ... and back: negative sums become 0 */
        dec         ebp
        psrlw       xmm0, 7                     /* (sum+B)>>7 */
        packuswb    xmm0, xmm7                  /* pack to 8 bytes (only the low 8 are stored) */
        movq        QWORD PTR [edi+edx], xmm0   /* write the row; edi was already advanced */
        jnz         FilterLoop1

        pop         ebp
        pop         ebx
        pop         eax
        pop         edx
        pop         ecx
        pop         edi
        pop         esi
    }
#endif
}

View file

@ -0,0 +1,211 @@
/****************************************************************************
*
* Module Title : SystemDependant.c
*
* Description : Miscellaneous system dependant functions
*
****************************************************************************/
/****************************************************************************
* Header Files
****************************************************************************/
#include "postp.h"
/****************************************************************************
* Imports
*****************************************************************************/
extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
// c imports
extern void FilteringVert_12_C(UINT32 QValue,UINT8 * Src, INT32 Pitch);
extern void FilteringHoriz_12_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
extern void FilteringVert_8_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
extern void FilteringHoriz_8_C(UINT32 QValue, UINT8 * Src, INT32 Pitch );
extern void HorizontalLine_1_2_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
extern void HorizontalLine_3_5_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
extern void HorizontalLine_4_5_Scale_C( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
extern void VerticalBand_4_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_4_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void VerticalBand_3_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_3_5_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void VerticalBand_1_2_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_1_2_Scale_C( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void FilterHoriz_Simple_C( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterVert_Simple_C( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterHoriz_Generic( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterVert_Generic( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern INT32 *SetupBoundingValueArray_Generic( POSTPROC_INSTANCE *pbi, INT32 FLimit );
extern INT32 *SetupDeblockValueArray_Generic( POSTPROC_INSTANCE *pbi, INT32 FLimit );
extern void DeringBlockWeak_C( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
extern void DeringBlockStrong_C( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale);
extern void DeblockLoopFilteredBand_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void DeblockNonFilteredBand_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void DeblockNonFilteredBandNewFilter_C( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void PlaneAddNoise_C( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
// mmx imports
extern void FilteringVert_12_MMX(UINT32 QValue,UINT8 * Src, INT32 Pitch);
extern void FilteringHoriz_12_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
extern void FilteringVert_8_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
extern void FilteringHoriz_8_MMX(UINT32 QValue, UINT8 * Src, INT32 Pitch );
extern void HorizontalLine_1_2_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
extern void HorizontalLine_3_5_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
extern void HorizontalLine_4_5_Scale_MMX( const unsigned char * source, unsigned int sourceWidth, unsigned char * dest, unsigned int destWidth );
extern void VerticalBand_4_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_4_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void VerticalBand_3_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_3_5_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void VerticalBand_1_2_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void LastVerticalBand_1_2_Scale_MMX( unsigned char * dest, unsigned int destPitch, unsigned int destWidth );
extern void FilterHoriz_Simple_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterVert_Simple_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterHoriz_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern void FilterVert_MMX( POSTPROC_INSTANCE *pbi, UINT8 * PixelPtr, INT32 LineLength, INT32 *BoundingValuePtr );
extern INT32 *SetupBoundingValueArray_ForMMX( POSTPROC_INSTANCE *pbi, INT32 FLimit );
extern INT32 *SetupDeblockValueArray_ForMMX( POSTPROC_INSTANCE *pbi, INT32 FLimit );
extern void DeringBlockWeak_MMX( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
extern void DeringBlockStrong_MMX( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale);
extern void DeblockLoopFilteredBand_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void DeblockNonFilteredBand_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void DeblockNonFilteredBandNewFilter_MMX( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void FillLoopFilterLimitValues_MMX(void);
extern INT16 *LoopFilterLimitValuesV2_MMX;
extern void PlaneAddNoise_mmx( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
// wmt imports
extern void DeblockLoopFilteredBand_WMT( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void DeblockNonFilteredBand_WMT( POSTPROC_INSTANCE *pbi, UINT8 *SrcPtr, UINT8 *DesPtr, UINT32 PlaneLineStep, UINT32 FragsAcross, UINT32 StartFrag, UINT32 *QuantScale );
extern void DeringBlockWeak_WMT( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
extern void DeringBlockStrong_WMT( const POSTPROC_INSTANCE *pbi, const UINT8 *SrcPtr, UINT8 *DstPtr, const INT32 Pitch, UINT32 FragQIndex, UINT32 *QuantScale );
extern void CFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
extern void MmxFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
extern void WmtFastDeInterlace(UINT8 * SrcPtr,UINT8 * DstPtr,INT32 Width,INT32 Height,INT32 Stride);
extern void ClampLevels_C( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
extern void ClampLevels_wmt( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
extern void ClampLevels_mmx( POSTPROC_INSTANCE *pbi,INT32 BlackClamp,INT32 WhiteClamp,UINT8 *Src,UINT8 *Dst);
extern void PlaneAddNoise_wmt( UINT8 *Start, UINT32 Width, UINT32 Height, INT32 Pitch, int q);
/****************************************************************************
 *
 *  ROUTINE       :     PostProcMachineSpecificConfig
 *
 *  INPUTS        :     UINT32 Version : Codec version number (not referenced
 *                                       by this routine).
 *
 *  OUTPUTS       :     None.
 *
 *  RETURNS       :     void
 *
 *  FUNCTION      :     Reads the processor capability flags and binds every
 *                      post-processor function pointer to the Willamette,
 *                      the MMX or the plain C implementation.
 *
 *  SPECIAL NOTES :     The Willamette tier shares most routines with the MMX
 *                      tier; only the dering, band deblocking, de-interlace,
 *                      clamp and noise routines have dedicated _WMT versions.
 *
 ****************************************************************************/
void PostProcMachineSpecificConfig ( UINT32 Version )
{
    INT32 MmxEnabled;
    INT32 XmmEnabled;
    INT32 WmtEnabled;

    (void) Version;

    GetProcessorFlags( &MmxEnabled, &XmmEnabled, &WmtEnabled );

    // No SIMD support at all: everything falls back to the C routines.
    if ( !WmtEnabled && !MmxEnabled )
    {
        FilterHoriz                     = FilterHoriz_Generic;
        FilterVert                      = FilterVert_Generic;
        SetupBoundingValueArray         = SetupBoundingValueArray_Generic;
        SetupDeblockValueArray          = SetupDeblockValueArray_Generic;

        DeringBlockWeak                 = DeringBlockWeak_C;
        DeringBlockStrong               = DeringBlockStrong_C;
        DeblockLoopFilteredBand         = DeblockLoopFilteredBand_C;
        DeblockNonFilteredBand          = DeblockNonFilteredBand_C;
        DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_C;

        FilterHoriz_Simple              = FilterHoriz_Simple_C;
        FilterVert_Simple               = FilterVert_Simple_C;

        HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_C;
        VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_C;
        LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_C;
        HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_C;
        VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_C;
        LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_C;
        HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_C;
        VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_C;
        LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_C;

        FilteringHoriz_8                = FilteringHoriz_8_C;
        FilteringVert_8                 = FilteringVert_8_C;
        FilteringHoriz_12               = FilteringHoriz_12_C;
        FilteringVert_12                = FilteringVert_12_C;

        FastDeInterlace                 = CFastDeInterlace;
        ClampLevels                     = ClampLevels_C;
        PlaneAddNoise                   = PlaneAddNoise_C;
        return;
    }

    // From here on at least MMX is available.  These bindings are the same
    // for the MMX tier and the Willamette tier.
    FillLoopFilterLimitValues_MMX();

    FilterHoriz                     = FilterHoriz_MMX;
    FilterVert                      = FilterVert_MMX;
    SetupBoundingValueArray         = SetupBoundingValueArray_ForMMX;
    SetupDeblockValueArray          = SetupDeblockValueArray_ForMMX;

    DeblockNonFilteredBandNewFilter = DeblockNonFilteredBandNewFilter_MMX;

    FilterHoriz_Simple              = FilterHoriz_Simple_MMX;
    FilterVert_Simple               = FilterVert_Simple_MMX;

    HorizontalLine_1_2_Scale        = HorizontalLine_1_2_Scale_MMX;
    HorizontalLine_3_5_Scale        = HorizontalLine_3_5_Scale_MMX;
    HorizontalLine_4_5_Scale        = HorizontalLine_4_5_Scale_MMX;
    VerticalBand_1_2_Scale          = VerticalBand_1_2_Scale_MMX;
    LastVerticalBand_1_2_Scale      = LastVerticalBand_1_2_Scale_MMX;
    VerticalBand_3_5_Scale          = VerticalBand_3_5_Scale_MMX;
    LastVerticalBand_3_5_Scale      = LastVerticalBand_3_5_Scale_MMX;
    VerticalBand_4_5_Scale          = VerticalBand_4_5_Scale_MMX;
    LastVerticalBand_4_5_Scale      = LastVerticalBand_4_5_Scale_MMX;

    FilteringHoriz_8                = FilteringHoriz_8_MMX;
    FilteringVert_8                 = FilteringVert_8_MMX;
    FilteringHoriz_12               = FilteringHoriz_12_MMX;
    FilteringVert_12                = FilteringVert_12_MMX;

    // Tier-specific routines.
    if ( WmtEnabled )
    {
        // Willamette
        DeringBlockWeak             = DeringBlockWeak_WMT;
        DeringBlockStrong           = DeringBlockStrong_WMT;
        DeblockLoopFilteredBand     = DeblockLoopFilteredBand_WMT;
        DeblockNonFilteredBand      = DeblockNonFilteredBand_WMT;
        FastDeInterlace             = WmtFastDeInterlace;
        ClampLevels                 = ClampLevels_wmt;
        PlaneAddNoise               = PlaneAddNoise_wmt;
    }
    else
    {
        // MMX only
        DeringBlockWeak             = DeringBlockWeak_MMX;
        DeringBlockStrong           = DeringBlockStrong_MMX;
        DeblockLoopFilteredBand     = DeblockLoopFilteredBand_MMX;
        DeblockNonFilteredBand      = DeblockNonFilteredBand_MMX;
        FastDeInterlace             = MmxFastDeInterlace;
        ClampLevels                 = ClampLevels_mmx;
        PlaneAddNoise               = PlaneAddNoise_mmx;
    }
}

View file

@ -0,0 +1,540 @@
/****************************************************************************
*
* Module Title : loopf_asm.c
*
* Description : Optimized version of the loop filter.
*
****************************************************************************/
#define STRICT /* Strict type checking */
/****************************************************************************
* Header Files
****************************************************************************/
#include <stdio.h>
#include <memory.h>
#include "postp.h"
/****************************************************************************
* Macros
****************************************************************************/
#pragma warning (disable:4799)
#pragma warning (disable:4731)
#define LIMIT_OFFSET 0
#define FOURONES_OFFSET 8
#define LFABS_OFFSET 16
#define TRANS_OFFSET 24
/****************************************************************************
 *
 *  ROUTINE       :     SetupBoundingValueArray_ForMMX
 *
 *  INPUTS        :     POSTPROC_INSTANCE *pbi : Pointer to post-processing instance.
 *                      INT32 FLimit           : Filter limiting value.
 *
 *  OUTPUTS       :     None.
 *
 *  RETURNS       :     INT32*: Pointer to the (32-byte aligned) constant/scratch
 *                              block handed to the MMX loop filters.
 *
 *  FUNCTION      :     Builds the packed-word constants the MMX loop filters
 *                      read through their BoundingValuePtr argument.
 *
 *  SPECIAL NOTES :     Layout of the returned block, in INT32 units:
 *                        [0..1]  FLimit replicated into four 16-bit words
 *                                (byte offset LIMIT_OFFSET)
 *                        [2..3]  four 16-bit ones (byte offset FOURONES_OFFSET)
 *                        [4..5]  four 16-bit fours, the rounding term added
 *                                before the >>3 (byte offset LFABS_OFFSET)
 *                        [6..13] left unwritten here; temp work storage for
 *                                the filters (byte offset TRANS_OFFSET)
 *
 ****************************************************************************/
INT32 *SetupBoundingValueArray_ForMMX ( POSTPROC_INSTANCE *pbi, INT32 FLimit )
{
    /* The FiltBoundingValue array is only used by the generic filters, so  */
    /* its storage is borrowed here.  The address of element 256 is rounded */
    /* down to a multiple of 32 bytes to obtain the block start.            */
    UINT32 AlignedAddr  = (UINT32)(&pbi->FiltBoundingValue[256]) & 0xffffffe0;
    INT32 *Scratch      = (INT32 *)AlignedAddr;
    INT32  PackedLimit  = FLimit * 0x00010001;

    // Each constant fills one 64-bit MMX quantity, i.e. two INT32 slots.
    Scratch[1] = PackedLimit;
    Scratch[0] = PackedLimit;

    Scratch[3] = 0x00010001;
    Scratch[2] = 0x00010001;

    Scratch[5] = 0x00040004;
    Scratch[4] = 0x00040004;

    return Scratch;
}
/****************************************************************************
 *
 *  ROUTINE       :     FilterHoriz_MMX
 *
 *  INPUTS        :     POSTPROC_INSTANCE *pbi  : Pointer to post-processing instance (unused).
 *                      UINT8 *PixelPtr         : Pointer to the first of the four pixels
 *                                                straddling the edge on the first row.
 *                                                Bytes PixelPtr[-2..5] of each of 8 rows
 *                                                are loaded; PixelPtr[1] and PixelPtr[2]
 *                                                of each row are rewritten.
 *                      INT32 LineLength        : Length of line (stride) in input frame.
 *                      INT32 *BoundingValuePtr : Pointer to the constant/scratch block laid
 *                                                out at LIMIT_OFFSET, FOURONES_OFFSET,
 *                                                LFABS_OFFSET and TRANS_OFFSET.
 *
 *  OUTPUTS       :     Filtered pixels written in place; the 32 bytes at
 *                      BoundingValuePtr + TRANS_OFFSET are overwritten.
 *
 *  RETURNS       :     void
 *
 *  FUNCTION      :     Applies a loop filter to the vertical edge (i.e. horizontally).
 *                      For each row, with p0..p3 = PixelPtr[0..3]:
 *                          x  = (p0 - p3 + 3 * (p2 - p1) + 4) >> 3
 *                          y  = sign(x) * max(0, limit - abs(limit - abs(x)))
 *                          p1 = clamp(p1 + y),  p2 = clamp(p2 - y)
 *                      Rows 0-3 and rows 4-7 are processed as two interleaved halves.
 *
 *  SPECIAL NOTES :     This version attempts to fix the DC_misalign stalls.
 *                      In the inline comments "p[0]" is column 2 and "p[ms]" is
 *                      column 1 of the transposed 8x4 block.
 *                      ebp is borrowed as a scratch register between the push/pop
 *                      pair, so no C local or argument may be referenced there.
 *                      No EMMS is issued by this routine.
 *
 ****************************************************************************/
void FilterHoriz_MMX
(
    POSTPROC_INSTANCE *pbi,
    UINT8 *PixelPtr,
    INT32 LineLength,
    INT32 *BoundingValuePtr
)
{
    (void) pbi;

    /* A somewhat optimized MMX version of the left edge filter. */
    __asm
    {
        mov         eax,[BoundingValuePtr]
        mov         edx,[LineLength]                    //stride
        mov         ebx,[PixelPtr]
        mov         ecx,[LineLength]                    //stride

        // ---- gather and transpose rows 0..3 ----
        movd        mm0,[ebx + -2]                      //xx xx xx xx 01 00 xx xx
        ;-
        movd        mm4,[ebx + 2]                       //xx xx xx xx xx xx 03 02
        psrld       mm0,16                              //xx xx xx xx 00 00 01 00
        movd        mm1,[ebx + ecx + -2]                //xx xx xx xx 11 10 xx xx
        punpcklwd   mm0,mm4                             //xx xx xx xx 03 02 01 00
        movd        mm4,[ebx + ecx + 2]                 //xx xx xx xx xx xx 13 12
        psrld       mm1,16                              //xx xx xx xx 00 00 11 10
        punpcklwd   mm1,mm4                             //xx xx xx xx 13 12 11 10
        lea         edx,[edx + edx*2]                   //stride * 3
        movd        mm2,[ebx + ecx*2 + -2]              //xx xx xx xx 21 20 xx xx
        punpcklbw   mm0,mm1                             //13 03 12 02 11 01 10 00
        movd        mm4,[ebx + ecx*2 + 2]               //xx xx xx xx xx xx 23 22
        psrld       mm2,16                              //xx xx xx xx 00 00 21 20
        movd        mm1,[ebx + edx + -2]                //xx xx xx xx 31 30 xx xx
        punpcklwd   mm2,mm4                             //xx xx xx xx 23 22 21 20
        movd        mm4,[ebx + edx + 2]                 //xx xx xx xx xx xx 33 32
        psrld       mm1,16                              //xx xx xx xx 00 00 31 30
        punpcklwd   mm1,mm4                             //xx xx xx xx 33 32 31 30
        pxor        mm4,mm4
        punpcklbw   mm2,mm1                             //33 23 32 22 31 21 30 20
        movq        mm1,mm0
        punpcklwd   mm0,mm2                             //31 21 11 01 30 20 10 00
        lea         ebx,[ebx + ecx*4]                   //base + (stride * 4)
        punpckhwd   mm1,mm2                             //33 23 13 03 32 22 12 02
        movq        mm6,mm0                             //xx xx xx xx 30 20 10 00
        movq        [eax + TRANS_OFFSET + 0],mm0        //save columns 0,1 of rows 0..3
        movq        mm2,mm1
        movq        [eax + TRANS_OFFSET + 8],mm1        //save columns 2,3 of rows 0..3
        psrlq       mm0,32                              //xx xx xx xx 31 21 11 01
        ;-----------
        // ---- start gathering rows 4..7 while computing x for rows 0..3 ----
        movd        mm7,[ebx + -2]                      //xx xx xx xx 41 40 xx xx
        punpcklbw   mm1,mm4                             //convert to words
        movd        mm4,[ebx + 2]                       //xx xx xx xx xx xx 43 42
        psrld       mm7,16                              //xx xx xx xx 00 00 41 40
        movd        mm5,[ebx + ecx + -2]                //xx xx xx xx 51 50 xx xx
        punpcklwd   mm7,mm4                             //xx xx xx xx 43 42 41 40
        movd        mm4,[ebx + ecx + 2]                 //xx xx xx xx xx xx 53 52
        psrld       mm5,16
        punpcklwd   mm5,mm4
        pxor        mm4,mm4
        punpcklbw   mm0,mm4
        ;-
        psrlq       mm2,32                              //xx xx xx xx 33 23 13 03
        psubw       mm1,mm0                             //x = p[0] - p[ms]
        punpcklbw   mm7,mm5                             //53 43 52 42 51 41 50 40
        movq        mm3,mm1
        ;-------------------
        punpcklbw   mm6,mm4
        paddw       mm3,mm1
        punpcklbw   mm2,mm4
        paddw       mm1,mm3                             //x *= 3
        paddw       mm1,[eax + LFABS_OFFSET]            //x += LoopFilterAdjustBeforeShift
        psubw       mm6,mm2                             //column 0 - column 3
        movd        mm2,[ebx + ecx*2 + -2]              //xx xx xx xx 61 60 xx xx
        paddw       mm6,mm1
        movd        mm4,[ebx + ecx*2 + 2]               //xx xx xx xx xx xx 63 62
        psrld       mm2,16
        movd        mm5,[ebx + edx + -2]                //xx xx xx xx 71 70 xx xx
        punpcklwd   mm2,mm4                             //xx xx xx xx 63 62 61 60
        movd        mm4,[ebx + edx + 2]                 //xx xx xx xx xx xx 73 72
        psrld       mm5,16                              //xx xx xx xx 00 00 71 70
        mov         ebx,[PixelPtr]                      //restore PixelPtr
        punpcklwd   mm5,mm4                             //xx xx xx xx 73 72 71 70
        psraw       mm6,3                               //values to be clipped
        pxor        mm4,mm4
        punpcklbw   mm2,mm5                             //73 63 72 62 71 61 70 60
        movq        mm5,mm7                             //53 43 52 42 51 41 50 40
        movq        mm1,mm6
        punpckhwd   mm5,mm2                             //73 63 53 43 72 62 52 42
        movq        [eax + TRANS_OFFSET + 24],mm5       //save for later
        punpcklwd   mm7,mm2                             //71 61 51 41 70 60 50 40
        movq        [eax + TRANS_OFFSET + 16],mm7       //save for later
        psraw       mm6,15                              //sign mask of x
        movq        mm2,[eax + LIMIT_OFFSET]            //get the limit value
        movq        mm0,mm7                             //xx xx xx xx 70 60 50 40
        psrlq       mm7,32                              //xx xx xx xx 71 61 51 41
        pxor        mm1,mm6
        psubsw      mm1,mm6                             //abs(i)
        punpcklbw   mm5,mm4
        por         mm6,[eax + FOURONES_OFFSET]         //now have -1 or 1
        movq        mm3,mm2
        punpcklbw   mm7,mm4
        psubw       mm3,mm1                             //limit - abs(i)
        movq        mm4,mm3
        psraw       mm3,15
        push        ebp                                 //ebp is used as scratch until the pop below
        ;-
        psubw       mm5,mm7                             //x = p[0] - p[ms]
        pxor        mm4,mm3
        psubsw      mm4,mm3                             //abs(limit - abs(i))
        pxor        mm3,mm3
        // Only the low four bytes are unpacked below, so a 32-bit load is
        // enough; it also stays inside the 32-byte transpose scratch area.
        movd        mm1,[eax + TRANS_OFFSET + 28]       //xx xx xx xx 73 63 53 43
        psubusw     mm2,mm4                             //limit - abs(limit - abs(i))
        punpcklbw   mm0,mm3
        movq        mm7,mm5
        paddw       mm7,mm5
        pmullw      mm2,mm6                             //new y -- wait 3 cycles
        punpcklbw   mm1,mm3
        paddw       mm5,mm7                             //x *= 3
        paddw       mm5,[eax + LFABS_OFFSET]            //x += LoopFilterAdjustBeforeShift
        psubw       mm0,mm1                             //column 0 - column 3
        paddw       mm0,mm5
        pxor        mm6,mm6
        movd        mm7,[eax + TRANS_OFFSET + 8]        //xx xx xx xx 32 22 12 02
        psraw       mm0,3                               //values to be clipped
        movd        mm3,[eax + TRANS_OFFSET + 4]        //xx xx xx xx 31 21 11 01
        punpcklbw   mm7,mm6
        psubw       mm7,mm2                             //p[0] - y
        punpcklbw   mm3,mm6
        paddw       mm3,mm2                             //p[ms] + y
        packuswb    mm7,mm7                             //clamp[ p[0] - y]
        packuswb    mm3,mm3                             //clamp[ p[ms] + y]
        movq        mm1,mm0
        movq        mm2,[eax + LIMIT_OFFSET]            //get the limit value
        psraw       mm0,15
        //values to write out
        punpcklbw   mm3,mm7                             //32 31 22 21 12 11 02 01
        movq        mm7,mm0                             //save sign
        movd        ebp,mm3                             //12 11 02 01
        pxor        mm1,mm0
        //xor       bp,bp
        mov         WORD PTR[ebx + 1],bp                //02 01
        psubsw      mm1,mm0                             //abs(i)
        shr         ebp,16
        movq        mm5,mm2
        mov         WORD PTR[ebx + ecx + 1],bp          //12 11
        psrlq       mm3,32                              //xx xx xx xx 32 31 22 21
        por         mm7,[eax + FOURONES_OFFSET]         //now have -1 or 1
        psubw       mm5,mm1                             //limit - abs(i)
        movd        ebp,mm3                             //32 31 22 21
        movq        mm4,mm5
        mov         [ebx + ecx*2 + 1],bp                //22 21
        psraw       mm5,15
        shr         ebp,16
        pxor        mm4,mm5
        mov         [ebx + edx + 1],bp                  //32 31
        psubsw      mm4,mm5                             //abs(limit - abs(i))
        movd        mm5,[eax + TRANS_OFFSET + 24]       //xx xx xx xx 72 62 52 42
        psubusw     mm2,mm4                             //limit - abs(limit - abs(i))
        pmullw      mm2,mm7                             //new y
        pxor        mm6,mm6
        movd        mm3,[eax + TRANS_OFFSET + 20]       //xx xx xx xx 71 61 51 41
        punpcklbw   mm5,mm6
        lea         ebx,[ebx + ecx*4]                   //advance to row 4
        punpcklbw   mm3,mm6
        paddw       mm3,mm2                             //p[ms] + y
        psubw       mm5,mm2                             //p[0] - y
        packuswb    mm3,mm3                             //clamp[ p[ms] + y]
        pop         ebp
        ;-
        //
        //NOTE: optimize the following somehow
        //
        packuswb    mm5,mm5                             //clamp[ p[0] - y]
        ;-
        punpcklbw   mm3,mm5                             //72 71 62 61 52 51 42 41
        ;-
        movd        eax,mm3                             //52 51 42 41
        psrlq       mm3,32                              //xx xx xx xx 72 71 62 61
        mov         [ebx + 1],ax                        //42 41
        ;-
        shr         eax,16
        ;-
        mov         [ebx + ecx + 1],ax                  //52 51
        ;-
        movd        eax,mm3
        ;-
        mov         [ebx + ecx*2 + 1],ax                //62 61
        ;-
        shr         eax,16
        ;-
        mov         [ebx + edx + 1],ax                  //72 71
        ;-
    }
}
/****************************************************************************
*
* ROUTINE : FilterVert_MMX
*
* INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processing instance (unused).
* UINT8 *PixelPtr : Pointer to the first pixel of the row just below the edge.
* INT32 LineLength : Length of line (stride) in input frame.
* INT32 *BoundingValuePtr : Pointer to the constant block read at LIMIT_OFFSET,
* FOURONES_OFFSET and LFABS_OFFSET.
*
* OUTPUTS : The 8 pixels at PixelPtr and the 8 pixels at PixelPtr - LineLength
* are rewritten in place.
*
* RETURNS : void
*
* FUNCTION : Applies a loop filter to the horizontal edge (i.e. vertically).
* For each of 8 columns, with p[ms2], p[ms], p[0], p[stride] the pixels
* at -2*LineLength, -LineLength, 0 and +LineLength:
* x = (p[ms2] - p[stride] + 3 * (p[0] - p[ms]) + 4) >> 3
* y = sign(x) * max(0, limit - abs(limit - abs(x)))
* p[ms] = clamp(p[ms] + y), p[0] = clamp(p[0] - y)
* Columns 0-3 and columns 4-7 are handled as two interleaved halves.
*
* SPECIAL NOTES : This version attempts to fix the DC_misalign stalls.
* No EMMS is issued by this routine.
*
****************************************************************************/
void FilterVert_MMX
(
POSTPROC_INSTANCE *pbi,
UINT8 *PixelPtr,
INT32 LineLength,
INT32 *BoundingValuePtr
)
{
// Negative stride, kept in a local so the asm can load it directly.
INT32 ms = -LineLength;
(void) pbi;
/* A somewhat optimized MMX version of the top edge filter. */
__asm
{
mov eax,[BoundingValuePtr]
;-
mov ebx,[PixelPtr]
mov ecx,[ms] //negative stride
movd mm1,[ebx + 0] //p[0]
pxor mm4,mm4
movd mm0,[ebx + ecx] //get row above -- p[ms]
punpcklbw mm1,mm4 //convert to words
mov edx,[LineLength]
punpcklbw mm0,mm4
movd mm6,[ebx + ecx*2] //p[ms2]
psubw mm1,mm0 //x = p[0] - p[ms]
movq mm2,[ebx + edx] //p[stride] (8 bytes loaded; only the low 4 are unpacked below)
movq mm3,mm1
punpcklbw mm6,mm4
paddw mm3,mm1
punpcklbw mm2,mm4
paddw mm1,mm3 //x *= 3
paddw mm1,[eax + LFABS_OFFSET] //x += LoopFilterAdjustBeforeShift
psubw mm6,mm2 //p[ms2] - p[stride]
movq mm2,[eax + LIMIT_OFFSET] //get the limit value
paddw mm6,mm1
movd mm5,[ebx + 4] //p[0]
psraw mm6,3 //values to be clipped
movq mm1,mm6
psraw mm6,15 //sign mask
movd mm7,[ebx + ecx + 4] //p[ms]
pxor mm1,mm6
psubsw mm1,mm6 //abs(i)
pxor mm0,mm0
punpcklbw mm5,mm0
movq mm3,mm2
por mm6,[eax + FOURONES_OFFSET] //now have -1 or 1
punpcklbw mm7,mm0
psubw mm3,mm1 //limit - abs(i)
psubw mm5,mm7 //x = p[0] - p[ms]
movq mm4,mm3
psraw mm3,15
movd mm0,[ebx + ecx*2 + 4] //p[ms2]
pxor mm4,mm3
movd mm1,[ebx + edx +4] //p[stride]
psubsw mm4,mm3 //abs(limit - abs(i))
pxor mm3,mm3
psubusw mm2,mm4 //limit - abs(limit - abs(i))
punpcklbw mm0,mm3
movq mm7,mm5
paddw mm7,mm5
pmullw mm2,mm6 //new y -- wait 3 cycles
punpcklbw mm1,mm3
paddw mm5,mm7 //x *= 3
paddw mm5,[eax + LFABS_OFFSET] //x += LoopFilterAdjustBeforeShift
psubw mm0,mm1 //p[ms2] - p[stride]
paddw mm0,mm5
pxor mm6,mm6
movd mm7,[ebx + 0] //p[0]
psraw mm0,3 //values to be clipped
movd mm3,[ebx + ecx] //get row above -- p[ms]
punpcklbw mm7,mm6
psubw mm7,mm2 //p[0] - y
punpcklbw mm3,mm6
paddw mm3,mm2 //p[ms] + y
packuswb mm7,mm7 //clamp[ p[0] - y]
packuswb mm3,mm3 //clamp[ p[ms] + y]
movq mm1,mm0
movd [ebx + 0],mm7 //write p[0]
psraw mm0,15
movq mm7,mm0 //save sign
pxor mm1,mm0
;
;
movq mm2,[eax + LIMIT_OFFSET] //get the limit value
;
;
psubsw mm1,mm0 //abs(i)
movq mm5,mm2
por mm7,[eax + FOURONES_OFFSET] //now have -1 or 1
psubw mm5,mm1 //limit - abs(i)
movq mm4,mm5
psraw mm5,15
movd [ebx + ecx],mm3 //write p[ms]
pxor mm4,mm5
psubsw mm4,mm5 //abs(limit - abs(i))
pxor mm6,mm6
movd mm5,[ebx + 4] //p[0]
psubusw mm2,mm4 //limit - abs(limit - abs(i))
movd mm3,[ebx + ecx + 4] //p[ms]
pmullw mm2,mm7 //new y
punpcklbw mm5,mm6
;-
punpcklbw mm3,mm6
;-
paddw mm3,mm2 //p[ms] + y
psubw mm5,mm2 //p[0] - y
packuswb mm3,mm3 //clamp[ p[ms] + y]
;-
packuswb mm5,mm5 //clamp[ p[0] - y]
;-
movd [ebx + ecx + 4],mm3 //write p[ms]
;
movd [ebx + 4],mm5 //write p[0]
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,733 @@
/****************************************************************************
*
* Module Title : simpledeblock_asm.c
*
* Description : Simple deblocking filter for low end machines
*
***************************************************************************/
#define STRICT /* Strict type checking */
/****************************************************************************
* Header Files
****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "postp.h"
/****************************************************************************
* Module Statics
*
* 16-byte aligned constant tables loaded by the MMX code below. Note that
* they are declared without the `static` keyword.
****************************************************************************/
// Eight bytes of 0xFF (-1 as signed bytes); comparand for the "difference < -1" test.
__declspec(align(16)) const unsigned char eightNOnes[]= {255, 255, 255, 255, 255, 255, 255, 255};
// Four words of 4; rounding term added to the filter value before the >>3.
__declspec(align(16)) const short fourFours[] = {4, 4, 4, 4};
// Four words of 1; OR-ed into a 0/-1 sign mask to turn it into +1/-1.
__declspec(align(16)) const short fourOnes[] = { 1, 1, 1, 1};
// Eight bytes of 4; in the visible code only referenced from the disabled byte-wide variant.
__declspec(align(16)) const unsigned char eightFours[] = {4, 4, 4, 4, 4, 4, 4, 4};
// Eight bytes of 1; comparand for the "difference > 1" test.
__declspec(align(16)) const unsigned char eightOnes[] = {1, 1, 1, 1, 1, 1, 1, 1};
// Eight bytes of 128; bias used to move pixels between unsigned and signed byte range.
__declspec(align(16)) const unsigned char eight128s[] = {128, 128, 128, 128, 128, 128, 128, 128};
/****************************************************************************
* Imports
****************************************************************************/
extern UINT32 LoopFilterLimitValuesV1[];
extern UINT32 *DeblockLimitValuesV2;
/****************************************************************************
 *
 *  ROUTINE       :     FilterHoriz_Simple_MMX
 *
 *  INPUTS        :     POSTPROC_INSTANCE *pbi  : Pointer to post-processing instance;
 *                                                only pbi->FrameQIndex is read.
 *                      UINT8 *PixelPtr         : Pointer to the first of the four pixels
 *                                                straddling the edge on the first row.
 *                      INT32 LineLength        : Length of line (stride) in the frame.
 *                      INT32 *BoundingValuePtr : Unused by this implementation.
 *
 *  OUTPUTS       :     PixelPtr[0..3] of each of 8 consecutive rows is rewritten.
 *
 *  RETURNS       :     None
 *
 *  FUNCTION      :     Applies a loop filter to the vertical edge horizontally
 *
 *  SPECIAL NOTES :     The filter limit comes from DeblockLimitValuesV2[pbi->FrameQIndex]
 *                      rather than from BoundingValuePtr.
 *                      Each row is loaded as two 4-byte reads at PixelPtr-2 and
 *                      PixelPtr+2, so bytes PixelPtr[-2..5] are touched.
 *                      The high-variance test is done on wrapped byte differences.
 *                      No EMMS is issued by this routine.
 *
 *  ERRORS        :     None.
 *
 ****************************************************************************/
void FilterHoriz_Simple_MMX(
    POSTPROC_INSTANCE *pbi,
    UINT8 * PixelPtr,
    INT32 LineLength,
    INT32 *BoundingValuePtr
    )
{
    /*************************************************************
    The following code in comments is the C version of the
    function, provided here for reference
    *************************************************************
    INT32 j;
    INT32 FiltVal;
    UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
    for ( j = 0; j < 8; j++ )
    {
        INT32 UseHighVariance;
        FiltVal = ( PixelPtr[2] * 3 ) -
                  ( PixelPtr[1] * 3 );
        UseHighVariance = abs(PixelPtr[0] - PixelPtr[1]) > 1 ||
                          abs(PixelPtr[2] - PixelPtr[3]) > 1;
        if(UseHighVariance)
        {
            FiltVal += ( PixelPtr[0] ) -
                       ( PixelPtr[3] );
        }
        FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
        PixelPtr[1] = LimitTable[(INT32)PixelPtr[1] + FiltVal];
        PixelPtr[2] = LimitTable[(INT32)PixelPtr[2] - FiltVal];
        if(!UseHighVariance)
        {
            FiltVal >>= 1;
            PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] + FiltVal];
            PixelPtr[3] = LimitTable[(INT32)PixelPtr[3] - FiltVal];
        }
        PixelPtr += LineLength;
    }
    ************************************************************/
    UINT32 FLimit = DeblockLimitValuesV2[pbi->FrameQIndex];

    // Holds the four transposed pixel columns p[0]..p[3], 8 bytes each.
    __declspec(align(16)) unsigned char WorkingBuffer[32];

    (void)BoundingValuePtr;

    __asm
    {
        mov         eax,    FLimit                  // Flimit
        xor         ecx,    ecx                     // clear ecx
        mov         edx,    LineLength              // pitch
        mov         esi,    PixelPtr                // src and des pointer
        sub         ecx,    edx                     // negative pitch
        lea         esi,    [esi + edx]             // next line

        // ---- gather and transpose rows 0..3 ----
        movd        mm0,    [esi + ecx + -2]        // xx xx xx xx 01 00 xx xx
        movd        mm4,    [esi + -2]              // xx xx xx xx 11 10 xx xx
        movd        mm2,    [esi + ecx + 2]         // xx xx xx xx xx xx 03 02
        punpcklbw   mm0,    mm4                     // 11 01 10 00 xx xx xx xx
        movd        mm3,    [esi + 2]               // xx xx xx xx xx xx 13 12
        punpcklbw   mm2,    mm3                     // xx xx xx xx 13 03 12 02
        movd        mm1,    [esi+ edx + -2]         // xx xx xx xx 21 20 xx xx
        movd        mm5,    [esi+ edx *2 + -2]      // xx xx xx xx 31 30 xx xx
        movd        mm6,    [esi+ edx + 2]          // xx xx xx xx xx xx 23 22
        punpcklbw   mm1,    mm5                     // 31 21 30 20 xx xx xx xx
        movd        mm7,    [esi+ edx*2 + 2]        // xx xx xx xx xx xx 33 32
        punpckhwd   mm0,    mm1                     // 31 21 11 01 30 20 10 00
        punpcklbw   mm6,    mm7                     // xx xx xx xx 33 23 32 22
        lea         edi,    WorkingBuffer
        punpcklwd   mm2,    mm6                     // 33 23 13 03 32 22 12 02
        lea         esi,    [esi+edx*4]             // four lines below

        // ---- gather and transpose rows 4..7 ----
        movd        mm4,    [esi+ecx + -2]          // xx xx xx xx 41 40 xx xx
        movd        mm1,    [esi + -2]              // xx xx xx xx 51 50 xx xx
        movd        mm3,    [esi+ecx + 2]           // xx xx xx xx xx xx 43 42
        punpcklbw   mm4,    mm1                     // 51 41 50 40 xx xx xx xx
        movd        mm6,    [esi + 2]               // xx xx xx xx xx xx 53 52
        movd        mm1,    [esi + edx + -2]        // xx xx xx xx 61 60 xx xx
        punpcklbw   mm3,    mm6                     // xx xx xx xx 53 43 52 42
        // Rows 6 and 7 only need four bytes per load, exactly like rows 0..5:
        // the unpacks below consume the low dword only.
        movd        mm5,    [esi + edx*2 -2]        // xx xx xx xx 71 70 xx xx
        movd        mm6,    [esi + edx +2]          // xx xx xx xx xx xx 63 62
        punpcklbw   mm1,    mm5                     // 71 61 70 60 xx xx xx xx
        movd        mm7,    [esi + edx*2 + 2]       // xx xx xx xx xx xx 73 72
        punpckhwd   mm4,    mm1                     // 71 61 51 41 70 60 50 40
        punpcklbw   mm6,    mm7                     // xx xx xx xx 73 63 72 62
        movq        mm1,    mm0                     // 31 21 11 01 30 20 10 00
        punpcklwd   mm3,    mm6                     // 73 63 53 43 72 62 52 42
        movq        mm7,    mm2                     // 33 23 13 03 32 22 12 02
        punpckldq   mm0,    mm4                     // 70 60 50 40 30 20 10 00
        movq        [edi],  mm0                     // save p[0]
        punpckhdq   mm1,    mm4                     // 71 61 51 41 31 21 11 01
        movq        mm4,    mm0                     // copy of p[0]
        movq        [edi+8], mm1                    // save p[1]
        punpckldq   mm2,    mm3                     // 72 62 52 42 32 22 12 02
        movq        mm5,    mm1                     // copy of p[1]
        movq        [edi+16], mm2                   // save p[2]
        punpckhdq   mm7,    mm3                     // 73 63 53 43 33 23 13 03
        movq        mm6,    mm2                     // copy of p[2]
        movq        [edi+24], mm7                   // save p[3]

        // mm0, 4 ---> p[0]
        // mm1, 5 ---> p[1]
        // mm2, 6 ---> p[2]
        // mm7    ---> p[3]

        // ---- UseHighVariance = abs(p[0]-p[1]) > 1 || abs(p[2]-p[3]) > 1 ----
        movq        mm1,    eightNOnes              // mm1 = FFFFFFFFFFFFFFFF
        psubb       mm0,    mm5                     // p[0]-p[1]
        movq        mm7,    eightOnes               // mm7 = 0101010101010101
        pcmpgtb     mm1,    mm0                     // p[0]-p[1]<-1?
        pcmpgtb     mm0,    mm7                     // p[0]-p[1]>1?
        movq        mm3,    eightNOnes              // mm3 = FFFFFFFFFFFFFFFF
        por         mm0,    mm1                     // abs(p[0]-p[1])>1?
        movq        mm1,    mm7                     // mm1 = 0101010101010101
        movq        mm7,    [edi+24]                // p[3]
        psubb       mm2,    mm7                     // p[2]-p[3]
        pcmpgtb     mm3,    mm2                     // p[2]-p[3]<-1?
        pcmpgtb     mm2,    mm1                     // p[2]-p[3]>1?
        por         mm2,    mm3                     // abs(p[3]-p[2])>1?
        por         mm0,    mm2                     // mm0 = UseHighVariance

        // mm0 = UseHighVariance
        // mm4 = P[0]
        // mm5 = P[1]
        // mm6 = P[2]
        // mm7 = P[3]

        // ---- FiltVal = 3*(p[2]-p[1]) + UseHighVariance*(p[0]-p[3]), in words ----
        pxor        mm1,    mm1                     // clear mm1 for unpack
        movq        mm2,    mm5                     // copy p[1]
        movq        mm3,    mm6                     // copy of p[2]
        punpcklbw   mm2,    mm1                     // low four p[1]
        punpcklbw   mm3,    mm1                     // low four p[2]
        psubw       mm3,    mm2                     // low four p[2]-p[1]
        punpckhbw   mm5,    mm1                     // high four p[1]
        movq        mm2,    mm3                     // low p[2]-p[1]
        punpckhbw   mm6,    mm1                     // high four p[2]
        paddw       mm3,    mm3                     // 2*(p[2]-p[1]) low four
        psubw       mm6,    mm5                     // high four p[2]-p[1]
        paddw       mm2,    mm3                     // 3*(p[2]-p[1]) low four
        movq        mm5,    mm6                     // high four p[2]-p[1]
        movq        mm3,    mm4                     // copy of p[0]
        paddw       mm6,    mm6                     // 2*(p[2]-p[1]) highfour
        punpcklbw   mm3,    mm1                     // low four p[0]
        paddw       mm5,    mm6                     // 3*(p[2]-p[1]) highfour
        punpckhbw   mm4,    mm1                     // high four p[0]
        movq        mm6,    mm7                     // copy of p[3]
        punpcklbw   mm7,    mm1                     // low four p[3]
        punpckhbw   mm6,    mm1                     // high four p[3]
        psubw       mm3,    mm7                     // low four p[0]-p[3]
        punpcklbw   mm1,    mm0                     // UseHighVariance Low four
        pxor        mm7,    mm7                     // clear mm7 for unpack
        psraw       mm1,    8                       // FFFF or 0000
        punpckhbw   mm7,    mm0                     // UseHighVariance high four
        psubw       mm4,    mm6                     // high four p[0]-p[3]
        psraw       mm7,    8                       // FFFF or 0000
        pand        mm3,    mm1                     // And UseHighVariance
        pand        mm4,    mm7                     // And UseHighVariance
        paddw       mm2,    mm3                     // Low four 3*(p[2]-p[1]) + (p[0]-p[3])*Flag
        paddw       mm4,    mm5                     // High four 3*(p[2]-p[1]) + (p[0]-p[3])*Flag
        paddw       mm2,    fourFours               // adjust before shift
        movd        mm1,    eax                     // Flimit
        paddw       mm4,    fourFours               // adjust before shift
        psraw       mm2,    3                       // shift
        psraw       mm4,    3                       // shift
        movq        mm3,    mm2                     // copy of low four
        punpcklwd   mm1,    mm1                     // Flimit Flimit
        movq        mm5,    mm4                     // copy of Highfour
        punpckldq   mm1,    mm1                     // Four Flimit
        psraw       mm2,    15                      // FFFF or 0000
        movq        mm6,    mm1                     // copy of FLimit
        psraw       mm4,    15                      // FFFF or 0000
        pxor        mm3,    mm2
        psubsw      mm3,    mm2                     // abs(FiltVal) for Low
        pxor        mm5,    mm4
        psubsw      mm5,    mm4                     // abs(FiltVal) for High
        por         mm2,    fourOnes                // -1 or +1 for sign
        por         mm4,    fourOnes                // -1 or +1 for sign

        // mm0 = UseHighVariance?
        // mm1 = FLimit in shorts
        // mm2 = sign for lower four FiltVal
        // mm3 = abs for lower four FiltVal
        // mm4 = sign for higher four FiltVal
        // mm5 = abs for higher four FiltVal

        // ---- bound: sign * max(0, Flimit - abs(Flimit - abs(FiltVal))) ----
        movq        mm6,    mm1                     // copy of Flimit
        psubusw     mm1,    mm3                     // Flimit - abs(FiltVal)
        psubusw     mm3,    mm6                     // abs(Filtval) - FLimit
        por         mm3,    mm1                     // abs(Flimit - abs(FiltVal))
        movq        mm1,    mm6                     // Flimit
        psubusw     mm1,    mm3                     // Flimit - abs(FLimit - abs(FiltVal))
        movq        mm3,    mm6                     // copy of the Flimit
        pmullw      mm1,    mm2                     // Get the sign back
        psubusw     mm3,    mm5                     // Flimit - abs(Filtval)
        psubusw     mm5,    mm6                     // abs(Filtval) - Flimit
        por         mm5,    mm3                     // abs(Flimit - abs(FiltVal))
        movq        mm3,    mm6                     // Flimit
        psubusw     mm3,    mm5                     // Flimit - abs(FLimit - abs(FiltVal))
        pmullw      mm4,    mm3                     // Get the sign back
        movq        mm2,    mm4

        // mm0 = UseHighVariance
        // mm1 = low four
        // mm2 = high four

        // ---- apply delta to p[1]/p[2], and delta/2 to p[0]/p[3] when !UseHighVariance ----
        movq        mm5,    [edi+8]                 // p[1]
        movq        mm3,    mm1                     // copy of low four
        movq        mm4,    eight128s               // 128 for offset
        packsswb    mm1,    mm2                     // pack to chars
        movq        mm6,    [edi+16]                // p[2]
        psubb       mm5,    mm4                     // unsigned -> signed
        psubb       mm6,    mm4                     // unsigned -> signed
        paddsb      mm5,    mm1                     // p[1]+delta
        psubsb      mm6,    mm1                     // p[2]-delta
        paddb       mm5,    mm4                     // offset back
        paddb       mm6,    mm4                     // offset back
        movq        mm1,    [edi]                   // p[0]
        psraw       mm3,    1                       // delta/2
        psraw       mm2,    1                       // delta/2
        movq        mm7,    [edi+24]                // p[3]
        packsswb    mm3,    mm2                     // pack to chars
        psubb       mm1,    mm4                     // unsigned -> signed
        pandn       mm0,    mm3                     // and !UseHighVariance
        psubb       mm7,    mm4                     // unsigned -> signed
        psubsb      mm7,    mm0                     // p[3] - delta/2
        paddsb      mm0,    mm1                     // p[0] + delta/2
        paddb       mm7,    mm4                     // offset back
        paddb       mm0,    mm4                     // offset back
        lea         esi,    [esi+ecx*4]             // esi now point to the second line

        //done with calculation, now write back the results
        // mm0 -> 7060504030201000
        // mm5 -> 7161514131211101
        // mm6 -> 7262524232221202
        // mm7 -> 7363534333231303
        movq        mm4,    mm0                     // 7060504030201000
        punpcklbw   mm0,    mm5                     // 3130212011100100
        punpckhbw   mm4,    mm5                     // 7170616051504140
        movq        mm2,    mm6                     // 7262524232221202
        punpcklbw   mm2,    mm7                     // 3332232213120302
        punpckhbw   mm6,    mm7                     // 7372636253524342
        movq        mm1,    mm0                     // 3130212011100100
        punpcklwd   mm0,    mm2                     // 1312111003020100
        movd        [esi+ecx], mm0                  // write 03020100
        punpckhwd   mm1,    mm2                     // 3332313023222120
        psrlq       mm0,    32                      // xxxxxxxx13121110
        movd        [esi],  mm0                     // write 13121110
        movq        mm5,    mm4                     // 7170616051504140
        punpcklwd   mm4,    mm6                     // 5352515043424140
        movd        [esi+edx], mm1                  // write 23222120
        psrlq       mm1,    32                      // xxxxxxxx33323130
        punpckhwd   mm5,    mm6                     // 7372717063626160
        movd        [esi+edx*2],mm1                 // write 33323130
        lea         esi,    [esi+edx*4]             // fifth line
        movd        [esi+ecx], mm4                  // write 43424140
        psrlq       mm4,    32                      // xxxxxxxx53525150
        movd        [esi],  mm4                     // write 53525150
        movd        [esi+edx], mm5                  // write 63626160
        psrlq       mm5,    32                      // xxxxxxxx73727170
        movd        [esi+edx*2], mm5                // write 73727170
    }
}
/****************************************************************************
*
* ROUTINE : FilterVert_Simple_MMX
*
* INPUTS : POSTPROC_INSTANCE *pbi : Pointer to post-processing instance;
* only pbi->FrameQIndex is read.
* UINT8 *PixelPtr : Pointer to the first pixel of the row just below the edge.
* INT32 Pitch : Length of line (stride) in the frame.
* INT32 *BoundingValuePtr : Unused by this implementation.
*
* OUTPUTS : The 8 pixels of each of the rows at -2*Pitch, -Pitch, 0 and +Pitch
* relative to PixelPtr are rewritten in place.
*
* RETURNS : None
*
* FUNCTION : Applies a loop filter to a horizontal edge vertically
*
* SPECIAL NOTES : The filter limit comes from DeblockLimitValuesV2[pbi->FrameQIndex]
* rather than from BoundingValuePtr.
* In the inline comments p[0], p[1], p[2], p[3] are the rows at
* -2*Pitch, -Pitch, 0 and +Pitch.
* No EMMS is issued by this routine.
*
* ERRORS : None.
*
****************************************************************************/
void FilterVert_Simple_MMX(
POSTPROC_INSTANCE *pbi,
UINT8 * PixelPtr,
INT32 Pitch,
INT32 *BoundingValuePtr
)
{
/************************************************************
The following code in comments is the C version of the
function, provided here for reference
************************************************************
INT32 j;
INT32 FiltVal;
UINT8 * LimitTable = &LimitVal_VP31[VAL_RANGE];
for ( j = 0; j < 8; j++ )
{
INT32 UseHighVariance;
FiltVal = ( ( (INT32) PixelPtr[0] * 3 ) -
( (INT32)PixelPtr[- LineLength] * 3 ));
UseHighVariance = abs(PixelPtr[- (2 * LineLength)] - PixelPtr[- LineLength]) > 1 ||
abs(PixelPtr[0] - PixelPtr[LineLength]) > 1;
if(UseHighVariance)
{
FiltVal += ( (INT32)PixelPtr[- (2 * LineLength)] ) -
( (INT32)PixelPtr[LineLength] );
}
FiltVal = BoundingValuePtr[(FiltVal + 4) >> 3];
PixelPtr[- LineLength] = LimitTable[(INT32)PixelPtr[- LineLength] + FiltVal];
PixelPtr[0] = LimitTable[(INT32)PixelPtr[0] - FiltVal];
if(!UseHighVariance)
{
FiltVal >>=1 ;
PixelPtr[- 2* LineLength] = LimitTable[(INT32)PixelPtr[- 2 * LineLength] + FiltVal];
PixelPtr[LineLength] = LimitTable[(INT32)PixelPtr[LineLength] - FiltVal];
}
PixelPtr ++;
}
*************************************************************/
// Filter limit for the current frame quantizer index.
UINT32 FLimit = DeblockLimitValuesV2[pbi->FrameQIndex];
(void)BoundingValuePtr;
__asm
{
mov eax, FLimit // Flimit Values
xor ecx, ecx // clear ecx for negative pitch
mov edx, Pitch // Pitch
mov esi, PixelPtr // Pointer to Src and Destination
sub ecx, edx // negative pitch
movq mm2, [esi] // p[2]
movq mm7, eightOnes // mm7 = 0101010101010101
movq mm0, [esi+ecx*2] // p[0]
movq mm6, mm2 // Make a copy
movq mm5, [esi+ecx] // p[1]
movq mm4, mm0 // Make a copy
movq mm1, eightNOnes // mm1 = FFFFFFFFFFFFFFFF
psubb mm0, mm5 // p[0]-p[1]
pcmpgtb mm1, mm0 // p[0]-p[1]<-1?
pcmpgtb mm0, mm7 // p[0]-p[1]>1?
movq mm3, eightNOnes // mm3 = FFFFFFFFFFFFFFFF
por mm0, mm1 // abs(p[0]-p[1])>1?
movq mm1, mm7 // mm1 = 0101010101010101
movq mm7, [esi+edx] // p[3]
psubb mm2, mm7 // p[2]-p[3]
pcmpgtb mm3, mm2 // p[2]-p[3]<-1?
pcmpgtb mm2, mm1 // p[2]-p[3]>1?
por mm2, mm3 // abs(p[3]-p[2])>1?
// NOTE(review): mm3 is overwritten by "movq mm3, mm6" below before being
// read; this load appears to serve only the disabled byte-wide variant.
movq mm3, eight128s // mm3 = 8080808080808080
por mm0, mm2 // mm0 = UseHighVariance
// mm0 = UseHighVariance
// mm4 = P[0]
// mm5 = P[1]
// mm6 = P[2]
// mm7 = P[3]
// mm3 = 8080808080808080
pxor mm1, mm1 // clear mm1 for unpack
movq mm2, mm5 // copy p[1]
movq mm3, mm6 // copy of p[2]
punpcklbw mm2, mm1 // low four p[1]
punpcklbw mm3, mm1 // low four p[2]
psubw mm3, mm2 // low four p[2]-p[1]
punpckhbw mm5, mm1 // high four p[1]
movq mm2, mm3 // low p[2]-p[1]
punpckhbw mm6, mm1 // high four p[2]
paddw mm3, mm3 // 2*(p[2]-p[1]) low four
psubw mm6, mm5 // high four p[2]-p[1]
paddw mm2, mm3 // 3*(p[2]-p[1]) low four
movq mm5, mm6 // high four p[2]-p[1]
movq mm3, mm4 // copy of p[0]
paddw mm6, mm6 // 2*(p[2]-p[1]) highfour
punpcklbw mm3, mm1 // low four p[0]
paddw mm5, mm6 // 3*(p[2]-p[1]) highfour
punpckhbw mm4, mm1 // high four p[0]
movq mm6, mm7 // copy of p[3]
punpcklbw mm7, mm1 // low four p[3]
punpckhbw mm6, mm1 // high four p[3]
psubw mm3, mm7 // low four p[0]-p[3]
punpcklbw mm1, mm0 // UseHighVariance Low four
pxor mm7, mm7 // clear mm7 for unpack
psraw mm1, 8 // FFFF or 0000
punpckhbw mm7, mm0 // UseHighVariance high four
psubw mm4, mm6 // high four p[0]-p[3]
psraw mm7, 8 // FFFF or 0000
pand mm3, mm1 // And UseHighVariance
pand mm4, mm7 // And UseHighVariance
paddw mm2, mm3 // Low four 3*(p[2]-p[1]) + (p[0]-p[3])*Flag
paddw mm4, mm5 // High four 3*(p[2]-p[1]) + (p[0]-p[3])*Flag
paddw mm2, fourFours // adjust before shift
paddw mm4, fourFours // adjust before shift
movd mm1, eax // Flimit
psraw mm2, 3 // shift
psraw mm4, 3 // shift
movq mm3, mm2 // copy of low four
punpcklwd mm1, mm1 // Flimit Flimit
movq mm5, mm4 // copy of Highfour
punpckldq mm1, mm1 // Four Flimit
psraw mm2, 15 // FFFF or 0000
movq mm6, mm1 // copy of FLimit
psraw mm4, 15 // FFFF or 0000
pxor mm3, mm2
psubsw mm3, mm2 // abs(FiltVal) for Low
pxor mm5, mm4
psubsw mm5, mm4 // abs(FiltVal) for High
por mm2, fourOnes // -1 or +1 for sign
por mm4, fourOnes // -1 or +1 for sign
/*
THE FOLLOWING CODE TRIED TO DO IT IN CHARS, BUT GENERATES DIFFERENT RESULTS
THAN THE C VERSION BECAUSE OF OVERFLOW IN VERY RARE CASES
pxor mm4, mm3 // offset all the pixels by 128
pxor mm5, mm3
pxor mm6, mm3
pxor mm7, mm3
psubsb mm6, mm5 // p[2]-p[1]
psubsb mm4, mm7 // p[0]-p[3]
movq mm2, mm6 // Make a copy p[2] - p[1]
paddsb mm6, mm6 // 2 * p[2] - p[1]
pand mm4, mm0 // UseHighVariance * (p[0]-p[3])
paddsb mm2, mm6 // 3*(p[2]-p[1])
paddsb mm4, mm2 // 3*(p[2]-p(1)+ (p[0]-p[3])*Flag
paddsb mm4, eightFours // adjust before shift
pxor mm7, mm7 // clear mm7 for unpack
movd mm1, eax // FLimit
pxor mm2, mm2 // make a copy
punpcklwd mm1, mm1 // FLimit FLimit
punpcklbw mm2, mm4 // Unpack to shorts
punpckldq mm1, mm1 // 4 Flimit in short
punpckhbw mm7, mm4 // Unpcak to shorts
psraw mm2, 11 // >> 3-> FiltVal low four
psraw mm7, 11 // >> 3-> FiltVal High four
movq mm3, mm2 // make a copy of Low 4
movq mm4, mm7
pxor mm7, mm7
movq mm5, mm4 // make a copy of high 4
psraw mm2, 15 // FFFF or 0000
movq mm6, mm1 // copy of FLimit
psraw mm4, 15 // FFFF or 0000
pxor mm3, mm2
psubsw mm3, mm2 // abs(FiltVal) for Low
pxor mm5, mm4
psubsw mm5, mm4 // abs(FiltVal) for Low
por mm2, fourOnes // -1 or -1 for sign
por mm4, fourOnes // -1 or +1 for sign
*/
// mm0 = UseHighVariance?
// mm1 = FLimit in shorts
// mm2 = sign for lower four FiltVal
// mm3 = abs for lower four FiltVal
// mm4 = sign for higher four FiltVal
// mm5 = abs for higher four FiltVal
movq mm6, mm1 // copy of Flimit
psubusw mm1, mm3 // Flimit - abs(FiltVal)
psubusw mm3, mm6 // abs(Filtval) - FLimit
por mm3, mm1 // abs(Flimit - abs(FiltVal))
movq mm1, mm6 // Flimit
psubusw mm1, mm3 // Flimit - abs(FLimit - abs(FiltVal))
movq mm3, mm6 // copy of the Flimit
pmullw mm2, mm1 // Get the sign back
psubusw mm3, mm5 // Flimit - abs(Filtval)
psubusw mm5, mm6 // abs(Filtval) - Flimit
por mm5, mm3 // abs(Flimit - abs(FiltVal))
movq mm3, mm6 // Flimit
psubusw mm3, mm5 // Flimit - abs(FLimit - abs(FiltVal))
pmullw mm4, mm3 // Get the sign back
// mm0 = UseHighVariance
// mm2 = Final value with sign for lower four
// mm4 = Final value with sign for higher four
movq mm5, [esi+ecx] // p[1]
movq mm1, mm2 // make a copy of low four
movq mm7, eight128s // 128 for offset
packsswb mm2, mm4 // pack to chars for operation
movq mm6, [esi] // p[2]
psubb mm5, mm7 // unsigned -> signed
psubb mm6, mm7 // unsigned -> signed
paddsb mm5, mm2 // p[1] + Delta
psubsb mm6, mm2 // p[2] - Delta
paddb mm5, mm7 // offset back
paddb mm6, mm7 // offset back
movq [esi+ecx], mm5 // write out p[1]
psraw mm1, 1 // Delta/2
psraw mm4, 1 // Delta/2
movq [esi], mm6 // write out p[2]
movq mm2, [esi+ecx*2] // p[0]
packsswb mm1, mm4 // pack to chars
movq mm3, [esi+edx] // p[3]
pandn mm0, mm1 // and !UseHighVariance
psubb mm2, mm7 // unsigned -> signed
psubb mm3, mm7 // unsigned -> signed
paddsb mm2, mm0 // p[0] + Delta/2 (zero where UseHighVariance)
paddb mm2, mm7 // offset back
movq [esi+ecx*2], mm2 // write p[0]
psubsb mm3, mm0 // p[3] - Delta/2 (zero where UseHighVariance)
paddb mm3, mm7 // offset back
movq [esi+edx], mm3 // write p[3]
}
}