Initial community commit
This commit is contained in:
parent
537bcbc862
commit
fc06254474
16440 changed files with 4239995 additions and 2 deletions
61
Src/libvpShared/corelibs/cdxv/vputil/Makefile
Normal file
61
Src/libvpShared/corelibs/cdxv/vputil/Makefile
Normal file
|
@ -0,0 +1,61 @@
|
|||
## Target to build
TARGET          =libvputil

## TOOLS
CC              = ecc
LD              = ecc
AR              = ar
OBJDUMP         = objdump
RM              = rm -f

## Directories
TOPDIR          =C:\DuckSoft
PRIVATEINCLUDE  =${TOPDIR}\private\include
CORELIBSINCLUDE =${TOPDIR}\private\corelibs\include
CDXVINCLUDE     =${TOPDIR}\private\corelibs\cdxv\include
VPPPINCLUDE     =${TOPDIR}\private\corelibs\cdxv\vputil\include
CURRENTDIR      =${TOPDIR}\private\corelibs\cdxv\vputil
LIBDIR          =${TOPDIR}\private\corelibs\lib\mapca

## Compile Flags
ALLINCLUDES     =-I${CDXVINCLUDE} -I${CORELIBSINCLUDE} -I${PRIVATEINCLUDE} -I${VPPPINCLUDE}
VP6DEFINES      =-DPREDICT_2D -DVFW_COMP -DCOMPDLL -DPOSTPROCESS -DCPUISLITTLEENDIAN -DNORMALIZED
ETIDEFINES      =-DMAPCA
ALLDEFINES      =${VP6DEFINES} ${ETIDEFINES}
DEBUG           =-O2
CFLAGS          =-msvc -align 8 -etswp -mP3OPT_nonlocal_calls_through_register=true \
                 -mP2OPT_suppress_library_call_conv_warnings=TRUE -maalign_branch_target \
                 -magen_interroutine_padding
ALLFLAGS        =$(CFLAGS) ${ALLDEFINES} ${ALLINCLUDES} ${DEBUG}

## Files
OBJS            =generic\fdct.o \
                 generic\idctpart.o \
                 generic\reconstruct.o \
                 generic\vputil.o \
                 bsp\bspFdct.o \
                 bsp\bspIDct.o \
                 bsp\bsprecon.o \
                 bsp\bspvputil.o \
                 bsp\uoptsystemdependant.o

SRCS            =$(OBJS:.o=.c)

ARTARGET        =${TARGET}.a

# Neither goal names a file that is left in the build directory (the archive
# is moved to ${LIBDIR}), so both are declared phony.
.PHONY: ARTARGET clean

# archive (default goal). The goal is the literal word ARTARGET; the name is
# kept so that existing "make ARTARGET" invocations continue to work.
ARTARGET: ${OBJS}
	${AR} -cr ${ARTARGET} ${OBJS}
	mv ${ARTARGET} ${LIBDIR}

# Build each object from its own source file only. The previous rule,
# "${OBJS} : ${SRCS}", made every object depend on every source, so touching
# one file recompiled the whole library.
.c.o:
	$(CC) $(ALLFLAGS) -c $*.c -o $*.o

clean:
	${RM} ${OBJS} ${ARTARGET}
|
||||
|
||||
|
312
Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c
Normal file
312
Src/libvpShared/corelibs/cdxv/vputil/generic/fdct.c
Normal file
|
@ -0,0 +1,312 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : fdct.c
|
||||
*
|
||||
* Description : Fast 8x8 DCT C-Implementation.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
#include "dct.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Macros
|
||||
****************************************************************************/
|
||||
// Replicates the sign bit of a 32-bit value across the whole word: all ones (-1)
// for a negative X and 0 otherwise, on targets where ">>" of a negative int is an
// arithmetic shift.
#define SIGNBITDUPPED(X)    ( (signed)((X) & 0x80000000) >> 31 )

// Adds 0xffff to a negative X (and nothing to a non-negative X) so that a
// following ">> 16" truncates toward zero instead of toward minus infinity.
// NOTE: the expansion deliberately ends in ';' because some call sites in this
// file invoke DOROUND without a semicolon of their own.
#define DOROUND(X)          (X) = ( (SIGNBITDUPPED(X) & (0xffff)) + (X) );
|
||||
|
||||
/****************************************************************************
|
||||
* Module statics
|
||||
****************************************************************************/
|
||||
// Cosine multipliers in 16.16 fixed point (cosine value * 65536, rounded).
// Products formed with them are shifted right by 16 after rounding.
// They are only ever read, hence const.
static const INT32 xC1S7 = 64277;
static const INT32 xC2S6 = 60547;
static const INT32 xC3S5 = 54491;
static const INT32 xC4S4 = 46341;
static const INT32 xC5S3 = 36410;
static const INT32 xC6S2 = 25080;
static const INT32 xC7S1 = 12785;
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : fdct_short_C_orig
|
||||
*
|
||||
* INPUTS : INT16 *InputData : 16-bit input data.
|
||||
*
|
||||
* OUTPUTS : INT16 *OutputData : 16-bit transform coefficients.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Performs an 8x8 2-D fast DCT.
|
||||
*
|
||||
* The algorithm used is derived from the flowgraph for
|
||||
* the Vetterli and Ligtenberg fast 1-D dct given in the
|
||||
* JPEG reference book by Pennebaker and Mitchell.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void fdct_short_C_orig ( INT16 *InputData, INT16 *OutputData )
|
||||
{
|
||||
int loop;
|
||||
INT32 is07, is12, is34, is56;
|
||||
INT32 is0734, is1256;
|
||||
INT32 id07, id12, id34, id56;
|
||||
INT32 irot_input_x, irot_input_y;
|
||||
INT32 icommon_product1; // Re-used product (c4s4 * (s12 - s56)).
|
||||
INT32 icommon_product2; // Re-used product (c4s4 * (d12 + d56)).
|
||||
INT32 temp1, temp2; // intermediate variable for computation
|
||||
INT32 InterData[64];
|
||||
|
||||
INT32 *ip = InterData;
|
||||
INT16 *op = OutputData;
|
||||
|
||||
for ( loop=0; loop<8; loop++ )
|
||||
{
|
||||
// Pre calculate some common sums and differences.
|
||||
is07 = InputData[0] + InputData[7];
|
||||
is12 = InputData[1] + InputData[2];
|
||||
is34 = InputData[3] + InputData[4];
|
||||
is56 = InputData[5] + InputData[6];
|
||||
|
||||
id07 = InputData[0] - InputData[7];
|
||||
id12 = InputData[1] - InputData[2];
|
||||
id34 = InputData[3] - InputData[4];
|
||||
id56 = InputData[5] - InputData[6];
|
||||
|
||||
is0734 = is07 + is34;
|
||||
is1256 = is12 + is56;
|
||||
|
||||
// Pre-Calculate some common product terms.
|
||||
icommon_product1 = xC4S4*(is12 - is56);
|
||||
DOROUND ( icommon_product1 )
|
||||
icommon_product1 >>= 16;
|
||||
|
||||
icommon_product2 = xC4S4*(id12 + id56);
|
||||
DOROUND ( icommon_product2 )
|
||||
icommon_product2 >>= 16;
|
||||
|
||||
ip[0] = (xC4S4*(is0734 + is1256));
|
||||
DOROUND ( ip[0] );
|
||||
ip[0] >>= 16;
|
||||
|
||||
ip[4] = (xC4S4*(is0734 - is1256));
|
||||
DOROUND ( ip[4] );
|
||||
ip[4] >>= 16;
|
||||
|
||||
// Define inputs to rotation for outputs 2 and 6
|
||||
irot_input_x = id12 - id56;
|
||||
irot_input_y = is07 - is34;
|
||||
|
||||
// Apply rotation for outputs 2 and 6.
|
||||
temp1 = xC6S2*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC2S6*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
ip[2] = temp1 + temp2;
|
||||
|
||||
temp1 = xC6S2*irot_input_y;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC2S6*irot_input_x;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
ip[6] = temp1 -temp2;
|
||||
|
||||
// Define inputs to rotation for outputs 1 and 7
|
||||
irot_input_x = icommon_product1 + id07;
|
||||
irot_input_y = -( id34 + icommon_product2 );
|
||||
|
||||
// Apply rotation for outputs 1 and 7.
|
||||
temp1 = xC1S7*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC7S1*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
ip[1] = temp1 - temp2;
|
||||
|
||||
temp1 = xC7S1*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC1S7*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
ip[7] = temp1 + temp2;
|
||||
|
||||
// Define inputs to rotation for outputs 3 and 5
|
||||
irot_input_x = id07 - icommon_product1;
|
||||
irot_input_y = id34 - icommon_product2;
|
||||
|
||||
// Apply rotation for outputs 3 and 5.
|
||||
temp1 = xC3S5 * irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC5S3*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
ip[3] = temp1 - temp2;
|
||||
|
||||
temp1 = xC5S3*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC3S5*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
ip[5] = temp1 + temp2;
|
||||
|
||||
// Increment data pointer for next row.
|
||||
InputData += 8;
|
||||
ip += 8; // advance pointer to next row
|
||||
}
|
||||
|
||||
// Performed DCT on rows, now transform the columns
|
||||
ip = InterData;
|
||||
for ( loop=0; loop<8; loop++ )
|
||||
{
|
||||
// Pre calculate some common sums and differences.
|
||||
is07 = ip[0 * 8] + ip[7 * 8];
|
||||
is12 = ip[1 * 8] + ip[2 * 8];
|
||||
is34 = ip[3 * 8] + ip[4 * 8];
|
||||
is56 = ip[5 * 8] + ip[6 * 8];
|
||||
|
||||
id07 = ip[0 * 8] - ip[7 * 8];
|
||||
id12 = ip[1 * 8] - ip[2 * 8];
|
||||
id34 = ip[3 * 8] - ip[4 * 8];
|
||||
id56 = ip[5 * 8] - ip[6 * 8];
|
||||
|
||||
is0734 = is07 + is34;
|
||||
is1256 = is12 + is56;
|
||||
|
||||
// Pre-Calculate some common product terms.
|
||||
icommon_product1 = xC4S4*(is12 - is56);
|
||||
icommon_product2 = xC4S4*(id12 + id56);
|
||||
DOROUND ( icommon_product1 )
|
||||
DOROUND ( icommon_product2 )
|
||||
icommon_product1 >>= 16;
|
||||
icommon_product2 >>= 16;
|
||||
|
||||
temp1 = xC4S4*(is0734 + is1256);
|
||||
temp2 = xC4S4*(is0734 - is1256);
|
||||
DOROUND ( temp1 );
|
||||
DOROUND ( temp2 );
|
||||
temp1 >>= 16;
|
||||
temp2 >>= 16;
|
||||
op[0*8] = (INT16)temp1;
|
||||
op[4*8] = (INT16)temp2;
|
||||
|
||||
// Define inputs to rotation for outputs 2 and 6
|
||||
irot_input_x = id12 - id56;
|
||||
irot_input_y = is07 - is34;
|
||||
|
||||
// Apply rotation for outputs 2 and 6.
|
||||
temp1 = xC6S2*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC2S6*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
op[2*8] = (INT16)(temp1 + temp2);
|
||||
|
||||
temp1 = xC6S2*irot_input_y;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC2S6*irot_input_x;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
op[6*8] = (INT16)(temp1 -temp2);
|
||||
|
||||
// Define inputs to rotation for outputs 1 and 7
|
||||
irot_input_x = icommon_product1 + id07;
|
||||
irot_input_y = -( id34 + icommon_product2 );
|
||||
|
||||
// Apply rotation for outputs 1 and 7.
|
||||
temp1 = xC1S7*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC7S1*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
op[1*8] = (INT16) (temp1 - temp2);
|
||||
|
||||
temp1 = xC7S1*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC1S7*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
op[7*8] = (INT16)(temp1 + temp2);
|
||||
|
||||
// Define inputs to rotation for outputs 3 and 5
|
||||
irot_input_x = id07 - icommon_product1;
|
||||
irot_input_y = id34 - icommon_product2;
|
||||
|
||||
// Apply rotation for outputs 3 and 5.
|
||||
temp1 = xC3S5*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC5S3*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
op[3*8] = (INT16)(temp1 - temp2);
|
||||
|
||||
temp1 = xC5S3*irot_input_x;
|
||||
DOROUND ( temp1 );
|
||||
temp1 >>= 16;
|
||||
temp2 = xC3S5*irot_input_y;
|
||||
DOROUND ( temp2 );
|
||||
temp2 >>= 16;
|
||||
op[5*8] = (INT16) (temp1 + temp2);
|
||||
|
||||
// Increment data pointer for next column.
|
||||
ip ++;
|
||||
op ++;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : fdct_short_C
|
||||
*
|
||||
* INPUTS : INT16 *InputData : 16-bit input data.
|
||||
*
|
||||
* OUTPUTS : INT16 *OutputData : 16-bit transform coefficients.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Performs an 8x8 2-D fast DCT.
|
||||
*
|
||||
* The function to up the precision of FDCT by number of bits
|
||||
* defined by FDCT_PRECISION_BITS.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void fdct_short_C ( INT16 *DCTDataBuffer, INT16 *DCT_codes )
|
||||
{
|
||||
|
||||
INT32 i;
|
||||
|
||||
// Increase precision on input to fdct
|
||||
for ( i = 0; i < 64; i++ )
|
||||
DCTDataBuffer[i] = DCTDataBuffer[i] << FDCT_PRECISION_BITS;
|
||||
|
||||
// Transform the error signal using the forward DCT to get set of transform coefficients
|
||||
fdct_short_C_orig ( DCTDataBuffer, DCT_codes );
|
||||
|
||||
// Strip off the extra bits from the DCT output.
|
||||
// This should ultimately be merged into the quantize process but there are also
|
||||
// implications for DC prediction that would then need to be sorted
|
||||
for ( i = 0; i < 64; i++ )
|
||||
{
|
||||
// signed shift modified so behaves like "/" (truncates towards 0 for + and -)
|
||||
if ( DCT_codes[i] >= 0 )
|
||||
DCT_codes[i] = (DCT_codes[i]) >> FDCT_PRECISION_BITS;
|
||||
else
|
||||
DCT_codes[i] = (DCT_codes[i] + FDCT_PRECISION_NEG_ADJ) >> FDCT_PRECISION_BITS;
|
||||
}
|
||||
|
||||
}
|
921
Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c
Normal file
921
Src/libvpShared/corelibs/cdxv/vputil/generic/idctpart.c
Normal file
|
@ -0,0 +1,921 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : idctpart.c
|
||||
*
|
||||
* Description : IDCT with multiple versions based on # of non 0 coeffs
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
|
||||
#include "dct.h"
|
||||
#include "string.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Macros
|
||||
****************************************************************************/
|
||||
// Shorthand integer type names used by the IDCT routines in this file.
#define int32 int
|
||||
#define int16 short
|
||||
// Bias added to the column-pass terms before their final ">> 4".
#define IdctAdjustBeforeShift 8
|
||||
|
||||
// Cosine multipliers scaled by 65536; every product formed with them is
// shifted right by 16 afterwards.
#define xC1S7 64277
|
||||
#define xC2S6 60547
|
||||
#define xC3S5 54491
|
||||
#define xC4S4 46341
|
||||
#define xC5S3 36410
|
||||
#define xC6S2 25080
|
||||
#define xC7S1 12785
|
||||
|
||||
/****************************************************************************
|
||||
* Module statics
|
||||
****************************************************************************/
|
||||
// For each position in the (zig-zag ordered) coefficient list, the index in the
// 8x8 block at which the dequantizers store that coefficient.
static const UINT32 dequant_index[64] =
|
||||
{
|
||||
0, 1, 8, 16, 9, 2, 3, 10,
|
||||
17, 24, 32, 25, 18, 11, 4, 5,
|
||||
12, 19, 26, 33, 40, 48, 41, 34,
|
||||
27, 20, 13, 6, 7, 14, 21, 28,
|
||||
35, 42, 49, 56, 57, 50, 43, 36,
|
||||
29, 22, 15, 23, 30, 37, 44, 51,
|
||||
58, 59, 52, 45, 38, 31, 39, 46,
|
||||
53, 60, 61, 54, 47, 55, 62, 63
|
||||
};
|
||||
|
||||
#if 0   // AWG CODE NO LONGER USED IN CODEBASE.
/* Cos and Sin constant multipliers used during DCT and IDCT */
const double C1S7 = (double)0.9807852804032;
const double C2S6 = (double)0.9238795325113;
const double C3S5 = (double)0.8314696123025;
const double C4S4 = (double)0.7071067811865;
const double C5S3 = (double)0.5555702330196;
const double C6S2 = (double)0.3826834323651;
const double C7S1 = (double)0.1950903220161;

/****************************************************************************
 *  Exports
 ****************************************************************************/

// DCT lookup tables. Each pointer is aimed at the middle of its table so that
// it can be indexed with negative as well as positive values.
INT32 * C4S4_TablePtr;
INT32 C4S4_Table[(COEFF_MAX * 4) + 1];

INT32 * C6S2_TablePtr;
INT32 C6S2_Table[(COEFF_MAX * 2) + 1];

INT32 * C2S6_TablePtr;
INT32 C2S6_Table[(COEFF_MAX * 2) + 1];

INT32 * C1S7_TablePtr;
INT32 C1S7_Table[(COEFF_MAX * 2) + 1];

INT32 * C7S1_TablePtr;
INT32 C7S1_Table[(COEFF_MAX * 2) + 1];

INT32 * C3S5_TablePtr;
INT32 C3S5_Table[(COEFF_MAX * 2) + 1];

INT32 * C5S3_TablePtr;
INT32 C5S3_Table[(COEFF_MAX * 2) + 1];

/****************************************************************************
 *
 *  ROUTINE       : FillDctTable
 *
 *  INPUTS        : INT32 HalfRange   : Entries are written for indices
 *                                      -HalfRange .. HalfRange-1.
 *                  double Multiplier : Constant each index is multiplied by.
 *
 *  OUTPUTS       : INT32 *Centre     : Pointer to the zero-index entry of the table.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Stores (index * Multiplier), rounded half away from zero,
 *                  at Centre[index].
 *
 *  SPECIAL NOTES : NO LONGER USED IN CODEBASE.
 *
 ****************************************************************************/
static void FillDctTable ( INT32 *Centre, INT32 HalfRange, double Multiplier )
{
    INT32 i;

    for ( i = -HalfRange; i < HalfRange; i++ )
    {
        if ( i < 0 )
            Centre[i] = (INT32)((i * Multiplier) - 0.5);
        else
            Centre[i] = (INT32)((i * Multiplier) + 0.5);
    }
}

/****************************************************************************
 *
 *  ROUTINE       : InitDctTables
 *
 *  INPUTS        : None.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Initialises lookup tables used in IDCT.
 *
 *  SPECIAL NOTES : NO LONGER USED IN CODEBASE.
 *
 ****************************************************************************/
void InitDctTables ( void )
{
    // The C4S4 table spans twice the index range of the others.
    C4S4_TablePtr = &C4S4_Table[COEFF_MAX*2];
    FillDctTable ( C4S4_TablePtr, 2 * COEFF_MAX, C4S4 );

    C6S2_TablePtr = &C6S2_Table[COEFF_MAX];
    FillDctTable ( C6S2_TablePtr, COEFF_MAX, C6S2 );

    C2S6_TablePtr = &C2S6_Table[COEFF_MAX];
    FillDctTable ( C2S6_TablePtr, COEFF_MAX, C2S6 );

    C1S7_TablePtr = &C1S7_Table[COEFF_MAX];
    FillDctTable ( C1S7_TablePtr, COEFF_MAX, C1S7 );

    C7S1_TablePtr = &C7S1_Table[COEFF_MAX];
    FillDctTable ( C7S1_TablePtr, COEFF_MAX, C7S1 );

    C3S5_TablePtr = &C3S5_Table[COEFF_MAX];
    FillDctTable ( C3S5_TablePtr, COEFF_MAX, C3S5 );

    C5S3_TablePtr = &C5S3_Table[COEFF_MAX];
    FillDctTable ( C5S3_TablePtr, COEFF_MAX, C5S3 );
}
#endif
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : dequant_slow
|
||||
*
|
||||
* INPUTS : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
|
||||
* INT16 *quantized_list : Pointer to quantized DCT coeffs
|
||||
* (in zig-zag order).
|
||||
*
|
||||
* OUTPUTS : INT32 *DCT_block : Pointer to 8x8 de-quantized block
|
||||
* (in 2-D raster order).
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : De-quantizes an 8x8 block of quantized DCT coeffs.
|
||||
*
|
||||
* SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering.
|
||||
*
|
||||
****************************************************************************/
|
||||
void dequant_slow ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
|
||||
{
|
||||
// Loop fully expanded for maximum speed
|
||||
DCT_block[dequant_index[0]] = quantized_list[0] * dequant_coeffs[0];
|
||||
DCT_block[dequant_index[1]] = quantized_list[1] * dequant_coeffs[1];
|
||||
DCT_block[dequant_index[2]] = quantized_list[2] * dequant_coeffs[2];
|
||||
DCT_block[dequant_index[3]] = quantized_list[3] * dequant_coeffs[3];
|
||||
DCT_block[dequant_index[4]] = quantized_list[4] * dequant_coeffs[4];
|
||||
DCT_block[dequant_index[5]] = quantized_list[5] * dequant_coeffs[5];
|
||||
DCT_block[dequant_index[6]] = quantized_list[6] * dequant_coeffs[6];
|
||||
DCT_block[dequant_index[7]] = quantized_list[7] * dequant_coeffs[7];
|
||||
DCT_block[dequant_index[8]] = quantized_list[8] * dequant_coeffs[8];
|
||||
DCT_block[dequant_index[9]] = quantized_list[9] * dequant_coeffs[9];
|
||||
DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
|
||||
DCT_block[dequant_index[11]] = quantized_list[11] * dequant_coeffs[11];
|
||||
DCT_block[dequant_index[12]] = quantized_list[12] * dequant_coeffs[12];
|
||||
DCT_block[dequant_index[13]] = quantized_list[13] * dequant_coeffs[13];
|
||||
DCT_block[dequant_index[14]] = quantized_list[14] * dequant_coeffs[14];
|
||||
DCT_block[dequant_index[15]] = quantized_list[15] * dequant_coeffs[15];
|
||||
DCT_block[dequant_index[16]] = quantized_list[16] * dequant_coeffs[16];
|
||||
DCT_block[dequant_index[17]] = quantized_list[17] * dequant_coeffs[17];
|
||||
DCT_block[dequant_index[18]] = quantized_list[18] * dequant_coeffs[18];
|
||||
DCT_block[dequant_index[19]] = quantized_list[19] * dequant_coeffs[19];
|
||||
DCT_block[dequant_index[20]] = quantized_list[20] * dequant_coeffs[20];
|
||||
DCT_block[dequant_index[21]] = quantized_list[21] * dequant_coeffs[21];
|
||||
DCT_block[dequant_index[22]] = quantized_list[22] * dequant_coeffs[22];
|
||||
DCT_block[dequant_index[23]] = quantized_list[23] * dequant_coeffs[23];
|
||||
DCT_block[dequant_index[24]] = quantized_list[24] * dequant_coeffs[24];
|
||||
DCT_block[dequant_index[25]] = quantized_list[25] * dequant_coeffs[25];
|
||||
DCT_block[dequant_index[26]] = quantized_list[26] * dequant_coeffs[26];
|
||||
DCT_block[dequant_index[27]] = quantized_list[27] * dequant_coeffs[27];
|
||||
DCT_block[dequant_index[28]] = quantized_list[28] * dequant_coeffs[28];
|
||||
DCT_block[dequant_index[29]] = quantized_list[29] * dequant_coeffs[29];
|
||||
DCT_block[dequant_index[30]] = quantized_list[30] * dequant_coeffs[30];
|
||||
DCT_block[dequant_index[31]] = quantized_list[31] * dequant_coeffs[31];
|
||||
DCT_block[dequant_index[32]] = quantized_list[32] * dequant_coeffs[32];
|
||||
DCT_block[dequant_index[33]] = quantized_list[33] * dequant_coeffs[33];
|
||||
DCT_block[dequant_index[34]] = quantized_list[34] * dequant_coeffs[34];
|
||||
DCT_block[dequant_index[35]] = quantized_list[35] * dequant_coeffs[35];
|
||||
DCT_block[dequant_index[36]] = quantized_list[36] * dequant_coeffs[36];
|
||||
DCT_block[dequant_index[37]] = quantized_list[37] * dequant_coeffs[37];
|
||||
DCT_block[dequant_index[38]] = quantized_list[38] * dequant_coeffs[38];
|
||||
DCT_block[dequant_index[39]] = quantized_list[39] * dequant_coeffs[39];
|
||||
DCT_block[dequant_index[40]] = quantized_list[40] * dequant_coeffs[40];
|
||||
DCT_block[dequant_index[41]] = quantized_list[41] * dequant_coeffs[41];
|
||||
DCT_block[dequant_index[42]] = quantized_list[42] * dequant_coeffs[42];
|
||||
DCT_block[dequant_index[43]] = quantized_list[43] * dequant_coeffs[43];
|
||||
DCT_block[dequant_index[44]] = quantized_list[44] * dequant_coeffs[44];
|
||||
DCT_block[dequant_index[45]] = quantized_list[45] * dequant_coeffs[45];
|
||||
DCT_block[dequant_index[46]] = quantized_list[46] * dequant_coeffs[46];
|
||||
DCT_block[dequant_index[47]] = quantized_list[47] * dequant_coeffs[47];
|
||||
DCT_block[dequant_index[48]] = quantized_list[48] * dequant_coeffs[48];
|
||||
DCT_block[dequant_index[49]] = quantized_list[49] * dequant_coeffs[49];
|
||||
DCT_block[dequant_index[50]] = quantized_list[50] * dequant_coeffs[50];
|
||||
DCT_block[dequant_index[51]] = quantized_list[51] * dequant_coeffs[51];
|
||||
DCT_block[dequant_index[52]] = quantized_list[52] * dequant_coeffs[52];
|
||||
DCT_block[dequant_index[53]] = quantized_list[53] * dequant_coeffs[53];
|
||||
DCT_block[dequant_index[54]] = quantized_list[54] * dequant_coeffs[54];
|
||||
DCT_block[dequant_index[55]] = quantized_list[55] * dequant_coeffs[55];
|
||||
DCT_block[dequant_index[56]] = quantized_list[56] * dequant_coeffs[56];
|
||||
DCT_block[dequant_index[57]] = quantized_list[57] * dequant_coeffs[57];
|
||||
DCT_block[dequant_index[58]] = quantized_list[58] * dequant_coeffs[58];
|
||||
DCT_block[dequant_index[59]] = quantized_list[59] * dequant_coeffs[59];
|
||||
DCT_block[dequant_index[60]] = quantized_list[60] * dequant_coeffs[60];
|
||||
DCT_block[dequant_index[61]] = quantized_list[61] * dequant_coeffs[61];
|
||||
DCT_block[dequant_index[62]] = quantized_list[62] * dequant_coeffs[62];
|
||||
DCT_block[dequant_index[63]] = quantized_list[63] * dequant_coeffs[63];
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : IDctSlow
|
||||
*
|
||||
* INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
|
||||
* int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
|
||||
*
|
||||
* OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Inverse quantizes and inverse DCT's input 8x8 block
|
||||
* to reproduce prediction error.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void IDctSlow ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
|
||||
{
|
||||
int loop;
|
||||
int32 t1, t2;
|
||||
int32 IntermediateData[64];
|
||||
int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
|
||||
int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
|
||||
|
||||
int32 *ip = IntermediateData;
|
||||
int16 *op = OutputData;
|
||||
|
||||
// dequantize the input
|
||||
dequant_slow ( QuantMatrix, InputData, IntermediateData );
|
||||
|
||||
// Inverse DCT on the rows now
|
||||
for ( loop=0; loop<8; loop++ )
|
||||
{
|
||||
// Check for non-zero values
|
||||
if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] )
|
||||
{
|
||||
t1 = (int32)(xC1S7 * ip[1]);
|
||||
t2 = (int32)(xC7S1 * ip[7]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_A = t1 + t2;
|
||||
|
||||
t1 = (int32)(xC7S1 * ip[1]);
|
||||
t2 = (int32)(xC1S7 * ip[7]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_B = t1 - t2;
|
||||
|
||||
t1 = (int32)(xC3S5 * ip[3]);
|
||||
t2 = (int32)(xC5S3 * ip[5]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_C = t1 + t2;
|
||||
|
||||
t1 = (int32)(xC3S5 * ip[5]);
|
||||
t2 = (int32)(xC5S3 * ip[3]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_D = t1 - t2;
|
||||
|
||||
t1 = (int32)(xC4S4 * (_A - _C));
|
||||
t1 >>= 16;
|
||||
_Ad = t1;
|
||||
|
||||
t1 = (int32)(xC4S4 * (_B - _D));
|
||||
t1 >>= 16;
|
||||
_Bd = t1;
|
||||
|
||||
_Cd = _A + _C;
|
||||
_Dd = _B + _D;
|
||||
|
||||
t1 = (int32)(xC4S4 * (ip[0] + ip[4]));
|
||||
t1 >>= 16;
|
||||
_E = t1;
|
||||
|
||||
t1 = (int32)(xC4S4 * (ip[0] - ip[4]));
|
||||
t1 >>= 16;
|
||||
_F = t1;
|
||||
|
||||
t1 = (int32)(xC2S6 * ip[2]);
|
||||
t2 = (int32)(xC6S2 * ip[6]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_G = t1 + t2;
|
||||
|
||||
t1 = (int32)(xC6S2 * ip[2]);
|
||||
t2 = (int32)(xC2S6 * ip[6]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_H = t1 - t2;
|
||||
|
||||
_Ed = _E - _G;
|
||||
_Gd = _E + _G;
|
||||
|
||||
_Add = _F + _Ad;
|
||||
_Bdd = _Bd - _H;
|
||||
|
||||
_Fd = _F - _Ad;
|
||||
_Hd = _Bd + _H;
|
||||
|
||||
// Final sequence of operations over-write original inputs.
|
||||
ip[0] = (int16)((_Gd + _Cd ) >> 0);
|
||||
ip[7] = (int16)((_Gd - _Cd ) >> 0);
|
||||
|
||||
ip[1] = (int16)((_Add + _Hd ) >> 0);
|
||||
ip[2] = (int16)((_Add - _Hd ) >> 0);
|
||||
|
||||
ip[3] = (int16)((_Ed + _Dd ) >> 0);
|
||||
ip[4] = (int16)((_Ed - _Dd ) >> 0);
|
||||
|
||||
ip[5] = (int16)((_Fd + _Bdd ) >> 0);
|
||||
ip[6] = (int16)((_Fd - _Bdd ) >> 0);
|
||||
}
|
||||
|
||||
ip += 8; /* next row */
|
||||
}
|
||||
|
||||
ip = IntermediateData;
|
||||
|
||||
for ( loop=0; loop<8; loop++ )
|
||||
{
|
||||
// Check for non-zero values (bitwise | faster than logical ||)
|
||||
if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
|
||||
ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] )
|
||||
{
|
||||
|
||||
t1 = (int32)(xC1S7 * ip[1*8]);
|
||||
t2 = (int32)(xC7S1 * ip[7*8]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_A = t1 + t2;
|
||||
|
||||
t1 = (int32)(xC7S1 * ip[1*8]);
|
||||
t2 = (int32)(xC1S7 * ip[7*8]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_B = t1 - t2;
|
||||
|
||||
t1 = (int32)(xC3S5 * ip[3*8]);
|
||||
t2 = (int32)(xC5S3 * ip[5*8]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_C = t1 + t2;
|
||||
|
||||
t1 = (int32)(xC3S5 * ip[5*8]);
|
||||
t2 = (int32)(xC5S3 * ip[3*8]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_D = t1 - t2;
|
||||
|
||||
t1 = (int32)(xC4S4 * (_A - _C));
|
||||
t1 >>= 16;
|
||||
_Ad = t1;
|
||||
|
||||
t1 = (int32)(xC4S4 * (_B - _D));
|
||||
t1 >>= 16;
|
||||
_Bd = t1;
|
||||
|
||||
_Cd = _A + _C;
|
||||
_Dd = _B + _D;
|
||||
|
||||
t1 = (int32)(xC4S4 * (ip[0*8] + ip[4*8]));
|
||||
t1 >>= 16;
|
||||
_E = t1;
|
||||
|
||||
t1 = (int32)(xC4S4 * (ip[0*8] - ip[4*8]));
|
||||
t1 >>= 16;
|
||||
_F = t1;
|
||||
|
||||
t1 = (int32)(xC2S6 * ip[2*8]);
|
||||
t2 = (int32)(xC6S2 * ip[6*8]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_G = t1 + t2;
|
||||
|
||||
t1 = (int32)(xC6S2 * ip[2*8]);
|
||||
t2 = (int32)(xC2S6 * ip[6*8]);
|
||||
t1 >>= 16;
|
||||
t2 >>= 16;
|
||||
_H = t1 - t2;
|
||||
|
||||
_Ed = _E - _G;
|
||||
_Gd = _E + _G;
|
||||
|
||||
_Add = _F + _Ad;
|
||||
_Bdd = _Bd - _H;
|
||||
|
||||
_Fd = _F - _Ad;
|
||||
_Hd = _Bd + _H;
|
||||
|
||||
_Gd += IdctAdjustBeforeShift;
|
||||
_Add += IdctAdjustBeforeShift;
|
||||
_Ed += IdctAdjustBeforeShift;
|
||||
_Fd += IdctAdjustBeforeShift;
|
||||
|
||||
// Final sequence of operations over-write original inputs.
|
||||
op[0*8] = (int16)((_Gd + _Cd ) >> 4);
|
||||
op[7*8] = (int16)((_Gd - _Cd ) >> 4);
|
||||
|
||||
op[1*8] = (int16)((_Add + _Hd ) >> 4);
|
||||
op[2*8] = (int16)((_Add - _Hd ) >> 4);
|
||||
|
||||
op[3*8] = (int16)((_Ed + _Dd ) >> 4);
|
||||
op[4*8] = (int16)((_Ed - _Dd ) >> 4);
|
||||
|
||||
op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
|
||||
op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
op[0*8] = 0;
|
||||
op[7*8] = 0;
|
||||
op[1*8] = 0;
|
||||
op[2*8] = 0;
|
||||
op[3*8] = 0;
|
||||
op[4*8] = 0;
|
||||
op[5*8] = 0;
|
||||
op[6*8] = 0;
|
||||
}
|
||||
|
||||
ip++; // next column
|
||||
op++;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : dequant_slow10
|
||||
*
|
||||
* INPUTS : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
|
||||
* INT16 *quantized_list : Pointer to quantized DCT coeffs
|
||||
* (in zig-zag order).
|
||||
*
|
||||
* OUTPUTS : INT32 *DCT_block : Pointer to 8x8 de-quantized block
|
||||
* (in 2-D raster order).
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : De-quantizes an 8x8 block of quantized DCT coeffs that
|
||||
* only has non-zero coefficients in the first 10, i.e.
|
||||
* only DC & AC1-9 are non-zero, AC10-63 __MUST_BE_ zero.
|
||||
*
|
||||
* SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering.
|
||||
*
|
||||
****************************************************************************/
|
||||
void dequant_slow10 ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
|
||||
{
|
||||
memset(DCT_block,0, 128);
|
||||
|
||||
// Loop fully expanded for maximum speed
|
||||
DCT_block[dequant_index[0]] = quantized_list[0] * dequant_coeffs[0];
|
||||
DCT_block[dequant_index[1]] = quantized_list[1] * dequant_coeffs[1];
|
||||
DCT_block[dequant_index[2]] = quantized_list[2] * dequant_coeffs[2];
|
||||
DCT_block[dequant_index[3]] = quantized_list[3] * dequant_coeffs[3];
|
||||
DCT_block[dequant_index[4]] = quantized_list[4] * dequant_coeffs[4];
|
||||
DCT_block[dequant_index[5]] = quantized_list[5] * dequant_coeffs[5];
|
||||
DCT_block[dequant_index[6]] = quantized_list[6] * dequant_coeffs[6];
|
||||
DCT_block[dequant_index[7]] = quantized_list[7] * dequant_coeffs[7];
|
||||
DCT_block[dequant_index[8]] = quantized_list[8] * dequant_coeffs[8];
|
||||
DCT_block[dequant_index[9]] = quantized_list[9] * dequant_coeffs[9];
|
||||
DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
|
||||
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : IDct10
|
||||
*
|
||||
* INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
|
||||
* int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
|
||||
*
|
||||
* OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Inverse quantizes and inverse DCT's input 8x8 block
|
||||
* with non-zero coeffs only in DC & the first 9 AC coeffs.
|
||||
* i.e. non-zeros ONLY in the following 10 positions:
|
||||
*
|
||||
* x x x x 0 0 0 0
|
||||
* x x x 0 0 0 0 0
|
||||
* x x 0 0 0 0 0 0
|
||||
* x 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
*
|
||||
* SPECIAL NOTES : Output data is in raster, not zig-zag, order.
|
||||
*
|
||||
****************************************************************************/
|
||||
void IDct10 ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
|
||||
{
|
||||
int loop;
|
||||
int32 t1, t2;
|
||||
int32 IntermediateData[64];
|
||||
int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
|
||||
int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
|
||||
|
||||
int32 *ip = IntermediateData;
|
||||
int16 *op = OutputData;
|
||||
|
||||
// dequantize the input
|
||||
dequant_slow10 ( QuantMatrix, InputData, IntermediateData );
|
||||
|
||||
// Inverse DCT on the rows now
|
||||
for ( loop=0; loop<4; loop++ )
|
||||
{
|
||||
// Check for non-zero values
|
||||
if ( ip[0] | ip[1] | ip[2] | ip[3] )
|
||||
{
|
||||
t1 = (int32)(xC1S7 * ip[1]);
|
||||
t1 >>= 16;
|
||||
_A = t1;
|
||||
|
||||
t1 = (int32)(xC7S1 * ip[1]);
|
||||
t1 >>= 16;
|
||||
_B = t1 ;
|
||||
|
||||
t1 = (int32)(xC3S5 * ip[3]);
|
||||
t1 >>= 16;
|
||||
_C = t1;
|
||||
|
||||
t2 = (int32)(xC5S3 * ip[3]);
|
||||
t2 >>= 16;
|
||||
_D = -t2;
|
||||
|
||||
t1 = (int32)(xC4S4 * (_A - _C));
|
||||
t1 >>= 16;
|
||||
_Ad = t1;
|
||||
|
||||
t1 = (int32)(xC4S4 * (_B - _D));
|
||||
t1 >>= 16;
|
||||
_Bd = t1;
|
||||
|
||||
_Cd = _A + _C;
|
||||
_Dd = _B + _D;
|
||||
|
||||
t1 = (int32)(xC4S4 * ip[0] );
|
||||
t1 >>= 16;
|
||||
_E = t1;
|
||||
|
||||
_F = t1;
|
||||
|
||||
t1 = (int32)(xC2S6 * ip[2]);
|
||||
t1 >>= 16;
|
||||
_G = t1;
|
||||
|
||||
t1 = (int32)(xC6S2 * ip[2]);
|
||||
t1 >>= 16;
|
||||
_H = t1 ;
|
||||
|
||||
_Ed = _E - _G;
|
||||
_Gd = _E + _G;
|
||||
|
||||
_Add = _F + _Ad;
|
||||
_Bdd = _Bd - _H;
|
||||
|
||||
_Fd = _F - _Ad;
|
||||
_Hd = _Bd + _H;
|
||||
|
||||
// Final sequence of operations over-write original inputs.
|
||||
ip[0] = (int16)((_Gd + _Cd ) >> 0);
|
||||
ip[7] = (int16)((_Gd - _Cd ) >> 0);
|
||||
|
||||
ip[1] = (int16)((_Add + _Hd ) >> 0);
|
||||
ip[2] = (int16)((_Add - _Hd ) >> 0);
|
||||
|
||||
ip[3] = (int16)((_Ed + _Dd ) >> 0);
|
||||
ip[4] = (int16)((_Ed - _Dd ) >> 0);
|
||||
|
||||
ip[5] = (int16)((_Fd + _Bdd ) >> 0);
|
||||
ip[6] = (int16)((_Fd - _Bdd ) >> 0);
|
||||
}
|
||||
|
||||
ip += 8; /* next row */
|
||||
}
|
||||
|
||||
ip = IntermediateData;
|
||||
|
||||
for ( loop=0; loop<8; loop++ )
|
||||
{
|
||||
// Check for non-zero values (bitwise or faster than ||)
|
||||
if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] )
|
||||
{
|
||||
t1 = (int32)(xC1S7 * ip[1*8]);
|
||||
t1 >>= 16;
|
||||
_A = t1 ;
|
||||
|
||||
t1 = (int32)(xC7S1 * ip[1*8]);
|
||||
t1 >>= 16;
|
||||
_B = t1 ;
|
||||
|
||||
t1 = (int32)(xC3S5 * ip[3*8]);
|
||||
t1 >>= 16;
|
||||
_C = t1 ;
|
||||
|
||||
t2 = (int32)(xC5S3 * ip[3*8]);
|
||||
t2 >>= 16;
|
||||
_D = - t2;
|
||||
|
||||
t1 = (int32)(xC4S4 * (_A - _C));
|
||||
t1 >>= 16;
|
||||
_Ad = t1;
|
||||
|
||||
t1 = (int32)(xC4S4 * (_B - _D));
|
||||
t1 >>= 16;
|
||||
_Bd = t1;
|
||||
|
||||
_Cd = _A + _C;
|
||||
_Dd = _B + _D;
|
||||
|
||||
t1 = (int32)(xC4S4 * ip[0*8]);
|
||||
t1 >>= 16;
|
||||
_E = t1;
|
||||
_F = t1;
|
||||
|
||||
t1 = (int32)(xC2S6 * ip[2*8]);
|
||||
t1 >>= 16;
|
||||
_G = t1;
|
||||
|
||||
t1 = (int32)(xC6S2 * ip[2*8]);
|
||||
t1 >>= 16;
|
||||
_H = t1;
|
||||
|
||||
_Ed = _E - _G;
|
||||
_Gd = _E + _G;
|
||||
|
||||
_Add = _F + _Ad;
|
||||
_Bdd = _Bd - _H;
|
||||
|
||||
_Fd = _F - _Ad;
|
||||
_Hd = _Bd + _H;
|
||||
|
||||
_Gd += IdctAdjustBeforeShift;
|
||||
_Add += IdctAdjustBeforeShift;
|
||||
_Ed += IdctAdjustBeforeShift;
|
||||
_Fd += IdctAdjustBeforeShift;
|
||||
|
||||
// Final sequence of operations over-write original inputs.
|
||||
op[0*8] = (int16)((_Gd + _Cd ) >> 4);
|
||||
op[7*8] = (int16)((_Gd - _Cd ) >> 4);
|
||||
|
||||
op[1*8] = (int16)((_Add + _Hd ) >> 4);
|
||||
op[2*8] = (int16)((_Add - _Hd ) >> 4);
|
||||
|
||||
op[3*8] = (int16)((_Ed + _Dd ) >> 4);
|
||||
op[4*8] = (int16)((_Ed - _Dd ) >> 4);
|
||||
|
||||
op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
|
||||
op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
op[0*8] = 0;
|
||||
op[7*8] = 0;
|
||||
op[1*8] = 0;
|
||||
op[2*8] = 0;
|
||||
op[3*8] = 0;
|
||||
op[4*8] = 0;
|
||||
op[5*8] = 0;
|
||||
op[6*8] = 0;
|
||||
}
|
||||
|
||||
ip++; // next column
|
||||
op++;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : IDct1
|
||||
*
|
||||
* INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
|
||||
* int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
|
||||
*
|
||||
* OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Inverse DCT's input 8x8 block with only one non-zero
|
||||
* coeff in the DC position:
|
||||
*
|
||||
* x 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
* 0 0 0 0 0 0 0 0
|
||||
*
|
||||
* SPECIAL NOTES : Output data is in raster, not zig-zag, order.
|
||||
*
|
||||
****************************************************************************/
|
||||
void IDct1 ( int16 *InputData, int16 *QuantMatrix, INT16 *OutputData )
|
||||
{
|
||||
INT32 loop;
|
||||
INT16 OutD;
|
||||
|
||||
OutD = (INT16)((INT32)(InputData[0]*QuantMatrix[0]+15)>>5);
|
||||
|
||||
for ( loop=0; loop<64; loop++ )
|
||||
OutputData[loop] = OutD;
|
||||
}
|
||||
|
||||
|
||||
#if 0
/****************************************************************************
 *
 *  ROUTINE       : IDct4
 *
 *  INPUTS        : int16 *InputData  : Pointer to 8x8 DCT coefficients.
 *                                      The first rows of this buffer are
 *                                      over-written while the routine runs.
 *
 *  OUTPUTS       : int16 *OutputData : Pointer to 8x8 block to hold output.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Inverse DCT's input 8x8 block with at most four non-zero
 *                  coeffs in the following positions:
 *
 *                          x  x  0  0  0  0  0  0
 *                          x  x  0  0  0  0  0  0
 *                          0  0  0  0  0  0  0  0
 *                          0  0  0  0  0  0  0  0
 *                          0  0  0  0  0  0  0  0
 *                          0  0  0  0  0  0  0  0
 *                          0  0  0  0  0  0  0  0
 *                          0  0  0  0  0  0  0  0
 *
 *  SPECIAL NOTES : CURRENTLY NOT USED IN CODEBASE (compiled out by #if 0).
 *
 ****************************************************************************/
void IDct4 ( int16 *InputData, int16 *OutputData )
{
    int   loop;
    int32 a, b, ad, bd, cd, dd, e;
    int32 add, fd;

    int16 *ip = InputData;
    int16 *op = OutputData;

    // Unzigzag the coefficients: scan slots 2 and 4 move to raster
    // positions 8 and 9, and the slots they came from are cleared.
    // (The slot cleared after the second move used to be ip[5], which left
    // a stale value in ip[4] for the column pass whenever row 0 was skipped.)
    ip[8] = ip[2];
    ip[9] = ip[4];
    ip[2] = 0;
    ip[4] = 0;

    // Inverse DCT on the rows now (only rows 0 and 1 can be non-zero)
    for ( loop = 0; loop < 2; loop++ )
    {
        // Check for non-zero values
        if ( ip[0] | ip[1] )
        {
            a = (int32)(xC1S7 * ip[1]) >> 16;
            b = (int32)(xC7S1 * ip[1]) >> 16;

            ad = (int32)(xC4S4 * a) >> 16;
            bd = (int32)(xC4S4 * b) >> 16;

            cd = a;
            dd = b;

            e = (int32)(xC4S4 * ip[0]) >> 16;

            add = e + ad;
            fd  = e - ad;

            // Final sequence of operations over-write original inputs.
            ip[0] = (int16)(e + cd);
            ip[7] = (int16)(e - cd);

            ip[1] = (int16)(add + bd);
            ip[2] = (int16)(add - bd);

            ip[3] = (int16)(e + dd);
            ip[4] = (int16)(e - dd);

            ip[5] = (int16)(fd + bd);
            ip[6] = (int16)(fd - bd);
        }

        ip += 8;            /* next row */
    }

    ip = InputData;

    for ( loop = 0; loop < 8; loop++ )
    {
        // Check for non-zero values (bitwise or faster than ||)
        if ( ip[0 * 8] | ip[1 * 8] )
        {
            a = (int32)(xC1S7 * ip[1 * 8]) >> 16;
            b = (int32)(xC7S1 * ip[1 * 8]) >> 16;

            ad = (int32)(xC4S4 * a) >> 16;
            bd = (int32)(xC4S4 * b) >> 16;

            cd = a;
            dd = b;

            e = (int32)(xC4S4 * ip[0 * 8]) >> 16;

            add = e + ad;
            fd  = e - ad;

            // Adjustment is applied after add/fd were derived from e
            add += IdctAdjustBeforeShift;
            e   += IdctAdjustBeforeShift;
            fd  += IdctAdjustBeforeShift;

            op[0 * 8] = (int16)((e + cd) >> 4);
            op[7 * 8] = (int16)((e - cd) >> 4);

            op[1 * 8] = (int16)((add + bd) >> 4);
            op[2 * 8] = (int16)((add - bd) >> 4);

            op[3 * 8] = (int16)((e + dd) >> 4);
            op[4 * 8] = (int16)((e - dd) >> 4);

            op[5 * 8] = (int16)((fd + bd) >> 4);
            op[6 * 8] = (int16)((fd - bd) >> 4);
        }
        else
        {
            op[0 * 8] = 0;
            op[7 * 8] = 0;
            op[1 * 8] = 0;
            op[2 * 8] = 0;
            op[3 * 8] = 0;
            op[4 * 8] = 0;
            op[5 * 8] = 0;
            op[6 * 8] = 0;
        }

        ip++;               // next column
        op++;
    }
}
#endif
|
243
Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c
Normal file
243
Src/libvpShared/corelibs/cdxv/vputil/generic/reconstruct.c
Normal file
|
@ -0,0 +1,243 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : Reconstruct.c
|
||||
*
|
||||
* Description : Block reconstruction functions.
|
||||
*
|
||||
****************************************************************************/
|
||||
#define STRICT // Strict type checking
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
#include "reconstruct.h"
|
||||
#include "codec_common.h"
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : SatUnsigned8
|
||||
*
|
||||
* INPUTS : INT16 *DataBlock : Pointer to 8x8 input block.
|
||||
* UINT32 ResultLineStep : Stride of output block.
|
||||
* UINT32 DataLineStep : Stride of input block.
|
||||
*
|
||||
* OUTPUTS : UINT8 *ResultPtr : Pointer to 8x8 output block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Saturates the input data to 8 bits unsigned and stores
|
||||
* in the output buffer.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void SatUnsigned8 ( UINT8 *ResultPtr, INT16 *DataBlock, UINT32 ResultLineStep, UINT32 DataLineStep )
|
||||
{
|
||||
INT32 i;
|
||||
|
||||
// Partly expanded loop
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
ResultPtr[0] = (char) LIMIT(DataBlock[0]);
|
||||
ResultPtr[1] = (char) LIMIT(DataBlock[1]);
|
||||
ResultPtr[2] = (char) LIMIT(DataBlock[2]);
|
||||
ResultPtr[3] = (char) LIMIT(DataBlock[3]);
|
||||
ResultPtr[4] = (char) LIMIT(DataBlock[4]);
|
||||
ResultPtr[5] = (char) LIMIT(DataBlock[5]);
|
||||
ResultPtr[6] = (char) LIMIT(DataBlock[6]);
|
||||
ResultPtr[7] = (char) LIMIT(DataBlock[7]);
|
||||
|
||||
DataBlock += DataLineStep;
|
||||
ResultPtr += ResultLineStep;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : ScalarReconIntra
|
||||
*
|
||||
* INPUTS : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
|
||||
* UINT16 *ChangePtr : Pointer to 8x8 intra prediction block.
|
||||
* UINT32 LineStep : Stride of reconstruction block.
|
||||
*
|
||||
* OUTPUTS : UINT8 *ReconPtr : Pointer to 8x8 block to hold reconstructed block.
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs an intra block.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep )
|
||||
{
|
||||
UINT32 i;
|
||||
INT16 *TmpDataPtr = TmpDataBuffer;
|
||||
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
TmpDataPtr[0] = (INT16) ( ChangePtr[0] + 128 );
|
||||
TmpDataPtr[1] = (INT16) ( ChangePtr[1] + 128 );
|
||||
TmpDataPtr[2] = (INT16) ( ChangePtr[2] + 128 );
|
||||
TmpDataPtr[3] = (INT16) ( ChangePtr[3] + 128 );
|
||||
TmpDataPtr[4] = (INT16) ( ChangePtr[4] + 128 );
|
||||
TmpDataPtr[5] = (INT16) ( ChangePtr[5] + 128 );
|
||||
TmpDataPtr[6] = (INT16) ( ChangePtr[6] + 128 );
|
||||
TmpDataPtr[7] = (INT16) ( ChangePtr[7] + 128 );
|
||||
|
||||
TmpDataPtr += BLOCK_HEIGHT_WIDTH;
|
||||
ChangePtr += BLOCK_HEIGHT_WIDTH;
|
||||
}
|
||||
|
||||
// Saturate the output to unsigned 8 bit values in recon buffer
|
||||
SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : ScalarReconInter
|
||||
*
|
||||
* INPUTS : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
|
||||
* UINT8 *RefPtr : Pointer to 8x8 reference block.
|
||||
* INT16 *ChangePtr : Pointer to 8x8 inter prediction error block.
|
||||
* UINT32 LineStep : Stride of reference and output blocks.
|
||||
*
|
||||
* OUTPUTS : UINT8 *ReconPtr : Pointer to 8x8 block to hold reconstructed block.
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs an inter-coded block by adding a prediction
|
||||
* error to a reference block in the previous frame
|
||||
* reconstruction buffer.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep )
|
||||
{
|
||||
UINT32 i;
|
||||
INT16 *TmpDataPtr = TmpDataBuffer;
|
||||
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
// Form each row
|
||||
TmpDataPtr[0] = (INT16)(RefPtr[0] + ChangePtr[0]);
|
||||
TmpDataPtr[1] = (INT16)(RefPtr[1] + ChangePtr[1]);
|
||||
TmpDataPtr[2] = (INT16)(RefPtr[2] + ChangePtr[2]);
|
||||
TmpDataPtr[3] = (INT16)(RefPtr[3] + ChangePtr[3]);
|
||||
TmpDataPtr[4] = (INT16)(RefPtr[4] + ChangePtr[4]);
|
||||
TmpDataPtr[5] = (INT16)(RefPtr[5] + ChangePtr[5]);
|
||||
TmpDataPtr[6] = (INT16)(RefPtr[6] + ChangePtr[6]);
|
||||
TmpDataPtr[7] = (INT16)(RefPtr[7] + ChangePtr[7]);
|
||||
|
||||
// Next row of Block
|
||||
ChangePtr += BLOCK_HEIGHT_WIDTH;
|
||||
TmpDataPtr += BLOCK_HEIGHT_WIDTH;
|
||||
RefPtr += LineStep;
|
||||
}
|
||||
|
||||
// Saturate the output to unsigned 8 bit values in recon buffer
|
||||
SatUnsigned8 ( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : ScalarReconInterHalfPixel2
|
||||
*
|
||||
* INPUTS : INT16 *TmpDataBuffer : Pointer to 8x8 temporary buffer for internal use.
|
||||
* UINT8 *RefPtr1 : Pointer to first 8x8 reference block.
|
||||
* UINT8 *RefPtr2 : Pointer to second 8x8 reference block.
|
||||
* INT16 *ChangePtr : Pointer to 8x8 inter prediction error block.
|
||||
* UINT32 LineStep : Stride of reference blocks.
|
||||
*
|
||||
* OUTPUTS : UINT8 *ReconPtr : Pointer to 8x8 block to hold reconstructed block.
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs an inter-coded block by adding a prediction
|
||||
* error to a reference block computed by averaging the two
|
||||
* specified reference blocks. The two reference blocks are
|
||||
* those that bracket the 1/2-pixel accuracy motion vector.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void ScalarReconInterHalfPixel2
|
||||
(
|
||||
INT16 *TmpDataBuffer,
|
||||
UINT8 *ReconPtr,
|
||||
UINT8 *RefPtr1,
|
||||
UINT8 *RefPtr2,
|
||||
INT16 *ChangePtr,
|
||||
UINT32 LineStep
|
||||
)
|
||||
{
|
||||
UINT32 i;
|
||||
INT16 *TmpDataPtr = TmpDataBuffer;
|
||||
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
// Form each row
|
||||
TmpDataPtr[0] = (INT16)( (((INT32)RefPtr1[0] + (INT32)RefPtr2[0]) >> 1) + ChangePtr[0] );
|
||||
TmpDataPtr[1] = (INT16)( (((INT32)RefPtr1[1] + (INT32)RefPtr2[1]) >> 1) + ChangePtr[1] );
|
||||
TmpDataPtr[2] = (INT16)( (((INT32)RefPtr1[2] + (INT32)RefPtr2[2]) >> 1) + ChangePtr[2] );
|
||||
TmpDataPtr[3] = (INT16)( (((INT32)RefPtr1[3] + (INT32)RefPtr2[3]) >> 1) + ChangePtr[3] );
|
||||
TmpDataPtr[4] = (INT16)( (((INT32)RefPtr1[4] + (INT32)RefPtr2[4]) >> 1) + ChangePtr[4] );
|
||||
TmpDataPtr[5] = (INT16)( (((INT32)RefPtr1[5] + (INT32)RefPtr2[5]) >> 1) + ChangePtr[5] );
|
||||
TmpDataPtr[6] = (INT16)( (((INT32)RefPtr1[6] + (INT32)RefPtr2[6]) >> 1) + ChangePtr[6] );
|
||||
TmpDataPtr[7] = (INT16)( (((INT32)RefPtr1[7] + (INT32)RefPtr2[7]) >> 1) + ChangePtr[7] );
|
||||
|
||||
// Next row of Block
|
||||
ChangePtr += BLOCK_HEIGHT_WIDTH;
|
||||
TmpDataPtr += BLOCK_HEIGHT_WIDTH;
|
||||
RefPtr1 += LineStep;
|
||||
RefPtr2 += LineStep;
|
||||
}
|
||||
|
||||
// Saturate the output to unsigned 8 bit values in recon buffer
|
||||
SatUnsigned8( ReconPtr, TmpDataBuffer, LineStep, BLOCK_HEIGHT_WIDTH );
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : ReconBlock_C
|
||||
*
|
||||
* INPUTS : INT16 *SrcBlock : Pointer to 8x8 prediction error.
|
||||
* INT16 *ReconRefPtr : Pointer to 8x8 block prediction.
|
||||
* UINT32 LineStep : Stride of output block.
|
||||
*
|
||||
* OUTPUTS : UINT8 *DestBlock : Pointer to 8x8 reconstructed block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Reconstrut a block by adding the prediction error
|
||||
* block to the source block and clipping values.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void ReconBlock_C ( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep )
|
||||
{
|
||||
UINT32 i;
|
||||
INT16 *SrcBlockPtr = SrcBlock;
|
||||
|
||||
// For each block row
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
SrcBlock[0] = (INT16)(SrcBlock[0] + ReconRefPtr[0]);
|
||||
SrcBlock[1] = (INT16)(SrcBlock[1] + ReconRefPtr[1]);
|
||||
SrcBlock[2] = (INT16)(SrcBlock[2] + ReconRefPtr[2]);
|
||||
SrcBlock[3] = (INT16)(SrcBlock[3] + ReconRefPtr[3]);
|
||||
SrcBlock[4] = (INT16)(SrcBlock[4] + ReconRefPtr[4]);
|
||||
SrcBlock[5] = (INT16)(SrcBlock[5] + ReconRefPtr[5]);
|
||||
SrcBlock[6] = (INT16)(SrcBlock[6] + ReconRefPtr[6]);
|
||||
SrcBlock[7] = (INT16)(SrcBlock[7] + ReconRefPtr[7]);
|
||||
|
||||
// Next row...
|
||||
SrcBlock += BLOCK_HEIGHT_WIDTH;
|
||||
ReconRefPtr += BLOCK_HEIGHT_WIDTH;
|
||||
}
|
||||
|
||||
// Saturate the output to unsigned 8 bit values in recon buffer
|
||||
SatUnsigned8( DestBlock, SrcBlockPtr, LineStep, BLOCK_HEIGHT_WIDTH );
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : SystemDependant.c
|
||||
*
|
||||
* Description : Miscellaneous system dependant functions.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
****************************************************************************/
|
||||
#include "codec_common.h"
|
||||
#include "vputil_if.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Exports
|
||||
****************************************************************************/
|
||||
// Scalar (no mmx) reconstruction functions
|
||||
extern void ClearSysState_C ( void );
|
||||
extern void IDctSlow ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
|
||||
extern void IDct10 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
|
||||
extern void IDct1 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
|
||||
extern void ScalarReconIntra ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT16 *ChangePtr, UINT32 LineStep );
|
||||
extern void ScalarReconInter ( INT16 *TmpDataBuffer, UINT8 *ReconPtr, UINT8 *RefPtr, INT16 *ChangePtr, UINT32 LineStep );
|
||||
extern void ScalarReconInterHalfPixel2 ( INT16 *TmpDataBuffer, UINT8 *ReconPtr,UINT8 *RefPtr1, UINT8 *RefPtr2, INT16 *ChangePtr, UINT32 LineStep );
|
||||
extern void ReconBlock_C(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep );
|
||||
extern void SubtractBlock_C ( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
|
||||
extern void UnpackBlock_C ( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
|
||||
extern void AverageBlock_C ( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine );
|
||||
extern void CopyBlock_C ( unsigned char *src, unsigned char *dest, unsigned int srcstride );
|
||||
extern void Copy12x12_C ( const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride );
|
||||
extern void fdct_short_C ( INT16 *InputData, INT16 *OutputData );
|
||||
extern void FilterBlockBil_8_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
|
||||
extern void FilterBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
|
||||
extern void GetProcessorFlags ( INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled );
|
||||
|
||||
/****************************************************************************
 *
 *  ROUTINE       : fillidctconstants
 *
 *  INPUTS        : None
 *
 *  OUTPUTS       : None
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : STUB FUNCTION. Does nothing in this build.
 *
 *  SPECIAL NOTES : None.
 *
 ****************************************************************************/
void fillidctconstants ( void )
{
    // Intentionally empty
    return;
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : MachineSpecificConfig
|
||||
*
|
||||
* INPUTS : None
|
||||
*
|
||||
* OUTPUTS : None
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Checks for machine specifc features such as MMX support
|
||||
* sets approipriate flags and function pointers.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void UtilMachineSpecificConfig ( void )
|
||||
{
|
||||
int i;
|
||||
for(i=0;i<=64;i++)
|
||||
{
|
||||
if(i<=1)idctc[i]=IDct1;
|
||||
else if(i<=10)idctc[i]=IDct10;
|
||||
else idctc[i]=IDctSlow;
|
||||
}
|
||||
fdct_short=fdct_short_C ;
|
||||
for(i=0;i<=64;i++)
|
||||
{
|
||||
if(i<=1)idct[i]=IDct1;
|
||||
else if(i<=10)idct[i]=IDct10;
|
||||
else idct[i]=IDctSlow;
|
||||
}
|
||||
ClearSysState = ClearSysState_C;
|
||||
ReconIntra = ScalarReconIntra;
|
||||
ReconInter = ScalarReconInter;
|
||||
ReconInterHalfPixel2 = ScalarReconInterHalfPixel2;
|
||||
AverageBlock = AverageBlock_C;
|
||||
UnpackBlock = UnpackBlock_C;
|
||||
ReconBlock = ReconBlock_C;
|
||||
SubtractBlock = SubtractBlock_C;
|
||||
CopyBlock = CopyBlock_C;
|
||||
Copy12x12 = Copy12x12_C;
|
||||
FilterBlockBil_8 = FilterBlockBil_8_C;
|
||||
FilterBlock=FilterBlock_C;
|
||||
}
|
1285
Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c
Normal file
1285
Src/libvpShared/corelibs/cdxv/vputil/generic/vputil.c
Normal file
File diff suppressed because it is too large
Load diff
74
Src/libvpShared/corelibs/cdxv/vputil/include/dct.h
Normal file
74
Src/libvpShared/corelibs/cdxv/vputil/include/dct.h
Normal file
|
@ -0,0 +1,74 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : dct.h
|
||||
*
|
||||
* Description : DCT header file.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __INC_DCT_H
#define __INC_DCT_H

/****************************************************************************
*  Header files
****************************************************************************/
#include "type_aliases.h"

/****************************************************************************
*  Macros
****************************************************************************/
#define COEFF_MAX   32768       // Max magnitude of DCT coefficient

// Extra bits of precision added to the fdct that have to be stripped off during the quantize
#define FDCT_PRECISION_BITS     1

// (2^FDCT_PRECISION_BITS - 1) as an INT16. The cast is applied to the whole
// value; previously it bound to the shift only, making the macro an int.
#define FDCT_PRECISION_NEG_ADJ  ((INT16) ((1<<FDCT_PRECISION_BITS)-1))

#if 0   // AWG not required any more!!!
/* Cos and Sin constant multipliers used during DCT and IDCT */
extern const double C1S7;
extern const double C2S6;
extern const double C3S5;
extern const double C4S4;
extern const double C5S3;
extern const double C6S2;
extern const double C7S1;

// DCT lookup tables and pointers
extern INT32 * C4S4_TablePtr;
extern INT32 C4S4_Table[(COEFF_MAX * 4) + 1];

extern INT32 * C6S2_TablePtr;
extern INT32 C6S2_Table[(COEFF_MAX * 2) + 1];

extern INT32 * C2S6_TablePtr;
extern INT32 C2S6_Table[(COEFF_MAX * 2) + 1];

extern INT32 * C1S7_TablePtr;
extern INT32 C1S7_Table[(COEFF_MAX * 2) + 1];

extern INT32 * C7S1_TablePtr;
extern INT32 C7S1_Table[(COEFF_MAX * 2) + 1];

extern INT32 * C3S5_TablePtr;
extern INT32 C3S5_Table[(COEFF_MAX * 2) + 1];

extern INT32 * C5S3_TablePtr;
extern INT32 C5S3_Table[(COEFF_MAX * 2) + 1];
#endif

/****************************************************************************
*  Exports
****************************************************************************/
#ifdef COMPDLL
// Forward Transform (only declared when COMPDLL is defined)
extern void fdct_slow ( INT32 *InputData, double *OutputData );
#endif

// Reverse Transform
extern void IDctSlow( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
extern void IDct10 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );
extern void IDct1 ( INT16 *InputData, INT16 *QuantMatrix, INT16 *OutputData );

#endif
|
11
Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h
Normal file
11
Src/libvpShared/corelibs/cdxv/vputil/include/mac_specs.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
#ifndef _mac_specs_h
#define _mac_specs_h

#ifdef __cplusplus
extern "C" {
#endif

/* CPU queries, declared with C linkage and implemented elsewhere.          */
/* NOTE(review): presumably non-zero when AltiVec is usable -- confirm.     */
int vputil_hasAltivec ( void );
/* NOTE(review): presumably the CPU clock in MHz -- confirm.                */
int vputil_cpuMhz ( void );

#ifdef __cplusplus
}
#endif

#endif
|
60
Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h
Normal file
60
Src/libvpShared/corelibs/cdxv/vputil/include/reconstruct.h
Normal file
|
@ -0,0 +1,60 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : Reconstruct.h
|
||||
*
|
||||
* Description : Block Reconstruction module header
|
||||
*
|
||||
* AUTHOR : Paul Wilkins
|
||||
*
|
||||
*****************************************************************************
|
||||
* Revision History
|
||||
*
|
||||
* 1.00 PGW 14/10/99 Created
|
||||
*
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
#define STRICT              /* Strict type checking. */

#ifndef RECONSTRUCT_H
#define RECONSTRUCT_H

#include "type_aliases.h"

/****************************************************************************
*  Functions
*
*  Three families of block reconstruction routines with matching
*  signatures (intra, inter, inter half-pixel).
*****************************************************************************
*/

/* Scalar (no mmx) reconstruction functions */
extern void ScalarReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
extern void ScalarReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
extern void ScalarReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );

/* MMX versions (note the mixed MMX/Mmx spelling of the names) */
extern void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
extern void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
extern void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );

/* WMT versions */
extern void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
extern void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
extern void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );

#endif
|
388
Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj
Normal file
388
Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj
Normal file
|
@ -0,0 +1,388 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>17.0</VCProjectVersion>
|
||||
<ProjectGuid>{F93716CE-8F89-4334-BE64-43705EF3FB70}</ProjectGuid>
|
||||
<RootNamespace>vputil</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0.19041.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<UseOfMfc>false</UseOfMfc>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>17.0.32505.173</_ProjectFileVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
|
||||
<IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<LibraryPath>$(LibraryPath)</LibraryPath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<LibraryPath>$(LibraryPath)</LibraryPath>
|
||||
<IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
|
||||
<OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
|
||||
<IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<LibraryPath>$(LibraryPath)</LibraryPath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<LibraryPath>$(LibraryPath)</LibraryPath>
|
||||
<IntDir>..\..\..\obj\vputil\$(PlatformShortName)_$(Configuration)\</IntDir>
|
||||
<OutDir>..\..\..\lib\$(PlatformShortName)_$(Configuration)\</OutDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Label="Vcpkg">
|
||||
<VcpkgEnableManifest>false</VcpkgEnableManifest>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<VcpkgInstalledDir>
|
||||
</VcpkgInstalledDir>
|
||||
<VcpkgUseStatic>false</VcpkgUseStatic>
|
||||
<VcpkgConfiguration>Debug</VcpkgConfiguration>
|
||||
<VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<VcpkgInstalledDir>
|
||||
</VcpkgInstalledDir>
|
||||
<VcpkgUseStatic>false</VcpkgUseStatic>
|
||||
<VcpkgTriplet>x86-windows-static-md</VcpkgTriplet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<VcpkgInstalledDir>
|
||||
</VcpkgInstalledDir>
|
||||
<VcpkgUseStatic>false</VcpkgUseStatic>
|
||||
<VcpkgConfiguration>Debug</VcpkgConfiguration>
|
||||
<!-- Debug|x64: use the x64 vcpkg triplet. The previous value, x86-windows-static-md, is the Win32 configurations' triplet and would resolve 32-bit packages for a 64-bit build. -->
<VcpkgTriplet>x64-windows-static-md</VcpkgTriplet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Label="Vcpkg" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<VcpkgInstalledDir>
|
||||
</VcpkgInstalledDir>
|
||||
<VcpkgUseStatic>false</VcpkgUseStatic>
|
||||
<!-- Release|x64: use the x64 vcpkg triplet. The previous value, x86-windows-static-md, is the Win32 configurations' triplet and would resolve 32-bit packages for a 64-bit build. -->
<VcpkgTriplet>x64-windows-static-md</VcpkgTriplet>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<PrecompiledHeader />
|
||||
<PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
|
||||
<AssemblerListingLocation />
|
||||
<ObjectFileName>$(IntDir)</ObjectFileName>
|
||||
<ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
<CompileAs>Default</CompileAs>
|
||||
<DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0409</Culture>
|
||||
</ResourceCompile>
|
||||
<Lib>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Lib>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
|
||||
<AssemblerListingLocation>
|
||||
</AssemblerListingLocation>
|
||||
<ObjectFileName>$(IntDir)</ObjectFileName>
|
||||
<ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
<CompileAs>Default</CompileAs>
|
||||
<DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0409</Culture>
|
||||
</ResourceCompile>
|
||||
<Lib>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Lib>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<OmitFramePointers>true</OmitFramePointers>
|
||||
<AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
|
||||
<AssemblerListingLocation />
|
||||
<ObjectFileName>$(IntDir)</ObjectFileName>
|
||||
<ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>None</DebugInformationFormat>
|
||||
<CompileAs>Default</CompileAs>
|
||||
<DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0409</Culture>
|
||||
</ResourceCompile>
|
||||
<Lib>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Lib>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<OmitFramePointers>true</OmitFramePointers>
|
||||
<AdditionalIncludeDirectories>.\include;..\include;..\..\..\..\libvp6\include;..\vp60\include;..\..\include;..\..\..\..\include;..\..\..\..\include\vp60;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeaderOutputFile>$(IntDir)vputil.pch</PrecompiledHeaderOutputFile>
|
||||
<AssemblerListingLocation>
|
||||
</AssemblerListingLocation>
|
||||
<ObjectFileName>$(IntDir)</ObjectFileName>
|
||||
<ProgramDataBaseFileName>$(IntDir)$(TargetName).pdb</ProgramDataBaseFileName>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<DebugInformationFormat>None</DebugInformationFormat>
|
||||
<CompileAs>Default</CompileAs>
|
||||
<DisableSpecificWarnings>4799;%(DisableSpecificWarnings)</DisableSpecificWarnings>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Culture>0x0409</Culture>
|
||||
</ResourceCompile>
|
||||
<Lib>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Lib>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="generic\fdct.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="generic\idctpart.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="generic\reconstruct.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="generic\uoptsystemdependant.c">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="generic\vputil.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\fdctmmx.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\fdctwmt.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\filtmmx.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\filtwmt.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\mmxidct.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\mmxrecon.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\uoptsystemdependant.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\vputilasm.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\wmtidct.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\wmtrecon.c">
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Disabled</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Disabled</Optimization>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">EnableFastChecks</BasicRuntimeChecks>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">MaxSpeed</Optimization>
|
||||
<Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">MaxSpeed</Optimization>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
58
Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters
Normal file
58
Src/libvpShared/corelibs/cdxv/vputil/vputil.vcxproj.filters
Normal file
|
@ -0,0 +1,58 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="generic">
|
||||
<UniqueIdentifier>{f7966dc8-1d55-46a4-b0e6-8584774d721d}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="win32">
|
||||
<UniqueIdentifier>{ad0ce32e-d033-416c-813e-7a7f913ac3fa}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="generic\fdct.c">
|
||||
<Filter>generic</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="generic\idctpart.c">
|
||||
<Filter>generic</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="generic\reconstruct.c">
|
||||
<Filter>generic</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="generic\uoptsystemdependant.c">
|
||||
<Filter>generic</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="generic\vputil.c">
|
||||
<Filter>generic</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\fdctmmx.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\fdctwmt.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\filtmmx.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\filtwmt.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\mmxidct.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\mmxrecon.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\uoptsystemdependant.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\vputilasm.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\wmtidct.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="win32\wmtrecon.c">
|
||||
<Filter>win32</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -0,0 +1,213 @@
|
|||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 42;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
0CAF34950BB78E9F000FB06C /* vputil.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34940BB78E9F000FB06C /* vputil.c */; };
|
||||
0CAF34AC0BB78EDF000FB06C /* idctpart.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34A80BB78EDF000FB06C /* idctpart.c */; };
|
||||
0CAF34AD0BB78EDF000FB06C /* fdct.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34A90BB78EDF000FB06C /* fdct.c */; };
|
||||
0CAF34AE0BB78EDF000FB06C /* uoptsystemdependant.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */; };
|
||||
0CAF34AF0BB78EDF000FB06C /* reconstruct.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CAF34AB0BB78EDF000FB06C /* reconstruct.c */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
0CAF34940BB78E9F000FB06C /* vputil.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = vputil.c; path = generic/vputil.c; sourceTree = "<group>"; };
|
||||
0CAF34A80BB78EDF000FB06C /* idctpart.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = idctpart.c; path = generic/idctpart.c; sourceTree = "<group>"; };
|
||||
0CAF34A90BB78EDF000FB06C /* fdct.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = fdct.c; path = generic/fdct.c; sourceTree = "<group>"; };
|
||||
0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = uoptsystemdependant.c; path = generic/uoptsystemdependant.c; sourceTree = "<group>"; };
|
||||
0CAF34AB0BB78EDF000FB06C /* reconstruct.c */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.c; name = reconstruct.c; path = generic/reconstruct.c; sourceTree = "<group>"; };
|
||||
D2AAC046055464E500DB518D /* libvputil.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvputil.a; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
D289987405E68DCB004EDB86 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
08FB7794FE84155DC02AAC07 /* vputil */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
08FB7795FE84155DC02AAC07 /* Source */,
|
||||
C6A0FF2B0290797F04C91782 /* Documentation */,
|
||||
1AB674ADFE9D54B511CA2CBB /* Products */,
|
||||
);
|
||||
name = vputil;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
08FB7795FE84155DC02AAC07 /* Source */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
0CAF34940BB78E9F000FB06C /* vputil.c */,
|
||||
0CAF34A80BB78EDF000FB06C /* idctpart.c */,
|
||||
0CAF34A90BB78EDF000FB06C /* fdct.c */,
|
||||
0CAF34AA0BB78EDF000FB06C /* uoptsystemdependant.c */,
|
||||
0CAF34AB0BB78EDF000FB06C /* reconstruct.c */,
|
||||
);
|
||||
name = Source;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1AB674ADFE9D54B511CA2CBB /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
D2AAC046055464E500DB518D /* libvputil.a */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
C6A0FF2B0290797F04C91782 /* Documentation */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
);
|
||||
name = Documentation;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXHeadersBuildPhase section */
|
||||
D2AAC043055464E500DB518D /* Headers */ = {
|
||||
isa = PBXHeadersBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXHeadersBuildPhase section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
D2AAC045055464E500DB518D /* vputil */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vputil" */;
|
||||
buildPhases = (
|
||||
D2AAC043055464E500DB518D /* Headers */,
|
||||
D2AAC044055464E500DB518D /* Sources */,
|
||||
D289987405E68DCB004EDB86 /* Frameworks */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = vputil;
|
||||
productName = vputil;
|
||||
productReference = D2AAC046055464E500DB518D /* libvputil.a */;
|
||||
productType = "com.apple.product-type.library.static";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
08FB7793FE84155DC02AAC07 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vputil" */;
|
||||
hasScannedForEncodings = 1;
|
||||
mainGroup = 08FB7794FE84155DC02AAC07 /* vputil */;
|
||||
projectDirPath = "";
|
||||
targets = (
|
||||
D2AAC045055464E500DB518D /* vputil */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
D2AAC044055464E500DB518D /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
0CAF34950BB78E9F000FB06C /* vputil.c in Sources */,
|
||||
0CAF34AC0BB78EDF000FB06C /* idctpart.c in Sources */,
|
||||
0CAF34AD0BB78EDF000FB06C /* fdct.c in Sources */,
|
||||
0CAF34AE0BB78EDF000FB06C /* uoptsystemdependant.c in Sources */,
|
||||
0CAF34AF0BB78EDF000FB06C /* reconstruct.c in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
1DEB91EC08733DB70010E9CD /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
COPY_PHASE_STRIP = NO;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_ENABLE_FIX_AND_CONTINUE = YES;
|
||||
GCC_MODEL_TUNING = G5;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
INSTALL_PATH = /usr/local/lib;
|
||||
PRODUCT_NAME = vputil;
|
||||
ZERO_LINK = YES;
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1DEB91ED08733DB70010E9CD /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ARCHS = (
|
||||
ppc,
|
||||
i386,
|
||||
);
|
||||
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
|
||||
GCC_MODEL_TUNING = G5;
|
||||
INSTALL_PATH = /usr/local/lib;
|
||||
PRODUCT_NAME = vputil;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
1DEB91F008733DB70010E9CD /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
OBJROOT = build;
|
||||
PREBINDING = NO;
|
||||
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
|
||||
SYMROOT = ../../../lib/osx;
|
||||
USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1DEB91F108733DB70010E9CD /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
OBJROOT = build;
|
||||
PREBINDING = NO;
|
||||
SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
|
||||
SYMROOT = ../../../lib/osx;
|
||||
USER_HEADER_SEARCH_PATHS = "include ../include ../../include ../../../include";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
1DEB91EB08733DB70010E9CD /* Build configuration list for PBXNativeTarget "vputil" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1DEB91EC08733DB70010E9CD /* Debug */,
|
||||
1DEB91ED08733DB70010E9CD /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "vputil" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1DEB91F008733DB70010E9CD /* Debug */,
|
||||
1DEB91F108733DB70010E9CD /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
|
||||
}
|
1002
Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm
Normal file
1002
Src/libvpShared/corelibs/cdxv/vputil/win32/fdct_m.asm
Normal file
File diff suppressed because it is too large
Load diff
1398
Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c
Normal file
1398
Src/libvpShared/corelibs/cdxv/vputil/win32/fdctmmx.c
Normal file
File diff suppressed because it is too large
Load diff
810
Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c
Normal file
810
Src/libvpShared/corelibs/cdxv/vputil/win32/fdctwmt.c
Normal file
|
@ -0,0 +1,810 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : Fdctwmt.c
|
||||
*
|
||||
* Description : Forward DCT optimized specifically for Intel P4
|
||||
* processor
|
||||
*
|
||||
* AUTHOR : YaoWu Xu
|
||||
*
|
||||
*****************************************************************************
|
||||
* Revision History
|
||||
*
|
||||
* 1.00 YWX 03/11/02 Configuration baseline
|
||||
*
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
* Module Constants
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
/* Scratch slot for one 128-bit value used by fdct_WMT: each 1-D pass spills
 * (is07 - is34) here with movdqa and reloads it as irot_input_y when forming
 * op[2] and op[6]. The 16-byte alignment is needed because it is accessed
 * with movdqa.
 * NOTE(review): this is file-scope static storage written on every call, so
 * concurrent calls to fdct_WMT would share it - confirm callers serialize. */
__declspec(align(16)) static unsigned short TIRY[8];
|
||||
|
||||
/* Cosine table read by fdct_WMT's inline asm as C(i) = [edx + 16 * i]
 * (despite the "Idct" in its name, it is used by the forward DCT in this file).
 * Row i holds a single 16-bit constant repeated eight times so that one
 * 128-bit operand multiplies all eight lanes at once. Row i equals
 * round(65536 * cos(i * pi / 16)); row 0 is zero and is not referenced in the
 * code visible here, it keeps the row index equal to i.
 * pmulhw interprets the words as signed, so for rows 1-5 (values >= 32768)
 * the asm adds the multiplicand back after the multiply; rows 6 and 7 are
 * below 32768 and are used without that correction. */
__declspec(align(16)) static unsigned short WmtIdctConst[8 * 8] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* C(0): placeholder row */
|
||||
64277,64277,64277,64277,64277,64277,64277,64277, /* C(1): xC1S7 */
|
||||
60547,60547,60547,60547,60547,60547,60547,60547, /* C(2): xC2S6 */
|
||||
54491,54491,54491,54491,54491,54491,54491,54491, /* C(3): xC3S5 */
|
||||
46341,46341,46341,46341,46341,46341,46341,46341, /* C(4): xC4S4 */
|
||||
36410,36410,36410,36410,36410,36410,36410,36410, /* C(5): xC5S3 */
|
||||
25080,25080,25080,25080,25080,25080,25080,25080, /* C(6): xC6S2 */
|
||||
12785,12785,12785,12785,12785,12785,12785,12785 /* C(7): xC7S1 */
|
||||
};
|
||||
|
||||
|
||||
/**************************************************************************************
|
||||
*
|
||||
* Routine: fdct_WMT
|
||||
*
|
||||
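* Description: Forward DCT of an 8x8 block of 16-bit values using SSE2: the block is transposed and a 1-D forward DCT is applied to all 8 columns, then the result is transposed and transformed again.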
|
||||
*
|
||||
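* Input: InputData - 8x8 block of shorts, read and written with movdqa (so it must be 16-byte aligned); it is overwritten in place by the first transpose.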
|
||||
*
|
||||
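* Output: OutputData - 8x8 block of shorts that receives the coefficients; accessed with movdqa (16-byte aligned) and also used as working storage for the second pass.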
|
||||
*
|
||||
* Return: None
|
||||
*
|
||||
* Special Note: None
|
||||
*
|
||||
* Error: None
|
||||
*
|
||||
***************************************************************************************
|
||||
*/
|
||||
void fdct_WMT(short *InputData, short *OutputData)
|
||||
{
|
||||
|
||||
__asm
|
||||
{
|
||||
mov eax, InputData
|
||||
mov ebx, OutputData
|
||||
lea edx, WmtIdctConst
|
||||
|
||||
#define I(i) [eax + 16 * i ]
|
||||
#define O(i) [ebx + 16 * i ]
|
||||
#define C(i) [edx + 16 * i ]
|
||||
|
||||
/******************************************************/
|
||||
/* Do 8x8 Transpose */
|
||||
/******************************************************/
|
||||
|
||||
movdqa xmm4, I(4) /* xmm4=e7e6e5e4e3e2e1e0 */
|
||||
movdqa xmm0, I(5) /* xmm4=f7f6f5f4f3f2f1f0 */
|
||||
|
||||
psllw xmm4, 1
|
||||
psllw xmm0, 1
|
||||
|
||||
movdqa xmm5, xmm4 /* make a copy */
|
||||
punpcklwd xmm4, xmm0 /* xmm4=f3e3f2e2f1e1f0e0 */
|
||||
|
||||
punpckhwd xmm5, xmm0 /* xmm5=f7e7f6e6f5e5f4e4 */
|
||||
movdqa xmm6, I(6) /* xmm6=g7g6g5g4g3g2g1g0 */
|
||||
|
||||
movdqa xmm0, I(7) /* xmm0=h7h6h5h4h3h2h1h0 */
|
||||
|
||||
psllw xmm6, 1
|
||||
psllw xmm0, 1
|
||||
|
||||
movdqa xmm7, xmm6 /* make a copy */
|
||||
|
||||
punpcklwd xmm6, xmm0 /* xmm6=h3g3h3g2h1g1h0g0 */
|
||||
punpckhwd xmm7, xmm0 /* xmm7=h7g7h6g6h5g5h4g4 */
|
||||
|
||||
movdqa xmm3, xmm4 /* make a copy */
|
||||
punpckldq xmm4, xmm6 /* xmm4=h1g1f1e1h0g0f0e0 */
|
||||
|
||||
punpckhdq xmm3, xmm6 /* xmm3=h3g3g3e3h2g2f2e2 */
|
||||
movdqa I(6), xmm3 /* save h3g3g3e3h2g2f2e2 */
|
||||
/* Free xmm6 */
|
||||
movdqa xmm6, xmm5 /* make a copy */
|
||||
punpckldq xmm5, xmm7 /* xmm5=h5g5f5e5h4g4f4e4 */
|
||||
|
||||
punpckhdq xmm6, xmm7 /* xmm6=h7g7f7e7h6g6f6e6 */
|
||||
movdqa xmm0, I(0) /* xmm0=a7a6a5a4a3a2a1a0 */
|
||||
/* Free xmm7 */
|
||||
movdqa xmm1, I(1) /* xmm1=b7b6b5b4b3b2b1b0 */
|
||||
|
||||
psllw xmm0, 1
|
||||
psllw xmm1, 1
|
||||
|
||||
movdqa xmm7, xmm0 /* make a copy */
|
||||
|
||||
punpcklwd xmm0, xmm1 /* xmm0=b3a3b2a2b1a1b0a0 */
|
||||
punpckhwd xmm7, xmm1 /* xmm7=b7a7b6a6b5a5b4a4 */
|
||||
/* Free xmm1 */
|
||||
movdqa xmm2, I(2) /* xmm2=c7c6c5c4c3c2c1c0 */
|
||||
movdqa xmm3, I(3) /* xmm3=d7d6d5d4d3d2d1d0 */
|
||||
|
||||
psllw xmm2, 1
|
||||
psllw xmm3, 1
|
||||
|
||||
movdqa xmm1, xmm2 /* make a copy */
|
||||
punpcklwd xmm2, xmm3 /* xmm2=d3c3d2c2d1c1d0c0 */
|
||||
|
||||
punpckhwd xmm1, xmm3 /* xmm1=d7c7d6c6d5c5d4c4 */
|
||||
movdqa xmm3, xmm0 /* make a copy */
|
||||
|
||||
punpckldq xmm0, xmm2 /* xmm0=d1c1b1a1d0c0b0a0 */
|
||||
punpckhdq xmm3, xmm2 /* xmm3=d3c3b3a3d2c2b2a2 */
|
||||
/* Free xmm2 */
|
||||
movdqa xmm2, xmm7 /* make a copy */
|
||||
punpckldq xmm2, xmm1 /* xmm2=d5c5b5a5d4c4b4a4 */
|
||||
|
||||
punpckhdq xmm7, xmm1 /* xmm7=d7c7b7a7d6c6b6a6 */
|
||||
movdqa xmm1, xmm0 /* make a copy */
|
||||
|
||||
punpcklqdq xmm0, xmm4 /* xmm0=h0g0f0e0d0c0b0a0 */
|
||||
punpckhqdq xmm1, xmm4 /* xmm1=h1g1g1e1d1c1b1a1 */
|
||||
|
||||
movdqa I(0), xmm0 /* save I(0) */
|
||||
movdqa I(1), xmm1 /* save I(1) */
|
||||
|
||||
movdqa xmm0, I(6) /* load h3g3g3e3h2g2f2e2 */
|
||||
movdqa xmm1, xmm3 /* make a copy */
|
||||
|
||||
punpcklqdq xmm1, xmm0 /* xmm1=h2g2f2e2d2c2b2a2 */
|
||||
punpckhqdq xmm3, xmm0 /* xmm3=h3g3f3e3d3c3b3a3 */
|
||||
|
||||
movdqa xmm4, xmm2 /* make a copy */
|
||||
punpcklqdq xmm4, xmm5 /* xmm4=h4g4f4e4d4c4b4a4 */
|
||||
|
||||
punpckhqdq xmm2, xmm5 /* xmm2=h5g5f5e5d5c5b5a5 */
|
||||
movdqa I(2), xmm1 /* save I(2) */
|
||||
|
||||
movdqa I(3), xmm3 /* save I(3) */
|
||||
movdqa I(4), xmm4 /* save I(4) */
|
||||
|
||||
movdqa I(5), xmm2 /* save I(5) */
|
||||
movdqa xmm5, xmm7 /* make a copy */
|
||||
|
||||
punpcklqdq xmm5, xmm6 /* xmm5=h6g6f6e6d6c6b6a6 */
|
||||
punpckhqdq xmm7, xmm6 /* xmm7=h7g7f7e7d7c7b7a7 */
|
||||
|
||||
movdqa I(6), xmm5 /* save I(6) */
|
||||
movdqa I(7), xmm7 /* save I(7) */
|
||||
|
||||
/******************************************************/
|
||||
/* Done with transpose - Let's do the forward DCT */
|
||||
/******************************************************/
|
||||
|
||||
movdqa xmm0, I(0) /* xmm0 = ip0 */
|
||||
movdqa xmm1, I(1) /* xmm1 = ip1 */
|
||||
|
||||
movdqa xmm2, I(3) /* xmm2 = ip3 */
|
||||
movdqa xmm3, I(5) /* xmm3 = ip5 */
|
||||
|
||||
movdqa xmm4, xmm0 /* xmm4 = ip0 */
|
||||
movdqa xmm5, xmm1 /* xmm5 = ip1 */
|
||||
|
||||
movdqa xmm6, xmm2 /* xmm6 = ip3 */
|
||||
movdqa xmm7, xmm3 /* xmm7 = ip5 */
|
||||
|
||||
paddsw xmm0, I(7) /* xmm0 = ip0 + ip7 */
|
||||
paddsw xmm1, I(2) /* xmm1 = ip1 + ip2 */
|
||||
|
||||
paddsw xmm2, I(4) /* xmm2 = ip3 + ip4 */
|
||||
paddsw xmm3, I(6) /* xmm3 = ip5 + ip6 */
|
||||
|
||||
psubsw xmm4, I(7) /* xmm4 = ip0 - ip7 */
|
||||
psubsw xmm5, I(2) /* xmm5 = ip1 - ip2 */
|
||||
|
||||
psubsw xmm0, xmm2 /* xmm0 = is07 - is34 */
|
||||
paddsw xmm2, xmm2 /* xmm2 = is34 * 2 */
|
||||
|
||||
psubsw xmm6, I(4) /* xmm6 = ip3 - ip4 */
|
||||
paddsw xmm2, xmm0 /* xmm2 = is07 + is34 */
|
||||
|
||||
psubsw xmm1, xmm3 /* xmm1 = is12 - is56 */
|
||||
movdqa TIRY, xmm0 /* save is07-is34 */
|
||||
|
||||
paddsw xmm3, xmm3 /* xmm3 = is56 * 2 */
|
||||
paddsw xmm3, xmm1 /* xmm3 = is12 + is56 */
|
||||
|
||||
psubsw xmm7, I(6) /* xmm7 = ip5 -ip6 */
|
||||
psubsw xmm5, xmm7 /* xmm5 = id12 - id56 */
|
||||
|
||||
paddsw xmm7, xmm7 /* xmm7 = id56 * 2 */
|
||||
paddsw xmm7, xmm5 /* xmm7 = id12 + id56 */
|
||||
/*---------------------------------------------------------*/
|
||||
/* op0 and op4
|
||||
/*---------------------------------------------------------*/
|
||||
psubsw xmm2, xmm3 /* xmm2 = is0734 - is1256 */
|
||||
paddsw xmm3, xmm3 /* xmm3 = is1256 * 2 */
|
||||
|
||||
movdqa xmm0, xmm2 /* xmm0 = is0734 - is1256 */
|
||||
paddsw xmm3, xmm2 /* xmm3 = is0734 + is1256 */
|
||||
|
||||
pmulhw xmm0, C(4) /* xmm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
|
||||
paddw xmm0, xmm2 /* xmm0 = xC4S4 * ( is0734 - is1256 ) */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm0, xmm2 /* Truncate xmm0, now it is op[4] */
|
||||
|
||||
movdqa xmm2, xmm3 /* xmm2 = is0734 + is1256 */
|
||||
movdqa O(4), xmm0 /* op4, now xmm0,xmm2 are free */
|
||||
|
||||
movdqa xmm0, xmm3 /* xmm0 = is0734 + is1256 */
|
||||
pmulhw xmm3, C(4) /* xmm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm3, xmm0 /* xmm3 = xC4S4 * ( is0734 +is1256 ) */
|
||||
|
||||
paddw xmm3, xmm2 /* Truncate xmm3, now it is op[0] */
|
||||
movdqa O(0), xmm3 /* save op0 */
|
||||
/*---------------------------------------------------------*/
|
||||
/* op2 and op6
|
||||
/*---------------------------------------------------------*/
|
||||
movdqa xmm3, TIRY /* xmm3 = irot_input_y */
|
||||
pmulhw xmm3, C(2) /* xmm3 = xC2S6 * irot_input_y - irot_input_y */
|
||||
|
||||
movdqa xmm2, TIRY /* xmm2 = irot_input_y */
|
||||
movdqa xmm0, xmm2 /* xmm0 = irot_input_y */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm3, xmm0 /* xmm3 = xC2S6 * irot_input_y */
|
||||
|
||||
paddw xmm3, xmm2 /* Truncated */
|
||||
movdqa xmm0, xmm5 /* xmm0 = id12 - id56 */
|
||||
|
||||
|
||||
movdqa xmm2, xmm5 /* xmm2 = id12 - id56 */
|
||||
pmulhw xmm0, C(6) /* xmm0 = xC6S2 * irot_input_x */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm0, xmm2 /* Truncated */
|
||||
|
||||
paddsw xmm3, xmm0 /* op[2] */
|
||||
movdqa O(2), xmm3 /* save op[2] */
|
||||
|
||||
|
||||
movdqa xmm0, xmm5 /* xmm0 = id12 - id56 */
|
||||
movdqa xmm2, xmm5 /* xmm0 = id12 - id56 */
|
||||
|
||||
pmulhw xmm5, C(2) /* xmm5 = xC2S6 * irot_input_x - irot_input_x */
|
||||
psrlw xmm2, 15
|
||||
|
||||
movdqa xmm3, TIRY /* xmm3 = irot_input_y */
|
||||
paddw xmm5, xmm0 /* xmm5 = xC2S6 * irot_input_x */
|
||||
|
||||
paddw xmm5, xmm2 /* Truncated */
|
||||
movdqa xmm2, xmm3 /* xmm2 = irot_input_y */
|
||||
|
||||
pmulhw xmm3, C(6) /* mm3 = xC6S2 * irot_input_y */
|
||||
psrlw xmm2, 15
|
||||
|
||||
paddw xmm3, xmm2 /* Truncated */
|
||||
psubsw xmm3, xmm5 /* xmm3 = op[6] */
|
||||
|
||||
movdqa O(6), xmm3
|
||||
/*-----------------------------------------------------------------------*/
|
||||
/* icommon_product1, icommon_product2 */
|
||||
/*-----------------------------------------------------------------------*/
|
||||
movdqa xmm0, C(4) /* xmm0 = xC4s4 */
|
||||
movdqa xmm2, xmm1 /* xmm2 = is12 - is56 */
|
||||
|
||||
movdqa xmm3, xmm1 /* xmm3 = is12 - is56 */
|
||||
pmulhw xmm1, xmm0 /* xmm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm1, xmm3 /* xmm1 = xC4S4 * ( is12 - is56 ) */
|
||||
|
||||
paddw xmm1, xmm2 /* Truncate xmm1, now it is icommon_product1 */
|
||||
movdqa xmm2, xmm7 /* xmm2 = id12 + id56 */
|
||||
|
||||
movdqa xmm3, xmm7 /* xmm3 = id12 + id56 */
|
||||
pmulhw xmm7, xmm0 /* xmm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
|
||||
|
||||
psrlw xmm2, 15 /* For trucation */
|
||||
paddw xmm7, xmm3 /* xmm7 = xC4S4 * ( id12 + id56 ) */
|
||||
|
||||
paddw xmm7, xmm2 /* Truncate xmm7, now it is icommon_product2 */
|
||||
/*---------------------------------------------------------*/
|
||||
pxor xmm0, xmm0 /* Clear xmm0 */
|
||||
psubsw xmm0, xmm6 /* xmm0 = - id34 */
|
||||
|
||||
psubsw xmm0, xmm7 /* xmm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
|
||||
paddsw xmm6, xmm6 /* xmm6 = id34 * 2 */
|
||||
|
||||
paddsw xmm6, xmm0 /* xmm6 = id34 - icommon_product2 = irot_input_x for 35 */
|
||||
psubsw xmm4, xmm1 /* xmm4 = id07 - icommon_product1 = irot_input_x for 35*/
|
||||
|
||||
paddsw xmm1, xmm1 /* xmm1 = icommon_product1 * 2 */
|
||||
paddsw xmm1, xmm4 /* xmm1 = id07 + icommon_product1 = irot_input_x for 17*/
|
||||
|
||||
/*---------------------------------------------------------*/
|
||||
/* op1 and op7
|
||||
/*---------------------------------------------------------*/
|
||||
|
||||
movdqa xmm7, C(1) /* xC1S7 */
|
||||
movdqa xmm2, xmm1 /* xmm2 = irot_input_x */
|
||||
|
||||
movdqa xmm3, xmm1; /* xmm3 = irot_input_x */
|
||||
pmulhw xmm1, xmm7 /* xmm1 = xC1S7 * irot_input_x - irot_input_x */
|
||||
|
||||
movdqa xmm7, C(7) /* xC7S1 */
|
||||
psrlw xmm2, 15 /* for trucation */
|
||||
|
||||
paddw xmm1, xmm3 /* xmm1 = xC1S7 * irot_input_x */
|
||||
paddw xmm1, xmm2 /* Trucated */
|
||||
|
||||
pmulhw xmm3, xmm7 /* xmm3 = xC7S1 * irot_input_x */
|
||||
paddw xmm3, xmm2 /* Truncated */
|
||||
|
||||
movdqa xmm5, xmm0 /* xmm5 = irot_input_y */
|
||||
movdqa xmm2, xmm0 /* xmm2 = irot_input_y */
|
||||
|
||||
movdqa xmm7, C(1) /* xC1S7 */
|
||||
pmulhw xmm0, xmm7 /* xmm0 = xC1S7 * irot_input_y - irot_input_y */
|
||||
|
||||
movdqa xmm7, C(7) /* xC7S1 */
|
||||
psrlw xmm2, 15 /* for trucation */
|
||||
|
||||
paddw xmm0, xmm5 /* xmm0 = xC1S7 * irot_input_y */
|
||||
paddw xmm0, xmm2 /* Truncated */
|
||||
|
||||
pmulhw xmm5, xmm7 /* xmm5 = xC7S1 * irot_input_y */
|
||||
paddw xmm5, xmm2 /* Truncated */
|
||||
|
||||
psubsw xmm1, xmm5 /* xmm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
|
||||
paddsw xmm3, xmm0 /* xmm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
|
||||
|
||||
movdqa O(1), xmm1
|
||||
movdqa O(7), xmm3
|
||||
/*---------------------------------------------------------*/
|
||||
/* op3 and op5
|
||||
/*---------------------------------------------------------*/
|
||||
movdqa xmm0, C(3) /* xC3S5 */
|
||||
movdqa xmm1, C(5) /* xC5S3 */
|
||||
|
||||
movdqa xmm5,xmm6 /* irot_input_x */
|
||||
movdqa xmm7,xmm6 /* irot_input_x */
|
||||
|
||||
movdqa xmm2,xmm4 /* irot_input_y */
|
||||
movdqa xmm3,xmm4 /* irot_input_y */
|
||||
|
||||
pmulhw xmm4,xmm0 /* xmm4 = xC3S5 * irot_input_x - irot_input_x */
|
||||
pmulhw xmm6,xmm1 /* xmm6 = xC5S3 * irot_input_y - irot_input_y */
|
||||
|
||||
psrlw xmm2,15 /* for trucation */
|
||||
psrlw xmm5,15 /* for trucation */
|
||||
|
||||
paddw xmm4,xmm3 /* xmm4 = xC3S5 * irot_input_x */
|
||||
paddw xmm6,xmm7 /* xmm6 = xC5S3 * irot_input_y */
|
||||
|
||||
paddw xmm4,xmm2 /* Truncated */
|
||||
paddw xmm6,xmm5 /* Truncated */
|
||||
|
||||
psubsw xmm4,xmm6 /* op [3] */
|
||||
movdqa O(3),xmm4 /* Save Op[3] */
|
||||
|
||||
movdqa xmm4,xmm3 /* irot_input_y */
|
||||
movdqa xmm6,xmm7 /* irot_input_x */
|
||||
|
||||
pmulhw xmm3,xmm1 /* mm3 = xC5S3 * irot_input_x - irot_input_x */
|
||||
pmulhw xmm7,xmm0 /* mm7 = xC3S5 * irot_input_y - irot_input_y */
|
||||
|
||||
paddw xmm4,xmm2 /* Trucated */
|
||||
paddw xmm6,xmm5 /* Trucated */
|
||||
|
||||
paddw xmm3,xmm4 /* xmm3 = xC5S3 * irot_input_x */
|
||||
paddw xmm7,xmm6 /* mm7 = xC3S5 * irot_input_y */
|
||||
|
||||
paddw xmm3,xmm7 /* Op[5] */
|
||||
movdqa O(5),xmm3 /* Save Op[5] */
|
||||
/*---------------------------------------------------------*/
|
||||
/* End of 8 1-D FDCT */
|
||||
/*---------------------------------------------------------*/
|
||||
#undef I
|
||||
#undef O
|
||||
#define I(i) [ebx + 16 * i ]
|
||||
#define O(i) [ebx + 16 * i ]
|
||||
|
||||
/******************************************************/
|
||||
/* Do 8x8 Transpose */
|
||||
/******************************************************/
|
||||
|
||||
movdqa xmm4, I(4) /* xmm4=e7e6e5e4e3e2e1e0 */
|
||||
movdqa xmm0, I(5) /* xmm4=f7f6f5f4f3f2f1f0 */
|
||||
|
||||
movdqa xmm5, xmm4 /* make a copy */
|
||||
punpcklwd xmm4, xmm0 /* xmm4=f3e3f2e2f1e1f0e0 */
|
||||
|
||||
punpckhwd xmm5, xmm0 /* xmm5=f7e7f6e6f5e5f4e4 */
|
||||
movdqa xmm6, I(6) /* xmm6=g7g6g5g4g3g2g1g0 */
|
||||
|
||||
movdqa xmm0, I(7) /* xmm0=h7h6h5h4h3h2h1h0 */
|
||||
movdqa xmm7, xmm6 /* make a copy */
|
||||
|
||||
punpcklwd xmm6, xmm0 /* xmm6=h3g3h3g2h1g1h0g0 */
|
||||
punpckhwd xmm7, xmm0 /* xmm7=h7g7h6g6h5g5h4g4 */
|
||||
|
||||
movdqa xmm3, xmm4 /* make a copy */
|
||||
punpckldq xmm4, xmm6 /* xmm4=h1g1f1e1h0g0f0e0 */
|
||||
|
||||
punpckhdq xmm3, xmm6 /* xmm3=h3g3g3e3h2g2f2e2 */
|
||||
movdqa I(6), xmm3 /* save h3g3g3e3h2g2f2e2 */
|
||||
/* Free xmm6 */
|
||||
movdqa xmm6, xmm5 /* make a copy */
|
||||
punpckldq xmm5, xmm7 /* xmm5=h5g5f5e5h4g4f4e4 */
|
||||
|
||||
punpckhdq xmm6, xmm7 /* xmm6=h7g7f7e7h6g6f6e6 */
|
||||
movdqa xmm0, I(0) /* xmm0=a7a6a5a4a3a2a1a0 */
|
||||
/* Free xmm7 */
|
||||
movdqa xmm1, I(1) /* xmm1=b7b6b5b4b3b2b1b0 */
|
||||
movdqa xmm7, xmm0 /* make a copy */
|
||||
|
||||
punpcklwd xmm0, xmm1 /* xmm0=b3a3b2a2b1a1b0a0 */
|
||||
punpckhwd xmm7, xmm1 /* xmm7=b7a7b6a6b5a5b4a4 */
|
||||
/* Free xmm1 */
|
||||
movdqa xmm2, I(2) /* xmm2=c7c6c5c4c3c2c1c0 */
|
||||
movdqa xmm3, I(3) /* xmm3=d7d6d5d4d3d2d1d0 */
|
||||
|
||||
movdqa xmm1, xmm2 /* make a copy */
|
||||
punpcklwd xmm2, xmm3 /* xmm2=d3c3d2c2d1c1d0c0 */
|
||||
|
||||
punpckhwd xmm1, xmm3 /* xmm1=d7c7d6c6d5c5d4c4 */
|
||||
movdqa xmm3, xmm0 /* make a copy */
|
||||
|
||||
punpckldq xmm0, xmm2 /* xmm0=d1c1b1a1d0c0b0a0 */
|
||||
punpckhdq xmm3, xmm2 /* xmm3=d3c3b3a3d2c2b2a2 */
|
||||
/* Free xmm2 */
|
||||
movdqa xmm2, xmm7 /* make a copy */
|
||||
punpckldq xmm2, xmm1 /* xmm2=d5c5b5a5d4c4b4a4 */
|
||||
|
||||
punpckhdq xmm7, xmm1 /* xmm7=d7c7b7a7d6c6b6a6 */
|
||||
movdqa xmm1, xmm0 /* make a copy */
|
||||
|
||||
punpcklqdq xmm0, xmm4 /* xmm0=h0g0f0e0d0c0b0a0 */
|
||||
punpckhqdq xmm1, xmm4 /* xmm1=h1g1g1e1d1c1b1a1 */
|
||||
|
||||
movdqa I(0), xmm0 /* save I(0) */
|
||||
movdqa I(1), xmm1 /* save I(1) */
|
||||
|
||||
movdqa xmm0, I(6) /* load h3g3g3e3h2g2f2e2 */
|
||||
movdqa xmm1, xmm3 /* make a copy */
|
||||
|
||||
punpcklqdq xmm1, xmm0 /* xmm1=h2g2f2e2d2c2b2a2 */
|
||||
punpckhqdq xmm3, xmm0 /* xmm3=h3g3f3e3d3c3b3a3 */
|
||||
|
||||
movdqa xmm4, xmm2 /* make a copy */
|
||||
punpcklqdq xmm4, xmm5 /* xmm4=h4g4f4e4d4c4b4a4 */
|
||||
|
||||
punpckhqdq xmm2, xmm5 /* xmm2=h5g5f5e5d5c5b5a5 */
|
||||
movdqa I(2), xmm1 /* save I(2) */
|
||||
|
||||
movdqa I(3), xmm3 /* save I(3) */
|
||||
movdqa I(4), xmm4 /* save I(4) */
|
||||
|
||||
movdqa I(5), xmm2 /* save I(5) */
|
||||
movdqa xmm5, xmm7 /* make a copy */
|
||||
|
||||
punpcklqdq xmm5, xmm6 /* xmm5=h6g6f6e6d6c6b6a6 */
|
||||
punpckhqdq xmm7, xmm6 /* xmm7=h7g7f7e7d7c7b7a7 */
|
||||
|
||||
movdqa I(6), xmm5 /* save I(6) */
|
||||
movdqa I(7), xmm7 /* save I(7) */
|
||||
|
||||
/******************************************************/
|
||||
/* Done with transpose - Let's do the forward DCT */
|
||||
/******************************************************/
|
||||
|
||||
movdqa xmm0, I(0) /* xmm0 = ip0 */
|
||||
movdqa xmm1, I(1) /* xmm1 = ip1 */
|
||||
|
||||
movdqa xmm2, I(3) /* xmm2 = ip3 */
|
||||
movdqa xmm3, I(5) /* xmm3 = ip5 */
|
||||
|
||||
movdqa xmm4, xmm0 /* xmm4 = ip0 */
|
||||
movdqa xmm5, xmm1 /* xmm5 = ip1 */
|
||||
|
||||
movdqa xmm6, xmm2 /* xmm6 = ip3 */
|
||||
movdqa xmm7, xmm3 /* xmm7 = ip5 */
|
||||
|
||||
paddsw xmm0, I(7) /* xmm0 = ip0 + ip7 */
|
||||
paddsw xmm1, I(2) /* xmm1 = ip1 + ip2 */
|
||||
|
||||
paddsw xmm2, I(4) /* xmm2 = ip3 + ip4 */
|
||||
paddsw xmm3, I(6) /* xmm3 = ip5 + ip6 */
|
||||
|
||||
psubsw xmm4, I(7) /* xmm4 = ip0 - ip7 */
|
||||
psubsw xmm5, I(2) /* xmm5 = ip1 - ip2 */
|
||||
|
||||
psubsw xmm0, xmm2 /* xmm0 = is07 - is34 */
|
||||
paddsw xmm2, xmm2 /* xmm2 = is34 * 2 */
|
||||
|
||||
psubsw xmm6, I(4) /* xmm6 = ip3 - ip4 */
|
||||
paddsw xmm2, xmm0 /* xmm2 = is07 + is34 */
|
||||
|
||||
psubsw xmm1, xmm3 /* xmm1 = is12 - is56 */
|
||||
movdqa TIRY, xmm0 /* save is07-is34 */
|
||||
|
||||
paddsw xmm3, xmm3 /* xmm3 = is56 * 2 */
|
||||
paddsw xmm3, xmm1 /* xmm3 = is12 + is56 */
|
||||
|
||||
psubsw xmm7, I(6) /* xmm7 = ip5 -ip6 */
|
||||
psubsw xmm5, xmm7 /* xmm5 = id12 - id56 */
|
||||
|
||||
paddsw xmm7, xmm7 /* xmm7 = id56 * 2 */
|
||||
paddsw xmm7, xmm5 /* xmm7 = id12 + id56 */
|
||||
/*---------------------------------------------------------*/
|
||||
/* op0 and op4
|
||||
/*---------------------------------------------------------*/
|
||||
#if 0
|
||||
movdqa xmm0, xmm2 /* xmm0 =xmm2= is0734 */
|
||||
pmulhw xmm2, C(4) /* xC4S4 * is0734 - is0734 */
|
||||
|
||||
paddw xmm2, xmm0 /* XC4S4 * is0734 */
|
||||
movdqa xmm0, xmm3 /* xmm0 =xmm3= is1256 */
|
||||
|
||||
pmulhw xmm3, C(4) /* xC4S4 * is1256 - is1256 */
|
||||
paddw xmm3, xmm0 /* xC4S4 * is1256 */
|
||||
|
||||
|
||||
movdqa xmm0, xmm2
|
||||
paddsw xmm2, xmm3 /* xC4S4 * ( is0734 +is1256 ) */
|
||||
|
||||
psubsw xmm0, xmm3 /* xC4S4 * ( is0734 -is1256 ) */
|
||||
movdqa xmm3, xmm2
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddsw xmm3, xmm2
|
||||
|
||||
movdqa xmm2, xmm0
|
||||
movdqa O(0), xmm3
|
||||
|
||||
psrlw xmm0, 15
|
||||
paddsw xmm2, xmm0
|
||||
|
||||
movdqa O(4), xmm2
|
||||
|
||||
|
||||
#else
|
||||
|
||||
|
||||
psubsw xmm2, xmm3 /* xmm2 = is0734 - is1256 */
|
||||
paddsw xmm3, xmm3 /* xmm3 = is1256 * 2 */
|
||||
|
||||
movdqa xmm0, xmm2 /* xmm0 = is0734 - is1256 */
|
||||
paddsw xmm3, xmm2 /* xmm3 = is0734 + is1256 */
|
||||
|
||||
pmulhw xmm0, C(4) /* xmm0 = xC4S4 * ( is0734 - is1256 ) - ( is0734 - is1256 ) */
|
||||
paddw xmm0, xmm2 /* xmm0 = xC4S4 * ( is0734 - is1256 ) */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm0, xmm2 /* Truncate xmm0, now it is op[4] */
|
||||
|
||||
movdqa xmm2, xmm0
|
||||
psrlw xmm0, 15
|
||||
|
||||
paddw xmm0, xmm2
|
||||
psraw xmm0, 1
|
||||
|
||||
movdqa O(4), xmm0 /* op4, now xmm0,xmm2 are free */
|
||||
movdqa xmm2, xmm3 /* xmm2 = is0734 + is1256 */
|
||||
|
||||
|
||||
movdqa xmm0, xmm3 /* xmm0 = is0734 + is1256 */
|
||||
pmulhw xmm3, C(4) /* xmm3 = xC4S4 * ( is0734 +is1256 ) - ( is0734 +is1256 ) */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm3, xmm0 /* xmm3 = xC4S4 * ( is0734 +is1256 ) */
|
||||
|
||||
paddw xmm3, xmm2 /* Truncate xmm3, now it is op[0] */
|
||||
movdqa xmm2, xmm3
|
||||
|
||||
psrlw xmm3, 15
|
||||
paddw xmm3, xmm2
|
||||
|
||||
psraw xmm3, 1
|
||||
movdqa O(0), xmm3 /* save op0 */
|
||||
#endif
|
||||
/*---------------------------------------------------------*/
|
||||
/* op2 and op6
|
||||
/*---------------------------------------------------------*/
|
||||
movdqa xmm3, TIRY /* xmm3 = irot_input_y */
|
||||
pmulhw xmm3, C(2) /* xmm3 = xC2S6 * irot_input_y - irot_input_y */
|
||||
|
||||
movdqa xmm2, TIRY /* xmm2 = irot_input_y */
|
||||
movdqa xmm0, xmm2 /* xmm0 = irot_input_y */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm3, xmm0 /* xmm3 = xC2S6 * irot_input_y */
|
||||
|
||||
paddw xmm3, xmm2 /* Truncated */
|
||||
movdqa xmm0, xmm5 /* xmm0 = id12 - id56 */
|
||||
|
||||
|
||||
movdqa xmm2, xmm5 /* xmm2 = id12 - id56 */
|
||||
pmulhw xmm0, C(6) /* xmm0 = xC6S2 * irot_input_x */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm0, xmm2 /* Truncated */
|
||||
|
||||
paddsw xmm3, xmm0 /* op[2] */
|
||||
movdqa xmm0, xmm3
|
||||
|
||||
psrlw xmm3, 15
|
||||
paddw xmm3, xmm0
|
||||
|
||||
psraw xmm3, 1
|
||||
movdqa O(2), xmm3 /* save op[2] */
|
||||
|
||||
|
||||
movdqa xmm0, xmm5 /* xmm0 = id12 - id56 */
|
||||
movdqa xmm2, xmm5 /* xmm0 = id12 - id56 */
|
||||
|
||||
pmulhw xmm5, C(2) /* xmm5 = xC2S6 * irot_input_x - irot_input_x */
|
||||
psrlw xmm2, 15
|
||||
|
||||
movdqa xmm3, TIRY /* xmm3 = irot_input_y */
|
||||
paddw xmm5, xmm0 /* xmm5 = xC2S6 * irot_input_x */
|
||||
|
||||
paddw xmm5, xmm2 /* Truncated */
|
||||
movdqa xmm2, xmm3 /* xmm2 = irot_input_y */
|
||||
|
||||
pmulhw xmm3, C(6) /* mm3 = xC6S2 * irot_input_y */
|
||||
psrlw xmm2, 15
|
||||
|
||||
paddw xmm3, xmm2 /* Truncated */
|
||||
psubsw xmm3, xmm5 /* xmm3 = op[6] */
|
||||
|
||||
movdqa xmm5, xmm3
|
||||
psrlw xmm3, 15
|
||||
|
||||
paddw xmm3, xmm5
|
||||
psraw xmm3, 1
|
||||
|
||||
movdqa O(6), xmm3
|
||||
/*-----------------------------------------------------------------------*/
|
||||
/* icommon_product1, icommon_product2 */
|
||||
/*-----------------------------------------------------------------------*/
|
||||
movdqa xmm0, C(4) /* xmm0 = xC4s4 */
|
||||
movdqa xmm2, xmm1 /* xmm2 = is12 - is56 */
|
||||
|
||||
movdqa xmm3, xmm1 /* xmm3 = is12 - is56 */
|
||||
pmulhw xmm1, xmm0 /* xmm0 = xC4S4 * ( is12 - is56 ) - ( is12 - is56 ) */
|
||||
|
||||
psrlw xmm2, 15
|
||||
paddw xmm1, xmm3 /* xmm1 = xC4S4 * ( is12 - is56 ) */
|
||||
|
||||
paddw xmm1, xmm2 /* Truncate xmm1, now it is icommon_product1 */
|
||||
movdqa xmm2, xmm7 /* xmm2 = id12 + id56 */
|
||||
|
||||
movdqa xmm3, xmm7 /* xmm3 = id12 + id56 */
|
||||
pmulhw xmm7, xmm0 /* xmm7 = xC4S4 * ( id12 + id56 ) - ( id12 + id56 ) */
|
||||
|
||||
psrlw xmm2, 15 /* For trucation */
|
||||
paddw xmm7, xmm3 /* xmm7 = xC4S4 * ( id12 + id56 ) */
|
||||
|
||||
paddw xmm7, xmm2 /* Truncate xmm7, now it is icommon_product2 */
|
||||
/*---------------------------------------------------------*/
|
||||
pxor xmm0, xmm0 /* Clear xmm0 */
|
||||
psubsw xmm0, xmm6 /* xmm0 = - id34 */
|
||||
|
||||
psubsw xmm0, xmm7 /* xmm0 = - ( id34 + idcommon_product2 ) = irot_input_y for 17*/
|
||||
paddsw xmm6, xmm6 /* xmm6 = id34 * 2 */
|
||||
|
||||
paddsw xmm6, xmm0 /* xmm6 = id34 - icommon_product2 = irot_input_x for 35 */
|
||||
psubsw xmm4, xmm1 /* xmm4 = id07 - icommon_product1 = irot_input_x for 35*/
|
||||
|
||||
paddsw xmm1, xmm1 /* xmm1 = icommon_product1 * 2 */
|
||||
paddsw xmm1, xmm4 /* xmm1 = id07 + icommon_product1 = irot_input_x for 17*/
|
||||
|
||||
/*---------------------------------------------------------*/
|
||||
/* op1 and op7
|
||||
/*---------------------------------------------------------*/
|
||||
|
||||
movdqa xmm7, C(1) /* xC1S7 */
|
||||
movdqa xmm2, xmm1 /* xmm2 = irot_input_x */
|
||||
|
||||
movdqa xmm3, xmm1; /* xmm3 = irot_input_x */
|
||||
pmulhw xmm1, xmm7 /* xmm1 = xC1S7 * irot_input_x - irot_input_x */
|
||||
|
||||
movdqa xmm7, C(7) /* xC7S1 */
|
||||
psrlw xmm2, 15 /* for trucation */
|
||||
|
||||
paddw xmm1, xmm3 /* xmm1 = xC1S7 * irot_input_x */
|
||||
paddw xmm1, xmm2 /* Trucated */
|
||||
|
||||
pmulhw xmm3, xmm7 /* xmm3 = xC7S1 * irot_input_x */
|
||||
paddw xmm3, xmm2 /* Truncated */
|
||||
|
||||
movdqa xmm5, xmm0 /* xmm5 = irot_input_y */
|
||||
movdqa xmm2, xmm0 /* xmm2 = irot_input_y */
|
||||
|
||||
movdqa xmm7, C(1) /* xC1S7 */
|
||||
pmulhw xmm0, xmm7 /* xmm0 = xC1S7 * irot_input_y - irot_input_y */
|
||||
|
||||
movdqa xmm7, C(7) /* xC7S1 */
|
||||
psrlw xmm2, 15 /* for trucation */
|
||||
|
||||
paddw xmm0, xmm5 /* xmm0 = xC1S7 * irot_input_y */
|
||||
paddw xmm0, xmm2 /* Truncated */
|
||||
|
||||
pmulhw xmm5, xmm7 /* xmm5 = xC7S1 * irot_input_y */
|
||||
paddw xmm5, xmm2 /* Truncated */
|
||||
|
||||
psubsw xmm1, xmm5 /* xmm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = op[1] */
|
||||
paddsw xmm3, xmm0 /* xmm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = op[7] */
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
movdqa xmm0, xmm3
|
||||
|
||||
psrlw xmm1, 15
|
||||
psrlw xmm3, 15
|
||||
|
||||
paddw xmm1, xmm5
|
||||
paddw xmm3, xmm0
|
||||
|
||||
psraw xmm1, 1
|
||||
psraw xmm3, 1
|
||||
|
||||
|
||||
movdqa O(1), xmm1
|
||||
movdqa O(7), xmm3
|
||||
/*---------------------------------------------------------*/
|
||||
/* op3 and op5
|
||||
/*---------------------------------------------------------*/
|
||||
movdqa xmm0, C(3) /* xC3S5 */
|
||||
movdqa xmm1, C(5) /* xC5S3 */
|
||||
|
||||
movdqa xmm5,xmm6 /* irot_input_x */
|
||||
movdqa xmm7,xmm6 /* irot_input_x */
|
||||
|
||||
movdqa xmm2,xmm4 /* irot_input_y */
|
||||
movdqa xmm3,xmm4 /* irot_input_y */
|
||||
|
||||
pmulhw xmm4,xmm0 /* xmm4 = xC3S5 * irot_input_x - irot_input_x */
|
||||
pmulhw xmm6,xmm1 /* xmm6 = xC5S3 * irot_input_y - irot_input_y */
|
||||
|
||||
psrlw xmm2,15 /* for trucation */
|
||||
psrlw xmm5,15 /* for trucation */
|
||||
|
||||
paddw xmm4,xmm3 /* xmm4 = xC3S5 * irot_input_x */
|
||||
paddw xmm6,xmm7 /* xmm6 = xC5S3 * irot_input_y */
|
||||
|
||||
paddw xmm4,xmm2 /* Truncated */
|
||||
paddw xmm6,xmm5 /* Truncated */
|
||||
|
||||
psubsw xmm4,xmm6 /* op [3] */
|
||||
movdqa xmm6,xmm4
|
||||
|
||||
psrlw xmm4,15
|
||||
paddw xmm4,xmm6
|
||||
|
||||
psraw xmm4,1
|
||||
movdqa O(3),xmm4 /* Save Op[3] */
|
||||
|
||||
movdqa xmm4,xmm3 /* irot_input_y */
|
||||
movdqa xmm6,xmm7 /* irot_input_x */
|
||||
|
||||
pmulhw xmm3,xmm1 /* mm3 = xC5S3 * irot_input_x - irot_input_x */
|
||||
pmulhw xmm7,xmm0 /* mm7 = xC3S5 * irot_input_y - irot_input_y */
|
||||
|
||||
paddw xmm4,xmm2 /* Trucated */
|
||||
paddw xmm6,xmm5 /* Trucated */
|
||||
|
||||
paddw xmm3,xmm4 /* xmm3 = xC5S3 * irot_input_x */
|
||||
paddw xmm7,xmm6 /* mm7 = xC3S5 * irot_input_y */
|
||||
|
||||
paddw xmm3,xmm7 /* Op[5] */
|
||||
movdqa xmm7,xmm3
|
||||
|
||||
psrlw xmm3,15
|
||||
paddw xmm3,xmm7
|
||||
|
||||
psraw xmm3,1
|
||||
movdqa O(5),xmm3 /* Save Op[5] */
|
||||
/*---------------------------------------------------------*/
|
||||
/* End of 8 1-D FDCT */
|
||||
/*---------------------------------------------------------*/
|
||||
|
||||
}/* end of _asm code section */
|
||||
}
|
||||
|
||||
|
||||
|
1053
Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c
Normal file
1053
Src/libvpShared/corelibs/cdxv/vputil/win32/filtmmx.c
Normal file
File diff suppressed because it is too large
Load diff
790
Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c
Normal file
790
Src/libvpShared/corelibs/cdxv/vputil/win32/filtwmt.c
Normal file
|
@ -0,0 +1,790 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : newLoopTest_asm.c
|
||||
*
|
||||
* Description : Codec specific functions
|
||||
*
|
||||
* AUTHOR : Yaowu Xu
|
||||
*
|
||||
*****************************************************************************
|
||||
* Revision History
|
||||
*
|
||||
* 1.02 YWX 03-Nov-00 Changed confusing variable name
|
||||
* 1.01 YWX 02-Nov-00 Added the set of functions
|
||||
* 1.00 YWX 19-Oct-00 configuration baseline
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
#define STRICT /* Strict type checking. */
|
||||
#include "codec_common.h"
|
||||
#include <math.h>
|
||||
|
||||
/****************************************************************************
|
||||
* Module constants.
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||
#define FILTER_WEIGHT 128
|
||||
#define FILTER_SHIFT 7
|
||||
__declspec(align(16)) short rd[]={64,64,64,64,64,64,64,64};
|
||||
|
||||
|
||||
__declspec(align(16)) INT16 BilinearFilters_wmt[8][16] =
|
||||
{
|
||||
{ 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 112,112,112,112,112,112,112,112, 16, 16, 16, 16, 16, 16, 16, 16 },
|
||||
{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
|
||||
{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
|
||||
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
|
||||
{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
|
||||
{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
|
||||
{ 16, 16, 16, 16, 16, 16, 16, 16, 112,112,112,112,112,112,112,112 }
|
||||
};
|
||||
|
||||
extern __declspec(align(16)) INT16 BicubicFilters_mmx[17][8][32];
|
||||
|
||||
_inline
|
||||
void FilterBlock1d_h_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
|
||||
{
|
||||
__asm
|
||||
{
|
||||
|
||||
mov edi, Filter
|
||||
movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm2, [edi+ 16] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm6, [edi + 32] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm7, [edi + 48] ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
mov edi,OutputPtr
|
||||
mov esi,SrcPtr
|
||||
dec esi
|
||||
mov ecx, DWORD PTR OutputHeight
|
||||
mov eax, OutputWidth ; destination pitch?
|
||||
pxor xmm0, xmm0 ; xmm0 = 00000000
|
||||
|
||||
nextrow:
|
||||
|
||||
// kernel 0 and 3 are potentially negative taps. These negative tap filters
|
||||
// must be done first or we could have problems saturating our high value
|
||||
// tap filters
|
||||
movdqu xmm3, [esi] ; xmm3 = p-1..p14
|
||||
movdqu xmm4, xmm3 ; xmm4 = p-1..p14
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
|
||||
pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
psrldq xmm4, 3 ; xmm4 = p2..p13
|
||||
movdqa xmm5, xmm4 ; xmm5 = p2..p13
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = p2..p7
|
||||
pmullw xmm5, xmm7 ; xmm5 *= kernel 3 modifiers
|
||||
paddsw xmm3, xmm5 ; xmm3 += xmm5
|
||||
|
||||
movdqu xmm4, [esi+1] ; xmm4 = p0..p13
|
||||
movdqa xmm5, xmm4 ; xmm5 = p0..p13
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = p0..p7
|
||||
pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers
|
||||
paddsw xmm3, xmm5 ; xmm3 += xmm5
|
||||
|
||||
psrldq xmm4, 1 ; xmm4 = p1..p13
|
||||
movdqa xmm5, xmm4 ; xmm5 = p1..p13
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = p1..p7
|
||||
pmullw xmm5, xmm6 ; xmm5 *= kernel 2 modifiers
|
||||
paddsw xmm3, xmm5 ; xmm3 += xmm5
|
||||
|
||||
paddsw xmm3, rd ; xmm3 += round value
|
||||
psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
|
||||
packuswb xmm3, xmm0 ; pack and saturate
|
||||
|
||||
movdq2q mm0, xmm3
|
||||
movq [edi],mm0 ; store the results in the destination
|
||||
|
||||
add esi,SrcPixelsPerLine ; next line
|
||||
add edi,eax;
|
||||
|
||||
dec ecx ; decrement count
|
||||
jnz nextrow ; next row
|
||||
}
|
||||
}
|
||||
|
||||
_inline
|
||||
void FilterBlock1d_v_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
|
||||
{
|
||||
__asm
|
||||
{
|
||||
|
||||
mov edi, Filter
|
||||
movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm6, [edi + 32] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm7, [edi + 48] ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
mov edx, PixelsPerLine
|
||||
mov edi, OutputPtr
|
||||
mov esi, SrcPtr
|
||||
sub esi, PixelsPerLine
|
||||
mov ecx, DWORD PTR OutputHeight
|
||||
mov eax, OutputWidth ; destination pitch?
|
||||
pxor xmm0, xmm0 ; xmm0 = 00000000
|
||||
|
||||
|
||||
nextrow:
|
||||
movdqu xmm3, [esi] ; xmm3 = p0..p16
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
|
||||
pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
add esi, edx ; move source forward 1 line to avoid 3 * pitch
|
||||
|
||||
movdqu xmm4, [esi+2*edx] ; xmm4 = p0..p16
|
||||
punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
|
||||
pmullw xmm4, xmm7 ; xmm4 *= kernel 3 modifiers.
|
||||
paddsw xmm3, xmm4 ; xmm3 += xmm4
|
||||
|
||||
movdqu xmm4, [esi ] ; xmm4 = p0..p16
|
||||
punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
|
||||
pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
|
||||
paddsw xmm3, xmm4 ; xmm3 += xmm4
|
||||
|
||||
movdqu xmm4, [esi +edx] ; xmm4 = p0..p16
|
||||
punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
|
||||
pmullw xmm4, xmm6 ; xmm4 *= kernel 2 modifiers.
|
||||
paddsw xmm3, xmm4 ; xmm3 += xmm4
|
||||
|
||||
|
||||
|
||||
paddsw xmm3, rd ; xmm3 += round value
|
||||
psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
|
||||
packuswb xmm3, xmm0 ; pack and unpack to saturate
|
||||
|
||||
movdq2q mm0, xmm3
|
||||
movq [edi],mm0 ; store the results in the destination
|
||||
|
||||
// the subsequent iterations repeat 3 out of 4 of these reads. Since the
|
||||
// recon block should be in cache this shouldn't cost much. Its obviously
|
||||
// avoidable!!!.
|
||||
add edi,eax;
|
||||
|
||||
dec ecx ; decrement count
|
||||
jnz nextrow ; next row
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
_inline
|
||||
void FilterBlock1d_hb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
|
||||
{
|
||||
__asm
|
||||
{
|
||||
|
||||
mov edi, Filter
|
||||
movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
mov edi,OutputPtr
|
||||
mov esi,SrcPtr
|
||||
mov ecx, DWORD PTR OutputHeight
|
||||
mov eax, OutputWidth ; destination pitch?
|
||||
pxor xmm0, xmm0 ; xmm0 = 00000000
|
||||
|
||||
nextrow:
|
||||
movdqu xmm3, [esi] ; xmm3 = p-1..p14
|
||||
movdqu xmm5, xmm3 ; xmm4 = p-1..p14
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
|
||||
pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
psrldq xmm5, 1 ; xmm4 = p0..p13
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = p0..p7
|
||||
pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers
|
||||
paddw xmm3, xmm5 ; xmm3 += xmm5
|
||||
|
||||
paddw xmm3, rd ; xmm3 += round value
|
||||
psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
|
||||
packuswb xmm3, xmm0 ; pack and unpack to saturate
|
||||
|
||||
movdq2q mm0, xmm3
|
||||
movq [edi],mm0 ; store the results in the destination
|
||||
|
||||
add esi,SrcPixelsPerLine ; next line
|
||||
add edi,eax;
|
||||
|
||||
dec ecx ; decrement count
|
||||
jnz nextrow ; next row
|
||||
}
|
||||
}
|
||||
|
||||
_inline
|
||||
void FilterBlock1d_vb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
|
||||
{
|
||||
__asm
|
||||
{
|
||||
|
||||
mov edi, Filter
|
||||
movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
|
||||
mov edx, PixelsPerLine
|
||||
mov edi, OutputPtr
|
||||
mov esi, SrcPtr
|
||||
mov ecx, DWORD PTR OutputHeight
|
||||
mov eax, OutputWidth ; destination pitch?
|
||||
pxor xmm0, xmm0 ; xmm0 = 00000000
|
||||
|
||||
|
||||
nextrow:
|
||||
movdqu xmm3, [esi] ; xmm3 = p0..p16
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
|
||||
pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
movdqu xmm4, [esi +edx ] ; xmm4 = p0..p16
|
||||
punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
|
||||
pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
|
||||
paddw xmm3, xmm4 ; xmm3 += xmm4
|
||||
|
||||
paddw xmm3, rd ; xmm3 += round value
|
||||
psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
|
||||
packuswb xmm3, xmm0 ; pack and unpack to saturate
|
||||
|
||||
movdq2q mm0, xmm3
|
||||
movq [edi],mm0 ; store the results in the destination
|
||||
|
||||
// the subsequent iterations repeat 3 out of 4 of these reads. Since the
|
||||
// recon block should be in cache this shouldn't cost much. Its obviously
|
||||
// avoidable!!!.
|
||||
add esi,edx
|
||||
add edi,eax
|
||||
|
||||
dec ecx ; decrement count
|
||||
jnz nextrow ; next row
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : FilterBlock2dBil
|
||||
*
|
||||
* INPUTS : Pointer to source data
|
||||
*
|
||||
* OUTPUTS : Filtered data
|
||||
*
|
||||
* RETURNS : None.
|
||||
*
|
||||
* FUNCTION : Applies a bilinear filter on the intput data to produce
|
||||
* a predictor block (UINT16)
|
||||
*
|
||||
* SPECIAL NOTES :
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
_inline
|
||||
void FilterBlock2dBil_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
|
||||
{
|
||||
|
||||
__asm
|
||||
{
|
||||
mov eax, HFilter ;
|
||||
mov edi, OutputPtr ;
|
||||
mov esi, SrcPtr ;
|
||||
lea ecx, [edi+64] ;
|
||||
mov edx, SrcPixelsPerLine ;
|
||||
|
||||
movdqa xmm1, [eax] ;
|
||||
movdqa xmm2, [eax+16] ;
|
||||
|
||||
mov eax, VFilter ;
|
||||
pxor xmm0, xmm0 ;
|
||||
|
||||
// get the first horizontal line done ;
|
||||
movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
movdqa xmm4, xmm3 ; make a copy of current line
|
||||
|
||||
punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
|
||||
psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
|
||||
|
||||
pmullw xmm3, xmm1 ;
|
||||
punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
|
||||
|
||||
pmullw xmm4, xmm2 ;
|
||||
paddw xmm3, xmm4 ;
|
||||
|
||||
paddw xmm3, rd ;
|
||||
psraw xmm3, FILTER_SHIFT ; ready for output
|
||||
|
||||
movdqa xmm5, xmm3 ;
|
||||
|
||||
add esi, edx ; next line
|
||||
NextRow:
|
||||
pmullw xmm5, [eax] ;
|
||||
movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
|
||||
movdqa xmm4, xmm3 ; make a copy of current line
|
||||
punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
|
||||
|
||||
psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
|
||||
pmullw xmm3, xmm1 ;
|
||||
punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
|
||||
|
||||
movdqa xmm6, xmm5 ;
|
||||
pmullw xmm4, xmm2 ;
|
||||
|
||||
paddw xmm3, xmm4 ;
|
||||
paddw xmm3, rd ;
|
||||
|
||||
psraw xmm3, FILTER_SHIFT ; ready for output
|
||||
movdqa xmm5, xmm3 ; make a copy for the next row
|
||||
|
||||
pmullw xmm3, [eax+16] ;
|
||||
paddw xmm6, xmm3 ;
|
||||
|
||||
|
||||
paddw xmm6, rd ; xmm6 += round value
|
||||
psraw xmm6, FILTER_SHIFT ; xmm6 /= 128
|
||||
|
||||
packuswb xmm6, xmm0 ; pack and unpack to saturate
|
||||
movdq2q mm0, xmm6
|
||||
|
||||
movq [edi], mm0 ; store the results in the destination
|
||||
add esi, edx ; next line
|
||||
add edi, 8 ;
|
||||
|
||||
cmp edi, ecx ;
|
||||
jne NextRow
|
||||
|
||||
}
|
||||
|
||||
// First filter 1d Horizontal
|
||||
//FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
|
||||
// Now filter Verticaly
|
||||
//FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
|
||||
|
||||
|
||||
}
|
||||
|
||||
_inline
|
||||
void FilterUnpackBlock2dBil_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
|
||||
{
|
||||
|
||||
__asm
|
||||
{
|
||||
mov eax, HFilter ;
|
||||
mov edi, OutputPtr ;
|
||||
mov esi, SrcPtr ;
|
||||
lea ecx, [edi+128] ;
|
||||
mov edx, SrcPixelsPerLine ;
|
||||
|
||||
movdqa xmm1, [eax] ;
|
||||
movdqa xmm2, [eax+16] ;
|
||||
|
||||
mov eax, VFilter ;
|
||||
pxor xmm0, xmm0 ;
|
||||
|
||||
// get the first horizontal line done ;
|
||||
movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
movdqa xmm4, xmm3 ; make a copy of current line
|
||||
|
||||
punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
|
||||
psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
|
||||
|
||||
pmullw xmm3, xmm1 ;
|
||||
punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
|
||||
|
||||
pmullw xmm4, xmm2 ;
|
||||
paddw xmm3, xmm4 ;
|
||||
|
||||
paddw xmm3, rd ;
|
||||
psraw xmm3, FILTER_SHIFT ; ready for output
|
||||
|
||||
movdqa xmm5, xmm3 ;
|
||||
|
||||
add esi, edx ; next line
|
||||
NextRow:
|
||||
pmullw xmm5, [eax] ;
|
||||
movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
|
||||
movdqa xmm4, xmm3 ; make a copy of current line
|
||||
punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
|
||||
|
||||
psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
|
||||
pmullw xmm3, xmm1 ;
|
||||
punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
|
||||
|
||||
movdqa xmm6, xmm5 ;
|
||||
pmullw xmm4, xmm2 ;
|
||||
|
||||
paddw xmm3, xmm4 ;
|
||||
paddw xmm3, rd ;
|
||||
|
||||
psraw xmm3, FILTER_SHIFT ; ready for output
|
||||
movdqa xmm5, xmm3 ; make a copy for the next row
|
||||
|
||||
pmullw xmm3, [eax+16] ;
|
||||
paddw xmm6, xmm3 ;
|
||||
|
||||
|
||||
paddw xmm6, rd ; xmm6 += round value
|
||||
psraw xmm6, FILTER_SHIFT ; xmm6 /= 128
|
||||
|
||||
movdqu [edi], xmm6;
|
||||
|
||||
/*
|
||||
packuswb xmm6, xmm0 ; pack and unpack to saturate
|
||||
movdq2q mm0, xmm6
|
||||
|
||||
movq [edi], mm0 ; store the results in the destination
|
||||
*/
|
||||
add esi, edx ; next line
|
||||
add edi, 16 ;
|
||||
|
||||
cmp edi, ecx ;
|
||||
jne NextRow
|
||||
|
||||
}
|
||||
|
||||
// First filter 1d Horizontal
|
||||
//FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
|
||||
// Now filter Verticaly
|
||||
//FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
|
||||
|
||||
|
||||
}
|
||||
_inline
|
||||
void FilterUnpackBlock1d_hb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
|
||||
{
|
||||
__asm
|
||||
{
|
||||
|
||||
mov edi, Filter
|
||||
movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
mov edi,OutputPtr
|
||||
mov esi,SrcPtr
|
||||
mov ecx, DWORD PTR OutputHeight
|
||||
mov eax, OutputWidth ; destination pitch?
|
||||
pxor xmm0, xmm0 ; xmm0 = 00000000
|
||||
|
||||
nextrow:
|
||||
movdqu xmm3, [esi] ; xmm3 = p-1..p14
|
||||
movdqu xmm5, xmm3 ; xmm4 = p-1..p14
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
|
||||
pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
psrldq xmm5, 1 ; xmm4 = p0..p13
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = p0..p7
|
||||
pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers
|
||||
paddw xmm3, xmm5 ; xmm3 += xmm5
|
||||
|
||||
paddw xmm3, rd ; xmm3 += round value
|
||||
psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
/*
|
||||
packuswb xmm3, xmm0 ; pack and unpack to saturate
|
||||
movdq2q mm0, xmm3
|
||||
*/
|
||||
|
||||
movdqu [edi],xmm3 ; store the results in the destination
|
||||
|
||||
add esi,SrcPixelsPerLine ; next line
|
||||
add edi,eax;
|
||||
|
||||
dec ecx ; decrement count
|
||||
jnz nextrow ; next row
|
||||
}
|
||||
}
|
||||
|
||||
_inline
|
||||
void FilterUnpackBlock1d_vb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
|
||||
{
|
||||
__asm
|
||||
{
|
||||
|
||||
mov edi, Filter
|
||||
movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
|
||||
movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
|
||||
mov edx, PixelsPerLine
|
||||
mov edi, OutputPtr
|
||||
mov esi, SrcPtr
|
||||
mov ecx, DWORD PTR OutputHeight
|
||||
mov eax, OutputWidth ; destination pitch?
|
||||
pxor xmm0, xmm0 ; xmm0 = 00000000
|
||||
|
||||
|
||||
nextrow:
|
||||
movdqu xmm3, [esi] ; xmm3 = p0..p16
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
|
||||
pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
|
||||
|
||||
movdqu xmm4, [esi +edx ] ; xmm4 = p0..p16
|
||||
punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
|
||||
pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
|
||||
paddw xmm3, xmm4 ; xmm3 += xmm4
|
||||
|
||||
paddw xmm3, rd ; xmm3 += round value
|
||||
psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
/*packuswb xmm3, xmm0 ; pack and unpack to saturate
|
||||
|
||||
movdq2q mm0, xmm3
|
||||
*/
|
||||
movdqu [edi],xmm3 ; store the results in the destination
|
||||
|
||||
// the subsequent iterations repeat 3 out of 4 of these reads. Since the
|
||||
// recon block should be in cache this shouldn't cost much. Its obviously
|
||||
// avoidable!!!.
|
||||
add esi,edx
|
||||
add edi,eax
|
||||
|
||||
dec ecx ; decrement count
|
||||
jnz nextrow ; next row
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : FilterBlockBil_8
|
||||
*
|
||||
* INPUTS : ReconPtr1, ReconPtr12
|
||||
* Two pointers into the block of data to be filtered
|
||||
* These pointers bound the fractional pel position
|
||||
* PixelsPerLine
|
||||
* Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr12
|
||||
* Modx, ModY
|
||||
* The fractional pel bits used to select a filter.
|
||||
*
|
||||
*
|
||||
* OUTPUTS : ReconRefPtr
|
||||
* A pointer to an 8x8 buffer into which UINT8 filtered data is written.
|
||||
*
|
||||
* RETURNS : None.
|
||||
*
|
||||
* FUNCTION : Produces a bilinear filtered fractional pel prediction block
|
||||
* with UINT8 output
|
||||
*
|
||||
* SPECIAL NOTES :
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void FilterBlockBil_8_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY )
|
||||
{
|
||||
int diff;
|
||||
|
||||
// swap pointers so ReconPtr1 smaller (above, left, above-right or above-left )
|
||||
diff=ReconPtr2-ReconPtr1;
|
||||
|
||||
// The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
|
||||
// This works out to be what we want... despite the pointer swapping that goes on below.
|
||||
// For example... if the X component of the vector is a +ve ModX = X%8.
|
||||
// if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
|
||||
|
||||
if(diff<0)
|
||||
{ // swap pointers so ReconPtr1 smaller
|
||||
UINT8 *temp=ReconPtr1;
|
||||
ReconPtr1=ReconPtr2;
|
||||
ReconPtr2=temp;
|
||||
diff= (int)(ReconPtr2-ReconPtr1);
|
||||
}
|
||||
|
||||
if( diff==1 )
|
||||
{
|
||||
FilterBlock1d_hb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters_wmt[ModX] );
|
||||
}
|
||||
else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only
|
||||
{
|
||||
FilterBlock1d_vb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_wmt[ModY]);
|
||||
}
|
||||
else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right
|
||||
{
|
||||
FilterBlock2dBil_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
|
||||
//FilterBlock2dBil_8_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
|
||||
}
|
||||
else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left
|
||||
{
|
||||
FilterBlock2dBil_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
|
||||
//FilterBlock2dBil_8_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
 *
 *  ROUTINE       :     UnpackBlock_wmt
 *
 *  INPUTS        :     SrcPtr           : First byte of the first source row.
 *                      SrcPixelsPerLine : Added to the source pointer after each row.
 *
 *  OUTPUTS       :     OutputPtr        : 8 rows of eight UINT16, contiguous
 *                                         (16 bytes per row), unaligned stores.
 *
 *  RETURNS       :     None.
 *
 *  FUNCTION      :     Zero-extends an 8x8 block of bytes to 16-bit values.
 *
 *  SPECIAL NOTES :     Each source row is fetched with an unaligned 16-byte
 *                      load although only the low 8 bytes are used.
 *
 *  ERRORS        :     None.
 *
 ****************************************************************************/
_inline void UnpackBlock_wmt( UINT8 *SrcPtr, UINT16 *OutputPtr, UINT32 SrcPixelsPerLine )
{
    __asm
    {
        mov         edi, OutputPtr
        mov         esi, SrcPtr

        mov         ecx, 8                      ; eight rows
        mov         eax, 16                     ; destination pitch: 8 words
        pxor        xmm0, xmm0                  ; zero, used to widen bytes to words

    unpack_row_loop:
        movdqu      xmm3, [esi]                 ; bytes p0 .. p15
        punpcklbw   xmm3, xmm0                  ; words p0 .. p7
        movdqu      [edi], xmm3                 ; write eight 16-bit values

        add         esi, SrcPixelsPerLine       ; next source row
        add         edi, eax                    ; next destination row

        dec         ecx
        jnz         unpack_row_loop
    }
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : FilterBlock2d
|
||||
*
|
||||
* INPUTS : Pointer to source data
|
||||
*
|
||||
* OUTPUTS : Filtered data
|
||||
*
|
||||
* RETURNS : None.
|
||||
*
|
||||
* FUNCTION : Applies a 2d 4 tap filter on the intput data to produce
|
||||
* a predictor block (UINT16)
|
||||
*
|
||||
* SPECIAL NOTES :
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void FilterBlock2d_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
|
||||
{
|
||||
|
||||
UINT8 Intermediate[256];
|
||||
|
||||
// First filter 1d Horizontal
|
||||
FilterBlock1d_h_wmt(SrcPtr-SrcPixelsPerLine, Intermediate, SrcPixelsPerLine, 1, 11, 8, HFilter );
|
||||
|
||||
// Now filter Verticaly
|
||||
FilterBlock1d_v_wmt(Intermediate+BLOCK_HEIGHT_WIDTH, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : FilterBlock
|
||||
*
|
||||
* INPUTS : ReconPtr1, ReconPtr12
|
||||
* Two pointers into the block of data to be filtered
|
||||
* These pointers bound the fractional pel position
|
||||
* PixelsPerLine
|
||||
* Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr12
|
||||
* Modx, ModY
|
||||
* The fractional pel bits used to select a filter.
|
||||
* UseBicubic
|
||||
* Whether to use the bicubuc filter set or the bilinear set
|
||||
*
|
||||
*
|
||||
* OUTPUTS : ReconRefPtr
|
||||
* A pointer to an 8x8 buffer into which the filtered data is written.
|
||||
*
|
||||
* RETURNS : None.
|
||||
*
|
||||
* FUNCTION : Produces a filtered fractional pel prediction block
|
||||
* using bilinear or bicubic filters
|
||||
*
|
||||
* SPECIAL NOTES :
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void FilterBlock_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha )
|
||||
{
|
||||
int diff;
|
||||
UINT8 Intermediate[256];
|
||||
|
||||
// swap pointers so ReconPtr1 smaller (above, left, above-right or above-left )
|
||||
diff=ReconPtr2-ReconPtr1;
|
||||
|
||||
// The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
|
||||
// This works out to be what we want... despite the pointer swapping that goes on below.
|
||||
// For example... if the X component of the vector is a +ve ModX = X%8.
|
||||
// if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
|
||||
|
||||
if(diff<0)
|
||||
{ // swap pointers so ReconPtr1 smaller
|
||||
UINT8 *temp=ReconPtr1;
|
||||
ReconPtr1=ReconPtr2;
|
||||
ReconPtr2=temp;
|
||||
diff= (int)(ReconPtr2-ReconPtr1);
|
||||
}
|
||||
|
||||
if(!diff)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if(UseBicubic)
|
||||
{
|
||||
if( diff==1 )
|
||||
{ // Fractional pixel in horizontal only
|
||||
FilterBlock1d_h_wmt(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModX] );
|
||||
}
|
||||
else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only
|
||||
{
|
||||
FilterBlock1d_v_wmt(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModY]);
|
||||
}
|
||||
else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right
|
||||
{
|
||||
FilterBlock2d_wmt( ReconPtr1-1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
|
||||
}
|
||||
else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left
|
||||
{
|
||||
FilterBlock2d_wmt( ReconPtr1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
|
||||
}
|
||||
UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if( diff==1 )
|
||||
{
|
||||
FilterUnpackBlock1d_hb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 16, BilinearFilters_wmt[ModX] );
|
||||
|
||||
// Fractional pixel in horizontal only
|
||||
/*
|
||||
FilterBlock1d_hb8_wmt(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BilinearFilters_wmt[ModX] );
|
||||
UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
|
||||
*/
|
||||
|
||||
}
|
||||
else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only
|
||||
{
|
||||
FilterUnpackBlock1d_vb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 16, BilinearFilters_wmt[ModY]);
|
||||
/*
|
||||
FilterBlock1d_vb8_wmt(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_wmt[ModY]);
|
||||
UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
|
||||
*/
|
||||
}
|
||||
else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right
|
||||
{
|
||||
|
||||
FilterUnpackBlock2dBil_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
|
||||
/*
|
||||
FilterBlock2dBil_wmt( ReconPtr1-1, Intermediate, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
|
||||
UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
|
||||
*/
|
||||
}
|
||||
else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left
|
||||
{
|
||||
FilterUnpackBlock2dBil_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
|
||||
/*
|
||||
FilterBlock2dBil_wmt( ReconPtr1, Intermediate, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
|
||||
UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
2156
Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c
Normal file
2156
Src/libvpShared/corelibs/cdxv/vputil/win32/mmxidct.c
Normal file
File diff suppressed because it is too large
Load diff
856
Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c
Normal file
856
Src/libvpShared/corelibs/cdxv/vputil/win32/mmxrecon.c
Normal file
|
@ -0,0 +1,856 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : OptFunctions.c
|
||||
*
|
||||
* Description : MMX or otherwise processor specific
|
||||
* optimised versions of functions
|
||||
*
|
||||
* AUTHOR : Paul Wilkins
|
||||
*
|
||||
*****************************************************************************
|
||||
* Revision History
|
||||
*
|
||||
* 1.07 JBB 26/01/01 Removed unused function
|
||||
* 1.06 YWX 23/05/00 Remove the clamping in MmxReconPostProcess()
|
||||
* 1.05 YWX 15/05/00 Added MmxReconPostProcess()
|
||||
* 1.04 SJL 03/14/00 Added in Tim's versions of MmxReconInter and MmxReconInterHalfPixel2.
|
||||
* 1.03 PGW 12/10/99 Changes to reduce unnecessary dependencies.
|
||||
* 1.02 PGW 30/08/99 Minor changes to MmxReconInterHalfPixel2().
|
||||
* 1.01 PGW 13/07/99 Changes to keep reconstruction data to 16 bit
|
||||
* 1.00 PGW 14/06/99 Configuration baseline
|
||||
*
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/*
|
||||
Use Tim's optimized version.
|
||||
*/
|
||||
#define USING_TIMS 1
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
#define STRICT // Strict type checking.
|
||||
|
||||
#include "codec_common.h"
|
||||
|
||||
#include "reconstruct.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Module constants.
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Imports.
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
extern INT32 * XX_LUT;
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Global Variables
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Functions
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Module Statics
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
// Four 16-bit ones. Not referenced by the routines visible in this file;
// presumably consumed by other modules - TODO confirm.
INT16 Ones[4] = {1,1,1,1};
|
||||
// Four 16-bit values of 128. Likewise not referenced by the routines visible
// in this file - TODO confirm external users.
INT16 OneTwoEight[4] = {128,128,128,128};
|
||||
// Eight bytes of 0x80. MMXReconIntra loads these into mm0 and XORs them with
// the packed signed bytes, flipping each sign bit (signed -> unsigned, i.e. +128).
UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128};
|
||||
|
||||
#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
|
||||
/****************************************************************************
|
||||
* Forward References
|
||||
*****************************************************************************
|
||||
*/
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : MMXReconIntra
|
||||
*
|
||||
* INPUTS : INT16 * idct
|
||||
* Pointer to the output from the idct for this block
|
||||
*
|
||||
* UINT32 stride
|
||||
* Line Length in pixels in recon and reference images
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
* OUTPUTS : UINT8 * dest
|
||||
* The reconstruction buffer
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs an intra block - MMX version
|
||||
*
|
||||
* SPECIAL NOTES : Tim Murphy's optimized version
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
// Reconstructs one intra block: reads 64 16-bit IDCT values (128 bytes) from
// idct, saturates each to a signed byte, flips the sign bit (equivalent to
// adding 128) and writes 8 rows of 8 bytes to dest, stepping dest by stride
// bytes per row.
//
// TmpDataBuffer : unused; only cast to void.
// dest          : output; one 8-byte row is stored per loop pass.
// idct          : declared UINT16 * but consumed as signed 16-bit values by packsswb.
// stride        : byte distance between consecutive output rows.
//
// No emms is executed here (warning 4799 is disabled for this file);
// presumably the caller clears the MMX state - TODO confirm.
void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * dest, UINT16 * idct, UINT32 stride )
|
||||
{
|
||||
(void) TmpDataBuffer;
|
||||
__asm
|
||||
{
|
||||
// u pipe
|
||||
// v pipe
|
||||
mov eax,[idct] ; Signed 16 bit inputs
|
||||
mov edx,[dest] ; Signed 8 bit outputs
|
||||
movq mm0,[Eight128s] ; Set mm0 to 0x8080808080808080
|
||||
;
|
||||
mov ebx,[stride] ; Line stride in output buffer
|
||||
lea ecx,[eax+128] ; Endpoint in input buffer
|
||||
// Each pass consumes 16 input bytes and emits one 8-byte output row,
// so the 128-byte input is covered in 8 passes.
loop_label: ;
|
||||
movq mm2,[eax] ; First four input values
|
||||
;
|
||||
packsswb mm2,[eax+8] ; pack with next(high) four values
|
||||
por mm0,mm0 ; stall
|
||||
pxor mm2,mm0 ; Convert result to unsigned (same as add 128)
|
||||
lea eax,[eax + 16] ; Step source buffer
|
||||
cmp eax,ecx ; are we done
|
||||
;
|
||||
movq [edx],mm2 ; store results
|
||||
;
|
||||
lea edx,[edx+ebx] ; Step output buffer
|
||||
// jc tests the carry left by the cmp above (eax below ecx, unsigned);
// the movq and lea in between do not modify the flags.
jc loop_label ; Loop back if we are not done
|
||||
}
|
||||
// 6c/8 elts = 9c/8 = 1.125 c/pix
|
||||
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : MmxReconInter
|
||||
*
|
||||
* INPUTS : UINT8 * RefPtr
|
||||
* The last frame reference
|
||||
*
|
||||
* INT16 * ChangePtr
|
||||
* Pointer to the change data
|
||||
*
|
||||
* UINT32 LineStep
|
||||
* Line Length in pixels in recon and ref images
|
||||
*
|
||||
* OUTPUTS : UINT8 * ReconPtr
|
||||
* The reconstruction
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs data from last data and change
|
||||
*
|
||||
* SPECIAL NOTES :
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
#if USING_TIMS
|
||||
// Reconstructs one inter block (USING_TIMS variant): for each of 8 rows, loads
// 8 reference bytes from RefPtr, zero-extends them to 16 bits, adds the
// corresponding 8 change values with signed saturation (paddsw), packs the
// sums to unsigned bytes with saturation (packuswb) and stores 8 bytes to
// ReconPtr.
//
// TmpDataBuffer : unused; only cast to void.
// ReconPtr      : output; advanced by LineStep bytes per row.
// RefPtr        : reference pixels; advanced by LineStep bytes per row.
// ChangePtr     : 128 bytes of 16-bit change data, read 16 bytes per row.
// LineStep      : byte distance between rows in both RefPtr and ReconPtr.
//
// No emms is executed here; presumably the caller clears the MMX state - TODO confirm.
void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
|
||||
{
|
||||
(void) TmpDataBuffer;
|
||||
|
||||
_asm {
|
||||
push edi
|
||||
;; mov ebx, [ref]
|
||||
;; mov ecx, [diff]
|
||||
;; mov eax, [dest]
|
||||
;; mov edx, [stride]
|
||||
mov ebx, [RefPtr]
|
||||
mov ecx, [ChangePtr]
|
||||
mov eax, [ReconPtr]
|
||||
mov edx, [LineStep]
|
||||
pxor mm0, mm0
|
||||
lea edi, [ecx + 128]
|
||||
;
|
||||
// Loop runs until ecx reaches ChangePtr + 128, i.e. 8 passes of 16 bytes.
L:
|
||||
movq mm2, [ebx] ; (+3 misaligned) 8 reference pixels
|
||||
;
|
||||
movq mm4, [ecx] ; first 4 changes
|
||||
movq mm3, mm2
|
||||
movq mm5, [ecx + 8] ; last 4 changes
|
||||
punpcklbw mm2, mm0 ; turn first 4 refs into positive 16-bit #s
|
||||
paddsw mm2, mm4 ; add in first 4 changes
|
||||
punpckhbw mm3, mm0 ; turn last 4 refs into positive 16-bit #s
|
||||
paddsw mm3, mm5 ; add in last 4 changes
|
||||
add ebx, edx ; next row of reference pixels
|
||||
packuswb mm2, mm3 ; pack result to unsigned 8-bit values
|
||||
lea ecx, [ecx + 16] ; next row of changes
|
||||
cmp ecx, edi ; are we done?
|
||||
;
|
||||
movq [eax], mm2 ; store result
|
||||
;
|
||||
lea eax, [eax+edx] ; next row of output
|
||||
// jc consumes the carry from the cmp above; the movq and lea in between
// leave the flags unchanged.
jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
|
||||
|
||||
pop edi
|
||||
}
|
||||
}
|
||||
#else
|
||||
// Fully unrolled variant of MmxReconInter, compiled only when USING_TIMS is 0.
// For each of 8 rows: loads 8 reference bytes, zero-extends them to words,
// adds 8 16-bit change values with signed saturation, packs to unsigned bytes
// with saturation and stores 8 bytes to ReconPtr. RefPtr and ReconPtr step by
// LineStep bytes per row; ChangePtr steps by 16 bytes per row.
// TmpDataBuffer is not referenced in this variant.
// The emms at the end is commented out, so the MMX state is left as-is.
void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
|
||||
{
|
||||
|
||||
// Note that the line step for the change data is 16 bytes (8 * 16 bits), as the add ecx,16 steps below show.
|
||||
__asm
|
||||
{
|
||||
// Set up data pointers
|
||||
mov eax,dword ptr [ReconPtr]
|
||||
mov ebx,dword ptr [RefPtr]
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
mov edx,dword ptr [LineStep]
|
||||
pxor mm6, mm6 ; Blank mmx6
|
||||
|
||||
// Row 1
|
||||
// Load 8 reference pixels and zero-extend them to words (the change data is already 16 bit)
|
||||
movq mm0,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data
|
||||
paddsw mm0, mm2 ; First 4 values
|
||||
paddsw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [eax],mm0 ; Write the data out to the results buffer
|
||||
|
||||
add ebx,edx ; Step the reference pointer.
|
||||
add ecx,16 ; Step the change pointer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
|
||||
// Row 2
|
||||
// Load 8 reference pixels and zero-extend them to words (the change data is already 16 bit)
|
||||
movq mm0,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data
|
||||
paddsw mm0, mm2 ; First 4 values
|
||||
paddsw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [eax],mm0 ; Write the data out to the results buffer
|
||||
|
||||
add ebx,edx ; Step the reference pointer.
|
||||
add ecx,16 ; Step the change pointer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
|
||||
// Row 3
|
||||
// Load 8 reference pixels and zero-extend them to words (the change data is already 16 bit)
|
||||
movq mm0,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data
|
||||
paddsw mm0, mm2 ; First 4 values
|
||||
paddsw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [eax],mm0 ; Write the data out to the results buffer
|
||||
|
||||
add ebx,edx ; Step the reference pointer.
|
||||
add ecx,16 ; Step the change pointer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
|
||||
// Row 4
|
||||
// Load 8 reference pixels and zero-extend them to words (the change data is already 16 bit)
|
||||
movq mm0,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data
|
||||
paddsw mm0, mm2 ; First 4 values
|
||||
paddsw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [eax],mm0 ; Write the data out to the results buffer
|
||||
|
||||
add ebx,edx ; Step the reference pointer.
|
||||
add ecx,16 ; Step the change pointer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
|
||||
// Row 5
|
||||
// Load 8 reference pixels and zero-extend them to words (the change data is already 16 bit)
|
||||
movq mm0,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data
|
||||
paddsw mm0, mm2 ; First 4 values
|
||||
paddsw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [eax],mm0 ; Write the data out to the results buffer
|
||||
|
||||
add ebx,edx ; Step the reference pointer.
|
||||
add ecx,16 ; Step the change pointer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
|
||||
// Row 6
|
||||
// Load 8 reference pixels and zero-extend them to words (the change data is already 16 bit)
|
||||
movq mm0,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data
|
||||
paddsw mm0, mm2 ; First 4 values
|
||||
paddsw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [eax],mm0 ; Write the data out to the results buffer
|
||||
|
||||
add ebx,edx ; Step the reference pointer.
|
||||
add ecx,16 ; Step the change pointer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
|
||||
// Row 7
|
||||
// Load 8 reference pixels and zero-extend them to words (the change data is already 16 bit)
|
||||
movq mm0,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data
|
||||
paddsw mm0, mm2 ; First 4 values
|
||||
paddsw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [eax],mm0 ; Write the data out to the results buffer
|
||||
|
||||
add ebx,edx ; Step the reference pointer.
|
||||
add ecx,16 ; Step the change pointer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
|
||||
// Row 8
|
||||
// Load 8 reference pixels and zero-extend them to words (the change data is already 16 bit)
|
||||
movq mm0,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data
|
||||
paddsw mm0, mm2 ; First 4 values
|
||||
paddsw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [eax],mm0 ; Write the data out to the results buffer
|
||||
|
||||
//emms ; Clear the MMX state.
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : MmxReconInterHalfPixel2
|
||||
*
|
||||
* INPUTS : UINT8 * RefPtr1, RefPtr2
|
||||
* The last frame reference
|
||||
*
|
||||
* INT16 * ChangePtr
|
||||
* Pointer to the change data
|
||||
*
|
||||
* UINT32 LineStep
|
||||
* Line Length in pixels in recon and ref images
|
||||
*
|
||||
*
|
||||
* OUTPUTS : UINT8 * ReconPtr
|
||||
* The reconstruction
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs data from half pixel reference data and change.
|
||||
* Half pixel data interpolated from 2 references.
|
||||
*
|
||||
* SPECIAL NOTES :
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
#if USING_TIMS
|
||||
|
||||
#define A 0
|
||||
|
||||
// Reconstructs one inter block from the average of two references
// (USING_TIMS variant). For each of 8 rows: loads 8 bytes from RefPtr1 and
// RefPtr2, zero-extends both to 16 bits, sums them, shifts right by one,
// adds the 8 change values, packs to unsigned bytes with saturation and
// stores 8 bytes to ReconPtr.
//
// TmpDataBuffer    : unused; only cast to void.
// ReconPtr         : output; advanced by LineStep bytes per row.
// RefPtr1, RefPtr2 : the two reference rows being averaged; both advance by LineStep.
// ChangePtr        : 128 bytes of 16-bit change data, read 16 bytes per row.
// LineStep         : byte distance between rows.
//
// The rounding add is guarded by "#if A"; with A defined as 0 the average
// truncates. culong is not declared in the visible source and is only needed
// when A is non-zero.
// NOTE(review): the change data is added with wrapping paddw here, whereas
// MmxReconInter uses saturating paddsw - confirm the change values can never
// overflow 16 bits when added to a 0..255 average.
// No emms is executed here; presumably the caller clears the MMX state - TODO confirm.
void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,
|
||||
UINT8 * RefPtr1, UINT8 * RefPtr2,
|
||||
INT16 * ChangePtr, UINT32 LineStep )
|
||||
{
|
||||
# if A
|
||||
static culong FourOnes[2] = { 65537, 65537}; // only read once
|
||||
# endif
|
||||
(void) TmpDataBuffer;
|
||||
|
||||
_asm {
|
||||
push esi
|
||||
push edi
|
||||
|
||||
;; mov ecx, [diff]
|
||||
;; mov esi, [ref1]
|
||||
;; mov edi, [ref2]
|
||||
;; mov ebx, [dest]
|
||||
;; mov edx, [stride]
|
||||
|
||||
mov ecx, [ChangePtr]
|
||||
mov esi, [RefPtr1]
|
||||
mov edi, [RefPtr2]
|
||||
mov ebx, [ReconPtr]
|
||||
mov edx, [LineStep]
|
||||
|
||||
lea eax, [ecx+128]
|
||||
|
||||
# if A
|
||||
movq mm1, [FourOnes]
|
||||
# endif
|
||||
|
||||
pxor mm0, mm0
|
||||
// Loop runs until ecx reaches ChangePtr + 128, i.e. 8 passes of 16 bytes.
L:
|
||||
movq mm2, [esi] ; (+3 misaligned) mm2 = row from ref1
|
||||
;
|
||||
movq mm4, [edi] ; (+3 misaligned) mm4 = row from ref2
|
||||
movq mm3, mm2
|
||||
punpcklbw mm2, mm0 ; mm2 = start ref1 as positive 16-bit #s
|
||||
movq mm5, mm4
|
||||
movq mm6, [ecx] ; mm6 = first 4 changes
|
||||
punpckhbw mm3, mm0 ; mm3 = end ref1 as positive 16-bit #s
|
||||
movq mm7, [ecx+8] ; mm7 = last 4 changes
|
||||
punpcklbw mm4, mm0 ; mm4 = start ref2 as positive 16-bit #s
|
||||
punpckhbw mm5, mm0 ; mm5 = end ref2 as positive 16-bit #s
|
||||
paddw mm2, mm4 ; mm2 = start (ref1 + ref2)
|
||||
paddw mm3, mm5 ; mm3 = end (ref1 + ref2)
|
||||
|
||||
# if A
|
||||
paddw mm2, mm1 ; rounding adjustment
|
||||
paddw mm3, mm1
|
||||
# endif
|
||||
|
||||
psrlw mm2, 1 ; mm2 = start (ref1 + ref2)/2
|
||||
psrlw mm3, 1 ; mm3 = end (ref1 + ref2)/2
|
||||
paddw mm2, mm6 ; add changes to start
|
||||
paddw mm3, mm7 ; add changes to end
|
||||
lea ecx, [ecx+16] ; next row idct
|
||||
packuswb mm2, mm3 ; pack start|end to unsigned 8-bit
|
||||
add esi, edx ; next row ref1
|
||||
add edi, edx ; next row ref2
|
||||
cmp ecx, eax
|
||||
movq [ebx], mm2 ; store result
|
||||
;
|
||||
lea ebx, [ebx+edx]
|
||||
// jc consumes the carry from the cmp above; the movq and lea in between
// leave the flags unchanged.
jc L ; 22c / 8 elts = 33c / 8 pixels = 4.125 c/pix
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
}
|
||||
}
|
||||
|
||||
#undef A
|
||||
|
||||
#else
|
||||
// Fully unrolled variant of MmxReconInterHalfPixel2, compiled only when
// USING_TIMS is 0. For each of 8 rows it averages 8 bytes from RefPtr1 and
// RefPtr2 (sum shifted right by one, no rounding), adds 8 16-bit change
// values with wrapping paddw, packs to unsigned bytes with saturation and
// writes the row into a temporary buffer at offsets 0, 8, ... 56. The 64
// temporary bytes are then copied to ReconPtr, one 8-byte row per LineStep.
//
// NOTE(review): TmpDataBuffer is declared INT16 *, yet the first statement
// applies -> to it (TmpDataBuffer->TmpReconBuffer). As written this would not
// compile if the branch were enabled; presumably it predates a signature
// change - confirm before setting USING_TIMS to 0.
// The emms at the end is commented out, so the MMX state is left as-is.
void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,
|
||||
UINT8 * RefPtr1, UINT8 * RefPtr2,
|
||||
INT16 * ChangePtr, UINT32 LineStep )
|
||||
{
|
||||
UINT8 * TmpDataPtr = (UINT8 *)TmpDataBuffer->TmpReconBuffer;
|
||||
|
||||
// Note that the line step for the change data is 16 bytes (8 * 16 bits): each row reloads ChangePtr and adds a multiple of 16.
|
||||
__asm
|
||||
{
|
||||
pxor mm6, mm6 ; Blank mmx6
|
||||
|
||||
// Set up data pointers
|
||||
mov eax,dword ptr [RefPtr1]
|
||||
mov ebx,dword ptr [RefPtr2]
|
||||
mov edx,dword ptr [LineStep]
|
||||
|
||||
// Row 1
|
||||
// Load the change pointer
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
|
||||
// Load the data values (Ref1 and Ref2) and unpack to signed 16 bit values
|
||||
movq mm0,dword ptr [eax] ; Load 8 elements of source data
|
||||
movq mm2,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
movq mm3, mm2 ; Copy data
|
||||
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpcklbw mm2, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
punpckhbw mm3, mm6 ; High bytes to words
|
||||
|
||||
// Average Ref1 and Ref2
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm3 ; Second 4 values
|
||||
psrlw mm0, 1
|
||||
psrlw mm1, 1
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data reference and difference data
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
mov ecx,dword ptr [TmpDataPtr] ; Load the temp results pointer
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [ecx],mm0 ; Write the data out to the temporary results buffer
|
||||
add eax,edx ; Step the reference pointers
|
||||
add ebx,edx
|
||||
|
||||
// Row 2
|
||||
// Load the change pointer
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
add ecx,16
|
||||
|
||||
// Load the data values (Ref1 and Ref2).
|
||||
movq mm0,dword ptr [eax] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
|
||||
movq mm2,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm3, mm2 ; Copy data
|
||||
punpcklbw mm2, mm6 ; Low bytes to words
|
||||
punpckhbw mm3, mm6 ; High bytes to words
|
||||
|
||||
// Average Ref1 and Ref2
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm3 ; Second 4 values
|
||||
psrlw mm0, 1
|
||||
psrlw mm1, 1
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data reference and difference data
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
mov ecx,dword ptr [TmpDataPtr] ; Load the temp results pointer
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [ecx+8],mm0 ; Write the data out to the temporary results buffer
|
||||
add eax,edx ; Step the reference pointers
|
||||
add ebx,edx
|
||||
|
||||
// Row 3
|
||||
// Load the change pointer
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
add ecx,32
|
||||
|
||||
// Load the data values (Ref1 and Ref2).
|
||||
movq mm0,dword ptr [eax] ; Load 8 elements of source data
|
||||
movq mm2,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
movq mm3, mm2 ; Copy data
|
||||
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
punpcklbw mm2, mm6 ; Low bytes to words
|
||||
punpckhbw mm3, mm6 ; High bytes to words
|
||||
|
||||
// Average Ref1 and Ref2
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm3 ; Second 4 values
|
||||
psrlw mm0, 1
|
||||
psrlw mm1, 1
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data reference and difference data
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
mov ecx,dword ptr [TmpDataPtr]
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [ecx+16],mm0 ; Write the data out to the temporary results buffer
|
||||
add eax,edx ; Step the reference pointers
|
||||
add ebx,edx
|
||||
|
||||
// Row 4
|
||||
// Load the change pointer
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
add ecx,48
|
||||
|
||||
// Load the data values (Ref1 and Ref2).
|
||||
movq mm0,dword ptr [eax] ; Load 8 elements of source data
|
||||
movq mm2,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
movq mm3, mm2 ; Copy data
|
||||
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
punpcklbw mm2, mm6 ; Low bytes to words
|
||||
punpckhbw mm3, mm6 ; High bytes to words
|
||||
|
||||
// Average Ref1 and Ref2
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm3 ; Second 4 values
|
||||
psrlw mm0, 1
|
||||
psrlw mm1, 1
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data reference and difference data
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
mov ecx,dword ptr [TmpDataPtr]
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [ecx+24],mm0 ; Write the data out to the temporary results buffer
|
||||
add eax,edx ; Step the reference pointers
|
||||
add ebx,edx
|
||||
|
||||
// Row 5
|
||||
// Load the change pointer
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
add ecx,64
|
||||
|
||||
// Load the data values (Ref1 and Ref2).
|
||||
movq mm0,dword ptr [eax] ; Load 8 elements of source data
|
||||
movq mm2,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
movq mm3, mm2 ; Copy data
|
||||
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
punpcklbw mm2, mm6 ; Low bytes to words
|
||||
punpckhbw mm3, mm6 ; High bytes to words
|
||||
|
||||
// Average Ref1 and Ref2
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm3 ; Second 4 values
|
||||
psrlw mm0, 1
|
||||
psrlw mm1, 1
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data reference and difference data
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
mov ecx,dword ptr [TmpDataPtr]
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [ecx+32],mm0 ; Write the data out to the temporary results buffer
|
||||
add eax,edx ; Step the reference pointers
|
||||
add ebx,edx
|
||||
|
||||
// Row 6
|
||||
// Load the change pointer
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
add ecx,80
|
||||
|
||||
// Load the data values (Ref1 and Ref2).
|
||||
movq mm0,dword ptr [eax] ; Load 8 elements of source data
|
||||
movq mm2,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
movq mm3, mm2 ; Copy data
|
||||
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
punpcklbw mm2, mm6 ; Low bytes to words
|
||||
punpckhbw mm3, mm6 ; High bytes to words
|
||||
|
||||
// Average Ref1 and Ref2
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm3 ; Second 4 values
|
||||
psrlw mm0, 1
|
||||
psrlw mm1, 1
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data reference and difference data
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
mov ecx,dword ptr [TmpDataPtr]
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [ecx+40],mm0 ; Write the data out to the temporary results buffer
|
||||
add eax,edx ; Step the reference pointers
|
||||
add ebx,edx
|
||||
|
||||
// Row 7
|
||||
// Load the change pointer
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
add ecx,96
|
||||
|
||||
// Load the data values (Ref1 and Ref2).
|
||||
movq mm0,dword ptr [eax] ; Load 8 elements of source data
|
||||
movq mm2,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
movq mm3, mm2 ; Copy data
|
||||
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
punpcklbw mm2, mm6 ; Low bytes to words
|
||||
punpckhbw mm3, mm6 ; High bytes to words
|
||||
|
||||
// Average Ref1 and Ref2
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm3 ; Second 4 values
|
||||
psrlw mm0, 1
|
||||
psrlw mm1, 1
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data reference and difference data
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
mov ecx,dword ptr [TmpDataPtr]
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [ecx+48],mm0 ; Write the data out to the temporary results buffer
|
||||
add eax,edx ; Step the reference pointers
|
||||
add ebx,edx
|
||||
|
||||
// Row 8
|
||||
// Load the change pointer
|
||||
mov ecx,dword ptr [ChangePtr]
|
||||
add ecx,112
|
||||
|
||||
// Load the data values (Ref1 and Ref2).
|
||||
movq mm0,dword ptr [eax] ; Load 8 elements of source data
|
||||
movq mm2,dword ptr [ebx] ; Load 8 elements of source data
|
||||
movq mm1, mm0 ; Copy data
|
||||
movq mm3, mm2 ; Copy data
|
||||
|
||||
punpcklbw mm0, mm6 ; Low bytes to words
|
||||
punpckhbw mm1, mm6 ; High bytes to words
|
||||
punpcklbw mm2, mm6 ; Low bytes to words
|
||||
punpckhbw mm3, mm6 ; High bytes to words
|
||||
|
||||
// Average Ref1 and Ref2
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm3 ; Second 4 values
|
||||
psrlw mm0, 1
|
||||
psrlw mm1, 1
|
||||
|
||||
// Load 8 elements of 16 bit change data
|
||||
movq mm2,dword ptr [ecx] ; Load 4 elements of change data
|
||||
movq mm4,dword ptr [ecx+8] ; Load next 4 elements of change data
|
||||
|
||||
// Sum the data reference and difference data
|
||||
paddw mm0, mm2 ; First 4 values
|
||||
paddw mm1, mm4 ; Second 4 values
|
||||
|
||||
// Pack and store
|
||||
mov ecx,dword ptr [TmpDataPtr]
|
||||
packuswb mm0, mm1 ; Then pack and saturate to unsigned bytes
|
||||
movq dword ptr [ecx+56],mm0 ; Write the data out to the temporary results buffer
|
||||
|
||||
|
||||
// Now copy the results back to the reconstruction buffer.
|
||||
mov eax,dword ptr [ReconPtr] ; Load the reconstruction Pointer
|
||||
mov ecx,dword ptr [TmpDataPtr] ; Load the temp results pointer
|
||||
// Row 1
|
||||
movq mm0,dword ptr [ecx] ; Load 8 elements of results data
|
||||
movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
// Row 2
|
||||
movq mm0,dword ptr [ecx+8] ; Load 8 elements of results data
|
||||
movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
// Row 3
|
||||
movq mm0,dword ptr [ecx+16] ; Load 8 elements of results data
|
||||
movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
// Row 4
|
||||
movq mm0,dword ptr [ecx+24] ; Load 8 elements of results data
|
||||
movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
// Row 5
|
||||
movq mm0,dword ptr [ecx+32] ; Load 8 elements of results data
|
||||
movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
// Row 6
|
||||
movq mm0,dword ptr [ecx+40] ; Load 8 elements of results data
|
||||
movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
// Row 7
|
||||
movq mm0,dword ptr [ecx+48] ; Load 8 elements of results data
|
||||
movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
// Row 8
|
||||
movq mm0,dword ptr [ecx+56] ; Load 8 elements of results data
|
||||
movq dword ptr [eax],mm0 ; Write the data tot he reconstruction buffer.
|
||||
add eax,edx ; Step the reconstruction pointer
|
||||
|
||||
//emms
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
351
Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c
Normal file
351
Src/libvpShared/corelibs/cdxv/vputil/win32/uoptsystemdependant.c
Normal file
|
@ -0,0 +1,351 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : SystemDependant.c
|
||||
*
|
||||
* Description : Miscellaneous system dependant functions
|
||||
*
|
||||
* AUTHOR : Paul Wilkins
|
||||
*
|
||||
*****************************************************************************
|
||||
* Revision History
|
||||
*
|
||||
* 1.20 YWX 06-Nov-02 Added forward DCT function optimized for Pentium 4
|
||||
* 1.19 YWX 15-Jun-01 added function pointer setups for new deblocking filter
|
||||
* 1.18 YWX 26-Apr-01 Fixed the cpu frequency detection bug caused by Sleep()
|
||||
* 1.17 JBX 22-Mar-01 Merged with new vp4-mapca bitstream
|
||||
* 1.16 JBB 26-Jan-01 Cleaned out unused function
|
||||
* 1.15 YWX 08-dec-00 Added WMT PostProcessor and
|
||||
* moved function declarations into _head files
|
||||
* 1.14 JBB 30 NOV 00 Version number changes
|
||||
* 1.13 YWX 03-Nov-00 Optimized postprocessor filters
|
||||
* 1.12 YWX 02-Nov-00 Added new loopfilter function pointers
|
||||
* 1.11 YWX 19-Oct-00 Added 1-2 Scaling functions pointers
|
||||
* 1.10 jbb 16 oct 00 added ifdefs to insure version code
|
||||
* 1.09 YWX 04-Oct-00 Added function pointers for scaling
|
||||
* 1.08 YWX 06 Sep 00 Added function pointers for new deringing filter
|
||||
* using frag baseed Q Value.
|
||||
* 1.07 JBB 21 Aug 00 New More Blurry in high variance area deringer
|
||||
* 1.06 YWX 2 Aug 00 Added function pointers for postprocess
|
||||
* 1.05 YWX 15/05/00 Added functions to check processor frequency
|
||||
* and more function pointers for postprocessor
|
||||
* 1.04 YWX 08/05/00 Added function pointers setup for postprocess
|
||||
* 1.03 SJL 20/04/00 Added ability to enable the new dequant code.
|
||||
* 1.02 SJL 22/03/00 Function pointers for the loop filter.
|
||||
* 1.01 JBB 21/03/00 More Function Pointers for optimized playback
|
||||
* 1.00 PGW 12/10/99 Configuration baseline
|
||||
*
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
*****************************************************************************
|
||||
*/
|
||||
#include "codec_common.h"
|
||||
#include "vputil_if.h"
|
||||
#include "cpuidlib.h"
|
||||
|
||||
//global debugging aid's!
|
||||
int fastIDCTDisabled = 0;
|
||||
int forceCPUID = 0;
|
||||
int CPUID = 0;
|
||||
|
||||
|
||||
extern void GetProcessorFlags(INT32 *MmxEnabled, INT32 *XmmEnabled, INT32 *WmtEnabled);
|
||||
|
||||
// Scalar (no mmx) reconstruction functions
|
||||
extern void ClearSysState_C(void);
|
||||
extern void IDctSlow( INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void IDct10( INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void IDct1( INT16 * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void ScalarReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void ScalarReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void ScalarReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void ReconBlock_C(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep);
|
||||
extern void SubtractBlock_C( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
|
||||
extern void UnpackBlock_C( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
|
||||
extern void AverageBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
|
||||
extern void CopyBlock_C(unsigned char *src, unsigned char *dest, unsigned int srcstride);
|
||||
extern void Copy12x12_C(const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride);
|
||||
extern void fdct_short_C ( INT16 * InputData, INT16 * OutputData );
|
||||
extern void FilterBlockBil_8_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
|
||||
extern void FilterBlock_C( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
|
||||
|
||||
// MMx versions
|
||||
extern void fdct_MMX ( INT16 * InputData, INT16 * OutputData );
|
||||
extern void ClearMmx(void);
|
||||
extern void MMXReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void MmxReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void MmxReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void MMX_idct( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void MMX_idct10( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void MMX_idct1( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void MMX_idct_DX( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void MMX_idct10_DX( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void ReconBlock_MMX(INT16 *SrcBlock,INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep);
|
||||
extern void SubtractBlock_MMX( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep );
|
||||
extern void UnpackBlock_MMX( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
|
||||
extern void AverageBlock_MMX( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine);
|
||||
extern void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride);
|
||||
extern void Copy12x12_MMX(const unsigned char *src, unsigned char *dest, unsigned int srcstride, unsigned int deststride);
|
||||
extern void FilterBlockBil_8_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
|
||||
extern void FilterBlock_mmx( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
|
||||
|
||||
// WMT versions
|
||||
extern void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr1, UINT8 * RefPtr2, INT16 * ChangePtr, UINT32 LineStep );
|
||||
extern void Wmt_idct1( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void Wmt_IDct_Dx( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void Wmt_IDct10_Dx( Q_LIST_ENTRY * InputData, INT16 *QuantMatrix, INT16 * OutputData );
|
||||
extern void fdct_WMT(short *InputData, short *OutputData);
|
||||
extern void FilterBlockBil_8_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 ReconPixelsPerLine, INT32 ModX, INT32 ModY );
|
||||
extern void FilterBlock_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha );
|
||||
|
||||
|
||||
#define IdctAdjustBeforeShift 8
|
||||
extern UINT16 idctconstants[(4+7+1) * 4];
|
||||
extern UINT16 idctcosTbl[ 7];
|
||||
|
||||
void fillidctconstants(void)
|
||||
{
|
||||
int j = 16;
|
||||
UINT16 * p;
|
||||
do
|
||||
{
|
||||
idctconstants[ --j] = 0;
|
||||
}
|
||||
while( j);
|
||||
|
||||
idctconstants[0] = idctconstants[5] = idctconstants[10] = idctconstants[15] = 65535;
|
||||
|
||||
j = 1;
|
||||
do
|
||||
{
|
||||
p = idctconstants + ( (j+3) << 2);
|
||||
p[0] = p[1] = p[2] = p[3] = idctcosTbl[ j - 1];
|
||||
}
|
||||
while( ++j <= 7);
|
||||
|
||||
idctconstants[44] = idctconstants[45] = idctconstants[46] = idctconstants[47] = IdctAdjustBeforeShift;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : Get Processor Flags
|
||||
*
|
||||
* INPUTS : None
|
||||
*
|
||||
* OUTPUTS : None
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Checks for machine specifc features such as MMX support
|
||||
* sets approipriate flags and function pointers.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void GetProcessorFlags
|
||||
(
|
||||
INT32 *MmxEnabled,
|
||||
INT32 *XmmEnabled,
|
||||
INT32 *WmtEnabled
|
||||
)
|
||||
{
|
||||
|
||||
PROCTYPE CPUType = findCPUId();
|
||||
if(forceCPUID)
|
||||
CPUType = CPUID;
|
||||
|
||||
switch(CPUType)
|
||||
{
|
||||
case X86 :
|
||||
case PPRO :
|
||||
case C6X86 :
|
||||
case C6X86MX:
|
||||
case AMDK5 :
|
||||
case MACG3 :
|
||||
case MAC68K :
|
||||
*MmxEnabled = FALSE;
|
||||
*XmmEnabled = FALSE;
|
||||
*WmtEnabled = FALSE;
|
||||
break;
|
||||
case PII :
|
||||
case AMDK63D:
|
||||
case AMDK6 :
|
||||
case PMMX :
|
||||
*MmxEnabled = TRUE;
|
||||
*XmmEnabled = FALSE;
|
||||
*WmtEnabled = FALSE;
|
||||
break;
|
||||
case XMM :
|
||||
*MmxEnabled = TRUE;
|
||||
*XmmEnabled = TRUE;
|
||||
*WmtEnabled = FALSE;
|
||||
break;
|
||||
case WMT :
|
||||
*MmxEnabled = TRUE;
|
||||
*XmmEnabled = TRUE;
|
||||
*WmtEnabled = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : MachineSpecificConfig
|
||||
*
|
||||
* INPUTS : None
|
||||
*
|
||||
* OUTPUTS : None
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Checks for machine specifc features such as MMX support
|
||||
* sets approipriate flags and function pointers.
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void UtilMachineSpecificConfig
|
||||
(
|
||||
void
|
||||
)
|
||||
{
|
||||
UINT32 i;
|
||||
INT32 MmxEnabled;
|
||||
INT32 XmmEnabled;
|
||||
INT32 WmtEnabled;
|
||||
|
||||
GetProcessorFlags( &MmxEnabled,&XmmEnabled,&WmtEnabled);
|
||||
|
||||
if(WmtEnabled) //Willamette
|
||||
{
|
||||
for(i=0;i<=64;i++)
|
||||
{
|
||||
|
||||
if(fastIDCTDisabled)
|
||||
idct[i]=Wmt_IDct_Dx;
|
||||
else
|
||||
{
|
||||
if(i<=1)idct[i]=Wmt_idct1;
|
||||
else if(i<=10)idct[i]=Wmt_IDct10_Dx;
|
||||
else idct[i]=Wmt_IDct_Dx;
|
||||
}
|
||||
}
|
||||
for(i=0;i<=64;i++)
|
||||
{
|
||||
if(fastIDCTDisabled)
|
||||
idctc[i]=MMX_idct;
|
||||
else
|
||||
{
|
||||
if(i<=1)idctc[i]=Wmt_idct1;
|
||||
else if(i<=10)idctc[i]=MMX_idct10;
|
||||
else idctc[i]=MMX_idct;
|
||||
}
|
||||
}
|
||||
fdct_short=fdct_WMT;
|
||||
|
||||
ReconIntra = WmtReconIntra;
|
||||
ReconInter = WmtReconInter;
|
||||
ReconInterHalfPixel2 = WmtReconInterHalfPixel2;
|
||||
ClearSysState = ClearMmx;
|
||||
AverageBlock = AverageBlock_MMX;
|
||||
UnpackBlock = UnpackBlock_MMX;
|
||||
ReconBlock = ReconBlock_MMX;
|
||||
SubtractBlock = SubtractBlock_MMX;
|
||||
CopyBlock = CopyBlockMMX;
|
||||
Copy12x12 = Copy12x12_MMX;
|
||||
FilterBlockBil_8 = FilterBlockBil_8_wmt;
|
||||
FilterBlock=FilterBlock_wmt;
|
||||
//FilterBlock=FilterBlock_C;
|
||||
}
|
||||
else if ( MmxEnabled )
|
||||
{
|
||||
for(i=0;i<=64;i++)
|
||||
{
|
||||
if(fastIDCTDisabled)
|
||||
idctc[i]=MMX_idct_DX;
|
||||
else
|
||||
{
|
||||
if(i<=1)idctc[i]=MMX_idct1;
|
||||
else if(i<=10)idctc[i]=MMX_idct10;
|
||||
else idctc[i]=MMX_idct;
|
||||
}
|
||||
}
|
||||
fdct_short=fdct_MMX;
|
||||
for(i=0;i<=64;i++)
|
||||
{
|
||||
if(fastIDCTDisabled)
|
||||
idct[i]=MMX_idct_DX;
|
||||
else
|
||||
{
|
||||
if(i<=1)idct[i]=MMX_idct1;
|
||||
else if(i<=10)idct[i]=MMX_idct10_DX;
|
||||
else idct[i]=MMX_idct_DX;
|
||||
}
|
||||
}
|
||||
|
||||
ReconIntra = MMXReconIntra;
|
||||
ReconInter = MmxReconInter;
|
||||
ReconInterHalfPixel2 = MmxReconInterHalfPixel2;
|
||||
ClearSysState = ClearMmx;
|
||||
AverageBlock = AverageBlock_MMX;
|
||||
UnpackBlock = UnpackBlock_MMX;
|
||||
ReconBlock = ReconBlock_MMX;
|
||||
SubtractBlock = SubtractBlock_MMX;
|
||||
CopyBlock = CopyBlockMMX;
|
||||
Copy12x12 = Copy12x12_MMX;
|
||||
FilterBlockBil_8 = FilterBlockBil_8_mmx;
|
||||
FilterBlock=FilterBlock_mmx;
|
||||
//FilterBlock=FilterBlock_C;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
for(i=0;i<=64;i++)
|
||||
{
|
||||
if(fastIDCTDisabled)
|
||||
idctc[i]=IDctSlow;
|
||||
else
|
||||
{
|
||||
if(i<=1)idctc[i]=IDct1;
|
||||
else if(i<=10)idctc[i]=IDct10;
|
||||
else idctc[i]=IDctSlow;
|
||||
}
|
||||
}
|
||||
fdct_short=fdct_short_C ;
|
||||
for(i=0;i<=64;i++)
|
||||
{
|
||||
if(fastIDCTDisabled)
|
||||
idct[i]=IDctSlow;
|
||||
else
|
||||
{
|
||||
if(i<=1)idct[i]=IDct1;
|
||||
else if(i<=10)idct[i]=IDct10;
|
||||
else idct[i]=IDctSlow;
|
||||
}
|
||||
}
|
||||
ClearSysState = ClearSysState_C;
|
||||
ReconIntra = ScalarReconIntra;
|
||||
ReconInter = ScalarReconInter;
|
||||
ReconInterHalfPixel2 = ScalarReconInterHalfPixel2;
|
||||
AverageBlock = AverageBlock_C;
|
||||
UnpackBlock = UnpackBlock_C;
|
||||
ReconBlock = ReconBlock_C;
|
||||
SubtractBlock = SubtractBlock_C;
|
||||
CopyBlock = CopyBlock_C;
|
||||
Copy12x12 = Copy12x12_MMX;
|
||||
FilterBlockBil_8 = FilterBlockBil_8_C;
|
||||
FilterBlock=FilterBlock_C;
|
||||
}
|
||||
//FilterBlock=FilterBlock_C;
|
||||
|
||||
}
|
507
Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c
Normal file
507
Src/libvpShared/corelibs/cdxv/vputil/win32/vputilasm.c
Normal file
|
@ -0,0 +1,507 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : newLoopTest_asm.c
|
||||
*
|
||||
* Description : Codec specific functions
|
||||
*
|
||||
* AUTHOR : Yaowu Xu
|
||||
*
|
||||
*****************************************************************************
|
||||
* Revision History
|
||||
*
|
||||
* 1.02 YWX 03-Nov-00 Changed confusing variable name
|
||||
* 1.01 YWX 02-Nov-00 Added the set of functions
|
||||
* 1.00 YWX 19-Oct-00 configuration baseline
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
#define STRICT /* Strict type checking. */
|
||||
#include "codec_common.h"
#include <math.h>
#include <string.h>
|
||||
|
||||
/****************************************************************************
|
||||
* Module constants.
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Explicit Imports
|
||||
*****************************************************************************
|
||||
*/
|
||||
extern void SatUnsigned8( UINT8 * ResultPtr, INT16 * DataBlock,
|
||||
UINT32 ResultLineStep, UINT32 DataLineStep );
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Global Variables
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Functions
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Module Statics
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Forward References
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : ClearMmx()
|
||||
*
|
||||
*
|
||||
* INPUTS : None
|
||||
*
|
||||
* OUTPUTS :
|
||||
*
|
||||
* RETURNS :
|
||||
*
|
||||
*
|
||||
* FUNCTION : Clears down the MMX state
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void ClearMmx(void)
|
||||
{
|
||||
__asm
|
||||
{
|
||||
emms ; Clear the MMX state.
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : CopyBlockUsingMMX
|
||||
*
|
||||
* INPUTS : None
|
||||
*
|
||||
* OUTPUTS : None
|
||||
*
|
||||
* RETURNS : None.
|
||||
*
|
||||
* FUNCTION : Copies a block from source to destination
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void CopyBlockMMX(unsigned char *src, unsigned char *dest, unsigned int srcstride)
|
||||
{
|
||||
unsigned char *s = src;
|
||||
unsigned char *d = dest;
|
||||
unsigned int stride = srcstride;
|
||||
// recon copy
|
||||
_asm
|
||||
{
|
||||
mov ecx, [stride]
|
||||
mov eax, [s]
|
||||
mov ebx, [d]
|
||||
lea edx, [ecx + ecx * 2]
|
||||
|
||||
movq mm0, [eax]
|
||||
movq mm1, [eax + ecx]
|
||||
movq mm2, [eax + ecx*2]
|
||||
movq mm3, [eax + edx]
|
||||
|
||||
lea eax, [eax + ecx*4]
|
||||
|
||||
movq [ebx], mm0
|
||||
movq [ebx + ecx], mm1
|
||||
movq [ebx + ecx*2], mm2
|
||||
movq [ebx + edx], mm3
|
||||
|
||||
lea ebx, [ebx + ecx * 4]
|
||||
|
||||
movq mm0, [eax]
|
||||
movq mm1, [eax + ecx]
|
||||
movq mm2, [eax + ecx*2]
|
||||
movq mm3, [eax + edx]
|
||||
|
||||
movq [ebx], mm0
|
||||
movq [ebx + ecx], mm1
|
||||
movq [ebx + ecx*2], mm2
|
||||
movq [ebx + edx], mm3
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
 *
 *  ROUTINE       :     Copy12x12_MMX
 *
 *  INPUTS        :     const unsigned char *src : first byte of the source block
 *                      unsigned int srcstride   : bytes between source rows
 *                      unsigned int deststride  : bytes between destination rows
 *
 *  OUTPUTS       :     unsigned char *dest      : receives the copied block
 *
 *  RETURNS       :     None.
 *
 *  FUNCTION      :     Copies a block of 12 rows by 12 bytes from src to dest.
 *
 *  SPECIAL NOTES :     Despite the name this routine is plain C and uses no
 *                      MMX instructions. Rows are copied with memcpy so that
 *                      src and dest may have any alignment; the source and
 *                      destination rows must not overlap.
 *
 *  ERRORS        :     None.
 *
 ****************************************************************************/
void Copy12x12_MMX(
    const unsigned char *src,
    unsigned char *dest,
    unsigned int srcstride,
    unsigned int deststride)
{
    unsigned int row;

    for ( row = 0; row < 12; row++ )
    {
        // Byte-wise copy: no pointer type-punning, no alignment assumption,
        // and the const qualifier on src is respected.
        memcpy( dest, src, 12 );

        src  += srcstride;
        dest += deststride;
    }
}
|
||||
|
||||
/****************************************************************************
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : AverageBlock_MMX
|
||||
*
|
||||
* INPUTS : Two block data to be averaged
|
||||
*
|
||||
* OUTPUTS : block with the average values
|
||||
*
|
||||
* RETURNS : None.
|
||||
*
|
||||
* FUNCTION : Do pixel averages on two reference blocks
|
||||
*
|
||||
* SPECIAL NOTES : This functions has a mmx version in newlooptest_asm.c
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void AverageBlock_MMX( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 ReconPixelsPerLine)
|
||||
{
|
||||
|
||||
__asm
|
||||
{
|
||||
mov esi, ReconPtr1
|
||||
mov eax, ReconPtr2
|
||||
|
||||
mov edi, ReconRefPtr
|
||||
mov ecx, BLOCK_HEIGHT_WIDTH
|
||||
|
||||
mov edx, ReconPixelsPerLine
|
||||
pxor mm7, mm7
|
||||
|
||||
AverageBlock_Loop:
|
||||
|
||||
movq mm0, [esi]
|
||||
movq mm1, [eax]
|
||||
|
||||
movq mm2, mm0
|
||||
punpcklbw mm0, mm7
|
||||
|
||||
movq mm3, mm1
|
||||
punpcklbw mm1, mm7
|
||||
|
||||
paddw mm0, mm1
|
||||
punpckhbw mm2, mm7
|
||||
|
||||
psraw mm0, 1
|
||||
punpckhbw mm3, mm7
|
||||
|
||||
paddw mm2, mm3
|
||||
movq [edi], mm0
|
||||
|
||||
psraw mm2, 1
|
||||
add esi, edx
|
||||
|
||||
add eax, edx
|
||||
add edi, 16
|
||||
|
||||
movq [edi-8], mm2
|
||||
dec ecx
|
||||
|
||||
jnz AverageBlock_Loop
|
||||
}
|
||||
/*
|
||||
UINT32 i;
|
||||
|
||||
// For each block row
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
ReconRefPtr[0] = (INT16)((INT32)(ReconPtr1[0])+ ((INT32)ReconPtr2[0]))>>1;
|
||||
ReconRefPtr[1] = (INT16)((INT32)(ReconPtr1[1])+ ((INT32)ReconPtr2[1]))>>1;
|
||||
ReconRefPtr[2] = (INT16)((INT32)(ReconPtr1[2])+ ((INT32)ReconPtr2[2]))>>1;
|
||||
ReconRefPtr[3] = (INT16)((INT32)(ReconPtr1[3])+ ((INT32)ReconPtr2[3]))>>1;
|
||||
ReconRefPtr[4] = (INT16)((INT32)(ReconPtr1[4])+ ((INT32)ReconPtr2[4]))>>1;
|
||||
ReconRefPtr[5] = (INT16)((INT32)(ReconPtr1[5])+ ((INT32)ReconPtr2[5]))>>1;
|
||||
ReconRefPtr[6] = (INT16)((INT32)(ReconPtr1[6])+ ((INT32)ReconPtr2[6]))>>1;
|
||||
ReconRefPtr[7] = (INT16)((INT32)(ReconPtr1[7])+ ((INT32)ReconPtr2[7]))>>1;
|
||||
|
||||
// Start next row
|
||||
ReconPtr1 += ReconPixelsPerLine;
|
||||
ReconPtr2 += ReconPixelsPerLine;
|
||||
|
||||
ReconRefPtr += BLOCK_HEIGHT_WIDTH;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : UnpackBlock
|
||||
*
|
||||
* INPUTS : Block of char data to be converted to short
|
||||
*
|
||||
* OUTPUTS : converted output
|
||||
*
|
||||
* RETURNS : None.
|
||||
*
|
||||
* FUNCTION : Converted char block data to short
|
||||
*
|
||||
* SPECIAL NOTES : This functions has a mmx version in newlooptest_asm.c
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void UnpackBlock_MMX( UINT8 *ReconPtr, INT16 *ReconRefPtr, UINT32 ReconPixelsPerLine)
|
||||
{
|
||||
|
||||
__asm
|
||||
{
|
||||
mov esi, ReconPtr
|
||||
mov edi, ReconRefPtr
|
||||
|
||||
mov ecx, BLOCK_HEIGHT_WIDTH
|
||||
mov edx, ReconPixelsPerLine
|
||||
|
||||
pxor mm7, mm7
|
||||
|
||||
UnpackBlock_Loop:
|
||||
|
||||
movq mm0, [esi]
|
||||
movq mm2, mm0
|
||||
|
||||
punpcklbw mm0, mm7
|
||||
movq [edi], mm0
|
||||
|
||||
punpckhbw mm2, mm7
|
||||
add esi, edx
|
||||
|
||||
movq [edi+8], mm2
|
||||
add edi, 16
|
||||
|
||||
dec ecx
|
||||
jnz UnpackBlock_Loop
|
||||
}
|
||||
|
||||
/*
|
||||
UINT32 i;
|
||||
|
||||
// For each block row
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
|
||||
ReconRefPtr[0] = (INT16)(ReconPtr[0]);
|
||||
ReconRefPtr[1] = (INT16)(ReconPtr[1]);
|
||||
ReconRefPtr[2] = (INT16)(ReconPtr[2]);
|
||||
ReconRefPtr[3] = (INT16)(ReconPtr[3]);
|
||||
ReconRefPtr[4] = (INT16)(ReconPtr[4]);
|
||||
ReconRefPtr[5] = (INT16)(ReconPtr[5]);
|
||||
ReconRefPtr[6] = (INT16)(ReconPtr[6]);
|
||||
ReconRefPtr[7] = (INT16)(ReconPtr[7]);
|
||||
|
||||
// Start next row
|
||||
ReconPtr += ReconPixelsPerLine;
|
||||
ReconRefPtr += BLOCK_HEIGHT_WIDTH;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : SubtractBlock
|
||||
*
|
||||
* INPUTS : Get the residue data for the block
|
||||
*
|
||||
* OUTPUTS : Source block data and ref block data
|
||||
*
|
||||
* RETURNS : residue block data
|
||||
*
|
||||
* FUNCTION : do pixel subtraction of ref block from source block
|
||||
*
|
||||
* SPECIAL NOTES : This functions has a mmx version in newlooptest_asm.c
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void SubtractBlock_MMX( UINT8 *SrcBlock, INT16 *DestPtr, UINT32 LineStep )
|
||||
{
|
||||
|
||||
__asm
|
||||
{
|
||||
|
||||
mov esi, SrcBlock
|
||||
mov edi, DestPtr
|
||||
|
||||
mov edx, LineStep
|
||||
mov ecx, 8
|
||||
|
||||
pxor mm7, mm7
|
||||
|
||||
SubtractBlock_Loop:
|
||||
|
||||
movq mm0, [esi]
|
||||
movq mm1, [edi]
|
||||
|
||||
movq mm2, mm0
|
||||
punpcklbw mm0, mm7
|
||||
|
||||
movq mm3, [edi+8]
|
||||
psubw mm0, mm1
|
||||
|
||||
punpckhbw mm2, mm7
|
||||
movq [edi], mm0
|
||||
|
||||
psubw mm2, mm3
|
||||
add esi, edx
|
||||
|
||||
movq [edi+8], mm2
|
||||
add edi, 16
|
||||
|
||||
dec ecx
|
||||
jnz SubtractBlock_Loop
|
||||
}
|
||||
|
||||
/*
|
||||
UINT32 i;
|
||||
|
||||
// For each block row
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
|
||||
DestPtr[0] = (INT16)((INT32)SrcBlock[0] - (INT32)DestPtr[0]);
|
||||
DestPtr[1] = (INT16)((INT32)SrcBlock[1] - (INT32)DestPtr[1]);
|
||||
DestPtr[2] = (INT16)((INT32)SrcBlock[2] - (INT32)DestPtr[2]);
|
||||
DestPtr[3] = (INT16)((INT32)SrcBlock[3] - (INT32)DestPtr[3]);
|
||||
DestPtr[4] = (INT16)((INT32)SrcBlock[4] - (INT32)DestPtr[4]);
|
||||
DestPtr[5] = (INT16)((INT32)SrcBlock[5] - (INT32)DestPtr[5]);
|
||||
DestPtr[6] = (INT16)((INT32)SrcBlock[6] - (INT32)DestPtr[6]);
|
||||
DestPtr[7] = (INT16)((INT32)SrcBlock[7] - (INT32)DestPtr[7]);
|
||||
|
||||
// Start next row
|
||||
SrcBlock += LineStep;
|
||||
DestPtr += BLOCK_HEIGHT_WIDTH;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : ReconBlock
|
||||
*
|
||||
* INPUTS :
|
||||
*
|
||||
* OUTPUTS :
|
||||
*
|
||||
* RETURNS :
|
||||
*
|
||||
* FUNCTION : Reconstrut a block using ref blocka and change data
|
||||
*
|
||||
* SPECIAL NOTES : This functions has a mmx version in newlooptest_asm.c
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void ReconBlock_MMX( INT16 *SrcBlock, INT16 *ReconRefPtr, UINT8 *DestBlock, UINT32 LineStep)
|
||||
{
|
||||
|
||||
__asm
|
||||
{
|
||||
|
||||
mov esi, SrcBlock
|
||||
mov eax, ReconRefPtr
|
||||
|
||||
mov edi, DestBlock
|
||||
mov ecx, 8
|
||||
|
||||
mov edx, LineStep
|
||||
pxor mm7, mm7
|
||||
|
||||
ReconBlock_Loop:
|
||||
|
||||
movq mm0, [esi]
|
||||
movq mm1, [eax]
|
||||
|
||||
movq mm2, [esi+8]
|
||||
movq mm3, [eax+8]
|
||||
|
||||
paddw mm0, mm1
|
||||
paddw mm2, mm3
|
||||
|
||||
packuswb mm0, mm2
|
||||
movq [edi], mm0
|
||||
|
||||
add esi, 16
|
||||
add eax, 16
|
||||
|
||||
add edi, edx
|
||||
dec ecx
|
||||
|
||||
jnz ReconBlock_Loop
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
UINT32 i;
|
||||
INT16 *SrcBlockPtr = SrcBlock;
|
||||
|
||||
// For each block row
|
||||
for ( i=0; i<BLOCK_HEIGHT_WIDTH; i++ )
|
||||
{
|
||||
SrcBlock[0] += ReconRefPtr[0];
|
||||
SrcBlock[1] += ReconRefPtr[1];
|
||||
SrcBlock[2] += ReconRefPtr[2];
|
||||
SrcBlock[3] += ReconRefPtr[3];
|
||||
SrcBlock[4] += ReconRefPtr[4];
|
||||
SrcBlock[5] += ReconRefPtr[5];
|
||||
SrcBlock[6] += ReconRefPtr[6];
|
||||
SrcBlock[7] += ReconRefPtr[7];
|
||||
|
||||
// Start next row
|
||||
SrcBlock += BLOCK_HEIGHT_WIDTH;
|
||||
ReconRefPtr += BLOCK_HEIGHT_WIDTH;
|
||||
}
|
||||
// Saturated the block and write to the output
|
||||
SatUnsigned8( DestBlock, SrcBlockPtr, LineStep, BLOCK_HEIGHT_WIDTH );
|
||||
*/
|
||||
|
||||
}
|
||||
|
1859
Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c
Normal file
1859
Src/libvpShared/corelibs/cdxv/vputil/win32/wmtidct.c
Normal file
File diff suppressed because it is too large
Load diff
281
Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c
Normal file
281
Src/libvpShared/corelibs/cdxv/vputil/win32/wmtrecon.c
Normal file
|
@ -0,0 +1,281 @@
|
|||
/****************************************************************************
|
||||
*
|
||||
* Module Title : WmtOptFunctions.c
|
||||
*
|
||||
* Description : willamette processor specific
|
||||
* optimised versions of functions
|
||||
*
|
||||
* AUTHOR : Yaowu Xu
|
||||
*
|
||||
* Special Note:
|
||||
*
|
||||
*****************************************************************************
|
||||
* Revision History
|
||||
*
|
||||
*
|
||||
* 1.03 YWX 07-Dec-00 Removed constants and functions that are not in use
|
||||
* Added push and pop ebx in WmtReconIntra
|
||||
* 1.02 YWX 30 Aug 00 changed to be compatible with Microsoft compiler
|
||||
* 1.01 YWX 13 JUL 00 New Willamette Optimized Functions
|
||||
* 1.00 YWX 14/06/00 Configuration baseline from OptFunctions.c
|
||||
*
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/*
|
||||
Use Tim's optimized version.
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Header Files
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
#define STRICT // Strict type checking.
|
||||
|
||||
#include "reconstruct.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Module constants.
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Imports.
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Global Variables
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Exported Functions
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
/****************************************************************************
|
||||
* Module Statics
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
|
||||
// Eight bytes of 0x80. WmtReconIntra loads these into xmm0 and XORs them with
// signed-saturated bytes, which is equivalent to adding 128 to each byte.
_declspec(align(16)) static UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128};
|
||||
|
||||
#pragma warning( disable : 4799 ) // Disable no emms instruction warning!
|
||||
|
||||
/****************************************************************************
|
||||
* Forward References
|
||||
*****************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : WmtReconIntra
|
||||
*
|
||||
* INPUTS : INT16 * idct
|
||||
* Pointer to the output from the idct for this block
|
||||
*
|
||||
* UINT32 stride
|
||||
* Line Length in pixels in recon and reference images
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
* OUTPUTS : UINT8 * dest
|
||||
* The reconstruction buffer
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs an intra block - wmt version
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * dest, UINT16 * idct, UINT32 stride )
|
||||
{
|
||||
(void)TmpDataBuffer;
|
||||
__asm
|
||||
{
|
||||
|
||||
push ebx
|
||||
|
||||
mov eax,[idct] ; Signed 16 bit inputs
|
||||
mov edx,[dest] ; Unsigned 8 bit outputs
|
||||
|
||||
movq xmm0,QWORD PTR [Eight128s] ; Set xmm0 to 0x000000000000008080808080808080
|
||||
pxor xmm3, xmm3 ; set xmm3 to 0
|
||||
;
|
||||
mov ebx,[stride] ; Line stride in output buffer
|
||||
lea ecx,[eax+128] ; Endpoint in input buffer
|
||||
|
||||
loop_label:
|
||||
|
||||
movdqa xmm2,XMMWORD PTR [eax] ; Read the eight inputs
|
||||
packsswb xmm2,xmm3 ;
|
||||
|
||||
pxor xmm2,xmm0 ; Convert result to unsigned (same as add 128)
|
||||
lea eax,[eax + 16] ; Step source buffer
|
||||
|
||||
cmp eax,ecx ; are we done
|
||||
movq QWORD PTR [edx],xmm2 ; store results
|
||||
|
||||
lea edx,[edx+ebx] ; Step output buffer
|
||||
jc loop_label ; Loop back if we are not done
|
||||
|
||||
pop ebx
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : WmtReconInter
|
||||
*
|
||||
* INPUTS : UINT8 * RefPtr
|
||||
* The last frame reference
|
||||
*
|
||||
* INT16 * ChangePtr
|
||||
* Pointer to the change data
|
||||
*
|
||||
* UINT32 LineStep
|
||||
* Line Length in pixels in recon and ref images
|
||||
*
|
||||
* OUTPUTS : UINT8 * ReconPtr
|
||||
* The reconstruction
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs data from last data and change
|
||||
*
|
||||
* SPECIAL NOTES :
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
|
||||
{
|
||||
(void) TmpDataBuffer;
|
||||
|
||||
_asm {
|
||||
push edi
|
||||
|
||||
mov ebx, [RefPtr]
|
||||
mov ecx, [ChangePtr]
|
||||
|
||||
mov eax, [ReconPtr]
|
||||
mov edx, [LineStep]
|
||||
|
||||
pxor xmm0, xmm0
|
||||
lea edi, [ecx + 128]
|
||||
L:
|
||||
movq xmm2, QWORD ptr [ebx] ; (+3 misaligned) 8 reference pixels
|
||||
movdqa xmm4, XMMWORD ptr [ecx] ; 8 changes
|
||||
|
||||
punpcklbw xmm2, xmm0 ;
|
||||
|
||||
add ebx, edx ; next row of reference pixels
|
||||
paddsw xmm2, xmm4 ; add in first 4 changes
|
||||
|
||||
lea ecx, [ecx + 16] ; next row of changes
|
||||
packuswb xmm2, xmm0 ; pack result to unsigned 8-bit values
|
||||
|
||||
cmp ecx, edi ; are we done?
|
||||
movq QWORD PTR [eax], xmm2 ; store result
|
||||
|
||||
lea eax, [eax+edx] ; next row of output
|
||||
jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
|
||||
|
||||
pop edi
|
||||
}
|
||||
|
||||
}
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : WmtReconInterHalfPixel2
|
||||
*
|
||||
* INPUTS : UINT8 * RefPtr1, RefPtr2
|
||||
* The last frame reference
|
||||
*
|
||||
* INT16 * ChangePtr
|
||||
* Pointer to the change data
|
||||
*
|
||||
* UINT32 LineStep
|
||||
* Line Length in pixels in recon and ref images
|
||||
*
|
||||
*
|
||||
* OUTPUTS : UINT8 * ReconPtr
|
||||
* The reconstruction
|
||||
*
|
||||
* RETURNS : None
|
||||
*
|
||||
* FUNCTION : Reconstructs data from half pixel reference data and change.
|
||||
* Half pixel data interpolated from 2 references.
|
||||
*
|
||||
* SPECIAL NOTES :
|
||||
*
|
||||
*
|
||||
* ERRORS : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,
|
||||
UINT8 * RefPtr1, UINT8 * RefPtr2,
|
||||
INT16 * ChangePtr, UINT32 LineStep )
|
||||
{
|
||||
(void)TmpDataBuffer;
|
||||
|
||||
_asm {
|
||||
push esi
|
||||
push edi
|
||||
|
||||
mov ecx, [ChangePtr]
|
||||
mov esi, [RefPtr1]
|
||||
|
||||
mov edi, [RefPtr2]
|
||||
mov ebx, [ReconPtr]
|
||||
|
||||
mov edx, [LineStep]
|
||||
lea eax, [ecx+128]
|
||||
|
||||
pxor xmm0, xmm0
|
||||
|
||||
L:
|
||||
|
||||
movq xmm2, QWORD PTR [esi] ; (+3 misaligned) mm2 = row from ref1
|
||||
movq xmm4, QWORD PTR [edi] ; (+3 misaligned) mm4 = row from ref2
|
||||
|
||||
punpcklbw xmm2, xmm0 ;
|
||||
punpcklbw xmm4, xmm0 ;
|
||||
|
||||
movdqa xmm6, [ecx] ; mm6 = first 4 changes
|
||||
paddw xmm2, xmm4 ; mm2 = start (ref1 + ref2)
|
||||
|
||||
|
||||
psrlw xmm2, 1 ; mm2 = start (ref1 + ref2)/2
|
||||
paddw xmm2, xmm6 ; add changes to start
|
||||
|
||||
lea ecx, [ecx+16] ; next row idct
|
||||
packuswb xmm2, xmm0 ; pack start|end to unsigned 8-bit
|
||||
|
||||
add esi, edx ; next row ref1
|
||||
add edi, edx ; next row ref2
|
||||
|
||||
cmp ecx, eax
|
||||
movq QWORD PTR [ebx], xmm2 ; store result
|
||||
;
|
||||
lea ebx, [ebx+edx]
|
||||
jc L
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue