Initial community commit
This commit is contained in:
parent
537bcbc862
commit
fc06254474
16440 changed files with 4239995 additions and 2 deletions
327
Src/h264dec/lcommon/src/img_io.c
Normal file
327
Src/h264dec/lcommon/src/img_io.c
Normal file
|
@ -0,0 +1,327 @@
|
|||
|
||||
/*!
|
||||
*************************************************************************************
|
||||
* \file img_io.c
|
||||
*
|
||||
* \brief
|
||||
* image I/O related functions
|
||||
*
|
||||
* \author
|
||||
* Main contributors (see contributors.h for copyright, address and affiliation details)
|
||||
* - Alexis Michael Tourapis <alexismt@ieee.org>
|
||||
*************************************************************************************
|
||||
*/
|
||||
#include "contributors.h"
|
||||
#include "global.h"
|
||||
#include "img_io.h"
|
||||
#include "report.h"
|
||||
|
||||
static const VIDEO_SIZE VideoRes[] = {
|
||||
{ "qcif" , 176, 144},
|
||||
{ "qqvga" , 160, 128},
|
||||
{ "qvga" , 320, 240},
|
||||
{ "sif" , 352, 240},
|
||||
{ "cif" , 352, 288},
|
||||
{ "vga" , 640, 480},
|
||||
{ "sd1" , 720, 480},
|
||||
{ "sd2" , 704, 576},
|
||||
{ "sd3" , 720, 576},
|
||||
{ "720p" , 1280, 720},
|
||||
{ "1080p" , 1920, 1080},
|
||||
{ NULL, 0, 0}
|
||||
};
|
||||
|
||||
/*!
|
||||
************************************************************************
|
||||
* \brief
|
||||
* Parse Size from from file name
|
||||
*
|
||||
************************************************************************
|
||||
*/
|
||||
int ParseSizeFromString (VideoDataFile *input_file, int *x_size, int *y_size, double *fps)
|
||||
{
|
||||
char *p1, *p2, *tail;
|
||||
char *fn = input_file->fname;
|
||||
char c;
|
||||
int i = 0;
|
||||
|
||||
*x_size = *y_size = -1;
|
||||
p1 = p2 = fn;
|
||||
while (p1 != NULL && p2 != NULL)
|
||||
{
|
||||
// Search for first '_'
|
||||
p1 = strstr( p1, "_");
|
||||
if (p1 == NULL)
|
||||
break;
|
||||
|
||||
// Search for end character of x_size (first 'x' after last '_')
|
||||
p2 = strstr( p1, "x");
|
||||
|
||||
// If no 'x' is found, exit
|
||||
if (p2 == NULL)
|
||||
break;
|
||||
|
||||
// Try conversion of number
|
||||
*p2 = 0;
|
||||
*x_size = strtol( p1 + 1, &tail, 10);
|
||||
|
||||
// If there are characters left in the string, or the string is null, discard conversion
|
||||
if (*tail != '\0' || *(p1 + 1) == '\0')
|
||||
{
|
||||
*p2 = 'x';
|
||||
p1 = tail;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Conversion was correct. Restore string
|
||||
*p2 = 'x';
|
||||
|
||||
// Search for end character of y_size (first '_' or '.' after last 'x')
|
||||
p1 = strpbrk( p2 + 1, "_.");
|
||||
// If no '_' or '.' is found, try again from current position
|
||||
if (p1 == NULL)
|
||||
{
|
||||
p1 = p2 + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try conversion of number
|
||||
c = *p1;
|
||||
*p1 = 0;
|
||||
*y_size = strtol( p2 + 1, &tail, 10);
|
||||
|
||||
// If there are characters left in the string, or the string is null, discard conversion
|
||||
if (*tail != '\0' || *(p2 + 1) == '\0')
|
||||
{
|
||||
*p1 = c;
|
||||
p1 = tail;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Conversion was correct. Restore string
|
||||
*p1 = c;
|
||||
|
||||
// Search for end character of y_size (first 'i' or 'p' after last '_')
|
||||
p2 = strstr( p1 + 1, "ip");
|
||||
|
||||
// If no 'i' or 'p' is found, exit
|
||||
if (p2 == NULL)
|
||||
break;
|
||||
|
||||
// Try conversion of number
|
||||
c = *p2;
|
||||
*p2 = 0;
|
||||
*fps = strtod( p1 + 1, &tail);
|
||||
|
||||
// If there are characters left in the string, or the string is null, discard conversion
|
||||
if (*tail != '\0' || *(p1 + 1) == '\0')
|
||||
{
|
||||
*p2 = c;
|
||||
p1 = tail;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Conversion was correct. Restore string
|
||||
*p2 = c;
|
||||
break;
|
||||
}
|
||||
|
||||
// Now lets test some common video file formats
|
||||
if (p1 == NULL || p2 == NULL)
|
||||
{
|
||||
for (i = 0; VideoRes[i].name != NULL; i++)
|
||||
{
|
||||
if (strcasecmp (fn, VideoRes[i].name))
|
||||
{
|
||||
*x_size = VideoRes[i].x_size;
|
||||
*y_size = VideoRes[i].y_size;
|
||||
// Should add frame rate support as well
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (*x_size == -1 || *y_size == -1) ? 0 : 1;
|
||||
}
|
||||
|
||||
/*!
|
||||
************************************************************************
|
||||
* \brief
|
||||
* Parse Size from from file name
|
||||
*
|
||||
************************************************************************
|
||||
*/
|
||||
void ParseFrameNoFormatFromString (VideoDataFile *input_file)
|
||||
{
|
||||
char *p1, *p2, *tail;
|
||||
char *fn = input_file->fname;
|
||||
char *fhead = input_file->fhead;
|
||||
char *ftail = input_file->ftail;
|
||||
int *zero_pad = &input_file->zero_pad;
|
||||
int *num_digits = &input_file->num_digits;
|
||||
|
||||
*zero_pad = 0;
|
||||
*num_digits = -1;
|
||||
p1 = p2 = fn;
|
||||
while (p1 != NULL && p2 != NULL)
|
||||
{
|
||||
// Search for first '_'
|
||||
p1 = strstr( p1, "%");
|
||||
if (p1 == NULL)
|
||||
break;
|
||||
|
||||
strncpy(fhead, fn, p1 - fn);
|
||||
|
||||
// Search for end character of x_size (first 'x' after last '_')
|
||||
p2 = strstr( p1, "d");
|
||||
|
||||
// If no 'x' is found, exit
|
||||
if (p2 == NULL)
|
||||
break;
|
||||
|
||||
// Try conversion of number
|
||||
*p2 = 0;
|
||||
|
||||
if (*(p1 + 1) == '0')
|
||||
*zero_pad = 1;
|
||||
|
||||
*num_digits = strtol( p1 + 1, &tail, 10);
|
||||
|
||||
// If there are characters left in the string, or the string is null, discard conversion
|
||||
if (*tail != '\0' || *(p1 + 1) == '\0')
|
||||
{
|
||||
*p2 = 'd';
|
||||
p1 = tail;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Conversion was correct. Restore string
|
||||
*p2 = 'd';
|
||||
|
||||
tail++;
|
||||
strncpy(ftail, tail, strlen(tail));
|
||||
break;
|
||||
}
|
||||
|
||||
if (input_file->vdtype == VIDEO_TIFF)
|
||||
{
|
||||
input_file->is_concatenated = 0;
|
||||
}
|
||||
else
|
||||
input_file->is_concatenated = (*num_digits == -1) ? 1 : 0;
|
||||
}
|
||||
|
||||
/*!
|
||||
************************************************************************
|
||||
* \brief
|
||||
* Open file containing a single frame
|
||||
************************************************************************
|
||||
*/
|
||||
void OpenFrameFile( VideoDataFile *input_file, int FrameNumberInFile)
|
||||
{
|
||||
char infile [FILE_NAME_SIZE], in_number[16];
|
||||
int length = 0;
|
||||
in_number[length]='\0';
|
||||
length = strlen(input_file->fhead);
|
||||
strncpy(infile, input_file->fhead, length);
|
||||
infile[length]='\0';
|
||||
if (input_file->zero_pad)
|
||||
snprintf(in_number, 16, "%0*d", input_file->num_digits, FrameNumberInFile);
|
||||
else
|
||||
snprintf(in_number, 16, "%*d", input_file->num_digits, FrameNumberInFile);
|
||||
|
||||
strncat(infile, in_number, sizeof(in_number));
|
||||
length += sizeof(in_number);
|
||||
infile[length]='\0';
|
||||
strncat(infile, input_file->ftail, strlen(input_file->ftail));
|
||||
length += strlen(input_file->ftail);
|
||||
infile[length]='\0';
|
||||
|
||||
if ((input_file->f_num = open(infile, OPENFLAGS_READ)) == -1)
|
||||
{
|
||||
printf ("OpenFrameFile: cannot open file %s\n", infile);
|
||||
report_stats_on_error();
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
************************************************************************
|
||||
* \brief
|
||||
* Open file(s) containing the entire frame sequence
|
||||
************************************************************************
|
||||
*/
|
||||
void OpenFiles( VideoDataFile *input_file)
|
||||
{
|
||||
if (input_file->is_concatenated == 1)
|
||||
{
|
||||
if (strlen(input_file->fname) == 0)
|
||||
{
|
||||
snprintf(errortext, ET_SIZE, "No input sequence name was provided. Please check settings.");
|
||||
error (errortext, 500);
|
||||
}
|
||||
|
||||
if ((input_file->f_num = open(input_file->fname, OPENFLAGS_READ)) == -1)
|
||||
{
|
||||
snprintf(errortext, ET_SIZE, "Input file %s does not exist",input_file->fname);
|
||||
error (errortext, 500);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
************************************************************************
|
||||
* \brief
|
||||
* Close input file
|
||||
************************************************************************
|
||||
*/
|
||||
void CloseFiles(VideoDataFile *input_file)
|
||||
{
|
||||
if (input_file->f_num != -1)
|
||||
close(input_file->f_num);
|
||||
input_file->f_num = -1;
|
||||
}
|
||||
|
||||
/* ==========================================================================
|
||||
*
|
||||
* ParseVideoType
|
||||
*
|
||||
* ==========================================================================
|
||||
*/
|
||||
VideoFileType ParseVideoType (VideoDataFile *input_file)
|
||||
{
|
||||
char *format;
|
||||
|
||||
format = input_file->fname + strlen(input_file->fname) - 3;
|
||||
|
||||
if (strcasecmp (format, "yuv") == 0)
|
||||
{
|
||||
input_file->vdtype = VIDEO_YUV;
|
||||
input_file->format.yuv_format = YUV420;
|
||||
input_file->avi = NULL;
|
||||
}
|
||||
else if (strcasecmp (format, "rgb") == 0)
|
||||
{
|
||||
input_file->vdtype = VIDEO_RGB;
|
||||
input_file->format.yuv_format = YUV444;
|
||||
input_file->avi = NULL;
|
||||
}
|
||||
else if (strcasecmp (format, "tif") == 0)
|
||||
{
|
||||
input_file->vdtype = VIDEO_TIFF;
|
||||
input_file->avi = NULL;
|
||||
}
|
||||
else if (strcasecmp (format, "avi") == 0)
|
||||
{
|
||||
input_file->vdtype = VIDEO_AVI;
|
||||
}
|
||||
else
|
||||
{
|
||||
//snprintf(errortext, ET_SIZE, "ERROR: video file format not supported");
|
||||
//error (errortext, 500);
|
||||
input_file->vdtype = VIDEO_YUV;
|
||||
input_file->format.yuv_format = YUV420;
|
||||
input_file->avi = NULL;
|
||||
}
|
||||
|
||||
return input_file->vdtype;
|
||||
}
|
1280
Src/h264dec/lcommon/src/memalloc.c
Normal file
1280
Src/h264dec/lcommon/src/memalloc.c
Normal file
File diff suppressed because it is too large
Load diff
106
Src/h264dec/lcommon/src/memcache.c
Normal file
106
Src/h264dec/lcommon/src/memcache.c
Normal file
|
@ -0,0 +1,106 @@
|
|||
#include "memcache.h"
|
||||
#include "mbuffer.h"
|
||||
#include "memalloc.h"
|
||||
|
||||
/* Release every image held by the cache with free_memImage() and reset the
 * cached dimensions to 0x0. */
void image_cache_flush(ImageCache *cache)
{
  VideoImage *cur, *following;

  for (cur = cache->head; cur; cur = following)
  {
    // Remember the successor before the node is released
    following = cur->next;
    free_memImage(cur);
    cache->head = following;
  }

  cache->size_x = 0;
  cache->size_y = 0;
}
|
||||
|
||||
/* Set the dimensions the cache is valid for. If they differ from the current
 * ones, all cached images are flushed first. */
void image_cache_set_dimensions(ImageCache *cache, int width, int height)
{
  // Same dimensions: the cached images stay usable
  if (cache->size_x == width && cache->size_y == height)
    return;

  image_cache_flush(cache);
  cache->size_x = width;
  cache->size_y = height;
}
|
||||
|
||||
/* Return 1 if the cache is set up for exactly width x height, 0 otherwise. */
int image_cache_dimensions_match(ImageCache *cache, int width, int height)
{
  return (cache->size_x == width && cache->size_y == height) ? 1 : 0;
}
|
||||
|
||||
/* Push an image onto the front of the cache's singly linked list. */
void image_cache_add(ImageCache *cache, VideoImage *image)
{
  VideoImage *previous_head = cache->head;

  image->next = previous_head;
  cache->head = image;
}
|
||||
|
||||
/* Pop the first image from the cache. Returns 0 when the cache is empty;
 * the returned image is detached (its next pointer is cleared). */
struct video_image *image_cache_get(ImageCache *cache)
{
  VideoImage *top = cache->head;

  if (!top)
    return 0;

  cache->head = top->next;
  top->next = 0;
  return top;
}
|
||||
|
||||
/* -------------
|
||||
|
||||
PicMotion arrays are allocated with one extra slot in the first dimension
|
||||
which we use as the next pointer
|
||||
------------- */
|
||||
|
||||
|
||||
/* Release every cached PicMotion array with free_mem2DPicMotion() and reset
 * the cached dimensions to 0x0. The link to the next cached array is read
 * from slot [size_y] of each array. */
void motion_cache_flush(MotionCache *cache)
{
  PicMotion **cur, **following;

  for (cur = cache->head; cur; cur = following)
  {
    // Fetch the link before the array is released
    following = (PicMotion **)cur[cache->size_y];
    free_mem2DPicMotion(cur);
    cache->head = following;
  }

  cache->size_x = 0;
  cache->size_y = 0;
}
|
||||
|
||||
/* Set the dimensions the cache is valid for. If they differ from the current
 * ones, all cached arrays are flushed first. */
void motion_cache_set_dimensions(MotionCache *cache, int width, int height)
{
  // Same dimensions: the cached arrays stay usable
  if (cache->size_x == width && cache->size_y == height)
    return;

  motion_cache_flush(cache);
  cache->size_x = width;
  cache->size_y = height;
}
|
||||
|
||||
/* Return 1 if the cache is set up for exactly width x height, 0 otherwise. */
int motion_cache_dimensions_match(MotionCache *cache, int width, int height)
{
  return (cache->size_x == width && cache->size_y == height) ? 1 : 0;
}
|
||||
|
||||
/* Push a PicMotion array onto the front of the cache. The previous head is
 * stored in slot [size_y] of the array, which serves as the list link. */
void motion_cache_add(MotionCache *cache, PicMotion **image)
{
  PicMotion **previous_head = cache->head;

  image[cache->size_y] = (PicMotion *)previous_head;
  cache->head = image;
}
|
||||
|
||||
/* Pop the first PicMotion array from the cache. Returns 0 when the cache is
 * empty; the link slot [size_y] of the returned array is cleared. */
struct pic_motion **motion_cache_get(MotionCache *cache)
{
  PicMotion **top = cache->head;

  if (!top)
    return 0;

  cache->head = (PicMotion **)top[cache->size_y];
  top[cache->size_y] = 0;
  return top;
}
|
250
Src/h264dec/lcommon/src/mv_prediction.c
Normal file
250
Src/h264dec/lcommon/src/mv_prediction.c
Normal file
|
@ -0,0 +1,250 @@
|
|||
/*!
|
||||
*************************************************************************************
|
||||
* \file mv_prediction.c
|
||||
*
|
||||
* \brief
|
||||
* Motion Vector Prediction Functions
|
||||
*
|
||||
* \author
|
||||
* Main contributors (see contributors.h for copyright, address and affiliation details)
|
||||
* - Alexis Michael Tourapis <alexismt@ieee.org>
|
||||
* - Karsten Sühring <suehring@hhi.de>
|
||||
*************************************************************************************
|
||||
*/
|
||||
|
||||
#include "global.h"
|
||||
#include "mbuffer.h"
|
||||
/*!
|
||||
************************************************************************
|
||||
* \brief
|
||||
* Get motion vector predictor
|
||||
************************************************************************
|
||||
*/
|
||||
static void GetMotionVectorPredictorMBAFF (Macroblock *currMB,
|
||||
PixelPos *block, // <--> block neighbors
|
||||
short pmv[2],
|
||||
short ref_frame,
|
||||
PicMotion **motion,
|
||||
int mb_x,
|
||||
int mb_y,
|
||||
int blockshape_x,
|
||||
int blockshape_y)
|
||||
{
|
||||
int mv_a, mv_b, mv_c, pred_vec=0;
|
||||
int mvPredType, rFrameL, rFrameU, rFrameUR;
|
||||
int hv;
|
||||
VideoParameters *p_Vid = currMB->p_Vid;
|
||||
|
||||
mvPredType = MVPRED_MEDIAN;
|
||||
|
||||
|
||||
if (currMB->mb_field)
|
||||
{
|
||||
rFrameL = block[0].available
|
||||
? (p_Vid->mb_data[block[0].mb_addr].mb_field
|
||||
? motion[block[0].pos_y][block[0].pos_x].ref_idx
|
||||
: motion[block[0].pos_y][block[0].pos_x].ref_idx * 2) : -1;
|
||||
rFrameU = block[1].available
|
||||
? (p_Vid->mb_data[block[1].mb_addr].mb_field
|
||||
? motion[block[1].pos_y][block[1].pos_x].ref_idx
|
||||
: motion[block[1].pos_y][block[1].pos_x].ref_idx * 2) : -1;
|
||||
rFrameUR = block[2].available
|
||||
? (p_Vid->mb_data[block[2].mb_addr].mb_field
|
||||
? motion[block[2].pos_y][block[2].pos_x].ref_idx
|
||||
: motion[block[2].pos_y][block[2].pos_x].ref_idx * 2) : -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
rFrameL = block[0].available
|
||||
? (p_Vid->mb_data[block[0].mb_addr].mb_field
|
||||
? motion[block[0].pos_y][block[0].pos_x].ref_idx >>1
|
||||
: motion[block[0].pos_y][block[0].pos_x].ref_idx) : -1;
|
||||
rFrameU = block[1].available
|
||||
? (p_Vid->mb_data[block[1].mb_addr].mb_field
|
||||
? motion[block[1].pos_y][block[1].pos_x].ref_idx >>1
|
||||
: motion[block[1].pos_y][block[1].pos_x].ref_idx) : -1;
|
||||
rFrameUR = block[2].available
|
||||
? (p_Vid->mb_data[block[2].mb_addr].mb_field
|
||||
? motion[block[2].pos_y][block[2].pos_x].ref_idx >>1
|
||||
: motion[block[2].pos_y][block[2].pos_x].ref_idx) : -1;
|
||||
}
|
||||
|
||||
|
||||
/* Prediction if only one of the neighbors uses the reference frame
|
||||
* we are checking
|
||||
*/
|
||||
if(rFrameL == ref_frame && rFrameU != ref_frame && rFrameUR != ref_frame)
|
||||
mvPredType = MVPRED_L;
|
||||
else if(rFrameL != ref_frame && rFrameU == ref_frame && rFrameUR != ref_frame)
|
||||
mvPredType = MVPRED_U;
|
||||
else if(rFrameL != ref_frame && rFrameU != ref_frame && rFrameUR == ref_frame)
|
||||
mvPredType = MVPRED_UR;
|
||||
// Directional predictions
|
||||
if(blockshape_x == 8 && blockshape_y == 16)
|
||||
{
|
||||
if(mb_x == 0)
|
||||
{
|
||||
if(rFrameL == ref_frame)
|
||||
mvPredType = MVPRED_L;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( rFrameUR == ref_frame)
|
||||
mvPredType = MVPRED_UR;
|
||||
}
|
||||
}
|
||||
else if(blockshape_x == 16 && blockshape_y == 8)
|
||||
{
|
||||
if(mb_y == 0)
|
||||
{
|
||||
if(rFrameU == ref_frame)
|
||||
mvPredType = MVPRED_U;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(rFrameL == ref_frame)
|
||||
mvPredType = MVPRED_L;
|
||||
}
|
||||
}
|
||||
|
||||
for (hv=0; hv < 2; hv++)
|
||||
{
|
||||
if (hv == 0)
|
||||
{
|
||||
mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[hv] : 0;
|
||||
mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[hv] : 0;
|
||||
mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[hv] : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (currMB->mb_field)
|
||||
{
|
||||
mv_a = block[0].available ? p_Vid->mb_data[block[0].mb_addr].mb_field
|
||||
? motion[block[0].pos_y][block[0].pos_x].mv[hv]
|
||||
: motion[block[0].pos_y][block[0].pos_x].mv[hv] / 2
|
||||
: 0;
|
||||
mv_b = block[1].available ? p_Vid->mb_data[block[1].mb_addr].mb_field
|
||||
? motion[block[1].pos_y][block[1].pos_x].mv[hv]
|
||||
: motion[block[1].pos_y][block[1].pos_x].mv[hv] / 2
|
||||
: 0;
|
||||
mv_c = block[2].available ? p_Vid->mb_data[block[2].mb_addr].mb_field
|
||||
? motion[block[2].pos_y][block[2].pos_x].mv[hv]
|
||||
: motion[block[2].pos_y][block[2].pos_x].mv[hv] / 2
|
||||
: 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
mv_a = block[0].available ? p_Vid->mb_data[block[0].mb_addr].mb_field
|
||||
? motion[block[0].pos_y][block[0].pos_x].mv[hv] * 2
|
||||
: motion[block[0].pos_y][block[0].pos_x].mv[hv]
|
||||
: 0;
|
||||
mv_b = block[1].available ? p_Vid->mb_data[block[1].mb_addr].mb_field
|
||||
? motion[block[1].pos_y][block[1].pos_x].mv[hv] * 2
|
||||
: motion[block[1].pos_y][block[1].pos_x].mv[hv]
|
||||
: 0;
|
||||
mv_c = block[2].available ? p_Vid->mb_data[block[2].mb_addr].mb_field
|
||||
? motion[block[2].pos_y][block[2].pos_x].mv[hv] * 2
|
||||
: motion[block[2].pos_y][block[2].pos_x].mv[hv]
|
||||
: 0;
|
||||
}
|
||||
}
|
||||
|
||||
switch (mvPredType)
|
||||
{
|
||||
case MVPRED_MEDIAN:
|
||||
if(!(block[1].available || block[2].available))
|
||||
{
|
||||
pred_vec = mv_a;
|
||||
}
|
||||
else
|
||||
{
|
||||
pred_vec = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c));
|
||||
}
|
||||
break;
|
||||
case MVPRED_L:
|
||||
pred_vec = mv_a;
|
||||
break;
|
||||
case MVPRED_U:
|
||||
pred_vec = mv_b;
|
||||
break;
|
||||
case MVPRED_UR:
|
||||
pred_vec = mv_c;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
pmv[hv] = (short) pred_vec;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
************************************************************************
|
||||
* \brief
|
||||
* Get motion vector predictor
|
||||
************************************************************************
|
||||
*/
|
||||
// TODO: benski> make SSE3/MMX version
|
||||
static void GetMotionVectorPredictorNormal (Macroblock *currMB,
|
||||
PixelPos *block, // <--> block neighbors
|
||||
short pmv[2],
|
||||
short ref_frame,
|
||||
PicMotion **motion,
|
||||
int mb_x,
|
||||
int mb_y,
|
||||
int blockshape_x,
|
||||
int blockshape_y)
|
||||
{
|
||||
int rFrameL = block[0].available ? motion[block[0].pos_y][block[0].pos_x].ref_idx : -1;
|
||||
int rFrameU = block[1].available ? motion[block[1].pos_y][block[1].pos_x].ref_idx : -1;
|
||||
int rFrameUR = block[2].available ? motion[block[2].pos_y][block[2].pos_x].ref_idx : -1;
|
||||
|
||||
/* Prediction if only one of the neighbors uses the reference frame
|
||||
* we are checking
|
||||
*/
|
||||
if (rFrameL == ref_frame &&
|
||||
((rFrameU != ref_frame && rFrameUR != ref_frame) || (blockshape_x == 8 && blockshape_y == 16 && mb_x == 0) || (blockshape_x == 16 && blockshape_y == 8 && mb_y != 0)))
|
||||
{ // left
|
||||
pmv[0] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0;
|
||||
pmv[1] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0;
|
||||
}
|
||||
else if (rFrameU == ref_frame &&
|
||||
((rFrameL != ref_frame && rFrameUR != ref_frame) || (blockshape_x == 16 && blockshape_y == 8 && mb_y == 0)))
|
||||
{ // up
|
||||
pmv[0] = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[0] : 0;
|
||||
pmv[1] = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[1] : 0;
|
||||
}
|
||||
else if (rFrameUR == ref_frame &&
|
||||
((rFrameL != ref_frame && rFrameU != ref_frame) || (blockshape_x == 8 && blockshape_y == 16 && mb_x != 0)))
|
||||
{ // upper right
|
||||
pmv[0] = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[0] : 0;
|
||||
pmv[1] = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[1] : 0;
|
||||
}
|
||||
else
|
||||
{ // median
|
||||
if(!(block[1].available || block[2].available))
|
||||
{
|
||||
pmv[0] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0;
|
||||
pmv[1] = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[0] : 0;
|
||||
int mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[0] : 0;
|
||||
int mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[0] : 0;
|
||||
pmv[0] = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c));
|
||||
mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[1] : 0;
|
||||
mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[1] : 0;
|
||||
mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[1] : 0;
|
||||
pmv[1] = mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Install the motion vector predictor matching the picture structure:
 * the MBAFF variant when mb_aff_frame_flag is non-zero, the normal one otherwise. */
void init_motion_vector_prediction(Macroblock *currMB, int mb_aff_frame_flag)
{
  currMB->GetMVPredictor = mb_aff_frame_flag ? GetMotionVectorPredictorMBAFF
                                             : GetMotionVectorPredictorNormal;
}
|
244
Src/h264dec/lcommon/src/parsetcommon.c
Normal file
244
Src/h264dec/lcommon/src/parsetcommon.c
Normal file
|
@ -0,0 +1,244 @@
|
|||
|
||||
/*!
|
||||
**************************************************************************************
|
||||
* \file
|
||||
* parsetcommon.c
|
||||
* \brief
|
||||
* Picture and Sequence Parameter set generation and handling
|
||||
* \date 25 November 2002
|
||||
* \author
|
||||
* Main contributors (see contributors.h for copyright, address and affiliation details)
|
||||
* - Stephan Wenger <stewe@cs.tu-berlin.de>
|
||||
*
|
||||
**************************************************************************************
|
||||
*/
|
||||
|
||||
#include "global.h"
|
||||
#include "parsetcommon.h"
|
||||
#include "memalloc.h"
|
||||
/*!
|
||||
*************************************************************************************
|
||||
* \brief
|
||||
* Allocates memory for a picture paramater set
|
||||
*
|
||||
* \return
|
||||
* pointer to a pps
|
||||
*************************************************************************************
|
||||
*/
|
||||
|
||||
pic_parameter_set_rbsp_t *AllocPPS ()
|
||||
{
|
||||
pic_parameter_set_rbsp_t *p;
|
||||
|
||||
if ((p=calloc (sizeof (pic_parameter_set_rbsp_t), 1)) == NULL)
|
||||
no_mem_exit ("AllocPPS: PPS");
|
||||
p->slice_group_id = NULL;
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
*************************************************************************************
|
||||
* \brief
|
||||
* Allocates memory for am sequence paramater set
|
||||
*
|
||||
* \return
|
||||
* pointer to a sps
|
||||
*************************************************************************************
|
||||
*/
|
||||
|
||||
seq_parameter_set_rbsp_t *AllocSPS ()
|
||||
{
|
||||
seq_parameter_set_rbsp_t *p;
|
||||
|
||||
if ((p=calloc (sizeof (seq_parameter_set_rbsp_t), 1)) == NULL)
|
||||
no_mem_exit ("AllocSPS: SPS");
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
*************************************************************************************
|
||||
* \brief
|
||||
* Frees a picture parameter set
|
||||
*
|
||||
* \param pps to be freed
|
||||
* Picture parameter set to be freed
|
||||
*************************************************************************************
|
||||
*/
|
||||
|
||||
void FreePPS (pic_parameter_set_rbsp_t *pps)
|
||||
{
|
||||
assert (pps != NULL);
|
||||
if (pps->slice_group_id != NULL)
|
||||
free (pps->slice_group_id);
|
||||
free (pps);
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
*************************************************************************************
|
||||
* \brief
|
||||
* Frees a sps
|
||||
*
|
||||
* \param sps
|
||||
* Sequence parameter set to be freed
|
||||
*************************************************************************************
|
||||
*/
|
||||
|
||||
void FreeSPS (seq_parameter_set_rbsp_t *sps)
|
||||
{
|
||||
assert (sps != NULL);
|
||||
free (sps);
|
||||
}
|
||||
|
||||
|
||||
/*!
 *************************************************************************************
 * \brief
 *    Compares two sequence parameter sets field by field
 *
 *    Fields that are only meaningful for a given pic_order_cnt_type,
 *    frame_mbs_only_flag or frame_cropping_flag of sps1 are compared only
 *    in that case.
 *
 * \return
 *    1 if both sets are valid and all compared fields match, 0 otherwise
 *************************************************************************************
 */
int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2)
{
  unsigned idx;

  // An invalid set never compares equal
  if (!(sps1->Valid && sps2->Valid))
    return 0;

  if (sps1->profile_idc               != sps2->profile_idc               ||
      sps1->constrained_set0_flag     != sps2->constrained_set0_flag     ||
      sps1->constrained_set1_flag     != sps2->constrained_set1_flag     ||
      sps1->constrained_set2_flag     != sps2->constrained_set2_flag     ||
      sps1->level_idc                 != sps2->level_idc                 ||
      sps1->seq_parameter_set_id      != sps2->seq_parameter_set_id      ||
      sps1->log2_max_frame_num_minus4 != sps2->log2_max_frame_num_minus4 ||
      sps1->pic_order_cnt_type        != sps2->pic_order_cnt_type)
    return 0;

  // Picture order count parameters depend on the POC type
  if (sps1->pic_order_cnt_type == 0)
  {
    if (sps1->log2_max_pic_order_cnt_lsb_minus4 != sps2->log2_max_pic_order_cnt_lsb_minus4)
      return 0;
  }
  else if (sps1->pic_order_cnt_type == 1)
  {
    if (sps1->delta_pic_order_always_zero_flag      != sps2->delta_pic_order_always_zero_flag ||
        sps1->offset_for_non_ref_pic                != sps2->offset_for_non_ref_pic           ||
        sps1->offset_for_top_to_bottom_field        != sps2->offset_for_top_to_bottom_field   ||
        sps1->num_ref_frames_in_pic_order_cnt_cycle != sps2->num_ref_frames_in_pic_order_cnt_cycle)
      return 0;

    for (idx = 0; idx < sps1->num_ref_frames_in_pic_order_cnt_cycle; idx++)
    {
      if (sps1->offset_for_ref_frame[idx] != sps2->offset_for_ref_frame[idx])
        return 0;
    }
  }

  if (sps1->num_ref_frames                       != sps2->num_ref_frames                       ||
      sps1->gaps_in_frame_num_value_allowed_flag != sps2->gaps_in_frame_num_value_allowed_flag ||
      sps1->pic_width_in_mbs_minus1              != sps2->pic_width_in_mbs_minus1              ||
      sps1->pic_height_in_map_units_minus1       != sps2->pic_height_in_map_units_minus1       ||
      sps1->frame_mbs_only_flag                  != sps2->frame_mbs_only_flag)
    return 0;

  if (!sps1->frame_mbs_only_flag &&
      sps1->mb_adaptive_frame_field_flag != sps2->mb_adaptive_frame_field_flag)
    return 0;

  if (sps1->direct_8x8_inference_flag != sps2->direct_8x8_inference_flag ||
      sps1->frame_cropping_flag       != sps2->frame_cropping_flag)
    return 0;

  if (sps1->frame_cropping_flag)
  {
    if (sps1->frame_cropping_rect_left_offset   != sps2->frame_cropping_rect_left_offset  ||
        sps1->frame_cropping_rect_right_offset  != sps2->frame_cropping_rect_right_offset ||
        sps1->frame_cropping_rect_top_offset    != sps2->frame_cropping_rect_top_offset   ||
        sps1->frame_cropping_rect_bottom_offset != sps2->frame_cropping_rect_bottom_offset)
      return 0;
  }

  if (sps1->vui_parameters_present_flag != sps2->vui_parameters_present_flag)
    return 0;

  return 1;
}
|
||||
|
||||
/*!
 *************************************************************************************
 * \brief
 *    Compares two picture parameter sets field by field
 *
 *    Slice group parameters are compared only when pps1 uses more than one
 *    slice group, scaling lists only when pps1 signals a scaling matrix.
 *
 * \return
 *    1 if both sets are valid and all compared fields match, 0 otherwise
 *************************************************************************************
 */
int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2)
{
  unsigned idx, coef;

  // An invalid set never compares equal
  if (!(pps1->Valid && pps2->Valid))
    return 0;

  if (pps1->pic_parameter_set_id     != pps2->pic_parameter_set_id     ||
      pps1->seq_parameter_set_id     != pps2->seq_parameter_set_id     ||
      pps1->entropy_coding_mode_flag != pps2->entropy_coding_mode_flag ||
      pps1->bottom_field_pic_order_in_frame_present_flag != pps2->bottom_field_pic_order_in_frame_present_flag ||
      pps1->num_slice_groups_minus1  != pps2->num_slice_groups_minus1)
    return 0;

  if (pps1->num_slice_groups_minus1>0)
  {
    if (pps1->slice_group_map_type != pps2->slice_group_map_type)
      return 0;

    switch (pps1->slice_group_map_type)
    {
    case 0:
      for (idx = 0; idx <= pps1->num_slice_groups_minus1; idx++)
      {
        if (pps1->run_length_minus1[idx] != pps2->run_length_minus1[idx])
          return 0;
      }
      break;

    case 2:
      for (idx = 0; idx < pps1->num_slice_groups_minus1; idx++)
      {
        if (pps1->top_left[idx]     != pps2->top_left[idx] ||
            pps1->bottom_right[idx] != pps2->bottom_right[idx])
          return 0;
      }
      break;

    case 3:
    case 4:
    case 5:
      if (pps1->slice_group_change_direction_flag != pps2->slice_group_change_direction_flag ||
          pps1->slice_group_change_rate_minus1    != pps2->slice_group_change_rate_minus1)
        return 0;
      break;

    case 6:
      // The map sizes must agree before the maps themselves can be compared
      if (pps1->pic_size_in_map_units_minus1 != pps2->pic_size_in_map_units_minus1)
        return 0;
      for (idx = 0; idx <= pps1->pic_size_in_map_units_minus1; idx++)
      {
        if (pps1->slice_group_id[idx] != pps2->slice_group_id[idx])
          return 0;
      }
      break;

    default:
      break;
    }
  }

  if (pps1->num_ref_idx_l0_active_minus1 != pps2->num_ref_idx_l0_active_minus1 ||
      pps1->num_ref_idx_l1_active_minus1 != pps2->num_ref_idx_l1_active_minus1 ||
      pps1->weighted_pred_flag           != pps2->weighted_pred_flag           ||
      pps1->weighted_bipred_idc          != pps2->weighted_bipred_idc          ||
      pps1->pic_init_qp_minus26          != pps2->pic_init_qp_minus26          ||
      pps1->pic_init_qs_minus26          != pps2->pic_init_qs_minus26          ||
      pps1->chroma_qp_index_offset       != pps2->chroma_qp_index_offset       ||
      pps1->deblocking_filter_control_present_flag != pps2->deblocking_filter_control_present_flag ||
      pps1->constrained_intra_pred_flag    != pps2->constrained_intra_pred_flag ||
      pps1->redundant_pic_cnt_present_flag != pps2->redundant_pic_cnt_present_flag)
    return 0;

  //Fidelity Range Extensions Stuff
  //It is initialized to zero, so should be ok to check all the time.
  if (pps1->transform_8x8_mode_flag         != pps2->transform_8x8_mode_flag ||
      pps1->pic_scaling_matrix_present_flag != pps2->pic_scaling_matrix_present_flag)
    return 0;

  if (pps1->pic_scaling_matrix_present_flag)
  {
    // 6 lists of 4x4, plus 2 lists of 8x8 when the 8x8 transform is enabled
    for (idx = 0; idx < (6 + ((unsigned)pps1->transform_8x8_mode_flag << 1)); idx++)
    {
      if (pps1->pic_scaling_list_present_flag[idx] != pps2->pic_scaling_list_present_flag[idx])
        return 0;

      if (!pps1->pic_scaling_list_present_flag[idx])
        continue;

      if (idx < 6)
      {
        for (coef = 0; coef < 16; coef++)
        {
          if (pps1->ScalingList4x4[idx][coef] != pps2->ScalingList4x4[idx][coef])
            return 0;
        }
      }
      else
      {
        for (coef = 0; coef < 64; coef++)
        {
          if (pps1->ScalingList8x8[idx-6][coef] != pps2->ScalingList8x8[idx-6][coef])
            return 0;
        }
      }
    }
  }

  if (pps1->second_chroma_qp_index_offset != pps2->second_chroma_qp_index_offset)
    return 0;

  return 1;
}
|
809
Src/h264dec/lcommon/src/transform.c
Normal file
809
Src/h264dec/lcommon/src/transform.c
Normal file
|
@ -0,0 +1,809 @@
|
|||
/*!
|
||||
***************************************************************************
|
||||
* \file transform.c
|
||||
*
|
||||
* \brief
|
||||
* Transform functions
|
||||
*
|
||||
* \author
|
||||
* Main contributors (see contributors.h for copyright, address and affiliation details)
|
||||
* - Alexis Michael Tourapis
|
||||
* \date
|
||||
* 01. July 2007
|
||||
**************************************************************************
|
||||
*/
|
||||
#include "global.h"
|
||||
#include "transform.h"
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
 ***************************************************************************
 * \brief
 *    Forward 4x4 integer transform of one sub-block.
 *
 * \param block
 *    source rows; the 4x4 region starting at [pos_y][pos_x] is read
 * \param tblock
 *    destination rows; the 4x4 region starting at [pos_y][pos_x] is written
 * \param pos_y
 *    first row of the sub-block
 * \param pos_x
 *    first column of the sub-block
 *
 * \note
 *    All input samples are consumed by the horizontal pass before anything
 *    is written, so block and tblock may refer to the same storage.
 *    Doubling is done with a multiplication instead of "<< 1": the
 *    intermediate differences can be negative and left-shifting a negative
 *    signed value is undefined behaviour in C.
 ***************************************************************************
 */
void forward4x4(int **block, int **tblock, int pos_y, int pos_x)
{
  int tmp[16];  // row-major result of the horizontal pass
  int row, col;

  // Horizontal pass: one butterfly per source row
  for (row = 0; row < 4; row++)
  {
    const int *src = &block[pos_y + row][pos_x];
    int *dst = &tmp[row * 4];

    const int sum03 = src[0] + src[3];
    const int sum12 = src[1] + src[2];
    const int dif12 = src[1] - src[2];
    const int dif03 = src[0] - src[3];

    dst[0] = sum03 + sum12;
    dst[1] = dif03 * 2 + dif12;
    dst[2] = sum03 - sum12;
    dst[3] = dif03 - dif12 * 2;
  }

  // Vertical pass: one butterfly per column of the intermediate block
  for (col = 0; col < 4; col++)
  {
    const int out_x = pos_x + col;

    const int sum03 = tmp[col]     + tmp[col + 12];
    const int sum12 = tmp[col + 4] + tmp[col + 8];
    const int dif12 = tmp[col + 4] - tmp[col + 8];
    const int dif03 = tmp[col]     - tmp[col + 12];

    tblock[pos_y    ][out_x] = sum03 + sum12;
    tblock[pos_y + 1][out_x] = dif12 + dif03 * 2;
    tblock[pos_y + 2][out_x] = sum03 - sum12;
    tblock[pos_y + 3][out_x] = dif03 - dif12 * 2;
  }
}
|
||||
|
||||
/*!
 ***************************************************************************
 * \brief
 *    Inverse 4x4 integer transform (row pass followed by column pass).
 *
 * \param tblock
 *    4x4 coefficient block (read)
 * \param block
 *    4x4 result block (written); every coefficient is read before the
 *    first store, so it may be the same storage as tblock
 * \param pos_y
 *    not used by this routine
 * \param pos_x
 *    not used by this routine
 *
 * \note
 *    The intermediate block is kept in 16-bit storage, so first-pass
 *    results are narrowed to short before the second pass.
 ***************************************************************************
 */
static void inverse4x4(const h264_short_block_t tblock, h264_short_block_t block, int pos_y, int pos_x)
{
  short stage[16];
  short *out = stage;
  int row, col;

  (void) pos_y;
  (void) pos_x;

  // Row pass
  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const int c0 = tblock[row][0];
    const int c1 = tblock[row][1];
    const int c2 = tblock[row][2];
    const int c3 = tblock[row][3];

    const int even_sum = c0 + c2;
    const int even_dif = c0 - c2;
    const int odd_dif  = (c1 >> 1) - c3;
    const int odd_sum  = c1 + (c3 >> 1);

    out[0] = (short) (even_sum + odd_sum);
    out[1] = (short) (even_dif + odd_dif);
    out[2] = (short) (even_dif - odd_dif);
    out[3] = (short) (even_sum - odd_sum);
    out += 4;
  }

  // Column pass
  for (col = 0; col < BLOCK_SIZE; col++)
  {
    const int c0 = stage[col];
    const int c1 = stage[col + BLOCK_SIZE];
    const int c2 = stage[col + 2 * BLOCK_SIZE];
    const int c3 = stage[col + 3 * BLOCK_SIZE];

    const int even_sum = c0 + c2;
    const int even_dif = c0 - c2;
    const int odd_dif  = (c1 >> 1) - c3;
    const int odd_sum  = c1 + (c3 >> 1);

    block[0][col] = even_sum + odd_sum;
    block[1][col] = even_dif + odd_dif;
    block[2][col] = even_dif - odd_dif;
    block[3][col] = even_sum - odd_sum;
  }
}
|
||||
|
||||
#ifdef _M_IX86
|
||||
// benski> this exists just for conformance testing. not used in production code
|
||||
/*!
 ***************************************************************************
 * \brief
 *    Inverse 4x4 transform of one sub-block, MSVC x86 inline assembly.
 *
 *    Reads four rows of four 16-bit coefficients from tblock at
 *    (pos_y, pos_x) and stores the transformed rows at the same offset in
 *    block. Rows are 32 bytes (16 shorts) apart.
 *
 * \note
 *    Despite the name only MMX registers (mm0-mm7) are used, and the block
 *    contains no emms instruction.
 ***************************************************************************
 */
static void inverse4x4_sse2_x86(const h264_short_macroblock_t tblock, h264_short_macroblock_t block, int pos_y, int pos_x)
{
  __asm
  {
    // edx = (pos_y * 16 + pos_x) * sizeof(short): byte offset of the sub-block
    mov       edx, pos_y
    shl       edx, 4
    add       edx, pos_x
    shl       edx, 1

    // eax -> source coefficients
    mov       eax, edx
    add       eax, tblock

    // esi -> destination
    mov       esi, edx
    add       esi, block

    // fetch the four coefficient rows
    movq      mm0, MMWORD PTR [eax]
    movq      mm1, MMWORD PTR [eax + 32]
    movq      mm2, MMWORD PTR [eax + 64]
    movq      mm3, MMWORD PTR [eax + 96]

    // ---- pass 1: transpose, leaving t0=mm0 t1=mm6 t2=mm1 t3=mm4 ----
    movq      mm4, mm0
    punpcklwd mm0, mm2
    punpckhwd mm4, mm2
    movq      mm5, mm1
    punpcklwd mm1, mm3
    punpckhwd mm5, mm3
    movq      mm6, mm0
    punpcklwd mm0, mm1
    punpckhwd mm6, mm1
    movq      mm1, mm4
    punpcklwd mm1, mm5
    punpckhwd mm4, mm5

    // butterfly stage, leaving p0=mm0 p1=mm2 p2=mm5 p3=mm6
    movq      mm2, mm0
    paddw     mm0, mm1            // p0 = t0 + t2
    psubw     mm2, mm1            // p1 = t0 - t2
    movq      mm5, mm6
    psraw     mm5, 1
    psubw     mm5, mm4            // p2 = (t1 >> 1) - t3
    psraw     mm4, 1
    paddw     mm6, mm4            // p3 = t1 + (t3 >> 1)

    // output stage, leaving r0=mm0 r1=mm1 r2=mm2 r3=mm3
    movq      mm3, mm0
    paddw     mm0, mm6            // r0 = p0 + p3
    movq      mm1, mm2
    paddw     mm1, mm5            // r1 = p1 + p2
    psubw     mm2, mm5            // r2 = p1 - p2
    psubw     mm3, mm6            // r3 = p0 - p3

    // ---- pass 2: transpose again, leaving t0=mm0 t1=mm6 t2=mm1 t3=mm4 ----
    movq      mm4, mm0
    punpcklwd mm0, mm2
    punpckhwd mm4, mm2
    movq      mm5, mm1
    punpcklwd mm1, mm3
    punpckhwd mm5, mm3
    movq      mm6, mm0
    punpcklwd mm0, mm1
    punpckhwd mm6, mm1
    movq      mm1, mm4
    punpcklwd mm1, mm5
    punpckhwd mm4, mm5

    // butterfly stage, leaving p0=mm0 p1=mm2 p2=mm5 p3=mm6
    movq      mm2, mm0
    paddw     mm0, mm1            // p0 = t0 + t2
    psubw     mm2, mm1            // p1 = t0 - t2
    movq      mm5, mm6
    psraw     mm5, 1
    psubw     mm5, mm4            // p2 = (t1 >> 1) - t3
    psraw     mm4, 1
    paddw     mm6, mm4            // p3 = t1 + (t3 >> 1)

    // output stage, leaving r0=mm0 r1=mm1 r2=mm2 r3=mm3
    movq      mm3, mm0
    paddw     mm0, mm6            // r0 = p0 + p3
    movq      mm1, mm2
    paddw     mm1, mm5            // r1 = p1 + p2
    psubw     mm2, mm5            // r2 = p1 - p2
    psubw     mm3, mm6            // r3 = p0 - p3

    // write the four result rows
    // NOTE(review): the size override is XMMWORD although the source is a
    // 64-bit MMX register - kept as in the original, confirm with the assembler
    movq      XMMWORD PTR [esi], mm0
    movq      XMMWORD PTR [esi + 32], mm1
    movq      XMMWORD PTR [esi + 64], mm2
    movq      XMMWORD PTR [esi + 96], mm3
  }
}
|
||||
#endif
|
||||
|
||||
/*!
 ***************************************************************************
 * \brief
 *    Adds a rounded 4x4 residual to the prediction and stores the clipped
 *    samples.
 *
 * \param curImg
 *    destination samples, written at rows joff..joff+3, columns mb_x..mb_x+3
 * \param mpr
 *    prediction samples, read at the same position
 * \param tblock
 *    4x4 residual block
 * \param joff
 *    first row inside the macroblock
 * \param mb_x
 *    first column inside the macroblock
 * \param max_imgpel_value
 *    upper clipping bound; only the C path uses it, the x86 assembly path
 *    saturates to the unsigned 8-bit range instead
 ***************************************************************************
 */
static void sample_reconstruct(h264_imgpel_macroblock_t curImg, const h264_imgpel_macroblock_t mpr, const h264_short_block_t tblock, int joff, int mb_x, int max_imgpel_value)
{
#ifdef _M_IX86
  __asm
  {
    // mm0 = four words of 32 (rounding term)
    mov       edx, 0x00200020
    movd      mm0, edx
    punpckldq mm0, mm0

    // ecx = joff * 16 + mb_x: byte offset into the 16-byte-wide sample rows
    mov       ecx, joff
    shl       ecx, 4
    add       ecx, mb_x

    // eax -> destination samples
    mov       eax, curImg
    add       eax, ecx

    // edx -> prediction samples
    mov       edx, mpr
    add       edx, ecx

    // ecx -> residual rows (16-bit, 8 bytes per row)
    mov       ecx, tblock

    // mm7 = 0, used for byte <-> word conversion
    pxor      mm7, mm7

    // residual rows, each (x + 32) >> 6; rows 0-2 end in mm1-mm3, row 3 in mm0
    movq      mm1, MMWORD PTR [ecx]
    movq      mm2, MMWORD PTR [ecx + 8]
    movq      mm3, MMWORD PTR [ecx + 16]
    movq      mm4, MMWORD PTR [ecx + 24]
    paddw     mm1, mm0
    paddw     mm2, mm0
    paddw     mm3, mm0
    paddw     mm0, mm4
    psraw     mm1, 6
    psraw     mm2, 6
    psraw     mm3, 6
    psraw     mm0, 6

    // widen prediction bytes to words and add the residual (signed saturation)
    movd      mm4, DWORD PTR [edx]
    movd      mm5, DWORD PTR [edx + 16]
    movd      mm6, DWORD PTR [edx + 32]
    punpcklbw mm4, mm7
    punpcklbw mm5, mm7
    punpcklbw mm6, mm7
    paddsw    mm4, mm1
    movd      mm1, DWORD PTR [edx + 48]   // mm1 is free again: prediction row 3
    paddsw    mm5, mm2
    punpcklbw mm1, mm7
    paddsw    mm6, mm3
    paddsw    mm1, mm0

    // pack to unsigned bytes (saturating) and store rows from mm4, mm5, mm6, mm1
    packuswb  mm4, mm7
    packuswb  mm5, mm7
    packuswb  mm6, mm7
    packuswb  mm1, mm7
    movd      DWORD PTR [eax], mm4
    movd      DWORD PTR [eax + 16], mm5
    movd      DWORD PTR [eax + 32], mm6
    movd      DWORD PTR [eax + 48], mm1
  }
#else
  int row, col;

  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const int y = row + joff;

    for (col = 0; col < BLOCK_SIZE; col++)
    {
      const int x = mb_x + col;

      curImg[y][x] = (imgpel) iClip1(max_imgpel_value, rshift_rnd_sf(tblock[row][col], DQ_BITS) + mpr[y][x]);
    }
  }
#endif
}
|
||||
|
||||
#if defined(_M_IX86) && defined(_DEBUG)
|
||||
/*!
 ***************************************************************************
 * \brief
 *    Inverse 4x4 transform plus reconstruction of one sub-block, MSVC x86
 *    inline assembly (built only for x86 debug configurations).
 *
 *    Transforms the 16-bit coefficients at (pos_y, pos_x) of tblock, rounds
 *    with (x + 32) >> 6, adds the prediction from mb_pred and stores the
 *    result, saturated to unsigned bytes, in mb_rec.
 *
 * \note
 *    Only MMX registers are used and the block contains no emms
 *    instruction.
 ***************************************************************************
 */
void itrans4x4_sse2(const h264_short_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
{
  __declspec(align(32)) static const short const32[4] = {32, 32, 32, 32};
  __asm
  {
    // edx = pos_y * 16 + pos_x: sample offset (rows are 16 samples wide)
    mov       edx, pos_y
    shl       edx, 4
    add       edx, pos_x

    // eax -> coefficients (two bytes per entry)
    lea       eax, [edx*2]
    add       eax, tblock

    // ecx -> prediction samples
    mov       ecx, mb_pred
    add       ecx, edx

    // edx -> reconstructed samples
    add       edx, mb_rec

    // fetch the four coefficient rows
    movq      mm0, MMWORD PTR [eax]
    movq      mm1, MMWORD PTR [eax + 32]
    movq      mm2, MMWORD PTR [eax + 64]
    movq      mm3, MMWORD PTR [eax + 96]

    // ---- pass 1: transpose, leaving t0=mm0 t1=mm6 t2=mm1 t3=mm4 ----
    movq      mm4, mm0
    punpcklwd mm0, mm2
    punpckhwd mm4, mm2
    movq      mm5, mm1
    punpcklwd mm1, mm3
    punpckhwd mm5, mm3
    movq      mm6, mm0
    punpcklwd mm0, mm1
    punpckhwd mm6, mm1
    movq      mm1, mm4
    punpcklwd mm1, mm5
    punpckhwd mm4, mm5

    // butterfly stage, leaving p0=mm0 p1=mm2 p2=mm5 p3=mm6
    movq      mm2, mm0
    paddw     mm0, mm1            // p0 = t0 + t2
    psubw     mm2, mm1            // p1 = t0 - t2
    movq      mm5, mm6
    psraw     mm5, 1
    psubw     mm5, mm4            // p2 = (t1 >> 1) - t3
    psraw     mm4, 1
    paddw     mm6, mm4            // p3 = t1 + (t3 >> 1)

    // output stage, leaving r0=mm0 r1=mm1 r2=mm2 r3=mm3
    movq      mm3, mm0
    paddw     mm0, mm6            // r0 = p0 + p3
    movq      mm1, mm2
    paddw     mm1, mm5            // r1 = p1 + p2
    psubw     mm2, mm5            // r2 = p1 - p2
    psubw     mm3, mm6            // r3 = p0 - p3

    // ---- pass 2: transpose again, leaving t0=mm0 t1=mm6 t2=mm1 t3=mm4 ----
    movq      mm4, mm0
    punpcklwd mm0, mm2
    punpckhwd mm4, mm2
    movq      mm5, mm1
    punpcklwd mm1, mm3
    punpckhwd mm5, mm3
    movq      mm6, mm0
    punpcklwd mm0, mm1
    punpckhwd mm6, mm1
    movq      mm1, mm4
    punpcklwd mm1, mm5
    punpckhwd mm4, mm5

    // butterfly stage, leaving p0=mm0 p1=mm2 p2=mm5 p3=mm6
    movq      mm2, mm0
    paddw     mm0, mm1            // p0 = t0 + t2
    psubw     mm2, mm1            // p1 = t0 - t2
    movq      mm5, mm6
    psraw     mm5, 1
    psubw     mm5, mm4            // p2 = (t1 >> 1) - t3
    psraw     mm4, 1
    paddw     mm6, mm4            // p3 = t1 + (t3 >> 1)

    // output stage, leaving r0=mm0 r1=mm1 r2=mm2 r3=mm3
    movq      mm3, mm0
    paddw     mm0, mm6            // r0 = p0 + p3
    movq      mm1, mm2
    paddw     mm1, mm5            // r1 = p1 + p2
    psubw     mm2, mm5            // r2 = p1 - p2
    psubw     mm3, mm6            // r3 = p0 - p3

    // ---- transform done: round and combine with the prediction ----
    // mm7 = four words of 32
    movq      mm7, const32

    paddw     mm0, mm7            // (r + 32) >> 6 for each row
    psraw     mm0, 6
    paddw     mm1, mm7
    psraw     mm1, 6
    paddw     mm2, mm7
    psraw     mm2, 6
    paddw     mm3, mm7
    psraw     mm3, 6

    // mm7 = 0 for byte <-> word conversion
    pxor      mm7, mm7

    // widen prediction bytes to words and add the residual (signed saturation)
    movd      mm4, DWORD PTR [ecx]
    movd      mm5, DWORD PTR [ecx + 16]
    movd      mm6, DWORD PTR [ecx + 32]
    punpcklbw mm4, mm7
    punpcklbw mm5, mm7
    punpcklbw mm6, mm7
    paddsw    mm4, mm0
    movd      mm0, DWORD PTR [ecx + 48]   // mm0 is free again: prediction row 3
    paddsw    mm5, mm1
    punpcklbw mm0, mm7
    paddsw    mm6, mm2
    paddsw    mm0, mm3

    // pack to unsigned bytes (saturating) and store rows from mm4, mm5, mm6, mm0
    packuswb  mm4, mm7
    packuswb  mm5, mm7
    packuswb  mm6, mm7
    packuswb  mm0, mm7
    movd      DWORD PTR [edx], mm4
    movd      DWORD PTR [edx + 16], mm5
    movd      DWORD PTR [edx + 32], mm6
    movd      DWORD PTR [edx + 48], mm0
  }
}
|
||||
#elif defined(_M_X64)
|
||||
/*!
 ***************************************************************************
 * \brief
 *    Inverse 4x4 transform plus reconstruction of one sub-block using SSE2
 *    intrinsics (x64 build).
 *
 *    Works on 32-bit coefficients: transpose, first 1-D pass, transpose
 *    back, second 1-D pass, (x + 32) >> 6 rounding, add the prediction and
 *    store the result saturated to unsigned bytes.
 *
 * \param tblock
 *    coefficient macroblock, read at rows pos_y..pos_y+3 from column pos_x
 * \param mb_pred
 *    prediction samples, read four bytes per row at the same position
 * \param mb_rec
 *    reconstructed samples, written four bytes per row at the same position
 ***************************************************************************
 */
static void itrans4x4_sse2(const h264_int_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
{
  const __m128i rounding = _mm_set1_epi32(32);
  const __m128i zero = _mm_setzero_si128();
  __m128i row0, row1, row2, row3;
  __m128i lo02, lo13, hi02, hi13;
  __m128i v0, v1, v2, v3;
  __m128i even_sum, even_dif, odd_dif, odd_sum;
  __m128i rows01, rows23, pred;

  // one coefficient row per register
  row0 = _mm_loadu_si128((__m128i *)&tblock[pos_y    ][pos_x]);
  row1 = _mm_loadu_si128((__m128i *)&tblock[pos_y + 1][pos_x]);
  row2 = _mm_loadu_si128((__m128i *)&tblock[pos_y + 2][pos_x]);
  row3 = _mm_loadu_si128((__m128i *)&tblock[pos_y + 3][pos_x]);

  // transpose: v<k> holds column k of the block
  lo02 = _mm_unpacklo_epi32(row0, row2);
  lo13 = _mm_unpacklo_epi32(row1, row3);
  hi02 = _mm_unpackhi_epi32(row0, row2);
  hi13 = _mm_unpackhi_epi32(row1, row3);
  v0 = _mm_unpacklo_epi32(lo02, lo13);
  v1 = _mm_unpackhi_epi32(lo02, lo13);
  v2 = _mm_unpacklo_epi32(hi02, hi13);
  v3 = _mm_unpackhi_epi32(hi02, hi13);

  // first 1-D pass
  even_sum = _mm_add_epi32(v0, v2);                        // v0 + v2
  even_dif = _mm_sub_epi32(v0, v2);                        // v0 - v2
  odd_dif  = _mm_sub_epi32(_mm_srai_epi32(v1, 1), v3);     // (v1 >> 1) - v3
  odd_sum  = _mm_add_epi32(_mm_srai_epi32(v3, 1), v1);     // v1 + (v3 >> 1)

  v0 = _mm_add_epi32(even_sum, odd_sum);
  v1 = _mm_add_epi32(even_dif, odd_dif);
  v2 = _mm_sub_epi32(even_dif, odd_dif);
  v3 = _mm_sub_epi32(even_sum, odd_sum);

  // transpose back for the second pass
  lo02 = _mm_unpacklo_epi32(v0, v2);
  lo13 = _mm_unpacklo_epi32(v1, v3);
  hi02 = _mm_unpackhi_epi32(v0, v2);
  hi13 = _mm_unpackhi_epi32(v1, v3);
  v0 = _mm_unpacklo_epi32(lo02, lo13);
  v1 = _mm_unpackhi_epi32(lo02, lo13);
  v2 = _mm_unpacklo_epi32(hi02, hi13);
  v3 = _mm_unpackhi_epi32(hi02, hi13);

  // second 1-D pass
  even_sum = _mm_add_epi32(v0, v2);
  even_dif = _mm_sub_epi32(v0, v2);
  odd_dif  = _mm_sub_epi32(_mm_srai_epi32(v1, 1), v3);
  odd_sum  = _mm_add_epi32(_mm_srai_epi32(v3, 1), v1);

  row0 = _mm_add_epi32(even_sum, odd_sum);
  row1 = _mm_add_epi32(even_dif, odd_dif);
  row2 = _mm_sub_epi32(even_dif, odd_dif);
  row3 = _mm_sub_epi32(even_sum, odd_sum);

  // (x + 32) >> 6
  row0 = _mm_srai_epi32(_mm_add_epi32(row0, rounding), 6);
  row1 = _mm_srai_epi32(_mm_add_epi32(row1, rounding), 6);
  row2 = _mm_srai_epi32(_mm_add_epi32(row2, rounding), 6);
  row3 = _mm_srai_epi32(_mm_add_epi32(row3, rounding), 6);

  // narrow to 16 bit, two rows per register
  rows01 = _mm_packs_epi32(row0, row1);
  rows23 = _mm_packs_epi32(row2, row3);

  // prediction rows 0 and 1: four bytes each, widened to 16 bit and added
  pred = _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y    ][pos_x]),
                            _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y + 1][pos_x]));
  rows01 = _mm_add_epi16(rows01, _mm_unpacklo_epi8(pred, zero));

  // prediction rows 2 and 3
  pred = _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y + 2][pos_x]),
                            _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y + 3][pos_x]));
  rows23 = _mm_add_epi16(rows23, _mm_unpacklo_epi8(pred, zero));

  // saturate to unsigned bytes; each 32-bit lane now holds one output row
  rows01 = _mm_packus_epi16(rows01, rows23);
  *(int32_t *)&mb_rec[pos_y    ][pos_x] = _mm_cvtsi128_si32(rows01);
  rows01 = _mm_srli_si128(rows01, 4);
  *(int32_t *)&mb_rec[pos_y + 1][pos_x] = _mm_cvtsi128_si32(rows01);
  rows01 = _mm_srli_si128(rows01, 4);
  *(int32_t *)&mb_rec[pos_y + 2][pos_x] = _mm_cvtsi128_si32(rows01);
  rows01 = _mm_srli_si128(rows01, 4);
  *(int32_t *)&mb_rec[pos_y + 3][pos_x] = _mm_cvtsi128_si32(rows01);
}
|
||||
#endif
|
||||
|
||||
/*!
 ***************************************************************************
 * \brief
 *    Portable inverse 4x4 transform plus reconstruction.
 *
 *    The coefficient block is transformed in place (the const qualifier of
 *    tblock is cast away for the output argument), then the result is
 *    combined with mb_pred and written to mb_rec with a clipping bound
 *    of 255.
 ***************************************************************************
 */
void itrans4x4_c(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
{
  // same storage as tblock, viewed as writable rows
  h264_short_block_row_t *residual = (h264_short_block_row_t *)tblock;

  inverse4x4(tblock, residual, pos_y, pos_x);
  sample_reconstruct(mb_rec, mb_pred, tblock, pos_y, pos_x, 255);
}
|
||||
|
||||
/*!
 ***************************************************************************
 * \brief
 *    In-place 4x4 Hadamard butterfly: a pass over the rows followed by a
 *    pass over the columns, using additions and subtractions only.
 *
 * \param block
 *    4x4 block, overwritten with the result
 ***************************************************************************
 */
void ihadamard4x4(int block[4][4])
{
  int stage[4][4];
  int k;

  // Row pass
  for (k = 0; k < 4; k++)
  {
    const int *in = block[k];
    const int even_sum = in[0] + in[2];
    const int even_dif = in[0] - in[2];
    const int odd_dif  = in[1] - in[3];
    const int odd_sum  = in[1] + in[3];

    stage[k][0] = even_sum + odd_sum;
    stage[k][1] = even_dif + odd_dif;
    stage[k][2] = even_dif - odd_dif;
    stage[k][3] = even_sum - odd_sum;
  }

  // Column pass
  for (k = 0; k < 4; k++)
  {
    const int even_sum = stage[0][k] + stage[2][k];
    const int even_dif = stage[0][k] - stage[2][k];
    const int odd_dif  = stage[1][k] - stage[3][k];
    const int odd_sum  = stage[1][k] + stage[3][k];

    block[0][k] = even_sum + odd_sum;
    block[1][k] = even_dif + odd_dif;
    block[2][k] = even_dif - odd_dif;
    block[3][k] = even_sum - odd_sum;
  }
}
|
||||
|
||||
/*!
 ***************************************************************************
 * \brief
 *    Hadamard butterfly from a 2-row x 4-column input to a 4-row x 2-column
 *    output.
 *
 * \param tblock
 *    input; rows 0 and 1, columns 0..3 are read
 * \param block
 *    output; rows 0..3, columns 0 and 1 are written (transposed layout)
 *
 * \note
 *    All inputs are read before the first output is stored.
 ***************************************************************************
 */
void ihadamard4x2(int **tblock, int **block)
{
  int stage[2][4];
  int k;

  // Combine the two input rows: sums in stage[0], differences in stage[1]
  for (k = 0; k < 4; k++)
  {
    stage[0][k] = tblock[0][k] + tblock[1][k];
    stage[1][k] = tblock[0][k] - tblock[1][k];
  }

  // Four-point butterfly on each combined row; row k becomes output column k
  for (k = 0; k < 2; k++)
  {
    const int *in = stage[k];
    const int even_sum = in[0] + in[2];
    const int even_dif = in[0] - in[2];
    const int odd_dif  = in[1] - in[3];
    const int odd_sum  = in[1] + in[3];

    block[0][k] = even_sum + odd_sum;
    block[1][k] = even_dif + odd_dif;
    block[2][k] = even_dif - odd_dif;
    block[3][k] = even_sum - odd_sum;
  }
}
|
||||
|
||||
//following functions perform 8 additions, 8 assignments. Should be a bit faster
|
||||
/*!
 ***************************************************************************
 * \brief
 *    2x2 Hadamard butterfly on four values.
 *
 * \param tblock
 *    four input values
 * \param block
 *    four output values; all inputs are read before the first store, so
 *    this may be the same array as tblock
 ***************************************************************************
 */
void ihadamard2x2(int tblock[4], int block[4])
{
  const int sum01 = tblock[0] + tblock[1];
  const int dif01 = tblock[0] - tblock[1];
  const int sum23 = tblock[2] + tblock[3];
  const int dif23 = tblock[2] - tblock[3];

  block[0] = sum01 + sum23;
  block[1] = dif01 + dif23;
  block[2] = sum01 - sum23;
  block[3] = dif01 - dif23;
}
|
||||
|
67
Src/h264dec/lcommon/src/win32.c
Normal file
67
Src/h264dec/lcommon/src/win32.c
Normal file
|
@ -0,0 +1,67 @@
|
|||
|
||||
/*!
|
||||
*************************************************************************************
|
||||
* \file win32.c
|
||||
*
|
||||
* \brief
|
||||
* Platform dependent code
|
||||
*
|
||||
* \author
|
||||
* Main contributors (see contributors.h for copyright, address and affiliation details)
|
||||
* - Karsten Suehring <suehring@hhi.de>
|
||||
*************************************************************************************
|
||||
*/
|
||||
|
||||
#include "global.h"
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
static LARGE_INTEGER freq;
|
||||
|
||||
/*!
 ************************************************************************
 * \brief
 *    Stores the current performance-counter value in *time (Win32).
 ************************************************************************
 */
void gettime(TIME_T* time)
{
  (void) QueryPerformanceCounter(time);
}
|
||||
|
||||
/*!
 ************************************************************************
 * \brief
 *    Returns end - start in performance-counter ticks (Win32).
 ************************************************************************
 */
int64 timediff(TIME_T* start, TIME_T* end)
{
  const int64 elapsed_ticks = (int64) (end->QuadPart - start->QuadPart);

  return elapsed_ticks;
}
|
||||
|
||||
/*!
 ************************************************************************
 * \brief
 *    Converts a tick count to milliseconds (Win32).
 *
 *    The counter frequency is queried on the first call and kept in the
 *    file-level variable freq for later calls.
 ************************************************************************
 */
int64 timenorm(int64 cur_time)
{
  static int need_frequency = 1;

  if (need_frequency)
  {
    QueryPerformanceFrequency(&freq);
    need_frequency = 0;
  }

  return (int64) ((cur_time * 1000) / freq.QuadPart);
}
|
||||
|
||||
#else
|
||||
|
||||
static struct timezone tz;
|
||||
|
||||
void gettime(TIME_T* time)
|
||||
{
|
||||
gettimeofday(time, &tz);
|
||||
}
|
||||
|
||||
/*!
 ************************************************************************
 * \brief
 *    Returns end - start in microseconds (non-Win32 build).
 *
 * \note
 *    Both differences are formed in int64 so the seconds delta is not
 *    narrowed to int before being scaled.
 ************************************************************************
 */
int64 timediff(TIME_T* start, TIME_T* end)
{
  const int64 sec_delta  = (int64) end->tv_sec  - (int64) start->tv_sec;
  const int64 usec_delta = (int64) end->tv_usec - (int64) start->tv_usec;

  return usec_delta + sec_delta * (int64) 1000000;
}
|
||||
|
||||
/*!
 ************************************************************************
 * \brief
 *    Divides a time value by 1000 (non-Win32 build); with the microsecond
 *    differences produced by timediff() this yields milliseconds.
 ************************************************************************
 */
int64 timenorm(int64 cur_time)
{
  const int64 divisor = 1000;

  return cur_time / divisor;
}
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue