Initial community commit

This commit is contained in:
Jef 2024-09-24 14:54:57 +02:00
parent 537bcbc862
commit fc06254474
16440 changed files with 4239995 additions and 2 deletions

View file

@ -0,0 +1,327 @@
/*!
*************************************************************************************
* \file img_io.c
*
* \brief
* image I/O related functions
*
* \author
* Main contributors (see contributors.h for copyright, address and affiliation details)
* - Alexis Michael Tourapis <alexismt@ieee.org>
*************************************************************************************
*/
#include "contributors.h"
#include "global.h"
#include "img_io.h"
#include "report.h"
// Table of named video resolutions, given positionally as name, x_size, y_size.
// ParseSizeFromString walks it until it reaches the entry whose name is NULL,
// so the { NULL, 0, 0 } terminator must stay last.
static const VIDEO_SIZE VideoRes[] = {
{ "qcif" , 176, 144},
{ "qqvga" , 160, 128},
{ "qvga" , 320, 240},
{ "sif" , 352, 240},
{ "cif" , 352, 288},
{ "vga" , 640, 480},
{ "sd1" , 720, 480},
{ "sd2" , 704, 576},
{ "sd3" , 720, 576},
{ "720p" , 1280, 720},
{ "1080p" , 1920, 1080},
{ NULL, 0, 0}
};
/*!
 ************************************************************************
 * \brief
 *    Parse the frame size (and, if present, the frame rate) from the
 *    file name
 *
 *    The name is scanned for the pattern "_<width>x<height>" followed by
 *    '_' or '.', optionally followed by "<fps>i" or "<fps>p"
 *    (e.g. "foreman_352x288_30p.yuv"). If no size can be parsed, the
 *    name is compared (case-insensitively) against the VideoRes table.
 *
 * \param input_file
 *    Video file descriptor; only fname is read. The string is modified
 *    temporarily while parsing and restored before returning.
 * \param x_size
 *    Receives the width, or -1 if none was found
 * \param y_size
 *    Receives the height, or -1 if none was found
 * \param fps
 *    Receives the frame rate; left untouched if the name carries none
 *
 * \return
 *    1 if both width and height were determined, 0 otherwise
 ************************************************************************
 */
int ParseSizeFromString (VideoDataFile *input_file, int *x_size, int *y_size, double *fps)
{
  char *p1, *p2, *tail;
  char *fn = input_file->fname;
  char c;
  int i;
  int valid;
  int size_found = 0;
  long width, height;
  double rate;

  *x_size = *y_size = -1;
  p1 = p2 = fn;

  while (p1 != NULL && p2 != NULL)
  {
    // Search for the next '_'
    p1 = strstr( p1, "_");
    if (p1 == NULL)
      break;

    // Search for the end character of the width (first 'x' after that '_')
    p2 = strstr( p1, "x");
    if (p2 == NULL)
      break;

    // Terminate the string at 'x', convert, and restore the string.
    // The validity test must run while the terminator is still in place.
    *p2 = 0;
    width = strtol( p1 + 1, &tail, 10);
    valid = (*tail == '\0' && *(p1 + 1) != '\0');
    *p2 = 'x';

    // Trailing characters or an empty field: not a width, keep scanning
    if (!valid)
    {
      p1 = tail;
      continue;
    }

    // Search for the end character of the height (first '_' or '.' after 'x')
    p1 = strpbrk( p2 + 1, "_.");
    if (p1 == NULL)
    {
      p1 = p2 + 1;
      continue;
    }

    c = *p1;
    *p1 = 0;
    height = strtol( p2 + 1, &tail, 10);
    valid = (*tail == '\0' && *(p2 + 1) != '\0');
    *p1 = c;

    if (!valid)
    {
      p1 = tail;
      continue;
    }

    // Only publish the size once both dimensions converted cleanly, so a
    // half-matched pattern never leaks a bogus value to the caller.
    *x_size = (int) width;
    *y_size = (int) height;
    size_found = 1;

    // Search for the end character of the frame rate (first 'i' or 'p').
    // strpbrk matches either character; strstr("ip") would only match the
    // two-character sequence "ip".
    p2 = strpbrk( p1 + 1, "ip");
    if (p2 == NULL)
      break;

    c = *p2;
    *p2 = 0;
    rate = strtod( p1 + 1, &tail);
    valid = (*tail == '\0' && *(p1 + 1) != '\0');
    *p2 = c;

    if (!valid)
    {
      p1 = tail;
      continue;
    }

    *fps = rate;
    break;
  }

  // No explicit size in the name: test some common video format names.
  // This must not run after a successful parse, or it would overwrite it.
  if (!size_found)
  {
    for (i = 0; VideoRes[i].name != NULL; i++)
    {
      // strcasecmp returns 0 on a match
      if (strcasecmp (fn, VideoRes[i].name) == 0)
      {
        *x_size = VideoRes[i].x_size;
        *y_size = VideoRes[i].y_size;
        // Should add frame rate support as well
        break;
      }
    }
  }

  return (*x_size == -1 || *y_size == -1) ? 0 : 1;
}
/*!
 ************************************************************************
 * \brief
 *    Parse the frame number format from the file name
 *
 *    Looks for a printf-style "%<width>d" field in fname. On success the
 *    text before the field is stored in fhead, the text after it in
 *    ftail, the width in num_digits, and zero_pad is set when the width
 *    starts with '0'. If no such field is found, num_digits stays -1.
 *
 *    is_concatenated is cleared for VIDEO_TIFF input; otherwise it is
 *    set exactly when no frame number field was found.
 *
 * \param input_file
 *    Video file descriptor; fname is read (and temporarily modified),
 *    fhead, ftail, zero_pad, num_digits and is_concatenated are written.
 *    NOTE(review): fhead and ftail are assumed to be at least as large
 *    as fname - confirm against the VideoDataFile declaration.
 ************************************************************************
 */
void ParseFrameNoFormatFromString (VideoDataFile *input_file)
{
  char *p1, *p2, *tail;
  char *fn = input_file->fname;
  char *fhead = input_file->fhead;
  char *ftail = input_file->ftail;
  int *zero_pad = &input_file->zero_pad;
  int *num_digits = &input_file->num_digits;
  long digits;
  int leading_zero;
  int valid;
  size_t tail_len;

  *zero_pad = 0;
  *num_digits = -1;
  p1 = p2 = fn;

  while (p1 != NULL && p2 != NULL)
  {
    // Search for the next '%'
    p1 = strstr( p1, "%");
    if (p1 == NULL)
      break;

    // Everything before '%' is the file name head. strncpy does not
    // terminate the destination when it copies exactly n characters.
    strncpy(fhead, fn, p1 - fn);
    fhead[p1 - fn] = '\0';

    // Search for the end character of the field (first 'd' after '%')
    p2 = strstr( p1, "d");
    if (p2 == NULL)
      break;

    // Terminate at 'd', convert the width, and restore the string.
    // All tests on the field must run while the terminator is in place.
    *p2 = 0;
    leading_zero = (*(p1 + 1) == '0');
    digits = strtol( p1 + 1, &tail, 10);
    valid = (*tail == '\0' && *(p1 + 1) != '\0');
    *p2 = 'd';

    // Trailing characters or an empty width: discard and keep scanning,
    // leaving zero_pad / num_digits at their "not found" values.
    if (!valid)
    {
      p1 = tail;
      continue;
    }

    *zero_pad = leading_zero;
    *num_digits = (int) digits;

    // tail points at the 'd'; everything after it is the file name tail
    tail++;
    tail_len = strlen(tail);
    strncpy(ftail, tail, tail_len);
    ftail[tail_len] = '\0';
    break;
  }

  if (input_file->vdtype == VIDEO_TIFF)
  {
    input_file->is_concatenated = 0;
  }
  else
    input_file->is_concatenated = (*num_digits == -1) ? 1 : 0;
}
/*!
 ************************************************************************
 * \brief
 *    Open file containing a single frame
 *
 *    The file name is built as fhead + frame number + ftail, where the
 *    number is formatted with a field width of num_digits and zero
 *    padded when zero_pad is set.
 *
 * \param input_file
 *    Video file descriptor; f_num receives the result of open(), or -1
 *    if the name does not fit or the file cannot be opened
 * \param FrameNumberInFile
 *    Frame number to insert into the file name
 ************************************************************************
 */
void OpenFrameFile( VideoDataFile *input_file, int FrameNumberInFile)
{
  char infile [FILE_NAME_SIZE], in_number[16];
  int length;

  if (input_file->zero_pad)
    snprintf(in_number, sizeof(in_number), "%0*d", input_file->num_digits, FrameNumberInFile);
  else
    snprintf(in_number, sizeof(in_number), "%*d", input_file->num_digits, FrameNumberInFile);
  // Some snprintf implementations do not terminate on truncation
  in_number[sizeof(in_number) - 1] = '\0';

  // Assemble the whole name in one bounded step instead of tracking the
  // length by hand across several unbounded concatenations.
  length = snprintf(infile, sizeof(infile), "%s%s%s", input_file->fhead, in_number, input_file->ftail);
  infile[sizeof(infile) - 1] = '\0';

  // A negative result or one that does not fit both indicate truncation
  if (length < 0 || length >= (int) sizeof(infile))
  {
    input_file->f_num = -1;
    printf ("OpenFrameFile: file name too long: %s\n", infile);
    report_stats_on_error();
    return;
  }

  if ((input_file->f_num = open(infile, OPENFLAGS_READ)) == -1)
  {
    printf ("OpenFrameFile: cannot open file %s\n", infile);
    report_stats_on_error();
  }
}
/*!
 ************************************************************************
 * \brief
 *    Open the file containing the entire frame sequence
 *
 *    Does nothing unless the input is a single concatenated file
 *    (is_concatenated == 1). Reports an error if no name was given or
 *    if the file cannot be opened.
 ************************************************************************
 */
void OpenFiles( VideoDataFile *input_file)
{
  // Per-frame files are opened elsewhere, one frame at a time
  if (input_file->is_concatenated != 1)
    return;

  if (input_file->fname[0] == '\0')
  {
    snprintf(errortext, ET_SIZE, "No input sequence name was provided. Please check settings.");
    error (errortext, 500);
  }

  input_file->f_num = open(input_file->fname, OPENFLAGS_READ);
  if (input_file->f_num == -1)
  {
    snprintf(errortext, ET_SIZE, "Input file %s does not exist",input_file->fname);
    error (errortext, 500);
  }
}
/*!
 ************************************************************************
 * \brief
 *    Close the input file, if one is open, and mark the descriptor as
 *    closed (f_num = -1)
 ************************************************************************
 */
void CloseFiles(VideoDataFile *input_file)
{
  const int is_open = (input_file->f_num != -1);

  if (is_open)
  {
    close(input_file->f_num);
  }

  input_file->f_num = -1;
}
/*!
 ************************************************************************
 * \brief
 *    Determine the video file type from the last three characters of
 *    the file name (compared case-insensitively)
 *
 *    "yuv" selects VIDEO_YUV / YUV420, "rgb" selects VIDEO_RGB / YUV444,
 *    "tif" selects VIDEO_TIFF and "avi" selects VIDEO_AVI. Anything
 *    else, including a name shorter than three characters, falls back
 *    to VIDEO_YUV / YUV420.
 *
 * \param input_file
 *    Video file descriptor; fname is read, vdtype (and, depending on
 *    the type, format.yuv_format and avi) are written
 *
 * \return
 *    The detected type (same value as input_file->vdtype)
 ************************************************************************
 */
VideoFileType ParseVideoType (VideoDataFile *input_file)
{
  size_t name_len = strlen(input_file->fname);
  // Stepping back three characters from the end of a shorter name would
  // point before the start of the buffer; use an empty suffix instead so
  // that such names take the default branch.
  const char *format = (name_len >= 3) ? input_file->fname + name_len - 3 : "";

  if (strcasecmp (format, "yuv") == 0)
  {
    input_file->vdtype = VIDEO_YUV;
    input_file->format.yuv_format = YUV420;
    input_file->avi = NULL;
  }
  else if (strcasecmp (format, "rgb") == 0)
  {
    input_file->vdtype = VIDEO_RGB;
    input_file->format.yuv_format = YUV444;
    input_file->avi = NULL;
  }
  else if (strcasecmp (format, "tif") == 0)
  {
    input_file->vdtype = VIDEO_TIFF;
    input_file->avi = NULL;
  }
  else if (strcasecmp (format, "avi") == 0)
  {
    input_file->vdtype = VIDEO_AVI;
  }
  else
  {
    // Unknown extension: treated as raw YUV 4:2:0 rather than an error
    input_file->vdtype = VIDEO_YUV;
    input_file->format.yuv_format = YUV420;
    input_file->avi = NULL;
  }

  return input_file->vdtype;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,106 @@
#include "memcache.h"
#include "mbuffer.h"
#include "memalloc.h"
/* Frees every cached image and resets the cache dimensions to 0 x 0. */
void image_cache_flush(ImageCache *cache)
{
  while (cache->head != 0)
  {
    VideoImage *doomed = cache->head;
    VideoImage *remaining = doomed->next; /* read the link before the node is freed */

    free_memImage(doomed);
    cache->head = remaining;
  }

  cache->size_x = 0;
  cache->size_y = 0;
}
/* Sets the image size the cache holds; a change of size flushes the cache first. */
void image_cache_set_dimensions(ImageCache *cache, int width, int height)
{
  if (width == cache->size_x && height == cache->size_y)
    return; /* same size: cached images stay usable */

  image_cache_flush(cache);
  cache->size_x = width;
  cache->size_y = height;
}
/* Returns 1 if width x height equals the cache's current dimensions, 0 otherwise. */
int image_cache_dimensions_match(ImageCache *cache, int width, int height)
{
  return (width == cache->size_x && height == cache->size_y) ? 1 : 0;
}
/* Pushes image onto the front of the cache's singly linked list. */
void image_cache_add(ImageCache *cache, VideoImage *image)
{
  VideoImage *previous_head = cache->head;

  image->next = previous_head;
  cache->head = image;
}
/* Pops and returns the first cached image (with its next link cleared),
   or returns 0 when the cache is empty. */
struct video_image *image_cache_get(ImageCache *cache)
{
  VideoImage *top = cache->head;

  if (!top)
    return 0;

  cache->head = top->next;
  top->next = 0;
  return top;
}
/* -------------
PicMotion arrays are allocated with one extra slot in the first dimension
which we use as the next pointer
------------- */
/* Frees every cached motion array and resets the cache dimensions to 0 x 0.
   The link to the next array is stored in slot [size_y] of each array. */
void motion_cache_flush(MotionCache *cache)
{
  while (cache->head != 0)
  {
    PicMotion **doomed = cache->head;
    PicMotion **remaining = (PicMotion **)doomed[cache->size_y]; /* read the link before freeing */

    free_mem2DPicMotion(doomed);
    cache->head = remaining;
  }

  cache->size_x = 0;
  cache->size_y = 0;
}
/* Sets the array size the cache holds; a change of size flushes the cache first. */
void motion_cache_set_dimensions(MotionCache *cache, int width, int height)
{
  if (width == cache->size_x && height == cache->size_y)
    return; /* same size: cached arrays stay usable */

  motion_cache_flush(cache);
  cache->size_x = width;
  cache->size_y = height;
}
/* Returns 1 if width x height equals the cache's current dimensions, 0 otherwise. */
int motion_cache_dimensions_match(MotionCache *cache, int width, int height)
{
  return (width == cache->size_x && height == cache->size_y) ? 1 : 0;
}
/* Pushes a motion array onto the front of the cache; the previous head is
   stored in the array's slot [size_y], which serves as the next link. */
void motion_cache_add(MotionCache *cache, PicMotion **image)
{
  PicMotion **previous_head = cache->head;

  image[cache->size_y] = (PicMotion *)previous_head;
  cache->head = image;
}
/* Pops and returns the first cached motion array (with its link slot
   cleared), or returns 0 when the cache is empty. */
struct pic_motion **motion_cache_get(MotionCache *cache)
{
  PicMotion **top = cache->head;

  if (!top)
    return 0;

  cache->head = (PicMotion **)top[cache->size_y];
  top[cache->size_y] = 0;
  return top;
}

View file

@ -0,0 +1,250 @@
/*!
*************************************************************************************
* \file mv_prediction.c
*
* \brief
* Motion Vector Prediction Functions
*
* \author
* Main contributors (see contributors.h for copyright, address and affiliation details)
* - Alexis Michael Tourapis <alexismt@ieee.org>
* - Karsten Sühring <suehring@hhi.de>
*************************************************************************************
*/
#include "global.h"
#include "mbuffer.h"
/*!
************************************************************************
* \brief
* Get motion vector predictor
************************************************************************
*/
static void GetMotionVectorPredictorMBAFF (Macroblock *currMB,
                                           PixelPos *block, // <--> block neighbors
                                           short pmv[2],
                                           short ref_frame,
                                           PicMotion **motion,
                                           int mb_x,
                                           int mb_y,
                                           int blockshape_x,
                                           int blockshape_y)
{
  // Neighbors are indexed 0 = left, 1 = upper, 2 = upper right
  VideoParameters *p_Vid = currMB->p_Vid;
  int nb_ref[3];   // neighbor reference index in the current MB's frame/field units, -1 if unavailable
  int nb_mv[3];    // neighbor motion vector component, 0 if unavailable
  int n, comp;
  int selected = MVPRED_MEDIAN;
  int predicted = 0;

  // Reference indices: doubled when a frame neighbor is seen from a field MB,
  // halved when a field neighbor is seen from a frame MB
  for (n = 0; n < 3; n++)
  {
    if (!block[n].available)
    {
      nb_ref[n] = -1;
    }
    else
    {
      int idx = motion[block[n].pos_y][block[n].pos_x].ref_idx;

      if (p_Vid->mb_data[block[n].mb_addr].mb_field)
        nb_ref[n] = currMB->mb_field ? idx : idx >> 1;
      else
        nb_ref[n] = currMB->mb_field ? idx * 2 : idx;
    }
  }

  {
    const int same_l  = (nb_ref[0] == ref_frame);
    const int same_u  = (nb_ref[1] == ref_frame);
    const int same_ur = (nb_ref[2] == ref_frame);

    // Prediction if only one of the neighbors uses the reference frame
    // we are checking
    if (same_l && !same_u && !same_ur)
      selected = MVPRED_L;
    else if (!same_l && same_u && !same_ur)
      selected = MVPRED_U;
    else if (!same_l && !same_u && same_ur)
      selected = MVPRED_UR;

    // Directional predictions for 8x16 and 16x8 partitions
    if (blockshape_x == 8 && blockshape_y == 16)
    {
      if (mb_x == 0)
      {
        if (same_l)
          selected = MVPRED_L;
      }
      else if (same_ur)
      {
        selected = MVPRED_UR;
      }
    }
    else if (blockshape_x == 16 && blockshape_y == 8)
    {
      if (mb_y == 0)
      {
        if (same_u)
          selected = MVPRED_U;
      }
      else if (same_l)
      {
        selected = MVPRED_L;
      }
    }
  }

  for (comp = 0; comp < 2; comp++)
  {
    // Gather the neighbor vectors; only the vertical component (comp == 1)
    // is rescaled between frame and field macroblocks
    for (n = 0; n < 3; n++)
    {
      int v = 0;

      if (block[n].available)
      {
        v = motion[block[n].pos_y][block[n].pos_x].mv[comp];

        if (comp != 0)
        {
          if (p_Vid->mb_data[block[n].mb_addr].mb_field)
          {
            if (!currMB->mb_field)
              v = v * 2;
          }
          else
          {
            if (currMB->mb_field)
              v = v / 2;
          }
        }
      }
      nb_mv[n] = v;
    }

    switch (selected)
    {
    case MVPRED_MEDIAN:
      if (block[1].available || block[2].available)
      {
        // median of three = sum - min - max
        predicted = nb_mv[0] + nb_mv[1] + nb_mv[2]
                  - imin(nb_mv[0], imin(nb_mv[1], nb_mv[2]))
                  - imax(nb_mv[0], imax(nb_mv[1], nb_mv[2]));
      }
      else
      {
        predicted = nb_mv[0];
      }
      break;
    case MVPRED_L:
      predicted = nb_mv[0];
      break;
    case MVPRED_U:
      predicted = nb_mv[1];
      break;
    case MVPRED_UR:
      predicted = nb_mv[2];
      break;
    default:
      break;
    }

    pmv[comp] = (short) predicted;
  }
}
/*!
************************************************************************
* \brief
* Get motion vector predictor
************************************************************************
*/
// TODO: benski> make SSE3/MMX version
static void GetMotionVectorPredictorNormal (Macroblock *currMB,
                                            PixelPos *block, // <--> block neighbors
                                            short pmv[2],
                                            short ref_frame,
                                            PicMotion **motion,
                                            int mb_x,
                                            int mb_y,
                                            int blockshape_x,
                                            int blockshape_y)
{
  // Neighbors are indexed 0 = left, 1 = upper, 2 = upper right
  int nb_ref[3];
  int n, comp;
  int source = -1;   // neighbor to copy the vector from, -1 selects the median
  const int is_8x16 = (blockshape_x == 8 && blockshape_y == 16);
  const int is_16x8 = (blockshape_x == 16 && blockshape_y == 8);

  for (n = 0; n < 3; n++)
    nb_ref[n] = block[n].available ? motion[block[n].pos_y][block[n].pos_x].ref_idx : -1;

  {
    const int same_l  = (nb_ref[0] == ref_frame);
    const int same_u  = (nb_ref[1] == ref_frame);
    const int same_ur = (nb_ref[2] == ref_frame);

    // A single neighbor is used when it is the only one with the reference
    // frame we are checking, or when the partition shape points to it
    if (same_l && ((!same_u && !same_ur) || (is_8x16 && mb_x == 0) || (is_16x8 && mb_y != 0)))
      source = 0;
    else if (same_u && ((!same_l && !same_ur) || (is_16x8 && mb_y == 0)))
      source = 1;
    else if (same_ur && ((!same_l && !same_u) || (is_8x16 && mb_x != 0)))
      source = 2;
  }

  // Median case with neither upper neighbor available: take the left vector
  if (source < 0 && !(block[1].available || block[2].available))
    source = 0;

  if (source >= 0)
  {
    if (block[source].available)
    {
      pmv[0] = motion[block[source].pos_y][block[source].pos_x].mv[0];
      pmv[1] = motion[block[source].pos_y][block[source].pos_x].mv[1];
    }
    else
    {
      pmv[0] = 0;
      pmv[1] = 0;
    }
    return;
  }

  // Component-wise median of the three neighbors (sum - min - max);
  // unavailable neighbors contribute 0
  for (comp = 0; comp < 2; comp++)
  {
    int mv_a = block[0].available ? motion[block[0].pos_y][block[0].pos_x].mv[comp] : 0;
    int mv_b = block[1].available ? motion[block[1].pos_y][block[1].pos_x].mv[comp] : 0;
    int mv_c = block[2].available ? motion[block[2].pos_y][block[2].pos_x].mv[comp] : 0;

    pmv[comp] = (short) (mv_a + mv_b + mv_c - imin(mv_a, imin(mv_b, mv_c)) - imax(mv_a, imax(mv_b ,mv_c)));
  }
}
/* Installs the motion vector predictor on currMB: the MBAFF variant when
   mb_aff_frame_flag is non-zero, the normal variant otherwise. */
void init_motion_vector_prediction(Macroblock *currMB, int mb_aff_frame_flag)
{
  currMB->GetMVPredictor = mb_aff_frame_flag ? GetMotionVectorPredictorMBAFF
                                             : GetMotionVectorPredictorNormal;
}

View file

@ -0,0 +1,244 @@
/*!
**************************************************************************************
* \file
* parsetcommon.c
* \brief
* Picture and Sequence Parameter set generation and handling
* \date 25 November 2002
* \author
* Main contributors (see contributors.h for copyright, address and affiliation details)
* - Stephan Wenger <stewe@cs.tu-berlin.de>
*
**************************************************************************************
*/
#include "global.h"
#include "parsetcommon.h"
#include "memalloc.h"
/*!
 *************************************************************************************
 * \brief
 *    Allocates zero-initialized memory for a picture parameter set
 *
 *    Calls no_mem_exit() if the allocation fails.
 *
 * \return
 *    pointer to a pps whose slice_group_id is NULL
 *************************************************************************************
 */
pic_parameter_set_rbsp_t *AllocPPS ()
{
  pic_parameter_set_rbsp_t *pps = calloc (1, sizeof (pic_parameter_set_rbsp_t));

  if (pps == NULL)
    no_mem_exit ("AllocPPS: PPS");

  pps->slice_group_id = NULL;
  return pps;
}
/*!
 *************************************************************************************
 * \brief
 *    Allocates zero-initialized memory for a sequence parameter set
 *
 *    Calls no_mem_exit() if the allocation fails.
 *
 * \return
 *    pointer to a sps
 *************************************************************************************
 */
seq_parameter_set_rbsp_t *AllocSPS ()
{
  seq_parameter_set_rbsp_t *sps = calloc (1, sizeof (seq_parameter_set_rbsp_t));

  if (sps == NULL)
    no_mem_exit ("AllocSPS: SPS");

  return sps;
}
/*!
 *************************************************************************************
 * \brief
 *    Frees a picture parameter set together with its slice_group_id array
 *
 * \param pps
 *    Picture parameter set to be freed (must not be NULL)
 *************************************************************************************
 */
void FreePPS (pic_parameter_set_rbsp_t *pps)
{
  assert (pps != NULL);

  // free() accepts NULL, so the array needs no separate check
  free (pps->slice_group_id);
  free (pps);
}
/*!
 *************************************************************************************
 * \brief
 *    Frees a sequence parameter set
 *
 * \param sps
 *    Sequence parameter set to be freed (must not be NULL)
 *************************************************************************************
 */
void FreeSPS (seq_parameter_set_rbsp_t *sps)
{
  assert (sps != NULL);

  free (sps);
}
/* Compares two sequence parameter sets field by field.
   Returns 1 if both are valid and every compared field matches, 0 otherwise.
   Conditional fields are only compared when the flag that enables them is set. */
int sps_is_equal(seq_parameter_set_rbsp_t *sps1, seq_parameter_set_rbsp_t *sps2)
{
  unsigned idx;

  // An invalid set never compares equal
  if (!sps1->Valid || !sps2->Valid)
    return 0;

  if (sps1->profile_idc != sps2->profile_idc
      || sps1->constrained_set0_flag != sps2->constrained_set0_flag
      || sps1->constrained_set1_flag != sps2->constrained_set1_flag
      || sps1->constrained_set2_flag != sps2->constrained_set2_flag
      || sps1->level_idc != sps2->level_idc
      || sps1->seq_parameter_set_id != sps2->seq_parameter_set_id
      || sps1->log2_max_frame_num_minus4 != sps2->log2_max_frame_num_minus4
      || sps1->pic_order_cnt_type != sps2->pic_order_cnt_type)
    return 0;

  // Fields that depend on the (now known to be common) picture order count type
  if (sps1->pic_order_cnt_type == 0)
  {
    if (sps1->log2_max_pic_order_cnt_lsb_minus4 != sps2->log2_max_pic_order_cnt_lsb_minus4)
      return 0;
  }
  else if (sps1->pic_order_cnt_type == 1)
  {
    if (sps1->delta_pic_order_always_zero_flag != sps2->delta_pic_order_always_zero_flag
        || sps1->offset_for_non_ref_pic != sps2->offset_for_non_ref_pic
        || sps1->offset_for_top_to_bottom_field != sps2->offset_for_top_to_bottom_field
        || sps1->num_ref_frames_in_pic_order_cnt_cycle != sps2->num_ref_frames_in_pic_order_cnt_cycle)
      return 0;

    for (idx = 0; idx < sps1->num_ref_frames_in_pic_order_cnt_cycle; idx++)
    {
      if (sps1->offset_for_ref_frame[idx] != sps2->offset_for_ref_frame[idx])
        return 0;
    }
  }

  if (sps1->num_ref_frames != sps2->num_ref_frames
      || sps1->gaps_in_frame_num_value_allowed_flag != sps2->gaps_in_frame_num_value_allowed_flag
      || sps1->pic_width_in_mbs_minus1 != sps2->pic_width_in_mbs_minus1
      || sps1->pic_height_in_map_units_minus1 != sps2->pic_height_in_map_units_minus1
      || sps1->frame_mbs_only_flag != sps2->frame_mbs_only_flag)
    return 0;

  if (!sps1->frame_mbs_only_flag)
  {
    if (sps1->mb_adaptive_frame_field_flag != sps2->mb_adaptive_frame_field_flag)
      return 0;
  }

  if (sps1->direct_8x8_inference_flag != sps2->direct_8x8_inference_flag
      || sps1->frame_cropping_flag != sps2->frame_cropping_flag)
    return 0;

  if (sps1->frame_cropping_flag)
  {
    if (sps1->frame_cropping_rect_left_offset != sps2->frame_cropping_rect_left_offset
        || sps1->frame_cropping_rect_right_offset != sps2->frame_cropping_rect_right_offset
        || sps1->frame_cropping_rect_top_offset != sps2->frame_cropping_rect_top_offset
        || sps1->frame_cropping_rect_bottom_offset != sps2->frame_cropping_rect_bottom_offset)
      return 0;
  }

  if (sps1->vui_parameters_present_flag != sps2->vui_parameters_present_flag)
    return 0;

  return 1;
}
/* Compares two picture parameter sets field by field.
   Returns 1 if both are valid and every compared field matches, 0 otherwise.
   Slice group and scaling list data are only compared when the fields that
   enable them are set. */
int pps_is_equal(pic_parameter_set_rbsp_t *pps1, pic_parameter_set_rbsp_t *pps2)
{
  unsigned k, n;

  // An invalid set never compares equal
  if (!pps1->Valid || !pps2->Valid)
    return 0;

  if (pps1->pic_parameter_set_id != pps2->pic_parameter_set_id
      || pps1->seq_parameter_set_id != pps2->seq_parameter_set_id
      || pps1->entropy_coding_mode_flag != pps2->entropy_coding_mode_flag
      || pps1->bottom_field_pic_order_in_frame_present_flag != pps2->bottom_field_pic_order_in_frame_present_flag
      || pps1->num_slice_groups_minus1 != pps2->num_slice_groups_minus1)
    return 0;

  if (pps1->num_slice_groups_minus1 > 0)
  {
    if (pps1->slice_group_map_type != pps2->slice_group_map_type)
      return 0;

    switch (pps1->slice_group_map_type)
    {
    case 0:
      for (k = 0; k <= pps1->num_slice_groups_minus1; k++)
      {
        if (pps1->run_length_minus1[k] != pps2->run_length_minus1[k])
          return 0;
      }
      break;

    case 2:
      for (k = 0; k < pps1->num_slice_groups_minus1; k++)
      {
        if (pps1->top_left[k] != pps2->top_left[k]
            || pps1->bottom_right[k] != pps2->bottom_right[k])
          return 0;
      }
      break;

    case 3:
    case 4:
    case 5:
      if (pps1->slice_group_change_direction_flag != pps2->slice_group_change_direction_flag
          || pps1->slice_group_change_rate_minus1 != pps2->slice_group_change_rate_minus1)
        return 0;
      break;

    case 6:
      // The map size must match before the per-unit ids can be compared
      if (pps1->pic_size_in_map_units_minus1 != pps2->pic_size_in_map_units_minus1)
        return 0;
      for (k = 0; k <= pps1->pic_size_in_map_units_minus1; k++)
      {
        if (pps1->slice_group_id[k] != pps2->slice_group_id[k])
          return 0;
      }
      break;

    default:
      break;
    }
  }

  if (pps1->num_ref_idx_l0_active_minus1 != pps2->num_ref_idx_l0_active_minus1
      || pps1->num_ref_idx_l1_active_minus1 != pps2->num_ref_idx_l1_active_minus1
      || pps1->weighted_pred_flag != pps2->weighted_pred_flag
      || pps1->weighted_bipred_idc != pps2->weighted_bipred_idc
      || pps1->pic_init_qp_minus26 != pps2->pic_init_qp_minus26
      || pps1->pic_init_qs_minus26 != pps2->pic_init_qs_minus26
      || pps1->chroma_qp_index_offset != pps2->chroma_qp_index_offset
      || pps1->deblocking_filter_control_present_flag != pps2->deblocking_filter_control_present_flag
      || pps1->constrained_intra_pred_flag != pps2->constrained_intra_pred_flag
      || pps1->redundant_pic_cnt_present_flag != pps2->redundant_pic_cnt_present_flag)
    return 0;

  //Fidelity Range Extensions Stuff
  //It is initialized to zero, so should be ok to check all the time.
  if (pps1->transform_8x8_mode_flag != pps2->transform_8x8_mode_flag
      || pps1->pic_scaling_matrix_present_flag != pps2->pic_scaling_matrix_present_flag)
    return 0;

  if (pps1->pic_scaling_matrix_present_flag)
  {
    // Six 4x4 lists, plus two 8x8 lists when the 8x8 transform is enabled
    const unsigned list_count = 6 + ((unsigned)pps1->transform_8x8_mode_flag << 1);

    for (k = 0; k < list_count; k++)
    {
      if (pps1->pic_scaling_list_present_flag[k] != pps2->pic_scaling_list_present_flag[k])
        return 0;

      if (!pps1->pic_scaling_list_present_flag[k])
        continue;

      if (k < 6)
      {
        for (n = 0; n < 16; n++)
        {
          if (pps1->ScalingList4x4[k][n] != pps2->ScalingList4x4[k][n])
            return 0;
        }
      }
      else
      {
        for (n = 0; n < 64; n++)
        {
          if (pps1->ScalingList8x8[k-6][n] != pps2->ScalingList8x8[k-6][n])
            return 0;
        }
      }
    }
  }

  if (pps1->second_chroma_qp_index_offset != pps2->second_chroma_qp_index_offset)
    return 0;

  return 1;
}

View file

@ -0,0 +1,809 @@
/*!
***************************************************************************
* \file transform.c
*
* \brief
* Transform functions
*
* \author
* Main contributors (see contributors.h for copyright, address and affiliation details)
* - Alexis Michael Tourapis
* \date
* 01. July 2007
**************************************************************************
*/
#include "global.h"
#include "transform.h"
#include <emmintrin.h>
/*!
 ***************************************************************************
 * \brief
 *    Forward 4x4 integer transform
 *
 *    Reads the 4x4 block of "block" whose top-left sample is at row pos_y,
 *    column pos_x, applies the butterfly to each row and then to each
 *    column, and writes the result to the same position in "tblock".
 *
 * \param block
 *    input samples, indexed [row][column]
 * \param tblock
 *    output coefficients, indexed [row][column]
 * \param pos_y
 *    first row of the 4x4 block
 * \param pos_x
 *    first column of the 4x4 block
 ***************************************************************************
 */
void forward4x4(int **block, int **tblock, int pos_y, int pos_x)
{
  int i, ii;
  int tmp[16];
  int *pTmp = tmp, *pblock;
  int p0,p1,p2,p3;
  int t0,t1,t2,t3;

  // Horizontal
  for (i = pos_y; i < pos_y + BLOCK_SIZE; i++)
  {
    pblock = &block[i][pos_x];
    p0 = pblock[0];
    p1 = pblock[1];
    p2 = pblock[2];
    p3 = pblock[3];

    t0 = p0 + p3;
    t1 = p1 + p2;
    t2 = p1 - p2;
    t3 = p0 - p3;

    // The differences t2 / t3 can be negative, and left-shifting a negative
    // value is undefined behavior in C, so the doubling is a multiplication.
    *(pTmp++) = t0 + t1;
    *(pTmp++) = (t3 * 2) + t2;
    *(pTmp++) = t0 - t1;
    *(pTmp++) = t3 - (t2 * 2);
  }

  // Vertical
  for (i = 0; i < BLOCK_SIZE; i++)
  {
    pTmp = tmp + i;
    p0 = *pTmp;
    p1 = *(pTmp += BLOCK_SIZE);
    p2 = *(pTmp += BLOCK_SIZE);
    p3 = *(pTmp += BLOCK_SIZE);

    t0 = p0 + p3;
    t1 = p1 + p2;
    t2 = p1 - p2;
    t3 = p0 - p3;

    ii = pos_x + i;
    tblock[pos_y    ][ii] = t0 + t1;
    tblock[pos_y + 1][ii] = t2 + (t3 * 2);
    tblock[pos_y + 2][ii] = t0 - t1;
    tblock[pos_y + 3][ii] = t3 - (t2 * 2);
  }
}
/* Inverse 4x4 integer transform: applies the butterfly to each row of tblock,
   then to each column of the intermediate result, and writes the output to
   block. pos_y and pos_x are not used by this implementation. */
static void inverse4x4(const h264_short_block_t tblock, h264_short_block_t block, int pos_y, int pos_x)
{
  short stage[16];        // row-pass output, four values per row
  short *out = stage;
  int row, col;
  int c0, c1, c2, c3;
  int even_sum, even_diff, odd_diff, odd_sum;

  // Horizontal
  for (row = 0; row < BLOCK_SIZE; row++)
  {
    c0 = tblock[row][0];
    c1 = tblock[row][1];
    c2 = tblock[row][2];
    c3 = tblock[row][3];

    even_sum  = c0 + c2;
    even_diff = c0 - c2;
    odd_diff  = (c1 >> 1) - c3;
    odd_sum   = c1 + (c3 >> 1);

    out[0] = even_sum  + odd_sum;
    out[1] = even_diff + odd_diff;
    out[2] = even_diff - odd_diff;
    out[3] = even_sum  - odd_sum;
    out += 4;
  }

  // Vertical
  for (col = 0; col < BLOCK_SIZE; col++)
  {
    c0 = stage[col];
    c1 = stage[col + BLOCK_SIZE];
    c2 = stage[col + 2 * BLOCK_SIZE];
    c3 = stage[col + 3 * BLOCK_SIZE];

    even_sum  = c0 + c2;
    even_diff = c0 - c2;
    odd_diff  = (c1 >> 1) - c3;
    odd_sum   = c1 + (c3 >> 1);

    block[0][col] = even_sum  + odd_sum;
    block[1][col] = even_diff + odd_diff;
    block[2][col] = even_diff - odd_diff;
    block[3][col] = even_sum  - odd_sum;
  }
}
#ifdef _M_IX86
// benski> this exists just for conformance testing. not used in production code
// Inline-assembly inverse 4x4 transform for 32-bit x86 builds.
// Operates on the 4x4 sub-block at (pos_y, pos_x): the byte offset is
// (pos_y * 16 + pos_x) * sizeof(short), and the four rows are read from tblock
// and written to block 32 bytes apart.
// Each pass transposes the 4x4 matrix and then applies the butterfly, so the
// first pass handles rows and the second handles columns.
// Despite the "sse2" in the name, only the 64-bit mm0-mm6 registers are used.
// NOTE(review): no EMMS is executed before returning - presumably the caller
// is expected to handle that; confirm.
// NOTE(review): the final stores are written as "movq XMMWORD PTR ..., mmN";
// an 8-byte store is presumably intended - confirm the operand size keyword.
static void inverse4x4_sse2_x86(const h264_short_macroblock_t tblock, h264_short_macroblock_t block, int pos_y, int pos_x)
{
__asm
{
mov edx, pos_y
shl edx, 4 // 16 step stride
add edx, pos_x
shl edx, 1 // * sizeof(short)
// eax: pointer to the start of tblock (offset by passed pos_y, pos_x)
mov eax, edx
add eax, tblock
// esi: results
mov esi, edx
add esi, block
// load 4x4 matrix
movq mm0, MMWORD PTR 0[eax]
movq mm1, MMWORD PTR 32[eax]
movq mm2, MMWORD PTR 64[eax]
movq mm3, MMWORD PTR 96[eax]
// rotate 4x4 matrix
movq mm4, mm0 // p0 = mm4 (copy)
punpcklwd mm0, mm2 // r0 = mm0
punpckhwd mm4, mm2 // r2 = mm4
movq mm5, mm1 // p1 = mm5 (copy)
punpcklwd mm1, mm3 // r1 = mm1
punpckhwd mm5, mm3 // r3 = mm5
movq mm6, mm0 // r0 = mm6 (copy)
punpcklwd mm0, mm1 // t0 = mm0
punpckhwd mm6, mm1 // t1 = mm6
movq mm1, mm4 // r2 = mm1 (copy)
punpcklwd mm1, mm5 // t2 = mm1
punpckhwd mm4, mm5 // t3 = mm4
/* register state:
mm0: t0
mm1: t2
mm2:
mm3:
mm4: t3
mm5:
mm6: t1
mm7:
*/
/*
p0 = t0 + t2;
p1 = t0 - t2;
p2 = (t1 >> 1) - t3;
p3 = t1 + (t3 >> 1);
*/
movq mm2, mm0 // mm2 = t0 (copy)
paddw mm0, mm1 // mm0 = p0
psubw mm2, mm1 // mm2 = p1, mm1 available
movq mm5, mm6 // mm5 = t1 (copy)
psraw mm5, 1 // mm5 = (t1 >> 1)
psubw mm5, mm4 // mm5 = p2
psraw mm4, 1 // mm4 = (t3 >> 1)
paddw mm6, mm4 // mm6 = p3
/* register state:
mm0: p0
mm1:
mm2: p1
mm3:
mm4:
mm5: p2
mm6: p3
mm7:
*/
/*
*(pTmp++) = p0 + p3;
*(pTmp++) = p1 + p2;
*(pTmp++) = p1 - p2;
*(pTmp++) = p0 - p3;
*/
movq mm3, mm0 // mm3 = p0 (copy)
paddw mm0, mm6 // mm0 = r0
movq mm1, mm2 // mm1 = p1 (copy)
paddw mm1, mm5 // mm1 = r1
psubw mm2, mm5 // mm2 = r2, mm5 available
psubw mm3, mm6 // mm3 = r3
/* register state:
mm0: r0
mm1: r1
mm2: r2
mm3: r3
mm4:
mm5:
mm6:
mm7:
*/
// rotate 4x4 matrix to set up for vertical
movq mm4, mm0 // r0 = mm4 (copy)
punpcklwd mm0, mm2 // p0 = mm0
punpckhwd mm4, mm2 // p2 = mm4
movq mm5, mm1 // r1 = mm5 (copy)
punpcklwd mm1, mm3 // p1 = mm1
punpckhwd mm5, mm3 // p3 = mm5
movq mm6, mm0 // p0 = mm6 (copy)
punpcklwd mm0, mm1 // t0 = mm0
punpckhwd mm6, mm1 // t1 = mm6
movq mm1, mm4 // p2 = mm1 (copy)
punpcklwd mm1, mm5 // t2 = mm1
punpckhwd mm4, mm5 // t3 = mm4
/* register state:
mm0: t0
mm1: t2
mm2:
mm3:
mm4: t3
mm5:
mm6: t1
mm7:
*/
/*
p0 = t0 + t2;
p1 = t0 - t2;
p2 = (t1 >> 1) - t3;
p3 = t1 + (t3 >> 1);
*/
movq mm2, mm0 // mm2 = t0 (copy)
paddw mm0, mm1 // mm0 = p0
psubw mm2, mm1 // mm2 = p1, mm1 available
movq mm5, mm6 // mm5 = t1 (copy)
psraw mm5, 1 // mm5 = (t1 >> 1)
psubw mm5, mm4 // mm5 = p2
psraw mm4, 1 // mm4 = (t3 >> 1)
paddw mm6, mm4 // mm6 = p3
/* register state:
mm0: p0
mm1:
mm2: p1
mm3:
mm4:
mm5: p2
mm6: p3
mm7:
*/
/*
*(pTmp++) = p0 + p3;
*(pTmp++) = p1 + p2;
*(pTmp++) = p1 - p2;
*(pTmp++) = p0 - p3;
*/
movq mm3, mm0 // mm3 = p0 (copy)
paddw mm0, mm6 // mm0 = r0
movq mm1, mm2 // mm1 = p1 (copy)
paddw mm1, mm5 // mm1 = r1
psubw mm2, mm5 // mm2 = r2, mm5 available
psubw mm3, mm6 // mm3 = r3
/* register state:
mm0: r0
mm1: r1
mm2: r2
mm3: r3
mm4:
mm5:
mm6:
mm7:
*/
movq XMMWORD PTR 0[esi], mm0
movq XMMWORD PTR 32[esi], mm1
movq XMMWORD PTR 64[esi], mm2
movq XMMWORD PTR 96[esi], mm3
}
}
#endif
// Reconstructs one 4x4 block of samples: each residual in tblock is rounded
// and scaled down, added to the prediction sample from mpr, clipped, and
// stored in curImg. The block starts at row joff, column mb_x in both mpr and
// curImg; tblock is indexed from [0][0].
//
// Two implementations are selected at compile time:
//  - _M_IX86: inline assembly that adds 32, shifts right by 6, adds the
//    prediction with signed saturation and packs to unsigned bytes. It does
//    not reference max_imgpel_value.
//  - otherwise: portable C using rshift_rnd_sf(.., DQ_BITS) and
//    iClip1(max_imgpel_value, ..).
// NOTE(review): the two paths presumably agree only when DQ_BITS is 6 and
// max_imgpel_value is 255 - confirm.
// NOTE(review): the assembly path executes no EMMS before returning - confirm
// that the caller takes care of it.
static void sample_reconstruct(h264_imgpel_macroblock_t curImg, const h264_imgpel_macroblock_t mpr, const h264_short_block_t tblock, int joff, int mb_x, int max_imgpel_value)
{
#ifdef _M_IX86
__asm
{
// mm0 : constant value 32
mov edx, 0x00200020
movd mm0, edx
punpckldq mm0, mm0
// ecx: y offset
mov ecx, joff
shl ecx, 4 // imgpel stuff is going to be 16 byte stride
add ecx, mb_x
// eax: curImg
mov eax, curImg
add eax, ecx
// edx: mpr
mov edx, mpr
add edx, ecx
// ecx: tblock (which is short, not byte)
mov ecx, tblock
// mm7: zero
pxor mm7, mm7
// load coefficients
movq mm1, MMWORD PTR 0[ecx]
movq mm2, MMWORD PTR 8[ecx]
movq mm3, MMWORD PTR 16[ecx]
movq mm4, MMWORD PTR 24[ecx]
paddw mm1, mm0 // rres + 32
paddw mm2, mm0 // rres + 32
paddw mm3, mm0 // rres + 32
paddw mm0, mm4 // rres + 32
psraw mm1, 6 // (rres + 32) >> 6
psraw mm2, 6 // (rres + 32) >> 6
psraw mm3, 6 // (rres + 32) >> 6
psraw mm0, 6 // (rres + 32) >> 6
// mm1-mm3: tblock[0] - tblock[2], mm0: tblock[3]
// convert mpr from unsigned char to short
movd mm4, DWORD PTR 0[edx]
movd mm5, DWORD PTR 16[edx]
movd mm6, DWORD PTR 32[edx]
punpcklbw mm4, mm7
punpcklbw mm5, mm7
punpcklbw mm6, mm7
paddsw mm4, mm1 // pred_row + rres_row
movd mm1, DWORD PTR 48[edx] // reuse mm1 for mpr[3]
paddsw mm5, mm2 // pred_row + rres_row
punpcklbw mm1, mm7
paddsw mm6, mm3 // pred_row + rres_row
paddsw mm1, mm0 // pred_row + rres_row
// results in mm4, mm5, mm6, mm1
// move back to 8 bit
packuswb mm4, mm7
packuswb mm5, mm7
packuswb mm6, mm7
packuswb mm1, mm7
movd DWORD PTR 0[eax], mm4
movd DWORD PTR 16[eax], mm5
movd DWORD PTR 32[eax], mm6
movd DWORD PTR 48[eax], mm1
}
#else
int i, j;
for (j = 0; j < BLOCK_SIZE; j++)
{
for (i=0;i<BLOCK_SIZE;i++)
curImg[j+joff][mb_x+i] = (imgpel) iClip1( max_imgpel_value, rshift_rnd_sf(tblock[j][i], DQ_BITS) + mpr[j+joff][mb_x+i]);
}
#endif
}
#if defined(_M_IX86) && defined(_DEBUG)
/*!
 ************************************************************************
 * \brief
 *    4x4 inverse transform plus reconstruction in MSVC inline assembly
 *    (compiled only when both _M_IX86 and _DEBUG are defined).
 *
 *    Despite the name, only MMX registers (mm0-mm7) are used. The block of
 *    16-bit coefficients at (pos_y, pos_x) in tblock is transposed, run
 *    through the horizontal butterfly, transposed again, run through the
 *    vertical butterfly, rounded with (x + 32) >> 6, added to the prediction
 *    in mb_pred and stored, saturated to 0..255, into mb_rec.
 *    tblock is only read; all intermediate values stay in registers.
 *
 * \param tblock   coefficients (shorts); rows are 32 bytes apart
 * \param mb_pred  prediction samples; rows are 16 bytes apart
 * \param mb_rec   destination samples; rows are 16 bytes apart
 * \param pos_x    column of the block
 * \param pos_y    row of the block
 *
 * \note
 *    NOTE(review): no EMMS is executed in this function; confirm that
 *    callers clear the MMX state before any x87 code runs.
 ************************************************************************
 */
void itrans4x4_sse2(const h264_short_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
{
__declspec(align(32)) static const short const32[4] = {32, 32, 32, 32};
__asm
{
// edx: element offset of the block = pos_y*16 + pos_x
mov edx, pos_y
shl edx, 4 // imgpel stuff is going to be 16 byte stride
add edx, pos_x
// eax: tblock (element offset doubled because the coefficients are shorts)
lea eax, [edx*2]
add eax, tblock
// ecx: mpr
mov ecx, mb_pred
add ecx, edx
// edx: results
add edx, mb_rec
// load 4x4 matrix
movq mm0, MMWORD PTR 0[eax]
movq mm1, MMWORD PTR 32[eax]
movq mm2, MMWORD PTR 64[eax]
movq mm3, MMWORD PTR 96[eax]
// rotate 4x4 matrix
movq mm4, mm0 // p0 = mm4 (copy)
punpcklwd mm0, mm2 // r0 = mm0
punpckhwd mm4, mm2 // r2 = mm4
movq mm5, mm1 // p1 = mm5 (copy)
punpcklwd mm1, mm3 // r1 = mm1
punpckhwd mm5, mm3 // r3 = mm5
movq mm6, mm0 // r0 = mm6 (copy)
punpcklwd mm0, mm1 // t0 = mm0
punpckhwd mm6, mm1 // t1 = mm6
movq mm1, mm4 // r2 = mm1 (copy)
punpcklwd mm1, mm5 // t2 = mm1
punpckhwd mm4, mm5 // t3 = mm4
/* register state:
mm0: t0
mm1: t2
mm2:
mm3:
mm4: t3
mm5:
mm6: t1
mm7:
*/
/*
p0 = t0 + t2;
p1 = t0 - t2;
p2 = (t1 >> 1) - t3;
p3 = t1 + (t3 >> 1);
*/
movq mm2, mm0 // mm2 = t0 (copy)
paddw mm0, mm1 // mm0 = p0
psubw mm2, mm1 // mm2 = p1, mm1 available
movq mm5, mm6 // mm5 = t1 (copy)
psraw mm5, 1 // mm5 = (t1 >> 1)
psubw mm5, mm4 // mm5 = p2
psraw mm4, 1 // mm4 = (t3 >> 1)
paddw mm6, mm4 // mm6 = p3
/* register state:
mm0: p0
mm1:
mm2: p1
mm3:
mm4:
mm5: p2
mm6: p3
mm7:
*/
/*
*(pTmp++) = p0 + p3;
*(pTmp++) = p1 + p2;
*(pTmp++) = p1 - p2;
*(pTmp++) = p0 - p3;
*/
movq mm3, mm0 // mm3 = p0 (copy)
paddw mm0, mm6 // mm0 = r0
movq mm1, mm2 // mm1 = p1 (copy)
paddw mm1, mm5 // mm1 = r1
psubw mm2, mm5 // mm2 = r2, mm5 available
psubw mm3, mm6 // mm3 = r3
/* register state:
mm0: r0
mm1: r1
mm2: r2
mm3: r3
mm4:
mm5:
mm6:
mm7:
*/
// rotate 4x4 matrix to set up for vertical
movq mm4, mm0 // r0 = mm4 (copy)
punpcklwd mm0, mm2 // p0 = mm0
punpckhwd mm4, mm2 // p2 = mm4
movq mm5, mm1 // r1 = mm5 (copy)
punpcklwd mm1, mm3 // p1 = mm1
punpckhwd mm5, mm3 // p3 = mm5
movq mm6, mm0 // p0 = mm6 (copy)
punpcklwd mm0, mm1 // t0 = mm0
punpckhwd mm6, mm1 // t1 = mm6
movq mm1, mm4 // p2 = mm1 (copy)
punpcklwd mm1, mm5 // t2 = mm1
punpckhwd mm4, mm5 // t3 = mm4
/* register state:
mm0: t0
mm1: t2
mm2:
mm3:
mm4: t3
mm5:
mm6: t1
mm7:
*/
/*
p0 = t0 + t2;
p1 = t0 - t2;
p2 = (t1 >> 1) - t3;
p3 = t1 + (t3 >> 1);
*/
movq mm2, mm0 // mm2 = t0 (copy)
paddw mm0, mm1 // mm0 = p0
psubw mm2, mm1 // mm2 = p1, mm1 available
movq mm5, mm6 // mm5 = t1 (copy)
psraw mm5, 1 // mm5 = (t1 >> 1)
psubw mm5, mm4 // mm5 = p2
psraw mm4, 1 // mm4 = (t3 >> 1)
paddw mm6, mm4 // mm6 = p3
/* register state:
mm0: p0
mm1:
mm2: p1
mm3:
mm4:
mm5: p2
mm6: p3
mm7:
*/
/*
*(pTmp++) = p0 + p3;
*(pTmp++) = p1 + p2;
*(pTmp++) = p1 - p2;
*(pTmp++) = p0 - p3;
*/
movq mm3, mm0 // mm3 = p0 (copy)
paddw mm0, mm6 // mm0 = r0
movq mm1, mm2 // mm1 = p1 (copy)
paddw mm1, mm5 // mm1 = r1
psubw mm2, mm5 // mm2 = r2, mm5 available
psubw mm3, mm6 // mm3 = r3
/* register state:
mm0: r0
mm1: r1
mm2: r2
mm3: r3
mm4:
mm5:
mm6:
mm7:
*/
/* --- 4x4 iDCT done, now time to combine with mpr --- */
// mm7 : constant value 32 (rounding term); mm7 is cleared again below once rounding is done
movq mm7, const32
paddw mm0, mm7 // rres + 32
psraw mm0, 6 // (rres + 32) >> 6
paddw mm1, mm7 // rres + 32
psraw mm1, 6 // (rres + 32) >> 6
paddw mm2, mm7 // rres + 32
psraw mm2, 6 // (rres + 32) >> 6
paddw mm3, mm7 // rres + 32
psraw mm3, 6 // (rres + 32) >> 6
// mm7: zero, used to widen the prediction bytes and as the upper half when packing
pxor mm7, mm7
// convert mpr from unsigned char to short
movd mm4, DWORD PTR 0[ecx]
movd mm5, DWORD PTR 16[ecx]
movd mm6, DWORD PTR 32[ecx]
punpcklbw mm4, mm7
punpcklbw mm5, mm7
punpcklbw mm6, mm7
paddsw mm4, mm0 // pred_row + rres_row (signed saturating add)
movd mm0, DWORD PTR 48[ecx] // reuse mm0 for mpr[3]
paddsw mm5, mm1 // pred_row + rres_row
punpcklbw mm0, mm7
paddsw mm6, mm2 // pred_row + rres_row
paddsw mm0, mm3 // pred_row + rres_row
// results in mm4, mm5, mm6, mm0
// move back to 8 bit; packuswb saturates each value to 0..255
packuswb mm4, mm7
packuswb mm5, mm7
packuswb mm6, mm7
packuswb mm0, mm7
// store four reconstructed pixels per row
movd DWORD PTR 0[edx], mm4
movd DWORD PTR 16[edx], mm5
movd DWORD PTR 32[edx], mm6
movd DWORD PTR 48[edx], mm0
}
}
#elif defined(_M_X64)
/*!
 ************************************************************************
 * \brief
 *    4x4 inverse transform plus reconstruction using SSE2 intrinsics
 *    (x64 build).
 *
 *    Loads the 4x4 block of 32-bit coefficients at (pos_y, pos_x) from
 *    tblock, transposes it, applies the horizontal butterfly, transposes
 *    again, applies the vertical butterfly, rounds with (x + 32) >> 6, adds
 *    the prediction from mb_pred and writes the result, saturated to
 *    0..255, to mb_rec. tblock is only read.
 *
 * \param tblock   coefficients (ints), indexed [row][column]
 * \param mb_pred  prediction samples, four consecutive bytes per row are read
 * \param mb_rec   destination samples, four consecutive bytes per row are written
 * \param pos_x    column of the block
 * \param pos_y    row of the block
 *
 * \note
 *    The prediction is added with a saturating 16-bit add, as the MMX
 *    variants do with paddsw. A wrapping add would turn a residual that
 *    _mm_packs_epi32 clamped near +32767 into a negative value, and the
 *    final unsigned pack would then emit 0 instead of 255.
 ************************************************************************
 */
static void itrans4x4_sse2(const h264_int_macroblock_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
{
  __declspec(align(32)) static const int const32[4] = {32, 32, 32, 32};
  __m128i p0,p1,p2,p3;
  __m128i t0,t1,t2,t3;
  __m128i r0,r1,r2,r3;
  __m128i c32, zero;

  // horizontal
  // load registers in vertical mode, we'll rotate them next
  p0 = _mm_loadu_si128((__m128i *)&tblock[pos_y][pos_x]);   // 00 01 02 03
  p1 = _mm_loadu_si128((__m128i *)&tblock[pos_y+1][pos_x]); // 10 11 12 13
  p2 = _mm_loadu_si128((__m128i *)&tblock[pos_y+2][pos_x]); // 20 21 22 23
  p3 = _mm_loadu_si128((__m128i *)&tblock[pos_y+3][pos_x]); // 30 31 32 33

  // rotate (transpose) 4x4 matrix
  r0 = _mm_unpacklo_epi32(p0, p2); // 00 20 01 21
  r1 = _mm_unpacklo_epi32(p1, p3); // 10 30 11 31
  r2 = _mm_unpackhi_epi32(p0, p2); // 02 22 03 23
  r3 = _mm_unpackhi_epi32(p1, p3); // 12 32 13 33
  t0 = _mm_unpacklo_epi32(r0, r1); // 00 10 20 30
  t1 = _mm_unpackhi_epi32(r0, r1); // 01 11 21 31
  t2 = _mm_unpacklo_epi32(r2, r3); // 02 12 22 32
  t3 = _mm_unpackhi_epi32(r2, r3); // 03 13 23 33

  p0 = _mm_add_epi32(t0, t2);  // t0 + t2
  p1 = _mm_sub_epi32(t0, t2);  // t0 - t2
  p2 = _mm_srai_epi32(t1, 1);  // t1 >> 1
  p2 = _mm_sub_epi32(p2, t3);  // (t1 >> 1) - t3
  p3 = _mm_srai_epi32(t3, 1);  // t3 >> 1
  p3 = _mm_add_epi32(p3, t1);  // t1 + (t3 >> 1)

  t0 = _mm_add_epi32(p0, p3);  // p0 + p3
  t1 = _mm_add_epi32(p1, p2);  // p1 + p2
  t2 = _mm_sub_epi32(p1, p2);  // p1 - p2
  t3 = _mm_sub_epi32(p0, p3);  // p0 - p3

  // rotate 4x4 matrix to set up for vertical
  r0 = _mm_unpacklo_epi32(t0, t2);
  r1 = _mm_unpacklo_epi32(t1, t3);
  r2 = _mm_unpackhi_epi32(t0, t2);
  r3 = _mm_unpackhi_epi32(t1, t3);
  t0 = _mm_unpacklo_epi32(r0, r1);
  t1 = _mm_unpackhi_epi32(r0, r1);
  t2 = _mm_unpacklo_epi32(r2, r3);
  t3 = _mm_unpackhi_epi32(r2, r3);

  // vertical
  p0 = _mm_add_epi32(t0, t2);  // t0 + t2
  p3 = _mm_srai_epi32(t3, 1);  // t3 >> 1
  p3 = _mm_add_epi32(p3, t1);  // t1 + (t3 >> 1)
  r0 = _mm_add_epi32(p0, p3);  // p0 + p3
  r3 = _mm_sub_epi32(p0, p3);  // p0 - p3
  p1 = _mm_sub_epi32(t0, t2);  // t0 - t2
  p2 = _mm_srai_epi32(t1, 1);  // t1 >> 1
  p2 = _mm_sub_epi32(p2, t3);  // (t1 >> 1) - t3
  r1 = _mm_add_epi32(p1, p2);  // p1 + p2
  r2 = _mm_sub_epi32(p1, p2);  // p1 - p2

  c32 = _mm_load_si128((const __m128i *)const32);
  zero = _mm_setzero_si128();

  // (x + 32) >> 6
  r0 = _mm_add_epi32(r0, c32);
  r0 = _mm_srai_epi32(r0, 6);
  r1 = _mm_add_epi32(r1, c32);
  r1 = _mm_srai_epi32(r1, 6);
  r2 = _mm_add_epi32(r2, c32);
  r2 = _mm_srai_epi32(r2, 6);
  r3 = _mm_add_epi32(r3, c32);
  r3 = _mm_srai_epi32(r3, 6);

  // convert to 16bit values (signed saturation): r0 = rows 0,1  r2 = rows 2,3
  r0 = _mm_packs_epi32(r0, r1);
  r2 = _mm_packs_epi32(r2, r3);

  // convert mpr from unsigned char to short, rows 0 and 1
  p0 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y][pos_x]);
  p1 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+1][pos_x]);
  p0 = _mm_unpacklo_epi32(p0, p1);
  p0 = _mm_unpacklo_epi8(p0, zero); // convert to short
  // saturating add: the residual may already sit at the int16 limit
  r0 = _mm_adds_epi16(r0, p0);

  // rows 2 and 3
  p0 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+2][pos_x]);
  p1 = _mm_cvtsi32_si128(*(int32_t *)&mb_pred[pos_y+3][pos_x]);
  p0 = _mm_unpacklo_epi32(p0, p1);
  p0 = _mm_unpacklo_epi8(p0, zero); // convert to short
  r2 = _mm_adds_epi16(r2, p0);

  r0 = _mm_packus_epi16(r0, r2); // convert to unsigned char, saturating to 0..255

  // write the four rows, shifting the next four bytes into the low dword each time
  *(int32_t *)&mb_rec[pos_y][pos_x] = _mm_cvtsi128_si32(r0);
  r0 = _mm_srli_si128(r0, 4);
  *(int32_t *)&mb_rec[pos_y+1][pos_x] = _mm_cvtsi128_si32(r0);
  r0 = _mm_srli_si128(r0, 4);
  *(int32_t *)&mb_rec[pos_y+2][pos_x] = _mm_cvtsi128_si32(r0);
  r0 = _mm_srli_si128(r0, 4);
  *(int32_t *)&mb_rec[pos_y+3][pos_x] = _mm_cvtsi128_si32(r0);
}
#endif
/*!
 ************************************************************************
 * \brief
 *    Run inverse4x4 on tblock, then reconstruct the block into mb_rec with
 *    sample_reconstruct using mb_pred as the prediction.
 *
 * \param tblock   residual/coefficient block handed to both helpers
 * \param mb_pred  prediction samples
 * \param mb_rec   destination samples
 * \param pos_x    column passed on to both helpers
 * \param pos_y    row passed on to both helpers
 ************************************************************************
 */
void itrans4x4_c(const h264_short_block_t tblock, const h264_imgpel_macroblock_t mb_pred, h264_imgpel_macroblock_t mb_rec, int pos_x, int pos_y)
{
// tblock is passed as both arguments, the second with const cast away.
// NOTE(review): presumably inverse4x4 transforms in place and overwrites the
// caller's coefficients -- confirm against inverse4x4, which is outside this view.
inverse4x4(tblock, (h264_short_block_row_t *)tblock,pos_y,pos_x);
// the clip ceiling (max_imgpel_value) is hard-coded to 255 here
sample_reconstruct(mb_rec, mb_pred, tblock, pos_y, pos_x, 255);
}
/*!
 ************************************************************************
 * \brief
 *    In-place 4x4 Hadamard butterfly: one pass over the rows into a scratch
 *    matrix, then one pass over the scratch columns back into block.
 *    No scaling or rounding is applied.
 *
 * \param block
 *    4x4 matrix, overwritten with the result
 ************************************************************************
 */
void ihadamard4x4(int block[4][4])
{
  int stage[4][4];   // output of the row pass
  int row, col;

  // Row pass
  for (row = 0; row < BLOCK_SIZE; row++)
  {
    const int a = block[row][0];
    const int b = block[row][1];
    const int c = block[row][2];
    const int d = block[row][3];
    const int sum_ac  = a + c;
    const int diff_ac = a - c;
    const int diff_bd = b - d;
    const int sum_bd  = b + d;

    stage[row][0] = sum_ac  + sum_bd;
    stage[row][1] = diff_ac + diff_bd;
    stage[row][2] = diff_ac - diff_bd;
    stage[row][3] = sum_ac  - sum_bd;
  }

  // Column pass
  for (col = 0; col < BLOCK_SIZE; col++)
  {
    const int a = stage[0][col];
    const int b = stage[1][col];
    const int c = stage[2][col];
    const int d = stage[3][col];
    const int sum_ac  = a + c;
    const int diff_ac = a - c;
    const int diff_bd = b - d;
    const int sum_bd  = b + d;

    block[0][col] = sum_ac  + sum_bd;
    block[1][col] = diff_ac + diff_bd;
    block[2][col] = diff_ac - diff_bd;
    block[3][col] = sum_ac  - sum_bd;
  }
}
/*!
 ************************************************************************
 * \brief
 *    Hadamard butterfly on a 2x4 input producing a 4x2 (transposed) output.
 *
 * \param tblock
 *    input, 2 rows of 4 values; only read
 * \param block
 *    output, 4 rows of which columns 0 and 1 are written
 ************************************************************************
 */
void ihadamard4x2(int **tblock, int **block)
{
  int half[2][4];   // half[0]: row0 + row1, half[1]: row0 - row1
  int k;

  // First stage: combine the two input rows element by element
  for (k = 0; k < 4; k++)
  {
    half[0][k] = tblock[0][k] + tblock[1][k];
    half[1][k] = tblock[0][k] - tblock[1][k];
  }

  // Second stage: 4-point butterfly on each combined row, stored as a column
  for (k = 0; k < 2; k++)
  {
    const int *v = half[k];
    const int even_sum  = v[0] + v[2];
    const int even_diff = v[0] - v[2];
    const int odd_diff  = v[1] - v[3];
    const int odd_sum   = v[1] + v[3];

    // coefficients (transposed)
    block[0][k] = even_sum  + odd_sum;
    block[1][k] = even_diff + odd_diff;
    block[2][k] = even_diff - odd_diff;
    block[3][k] = even_sum  - odd_sum;
  }
}
/*!
 ************************************************************************
 * \brief
 *    2x2 Hadamard butterfly on four values (8 add/sub operations in total).
 *    All inputs are read before any output is written.
 *
 * \param tblock
 *    four input values
 * \param block
 *    four output values
 ************************************************************************
 */
void ihadamard2x2(int tblock[4], int block[4])
{
  const int first_sum   = tblock[0] + tblock[1];
  const int first_diff  = tblock[0] - tblock[1];
  const int second_sum  = tblock[2] + tblock[3];
  const int second_diff = tblock[2] - tblock[3];

  block[0] = first_sum  + second_sum;
  block[1] = first_diff + second_diff;
  block[2] = first_sum  - second_sum;
  block[3] = first_diff - second_diff;
}

View file

@ -0,0 +1,67 @@
/*!
*************************************************************************************
* \file win32.c
*
* \brief
* Platform dependent code
*
* \author
* Main contributors (see contributors.h for copyright, address and affiliation details)
* - Karsten Suehring <suehring@hhi.de>
*************************************************************************************
*/
#include "global.h"
#ifdef _WIN32
// Performance-counter frequency; zero until timenorm() fills it in on its first call.
static LARGE_INTEGER freq;
/*!
 ************************************************************************
 * \brief
 *    Store the current performance-counter reading in *time (Windows build).
 *    The return value of QueryPerformanceCounter is not checked.
 ************************************************************************
 */
void gettime(TIME_T* time)
{
QueryPerformanceCounter(time);
}
/*!
 ************************************************************************
 * \brief
 *    Return end - start in raw performance-counter ticks (Windows build).
 *    The result is not scaled; timenorm() below divides by the counter
 *    frequency.
 ************************************************************************
 */
int64 timediff(TIME_T* start, TIME_T* end)
{
return (int64)((end->QuadPart - start->QuadPart));
}
/*!
 ************************************************************************
 * \brief
 *    Scale a performance-counter tick count to cur_time * 1000 / frequency
 *    (Windows build). The counter frequency is queried once, on the first
 *    call, and cached in the file-scope variable freq.
 *
 * \param cur_time
 *    tick count
 * \return
 *    cur_time * 1000 divided by the counter frequency
 ************************************************************************
 */
int64 timenorm(int64 cur_time)
{
  static int freq_pending = 1;   // non-zero until the frequency has been fetched
  int64 scaled;

  if (freq_pending != 0)
  {
    QueryPerformanceFrequency(&freq);
    freq_pending = 0;
  }

  scaled = cur_time * 1000;
  return (int64)(scaled / (freq.QuadPart));
}
#else
/*!
 ************************************************************************
 * \brief
 *    Store the current wall-clock time in *time (non-Windows build).
 *
 *    The timezone argument of gettimeofday is passed as NULL: POSIX leaves
 *    the behaviour unspecified for a non-null pointer and the timezone was
 *    never used by this file. The return value is not checked.
 ************************************************************************
 */
void gettime(TIME_T* time)
{
  gettimeofday(time, NULL);
}
/*!
 ************************************************************************
 * \brief
 *    Return end - start in microseconds (non-Windows build).
 *
 *    Both field differences are computed in int64 so that a seconds span
 *    larger than INT_MAX (for example a zero-initialised start value) is
 *    not truncated before it is scaled.
 *
 * \param start  earlier time stamp
 * \param end    later time stamp
 * \return
 *    (end->tv_sec - start->tv_sec) * 1000000 + (end->tv_usec - start->tv_usec)
 ************************************************************************
 */
int64 timediff(TIME_T* start, TIME_T* end)
{
  const int64 sec_delta  = (int64) end->tv_sec  - (int64) start->tv_sec;
  const int64 usec_delta = (int64) end->tv_usec - (int64) start->tv_usec;

  return usec_delta + sec_delta * (int64) 1000000;
}
/*!
 ************************************************************************
 * \brief
 *    Divide a time value by 1000 (non-Windows build). With the microsecond
 *    result of timediff() above as input this yields milliseconds.
 *
 * \param cur_time
 *    value to scale
 * \return
 *    cur_time / 1000, using integer division
 ************************************************************************
 */
int64 timenorm(int64 cur_time)
{
  const int64 divisor = (int64) 1000;

  return (int64)(cur_time / divisor);
}
#endif