Initial community commit
This commit is contained in:
parent
537bcbc862
commit
fc06254474
16440 changed files with 4239995 additions and 2 deletions
66
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/celt_lpc_sse.h
vendored
Normal file
66
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/celt_lpc_sse.h
vendored
Normal file
|
@ -0,0 +1,66 @@
|
|||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CELT_LPC_SSE_H
|
||||
#define CELT_LPC_SSE_H
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
|
||||
#define OVERRIDE_CELT_FIR
|
||||
|
||||
void celt_fir_sse4_1(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *num,
|
||||
opus_val16 *y,
|
||||
int N,
|
||||
int ord,
|
||||
int arch);
|
||||
|
||||
#if defined(OPUS_X86_PRESUME_SSE4_1)
|
||||
#define celt_fir(x, num, y, N, ord, arch) \
|
||||
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch))
|
||||
|
||||
#else
|
||||
|
||||
extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *num,
|
||||
opus_val16 *y,
|
||||
int N,
|
||||
int ord,
|
||||
int arch);
|
||||
|
||||
# define celt_fir(x, num, y, N, ord, arch) \
|
||||
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch))
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
89
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/celt_lpc_sse4_1.c
vendored
Normal file
89
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/celt_lpc_sse4_1.c
vendored
Normal file
|
@ -0,0 +1,89 @@
|
|||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include "celt_lpc.h"
|
||||
#include "stack_alloc.h"
|
||||
#include "mathops.h"
|
||||
#include "pitch.h"
|
||||
#include "x86cpu.h"
|
||||
|
||||
#if defined(FIXED_POINT)
|
||||
|
||||
/* SSE4.1 fixed-point FIR filter.
 *
 * For every n in [0, N) this writes
 *     y[n] = SATURATE16(x[n] + round(sum_{k=0}^{ord-1} num[k] * x[n-1-k]))
 * where round() adds half an LSB and shifts right by SIG_SHIFT.
 *
 *   x    input signal; the ord samples preceding x[0] are read as history
 *   num  ord filter taps
 *   y    output, N samples
 *   N    number of samples to produce
 *   ord  filter order
 *   arch CPU capability index forwarded to xcorr_kernel()
 *
 * Four outputs are produced per iteration through xcorr_kernel(); any
 * remaining 1..3 outputs are computed with the scalar macros.
 */
void celt_fir_sse4_1(const opus_val16 *x,
                     const opus_val16 *num,
                     opus_val16 *y,
                     int N,
                     int ord,
                     int arch)
{
    int n, k;
    VARDECL(opus_val16, rnum);

    __m128i roundVec;
    opus_int32 roundBias;
    SAVE_STACK;

    ALLOC(rnum, ord, opus_val16);

    /* Time-reverse the taps so the kernel can walk x forwards. */
    for (k = ord - 1; k >= 0; k--)
        rnum[ord - 1 - k] = num[k];

    /* Half an LSB at the SIG_SHIFT position, replicated in all four lanes. */
    roundBias = EXTEND32(1) << SIG_SHIFT >> 1;
    roundVec = _mm_set_epi32(roundBias, roundBias, roundBias, roundBias);

    n = 0;
    while (n < N - 3)
    {
        opus_val32 corr[4] = {0};
        __m128i acc, in;

        /* corr[m] = sum_k rnum[k] * x[n + m + k - ord], m = 0..3 */
        xcorr_kernel(rnum, x + n - ord, corr, ord, arch);

        acc = _mm_loadu_si128((__m128i *)corr);
        acc = _mm_srai_epi32(_mm_add_epi32(acc, roundVec), SIG_SHIFT);

        /* Add the sign-extended input samples x[n..n+3]. */
        in = OP_CVTEPI16_EPI32_M64(x + n);
        acc = _mm_add_epi32(acc, in);

        /* Saturate to 16 bits and store the low four results. */
        _mm_storel_epi64((__m128i *)(y + n), _mm_packs_epi32(acc, acc));

        n += 4;
    }

    /* Scalar path for the last N % 4 samples. */
    while (n < N)
    {
        opus_val32 acc = 0;
        for (k = 0; k < ord; k++)
            acc = MAC16_16(acc, rnum[k], x[n + k - ord]);
        y[n] = SATURATE16(ADD32(EXTEND32(x[n]), PSHR32(acc, SIG_SHIFT)));
        n++;
    }

    RESTORE_STACK;
}
|
||||
|
||||
#endif
|
185
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/pitch_sse.c
vendored
Normal file
185
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/pitch_sse.c
vendored
Normal file
|
@ -0,0 +1,185 @@
|
|||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "macros.h"
|
||||
#include "celt_lpc.h"
|
||||
#include "stack_alloc.h"
|
||||
#include "mathops.h"
|
||||
#include "pitch.h"
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include "arch.h"
|
||||
|
||||
/* Float SSE cross-correlation kernel.
 *
 * Adds to sum[k], for k = 0..3, the value sum_{j=0}^{len-1} x[j] * y[j+k].
 * Reads x[0 .. len-1] and y[0 .. len+2]; sum[] is both input and output.
 *
 * Two accumulators are used alternately to shorten the dependency chain;
 * they are combined once at the end.
 */
void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
{
    int pos;
    int left;
    __m128 accA, accB;

    accA = _mm_loadu_ps(sum);
    accB = _mm_setzero_ps();

    /* Main loop: four x samples per iteration. */
    for (pos = 0; pos < len-3; pos += 4)
    {
        __m128 xv  = _mm_loadu_ps(x+pos);
        __m128 yk0 = _mm_loadu_ps(y+pos);     /* y[pos   .. pos+3] */
        __m128 yk3 = _mm_loadu_ps(y+pos+3);   /* y[pos+3 .. pos+6] */
        __m128 yk1 = _mm_shuffle_ps(yk0, yk3, 0x49);  /* y[pos+1 .. pos+4] */
        __m128 yk2 = _mm_shuffle_ps(yk0, yk3, 0x9e);  /* y[pos+2 .. pos+5] */

        accA = _mm_add_ps(accA, _mm_mul_ps(_mm_shuffle_ps(xv, xv, 0x00), yk0));
        accB = _mm_add_ps(accB, _mm_mul_ps(_mm_shuffle_ps(xv, xv, 0x55), yk1));
        accA = _mm_add_ps(accA, _mm_mul_ps(_mm_shuffle_ps(xv, xv, 0xaa), yk2));
        accB = _mm_add_ps(accB, _mm_mul_ps(_mm_shuffle_ps(xv, xv, 0xff), yk3));
    }

    /* Up to three leftover x samples, one broadcast multiply each. */
    left = len - pos;
    if (left > 0)
        accA = _mm_add_ps(accA, _mm_mul_ps(_mm_load1_ps(x+pos), _mm_loadu_ps(y+pos)));
    if (left > 1)
        accB = _mm_add_ps(accB, _mm_mul_ps(_mm_load1_ps(x+pos+1), _mm_loadu_ps(y+pos+1)));
    if (left > 2)
        accA = _mm_add_ps(accA, _mm_mul_ps(_mm_load1_ps(x+pos+2), _mm_loadu_ps(y+pos+2)));

    _mm_storeu_ps(sum, _mm_add_ps(accA, accB));
}
|
||||
|
||||
|
||||
/* Float SSE dual inner product.
 *
 * Computes two dot products that share the same left operand:
 *     *xy1 = sum_{i=0}^{N-1} x[i] * y01[i]
 *     *xy2 = sum_{i=0}^{N-1} x[i] * y02[i]
 * The first N - N%4 terms are summed four lanes at a time and reduced
 * horizontally; the remaining terms are added with MAC16_16().
 */
void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
      int N, opus_val32 *xy1, opus_val32 *xy2)
{
    int k;
    __m128 acc1, acc2;

    acc1 = _mm_setzero_ps();
    acc2 = _mm_setzero_ps();

    k = 0;
    while (k < N-3)
    {
        __m128 xv = _mm_loadu_ps(x+k);
        acc1 = _mm_add_ps(acc1, _mm_mul_ps(xv, _mm_loadu_ps(y01+k)));
        acc2 = _mm_add_ps(acc2, _mm_mul_ps(xv, _mm_loadu_ps(y02+k)));
        k += 4;
    }

    /* Horizontal sum of acc1: fold high pair onto low pair, then lane 1 onto lane 0. */
    acc1 = _mm_add_ps(acc1, _mm_movehl_ps(acc1, acc1));
    acc1 = _mm_add_ss(acc1, _mm_shuffle_ps(acc1, acc1, 0x55));
    _mm_store_ss(xy1, acc1);

    /* Same reduction for acc2. */
    acc2 = _mm_add_ps(acc2, _mm_movehl_ps(acc2, acc2));
    acc2 = _mm_add_ss(acc2, _mm_shuffle_ps(acc2, acc2, 0x55));
    _mm_store_ss(xy2, acc2);

    /* Remaining N % 4 terms. */
    while (k < N)
    {
        *xy1 = MAC16_16(*xy1, x[k], y01[k]);
        *xy2 = MAC16_16(*xy2, x[k], y02[k]);
        k++;
    }
}
|
||||
|
||||
/* Float SSE inner product: returns sum_{i=0}^{N-1} x[i] * y[i].
 *
 * The first N - N%4 terms are accumulated in four parallel lanes and then
 * reduced horizontally; the remaining terms are added with MAC16_16().
 */
opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
      int N)
{
    int k;
    float result;
    __m128 acc;

    acc = _mm_setzero_ps();

    /* FIXME: We should probably go 8-way and use 2 sums. */
    k = 0;
    while (k < N-3)
    {
        acc = _mm_add_ps(acc, _mm_mul_ps(_mm_loadu_ps(x+k), _mm_loadu_ps(y+k)));
        k += 4;
    }

    /* Horizontal sum: fold high pair onto low pair, then lane 1 onto lane 0. */
    acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
    acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
    _mm_store_ss(&result, acc);

    /* Remaining N % 4 terms. */
    while (k < N)
    {
        result = MAC16_16(result, x[k], y[k]);
        k++;
    }
    return result;
}
|
||||
|
||||
/* Float SSE comb filter with constant gains.
 *
 * For each output sample:
 *     y[i] = x[i] + g10 *  x[i-T]
 *                 + g11 * (x[i-T+1] + x[i-T-1])
 *                 + g12 * (x[i-T+2] + x[i-T-2])
 *
 * The vector loop produces four outputs per iteration and therefore covers
 * only the first N - N%4 samples. The scalar remainder loop is compiled in
 * only when CUSTOM_MODES is defined.
 */
void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
      opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
    int n;
    __m128 lagM2;                 /* x[n-T-2 .. n-T+1], carried between iterations */
    __m128 tap0, tap1, tap2;

    tap0 = _mm_load1_ps(&g10);
    tap1 = _mm_load1_ps(&g11);
    tap2 = _mm_load1_ps(&g12);
    lagM2 = _mm_loadu_ps(&x[-T-2]);

    for (n = 0; n < N-3; n += 4)
    {
        __m128 out, outSide, lagM1, lag0, lagP1, lagP2;
        const opus_val32 *hist = &x[n-T-2];

        out = _mm_loadu_ps(x+n);
        lagP2 = _mm_loadu_ps(hist+4);
#if 0
        /* Slower version with all loads */
        lagM1 = _mm_loadu_ps(hist+1);
        lag0 = _mm_loadu_ps(hist+2);
        lagP1 = _mm_loadu_ps(hist+3);
#else
        /* Build the three intermediate windows from the two loaded ones. */
        lag0 = _mm_shuffle_ps(lagM2, lagP2, 0x4e);
        lagM1 = _mm_shuffle_ps(lagM2, lag0, 0x99);
        lagP1 = _mm_shuffle_ps(lag0, lagP2, 0x99);
#endif

        out = _mm_add_ps(out, _mm_mul_ps(tap0, lag0));
#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
        out = _mm_add_ps(out, _mm_mul_ps(tap1, _mm_add_ps(lagP1, lagM1)));
        out = _mm_add_ps(out, _mm_mul_ps(tap2, _mm_add_ps(lagP2, lagM2)));
#else
        /* Use partial sums */
        outSide = _mm_add_ps(_mm_mul_ps(tap1, _mm_add_ps(lagP1, lagM1)),
                             _mm_mul_ps(tap2, _mm_add_ps(lagP2, lagM2)));
        out = _mm_add_ps(out, outSide);
#endif
        /* The newest window becomes the oldest one of the next iteration. */
        lagM2 = lagP2;
        _mm_storeu_ps(y+n, out);
    }
#ifdef CUSTOM_MODES
    for (; n < N; n++)
    {
        y[n] = x[n]
                 + MULT16_32_Q15(g10,x[n-T])
                 + MULT16_32_Q15(g11,ADD32(x[n-T+1],x[n-T-1]))
                 + MULT16_32_Q15(g12,ADD32(x[n-T+2],x[n-T-2]));
    }
#endif
}
|
||||
|
||||
|
||||
#endif
|
192
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/pitch_sse.h
vendored
Normal file
192
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/pitch_sse.h
vendored
Normal file
|
@ -0,0 +1,192 @@
|
|||
/* Copyright (c) 2013 Jean-Marc Valin and John Ridges
|
||||
Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/
|
||||
/**
|
||||
@file pitch_sse.h
|
||||
@brief Pitch analysis
|
||||
*/
|
||||
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef PITCH_SSE_H
|
||||
#define PITCH_SSE_H
|
||||
|
||||
#if defined(HAVE_CONFIG_H)
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
|
||||
void xcorr_kernel_sse4_1(
|
||||
const opus_int16 *x,
|
||||
const opus_int16 *y,
|
||||
opus_val32 sum[4],
|
||||
int len);
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
|
||||
void xcorr_kernel_sse(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y,
|
||||
opus_val32 sum[4],
|
||||
int len);
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
|
||||
#define OVERRIDE_XCORR_KERNEL
|
||||
#define xcorr_kernel(x, y, sum, len, arch) \
|
||||
((void)arch, xcorr_kernel_sse4_1(x, y, sum, len))
|
||||
|
||||
#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
|
||||
#define OVERRIDE_XCORR_KERNEL
|
||||
#define xcorr_kernel(x, y, sum, len, arch) \
|
||||
((void)arch, xcorr_kernel_sse(x, y, sum, len))
|
||||
|
||||
#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
|
||||
|
||||
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y,
|
||||
opus_val32 sum[4],
|
||||
int len);
|
||||
|
||||
#define OVERRIDE_XCORR_KERNEL
|
||||
#define xcorr_kernel(x, y, sum, len, arch) \
|
||||
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
|
||||
opus_val32 celt_inner_prod_sse4_1(
|
||||
const opus_int16 *x,
|
||||
const opus_int16 *y,
|
||||
int N);
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
|
||||
opus_val32 celt_inner_prod_sse2(
|
||||
const opus_int16 *x,
|
||||
const opus_int16 *y,
|
||||
int N);
|
||||
#endif
|
||||
|
||||
/* Float inner product, SSE implementation (defined in pitch_sse.c).
   The guard matches the one around the definition and around the dispatch
   macros below (OPUS_X86_MAY_HAVE_SSE / OPUS_X86_PRESUME_SSE), so that a
   float build with SSE but without SSE2 still sees this prototype. */
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse(
    const opus_val16 *x,
    const opus_val16 *y,
    int               N);
#endif
|
||||
|
||||
|
||||
#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
|
||||
#define OVERRIDE_CELT_INNER_PROD
|
||||
#define celt_inner_prod(x, y, N, arch) \
|
||||
((void)arch, celt_inner_prod_sse4_1(x, y, N))
|
||||
|
||||
#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
#define OVERRIDE_CELT_INNER_PROD
|
||||
#define celt_inner_prod(x, y, N, arch) \
|
||||
((void)arch, celt_inner_prod_sse2(x, y, N))
|
||||
|
||||
#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
|
||||
#define OVERRIDE_CELT_INNER_PROD
|
||||
#define celt_inner_prod(x, y, N, arch) \
|
||||
((void)arch, celt_inner_prod_sse(x, y, N))
|
||||
|
||||
|
||||
#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
|
||||
|
||||
extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y,
|
||||
int N);
|
||||
|
||||
#define OVERRIDE_CELT_INNER_PROD
|
||||
#define celt_inner_prod(x, y, N, arch) \
|
||||
((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N))
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
|
||||
|
||||
#define OVERRIDE_DUAL_INNER_PROD
|
||||
#define OVERRIDE_COMB_FILTER_CONST
|
||||
|
||||
#undef dual_inner_prod
|
||||
#undef comb_filter_const
|
||||
|
||||
void dual_inner_prod_sse(const opus_val16 *x,
|
||||
const opus_val16 *y01,
|
||||
const opus_val16 *y02,
|
||||
int N,
|
||||
opus_val32 *xy1,
|
||||
opus_val32 *xy2);
|
||||
|
||||
void comb_filter_const_sse(opus_val32 *y,
|
||||
opus_val32 *x,
|
||||
int T,
|
||||
int N,
|
||||
opus_val16 g10,
|
||||
opus_val16 g11,
|
||||
opus_val16 g12);
|
||||
|
||||
|
||||
#if defined(OPUS_X86_PRESUME_SSE)
|
||||
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
|
||||
((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
|
||||
|
||||
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
|
||||
((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
|
||||
#else
|
||||
|
||||
extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y01,
|
||||
const opus_val16 *y02,
|
||||
int N,
|
||||
opus_val32 *xy1,
|
||||
opus_val32 *xy2);
|
||||
|
||||
#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
|
||||
((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
|
||||
|
||||
extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
|
||||
opus_val32 *y,
|
||||
opus_val32 *x,
|
||||
int T,
|
||||
int N,
|
||||
opus_val16 g10,
|
||||
opus_val16 g11,
|
||||
opus_val16 g12);
|
||||
|
||||
#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
|
||||
((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
|
||||
|
||||
#define NON_STATIC_COMB_FILTER_CONST_C
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
95
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/pitch_sse2.c
vendored
Normal file
95
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/pitch_sse2.c
vendored
Normal file
|
@ -0,0 +1,95 @@
|
|||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "macros.h"
|
||||
#include "celt_lpc.h"
|
||||
#include "stack_alloc.h"
|
||||
#include "mathops.h"
|
||||
#include "pitch.h"
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
|
||||
opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y,
|
||||
int N)
|
||||
{
|
||||
opus_int i, dataSize16;
|
||||
opus_int32 sum;
|
||||
|
||||
__m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
|
||||
__m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
|
||||
|
||||
sum = 0;
|
||||
dataSize16 = N & ~15;
|
||||
|
||||
acc1 = _mm_setzero_si128();
|
||||
acc2 = _mm_setzero_si128();
|
||||
|
||||
for (i=0;i<dataSize16;i+=16)
|
||||
{
|
||||
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
|
||||
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
|
||||
|
||||
inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
|
||||
inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
|
||||
|
||||
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
|
||||
inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
|
||||
|
||||
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
|
||||
acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
|
||||
}
|
||||
|
||||
acc1 = _mm_add_epi32( acc1, acc2 );
|
||||
|
||||
if (N - i >= 8)
|
||||
{
|
||||
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
|
||||
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
|
||||
|
||||
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
|
||||
|
||||
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
|
||||
i += 8;
|
||||
}
|
||||
|
||||
acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1));
|
||||
acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E));
|
||||
sum += _mm_cvtsi128_si32(acc1);
|
||||
|
||||
for (;i<N;i++) {
|
||||
sum = silk_SMLABB(sum, x[i], y[i]);
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
#endif
|
195
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/pitch_sse4_1.c
vendored
Normal file
195
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/pitch_sse4_1.c
vendored
Normal file
|
@ -0,0 +1,195 @@
|
|||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "macros.h"
|
||||
#include "celt_lpc.h"
|
||||
#include "stack_alloc.h"
|
||||
#include "mathops.h"
|
||||
#include "pitch.h"
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
|
||||
#include <smmintrin.h>
|
||||
#include "x86cpu.h"
|
||||
|
||||
opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y,
|
||||
int N)
|
||||
{
|
||||
opus_int i, dataSize16;
|
||||
opus_int32 sum;
|
||||
__m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
|
||||
__m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
|
||||
__m128i inVec1_3210, inVec2_3210;
|
||||
|
||||
sum = 0;
|
||||
dataSize16 = N & ~15;
|
||||
|
||||
acc1 = _mm_setzero_si128();
|
||||
acc2 = _mm_setzero_si128();
|
||||
|
||||
for (i=0;i<dataSize16;i+=16) {
|
||||
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
|
||||
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
|
||||
|
||||
inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
|
||||
inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
|
||||
|
||||
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
|
||||
inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
|
||||
|
||||
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
|
||||
acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
|
||||
}
|
||||
|
||||
acc1 = _mm_add_epi32(acc1, acc2);
|
||||
|
||||
if (N - i >= 8)
|
||||
{
|
||||
inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
|
||||
inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
|
||||
|
||||
inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
|
||||
|
||||
acc1 = _mm_add_epi32(acc1, inVec1_76543210);
|
||||
i += 8;
|
||||
}
|
||||
|
||||
if (N - i >= 4)
|
||||
{
|
||||
inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]);
|
||||
inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]);
|
||||
|
||||
inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210);
|
||||
|
||||
acc1 = _mm_add_epi32(acc1, inVec1_3210);
|
||||
i += 4;
|
||||
}
|
||||
|
||||
acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1));
|
||||
acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E));
|
||||
|
||||
sum += _mm_cvtsi128_si32(acc1);
|
||||
|
||||
for (;i<N;i++)
|
||||
{
|
||||
sum = silk_SMLABB(sum, x[i], y[i]);
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* SSE4.1 fixed-point cross-correlation kernel.
 *
 * Adds to sum[k], for k = 0..3, the value sum_{j=0}^{len-1} x[j] * y[j+k],
 * accumulated in 32-bit integer lanes.
 *
 *   x    len samples; only x[0 .. len-1] is read
 *   y    at least len+3 samples; y[0 .. len+2] is read
 *   sum  four running totals, updated in place
 *   len  number of x samples (asserted to be >= 3)
 *
 * Stages: blocks of 8 samples using 16-bit multiply-add, blocks of 4 using
 * 32-bit multiplies, then a per-sample tail for the last 0..3 samples.
 */
void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[ 4 ], int len)
{
    int j;

    __m128i vecX, vecX0, vecX1, vecX2, vecX3;
    __m128i vecY0, vecY1, vecY2, vecY3;
    __m128i sum0, sum1, sum2, sum3, vecSum;
    __m128i initSum;

    celt_assert(len >= 3);

    sum0 = _mm_setzero_si128();
    sum1 = _mm_setzero_si128();
    sum2 = _mm_setzero_si128();
    sum3 = _mm_setzero_si128();

    /* 8 samples per iteration; sumK collects the partial products for lag K. */
    for (j=0;j<(len-7);j+=8)
    {
        vecX = _mm_loadu_si128((__m128i *)(&x[j + 0]));
        vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0]));
        vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1]));
        vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2]));
        vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3]));

        sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0));
        sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1));
        sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2));
        sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3));
    }

    /* Reduce each per-lag accumulator to a single value in lane 0. */
    sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0));
    sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E));

    sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1));
    sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E));

    sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2));
    sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E));

    sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3));
    sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E));

    /* Pack the four lag totals into one vector: lane k holds lag k. */
    vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1),
          _mm_unpacklo_epi32(sum2, sum3));

    /* 4 samples per iteration: broadcast each x sample against the y window
       starting at the same offset, so lane k again holds lag k. */
    for (;j<(len-3);j+=4)
    {
        vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
        vecX0 = _mm_shuffle_epi32(vecX, 0x00);
        vecX1 = _mm_shuffle_epi32(vecX, 0x55);
        vecX2 = _mm_shuffle_epi32(vecX, 0xaa);
        vecX3 = _mm_shuffle_epi32(vecX, 0xff);

        vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
        vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
        vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);
        vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]);

        sum0 = _mm_mullo_epi32(vecX0, vecY0);
        sum1 = _mm_mullo_epi32(vecX1, vecY1);
        sum2 = _mm_mullo_epi32(vecX2, vecY2);
        sum3 = _mm_mullo_epi32(vecX3, vecY3);

        sum0 = _mm_add_epi32(sum0, sum1);
        sum2 = _mm_add_epi32(sum2, sum3);
        vecSum = _mm_add_epi32(vecSum, sum0);
        vecSum = _mm_add_epi32(vecSum, sum2);
    }

    /* Tail: one x sample at a time. */
    for (;j<len;j++)
    {
        /* Broadcast x[j] from a scalar load. A 4-sample vector load here
           would read up to three elements past x[len-1], and only lane 0
           of it would be used. */
        vecX0 = _mm_set1_epi32(x[j]);

        vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);

        sum0 = _mm_mullo_epi32(vecX0, vecY0);
        vecSum = _mm_add_epi32(vecSum, sum0);
    }

    /* Add the new contributions to the caller's running totals. */
    initSum = _mm_loadu_si128((__m128i *)(&sum[0]));
    initSum = _mm_add_epi32(initSum, vecSum);
    _mm_storeu_si128((__m128i *)sum, initSum);
}
|
||||
#endif
|
50
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/vq_sse.h
vendored
Normal file
50
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/vq_sse.h
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* Copyright (c) 2016 Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef VQ_SSE_H
|
||||
#define VQ_SSE_H
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
/* An SSE2 PVQ search exists for this (non fixed-point) build, so tell the
   generic code that op_pvq_search() is being overridden here. */
# define OVERRIDE_OP_PVQ_SEARCH

opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);

# ifdef OPUS_X86_PRESUME_SSE2

/* SSE2 is presumed at build time: call the SSE2 routine directly. */
#  define op_pvq_search(X, iy, K, N, arch) \
    (op_pvq_search_sse2(X, iy, K, N, arch))

# else

/* Otherwise pick the routine at run time from a table indexed by the
   masked arch value. */
extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
      celt_norm *_X, int *iy, int K, int N, int arch);

#  define op_pvq_search(X, iy, K, N, arch) \
    ((*OP_PVQ_SEARCH_IMPL[(arch) & OPUS_ARCHMASK])(X, iy, K, N, arch))

# endif
#endif
|
||||
|
||||
#endif
|
218
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/vq_sse2.c
vendored
Normal file
218
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/vq_sse2.c
vendored
Normal file
|
@ -0,0 +1,218 @@
|
|||
/* Copyright (c) 2007-2008 CSIRO
|
||||
Copyright (c) 2007-2009 Xiph.Org Foundation
|
||||
Copyright (c) 2007-2016 Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include "celt_lpc.h"
|
||||
#include "stack_alloc.h"
|
||||
#include "mathops.h"
|
||||
#include "vq.h"
|
||||
#include "x86cpu.h"
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2) /* OpenMPT */
|
||||
#ifndef FIXED_POINT
|
||||
|
||||
/* SSE2 pulse search for PVQ (floating-point builds).

   The search runs on the magnitudes of _X. When K > N/2 a first pass scales
   the magnitudes and truncates them to integers, placing most pulses at
   once. The remaining pulses are then added one at a time, each at the
   index with the largest (xy + X[j]) * rsqrt(yy + y[j]). The signs of the
   input are re-applied to iy[] at the end.

   _X   : input vector, N values (copied; the copy is what gets modified)
   iy   : output pulse counts. It is written four ints at a time for
          j = 0, 4, 8, ... < N, so up to iy[N+2] may be touched when N is not
          a multiple of four.
   K    : number of pulses to place
   N    : vector dimension
   arch : unused by this implementation
   Returns the final value of the running yy accumulator. */
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
{
   int pulse, k;
   int remaining;
   float corr, energy;
   VARDECL(celt_norm, y);
   VARDECL(celt_norm, X);
   VARDECL(float, signy);
   __m128 sign_bit;
   __m128 abs_sum;
   __m128i step4;
   SAVE_STACK;

   (void)arch;
   step4 = _mm_set_epi32(4, 4, 4, 4);
   /* -0.f is the bit pattern with only the sign bit set. */
   sign_bit = _mm_set_ps1(-0.f);
   /* Three extra entries so the 4-wide loops may run past N. */
   ALLOC(y, N+3, celt_norm);
   ALLOC(X, N+3, celt_norm);
   ALLOC(signy, N+3, float);

   OPUS_COPY(X, _X, N);
   X[N] = X[N+1] = X[N+2] = 0;

   /* Pass 1: record which inputs are negative, replace X by |X|, sum the
      magnitudes, and zero y[] / iy[] in case the projection is skipped. */
   abs_sum = _mm_setzero_ps();
   for (k = 0; k < N; k += 4)
   {
      __m128 v, neg_mask;
      v = _mm_loadu_ps(&X[k]);
      neg_mask = _mm_cmplt_ps(v, _mm_setzero_ps());
      /* Clear the sign bit. */
      v = _mm_andnot_ps(sign_bit, v);
      abs_sum = _mm_add_ps(abs_sum, v);
      _mm_storeu_ps(&y[k], _mm_setzero_ps());
      _mm_storeu_si128((__m128i*)&iy[k], _mm_setzero_si128());
      _mm_storeu_ps(&X[k], v);
      _mm_storeu_ps(&signy[k], neg_mask);
   }
   /* Horizontal add: afterwards every lane holds the full sum. */
   abs_sum = _mm_add_ps(abs_sum, _mm_shuffle_ps(abs_sum, abs_sum, _MM_SHUFFLE(1, 0, 3, 2)));
   abs_sum = _mm_add_ps(abs_sum, _mm_shuffle_ps(abs_sum, abs_sum, _MM_SHUFFLE(2, 3, 0, 1)));

   corr = energy = 0;

   remaining = K;

   /* Pre-search: project onto the pyramid when there are many pulses. */
   if (K > (N>>1))
   {
      __m128i placed;
      __m128 energy4, corr4;
      __m128 scale4;
      opus_val32 sum = _mm_cvtss_f32(abs_sum);
      /* A sum that is tiny, 64 or larger, or NaN (the negated form of the
         test catches NaN too) is replaced by a single unit value at index 0,
         so the projection cannot hand out too many pulses. */
      if (!(sum > EPSILON && sum < 64))
      {
         X[0] = QCONST16(1.f,14);
         k = 1;
         do {
            X[k] = 0;
         } while (++k < N);
         abs_sum = _mm_set_ps1(1.f);
      }
      /* Scaling by K+e with e < 1 means truncation cannot yield more than
         K pulses in total. */
      scale4 = _mm_mul_ps(_mm_set_ps1((float)(K+.8)), _mm_rcp_ps(abs_sum));
      corr4 = energy4 = _mm_setzero_ps();
      placed = _mm_setzero_si128();
      for (k = 0; k < N; k += 4)
      {
         __m128 scaled, v, yv;
         __m128i iyv;
         v = _mm_loadu_ps(&X[k]);
         scaled = _mm_mul_ps(v, scale4);
         /* Truncating conversion to integer pulse counts. */
         iyv = _mm_cvttps_epi32(scaled);
         placed = _mm_add_epi32(placed, iyv);
         _mm_storeu_si128((__m128i*)&iy[k], iyv);
         yv = _mm_cvtepi32_ps(iyv);
         corr4 = _mm_add_ps(corr4, _mm_mul_ps(v, yv));
         energy4 = _mm_add_ps(energy4, _mm_mul_ps(yv, yv));
         /* y[] is kept doubled so the search loop below need not double it. */
         _mm_storeu_ps(&y[k], _mm_add_ps(yv, yv));
      }
      /* Reduce the three accumulators across lanes. */
      placed = _mm_add_epi32(placed, _mm_shuffle_epi32(placed, _MM_SHUFFLE(1, 0, 3, 2)));
      placed = _mm_add_epi32(placed, _mm_shuffle_epi32(placed, _MM_SHUFFLE(2, 3, 0, 1)));
      remaining -= _mm_cvtsi128_si32(placed);
      corr4 = _mm_add_ps(corr4, _mm_shuffle_ps(corr4, corr4, _MM_SHUFFLE(1, 0, 3, 2)));
      corr4 = _mm_add_ps(corr4, _mm_shuffle_ps(corr4, corr4, _MM_SHUFFLE(2, 3, 0, 1)));
      corr = _mm_cvtss_f32(corr4);
      energy4 = _mm_add_ps(energy4, _mm_shuffle_ps(energy4, energy4, _MM_SHUFFLE(1, 0, 3, 2)));
      energy4 = _mm_add_ps(energy4, _mm_shuffle_ps(energy4, energy4, _MM_SHUFFLE(2, 3, 0, 1)));
      energy = _mm_cvtss_f32(energy4);
   }
   /* Fill the padding entries with values that (presumably) can never win
      the maximum search below. */
   X[N] = X[N+1] = X[N+2] = -100;
   y[N] = y[N+1] = y[N+2] = 100;
   celt_sig_assert(remaining>=0);

   /* Not expected to happen, but if far too many pulses are left (e.g. on
      silence) put them all in the first bin. */
   if (remaining > N+3)
   {
      opus_val16 tmp = (opus_val16)remaining;
      energy = MAC16_16(energy, tmp, tmp);
      energy = MAC16_16(energy, tmp, y[0]);
      iy[0] += remaining;
      remaining = 0;
   }

   /* Greedy search: one pulse per iteration. */
   for (pulse = 0; pulse < remaining; pulse++)
   {
      int best_id;
      __m128 corr4, energy4;
      __m128 best, best_all;
      __m128i lane_idx;
      __m128i best_idx;
      /* The new pulse always adds 1 to the energy, whatever its position,
         so add it before scanning. */
      energy = ADD16(energy, 1);
      corr4 = _mm_load1_ps(&corr);
      energy4 = _mm_load1_ps(&energy);
      best = _mm_setzero_ps();
      best_idx = _mm_setzero_si128();
      lane_idx = _mm_set_epi32(3, 2, 1, 0);
      for (k = 0; k < N; k += 4)
      {
         __m128 num, den, score;
         num = _mm_loadu_ps(&X[k]);
         den = _mm_loadu_ps(&y[k]);
         num = _mm_add_ps(num, corr4);
         den = _mm_add_ps(den, energy4);
         den = _mm_rsqrt_ps(den);
         score = _mm_mul_ps(num, den);
         /* Where the score beats the running best, remember its index.
            The comparison is on 16-bit halves, which presumably relies on
            indices staying below 32768. */
         best_idx = _mm_max_epi16(best_idx, _mm_and_si128(lane_idx, _mm_castps_si128(_mm_cmpgt_ps(score, best))));
         /* Per-lane running maximum. */
         best = _mm_max_ps(best, score);
         /* Advance the lane indices by four. */
         lane_idx = _mm_add_epi32(lane_idx, step4);
      }
      /* Maximum across the four lanes, broadcast to every lane. */
      best_all = _mm_max_ps(best, _mm_shuffle_ps(best, best, _MM_SHUFFLE(1, 0, 3, 2)));
      best_all = _mm_max_ps(best_all, _mm_shuffle_ps(best_all, best_all, _MM_SHUFFLE(2, 3, 0, 1)));
      /* Keep only the indices of lanes whose partial maximum equals the
         global one, then reduce them to a single index. */
      best_idx = _mm_and_si128(best_idx, _mm_castps_si128(_mm_cmpeq_ps(best, best_all)));
      best_idx = _mm_max_epi16(best_idx, _mm_unpackhi_epi64(best_idx, best_idx));
      best_idx = _mm_max_epi16(best_idx, _mm_shufflelo_epi16(best_idx, _MM_SHUFFLE(1, 0, 3, 2)));
      best_id = _mm_cvtsi128_si32(best_idx);

      /* Fold the chosen pulse into the running sums. */
      corr = ADD32(corr, EXTEND32(X[best_id]));
      /* y[] already holds twice the pulse count, so no doubling here. */
      energy = ADD16(energy, y[best_id]);

      /* Commit the choice: y[] advances by 2 (doubled), iy[] by 1. */
      y[best_id] += 2;
      iy[best_id]++;
   }

   /* Re-apply the input signs: with s = all-ones where the input was
      negative, (v + s) ^ s negates v; with s = 0 it leaves v unchanged. */
   for (k = 0; k < N; k += 4)
   {
      __m128i v;
      __m128i s;
      v = _mm_loadu_si128((__m128i*)&iy[k]);
      s = _mm_castps_si128(_mm_loadu_ps(&signy[k]));
      v = _mm_xor_si128(_mm_add_epi32(v, s), s);
      _mm_storeu_si128((__m128i*)&iy[k], v);
   }
   RESTORE_STACK;
   return energy;
}
|
||||
|
||||
#endif
|
||||
#endif /* OpenMPT */
|
167
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/x86_celt_map.c
vendored
Normal file
167
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/x86_celt_map.c
vendored
Normal file
|
@ -0,0 +1,167 @@
|
|||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(HAVE_CONFIG_H)
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "x86/x86cpu.h"
|
||||
#include "celt_lpc.h"
|
||||
#include "pitch.h"
|
||||
#include "pitch_sse.h"
|
||||
#include "vq.h"
|
||||
|
||||
#if defined(OPUS_HAVE_RTCD)
|
||||
|
||||
# if defined(FIXED_POINT)
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)
|
||||
|
||||
void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *num,
|
||||
opus_val16 *y,
|
||||
int N,
|
||||
int ord,
|
||||
int arch
|
||||
) = {
|
||||
celt_fir_c, /* non-sse */
|
||||
celt_fir_c,
|
||||
celt_fir_c,
|
||||
MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */
|
||||
MAY_HAVE_SSE4_1(celt_fir) /* avx */
|
||||
};
|
||||
|
||||
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y,
|
||||
opus_val32 sum[4],
|
||||
int len
|
||||
) = {
|
||||
xcorr_kernel_c, /* non-sse */
|
||||
xcorr_kernel_c,
|
||||
xcorr_kernel_c,
|
||||
MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */
|
||||
MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* The table is needed when the SSE4.1 routine may be used but is not
   presumed, or when SSE4.1 is unavailable and the SSE2 routine may be used
   but is not presumed.
   The second clause used to test OPUS_X86_MAY_HAVE_SSE_4_1, a misspelling
   that is never defined, which made that !defined() always true. It now
   names the real macro, OPUS_X86_MAY_HAVE_SSE4_1. */
#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) ||  \
    (!defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))

/* celt_inner_prod() implementations, one entry per arch level (fixed-point
   build): C for the two lowest levels, then SSE2, then SSE4.1 for the two
   highest. */
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
      const opus_val16 *x,
      const opus_val16 *y,
      int               N
) = {
  celt_inner_prod_c,                /* non-sse */
  celt_inner_prod_c,
  MAY_HAVE_SSE2(celt_inner_prod),
  MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1  */
  MAY_HAVE_SSE4_1(celt_inner_prod)  /* avx     */
};

#endif
|
||||
|
||||
# else
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
|
||||
|
||||
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y,
|
||||
opus_val32 sum[4],
|
||||
int len
|
||||
) = {
|
||||
xcorr_kernel_c, /* non-sse */
|
||||
MAY_HAVE_SSE(xcorr_kernel),
|
||||
MAY_HAVE_SSE(xcorr_kernel),
|
||||
MAY_HAVE_SSE(xcorr_kernel),
|
||||
MAY_HAVE_SSE(xcorr_kernel)
|
||||
};
|
||||
|
||||
/* celt_inner_prod() implementations, one entry per arch level
   (floating-point build). Every level above the first uses the SSE routine. */
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
      const opus_val16 *x, const opus_val16 *y, int N) = {
   /* 0: non-sse */ celt_inner_prod_c,
   /* 1          */ MAY_HAVE_SSE(celt_inner_prod),
   /* 2          */ MAY_HAVE_SSE(celt_inner_prod),
   /* 3          */ MAY_HAVE_SSE(celt_inner_prod),
   /* 4          */ MAY_HAVE_SSE(celt_inner_prod)
};
|
||||
|
||||
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y01,
|
||||
const opus_val16 *y02,
|
||||
int N,
|
||||
opus_val32 *xy1,
|
||||
opus_val32 *xy2
|
||||
) = {
|
||||
dual_inner_prod_c, /* non-sse */
|
||||
MAY_HAVE_SSE(dual_inner_prod),
|
||||
MAY_HAVE_SSE(dual_inner_prod),
|
||||
MAY_HAVE_SSE(dual_inner_prod),
|
||||
MAY_HAVE_SSE(dual_inner_prod)
|
||||
};
|
||||
|
||||
void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
|
||||
opus_val32 *y,
|
||||
opus_val32 *x,
|
||||
int T,
|
||||
int N,
|
||||
opus_val16 g10,
|
||||
opus_val16 g11,
|
||||
opus_val16 g12
|
||||
) = {
|
||||
comb_filter_const_c, /* non-sse */
|
||||
MAY_HAVE_SSE(comb_filter_const),
|
||||
MAY_HAVE_SSE(comb_filter_const),
|
||||
MAY_HAVE_SSE(comb_filter_const),
|
||||
MAY_HAVE_SSE(comb_filter_const)
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)
|
||||
/* op_pvq_search() implementations, one entry per arch level (floating-point
   build). The two lowest levels use the C routine, the rest use SSE2. */
opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
      celt_norm *_X,
      int *iy,
      int K,
      int N,
      int arch) = {
   /* 0: non-sse */ op_pvq_search_c,
   /* 1          */ op_pvq_search_c,
   /* 2          */ MAY_HAVE_SSE2(op_pvq_search),
   /* 3          */ MAY_HAVE_SSE2(op_pvq_search),
   /* 4          */ MAY_HAVE_SSE2(op_pvq_search)
};
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
157
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/x86cpu.c
vendored
Normal file
157
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/x86cpu.c
vendored
Normal file
|
@ -0,0 +1,157 @@
|
|||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "cpu_support.h"
|
||||
#include "macros.h"
|
||||
#include "main.h"
|
||||
#include "pitch.h"
|
||||
#include "x86cpu.h"
|
||||
|
||||
#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
|
||||
|
||||
|
||||
/* cpuid(CPUInfo, InfoType): run the CPUID instruction for leaf InfoType and
   store EAX, EBX, ECX, EDX in CPUInfo[0..3].
   Three back ends are selected at build time: the MSVC intrinsic, GCC-style
   inline assembly (CPU_INFO_BY_ASM) or <cpuid.h> (CPU_INFO_BY_C). If the
   leaf cannot be queried, or no back end is configured, CPUInfo is set to
   all zeros so callers never read stale or uninitialised values. */
#if defined(_MSC_VER)

#include <intrin.h>
static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
{
    __cpuid((int*)CPUInfo, InfoType);
}

#else

#if defined(CPU_INFO_BY_C)
#include <cpuid.h>
#endif

static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
{
#if defined(CPU_INFO_BY_ASM)
#if defined(__i386__) && defined(__PIC__)
/* %ebx is PIC register in 32-bit, so mustn't clobber it. */
    __asm__ __volatile__ (
        "xchg %%ebx, %1\n"
        "cpuid\n"
        "xchg %%ebx, %1\n":
        "=a" (CPUInfo[0]),
        "=r" (CPUInfo[1]),
        "=c" (CPUInfo[2]),
        "=d" (CPUInfo[3]) :
        "0" (InfoType)
    );
#else
    __asm__ __volatile__ (
        "cpuid":
        "=a" (CPUInfo[0]),
        "=b" (CPUInfo[1]),
        "=c" (CPUInfo[2]),
        "=d" (CPUInfo[3]) :
        "0" (InfoType)
    );
#endif
#elif defined(CPU_INFO_BY_C)
    /* __get_cpuid() returns 0 and leaves its outputs untouched when the
       requested leaf is not supported; report that as "no features". */
    if (!__get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3])))
    {
        CPUInfo[0] = CPUInfo[1] = CPUInfo[2] = CPUInfo[3] = 0;
    }
#else
    /* No CPUID back end configured: report "no features". */
    (void)InfoType;
    CPUInfo[0] = CPUInfo[1] = CPUInfo[2] = CPUInfo[3] = 0;
#endif
}

#endif
|
||||
|
||||
/* Instruction-set flags filled in from CPUID; each field is 0 or 1. */
typedef struct CPU_Feature
{
    int HW_SSE;    /* 128-bit SIMD: SSE    */
    int HW_SSE2;   /* 128-bit SIMD: SSE2   */
    int HW_SSE41;  /* 128-bit SIMD: SSE4.1 */
    int HW_AVX;    /* 256-bit SIMD: AVX    */
} CPU_Feature;
|
||||
|
||||
/* Fill *cpu_feature from CPUID.
   Leaf 0 gives the highest supported leaf. If leaf 1 is available, its
   feature bits are decoded; otherwise every flag is reported as 0. */
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
{
    unsigned int regs[4] = {0};
    unsigned int max_leaf;

    /* Start from "nothing supported". */
    cpu_feature->HW_SSE = 0;
    cpu_feature->HW_SSE2 = 0;
    cpu_feature->HW_SSE41 = 0;
    cpu_feature->HW_AVX = 0;

    cpuid(regs, 0);
    max_leaf = regs[0];
    if (max_leaf < 1)
    {
        return;
    }

    cpuid(regs, 1);
    /* regs[3] is EDX, regs[2] is ECX. */
    cpu_feature->HW_SSE   = (int)((regs[3] >> 25) & 1u);
    cpu_feature->HW_SSE2  = (int)((regs[3] >> 26) & 1u);
    cpu_feature->HW_SSE41 = (int)((regs[2] >> 19) & 1u);
    /* NOTE(review): only the CPUID feature bit is examined for AVX here. */
    cpu_feature->HW_AVX   = (int)((regs[2] >> 28) & 1u);
}
|
||||
|
||||
int opus_select_arch(void)
|
||||
{
|
||||
CPU_Feature cpu_feature;
|
||||
int arch;
|
||||
|
||||
opus_cpu_feature_check(&cpu_feature);
|
||||
|
||||
arch = 0;
|
||||
if (!cpu_feature.HW_SSE)
|
||||
{
|
||||
return arch;
|
||||
}
|
||||
arch++;
|
||||
|
||||
if (!cpu_feature.HW_SSE2)
|
||||
{
|
||||
return arch;
|
||||
}
|
||||
arch++;
|
||||
|
||||
if (!cpu_feature.HW_SSE41)
|
||||
{
|
||||
return arch;
|
||||
}
|
||||
arch++;
|
||||
|
||||
if (!cpu_feature.HW_AVX)
|
||||
{
|
||||
return arch;
|
||||
}
|
||||
arch++;
|
||||
|
||||
return arch;
|
||||
}
|
||||
|
||||
#endif
|
95
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/x86cpu.h
vendored
Normal file
95
Src/external_dependencies/openmpt-trunk/include/opus/celt/x86/x86cpu.h
vendored
Normal file
|
@ -0,0 +1,95 @@
|
|||
/* Copyright (c) 2014, Cisco Systems, INC
|
||||
Written by XiangMingZhu WeiZhou MinPeng YanWang
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if !defined(X86CPU_H)
|
||||
# define X86CPU_H
|
||||
|
||||
/* MAY_HAVE_<ISA>(name) expands to name_<isa> when the build may use that
   instruction set, and to the portable name_c otherwise. */
# ifdef OPUS_X86_MAY_HAVE_SSE
#  define MAY_HAVE_SSE(name) name ## _sse
# else
#  define MAY_HAVE_SSE(name) name ## _c
# endif

# ifdef OPUS_X86_MAY_HAVE_SSE2
#  define MAY_HAVE_SSE2(name) name ## _sse2
# else
#  define MAY_HAVE_SSE2(name) name ## _c
# endif

# ifdef OPUS_X86_MAY_HAVE_SSE4_1
#  define MAY_HAVE_SSE4_1(name) name ## _sse4_1
# else
#  define MAY_HAVE_SSE4_1(name) name ## _c
# endif

# ifdef OPUS_X86_MAY_HAVE_AVX
#  define MAY_HAVE_AVX(name) name ## _avx
# else
#  define MAY_HAVE_AVX(name) name ## _c
# endif
|
||||
|
||||
# if defined(OPUS_HAVE_RTCD)
|
||||
int opus_select_arch(void);
|
||||
# endif
|
||||
|
||||
/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
|
||||
or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
|
||||
actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
|
||||
reference, these require 16-byte alignment and load a full 16 bytes (instead
|
||||
of 4 or 8), possibly reading out of bounds.
|
||||
|
||||
We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
|
||||
_mm_loadl_epi64(), which should have the same semantics as an m32 or m64
|
||||
reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
|
||||
optimize this out when optimizations ARE enabled.
|
||||
|
||||
Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
|
||||
(which is fair, since technically the compiler is always allowed to do the
|
||||
dereference before invoking the function implementing the intrinsic).
|
||||
However, it is smart enough to eliminate the extra MOVD instruction.
|
||||
For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
|
||||
the extra MOVQ if it's specified explicitly */
|
||||
|
||||
/* Sign-extend the four bytes at x to four 32-bit lanes.
   The source is always fetched with an explicit 32-bit load
   (_mm_cvtsi32_si128). The former optimised-GCC variant dereferenced
   *(__m128i *)(x), which is formally a 16-byte aligned load from a 4-byte
   object: the compiler is allowed to emit it as such, reading out of bounds
   or faulting on alignment. */
# define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
|
||||
|
||||
/* similar reasoning about the instruction sequence as in the 32-bit macro above,
|
||||
*/
|
||||
/* Sign-extend the four 16-bit values at x to four 32-bit lanes.
   The source is always fetched with an explicit 64-bit load
   (_mm_loadl_epi64). The former optimised-GCC variant dereferenced
   *(__m128i *)(x), which is formally a 16-byte aligned load from an 8-byte
   object: the compiler is allowed to emit it as such, reading out of bounds
   or faulting on alignment. */
# define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i *)(const void *)(x))))
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue