Initial community commit
This commit is contained in:
parent
537bcbc862
commit
fc06254474
16440 changed files with 4239995 additions and 2 deletions
57
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/LPC_inv_pred_gain_arm.h
vendored
Normal file
57
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/LPC_inv_pred_gain_arm.h
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2017 Google Inc.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_LPC_INV_PRED_GAIN_ARM_H
|
||||
# define SILK_LPC_INV_PRED_GAIN_ARM_H
|
||||
|
||||
# include "celt/arm/armcpu.h"
|
||||
|
||||
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
|
||||
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
|
||||
const opus_int order /* I Prediction order */
|
||||
);
|
||||
|
||||
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
|
||||
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
|
||||
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), PRESUME_NEON(silk_LPC_inverse_pred_gain)(A_Q12, order))
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# if !defined(OVERRIDE_silk_LPC_inverse_pred_gain)
|
||||
/*Is run-time CPU detection enabled on this platform?*/
|
||||
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
|
||||
extern opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK+1])(const opus_int16 *A_Q12, const opus_int order);
|
||||
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
|
||||
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((*SILK_LPC_INVERSE_PRED_GAIN_IMPL[(arch)&OPUS_ARCHMASK])(A_Q12, order))
|
||||
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
|
||||
# define OVERRIDE_silk_LPC_inverse_pred_gain (1)
|
||||
# define silk_LPC_inverse_pred_gain(A_Q12, order, arch) ((void)(arch), silk_LPC_inverse_pred_gain_neon(A_Q12, order))
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#endif /* end SILK_LPC_INV_PRED_GAIN_ARM_H */
|
280
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/LPC_inv_pred_gain_neon_intr.c
vendored
Normal file
280
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/LPC_inv_pred_gain_neon_intr.c
vendored
Normal file
|
@ -0,0 +1,280 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2017 Google Inc.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "SigProc_FIX.h"
|
||||
#include "define.h"
|
||||
|
||||
#define QA 24
|
||||
#define A_LIMIT SILK_FIX_CONST( 0.99975, QA )
|
||||
|
||||
#define MUL32_FRAC_Q(a32, b32, Q) ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q)))
|
||||
|
||||
/* The difficulty is how to judge a 64-bit signed integer tmp64 is 32-bit overflowed,
|
||||
* since NEON has no 64-bit min, max or comparison instructions.
|
||||
* A failed idea is to compare the results of vmovn(tmp64) and vqmovn(tmp64) whether they are equal or not.
|
||||
* However, this idea fails when the tmp64 is something like 0xFFFFFFF980000000.
|
||||
* Here we know that mult2Q >= 1, so the highest bit (bit 63, sign bit) of tmp64 must equal to bit 62.
|
||||
* tmp64 was shifted left by 1 and we got tmp64'. If high_half(tmp64') != 0 and high_half(tmp64') != -1,
|
||||
* then we know that bit 31 to bit 63 of tmp64 can not all be the sign bit, and therefore tmp64 is 32-bit overflowed.
|
||||
* That is, we judge if tmp64' > 0x00000000FFFFFFFF, or tmp64' <= 0xFFFFFFFF00000000.
|
||||
* We use narrowing shift right 31 bits to tmp32' to save data bandwidth and instructions.
|
||||
* That is, we judge if tmp32' > 0x00000000, or tmp32' <= 0xFFFFFFFF.
|
||||
*/
|
||||
|
||||
/* Compute inverse of LPC prediction gain, and */
|
||||
/* test if LPC coefficients are stable (all poles within unit circle) */
|
||||
static OPUS_INLINE opus_int32 LPC_inverse_pred_gain_QA_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
|
||||
opus_int32 A_QA[ SILK_MAX_ORDER_LPC ], /* I Prediction coefficients */
|
||||
const opus_int order /* I Prediction order */
|
||||
)
|
||||
{
|
||||
opus_int k, n, mult2Q;
|
||||
opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2;
|
||||
opus_int32 max, min;
|
||||
int32x4_t max_s32x4, min_s32x4;
|
||||
int32x2_t max_s32x2, min_s32x2;
|
||||
|
||||
max_s32x4 = vdupq_n_s32( silk_int32_MIN );
|
||||
min_s32x4 = vdupq_n_s32( silk_int32_MAX );
|
||||
invGain_Q30 = SILK_FIX_CONST( 1, 30 );
|
||||
for( k = order - 1; k > 0; k-- ) {
|
||||
int32x2_t rc_Q31_s32x2, rc_mult2_s32x2;
|
||||
int64x2_t mult2Q_s64x2;
|
||||
|
||||
/* Check for stability */
|
||||
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set RC equal to negated AR coef */
|
||||
rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA );
|
||||
|
||||
/* rc_mult1_Q30 range: [ 1 : 2^30 ] */
|
||||
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
|
||||
silk_assert( rc_mult1_Q30 > ( 1 << 15 ) ); /* reduce A_LIMIT if fails */
|
||||
silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
|
||||
|
||||
/* Update inverse gain */
|
||||
/* invGain_Q30 range: [ 0 : 2^30 ] */
|
||||
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
|
||||
silk_assert( invGain_Q30 >= 0 );
|
||||
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
|
||||
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
|
||||
mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
|
||||
rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
|
||||
|
||||
/* Update AR coefficient */
|
||||
rc_Q31_s32x2 = vdup_n_s32( rc_Q31 );
|
||||
mult2Q_s64x2 = vdupq_n_s64( -mult2Q );
|
||||
rc_mult2_s32x2 = vdup_n_s32( rc_mult2 );
|
||||
|
||||
for( n = 0; n < ( ( k + 1 ) >> 1 ) - 3; n += 4 ) {
|
||||
/* We always calculate extra elements of A_QA buffer when ( k % 4 ) != 0, to take the advantage of SIMD parallelization. */
|
||||
int32x4_t tmp1_s32x4, tmp2_s32x4, t0_s32x4, t1_s32x4, s0_s32x4, s1_s32x4, t_QA0_s32x4, t_QA1_s32x4;
|
||||
int64x2_t t0_s64x2, t1_s64x2, t2_s64x2, t3_s64x2;
|
||||
tmp1_s32x4 = vld1q_s32( A_QA + n );
|
||||
tmp2_s32x4 = vld1q_s32( A_QA + k - n - 4 );
|
||||
tmp2_s32x4 = vrev64q_s32( tmp2_s32x4 );
|
||||
tmp2_s32x4 = vcombine_s32( vget_high_s32( tmp2_s32x4 ), vget_low_s32( tmp2_s32x4 ) );
|
||||
t0_s32x4 = vqrdmulhq_lane_s32( tmp2_s32x4, rc_Q31_s32x2, 0 );
|
||||
t1_s32x4 = vqrdmulhq_lane_s32( tmp1_s32x4, rc_Q31_s32x2, 0 );
|
||||
t_QA0_s32x4 = vqsubq_s32( tmp1_s32x4, t0_s32x4 );
|
||||
t_QA1_s32x4 = vqsubq_s32( tmp2_s32x4, t1_s32x4 );
|
||||
t0_s64x2 = vmull_s32( vget_low_s32 ( t_QA0_s32x4 ), rc_mult2_s32x2 );
|
||||
t1_s64x2 = vmull_s32( vget_high_s32( t_QA0_s32x4 ), rc_mult2_s32x2 );
|
||||
t2_s64x2 = vmull_s32( vget_low_s32 ( t_QA1_s32x4 ), rc_mult2_s32x2 );
|
||||
t3_s64x2 = vmull_s32( vget_high_s32( t_QA1_s32x4 ), rc_mult2_s32x2 );
|
||||
t0_s64x2 = vrshlq_s64( t0_s64x2, mult2Q_s64x2 );
|
||||
t1_s64x2 = vrshlq_s64( t1_s64x2, mult2Q_s64x2 );
|
||||
t2_s64x2 = vrshlq_s64( t2_s64x2, mult2Q_s64x2 );
|
||||
t3_s64x2 = vrshlq_s64( t3_s64x2, mult2Q_s64x2 );
|
||||
t0_s32x4 = vcombine_s32( vmovn_s64( t0_s64x2 ), vmovn_s64( t1_s64x2 ) );
|
||||
t1_s32x4 = vcombine_s32( vmovn_s64( t2_s64x2 ), vmovn_s64( t3_s64x2 ) );
|
||||
s0_s32x4 = vcombine_s32( vshrn_n_s64( t0_s64x2, 31 ), vshrn_n_s64( t1_s64x2, 31 ) );
|
||||
s1_s32x4 = vcombine_s32( vshrn_n_s64( t2_s64x2, 31 ), vshrn_n_s64( t3_s64x2, 31 ) );
|
||||
max_s32x4 = vmaxq_s32( max_s32x4, s0_s32x4 );
|
||||
min_s32x4 = vminq_s32( min_s32x4, s0_s32x4 );
|
||||
max_s32x4 = vmaxq_s32( max_s32x4, s1_s32x4 );
|
||||
min_s32x4 = vminq_s32( min_s32x4, s1_s32x4 );
|
||||
t1_s32x4 = vrev64q_s32( t1_s32x4 );
|
||||
t1_s32x4 = vcombine_s32( vget_high_s32( t1_s32x4 ), vget_low_s32( t1_s32x4 ) );
|
||||
vst1q_s32( A_QA + n, t0_s32x4 );
|
||||
vst1q_s32( A_QA + k - n - 4, t1_s32x4 );
|
||||
}
|
||||
for( ; n < (k + 1) >> 1; n++ ) {
|
||||
opus_int64 tmp64;
|
||||
tmp1 = A_QA[ n ];
|
||||
tmp2 = A_QA[ k - n - 1 ];
|
||||
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp1,
|
||||
MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q);
|
||||
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
|
||||
return 0;
|
||||
}
|
||||
A_QA[ n ] = ( opus_int32 )tmp64;
|
||||
tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32(tmp2,
|
||||
MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2), mult2Q);
|
||||
if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
|
||||
return 0;
|
||||
}
|
||||
A_QA[ k - n - 1 ] = ( opus_int32 )tmp64;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for stability */
|
||||
if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
max_s32x2 = vmax_s32( vget_low_s32( max_s32x4 ), vget_high_s32( max_s32x4 ) );
|
||||
min_s32x2 = vmin_s32( vget_low_s32( min_s32x4 ), vget_high_s32( min_s32x4 ) );
|
||||
max_s32x2 = vmax_s32( max_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( max_s32x2 ), 32 ) ) );
|
||||
min_s32x2 = vmin_s32( min_s32x2, vreinterpret_s32_s64( vshr_n_s64( vreinterpret_s64_s32( min_s32x2 ), 32 ) ) );
|
||||
max = vget_lane_s32( max_s32x2, 0 );
|
||||
min = vget_lane_s32( min_s32x2, 0 );
|
||||
if( ( max > 0 ) || ( min < -1 ) ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set RC equal to negated AR coef */
|
||||
rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA );
|
||||
|
||||
/* Range: [ 1 : 2^30 ] */
|
||||
rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
|
||||
|
||||
/* Update inverse gain */
|
||||
/* Range: [ 0 : 2^30 ] */
|
||||
invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
|
||||
silk_assert( invGain_Q30 >= 0 );
|
||||
silk_assert( invGain_Q30 <= ( 1 << 30 ) );
|
||||
if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return invGain_Q30;
|
||||
}
|
||||
|
||||
/* For input in Q12 domain */
|
||||
opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse prediction gain in energy domain, Q30 */
|
||||
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
|
||||
const opus_int order /* I Prediction order */
|
||||
)
|
||||
{
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
const opus_int32 invGain_Q30_c = silk_LPC_inverse_pred_gain_c( A_Q12, order );
|
||||
#endif
|
||||
|
||||
opus_int32 invGain_Q30;
|
||||
if( ( SILK_MAX_ORDER_LPC != 24 ) || ( order & 1 )) {
|
||||
invGain_Q30 = silk_LPC_inverse_pred_gain_c( A_Q12, order );
|
||||
}
|
||||
else {
|
||||
opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];
|
||||
opus_int32 DC_resp;
|
||||
int16x8_t t0_s16x8, t1_s16x8, t2_s16x8;
|
||||
int32x4_t t0_s32x4;
|
||||
const opus_int leftover = order & 7;
|
||||
|
||||
/* Increase Q domain of the AR coefficients */
|
||||
t0_s16x8 = vld1q_s16( A_Q12 + 0 );
|
||||
t1_s16x8 = vld1q_s16( A_Q12 + 8 );
|
||||
t2_s16x8 = vld1q_s16( A_Q12 + 16 );
|
||||
t0_s32x4 = vpaddlq_s16( t0_s16x8 );
|
||||
|
||||
switch( order - leftover )
|
||||
{
|
||||
case 24:
|
||||
t0_s32x4 = vpadalq_s16( t0_s32x4, t2_s16x8 );
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case 16:
|
||||
t0_s32x4 = vpadalq_s16( t0_s32x4, t1_s16x8 );
|
||||
vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) );
|
||||
vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) );
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case 8:
|
||||
{
|
||||
const int32x2_t t_s32x2 = vpadd_s32( vget_low_s32( t0_s32x4 ), vget_high_s32( t0_s32x4 ) );
|
||||
const int64x1_t t_s64x1 = vpaddl_s32( t_s32x2 );
|
||||
DC_resp = vget_lane_s32( vreinterpret_s32_s64( t_s64x1 ), 0 );
|
||||
vst1q_s32( Atmp_QA + 8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) );
|
||||
vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) );
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
DC_resp = 0;
|
||||
break;
|
||||
}
|
||||
A_Q12 += order - leftover;
|
||||
|
||||
switch( leftover )
|
||||
{
|
||||
case 6:
|
||||
DC_resp += (opus_int32)A_Q12[ 5 ];
|
||||
DC_resp += (opus_int32)A_Q12[ 4 ];
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case 4:
|
||||
DC_resp += (opus_int32)A_Q12[ 3 ];
|
||||
DC_resp += (opus_int32)A_Q12[ 2 ];
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case 2:
|
||||
DC_resp += (opus_int32)A_Q12[ 1 ];
|
||||
DC_resp += (opus_int32)A_Q12[ 0 ];
|
||||
/* FALLTHROUGH */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* If the DC is unstable, we don't even need to do the full calculations */
|
||||
if( DC_resp >= 4096 ) {
|
||||
invGain_Q30 = 0;
|
||||
} else {
|
||||
vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) );
|
||||
vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) );
|
||||
invGain_Q30 = LPC_inverse_pred_gain_QA_neon( Atmp_QA, order );
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
silk_assert( invGain_Q30_c == invGain_Q30 );
|
||||
#endif
|
||||
|
||||
return invGain_Q30;
|
||||
}
|
100
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/NSQ_del_dec_arm.h
vendored
Normal file
100
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/NSQ_del_dec_arm.h
vendored
Normal file
|
@ -0,0 +1,100 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2017 Google Inc.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_NSQ_DEL_DEC_ARM_H
|
||||
#define SILK_NSQ_DEL_DEC_ARM_H
|
||||
|
||||
#include "celt/arm/armcpu.h"
|
||||
|
||||
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
void silk_NSQ_del_dec_neon(
|
||||
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
|
||||
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
|
||||
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
|
||||
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
|
||||
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
|
||||
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
|
||||
const opus_int Tilt_Q14[MAX_NB_SUBFR],
|
||||
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
|
||||
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
|
||||
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
|
||||
const opus_int LTP_scale_Q14);
|
||||
|
||||
#if !defined(OPUS_HAVE_RTCD)
|
||||
#define OVERRIDE_silk_NSQ_del_dec (1)
|
||||
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
|
||||
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
|
||||
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
|
||||
LTP_scale_Q14, arch) \
|
||||
((void)(arch), \
|
||||
PRESUME_NEON(silk_NSQ_del_dec)( \
|
||||
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
|
||||
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
|
||||
Lambda_Q10, LTP_scale_Q14))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(OVERRIDE_silk_NSQ_del_dec)
|
||||
/*Is run-time CPU detection enabled on this platform?*/
|
||||
#if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
|
||||
!defined(OPUS_ARM_PRESUME_NEON_INTR))
|
||||
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
|
||||
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
|
||||
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
|
||||
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
|
||||
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
|
||||
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
|
||||
const opus_int Tilt_Q14[MAX_NB_SUBFR],
|
||||
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
|
||||
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
|
||||
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
|
||||
const opus_int LTP_scale_Q14);
|
||||
#define OVERRIDE_silk_NSQ_del_dec (1)
|
||||
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
|
||||
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
|
||||
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
|
||||
LTP_scale_Q14, arch) \
|
||||
((*SILK_NSQ_DEL_DEC_IMPL[(arch)&OPUS_ARCHMASK])( \
|
||||
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
|
||||
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
|
||||
Lambda_Q10, LTP_scale_Q14))
|
||||
#elif defined(OPUS_ARM_PRESUME_NEON_INTR)
|
||||
#define OVERRIDE_silk_NSQ_del_dec (1)
|
||||
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
|
||||
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
|
||||
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
|
||||
LTP_scale_Q14, arch) \
|
||||
((void)(arch), \
|
||||
silk_NSQ_del_dec_neon(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
|
||||
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
|
||||
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
|
||||
LTP_scale_Q14))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* end SILK_NSQ_DEL_DEC_ARM_H */
|
1124
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/NSQ_del_dec_neon_intr.c
vendored
Normal file
1124
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/NSQ_del_dec_neon_intr.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
112
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/NSQ_neon.c
vendored
Normal file
112
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/NSQ_neon.c
vendored
Normal file
|
@ -0,0 +1,112 @@
|
|||
/***********************************************************************
|
||||
Copyright (C) 2014 Vidyo
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "main.h"
|
||||
#include "stack_alloc.h"
|
||||
#include "NSQ.h"
|
||||
#include "celt/cpu_support.h"
|
||||
#include "celt/arm/armcpu.h"
|
||||
|
||||
opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order)
|
||||
{
|
||||
int32x4_t coef0 = vld1q_s32(coef32);
|
||||
int32x4_t coef1 = vld1q_s32(coef32 + 4);
|
||||
int32x4_t coef2 = vld1q_s32(coef32 + 8);
|
||||
int32x4_t coef3 = vld1q_s32(coef32 + 12);
|
||||
|
||||
int32x4_t a0 = vld1q_s32(buf32 - 15);
|
||||
int32x4_t a1 = vld1q_s32(buf32 - 11);
|
||||
int32x4_t a2 = vld1q_s32(buf32 - 7);
|
||||
int32x4_t a3 = vld1q_s32(buf32 - 3);
|
||||
|
||||
int32x4_t b0 = vqdmulhq_s32(coef0, a0);
|
||||
int32x4_t b1 = vqdmulhq_s32(coef1, a1);
|
||||
int32x4_t b2 = vqdmulhq_s32(coef2, a2);
|
||||
int32x4_t b3 = vqdmulhq_s32(coef3, a3);
|
||||
|
||||
int32x4_t c0 = vaddq_s32(b0, b1);
|
||||
int32x4_t c1 = vaddq_s32(b2, b3);
|
||||
|
||||
int32x4_t d = vaddq_s32(c0, c1);
|
||||
|
||||
int64x2_t e = vpaddlq_s32(d);
|
||||
|
||||
int64x1_t f = vadd_s64(vget_low_s64(e), vget_high_s64(e));
|
||||
|
||||
opus_int32 out = vget_lane_s32(vreinterpret_s32_s64(f), 0);
|
||||
|
||||
out += silk_RSHIFT( order, 1 );
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order)
|
||||
{
|
||||
opus_int32 out;
|
||||
if (order == 8)
|
||||
{
|
||||
int32x4_t a00 = vdupq_n_s32(data0[0]);
|
||||
int32x4_t a01 = vld1q_s32(data1); /* data1[0] ... [3] */
|
||||
|
||||
int32x4_t a0 = vextq_s32 (a00, a01, 3); /* data0[0] data1[0] ...[2] */
|
||||
int32x4_t a1 = vld1q_s32(data1 + 3); /* data1[3] ... [6] */
|
||||
|
||||
/*TODO: Convert these once in advance instead of once per sample, like
|
||||
silk_noise_shape_quantizer_short_prediction_neon() does.*/
|
||||
int16x8_t coef16 = vld1q_s16(coef);
|
||||
int32x4_t coef0 = vmovl_s16(vget_low_s16(coef16));
|
||||
int32x4_t coef1 = vmovl_s16(vget_high_s16(coef16));
|
||||
|
||||
/*This is not bit-exact with the C version, since we do not drop the
|
||||
lower 16 bits of each multiply, but wait until the end to truncate
|
||||
precision. This is an encoder-specific calculation (and unlike
|
||||
silk_noise_shape_quantizer_short_prediction_neon(), is not meant to
|
||||
simulate what the decoder will do). We still could use vqdmulhq_s32()
|
||||
like silk_noise_shape_quantizer_short_prediction_neon() and save
|
||||
half the multiplies, but the speed difference is not large, since we
|
||||
then need two extra adds.*/
|
||||
int64x2_t b0 = vmull_s32(vget_low_s32(a0), vget_low_s32(coef0));
|
||||
int64x2_t b1 = vmlal_s32(b0, vget_high_s32(a0), vget_high_s32(coef0));
|
||||
int64x2_t b2 = vmlal_s32(b1, vget_low_s32(a1), vget_low_s32(coef1));
|
||||
int64x2_t b3 = vmlal_s32(b2, vget_high_s32(a1), vget_high_s32(coef1));
|
||||
|
||||
int64x1_t c = vadd_s64(vget_low_s64(b3), vget_high_s64(b3));
|
||||
int64x1_t cS = vrshr_n_s64(c, 15);
|
||||
int32x2_t d = vreinterpret_s32_s64(cS);
|
||||
|
||||
out = vget_lane_s32(d, 0);
|
||||
vst1q_s32(data1, a0);
|
||||
vst1q_s32(data1 + 4, a1);
|
||||
return out;
|
||||
}
|
||||
return silk_NSQ_noise_shape_feedback_loop_c(data0, data1, coef, order);
|
||||
}
|
114
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/NSQ_neon.h
vendored
Normal file
114
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/NSQ_neon.h
vendored
Normal file
|
@ -0,0 +1,114 @@
|
|||
/***********************************************************************
|
||||
Copyright (C) 2014 Vidyo
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
#ifndef SILK_NSQ_NEON_H
|
||||
#define SILK_NSQ_NEON_H
|
||||
|
||||
#include "cpu_support.h"
|
||||
#include "SigProc_FIX.h"
|
||||
|
||||
#undef silk_short_prediction_create_arch_coef
|
||||
/* For vectorized calc, reverse a_Q12 coefs, convert to 32-bit, and shift for vqdmulhq_s32. */
|
||||
static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *out, const opus_int16 *in, opus_int order)
|
||||
{
|
||||
out[15] = silk_LSHIFT32(in[0], 15);
|
||||
out[14] = silk_LSHIFT32(in[1], 15);
|
||||
out[13] = silk_LSHIFT32(in[2], 15);
|
||||
out[12] = silk_LSHIFT32(in[3], 15);
|
||||
out[11] = silk_LSHIFT32(in[4], 15);
|
||||
out[10] = silk_LSHIFT32(in[5], 15);
|
||||
out[9] = silk_LSHIFT32(in[6], 15);
|
||||
out[8] = silk_LSHIFT32(in[7], 15);
|
||||
out[7] = silk_LSHIFT32(in[8], 15);
|
||||
out[6] = silk_LSHIFT32(in[9], 15);
|
||||
|
||||
if (order == 16)
|
||||
{
|
||||
out[5] = silk_LSHIFT32(in[10], 15);
|
||||
out[4] = silk_LSHIFT32(in[11], 15);
|
||||
out[3] = silk_LSHIFT32(in[12], 15);
|
||||
out[2] = silk_LSHIFT32(in[13], 15);
|
||||
out[1] = silk_LSHIFT32(in[14], 15);
|
||||
out[0] = silk_LSHIFT32(in[15], 15);
|
||||
}
|
||||
else
|
||||
{
|
||||
out[5] = 0;
|
||||
out[4] = 0;
|
||||
out[3] = 0;
|
||||
out[2] = 0;
|
||||
out[1] = 0;
|
||||
out[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(OPUS_ARM_PRESUME_NEON_INTR)
|
||||
|
||||
#define silk_short_prediction_create_arch_coef(out, in, order) \
|
||||
(silk_short_prediction_create_arch_coef_neon(out, in, order))
|
||||
|
||||
#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
|
||||
#define silk_short_prediction_create_arch_coef(out, in, order) \
|
||||
do { if (arch == OPUS_ARCH_ARM_NEON) { silk_short_prediction_create_arch_coef_neon(out, in, order); } } while (0)
|
||||
|
||||
#endif
|
||||
|
||||
opus_int32 silk_noise_shape_quantizer_short_prediction_neon(const opus_int32 *buf32, const opus_int32 *coef32, opus_int order);
|
||||
|
||||
opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef, opus_int order);
|
||||
|
||||
#if defined(OPUS_ARM_PRESUME_NEON_INTR)
|
||||
#undef silk_noise_shape_quantizer_short_prediction
|
||||
#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \
|
||||
((void)arch,silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order))
|
||||
|
||||
#undef silk_NSQ_noise_shape_feedback_loop
|
||||
#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) ((void)arch,silk_NSQ_noise_shape_feedback_loop_neon(data0, data1, coef, order))
|
||||
|
||||
#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
|
||||
/* silk_noise_shape_quantizer_short_prediction implementations take different parameters based on arch
|
||||
(coef vs. coefRev) so can't use the usual IMPL table implementation */
|
||||
#undef silk_noise_shape_quantizer_short_prediction
|
||||
#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \
|
||||
(arch == OPUS_ARCH_ARM_NEON ? \
|
||||
silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order) : \
|
||||
silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
|
||||
|
||||
extern opus_int32
|
||||
(*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])(
|
||||
const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef,
|
||||
opus_int order);
|
||||
|
||||
#undef silk_NSQ_noise_shape_feedback_loop
|
||||
#define silk_NSQ_noise_shape_feedback_loop(data0, data1, coef, order, arch) \
|
||||
(SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[(arch)&OPUS_ARCHMASK](data0, data1, \
|
||||
coef, order))
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* SILK_NSQ_NEON_H */
|
47
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/SigProc_FIX_armv4.h
vendored
Normal file
47
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/SigProc_FIX_armv4.h
vendored
Normal file
|
@ -0,0 +1,47 @@
|
|||
/***********************************************************************
|
||||
Copyright (C) 2013 Xiph.Org Foundation and contributors
|
||||
Copyright (c) 2013 Parrot
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_SIGPROC_FIX_ARMv4_H
|
||||
#define SILK_SIGPROC_FIX_ARMv4_H
|
||||
|
||||
#undef silk_MLA
|
||||
static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
|
||||
opus_int32 c)
|
||||
{
|
||||
opus_int32 res;
|
||||
__asm__(
|
||||
"#silk_MLA\n\t"
|
||||
"mla %0, %1, %2, %3\n\t"
|
||||
: "=&r"(res)
|
||||
: "r"(b), "r"(c), "r"(a)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c))
|
||||
|
||||
#endif
|
61
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/SigProc_FIX_armv5e.h
vendored
Normal file
61
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/SigProc_FIX_armv5e.h
vendored
Normal file
|
@ -0,0 +1,61 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
|
||||
Copyright (c) 2013 Parrot
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_SIGPROC_FIX_ARMv5E_H
|
||||
#define SILK_SIGPROC_FIX_ARMv5E_H
|
||||
|
||||
#undef silk_SMULTT
|
||||
static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
|
||||
{
|
||||
opus_int32 res;
|
||||
__asm__(
|
||||
"#silk_SMULTT\n\t"
|
||||
"smultt %0, %1, %2\n\t"
|
||||
: "=r"(res)
|
||||
: "%r"(a), "r"(b)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))
|
||||
|
||||
#undef silk_SMLATT
|
||||
static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
|
||||
opus_int32 c)
|
||||
{
|
||||
opus_int32 res;
|
||||
__asm__(
|
||||
"#silk_SMLATT\n\t"
|
||||
"smlatt %0, %1, %2, %3\n\t"
|
||||
: "=r"(res)
|
||||
: "%r"(b), "r"(c), "r"(a)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c))
|
||||
|
||||
#endif
|
123
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/arm_silk_map.c
vendored
Normal file
123
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/arm_silk_map.c
vendored
Normal file
|
@ -0,0 +1,123 @@
|
|||
/***********************************************************************
|
||||
Copyright (C) 2014 Vidyo
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include "main_FIX.h"
|
||||
#include "NSQ.h"
|
||||
#include "SigProc_FIX.h"
|
||||
|
||||
#if defined(OPUS_HAVE_RTCD)
|
||||
|
||||
# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
|
||||
!defined(OPUS_ARM_PRESUME_NEON_INTR))
|
||||
|
||||
void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
opus_int32 *S, /* I/O State vector [4] */
|
||||
opus_int16 *out, /* O output signal */
|
||||
const opus_int32 len /* I signal length (must be even) */
|
||||
) = {
|
||||
silk_biquad_alt_stride2_c, /* ARMv4 */
|
||||
silk_biquad_alt_stride2_c, /* EDSP */
|
||||
silk_biquad_alt_stride2_c, /* Media */
|
||||
silk_biquad_alt_stride2_neon, /* Neon */
|
||||
};
|
||||
|
||||
opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK + 1])( /* O Returns inverse prediction gain in energy domain, Q30 */
|
||||
const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */
|
||||
const opus_int order /* I Prediction order */
|
||||
) = {
|
||||
silk_LPC_inverse_pred_gain_c, /* ARMv4 */
|
||||
silk_LPC_inverse_pred_gain_c, /* EDSP */
|
||||
silk_LPC_inverse_pred_gain_c, /* Media */
|
||||
silk_LPC_inverse_pred_gain_neon, /* Neon */
|
||||
};
|
||||
|
||||
void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const silk_encoder_state *psEncC, /* I Encoder State */
|
||||
silk_nsq_state *NSQ, /* I/O NSQ state */
|
||||
SideInfoIndices *psIndices, /* I/O Quantization Indices */
|
||||
const opus_int16 x16[], /* I Input */
|
||||
opus_int8 pulses[], /* O Quantized pulse signal */
|
||||
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
|
||||
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
|
||||
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
|
||||
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
|
||||
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
|
||||
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
|
||||
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
|
||||
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
|
||||
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
|
||||
const opus_int LTP_scale_Q14 /* I LTP state scaling */
|
||||
) = {
|
||||
silk_NSQ_del_dec_c, /* ARMv4 */
|
||||
silk_NSQ_del_dec_c, /* EDSP */
|
||||
silk_NSQ_del_dec_c, /* Media */
|
||||
silk_NSQ_del_dec_neon, /* Neon */
|
||||
};
|
||||
|
||||
/*There is no table for silk_noise_shape_quantizer_short_prediction because the
|
||||
NEON version takes different parameters than the C version.
|
||||
Instead RTCD is done via if statements at the call sites.
|
||||
See NSQ_neon.h for details.*/
|
||||
|
||||
opus_int32
|
||||
(*const SILK_NSQ_NOISE_SHAPE_FEEDBACK_LOOP_IMPL[OPUS_ARCHMASK+1])(
|
||||
const opus_int32 *data0, opus_int32 *data1, const opus_int16 *coef,
|
||||
opus_int order) = {
|
||||
silk_NSQ_noise_shape_feedback_loop_c, /* ARMv4 */
|
||||
silk_NSQ_noise_shape_feedback_loop_c, /* EDSP */
|
||||
silk_NSQ_noise_shape_feedback_loop_c, /* Media */
|
||||
silk_NSQ_noise_shape_feedback_loop_neon, /* NEON */
|
||||
};
|
||||
|
||||
# endif
|
||||
|
||||
# if defined(FIXED_POINT) && \
|
||||
defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
|
||||
|
||||
void (*const SILK_WARPED_AUTOCORRELATION_FIX_IMPL[OPUS_ARCHMASK + 1])(
|
||||
opus_int32 *corr, /* O Result [order + 1] */
|
||||
opus_int *scale, /* O Scaling of the correlation vector */
|
||||
const opus_int16 *input, /* I Input data to correlate */
|
||||
const opus_int warping_Q16, /* I Warping coefficient */
|
||||
const opus_int length, /* I Length of input */
|
||||
const opus_int order /* I Correlation order (even) */
|
||||
) = {
|
||||
silk_warped_autocorrelation_FIX_c, /* ARMv4 */
|
||||
silk_warped_autocorrelation_FIX_c, /* EDSP */
|
||||
silk_warped_autocorrelation_FIX_c, /* Media */
|
||||
silk_warped_autocorrelation_FIX_neon, /* Neon */
|
||||
};
|
||||
|
||||
# endif
|
||||
|
||||
#endif /* OPUS_HAVE_RTCD */
|
68
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/biquad_alt_arm.h
vendored
Normal file
68
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/biquad_alt_arm.h
vendored
Normal file
|
@ -0,0 +1,68 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2017 Google Inc.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_BIQUAD_ALT_ARM_H
|
||||
# define SILK_BIQUAD_ALT_ARM_H
|
||||
|
||||
# include "celt/arm/armcpu.h"
|
||||
|
||||
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
void silk_biquad_alt_stride2_neon(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
opus_int32 *S, /* I/O State vector [4] */
|
||||
opus_int16 *out, /* O output signal */
|
||||
const opus_int32 len /* I signal length (must be even) */
|
||||
);
|
||||
|
||||
# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
|
||||
# define OVERRIDE_silk_biquad_alt_stride2 (1)
|
||||
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), PRESUME_NEON(silk_biquad_alt_stride2)(in, B_Q28, A_Q28, S, out, len))
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# if !defined(OVERRIDE_silk_biquad_alt_stride2)
|
||||
/*Is run-time CPU detection enabled on this platform?*/
|
||||
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
|
||||
extern void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK+1])(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
opus_int32 *S, /* I/O State vector [4] */
|
||||
opus_int16 *out, /* O output signal */
|
||||
const opus_int32 len /* I signal length (must be even) */
|
||||
);
|
||||
# define OVERRIDE_silk_biquad_alt_stride2 (1)
|
||||
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((*SILK_BIQUAD_ALT_STRIDE2_IMPL[(arch)&OPUS_ARCHMASK])(in, B_Q28, A_Q28, S, out, len))
|
||||
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
|
||||
# define OVERRIDE_silk_biquad_alt_stride2 (1)
|
||||
# define silk_biquad_alt_stride2(in, B_Q28, A_Q28, S, out, len, arch) ((void)(arch), silk_biquad_alt_stride2_neon(in, B_Q28, A_Q28, S, out, len))
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#endif /* end SILK_BIQUAD_ALT_ARM_H */
|
156
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/biquad_alt_neon_intr.c
vendored
Normal file
156
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/biquad_alt_neon_intr.c
vendored
Normal file
|
@ -0,0 +1,156 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2017 Google Inc.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <arm_neon.h>
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
# include <string.h>
|
||||
# include "stack_alloc.h"
|
||||
#endif
|
||||
#include "SigProc_FIX.h"
|
||||
|
||||
static inline void silk_biquad_alt_stride2_kernel( const int32x4_t A_L_s32x4, const int32x4_t A_U_s32x4, const int32x4_t B_Q28_s32x4, const int32x2_t t_s32x2, const int32x4_t in_s32x4, int32x4_t *S_s32x4, int32x2_t *out32_Q14_s32x2 )
|
||||
{
|
||||
int32x4_t t_s32x4, out32_Q14_s32x4;
|
||||
|
||||
*out32_Q14_s32x2 = vadd_s32( vget_low_s32( *S_s32x4 ), t_s32x2 ); /* silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ) */
|
||||
*S_s32x4 = vcombine_s32( vget_high_s32( *S_s32x4 ), vdup_n_s32( 0 ) ); /* S{0,1} = S{2,3}; S{2,3} = 0; */
|
||||
*out32_Q14_s32x2 = vshl_n_s32( *out32_Q14_s32x2, 2 ); /* out32_Q14_{0,1} = silk_LSHIFT( silk_SMLAWB( S{0,1}, B_Q28[ 0 ], in{0,1} ), 2 ); */
|
||||
out32_Q14_s32x4 = vcombine_s32( *out32_Q14_s32x2, *out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} */
|
||||
t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_L_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_L_Q28 ) */
|
||||
*S_s32x4 = vrsraq_n_s32( *S_s32x4, t_s32x4, 14 ); /* S{0,1} = S{2,3} + silk_RSHIFT_ROUND(); S{2,3} = silk_RSHIFT_ROUND(); */
|
||||
t_s32x4 = vqdmulhq_s32( out32_Q14_s32x4, A_U_s32x4 ); /* silk_SMULWB( out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ) */
|
||||
*S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S{0,1,2,3}, out32_Q14_{0,1,0,1}, A{0,0,1,1}_U_Q28 ); */
|
||||
t_s32x4 = vqdmulhq_s32( in_s32x4, B_Q28_s32x4 ); /* silk_SMULWB( B_Q28[ {1,1,2,2} ], in{0,1,0,1} ) */
|
||||
*S_s32x4 = vaddq_s32( *S_s32x4, t_s32x4 ); /* S0 = silk_SMLAWB( S0, B_Q28[ {1,1,2,2} ], in{0,1,0,1} ); */
|
||||
}
|
||||
|
||||
void silk_biquad_alt_stride2_neon(
|
||||
const opus_int16 *in, /* I input signal */
|
||||
const opus_int32 *B_Q28, /* I MA coefficients [3] */
|
||||
const opus_int32 *A_Q28, /* I AR coefficients [2] */
|
||||
opus_int32 *S, /* I/O State vector [4] */
|
||||
opus_int16 *out, /* O output signal */
|
||||
const opus_int32 len /* I signal length (must be even) */
|
||||
)
|
||||
{
|
||||
/* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
|
||||
opus_int k = 0;
|
||||
const int32x2_t offset_s32x2 = vdup_n_s32( (1<<14) - 1 );
|
||||
const int32x4_t offset_s32x4 = vcombine_s32( offset_s32x2, offset_s32x2 );
|
||||
int16x4_t in_s16x4 = vdup_n_s16( 0 );
|
||||
int16x4_t out_s16x4;
|
||||
int32x2_t A_Q28_s32x2, A_L_s32x2, A_U_s32x2, B_Q28_s32x2, t_s32x2;
|
||||
int32x4_t A_L_s32x4, A_U_s32x4, B_Q28_s32x4, S_s32x4, out32_Q14_s32x4;
|
||||
int32x2x2_t t0_s32x2x2, t1_s32x2x2, t2_s32x2x2, S_s32x2x2;
|
||||
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
opus_int32 S_c[ 4 ];
|
||||
VARDECL( opus_int16, out_c );
|
||||
SAVE_STACK;
|
||||
ALLOC( out_c, 2 * len, opus_int16 );
|
||||
|
||||
silk_memcpy( &S_c, S, sizeof( S_c ) );
|
||||
silk_biquad_alt_stride2_c( in, B_Q28, A_Q28, S_c, out_c, len );
|
||||
#endif
|
||||
|
||||
/* Negate A_Q28 values and split in two parts */
|
||||
A_Q28_s32x2 = vld1_s32( A_Q28 );
|
||||
A_Q28_s32x2 = vneg_s32( A_Q28_s32x2 );
|
||||
A_L_s32x2 = vshl_n_s32( A_Q28_s32x2, 18 ); /* ( -A_Q28[] & 0x00003FFF ) << 18 */
|
||||
A_L_s32x2 = vreinterpret_s32_u32( vshr_n_u32( vreinterpret_u32_s32( A_L_s32x2 ), 3 ) ); /* ( -A_Q28[] & 0x00003FFF ) << 15 */
|
||||
A_U_s32x2 = vshr_n_s32( A_Q28_s32x2, 14 ); /* silk_RSHIFT( -A_Q28[], 14 ) */
|
||||
A_U_s32x2 = vshl_n_s32( A_U_s32x2, 16 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 16 (Clip two leading bits to conform to C function.) */
|
||||
A_U_s32x2 = vshr_n_s32( A_U_s32x2, 1 ); /* silk_RSHIFT( -A_Q28[], 14 ) << 15 */
|
||||
|
||||
B_Q28_s32x2 = vld1_s32( B_Q28 );
|
||||
t_s32x2 = vld1_s32( B_Q28 + 1 );
|
||||
t0_s32x2x2 = vzip_s32( A_L_s32x2, A_L_s32x2 );
|
||||
t1_s32x2x2 = vzip_s32( A_U_s32x2, A_U_s32x2 );
|
||||
t2_s32x2x2 = vzip_s32( t_s32x2, t_s32x2 );
|
||||
A_L_s32x4 = vcombine_s32( t0_s32x2x2.val[ 0 ], t0_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_L_Q28 */
|
||||
A_U_s32x4 = vcombine_s32( t1_s32x2x2.val[ 0 ], t1_s32x2x2.val[ 1 ] ); /* A{0,0,1,1}_U_Q28 */
|
||||
B_Q28_s32x4 = vcombine_s32( t2_s32x2x2.val[ 0 ], t2_s32x2x2.val[ 1 ] ); /* B_Q28[ {1,1,2,2} ] */
|
||||
S_s32x4 = vld1q_s32( S ); /* S0 = S[ 0 ]; S3 = S[ 3 ]; */
|
||||
S_s32x2x2 = vtrn_s32( vget_low_s32( S_s32x4 ), vget_high_s32( S_s32x4 ) ); /* S2 = S[ 1 ]; S1 = S[ 2 ]; */
|
||||
S_s32x4 = vcombine_s32( S_s32x2x2.val[ 0 ], S_s32x2x2.val[ 1 ] );
|
||||
|
||||
for( ; k < len - 1; k += 2 ) {
|
||||
int32x4_t in_s32x4[ 2 ], t_s32x4;
|
||||
int32x2_t out32_Q14_s32x2[ 2 ];
|
||||
|
||||
/* S[ 2 * i + 0 ], S[ 2 * i + 1 ], S[ 2 * i + 2 ], S[ 2 * i + 3 ]: Q12 */
|
||||
in_s16x4 = vld1_s16( &in[ 2 * k ] ); /* in{0,1,2,3} = in[ 2 * k + {0,1,2,3} ]; */
|
||||
in_s32x4[ 0 ] = vshll_n_s16( in_s16x4, 15 ); /* in{0,1,2,3} << 15 */
|
||||
t_s32x4 = vqdmulhq_lane_s32( in_s32x4[ 0 ], B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1,2,3} ) */
|
||||
in_s32x4[ 1 ] = vcombine_s32( vget_high_s32( in_s32x4[ 0 ] ), vget_high_s32( in_s32x4[ 0 ] ) ); /* in{2,3,2,3} << 15 */
|
||||
in_s32x4[ 0 ] = vcombine_s32( vget_low_s32 ( in_s32x4[ 0 ] ), vget_low_s32 ( in_s32x4[ 0 ] ) ); /* in{0,1,0,1} << 15 */
|
||||
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_low_s32 ( t_s32x4 ), in_s32x4[ 0 ], &S_s32x4, &out32_Q14_s32x2[ 0 ] );
|
||||
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, vget_high_s32( t_s32x4 ), in_s32x4[ 1 ], &S_s32x4, &out32_Q14_s32x2[ 1 ] );
|
||||
|
||||
/* Scale back to Q0 and saturate */
|
||||
out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2[ 0 ], out32_Q14_s32x2[ 1 ] ); /* out32_Q14_{0,1,2,3} */
|
||||
out32_Q14_s32x4 = vaddq_s32( out32_Q14_s32x4, offset_s32x4 ); /* out32_Q14_{0,1,2,3} + (1<<14) - 1 */
|
||||
out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ) */
|
||||
vst1_s16( &out[ 2 * k ], out_s16x4 ); /* out[ 2 * k + {0,1,2,3} ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,2,3} + (1<<14) - 1, 14 ) ); */
|
||||
}
|
||||
|
||||
/* Process leftover. */
|
||||
if( k < len ) {
|
||||
int32x4_t in_s32x4;
|
||||
int32x2_t out32_Q14_s32x2;
|
||||
|
||||
/* S[ 2 * i + 0 ], S[ 2 * i + 1 ]: Q12 */
|
||||
in_s16x4 = vld1_lane_s16( &in[ 2 * k + 0 ], in_s16x4, 0 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */
|
||||
in_s16x4 = vld1_lane_s16( &in[ 2 * k + 1 ], in_s16x4, 1 ); /* in{0,1} = in[ 2 * k + {0,1} ]; */
|
||||
in_s32x4 = vshll_n_s16( in_s16x4, 15 ); /* in{0,1} << 15 */
|
||||
t_s32x2 = vqdmulh_lane_s32( vget_low_s32( in_s32x4 ), B_Q28_s32x2, 0 ); /* silk_SMULWB( B_Q28[ 0 ], in{0,1} ) */
|
||||
in_s32x4 = vcombine_s32( vget_low_s32( in_s32x4 ), vget_low_s32( in_s32x4 ) ); /* in{0,1,0,1} << 15 */
|
||||
silk_biquad_alt_stride2_kernel( A_L_s32x4, A_U_s32x4, B_Q28_s32x4, t_s32x2, in_s32x4, &S_s32x4, &out32_Q14_s32x2 );
|
||||
|
||||
/* Scale back to Q0 and saturate */
|
||||
out32_Q14_s32x2 = vadd_s32( out32_Q14_s32x2, offset_s32x2 ); /* out32_Q14_{0,1} + (1<<14) - 1 */
|
||||
out32_Q14_s32x4 = vcombine_s32( out32_Q14_s32x2, out32_Q14_s32x2 ); /* out32_Q14_{0,1,0,1} + (1<<14) - 1 */
|
||||
out_s16x4 = vqshrn_n_s32( out32_Q14_s32x4, 14 ); /* (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_{0,1,0,1} + (1<<14) - 1, 14 ) ) */
|
||||
vst1_lane_s16( &out[ 2 * k + 0 ], out_s16x4, 0 ); /* out[ 2 * k + 0 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_0 + (1<<14) - 1, 14 ) ); */
|
||||
vst1_lane_s16( &out[ 2 * k + 1 ], out_s16x4, 1 ); /* out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14_1 + (1<<14) - 1, 14 ) ); */
|
||||
}
|
||||
|
||||
vst1q_lane_s32( &S[ 0 ], S_s32x4, 0 ); /* S[ 0 ] = S0; */
|
||||
vst1q_lane_s32( &S[ 1 ], S_s32x4, 2 ); /* S[ 1 ] = S2; */
|
||||
vst1q_lane_s32( &S[ 2 ], S_s32x4, 1 ); /* S[ 2 ] = S1; */
|
||||
vst1q_lane_s32( &S[ 3 ], S_s32x4, 3 ); /* S[ 3 ] = S3; */
|
||||
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
silk_assert( !memcmp( S_c, S, sizeof( S_c ) ) );
|
||||
silk_assert( !memcmp( out_c, out, 2 * len * sizeof( opus_int16 ) ) );
|
||||
RESTORE_STACK;
|
||||
#endif
|
||||
}
|
39
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/macros_arm64.h
vendored
Normal file
39
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/macros_arm64.h
vendored
Normal file
|
@ -0,0 +1,39 @@
|
|||
/***********************************************************************
|
||||
Copyright (C) 2015 Vidyo
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_MACROS_ARM64_H
|
||||
#define SILK_MACROS_ARM64_H
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#undef silk_ADD_SAT32
|
||||
#define silk_ADD_SAT32(a, b) (vqadds_s32((a), (b)))
|
||||
|
||||
#undef silk_SUB_SAT32
|
||||
#define silk_SUB_SAT32(a, b) (vqsubs_s32((a), (b)))
|
||||
|
||||
#endif /* SILK_MACROS_ARM64_H */
|
110
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/macros_armv4.h
vendored
Normal file
110
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/macros_armv4.h
vendored
Normal file
|
@ -0,0 +1,110 @@
|
|||
/***********************************************************************
|
||||
Copyright (C) 2013 Xiph.Org Foundation and contributors.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_MACROS_ARMv4_H
|
||||
#define SILK_MACROS_ARMv4_H
|
||||
|
||||
/* This macro only avoids the undefined behaviour from a left shift of
|
||||
a negative value. It should only be used in macros that can't include
|
||||
SigProc_FIX.h. In other cases, use silk_LSHIFT32(). */
|
||||
#define SAFE_SHL(a,b) ((opus_int32)((opus_uint32)(a) << (b)))
|
||||
|
||||
/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
|
||||
#undef silk_SMULWB
|
||||
static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
|
||||
{
|
||||
unsigned rd_lo;
|
||||
int rd_hi;
|
||||
__asm__(
|
||||
"#silk_SMULWB\n\t"
|
||||
"smull %0, %1, %2, %3\n\t"
|
||||
: "=&r"(rd_lo), "=&r"(rd_hi)
|
||||
: "%r"(a), "r"(SAFE_SHL(b,16))
|
||||
);
|
||||
return rd_hi;
|
||||
}
|
||||
#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b))
|
||||
|
||||
/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
|
||||
#undef silk_SMLAWB
|
||||
#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c))
|
||||
|
||||
/* (a32 * (b32 >> 16)) >> 16 */
|
||||
#undef silk_SMULWT
|
||||
static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
|
||||
{
|
||||
unsigned rd_lo;
|
||||
int rd_hi;
|
||||
__asm__(
|
||||
"#silk_SMULWT\n\t"
|
||||
"smull %0, %1, %2, %3\n\t"
|
||||
: "=&r"(rd_lo), "=&r"(rd_hi)
|
||||
: "%r"(a), "r"(b&~0xFFFF)
|
||||
);
|
||||
return rd_hi;
|
||||
}
|
||||
#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b))
|
||||
|
||||
/* a32 + (b32 * (c32 >> 16)) >> 16 */
|
||||
#undef silk_SMLAWT
|
||||
#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c))
|
||||
|
||||
/* (a32 * b32) >> 16 */
|
||||
#undef silk_SMULWW
|
||||
static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
|
||||
{
|
||||
unsigned rd_lo;
|
||||
int rd_hi;
|
||||
__asm__(
|
||||
"#silk_SMULWW\n\t"
|
||||
"smull %0, %1, %2, %3\n\t"
|
||||
: "=&r"(rd_lo), "=&r"(rd_hi)
|
||||
: "%r"(a), "r"(b)
|
||||
);
|
||||
return SAFE_SHL(rd_hi,16)+(rd_lo>>16);
|
||||
}
|
||||
#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
|
||||
|
||||
#undef silk_SMLAWW
|
||||
static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
|
||||
opus_int32 c)
|
||||
{
|
||||
unsigned rd_lo;
|
||||
int rd_hi;
|
||||
__asm__(
|
||||
"#silk_SMLAWW\n\t"
|
||||
"smull %0, %1, %2, %3\n\t"
|
||||
: "=&r"(rd_lo), "=&r"(rd_hi)
|
||||
: "%r"(b), "r"(c)
|
||||
);
|
||||
return a+SAFE_SHL(rd_hi,16)+(rd_lo>>16);
|
||||
}
|
||||
#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
|
||||
|
||||
#undef SAFE_SHL
|
||||
|
||||
#endif /* SILK_MACROS_ARMv4_H */
|
220
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/macros_armv5e.h
vendored
Normal file
220
Src/external_dependencies/openmpt-trunk/include/opus/silk/arm/macros_armv5e.h
vendored
Normal file
|
@ -0,0 +1,220 @@
|
|||
/***********************************************************************
|
||||
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
|
||||
Copyright (c) 2013 Parrot
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef SILK_MACROS_ARMv5E_H
|
||||
#define SILK_MACROS_ARMv5E_H
|
||||
|
||||
/* This macro only avoids the undefined behaviour from a left shift of
|
||||
a negative value. It should only be used in macros that can't include
|
||||
SigProc_FIX.h. In other cases, use silk_LSHIFT32(). */
|
||||
#define SAFE_SHL(a,b) ((opus_int32)((opus_uint32)(a) << (b)))
|
||||
|
||||
/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
|
||||
#undef silk_SMULWB
|
||||
static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SMULWB\n\t"
|
||||
"smulwb %0, %1, %2\n\t"
|
||||
: "=r"(res)
|
||||
: "r"(a), "r"(b)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b))
|
||||
|
||||
/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
|
||||
#undef silk_SMLAWB
|
||||
static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
|
||||
opus_int16 c)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SMLAWB\n\t"
|
||||
"smlawb %0, %1, %2, %3\n\t"
|
||||
: "=r"(res)
|
||||
: "r"(b), "r"(c), "r"(a)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c))
|
||||
|
||||
/* (a32 * (b32 >> 16)) >> 16 */
|
||||
#undef silk_SMULWT
|
||||
static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SMULWT\n\t"
|
||||
"smulwt %0, %1, %2\n\t"
|
||||
: "=r"(res)
|
||||
: "r"(a), "r"(b)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b))
|
||||
|
||||
/* a32 + (b32 * (c32 >> 16)) >> 16 */
|
||||
#undef silk_SMLAWT
|
||||
static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
|
||||
opus_int32 c)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SMLAWT\n\t"
|
||||
"smlawt %0, %1, %2, %3\n\t"
|
||||
: "=r"(res)
|
||||
: "r"(b), "r"(c), "r"(a)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c))
|
||||
|
||||
/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
|
||||
#undef silk_SMULBB
|
||||
static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SMULBB\n\t"
|
||||
"smulbb %0, %1, %2\n\t"
|
||||
: "=r"(res)
|
||||
: "%r"(a), "r"(b)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b))
|
||||
|
||||
/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
|
||||
#undef silk_SMLABB
|
||||
static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
|
||||
opus_int32 c)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SMLABB\n\t"
|
||||
"smlabb %0, %1, %2, %3\n\t"
|
||||
: "=r"(res)
|
||||
: "%r"(b), "r"(c), "r"(a)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c))
|
||||
|
||||
/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
|
||||
#undef silk_SMULBT
|
||||
static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SMULBT\n\t"
|
||||
"smulbt %0, %1, %2\n\t"
|
||||
: "=r"(res)
|
||||
: "r"(a), "r"(b)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b))
|
||||
|
||||
/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
|
||||
#undef silk_SMLABT
|
||||
static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
|
||||
opus_int32 c)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SMLABT\n\t"
|
||||
"smlabt %0, %1, %2, %3\n\t"
|
||||
: "=r"(res)
|
||||
: "r"(b), "r"(c), "r"(a)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c))
|
||||
|
||||
/* add/subtract with output saturated */
|
||||
#undef silk_ADD_SAT32
|
||||
static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_ADD_SAT32\n\t"
|
||||
"qadd %0, %1, %2\n\t"
|
||||
: "=r"(res)
|
||||
: "%r"(a), "r"(b)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))
|
||||
|
||||
#undef silk_SUB_SAT32
|
||||
static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_SUB_SAT32\n\t"
|
||||
"qsub %0, %1, %2\n\t"
|
||||
: "=r"(res)
|
||||
: "r"(a), "r"(b)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))
|
||||
|
||||
#undef silk_CLZ16
|
||||
static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_CLZ16\n\t"
|
||||
"clz %0, %1;\n"
|
||||
: "=r"(res)
|
||||
: "r"(SAFE_SHL(in16,16)|0x8000)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))
|
||||
|
||||
#undef silk_CLZ32
|
||||
static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32)
|
||||
{
|
||||
int res;
|
||||
__asm__(
|
||||
"#silk_CLZ32\n\t"
|
||||
"clz %0, %1\n\t"
|
||||
: "=r"(res)
|
||||
: "r"(in32)
|
||||
);
|
||||
return res;
|
||||
}
|
||||
#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32))
|
||||
|
||||
#undef SAFE_SHL
|
||||
|
||||
#endif /* SILK_MACROS_ARMv5E_H */
|
Loading…
Add table
Add a link
Reference in a new issue