mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
Merge commit '73b704ac609d83e0be124589f24efd9b94947cf9'
* commit '73b704ac609d83e0be124589f24efd9b94947cf9':
  arm: Add some missing header #includes
  floatdsp: move scalarproduct_float from dsputil to avfloatdsp.

Conflicts:
  libavcodec/acelp_pitch_delay.c
  libavcodec/amrnbdec.c
  libavcodec/amrwbdec.c
  libavcodec/ra288.c
  libavcodec/x86/dsputil_mmx.c
  libavutil/x86/float_dsp.asm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
8102f27b5b
@ -292,7 +292,6 @@ typedef struct AACContext {
|
|||||||
FFTContext mdct;
|
FFTContext mdct;
|
||||||
FFTContext mdct_small;
|
FFTContext mdct_small;
|
||||||
FFTContext mdct_ltp;
|
FFTContext mdct_ltp;
|
||||||
DSPContext dsp;
|
|
||||||
FmtConvertContext fmt_conv;
|
FmtConvertContext fmt_conv;
|
||||||
AVFloatDSPContext fdsp;
|
AVFloatDSPContext fdsp;
|
||||||
int random_state;
|
int random_state;
|
||||||
|
@ -928,7 +928,6 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
ff_aac_sbr_init();
|
ff_aac_sbr_init();
|
||||||
|
|
||||||
ff_dsputil_init(&ac->dsp, avctx);
|
|
||||||
ff_fmt_convert_init(&ac->fmt_conv, avctx);
|
ff_fmt_convert_init(&ac->fmt_conv, avctx);
|
||||||
avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||||
|
|
||||||
@ -1394,7 +1393,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
|
|||||||
cfo[k] = ac->random_state;
|
cfo[k] = ac->random_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
|
band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len);
|
||||||
scale = sf[idx] / sqrtf(band_energy);
|
scale = sf[idx] / sqrtf(band_energy);
|
||||||
ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
|
ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
|
||||||
}
|
}
|
||||||
|
@ -21,10 +21,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "libavutil/libm.h"
|
#include "libavutil/libm.h"
|
||||||
#include "libavutil/mathematics.h"
|
#include "libavutil/mathematics.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "acelp_pitch_delay.h"
|
#include "acelp_pitch_delay.h"
|
||||||
#include "celp_math.h"
|
#include "celp_math.h"
|
||||||
|
|
||||||
@ -132,7 +132,7 @@ float ff_amr_set_fixed_gain(float fixed_gain_factor, float fixed_mean_energy,
|
|||||||
// Note 10^(0.05 * -10log(average x2)) = 1/sqrt((average x2)).
|
// Note 10^(0.05 * -10log(average x2)) = 1/sqrt((average x2)).
|
||||||
float val = fixed_gain_factor *
|
float val = fixed_gain_factor *
|
||||||
exp2f(M_LOG2_10 * 0.05 *
|
exp2f(M_LOG2_10 * 0.05 *
|
||||||
(ff_scalarproduct_float_c(pred_table, prediction_error, 4) +
|
(avpriv_scalarproduct_float_c(pred_table, prediction_error, 4) +
|
||||||
energy_mean)) /
|
energy_mean)) /
|
||||||
sqrtf(fixed_mean_energy);
|
sqrtf(fixed_mean_energy);
|
||||||
|
|
||||||
|
@ -23,8 +23,8 @@
|
|||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "acelp_vectors.h"
|
#include "acelp_vectors.h"
|
||||||
|
|
||||||
const uint8_t ff_fc_2pulses_9bits_track1[16] =
|
const uint8_t ff_fc_2pulses_9bits_track1[16] =
|
||||||
@ -203,7 +203,7 @@ void ff_adaptive_gain_control(float *out, const float *in, float speech_energ,
|
|||||||
int size, float alpha, float *gain_mem)
|
int size, float alpha, float *gain_mem)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
float postfilter_energ = ff_scalarproduct_float_c(in, in, size);
|
float postfilter_energ = avpriv_scalarproduct_float_c(in, in, size);
|
||||||
float gain_scale_factor = 1.0;
|
float gain_scale_factor = 1.0;
|
||||||
float mem = *gain_mem;
|
float mem = *gain_mem;
|
||||||
|
|
||||||
@ -224,7 +224,7 @@ void ff_scale_vector_to_given_sum_of_squares(float *out, const float *in,
|
|||||||
float sum_of_squares, const int n)
|
float sum_of_squares, const int n)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
float scalefactor = ff_scalarproduct_float_c(in, in, n);
|
float scalefactor = avpriv_scalarproduct_float_c(in, in, n);
|
||||||
if (scalefactor)
|
if (scalefactor)
|
||||||
scalefactor = sqrt(sum_of_squares / scalefactor);
|
scalefactor = sqrt(sum_of_squares / scalefactor);
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
|
@ -44,8 +44,8 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include "libavutil/channel_layout.h"
|
#include "libavutil/channel_layout.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
#include "libavutil/avassert.h"
|
#include "libavutil/avassert.h"
|
||||||
#include "celp_math.h"
|
#include "celp_math.h"
|
||||||
|
@ -26,10 +26,10 @@
|
|||||||
|
|
||||||
#include "libavutil/channel_layout.h"
|
#include "libavutil/channel_layout.h"
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "libavutil/lfg.h"
|
#include "libavutil/lfg.h"
|
||||||
|
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "lsp.h"
|
#include "lsp.h"
|
||||||
#include "celp_filters.h"
|
#include "celp_filters.h"
|
||||||
#include "celp_math.h"
|
#include "celp_math.h"
|
||||||
@ -893,7 +893,8 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
|
|||||||
const float *synth_exc, float hb_gain)
|
const float *synth_exc, float hb_gain)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
|
float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc,
|
||||||
|
AMRWB_SFR_SIZE);
|
||||||
|
|
||||||
/* Generate a white-noise excitation */
|
/* Generate a white-noise excitation */
|
||||||
for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
|
for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
|
||||||
|
@ -142,8 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
|||||||
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
|
|
||||||
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
|
||||||
|
|
||||||
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
|
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
|
||||||
int len);
|
int len);
|
||||||
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
|
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
|
||||||
@ -293,7 +291,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
|
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
|
||||||
}
|
}
|
||||||
|
|
||||||
c->scalarproduct_float = ff_scalarproduct_float_neon;
|
|
||||||
c->vector_clipf = ff_vector_clipf_neon;
|
c->vector_clipf = ff_vector_clipf_neon;
|
||||||
c->vector_clip_int32 = ff_vector_clip_int32_neon;
|
c->vector_clip_int32 = ff_vector_clip_int32_neon;
|
||||||
|
|
||||||
|
@ -531,19 +531,6 @@ function ff_add_pixels_clamped_neon, export=1
|
|||||||
bx lr
|
bx lr
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
function ff_scalarproduct_float_neon, export=1
|
|
||||||
vmov.f32 q2, #0.0
|
|
||||||
1: vld1.32 {q0},[r0,:128]!
|
|
||||||
vld1.32 {q1},[r1,:128]!
|
|
||||||
vmla.f32 q2, q0, q1
|
|
||||||
subs r2, r2, #4
|
|
||||||
bgt 1b
|
|
||||||
vadd.f32 d0, d4, d5
|
|
||||||
vpadd.f32 d0, d0, d0
|
|
||||||
NOVFP vmov.32 r0, d0[0]
|
|
||||||
bx lr
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_vector_clipf_neon, export=1
|
function ff_vector_clipf_neon, export=1
|
||||||
VFP vdup.32 q1, d0[1]
|
VFP vdup.32 q1, d0[1]
|
||||||
VFP vdup.32 q0, d0[0]
|
VFP vdup.32 q0, d0[0]
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "libavutil/arm/cpu.h"
|
#include "libavutil/arm/cpu.h"
|
||||||
|
#include "libavcodec/avcodec.h"
|
||||||
#include "libavcodec/h264pred.h"
|
#include "libavcodec/h264pred.h"
|
||||||
|
|
||||||
void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
|
void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "libavutil/attributes.h"
|
#include "libavutil/attributes.h"
|
||||||
#include "libavutil/cpu.h"
|
#include "libavutil/cpu.h"
|
||||||
#include "libavutil/arm/cpu.h"
|
#include "libavutil/arm/cpu.h"
|
||||||
|
#include "libavcodec/dsputil.h"
|
||||||
#include "libavcodec/vp3dsp.h"
|
#include "libavcodec/vp3dsp.h"
|
||||||
|
|
||||||
void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
|
void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
#include "libavutil/arm/cpu.h"
|
#include "libavutil/arm/cpu.h"
|
||||||
#include "libavcodec/vp8dsp.h"
|
#include "libavcodec/vp8dsp.h"
|
||||||
#include "vp8dsp.h"
|
#include "vp8dsp.h"
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
#include "libavcodec/vp8dsp.h"
|
#include "libavcodec/vp8dsp.h"
|
||||||
#include "vp8dsp.h"
|
#include "vp8dsp.h"
|
||||||
|
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
#include "libavcodec/vp8dsp.h"
|
#include "libavcodec/vp8dsp.h"
|
||||||
#include "vp8dsp.h"
|
#include "vp8dsp.h"
|
||||||
|
|
||||||
|
@ -2483,17 +2483,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
|
|||||||
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
|
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
|
||||||
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
|
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
|
||||||
|
|
||||||
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
|
|
||||||
{
|
|
||||||
float p = 0.0;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < len; i++)
|
|
||||||
p += v1[i] * v2[i];
|
|
||||||
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
|
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
|
||||||
uint32_t maxi, uint32_t maxisign)
|
uint32_t maxi, uint32_t maxisign)
|
||||||
{
|
{
|
||||||
@ -2875,7 +2864,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
|
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
|
||||||
c->apply_window_int16 = apply_window_int16_c;
|
c->apply_window_int16 = apply_window_int16_c;
|
||||||
c->vector_clip_int32 = vector_clip_int32_c;
|
c->vector_clip_int32 = vector_clip_int32_c;
|
||||||
c->scalarproduct_float = ff_scalarproduct_float_c;
|
|
||||||
|
|
||||||
c->shrink[0]= av_image_copy_plane;
|
c->shrink[0]= av_image_copy_plane;
|
||||||
c->shrink[1]= ff_shrink22;
|
c->shrink[1]= ff_shrink22;
|
||||||
|
@ -360,13 +360,6 @@ typedef struct DSPContext {
|
|||||||
|
|
||||||
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
|
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
|
||||||
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
|
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
|
||||||
/**
|
|
||||||
* Calculate the scalar product of two vectors of floats.
|
|
||||||
* @param v1 first vector, 16-byte aligned
|
|
||||||
* @param v2 second vector, 16-byte aligned
|
|
||||||
* @param len length of vectors, multiple of 4
|
|
||||||
*/
|
|
||||||
float (*scalarproduct_float)(const float *v1, const float *v2, int len);
|
|
||||||
|
|
||||||
/* (I)DCT */
|
/* (I)DCT */
|
||||||
void (*fdct)(DCTELEM *block/* align 16*/);
|
void (*fdct)(DCTELEM *block/* align 16*/);
|
||||||
@ -473,17 +466,6 @@ attribute_deprecated void dsputil_init(DSPContext* c, AVCodecContext *avctx);
|
|||||||
|
|
||||||
int ff_check_alignment(void);
|
int ff_check_alignment(void);
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the scalar product of two vectors.
|
|
||||||
*
|
|
||||||
* @param v1 first input vector
|
|
||||||
* @param v2 second input vector
|
|
||||||
* @param len number of elements
|
|
||||||
*
|
|
||||||
* @return sum of elementwise products
|
|
||||||
*/
|
|
||||||
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* permute block according to permutation.
|
* permute block according to permutation.
|
||||||
* @param last last non zero element in scantable order
|
* @param last last non zero element in scantable order
|
||||||
|
@ -30,10 +30,10 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
#include "libavutil/channel_layout.h"
|
#include "libavutil/channel_layout.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "get_bits.h"
|
#include "get_bits.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "qcelpdata.h"
|
#include "qcelpdata.h"
|
||||||
#include "celp_filters.h"
|
#include "celp_filters.h"
|
||||||
#include "acelp_filters.h"
|
#include "acelp_filters.h"
|
||||||
@ -400,12 +400,10 @@ static void apply_gain_ctrl(float *v_out, const float *v_ref, const float *v_in)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < 160; i += 40)
|
for (i = 0; i < 160; i += 40) {
|
||||||
ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i,
|
float res = avpriv_scalarproduct_float_c(v_ref + i, v_ref + i, 40);
|
||||||
ff_scalarproduct_float_c(v_ref + i,
|
ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i, res, 40);
|
||||||
v_ref + i,
|
}
|
||||||
40),
|
|
||||||
40);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -680,8 +678,9 @@ static void postfilter(QCELPContext *q, float *samples, float *lpc)
|
|||||||
ff_tilt_compensation(&q->postfilter_tilt_mem, 0.3, pole_out + 10, 160);
|
ff_tilt_compensation(&q->postfilter_tilt_mem, 0.3, pole_out + 10, 160);
|
||||||
|
|
||||||
ff_adaptive_gain_control(samples, pole_out + 10,
|
ff_adaptive_gain_control(samples, pole_out + 10,
|
||||||
ff_scalarproduct_float_c(q->formant_mem + 10,
|
avpriv_scalarproduct_float_c(q->formant_mem + 10,
|
||||||
q->formant_mem + 10, 160),
|
q->formant_mem + 10,
|
||||||
|
160),
|
||||||
160, 0.9375, &q->postfilter_agc_mem);
|
160, 0.9375, &q->postfilter_agc_mem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,7 +84,7 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx)
|
|||||||
static void convolve(float *tgt, const float *src, int len, int n)
|
static void convolve(float *tgt, const float *src, int len, int n)
|
||||||
{
|
{
|
||||||
for (; n >= 0; n--)
|
for (; n >= 0; n--)
|
||||||
tgt[n] = ff_scalarproduct_float_c(src, src - n, len);
|
tgt[n] = avpriv_scalarproduct_float_c(src, src - n, len);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,7 +113,7 @@ static void decode(RA288Context *ractx, float gain, int cb_coef)
|
|||||||
for (i=0; i < 5; i++)
|
for (i=0; i < 5; i++)
|
||||||
buffer[i] = codetable[cb_coef][i] * sumsum;
|
buffer[i] = codetable[cb_coef][i] * sumsum;
|
||||||
|
|
||||||
sum = ff_scalarproduct_float_c(buffer, buffer, 5);
|
sum = avpriv_scalarproduct_float_c(buffer, buffer, 5);
|
||||||
|
|
||||||
sum = FFMAX(sum, 5. / (1<<24));
|
sum = FFMAX(sum, 5. / (1<<24));
|
||||||
|
|
||||||
|
@ -26,11 +26,11 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "libavutil/channel_layout.h"
|
#include "libavutil/channel_layout.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "libavutil/mathematics.h"
|
#include "libavutil/mathematics.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#define BITSTREAM_READER_LE
|
#define BITSTREAM_READER_LE
|
||||||
#include "get_bits.h"
|
#include "get_bits.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
#include "lsp.h"
|
#include "lsp.h"
|
||||||
@ -411,8 +411,9 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params,
|
|||||||
convolute_with_sparse(fixed_vector, &fixed_cb, impulse_response,
|
convolute_with_sparse(fixed_vector, &fixed_cb, impulse_response,
|
||||||
SUBFR_SIZE);
|
SUBFR_SIZE);
|
||||||
|
|
||||||
avg_energy =
|
avg_energy = (0.01 + avpriv_scalarproduct_float_c(fixed_vector,
|
||||||
(0.01 + ff_scalarproduct_float_c(fixed_vector, fixed_vector, SUBFR_SIZE)) /
|
fixed_vector,
|
||||||
|
SUBFR_SIZE)) /
|
||||||
SUBFR_SIZE;
|
SUBFR_SIZE;
|
||||||
|
|
||||||
ctx->past_pitch_gain = pitch_gain = gain_cb[params->gc_index[i]][0];
|
ctx->past_pitch_gain = pitch_gain = gain_cb[params->gc_index[i]][0];
|
||||||
@ -454,7 +455,7 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params,
|
|||||||
|
|
||||||
if (ctx->mode == MODE_5k0) {
|
if (ctx->mode == MODE_5k0) {
|
||||||
for (i = 0; i < subframe_count; i++) {
|
for (i = 0; i < subframe_count; i++) {
|
||||||
float energy = ff_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
|
float energy = avpriv_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
|
||||||
ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
|
ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE,
|
||||||
SUBFR_SIZE);
|
SUBFR_SIZE);
|
||||||
ff_adaptive_gain_control(&synth[i * SUBFR_SIZE],
|
ff_adaptive_gain_control(&synth[i * SUBFR_SIZE],
|
||||||
|
@ -25,8 +25,8 @@
|
|||||||
|
|
||||||
#include "sipr.h"
|
#include "sipr.h"
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "libavutil/mathematics.h"
|
#include "libavutil/mathematics.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "lsp.h"
|
#include "lsp.h"
|
||||||
#include "celp_filters.h"
|
#include "celp_filters.h"
|
||||||
#include "acelp_vectors.h"
|
#include "acelp_vectors.h"
|
||||||
@ -163,11 +163,11 @@ static float acelp_decode_gain_codef(float gain_corr_factor, const float *fc_v,
|
|||||||
const float *ma_prediction_coeff,
|
const float *ma_prediction_coeff,
|
||||||
int subframe_size, int ma_pred_order)
|
int subframe_size, int ma_pred_order)
|
||||||
{
|
{
|
||||||
mr_energy +=
|
mr_energy += avpriv_scalarproduct_float_c(quant_energy, ma_prediction_coeff,
|
||||||
ff_scalarproduct_float_c(quant_energy, ma_prediction_coeff, ma_pred_order);
|
ma_pred_order);
|
||||||
|
|
||||||
mr_energy = gain_corr_factor * exp(M_LN10 / 20. * mr_energy) /
|
mr_energy = gain_corr_factor * exp(M_LN10 / 20. * mr_energy) /
|
||||||
sqrt((0.01 + ff_scalarproduct_float_c(fc_v, fc_v, subframe_size)));
|
sqrt((0.01 + avpriv_scalarproduct_float_c(fc_v, fc_v, subframe_size)));
|
||||||
return mr_energy;
|
return mr_energy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,8 +30,8 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#include "libavutil/channel_layout.h"
|
#include "libavutil/channel_layout.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "libavutil/mem.h"
|
#include "libavutil/mem.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "get_bits.h"
|
#include "get_bits.h"
|
||||||
@ -523,7 +523,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch,
|
|||||||
|
|
||||||
/* find best fitting point in history */
|
/* find best fitting point in history */
|
||||||
do {
|
do {
|
||||||
dot = ff_scalarproduct_float_c(in, ptr, size);
|
dot = avpriv_scalarproduct_float_c(in, ptr, size);
|
||||||
if (dot > optimal_gain) {
|
if (dot > optimal_gain) {
|
||||||
optimal_gain = dot;
|
optimal_gain = dot;
|
||||||
best_hist_ptr = ptr;
|
best_hist_ptr = ptr;
|
||||||
@ -532,7 +532,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch,
|
|||||||
|
|
||||||
if (optimal_gain <= 0)
|
if (optimal_gain <= 0)
|
||||||
return -1;
|
return -1;
|
||||||
dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
|
dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
|
||||||
if (dot <= 0) // would be 1.0
|
if (dot <= 0) // would be 1.0
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
@ -562,8 +562,8 @@ static float tilt_factor(const float *lpcs, int n_lpcs)
|
|||||||
{
|
{
|
||||||
float rh0, rh1;
|
float rh0, rh1;
|
||||||
|
|
||||||
rh0 = 1.0 + ff_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
|
rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
|
||||||
rh1 = lpcs[0] + ff_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
|
rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
|
||||||
|
|
||||||
return rh1 / rh0;
|
return rh1 / rh0;
|
||||||
}
|
}
|
||||||
@ -656,7 +656,8 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs,
|
|||||||
-1.8 * tilt_factor(coeffs, remainder - 1),
|
-1.8 * tilt_factor(coeffs, remainder - 1),
|
||||||
coeffs, remainder);
|
coeffs, remainder);
|
||||||
}
|
}
|
||||||
sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder));
|
sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs,
|
||||||
|
remainder));
|
||||||
for (n = 0; n < remainder; n++)
|
for (n = 0; n < remainder; n++)
|
||||||
coeffs[n] *= sq;
|
coeffs[n] *= sq;
|
||||||
}
|
}
|
||||||
@ -1320,7 +1321,8 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
|
|||||||
/* Calculate gain for adaptive & fixed codebook signal.
|
/* Calculate gain for adaptive & fixed codebook signal.
|
||||||
* see ff_amr_set_fixed_gain(). */
|
* see ff_amr_set_fixed_gain(). */
|
||||||
idx = get_bits(gb, 7);
|
idx = get_bits(gb, 7);
|
||||||
fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) -
|
fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
|
||||||
|
gain_coeff, 6) -
|
||||||
5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
|
5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
|
||||||
acb_gain = wmavoice_gain_codebook_acb[idx];
|
acb_gain = wmavoice_gain_codebook_acb[idx];
|
||||||
pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
|
pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
|
||||||
|
@ -463,32 +463,6 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
|
|||||||
.src_unaligned:
|
.src_unaligned:
|
||||||
ADD_HFYU_LEFT_LOOP 0, 0
|
ADD_HFYU_LEFT_LOOP 0, 0
|
||||||
|
|
||||||
|
|
||||||
; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
|
|
||||||
INIT_XMM sse
|
|
||||||
cglobal scalarproduct_float, 3,3,2, v1, v2, offset
|
|
||||||
neg offsetq
|
|
||||||
shl offsetq, 2
|
|
||||||
sub v1q, offsetq
|
|
||||||
sub v2q, offsetq
|
|
||||||
xorps xmm0, xmm0
|
|
||||||
.loop:
|
|
||||||
movaps xmm1, [v1q+offsetq]
|
|
||||||
mulps xmm1, [v2q+offsetq]
|
|
||||||
addps xmm0, xmm1
|
|
||||||
add offsetq, 16
|
|
||||||
js .loop
|
|
||||||
movhlps xmm1, xmm0
|
|
||||||
addps xmm0, xmm1
|
|
||||||
movss xmm1, xmm0
|
|
||||||
shufps xmm0, xmm0, 1
|
|
||||||
addss xmm0, xmm1
|
|
||||||
%if ARCH_X86_64 == 0
|
|
||||||
movss r0m, xmm0
|
|
||||||
fld dword r0m
|
|
||||||
%endif
|
|
||||||
RET
|
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
|
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
|
||||||
; int32_t max, unsigned int len)
|
; int32_t max, unsigned int len)
|
||||||
|
@ -1987,8 +1987,6 @@ int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src,
|
|||||||
int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src,
|
int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src,
|
||||||
int w, int left);
|
int w, int left);
|
||||||
|
|
||||||
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
|
||||||
|
|
||||||
void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src,
|
void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src,
|
||||||
int32_t min, int32_t max, unsigned int len);
|
int32_t min, int32_t max, unsigned int len);
|
||||||
void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src,
|
void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src,
|
||||||
@ -2252,8 +2250,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
|
|||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
c->scalarproduct_float = ff_scalarproduct_float_sse;
|
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM && CONFIG_VIDEODSP
|
#if HAVE_INLINE_ASM && CONFIG_VIDEODSP
|
||||||
c->gmc = gmc_sse;
|
c->gmc = gmc_sse;
|
||||||
#endif
|
#endif
|
||||||
|
@ -43,6 +43,8 @@ void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
|
|||||||
|
|
||||||
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
||||||
|
|
||||||
|
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
||||||
|
|
||||||
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
||||||
{
|
{
|
||||||
fdsp->vector_fmul = ff_vector_fmul_neon;
|
fdsp->vector_fmul = ff_vector_fmul_neon;
|
||||||
@ -52,4 +54,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
|||||||
fdsp->vector_fmul_add = ff_vector_fmul_add_neon;
|
fdsp->vector_fmul_add = ff_vector_fmul_add_neon;
|
||||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
|
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
|
||||||
fdsp->butterflies_float = ff_butterflies_float_neon;
|
fdsp->butterflies_float = ff_butterflies_float_neon;
|
||||||
|
fdsp->scalarproduct_float = ff_scalarproduct_float_neon;
|
||||||
}
|
}
|
||||||
|
@ -256,3 +256,16 @@ function ff_butterflies_float_neon, export=1
|
|||||||
bgt 1b
|
bgt 1b
|
||||||
bx lr
|
bx lr
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
function ff_scalarproduct_float_neon, export=1
|
||||||
|
vmov.f32 q2, #0.0
|
||||||
|
1: vld1.32 {q0},[r0,:128]!
|
||||||
|
vld1.32 {q1},[r1,:128]!
|
||||||
|
vmla.f32 q2, q0, q1
|
||||||
|
subs r2, r2, #4
|
||||||
|
bgt 1b
|
||||||
|
vadd.f32 d0, d4, d5
|
||||||
|
vpadd.f32 d0, d0, d0
|
||||||
|
NOVFP vmov.32 r0, d0[0]
|
||||||
|
bx lr
|
||||||
|
endfunc
|
||||||
|
@ -104,6 +104,17 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
|
||||||
|
{
|
||||||
|
float p = 0.0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
p += v1[i] * v2[i];
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
||||||
{
|
{
|
||||||
fdsp->vector_fmul = vector_fmul_c;
|
fdsp->vector_fmul = vector_fmul_c;
|
||||||
@ -114,6 +125,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
|||||||
fdsp->vector_fmul_add = vector_fmul_add_c;
|
fdsp->vector_fmul_add = vector_fmul_add_c;
|
||||||
fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
|
fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
|
||||||
fdsp->butterflies_float = butterflies_float_c;
|
fdsp->butterflies_float = butterflies_float_c;
|
||||||
|
fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
|
||||||
|
|
||||||
#if ARCH_ARM
|
#if ARCH_ARM
|
||||||
ff_float_dsp_init_arm(fdsp);
|
ff_float_dsp_init_arm(fdsp);
|
||||||
|
@ -146,8 +146,30 @@ typedef struct AVFloatDSPContext {
|
|||||||
* @param len length of vectors, multiple of 4
|
* @param len length of vectors, multiple of 4
|
||||||
*/
|
*/
|
||||||
void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
|
void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the scalar product of two vectors of floats.
|
||||||
|
*
|
||||||
|
* @param v1 first vector, 16-byte aligned
|
||||||
|
* @param v2 second vector, 16-byte aligned
|
||||||
|
* @param len length of vectors, multiple of 4
|
||||||
|
*
|
||||||
|
* @return sum of elementwise products
|
||||||
|
*/
|
||||||
|
float (*scalarproduct_float)(const float *v1, const float *v2, int len);
|
||||||
} AVFloatDSPContext;
|
} AVFloatDSPContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the scalar product of two vectors.
|
||||||
|
*
|
||||||
|
* @param v1 first input vector
|
||||||
|
* @param v2 second input vector
|
||||||
|
* @param len number of elements
|
||||||
|
*
|
||||||
|
* @return sum of elementwise products
|
||||||
|
*/
|
||||||
|
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize a float DSP context.
|
* Initialize a float DSP context.
|
||||||
*
|
*
|
||||||
|
@ -237,3 +237,29 @@ VECTOR_FMUL_REVERSE
|
|||||||
INIT_YMM avx
|
INIT_YMM avx
|
||||||
VECTOR_FMUL_REVERSE
|
VECTOR_FMUL_REVERSE
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
|
||||||
|
INIT_XMM sse
|
||||||
|
cglobal scalarproduct_float, 3,3,2, v1, v2, offset
|
||||||
|
neg offsetq
|
||||||
|
shl offsetq, 2
|
||||||
|
sub v1q, offsetq
|
||||||
|
sub v2q, offsetq
|
||||||
|
xorps xmm0, xmm0
|
||||||
|
.loop:
|
||||||
|
movaps xmm1, [v1q+offsetq]
|
||||||
|
mulps xmm1, [v2q+offsetq]
|
||||||
|
addps xmm0, xmm1
|
||||||
|
add offsetq, 16
|
||||||
|
js .loop
|
||||||
|
movhlps xmm1, xmm0
|
||||||
|
addps xmm0, xmm1
|
||||||
|
movss xmm1, xmm0
|
||||||
|
shufps xmm0, xmm0, 1
|
||||||
|
addss xmm0, xmm1
|
||||||
|
%if ARCH_X86_64 == 0
|
||||||
|
movss r0m, xmm0
|
||||||
|
fld dword r0m
|
||||||
|
%endif
|
||||||
|
RET
|
||||||
|
|
||||||
|
@ -51,6 +51,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
|
|||||||
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
|
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
|
||||||
const float *src1, int len);
|
const float *src1, int len);
|
||||||
|
|
||||||
|
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
||||||
|
|
||||||
#if HAVE_6REGS && HAVE_INLINE_ASM
|
#if HAVE_6REGS && HAVE_INLINE_ASM
|
||||||
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
|
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
|
||||||
const float *src1, const float *win,
|
const float *src1, const float *win,
|
||||||
@ -135,6 +137,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
|||||||
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
|
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
|
||||||
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
|
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
|
||||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
|
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
|
||||||
|
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(mm_flags)) {
|
||||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user