From dd3b73f3905c61c99f1d3fb58bc7ee380eb8aa2e Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sat, 24 Nov 2012 13:03:53 +0000 Subject: [PATCH 1/4] base64: fix signed overflow in shift Signed-off-by: Mans Rullgard --- libavutil/base64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavutil/base64.c b/libavutil/base64.c index 73872b8c99..c1fba897be 100644 --- a/libavutil/base64.c +++ b/libavutil/base64.c @@ -44,10 +44,10 @@ static const uint8_t map2[] = int av_base64_decode(uint8_t *out, const char *in, int out_size) { - int i, v; + int i; + unsigned v = 0; uint8_t *dst = out; - v = 0; for (i = 0; in[i] && in[i] != '='; i++) { unsigned int index= in[i]-43; if (index>=FF_ARRAY_ELEMS(map2) || map2[index] == 0xff) From edd80ec7e32b097043432fa67281ed8c6d044331 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sat, 24 Nov 2012 13:12:47 +0000 Subject: [PATCH 2/4] aacdec: fix signed overflows in lcg_random() Signed-off-by: Mans Rullgard --- libavcodec/aacdec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index d2a31cae0b..af17acfb75 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -787,7 +787,8 @@ static int decode_audio_specific_config(AACContext *ac, */ static av_always_inline int lcg_random(int previous_val) { - return previous_val * 1664525 + 1013904223; + union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 }; + return v.s; } static av_always_inline void reset_predict_state(PredictorState *ps) From 1e276553886a7ca315a055c489fabe456e789e3f Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 26 Nov 2012 10:30:51 -0500 Subject: [PATCH 3/4] aacenc: use the correct output buffer This fixes segfault caused by 3d3cf6745e2a5dc9c377244454c3186d75b177fa when SingleChannelElement.ret was renamed to SingleChannelElement.ret_buf. Signed-off-by: Justin Ruggles --- libavcodec/aacenc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index e9f6e2ffbf..5558e39d17 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -191,7 +191,7 @@ WINDOW_FUNC(only_long) { const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; - float *out = sce->ret; + float *out = sce->ret_buf; fdsp->vector_fmul (out, audio, lwindow, 1024); dsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024); @@ -201,7 +201,7 @@ WINDOW_FUNC(long_start) { const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; - float *out = sce->ret; + float *out = sce->ret_buf; fdsp->vector_fmul(out, audio, lwindow, 1024); memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448); @@ -213,7 +213,7 @@ WINDOW_FUNC(long_stop) { const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; - float *out = sce->ret; + float *out = sce->ret_buf; memset(out, 0, sizeof(out[0]) * 448); fdsp->vector_fmul(out + 448, audio + 448, swindow, 128); @@ -226,7 +226,7 @@ WINDOW_FUNC(eight_short) const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; const float *in = audio + 448; - float *out = sce->ret; + float *out = sce->ret_buf; int w; for (w = 0; w < 8; w++) { @@ -251,7 +251,7 @@ static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce, float *audio) { int i; - float *output = sce->ret; + float *output = sce->ret_buf; apply_window[sce->ics.window_sequence[0]](&s->dsp, &s->fdsp, sce, audio); From 284ea790d89441fa1e6b2d72d3c1ed6d61972f0b Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Sat, 22 Sep 2012 18:13:57 -0400 Subject: [PATCH 4/4] dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil --- libavcodec/aacdec.c | 12 ++++----- libavcodec/arm/dsputil_init_neon.c | 3 --- libavcodec/arm/dsputil_neon.S | 38 ----------------------------- libavcodec/dsputil.c | 9 ------- libavcodec/dsputil.h | 10 -------- libavcodec/libmp3lame.c | 14 +++++------ libavcodec/wmaenc.c | 2 +- libavcodec/wmaprodec.c | 22 ++++++++++------- libavutil/arm/float_dsp_init_neon.c | 4 +++ libavutil/arm/float_dsp_neon.S | 38 +++++++++++++++++++++++++++++ libavutil/float_dsp.c | 9 +++++++ libavutil/float_dsp.h | 15 ++++++++++++ 12 files changed, 93 insertions(+), 83 deletions(-) diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index af17acfb75..a69f055859 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -1360,7 +1360,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len); scale = sf[idx] / sqrtf(band_energy); - ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len); + ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len); } } else { const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; @@ -1506,7 +1506,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], } } while (len -= 2); - ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); + ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); } } @@ -1730,10 +1730,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p c *= 1 - 2 * cpe->ms_mask[idx]; scale = c * sce1->sf[idx]; for (group = 0; group < ics->group_len[g]; group++) - ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], - coef0 + group * 128 + offsets[i], - scale, - offsets[i + 1] - offsets[i]); + ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], + coef0 + group * 128 + offsets[i], + scale, + offsets[i + 1] - offsets[i]); } } else { int bt_run_end = sce1->band_type_run_end[idx]; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index a132f6f993..b2e7204a60 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_vector_fmul_window_neon(float *dst, const float *src0, const float *src1, const float *win, int len); -void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, - int len); void ff_butterflies_float_neon(float *v1, float *v2, int len); float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); void ff_vector_fmul_reverse_neon(float *dst, const float *src0, @@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) } c->vector_fmul_window = ff_vector_fmul_window_neon; - c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; c->butterflies_float = ff_butterflies_float_neon; c->scalarproduct_float = ff_scalarproduct_float_neon; c->vector_fmul_reverse = ff_vector_fmul_reverse_neon; diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index ca1d2dee3f..cf9ad9e583 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1 endfunc #endif -function ff_vector_fmul_scalar_neon, export=1 -VFP len .req r2 -NOVFP len .req r3 -VFP vdup.32 q8, d0[0] -NOVFP vdup.32 q8, r2 - bics r12, len, #15 - beq 3f - vld1.32 {q0},[r1,:128]! - vld1.32 {q1},[r1,:128]! -1: vmul.f32 q0, q0, q8 - vld1.32 {q2},[r1,:128]! - vmul.f32 q1, q1, q8 - vld1.32 {q3},[r1,:128]! - vmul.f32 q2, q2, q8 - vst1.32 {q0},[r0,:128]! - vmul.f32 q3, q3, q8 - vst1.32 {q1},[r0,:128]! - subs r12, r12, #16 - beq 2f - vld1.32 {q0},[r1,:128]! - vst1.32 {q2},[r0,:128]! - vld1.32 {q1},[r1,:128]! - vst1.32 {q3},[r0,:128]! - b 1b -2: vst1.32 {q2},[r0,:128]! - vst1.32 {q3},[r0,:128]! - ands len, len, #15 - it eq - bxeq lr -3: vld1.32 {q0},[r1,:128]! - vmul.f32 q0, q0, q8 - vst1.32 {q0},[r0,:128]! - subs len, len, #4 - bgt 3b - bx lr - .unreq len -endfunc - function ff_butterflies_float_neon, export=1 1: vld1.32 {q0},[r0,:128] vld1.32 {q1},[r1,:128] diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 7a3fdba299..d4471dc24c 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2392,14 +2392,6 @@ static void vector_fmul_window_c(float *dst, const float *src0, } } -static void vector_fmul_scalar_c(float *dst, const float *src, float mul, - int len) -{ - int i; - for (i = 0; i < len; i++) - dst[i] = src[i] * mul; -} - static void butterflies_float_c(float *restrict v1, float *restrict v2, int len) { @@ -2869,7 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->scalarproduct_float = ff_scalarproduct_float_c; c->butterflies_float = butterflies_float_c; c->butterflies_float_interleave = butterflies_float_interleave_c; - c->vector_fmul_scalar = vector_fmul_scalar_c; c->shrink[0]= av_image_copy_plane; c->shrink[1]= ff_shrink22; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index f48aa96017..5640f3abea 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -382,16 +382,6 @@ typedef struct DSPContext { void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len); /* assume len is a multiple of 8, and arrays are 16-byte aligned */ void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); - /** - * Multiply a vector of floats by a scalar float. Source and - * destination vectors must overlap exactly or not at all. - * @param dst result vector, 16-byte aligned - * @param src input vector, 16-byte aligned - * @param mul scalar value - * @param len length of vector, multiple of 4 - */ - void (*vector_fmul_scalar)(float *dst, const float *src, float mul, - int len); /** * Calculate the scalar product of two vectors of floats. * @param v1 first vector, 16-byte aligned diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c index 600f6fd9dd..264a0e2236 100644 --- a/libavcodec/libmp3lame.c +++ b/libavcodec/libmp3lame.c @@ -28,12 +28,12 @@ #include "libavutil/channel_layout.h" #include "libavutil/common.h" +#include "libavutil/float_dsp.h" #include "libavutil/intreadwrite.h" #include "libavutil/log.h" #include "libavutil/opt.h" #include "avcodec.h" #include "audio_frame_queue.h" -#include "dsputil.h" #include "internal.h" #include "mpegaudio.h" #include "mpegaudiodecheader.h" @@ -50,7 +50,7 @@ typedef struct LAMEContext { int reservoir; float *samples_flt[2]; AudioFrameQueue afq; - DSPContext dsp; + AVFloatDSPContext fdsp; } LAMEContext; @@ -167,7 +167,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx) if (ret < 0) goto error; - ff_dsputil_init(&s->dsp, avctx); + avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); return 0; error: @@ -205,10 +205,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, return AVERROR(EINVAL); } for (ch = 0; ch < avctx->channels; ch++) { - s->dsp.vector_fmul_scalar(s->samples_flt[ch], - (const float *)frame->data[ch], - 32768.0f, - FFALIGN(frame->nb_samples, 8)); + s->fdsp.vector_fmul_scalar(s->samples_flt[ch], + (const float *)frame->data[ch], + 32768.0f, + FFALIGN(frame->nb_samples, 8)); } ENCODE_BUFFER(lame_encode_buffer_float, float, s->samples_flt); break; diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c index 13d8a1cfbf..044114b516 100644 --- a/libavcodec/wmaenc.c +++ b/libavcodec/wmaenc.c @@ -111,7 +111,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame) for (ch = 0; ch < avctx->channels; ch++) { memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output)); - s->dsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); + s->fdsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); s->dsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], win, len); s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len); mdct->mdct_calc(mdct, s->coefs[ch], s->output); diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c index 43fdbc068d..ac0cce16bd 100644 --- a/libavcodec/wmaprodec.c +++ b/libavcodec/wmaprodec.c @@ -86,6 +86,7 @@ * subframe in order to reconstruct the output samples. */ +#include "libavutil/float_dsp.h" #include "libavutil/intfloat.h" #include "libavutil/intreadwrite.h" #include "avcodec.h" @@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx { AVCodecContext* avctx; ///< codec context for av_log AVFrame frame; ///< AVFrame for decoded output DSPContext dsp; ///< accelerated DSP functions + AVFloatDSPContext fdsp; uint8_t frame_data[MAX_FRAMESIZE + FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data PutBitContext pb; ///< context for filling the frame_data buffer @@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx) s->avctx = avctx; ff_dsputil_init(&s->dsp, avctx); + avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); + init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE); avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; @@ -1008,12 +1012,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s) } } else if (s->avctx->channels == 2) { int len = FFMIN(sfb[1], s->subframe_len) - sfb[0]; - s->dsp.vector_fmul_scalar(ch_data[0] + sfb[0], - ch_data[0] + sfb[0], - 181.0 / 128, len); - s->dsp.vector_fmul_scalar(ch_data[1] + sfb[0], - ch_data[1] + sfb[0], - 181.0 / 128, len); + s->fdsp.vector_fmul_scalar(ch_data[0] + sfb[0], + ch_data[0] + sfb[0], + 181.0 / 128, len); + s->fdsp.vector_fmul_scalar(ch_data[1] + sfb[0], + ch_data[1] + sfb[0], + 181.0 / 128, len); } } } @@ -1259,9 +1263,9 @@ static int decode_subframe(WMAProDecodeCtx *s) s->channel[c].scale_factor_step; const float quant = pow(10.0, exp / 20.0); int start = s->cur_sfb_offsets[b]; - s->dsp.vector_fmul_scalar(s->tmp + start, - s->channel[c].coeffs + start, - quant, end - start); + s->fdsp.vector_fmul_scalar(s->tmp + start, + s->channel[c].coeffs + start, + quant, end - start); } /** apply imdct (imdct_half == DCTIV with reverse) */ diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c index 3ca0288b31..88eb4b3d2a 100644 --- a/libavutil/arm/float_dsp_init_neon.c +++ b/libavutil/arm/float_dsp_init_neon.c @@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, int len); +void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, + int len); + void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) { fdsp->vector_fmul = ff_vector_fmul_neon; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; + fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon; } diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S index 4aa6f838dd..6d7bd5236e 100644 --- a/libavutil/arm/float_dsp_neon.S +++ b/libavutil/arm/float_dsp_neon.S @@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2 bx lr .unreq len endfunc + +function ff_vector_fmul_scalar_neon, export=1 +VFP len .req r2 +NOVFP len .req r3 +VFP vdup.32 q8, d0[0] +NOVFP vdup.32 q8, r2 + bics r12, len, #15 + beq 3f + vld1.32 {q0},[r1,:128]! + vld1.32 {q1},[r1,:128]! +1: vmul.f32 q0, q0, q8 + vld1.32 {q2},[r1,:128]! + vmul.f32 q1, q1, q8 + vld1.32 {q3},[r1,:128]! + vmul.f32 q2, q2, q8 + vst1.32 {q0},[r0,:128]! + vmul.f32 q3, q3, q8 + vst1.32 {q1},[r0,:128]! + subs r12, r12, #16 + beq 2f + vld1.32 {q0},[r1,:128]! + vst1.32 {q2},[r0,:128]! + vld1.32 {q1},[r1,:128]! + vst1.32 {q3},[r0,:128]! + b 1b +2: vst1.32 {q2},[r0,:128]! + vst1.32 {q3},[r0,:128]! + ands len, len, #15 + it eq + bxeq lr +3: vld1.32 {q0},[r1,:128]! + vmul.f32 q0, q0, q8 + vst1.32 {q0},[r0,:128]! + subs len, len, #4 + bgt 3b + bx lr + .unreq len +endfunc diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index 2e90939090..b6b11818b5 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -36,10 +36,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul, dst[i] += src[i] * mul; } +static void vector_fmul_scalar_c(float *dst, const float *src, float mul, + int len) +{ + int i; + for (i = 0; i < len; i++) + dst[i] = src[i] * mul; +} + void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) { fdsp->vector_fmul = vector_fmul_c; fdsp->vector_fmac_scalar = vector_fmac_scalar_c; + fdsp->vector_fmul_scalar = vector_fmul_scalar_c; #if ARCH_ARM ff_float_dsp_init_arm(fdsp); diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index 95cef62f29..cb4b28f0e2 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext { */ void (*vector_fmac_scalar)(float *dst, const float *src, float mul, int len); + + /** + * Multiply a vector of floats by a scalar float. Source and + * destination vectors must overlap exactly or not at all. + * + * @param dst result vector + * constraints: 16-byte aligned + * @param src input vector + * constraints: 16-byte aligned + * @param mul scalar value + * @param len length of vector + * constraints: multiple of 4 + */ + void (*vector_fmul_scalar)(float *dst, const float *src, float mul, + int len); } AVFloatDSPContext; /**