diff --git a/libavcodec/aac.h b/libavcodec/aac.h index e8de1e8525..a15173788f 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -295,7 +295,7 @@ struct AACContext { FFTContext mdct_ld; FFTContext mdct_ltp; FmtConvertContext fmt_conv; - AVFloatDSPContext fdsp; + AVFloatDSPContext *fdsp; int random_state; /** @} */ diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 2793881c8b..f74cf156b5 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -1137,7 +1137,10 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) ff_aac_sbr_init(); ff_fmt_convert_init(&ac->fmt_conv, avctx); - avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); + ac->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT); + if (!ac->fdsp) { + return AVERROR(ENOMEM); + } ac->random_state = 0x1f2e3d4c; @@ -1641,9 +1644,9 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], cfo[k] = ac->random_state; } - band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len); + band_energy = ac->fdsp->scalarproduct_float(cfo, cfo, off_len); scale = sf[idx] / sqrtf(band_energy); - ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len); + ac->fdsp->vector_fmul_scalar(cfo, cfo, scale, off_len); } } else { const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; @@ -1789,7 +1792,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], } } while (len -= 2); - ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); + ac->fdsp->vector_fmul_scalar(cfo, cfo, sf[idx], off_len); } } @@ -2002,7 +2005,7 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) { for (group = 0; group < ics->group_len[g]; group++) { - ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i], + ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i], ch1 + group * 128 + offsets[i], offsets[i+1] - offsets[i]); } @@ -2041,7 +2044,7 @@ static void apply_intensity_stereo(AACContext *ac, c *= 1 - 2 * cpe->ms_mask[idx]; scale = c * sce1->sf[idx]; for (group = 0; group < ics->group_len[g]; group++) - ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i], + ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i], coef0 + group * 128 + offsets[i], scale, offsets[i + 1] - offsets[i]); @@ -2409,15 +2412,15 @@ static void windowing_and_mdct_ltp(AACContext *ac, float *out, const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) { - ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024); + ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024); } else { memset(in, 0, 448 * sizeof(float)); - ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128); + ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128); } if (ics->window_sequence[0] != LONG_START_SEQUENCE) { - ac->fdsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); + ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); } else { - ac->fdsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); + ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); memset(in + 1024 + 576, 0, 448 * sizeof(float)); } ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in); @@ -2470,17 +2473,17 @@ static void update_ltp(AACContext *ac, SingleChannelElement *sce) if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { memcpy(saved_ltp, saved, 512 * sizeof(float)); memset(saved_ltp + 576, 0, 448 * sizeof(float)); - ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); + ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); for (i = 0; i < 64; i++) saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i]; } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(float)); memset(saved_ltp + 576, 0, 448 * sizeof(float)); - ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); + ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); for (i = 0; i < 64; i++) saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i]; } else { // LONG_STOP or ONLY_LONG - ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); + ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); for (i = 0; i < 512; i++) saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i]; } @@ -2521,19 +2524,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) */ if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { - ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512); + ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512); } else { memcpy( out, saved, 448 * sizeof(float)); if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - ac->fdsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); - ac->fdsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64); - ac->fdsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64); - ac->fdsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64); - ac->fdsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); + ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); + ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64); + ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64); + ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64); + ac->fdsp->vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); } else { - ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); + ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); memcpy( out + 576, buf + 64, 448 * sizeof(float)); } } @@ -2541,9 +2544,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) // buffer update if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { memcpy( saved, temp + 64, 64 * sizeof(float)); - ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); - ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); - ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); + ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); + ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); + ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { memcpy( saved, buf + 512, 448 * sizeof(float)); @@ -2568,10 +2571,10 @@ static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce) if (ics->use_kb_window[1]) { // AAC LD uses a low overlap sine window instead of a KBD window memcpy(out, saved, 192 * sizeof(float)); - ac->fdsp.vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128, 64); + ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128, 64); memcpy( out + 320, buf + 64, 192 * sizeof(float)); } else { - ac->fdsp.vector_fmul_window(out, saved, buf, ff_sine_512, 256); + ac->fdsp->vector_fmul_window(out, saved, buf, ff_sine_512, 256); } // buffer update @@ -3167,6 +3170,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx) ff_mdct_end(&ac->mdct_small); ff_mdct_end(&ac->mdct_ld); ff_mdct_end(&ac->mdct_ltp); + av_freep(&ac->fdsp); return 0; } diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c index f8ae42472d..94a5685e98 100644 --- a/libavcodec/aacsbr.c +++ b/libavcodec/aacsbr.c @@ -1700,7 +1700,7 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac, } for (ch = 0; ch < nch; ch++) { /* decode channel */ - sbr_qmf_analysis(&ac->fdsp, &sbr->mdct_ana, &sbr->dsp, ch ? R : L, sbr->data[ch].analysis_filterbank_samples, + sbr_qmf_analysis(ac->fdsp, &sbr->mdct_ana, &sbr->dsp, ch ? R : L, sbr->data[ch].analysis_filterbank_samples, (float*)sbr->qmf_filter_scratch, sbr->data[ch].W, sbr->data[ch].Ypos); sbr->c.sbr_lf_gen(ac, sbr, sbr->X_low, @@ -1746,13 +1746,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac, nch = 2; } - sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, &ac->fdsp, + sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, ac->fdsp, L, sbr->X[0], sbr->qmf_filter_scratch, sbr->data[0].synthesis_filterbank_samples, &sbr->data[0].synthesis_filterbank_samples_offset, downsampled); if (nch == 2) - sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, &ac->fdsp, + sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, ac->fdsp, R, sbr->X[1], sbr->qmf_filter_scratch, sbr->data[1].synthesis_filterbank_samples, &sbr->data[1].synthesis_filterbank_samples_offset, diff --git a/libavcodec/mips/aacdec_mips.c b/libavcodec/mips/aacdec_mips.c index e4033668da..5db10f9fff 100644 --- a/libavcodec/mips/aacdec_mips.c +++ b/libavcodec/mips/aacdec_mips.c @@ -90,7 +90,7 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce) */ if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { - ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512); + ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512); } else { { float *buf1 = saved; @@ -199,7 +199,7 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce) } } } else { - ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); + ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); { float *buf1 = buf + 64; float *buf2 = out + 576; @@ -248,9 +248,9 @@ static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce) // buffer update if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); - ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); - ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); + ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); + ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); + ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); { float *buf1 = buf + 7*128 + 64; float *buf2 = saved + 448; @@ -561,7 +561,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) : "memory" ); - ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); + ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); for (i = 0; i < 16; i++){ /* loop unrolled 4 times */ __asm__ volatile ( @@ -646,7 +646,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) : [loop_end]"r"(loop_end) : "memory" ); - ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); + ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); for (i = 0; i < 16; i++){ /* loop unrolled 8 times */ __asm__ volatile ( @@ -683,7 +683,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) } } else { // LONG_STOP or ONLY_LONG float *ptr1, *ptr2, *ptr3; - ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); + ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); ptr1 = &saved_ltp[512]; ptr2 = &ac->buf_mdct[1023];