You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	floatdsp: move butterflies_float from dsputil to avfloatdsp.
This makes wmadec/enc, twinvq and mpegaudiodec (i.e. mp2/mp3) independent of dsputil.
This commit is contained in:
		| @@ -1693,9 +1693,9 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) | ||||
|             if (cpe->ms_mask[idx] && | ||||
|                     cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) { | ||||
|                 for (group = 0; group < ics->group_len[g]; group++) { | ||||
|                     ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i], | ||||
|                                               ch1 + group * 128 + offsets[i], | ||||
|                                               offsets[i+1] - offsets[i]); | ||||
|                     ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i], | ||||
|                                                ch1 + group * 128 + offsets[i], | ||||
|                                                offsets[i+1] - offsets[i]); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|   | ||||
| @@ -142,7 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
| void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
| void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
|  | ||||
| void ff_butterflies_float_neon(float *v1, float *v2, int len); | ||||
| float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); | ||||
|  | ||||
| void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, | ||||
| @@ -294,7 +293,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | ||||
|         c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; | ||||
|     } | ||||
|  | ||||
|     c->butterflies_float          = ff_butterflies_float_neon; | ||||
|     c->scalarproduct_float        = ff_scalarproduct_float_neon; | ||||
|     c->vector_clipf               = ff_vector_clipf_neon; | ||||
|     c->vector_clip_int32          = ff_vector_clip_int32_neon; | ||||
|   | ||||
| @@ -531,18 +531,6 @@ function ff_add_pixels_clamped_neon, export=1 | ||||
|         bx              lr | ||||
| endfunc | ||||
|  | ||||
| function ff_butterflies_float_neon, export=1 /* in-place sum/difference of two float vectors, 4 floats per pass; r0/r1/r2 presumably carry v1/v2/len per the C prototype - confirm against the ABI */ | ||||
| 1:      vld1.32         {q0},[r0,:128] /* q0 = next 4 floats at r0 (128-bit aligned access, no writeback yet) */ | ||||
|         vld1.32         {q1},[r1,:128] /* q1 = next 4 floats at r1 */ | ||||
|         vsub.f32        q2,  q0,  q1 /* q2 = q0 - q1 (difference) */ | ||||
|         vadd.f32        q1,  q0,  q1 /* q1 = q0 + q1 (sum) */ | ||||
|         vst1.32         {q2},[r1,:128]! /* difference goes back to r1, then r1 advances */ | ||||
|         vst1.32         {q1},[r0,:128]! /* sum goes back to r0, then r0 advances */ | ||||
|         subs            r2,  r2,  #4 /* 4 elements consumed; sets flags for the branch */ | ||||
|         bgt             1b /* count is tested after the body, so one group of 4 is always processed */ | ||||
|         bx              lr | ||||
| endfunc | ||||
|  | ||||
| function ff_scalarproduct_float_neon, export=1 | ||||
|         vmov.f32        q2,  #0.0 | ||||
| 1:      vld1.32         {q0},[r0,:128]! | ||||
|   | ||||
| @@ -2353,17 +2353,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | ||||
| WRAPPER8_16_SQ(rd8x8_c, rd16_c) | ||||
| WRAPPER8_16_SQ(bit8x8_c, bit16_c) | ||||
|  | ||||
| static void butterflies_float_c(float *restrict v1, float *restrict v2, | ||||
|                                 int len) | ||||
| { /* In place, for each i < len: v1[i] becomes v1[i] + v2[i], v2[i] becomes the old v1[i] - v2[i]. */ | ||||
|     int i; | ||||
|     for (i = 0; i < len; i++) { | ||||
|         float t = v1[i] - v2[i]; /* take the difference before v1[i] is overwritten */ | ||||
|         v1[i] += v2[i]; /* sum output */ | ||||
|         v2[i] = t; /* difference output */ | ||||
|     } | ||||
| } | ||||
|  | ||||
| float ff_scalarproduct_float_c(const float *v1, const float *v2, int len) | ||||
| { | ||||
|     float p = 0.0; | ||||
| @@ -2706,7 +2695,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | ||||
|     c->apply_window_int16 = apply_window_int16_c; | ||||
|     c->vector_clip_int32 = vector_clip_int32_c; | ||||
|     c->scalarproduct_float = ff_scalarproduct_float_c; | ||||
|     c->butterflies_float = butterflies_float_c; | ||||
|  | ||||
|     c->shrink[0]= av_image_copy_plane; | ||||
|     c->shrink[1]= ff_shrink22; | ||||
|   | ||||
| @@ -349,13 +349,6 @@ typedef struct DSPContext { | ||||
|      * @param len length of vectors, multiple of 4 | ||||
|      */ | ||||
|     float (*scalarproduct_float)(const float *v1, const float *v2, int len); | ||||
|     /** | ||||
|      * Calculate the sum and difference of two vectors of floats. | ||||
|      * @param v1  first input vector, sum output, 16-byte aligned | ||||
|      * @param v2  second input vector, difference output, 16-byte aligned | ||||
|      * @param len length of vectors, multiple of 4 | ||||
|      */ | ||||
|     void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); | ||||
|  | ||||
|     /* (I)DCT */ | ||||
|     void (*fdct)(DCTELEM *block/* align 16*/); | ||||
|   | ||||
| @@ -36,6 +36,7 @@ | ||||
| #include <stdio.h> | ||||
|  | ||||
| #include "libavutil/channel_layout.h" | ||||
| #include "libavutil/float_dsp.h" | ||||
| #include "avcodec.h" | ||||
| #include "get_bits.h" | ||||
| #include "dsputil.h" | ||||
| @@ -95,6 +96,7 @@ typedef struct { | ||||
|     GetBitContext gb; | ||||
|  | ||||
|     DSPContext dsp; | ||||
|     AVFloatDSPContext fdsp; | ||||
|     FFTContext fft; | ||||
|     DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS / 2]; | ||||
|     float *out_samples; | ||||
| @@ -244,6 +246,7 @@ static av_cold int imc_decode_init(AVCodecContext *avctx) | ||||
|         return ret; | ||||
|     } | ||||
|     ff_dsputil_init(&q->dsp, avctx); | ||||
|     avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | ||||
|     avctx->sample_fmt     = AV_SAMPLE_FMT_FLTP; | ||||
|     avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO | ||||
|                                                  : AV_CH_LAYOUT_STEREO; | ||||
| @@ -959,8 +962,8 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data, | ||||
|     } | ||||
|  | ||||
|     if (avctx->channels == 2) { | ||||
|         q->dsp.butterflies_float((float *)q->frame.extended_data[0], | ||||
|                                  (float *)q->frame.extended_data[1], COEFFS); | ||||
|         q->fdsp.butterflies_float((float *)q->frame.extended_data[0], | ||||
|                                   (float *)q->frame.extended_data[1], COEFFS); | ||||
|     } | ||||
|  | ||||
|     *got_frame_ptr   = 1; | ||||
|   | ||||
| @@ -25,6 +25,7 @@ | ||||
|  */ | ||||
|  | ||||
| #include "libavutil/channel_layout.h" | ||||
| #include "libavutil/float_dsp.h" | ||||
| #include "avcodec.h" | ||||
| #include "get_bits.h" | ||||
| #include "internal.h" | ||||
| @@ -82,7 +83,7 @@ typedef struct MPADecodeContext { | ||||
|     int err_recognition; | ||||
|     AVCodecContext* avctx; | ||||
|     MPADSPContext mpadsp; | ||||
|     DSPContext dsp; | ||||
|     AVFloatDSPContext fdsp; | ||||
|     AVFrame frame; | ||||
| } MPADecodeContext; | ||||
|  | ||||
| @@ -434,8 +435,8 @@ static av_cold int decode_init(AVCodecContext * avctx) | ||||
|  | ||||
|     s->avctx = avctx; | ||||
|  | ||||
|     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | ||||
|     ff_mpadsp_init(&s->mpadsp); | ||||
|     ff_dsputil_init(&s->dsp, avctx); | ||||
|  | ||||
|     if (avctx->request_sample_fmt == OUT_FMT && | ||||
|         avctx->codec_id != AV_CODEC_ID_MP3ON4) | ||||
| @@ -1157,7 +1158,7 @@ found2: | ||||
|         /* NOTE: the 1/sqrt(2) normalization factor is included in the | ||||
|            global gain */ | ||||
| #if CONFIG_FLOAT | ||||
|        s-> dsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576); | ||||
|        s->fdsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576); | ||||
| #else | ||||
|         tab0 = g0->sb_hybrid; | ||||
|         tab1 = g1->sb_hybrid; | ||||
|   | ||||
| @@ -178,7 +178,6 @@ static const ModeTab mode_44_48 = { | ||||
| typedef struct TwinContext { | ||||
|     AVCodecContext *avctx; | ||||
|     AVFrame frame; | ||||
|     DSPContext      dsp; | ||||
|     AVFloatDSPContext fdsp; | ||||
|     FFTContext mdct_ctx[3]; | ||||
|  | ||||
| @@ -693,7 +692,7 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype, | ||||
|     if (tctx->avctx->channels == 2) { | ||||
|         memcpy(&out[1][0],     &prev_buf[2*mtab->size],         size1 * sizeof(out[1][0])); | ||||
|         memcpy(&out[1][size1], &tctx->curr_frame[2*mtab->size], size2 * sizeof(out[1][0])); | ||||
|         tctx->dsp.butterflies_float(out[0], out[1], mtab->size); | ||||
|         tctx->fdsp.butterflies_float(out[0], out[1], mtab->size); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -1157,7 +1156,6 @@ static av_cold int twin_decode_init(AVCodecContext *avctx) | ||||
|         return -1; | ||||
|     } | ||||
|  | ||||
|     ff_dsputil_init(&tctx->dsp, avctx); | ||||
|     avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | ||||
|     if ((ret = init_mdct_win(tctx))) { | ||||
|         av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n"); | ||||
|   | ||||
| @@ -82,7 +82,6 @@ int ff_wma_init(AVCodecContext *avctx, int flags2) | ||||
|         || avctx->bit_rate    <= 0) | ||||
|         return -1; | ||||
|  | ||||
|     ff_dsputil_init(&s->dsp, avctx); | ||||
|     ff_fmt_convert_init(&s->fmt_conv, avctx); | ||||
|     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | ||||
|  | ||||
|   | ||||
| @@ -132,7 +132,6 @@ typedef struct WMACodecContext { | ||||
|     float lsp_pow_e_table[256]; | ||||
|     float lsp_pow_m_table1[(1 << LSP_POW_BITS)]; | ||||
|     float lsp_pow_m_table2[(1 << LSP_POW_BITS)]; | ||||
|     DSPContext dsp; | ||||
|     FmtConvertContext fmt_conv; | ||||
|     AVFloatDSPContext fdsp; | ||||
|  | ||||
|   | ||||
| @@ -719,7 +719,7 @@ static int wma_decode_block(WMACodecContext *s) | ||||
|             s->channel_coded[0] = 1; | ||||
|         } | ||||
|  | ||||
|         s->dsp.butterflies_float(s->coefs[0], s->coefs[1], s->block_len); | ||||
|         s->fdsp.butterflies_float(s->coefs[0], s->coefs[1], s->block_len); | ||||
|     } | ||||
|  | ||||
| next: | ||||
|   | ||||
| @@ -41,6 +41,8 @@ void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1, | ||||
| void ff_vector_fmul_reverse_neon(float *dst, const float *src0, | ||||
|                                  const float *src1, int len); | ||||
|  | ||||
| void ff_butterflies_float_neon(float *v1, float *v2, int len); | ||||
|  | ||||
| void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | ||||
| { | ||||
|     fdsp->vector_fmul = ff_vector_fmul_neon; | ||||
| @@ -49,4 +51,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | ||||
|     fdsp->vector_fmul_window = ff_vector_fmul_window_neon; | ||||
|     fdsp->vector_fmul_add    = ff_vector_fmul_add_neon; | ||||
|     fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; | ||||
|     fdsp->butterflies_float = ff_butterflies_float_neon; | ||||
| } | ||||
|   | ||||
| @@ -244,3 +244,15 @@ function ff_vector_fmul_reverse_neon, export=1 | ||||
| 2:      vst1.32         {q8-q9},  [r0,:128]! | ||||
|         bx              lr | ||||
| endfunc | ||||
|  | ||||
| function ff_butterflies_float_neon, export=1 /* in-place sum/difference of two float vectors, 4 floats per pass; r0/r1/r2 presumably carry v1/v2/len per the C prototype - confirm against the ABI */ | ||||
| 1:      vld1.32         {q0},[r0,:128] /* q0 = next 4 floats at r0 (128-bit aligned access, no writeback yet) */ | ||||
|         vld1.32         {q1},[r1,:128] /* q1 = next 4 floats at r1 */ | ||||
|         vsub.f32        q2,  q0,  q1 /* q2 = q0 - q1 (difference) */ | ||||
|         vadd.f32        q1,  q0,  q1 /* q1 = q0 + q1 (sum) */ | ||||
|         vst1.32         {q2},[r1,:128]! /* difference goes back to r1, then r1 advances */ | ||||
|         vst1.32         {q1},[r0,:128]! /* sum goes back to r0, then r0 advances */ | ||||
|         subs            r2,  r2,  #4 /* 4 elements consumed; sets flags for the branch */ | ||||
|         bgt             1b /* count is tested after the body, so one group of 4 is always processed */ | ||||
|         bx              lr | ||||
| endfunc | ||||
|   | ||||
| @@ -89,6 +89,18 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, | ||||
|         dst[i] = src0[i] * src1[-i]; | ||||
| } | ||||
|  | ||||
| static void butterflies_float_c(float *restrict v1, float *restrict v2, | ||||
|                                 int len) | ||||
| { | ||||
|     int i; | ||||
|     /* In place, for each i < len: v1[i] becomes v1[i] + v2[i], v2[i] becomes the old v1[i] - v2[i]. */ | ||||
|     for (i = 0; i < len; i++) { | ||||
|         float t = v1[i] - v2[i]; /* take the difference before v1[i] is overwritten */ | ||||
|         v1[i] += v2[i]; /* sum output */ | ||||
|         v2[i] = t; /* difference output */ | ||||
|     } | ||||
| } | ||||
|  | ||||
| void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | ||||
| { | ||||
|     fdsp->vector_fmul = vector_fmul_c; | ||||
| @@ -98,6 +110,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | ||||
|     fdsp->vector_fmul_window = vector_fmul_window_c; | ||||
|     fdsp->vector_fmul_add = vector_fmul_add_c; | ||||
|     fdsp->vector_fmul_reverse = vector_fmul_reverse_c; | ||||
|     fdsp->butterflies_float = butterflies_float_c; | ||||
|  | ||||
| #if ARCH_ARM | ||||
|     ff_float_dsp_init_arm(fdsp); | ||||
|   | ||||
| @@ -137,6 +137,15 @@ typedef struct AVFloatDSPContext { | ||||
|      */ | ||||
|     void (*vector_fmul_reverse)(float *dst, const float *src0, | ||||
|                                 const float *src1, int len); | ||||
|  | ||||
|     /** | ||||
|      * Calculate the sum and difference of two vectors of floats. | ||||
|      * | ||||
|      * @param v1  first input vector, sum output, 16-byte aligned | ||||
|      * @param v2  second input vector, difference output, 16-byte aligned | ||||
|      * @param len length of vectors, multiple of 4 | ||||
|      */ | ||||
|     void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); | ||||
| } AVFloatDSPContext; | ||||
|  | ||||
| /** | ||||
|   | ||||
		Reference in New Issue
	
	Block a user