You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	Merge commit '4958f35a2ebc307049ff2104ffb944f5f457feb3'
* commit '4958f35a2ebc307049ff2104ffb944f5f457feb3':
  dsputil: Move apply_window_int16 to ac3dsp

Conflicts:
	libavcodec/arm/ac3dsp_init_arm.c
	libavcodec/arm/ac3dsp_neon.S
	libavcodec/x86/ac3dsp_init.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		| @@ -239,6 +239,19 @@ static void ac3_downmix_c(float **samples, float (*matrix)[2], | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void apply_window_int16_c(int16_t *output, const int16_t *input, | ||||
|                                  const int16_t *window, unsigned int len) | ||||
| { | ||||
|     int i; | ||||
|     int len2 = len >> 1; | ||||
|  | ||||
|     for (i = 0; i < len2; i++) { | ||||
|         int16_t w       = window[i]; | ||||
|         output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15; | ||||
|         output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15; | ||||
|     } | ||||
| } | ||||
|  | ||||
| av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) | ||||
| { | ||||
|     c->ac3_exponent_min = ac3_exponent_min_c; | ||||
| @@ -253,6 +266,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact) | ||||
|     c->sum_square_butterfly_int32 = ac3_sum_square_butterfly_int32_c; | ||||
|     c->sum_square_butterfly_float = ac3_sum_square_butterfly_float_c; | ||||
|     c->downmix = ac3_downmix_c; | ||||
|     c->apply_window_int16 = apply_window_int16_c; | ||||
|  | ||||
|     if (ARCH_ARM) | ||||
|         ff_ac3dsp_init_arm(c, bit_exact); | ||||
|   | ||||
| @@ -134,6 +134,20 @@ typedef struct AC3DSPContext { | ||||
|  | ||||
|     void (*downmix)(float **samples, float (*matrix)[2], int out_ch, | ||||
|                     int in_ch, int len); | ||||
|  | ||||
|     /** | ||||
|      * Apply symmetric window in 16-bit fixed-point. | ||||
|      * @param output destination array | ||||
|      *               constraints: 16-byte aligned | ||||
|      * @param input  source array | ||||
|      *               constraints: 16-byte aligned | ||||
|      * @param window window array | ||||
|      *               constraints: 16-byte aligned, at least len/2 elements | ||||
|      * @param len    full window length | ||||
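|      *               constraints: greater than zero; must be a multiple of 16 | ||||
|      *               for the NEON version, which consumes 16 samples per loop | ||||
|      *               pass (requirements of other SIMD versions not verified) | ||||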
|      */ | ||||
|     void (*apply_window_int16)(int16_t *output, const int16_t *input, | ||||
|                                const int16_t *window, unsigned int len); | ||||
| } AC3DSPContext; | ||||
|  | ||||
| void ff_ac3dsp_init    (AC3DSPContext *c, int bit_exact); | ||||
|   | ||||
| @@ -70,17 +70,6 @@ av_cold int AC3_NAME(mdct_init)(AC3EncodeContext *s) | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Apply KBD window to input samples prior to MDCT. | ||||
|  */ | ||||
| static void apply_window(void *dsp, int16_t *output, const int16_t *input, | ||||
|                          const int16_t *window, unsigned int len) | ||||
| { | ||||
|     DSPContext *dsp0 = dsp; | ||||
|     dsp0->apply_window_int16(output, input, window, len); | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Normalize the input samples to use the maximum available precision. | ||||
|  * This assumes signed 16-bit input samples. | ||||
|   | ||||
| @@ -87,18 +87,6 @@ av_cold int ff_ac3_float_mdct_init(AC3EncodeContext *s) | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Apply KBD window to input samples prior to MDCT. | ||||
|  */ | ||||
| static void apply_window(void *dsp, float *output, | ||||
|                          const float *input, const float *window, | ||||
|                          unsigned int len) | ||||
| { | ||||
|     AVFloatDSPContext *fdsp = dsp; | ||||
|     fdsp->vector_fmul(output, input, window, len); | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * Normalize the input samples. | ||||
|  * Not needed for the floating-point encoder. | ||||
|   | ||||
| @@ -34,10 +34,6 @@ | ||||
|  | ||||
| static void scale_coefficients(AC3EncodeContext *s); | ||||
|  | ||||
| static void apply_window(void *dsp, SampleType *output, | ||||
|                          const SampleType *input, const SampleType *window, | ||||
|                          unsigned int len); | ||||
|  | ||||
| static int normalize_samples(AC3EncodeContext *s); | ||||
|  | ||||
| static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len); | ||||
| @@ -105,11 +101,11 @@ static void apply_mdct(AC3EncodeContext *s) | ||||
|             const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE]; | ||||
|  | ||||
| #if CONFIG_AC3ENC_FLOAT | ||||
|             apply_window(&s->fdsp, s->windowed_samples, input_samples, | ||||
|                          s->mdct_window, AC3_WINDOW_SIZE); | ||||
|             s->fdsp.vector_fmul(s->windowed_samples, input_samples, | ||||
|                                 s->mdct_window, AC3_WINDOW_SIZE); | ||||
| #else | ||||
|             apply_window(&s->dsp, s->windowed_samples, input_samples, | ||||
|                          s->mdct_window, AC3_WINDOW_SIZE); | ||||
|             s->ac3dsp.apply_window_int16(s->windowed_samples, input_samples, | ||||
|                                          s->mdct_window, AC3_WINDOW_SIZE); | ||||
| #endif | ||||
|  | ||||
|             if (s->fixed_point) | ||||
|   | ||||
| @@ -31,6 +31,8 @@ void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift); | ||||
| void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift); | ||||
| void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len); | ||||
| void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs); | ||||
| void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, | ||||
|                                 const int16_t *window, unsigned n); | ||||
| void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4], | ||||
|                                             const int32_t *coef0, | ||||
|                                             const int32_t *coef1, | ||||
| @@ -64,6 +66,7 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) | ||||
|         c->ac3_rshift_int32      = ff_ac3_rshift_int32_neon; | ||||
|         c->float_to_fixed24      = ff_float_to_fixed24_neon; | ||||
|         c->extract_exponents     = ff_ac3_extract_exponents_neon; | ||||
|         c->apply_window_int16    = ff_apply_window_int16_neon; | ||||
|         c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon; | ||||
|         c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon; | ||||
|     } | ||||
|   | ||||
| @@ -109,6 +109,29 @@ function ff_ac3_extract_exponents_neon, export=1 | ||||
|         bx              lr | ||||
| endfunc | ||||
|  | ||||
| function ff_apply_window_int16_neon, export=1 | ||||
|         push            {r4,lr} | ||||
|         add             r4,  r1,  r3,  lsl #1 | ||||
|         add             lr,  r0,  r3,  lsl #1 | ||||
|         sub             r4,  r4,  #16 | ||||
|         sub             lr,  lr,  #16 | ||||
|         mov             r12, #-16 | ||||
| 1: | ||||
|         vld1.16         {q0},     [r1,:128]! | ||||
|         vld1.16         {q2},     [r2,:128]! | ||||
|         vld1.16         {q1},     [r4,:128], r12 | ||||
|         vrev64.16       q3,  q2 | ||||
|         vqrdmulh.s16    q0,  q0,  q2 | ||||
|         vqrdmulh.s16    d2,  d2,  d7 | ||||
|         vqrdmulh.s16    d3,  d3,  d6 | ||||
|         vst1.16         {q0},     [r0,:128]! | ||||
|         vst1.16         {q1},     [lr,:128], r12 | ||||
|         subs            r3,  r3,  #16 | ||||
|         bgt             1b | ||||
|  | ||||
|         pop             {r4,pc} | ||||
| endfunc | ||||
|  | ||||
| function ff_ac3_sum_square_butterfly_int32_neon, export=1 | ||||
|         vmov.i64        q0,  #0 | ||||
|         vmov.i64        q1,  #0 | ||||
|   | ||||
| @@ -45,9 +45,6 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le | ||||
| int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, | ||||
|                                              const int16_t *v3, int len, int mul); | ||||
|  | ||||
| void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, | ||||
|                                 const int16_t *window, unsigned n); | ||||
|  | ||||
| av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | ||||
| { | ||||
|     const int high_bit_depth = avctx->bits_per_raw_sample > 8; | ||||
| @@ -76,6 +73,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | ||||
|  | ||||
|     c->scalarproduct_int16 = ff_scalarproduct_int16_neon; | ||||
|     c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon; | ||||
|  | ||||
|     c->apply_window_int16 = ff_apply_window_int16_neon; | ||||
| } | ||||
|   | ||||
| @@ -169,29 +169,6 @@ NOVFP   ldr             r2,  [sp] | ||||
|         bx              lr | ||||
| endfunc | ||||
|  | ||||
| function ff_apply_window_int16_neon, export=1 | ||||
|         push            {r4,lr} | ||||
|         add             r4,  r1,  r3,  lsl #1 | ||||
|         add             lr,  r0,  r3,  lsl #1 | ||||
|         sub             r4,  r4,  #16 | ||||
|         sub             lr,  lr,  #16 | ||||
|         mov             r12, #-16 | ||||
| 1: | ||||
|         vld1.16         {q0},     [r1,:128]! | ||||
|         vld1.16         {q2},     [r2,:128]! | ||||
|         vld1.16         {q1},     [r4,:128], r12 | ||||
|         vrev64.16       q3,  q2 | ||||
|         vqrdmulh.s16    q0,  q0,  q2 | ||||
|         vqrdmulh.s16    d2,  d2,  d7 | ||||
|         vqrdmulh.s16    d3,  d3,  d6 | ||||
|         vst1.16         {q0},     [r0,:128]! | ||||
|         vst1.16         {q1},     [lr,:128], r12 | ||||
|         subs            r3,  r3,  #16 | ||||
|         bgt             1b | ||||
|  | ||||
|         pop             {r4,pc} | ||||
| endfunc | ||||
|  | ||||
| function ff_vector_clip_int32_neon, export=1 | ||||
|         vdup.32         q0,  r2 | ||||
|         vdup.32         q1,  r3 | ||||
|   | ||||
| @@ -2495,19 +2495,6 @@ static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, co | ||||
|     return res; | ||||
| } | ||||
|  | ||||
| static void apply_window_int16_c(int16_t *output, const int16_t *input, | ||||
|                                  const int16_t *window, unsigned int len) | ||||
| { | ||||
|     int i; | ||||
|     int len2 = len >> 1; | ||||
|  | ||||
|     for (i = 0; i < len2; i++) { | ||||
|         int16_t w       = window[i]; | ||||
|         output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15; | ||||
|         output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15; | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, | ||||
|                                 int32_t max, unsigned int len) | ||||
| { | ||||
| @@ -2799,7 +2786,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | ||||
|     c->vector_clipf = vector_clipf_c; | ||||
|     c->scalarproduct_int16 = scalarproduct_int16_c; | ||||
|     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; | ||||
|     c->apply_window_int16 = apply_window_int16_c; | ||||
|     c->vector_clip_int32 = vector_clip_int32_c; | ||||
|  | ||||
|     c->shrink[0]= av_image_copy_plane; | ||||
|   | ||||
| @@ -274,20 +274,6 @@ typedef struct DSPContext { | ||||
|      */ | ||||
|     int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, const int16_t *v2, const int16_t *v3, int len, int mul); | ||||
|  | ||||
|     /** | ||||
|      * Apply symmetric window in 16-bit fixed-point. | ||||
|      * @param output destination array | ||||
|      *               constraints: 16-byte aligned | ||||
|      * @param input  source array | ||||
|      *               constraints: 16-byte aligned | ||||
|      * @param window window array | ||||
|      *               constraints: 16-byte aligned, at least len/2 elements | ||||
|      * @param len    full window length | ||||
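|      *               constraints: greater than zero; must be a multiple of 16 | ||||
|      *               for the NEON version, which consumes 16 samples per loop | ||||
|      *               pass (requirements of other SIMD versions not verified) | ||||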
|      */ | ||||
|     void (*apply_window_int16)(int16_t *output, const int16_t *input, | ||||
|                                const int16_t *window, unsigned int len); | ||||
|  | ||||
|     /** | ||||
|      * Clip each element in an array of int32_t to a given minimum and maximum value. | ||||
|      * @param dst  destination array | ||||
|   | ||||
| @@ -51,6 +51,19 @@ void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs); | ||||
| void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs); | ||||
| void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs); | ||||
|  | ||||
| void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input, | ||||
|                                         const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input, | ||||
|                                       const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input, | ||||
|                                   const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input, | ||||
|                                 const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input, | ||||
|                                  const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, | ||||
|                                       const int16_t *window, unsigned int len); | ||||
|  | ||||
| #if ARCH_X86_32 && defined(__INTEL_COMPILER) | ||||
| #       undef HAVE_7REGS | ||||
| #       define HAVE_7REGS 0 | ||||
| @@ -201,6 +214,11 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | ||||
|     if (EXTERNAL_MMXEXT(cpu_flags)) { | ||||
|         c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; | ||||
|         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext; | ||||
|         if (bit_exact) { | ||||
|             c->apply_window_int16 = ff_apply_window_int16_mmxext; | ||||
|         } else { | ||||
|             c->apply_window_int16 = ff_apply_window_int16_round_mmxext; | ||||
|         } | ||||
|     } | ||||
|     if (EXTERNAL_SSE(cpu_flags)) { | ||||
|         c->float_to_fixed24 = ff_float_to_fixed24_sse; | ||||
| @@ -215,11 +233,19 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | ||||
|             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; | ||||
|             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; | ||||
|         } | ||||
|         if (bit_exact) { | ||||
|             c->apply_window_int16 = ff_apply_window_int16_sse2; | ||||
|         } else if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { | ||||
|             c->apply_window_int16 = ff_apply_window_int16_round_sse2; | ||||
|         } | ||||
|     } | ||||
|     if (EXTERNAL_SSSE3(cpu_flags)) { | ||||
|         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; | ||||
|         if (!(cpu_flags & AV_CPU_FLAG_ATOM)) { | ||||
|         if (cpu_flags & AV_CPU_FLAG_ATOM) { | ||||
|             c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; | ||||
|         } else { | ||||
|             c->extract_exponents = ff_ac3_extract_exponents_ssse3; | ||||
|             c->apply_window_int16 = ff_apply_window_int16_ssse3; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -86,19 +86,6 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, | ||||
|                                               const int16_t *v3, | ||||
|                                               int order, int mul); | ||||
|  | ||||
| void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input, | ||||
|                                         const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input, | ||||
|                                       const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input, | ||||
|                                   const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input, | ||||
|                                 const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input, | ||||
|                                  const int16_t *window, unsigned int len); | ||||
| void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, | ||||
|                                       const int16_t *window, unsigned int len); | ||||
|  | ||||
| void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); | ||||
| void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); | ||||
|  | ||||
| @@ -586,12 +573,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, | ||||
|  | ||||
|     c->scalarproduct_int16          = ff_scalarproduct_int16_mmxext; | ||||
|     c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext; | ||||
|  | ||||
|     if (avctx->flags & CODEC_FLAG_BITEXACT) { | ||||
|         c->apply_window_int16 = ff_apply_window_int16_mmxext; | ||||
|     } else { | ||||
|         c->apply_window_int16 = ff_apply_window_int16_round_mmxext; | ||||
|     } | ||||
| #endif /* HAVE_MMXEXT_EXTERNAL */ | ||||
| } | ||||
|  | ||||
| @@ -647,11 +628,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, | ||||
|     } else { | ||||
|         c->vector_clip_int32 = ff_vector_clip_int32_sse2; | ||||
|     } | ||||
|     if (avctx->flags & CODEC_FLAG_BITEXACT) { | ||||
|         c->apply_window_int16 = ff_apply_window_int16_sse2; | ||||
|     } else if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { | ||||
|         c->apply_window_int16 = ff_apply_window_int16_round_sse2; | ||||
|     } | ||||
|     c->bswap_buf = ff_bswap32_buf_sse2; | ||||
| #endif /* HAVE_SSE2_EXTERNAL */ | ||||
| } | ||||
| @@ -664,10 +640,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, | ||||
|     if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe | ||||
|         c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; | ||||
|  | ||||
|     if (cpu_flags & AV_CPU_FLAG_ATOM) | ||||
|         c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; | ||||
|     else | ||||
|         c->apply_window_int16 = ff_apply_window_int16_ssse3; | ||||
|     if (!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit | ||||
|         c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; | ||||
|     c->bswap_buf = ff_bswap32_buf_ssse3; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user