Merge commit '9a9e2f1c8aa4539a261625145e5c1f46a8106ac2'

* commit '9a9e2f1c8aa4539a261625145e5c1f46a8106ac2':
  dsputil: Split audio operations off into a separate context

Conflicts:
	configure
	libavcodec/takdec.c
	libavcodec/x86/Makefile
	libavcodec/x86/dsputil.asm
	libavcodec/x86/dsputil_init.c
	libavcodec/x86/dsputil_mmx.c
	libavcodec/x86/dsputil_x86.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>
2025-01-29 22:00:58 +02:00 · 2014-06-22 17:58:28 +02:00 · 2014-06-22 17:58:28 +02:00 · 99497b4683
commit 99497b4683
parent 0dae193d3e 9a9e2f1c8a
41 changed files with 662 additions and 388 deletions
--- a/configure
+++ b/configure
@ -1795,6 +1795,7 @@ CONFIG_EXTRA="
    aandcttables
    ac3dsp
    audio_frame_queue
+    audiodsp
    blockdsp
    cabac
    dsputil
@ -2004,8 +2005,8 @@ aac_encoder_select="audio_frame_queue mdct sinewin"
 aac_latm_decoder_select="aac_decoder aac_latm_parser"
 ac3_decoder_select="mdct ac3dsp ac3_parser dsputil"
 ac3_fixed_decoder_select="mdct ac3dsp ac3_parser dsputil"
-ac3_encoder_select="mdct ac3dsp dsputil"
-ac3_fixed_encoder_select="mdct ac3dsp dsputil"
+ac3_encoder_select="ac3dsp audiodsp dsputil mdct"
+ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct"
 aic_decoder_select="dsputil golomb"
 alac_encoder_select="lpc"
 als_decoder_select="dsputil"
@ -2028,7 +2029,7 @@ binkaudio_rdft_decoder_select="mdct rdft sinewin"
 cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp"
 cllc_decoder_select="dsputil"
 comfortnoise_encoder_select="lpc"
-cook_decoder_select="dsputil mdct sinewin"
+cook_decoder_select="audiodsp mdct sinewin"
 cscd_decoder_select="lzo"
 cscd_decoder_suggest="zlib"
 dca_decoder_select="mdct"
@ -2150,7 +2151,7 @@ svq1_decoder_select="hpeldsp"
 svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc"
 svq3_decoder_select="h264_decoder hpeldsp tpeldsp"
 svq3_decoder_suggest="zlib"
-tak_decoder_select="dsputil"
+tak_decoder_select="audiodsp"
 theora_decoder_select="vp3_decoder"
 thp_decoder_select="mjpeg_decoder"
 tiff_decoder_suggest="zlib"
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@ -33,6 +33,7 @@ OBJS = allcodecs.o                                                      \
 OBJS-$(CONFIG_AANDCTTABLES)            += aandcttab.o
 OBJS-$(CONFIG_AC3DSP)                  += ac3dsp.o
 OBJS-$(CONFIG_AUDIO_FRAME_QUEUE)       += audio_frame_queue.o
+OBJS-$(CONFIG_AUDIODSP)                += audiodsp.o
 OBJS-$(CONFIG_BLOCKDSP)                += blockdsp.o
 OBJS-$(CONFIG_CABAC)                   += cabac.o
 OBJS-$(CONFIG_CRYSTALHD)               += crystalhd.o
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@ -37,6 +37,7 @@
 #include "libavutil/opt.h"
 #include "avcodec.h"
 #include "put_bits.h"
+#include "audiodsp.h"
 #include "ac3dsp.h"
 #include "ac3.h"
 #include "fft.h"
@ -2478,6 +2479,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
    if (ret)
        goto init_fail;

+    ff_audiodsp_init(&s->adsp);
    ff_dsputil_init(&s->dsp, avctx);
    ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);

--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@ -39,6 +39,7 @@
 #include "fft.h"
 #include "mathops.h"
 #include "put_bits.h"
+#include "audiodsp.h"

 #ifndef CONFIG_AC3ENC_FLOAT
 #define CONFIG_AC3ENC_FLOAT 0
@ -162,6 +163,7 @@ typedef struct AC3EncodeContext {
    AVCodecContext *avctx;                  ///< parent AVCodecContext
    PutBitContext pb;                       ///< bitstream writer context
    DSPContext dsp;
+    AudioDSPContext adsp;
    AVFloatDSPContext fdsp;
    AC3DSPContext ac3dsp;                   ///< AC-3 optimized functions
    FFTContext mdct;                        ///< FFT context for MDCT calculation
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@ -29,6 +29,7 @@
 #define FFT_FLOAT 0
 #undef CONFIG_AC3ENC_FLOAT
 #include "internal.h"
+#include "audiodsp.h"
 #include "ac3enc.h"
 #include "eac3enc.h"

@ -111,9 +112,10 @@ static void sum_square_butterfly(AC3EncodeContext *s, int64_t sum[4],
 /*
 * Clip MDCT coefficients to allowable range.
 */
-static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len)
+static void clip_coefficients(AudioDSPContext *adsp, int32_t *coef,
+                              unsigned int len)
 {
-    dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
+    adsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
 }


--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@ -28,6 +28,7 @@

 #define CONFIG_AC3ENC_FLOAT 1
 #include "internal.h"
+#include "audiodsp.h"
 #include "ac3enc.h"
 #include "eac3enc.h"
 #include "kbdwin.h"
@ -117,9 +118,10 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
 /*
 * Clip MDCT coefficients to allowable range.
 */
-static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len)
+static void clip_coefficients(AudioDSPContext *adsp, float *coef,
+                              unsigned int len)
 {
-    dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
+    adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
 }


--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@ -30,6 +30,8 @@

 #include "libavutil/attributes.h"
 #include "libavutil/internal.h"
+
+#include "audiodsp.h"
 #include "internal.h"
 #include "ac3enc.h"
 #include "eac3enc.h"
@ -40,7 +42,8 @@ static void scale_coefficients(AC3EncodeContext *s);

 static int normalize_samples(AC3EncodeContext *s);

-static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len);
+static void clip_coefficients(AudioDSPContext *adsp, CoefType *coef,
+                              unsigned int len);

 static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl);

@ -164,7 +167,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
        }

        /* coefficients must be clipped in order to be encoded */
-        clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs);
+        clip_coefficients(&s->adsp, cpl_coef, num_cpl_coefs);
    }

    /* calculate energy in each band in coupling channel and each fbw channel */
@ -407,7 +410,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
    if (s->fixed_point)
        scale_coefficients(s);

-    clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
+    clip_coefficients(&s->adsp, s->blocks[0].mdct_coef[1],
                      AC3_MAX_COEFS * s->num_blocks * s->channels);

    s->cpl_on = s->cpl_enabled;
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@ -27,6 +27,7 @@
 #include "avcodec.h"
 #include "acelp_pitch_delay.h"
 #include "celp_math.h"
+#include "audiodsp.h"

 int ff_acelp_decode_8bit_to_1st_delay3(int ac_index)
 {
@ -91,7 +92,7 @@ void ff_acelp_update_past_gain(
 }

 int16_t ff_acelp_decode_gain_code(
-    DSPContext *dsp,
+    AudioDSPContext *adsp,
    int gain_corr_factor,
    const int16_t* fc_v,
    int mr_energy,
@ -118,7 +119,7 @@ int16_t ff_acelp_decode_gain_code(
           );
 #else
    mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
-                sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
+                sqrt(adsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
    return mr_energy >> 12;
 #endif
 }
--- a/libavcodec/acelp_pitch_delay.h
+++ b/libavcodec/acelp_pitch_delay.h
@ -24,7 +24,8 @@
 #define AVCODEC_ACELP_PITCH_DELAY_H

 #include <stdint.h>
-#include "dsputil.h"
+
+#include "audiodsp.h"

 #define PITCH_DELAY_MIN             20
 #define PITCH_DELAY_MAX             143
@ -139,7 +140,7 @@ void ff_acelp_update_past_gain(
 /**
 * @brief Decode the adaptive codebook gain and add
 *        correction (4.1.5 and 3.9.1 of G.729).
- * @param dsp initialized dsputil context
+ * @param adsp initialized audio DSP context
 * @param gain_corr_factor gain correction factor (2.13)
 * @param fc_v fixed-codebook vector (2.13)
 * @param mr_energy mean innovation energy and fixed-point correction (7.13)
@ -208,7 +209,7 @@ void ff_acelp_update_past_gain(
 * @remark The routine is used in G.729 and AMR (all modes).
 */
 int16_t ff_acelp_decode_gain_code(
-    DSPContext *dsp,
+    AudioDSPContext *adsp,
    int gain_corr_factor,
    const int16_t* fc_v,
    int mr_energy,
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@ -4,6 +4,7 @@ OBJS                                   += arm/fmtconvert_init_arm.o

 OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
                                          arm/ac3dsp_arm.o
+OBJS-$(CONFIG_AUDIODSP)                += arm/audiodsp_init_arm.o
 OBJS-$(CONFIG_BLOCKDSP)                += arm/blockdsp_init_arm.o
 OBJS-$(CONFIG_DSPUTIL)                 += arm/dsputil_init_arm.o        \
                                          arm/dsputil_arm.o             \
@ -80,11 +81,13 @@ VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o              \
 NEON-OBJS                              += arm/fmtconvert_neon.o

 NEON-OBJS-$(CONFIG_AC3DSP)             += arm/ac3dsp_neon.o
+NEON-OBJS-$(CONFIG_AUDIODSP)           += arm/audiodsp_init_neon.o      \
+                                          arm/audiodsp_neon.o           \
+                                          arm/int_neon.o
 NEON-OBJS-$(CONFIG_BLOCKDSP)           += arm/blockdsp_init_neon.o      \
                                          arm/blockdsp_neon.o
 NEON-OBJS-$(CONFIG_DSPUTIL)            += arm/dsputil_init_neon.o       \
                                          arm/dsputil_neon.o            \
-                                          arm/int_neon.o                \
                                          arm/simple_idct_neon.o
 NEON-OBJS-$(CONFIG_FFT)                += arm/fft_neon.o                \
                                          arm/fft_fixed_neon.o
--- a/libavcodec/arm/audiodsp_arm.h
+++ b/libavcodec/arm/audiodsp_arm.h
@ -0,0 +1,26 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_AUDIODSP_ARM_H
+#define AVCODEC_ARM_AUDIODSP_ARM_H
+
+#include "libavcodec/audiodsp.h"
+
+void ff_audiodsp_init_neon(AudioDSPContext *c);
+
+#endif /* AVCODEC_ARM_AUDIODSP_ARM_H */
--- a/libavcodec/arm/audiodsp_init_arm.c
+++ b/libavcodec/arm/audiodsp_init_arm.c
@ -0,0 +1,33 @@
+/*
+ * ARM optimized audio functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/audiodsp.h"
+#include "audiodsp_arm.h"
+
+/**
+ * Install ARM-specific implementations into an AudioDSPContext.
+ *
+ * Reads the CPU flags once and, when have_neon() accepts them, hands the
+ * context to ff_audiodsp_init_neon(). When the NEON check fails the context
+ * is left exactly as the caller passed it in.
+ *
+ * @param c context whose function pointers may be replaced
+ */
+av_cold void ff_audiodsp_init_arm(AudioDSPContext *c)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
+        ff_audiodsp_init_neon(c);
+}
--- a/libavcodec/arm/audiodsp_init_neon.c
+++ b/libavcodec/arm/audiodsp_init_neon.c
@ -0,0 +1,41 @@
+/*
+ * ARM NEON optimised audio functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/audiodsp.h"
+#include "audiodsp_arm.h"
+
+void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
+                          int len);
+void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
+                               int32_t max, unsigned int len);
+
+int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
+
+/**
+ * Point the three AudioDSPContext operations at their NEON assembly routines.
+ *
+ * The assignments are unconditional: this function performs no CPU feature
+ * check of its own, so the caller has to make sure NEON is usable first.
+ *
+ * @param c context whose function pointers are replaced
+ */
+av_cold void ff_audiodsp_init_neon(AudioDSPContext *c)
+{
+    c->vector_clip_int32 = ff_vector_clip_int32_neon;
+    c->vector_clipf      = ff_vector_clipf_neon;
+
+    c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
+}
--- a/libavcodec/arm/audiodsp_neon.S
+++ b/libavcodec/arm/audiodsp_neon.S
@ -0,0 +1,64 @@
+/*
+ * ARM NEON optimised audio functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+@ void ff_vector_clipf_neon(float *dst, const float *src,
+@                           float min, float max, int len)
+@
+@ Clamps floats read through r1 (src) to [min, max] and stores them through
+@ r0 (dst), eight values per iteration. The VFP / NOVFP line prefixes pick
+@ one of two argument-fetch sequences (presumably hard-float vs. soft-float
+@ calling convention; the macros come from asm.S, confirm there):
+@   VFP:   min and max arrive in d0[0] and d0[1], len is used from r2 as is
+@   NOVFP: min and max arrive in r2 and r3, len is loaded from the stack
+@ The loop is software-pipelined: the load and vmin for the next eight values
+@ are issued before the previous eight results are stored.
+@ The exit test is beq after subtracting 8, so len has to be a positive
+@ multiple of 8. All loads and stores carry a :128 alignment hint.
+function ff_vector_clipf_neon, export=1
+VFP     vdup.32         q1,  d0[1]          @ q1 = max in every lane (taken before q0, which overlaps d0, is written)
+VFP     vdup.32         q0,  d0[0]          @ q0 = min in every lane
+NOVFP   vdup.32         q0,  r2             @ q0 = min in every lane
+NOVFP   vdup.32         q1,  r3             @ q1 = max in every lane
+NOVFP   ldr             r2,  [sp]           @ r2 = len, fetched from the stack
+        vld1.f32        {q2},[r1,:128]!     @ prologue: load and upper-clamp the first eight values
+        vmin.f32        q10, q2,  q1
+        vld1.f32        {q3},[r1,:128]!
+        vmin.f32        q11, q3,  q1
+1:      vmax.f32        q8,  q10, q0        @ lower-clamp the eight values currently in flight
+        vmax.f32        q9,  q11, q0
+        subs            r2,  r2,  #8
+        beq             2f                  @ counter hit zero: nothing more to load
+        vld1.f32        {q2},[r1,:128]!     @ start on the next eight values ...
+        vmin.f32        q10, q2,  q1
+        vld1.f32        {q3},[r1,:128]!
+        vmin.f32        q11, q3,  q1
+        vst1.f32        {q8},[r0,:128]!     @ ... then store the eight finished ones
+        vst1.f32        {q9},[r0,:128]!
+        b               1b
+2:      vst1.f32        {q8},[r0,:128]!     @ epilogue: store the final eight results
+        vst1.f32        {q9},[r0,:128]!
+        bx              lr
+endfunc
+
+@ void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src,
+@                                int32_t min, int32_t max, unsigned int len)
+@
+@ Clamps 32-bit signed integers read through r1 (src) to [min, max] and
+@ stores them through r0 (dst). min and max are taken from r2 and r3, len is
+@ fetched from the stack. Each iteration handles eight values, and the loop
+@ body runs once before the counter is tested. Loads and stores carry a
+@ :128 alignment hint.
+function ff_vector_clip_int32_neon, export=1
+        vdup.32         q0,  r2             @ q0 = min in every lane
+        vdup.32         q1,  r3             @ q1 = max in every lane
+        ldr             r2,  [sp]           @ r2 = len, reusing the register that held min
+1:
+        vld1.32         {q2-q3},  [r1,:128]!
+        vmin.s32        q2,  q2,  q1        @ apply the upper bound
+        vmin.s32        q3,  q3,  q1
+        vmax.s32        q2,  q2,  q0        @ apply the lower bound
+        vmax.s32        q3,  q3,  q0
+        vst1.32         {q2-q3},  [r0,:128]!
+        subs            r2,  r2,  #8
+        bgt             1b                  @ signed test: repeat while the counter is above zero
+        bx              lr
+endfunc
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@ -34,13 +34,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
 void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
 void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);

-void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
-                          int len);
-void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
-                               int32_t max, unsigned int len);
-
-int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len);
-
 av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
                                  unsigned high_bit_depth)
 {
@ -58,9 +51,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
    c->add_pixels_clamped        = ff_add_pixels_clamped_neon;
    c->put_pixels_clamped        = ff_put_pixels_clamped_neon;
    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
-
-    c->vector_clipf      = ff_vector_clipf_neon;
-    c->vector_clip_int32 = ff_vector_clip_int32_neon;
-
-    c->scalarproduct_int16 = ff_scalarproduct_int16_neon;
 }
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@ -126,45 +126,3 @@ function ff_add_pixels_clamped_neon, export=1
        vst1.8          {d6},    [r3,:64], r2
        bx              lr
 endfunc
-
-function ff_vector_clipf_neon, export=1
-VFP     vdup.32         q1,  d0[1]
-VFP     vdup.32         q0,  d0[0]
-NOVFP   vdup.32         q0,  r2
-NOVFP   vdup.32         q1,  r3
-NOVFP   ldr             r2,  [sp]
-        vld1.f32        {q2},[r1,:128]!
-        vmin.f32        q10, q2,  q1
-        vld1.f32        {q3},[r1,:128]!
-        vmin.f32        q11, q3,  q1
-1:      vmax.f32        q8,  q10, q0
-        vmax.f32        q9,  q11, q0
-        subs            r2,  r2,  #8
-        beq             2f
-        vld1.f32        {q2},[r1,:128]!
-        vmin.f32        q10, q2,  q1
-        vld1.f32        {q3},[r1,:128]!
-        vmin.f32        q11, q3,  q1
-        vst1.f32        {q8},[r0,:128]!
-        vst1.f32        {q9},[r0,:128]!
-        b               1b
-2:      vst1.f32        {q8},[r0,:128]!
-        vst1.f32        {q9},[r0,:128]!
-        bx              lr
-endfunc
-
-function ff_vector_clip_int32_neon, export=1
-        vdup.32         q0,  r2
-        vdup.32         q1,  r3
-        ldr             r2,  [sp]
-1:
-        vld1.32         {q2-q3},  [r1,:128]!
-        vmin.s32        q2,  q2,  q1
-        vmin.s32        q3,  q3,  q1
-        vmax.s32        q2,  q2,  q0
-        vmax.s32        q3,  q3,  q0
-        vst1.32         {q2-q3},  [r0,:128]!
-        subs            r2,  r2,  #8
-        bgt             1b
-        bx              lr
-endfunc
--- a/libavcodec/audiodsp.c
+++ b/libavcodec/audiodsp.c
@ -0,0 +1,118 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "audiodsp.h"
+
+/**
+ * Clamp one float, handed over as its raw 32-bit pattern, using only
+ * unsigned integer comparisons.
+ *
+ * The comparisons are meaningful when the patterns follow the IEEE-754
+ * single-precision layout (assumed, not checked here) and the lower bound is
+ * negative while the upper bound is positive, which is the only case in
+ * which vector_clipf_c() takes this path:
+ *  - a set sign bit makes every negative value compare above every positive
+ *    one as unsigned, and a larger magnitude compares higher still, so
+ *    a > mini means "more negative than the lower bound";
+ *  - flipping the sign bit on both operands moves the positive values to the
+ *    top of the unsigned range, so the second test means "above the upper
+ *    bound".
+ *
+ * @param a        bit pattern of the sample
+ * @param mini     bit pattern of the lower bound
+ * @param maxi     bit pattern of the upper bound
+ * @param maxisign maxi with its sign bit flipped
+ * @return bit pattern of the clamped sample
+ */
+static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
+                                   uint32_t maxi, uint32_t maxisign)
+{
+    if (a > mini)
+        return mini;
+    else if ((a ^ (1U << 31)) > maxisign)
+        return maxi;
+    else
+        return a;
+}
+
+/**
+ * Clip a float array to [*min, *max] by running clipf_c_one() on the raw
+ * bit patterns of the samples.
+ *
+ * The only caller, vector_clipf_c(), selects this path when min < 0 < max.
+ * The bounds are passed by pointer and read back through uint32_t pointers
+ * to obtain their bit patterns.
+ *
+ * Elements are processed in groups of eight and i advances by 8, so a len
+ * that is not a multiple of 8 makes the last pass touch elements past len.
+ *
+ * NOTE(review): src, dst, min and max are all accessed through casted
+ * uint32_t pointers; this assumes a 32-bit float and a build that tolerates
+ * this form of type punning -- confirm.
+ *
+ * @param dst destination array
+ * @param src source array
+ * @param min pointer to the lower bound
+ * @param max pointer to the upper bound
+ * @param len number of elements
+ */
+static void vector_clipf_c_opposite_sign(float *dst, const float *src,
+                                         float *min, float *max, int len)
+{
+    int i;
+    uint32_t mini        = *(uint32_t *) min;
+    uint32_t maxi        = *(uint32_t *) max;
+    /* upper bound with the sign bit flipped, precomputed for clipf_c_one() */
+    uint32_t maxisign    = maxi ^ (1U << 31);
+    uint32_t *dsti       = (uint32_t *) dst;
+    const uint32_t *srci = (const uint32_t *) src;
+
+    for (i = 0; i < len; i += 8) {
+        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
+        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
+        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
+        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
+        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
+        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
+        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
+        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
+    }
+}
+
+/**
+ * Portable implementation of AudioDSPContext.vector_clipf: clip every
+ * element of src to [min, max] and write the result to dst.
+ *
+ * When the bounds straddle zero (min < 0 and max > 0) the work is delegated
+ * to vector_clipf_c_opposite_sign(), which clamps using integer comparisons
+ * on the float bit patterns. Every other combination of bounds goes through
+ * av_clipf() element by element.
+ *
+ * Both paths handle eight elements per pass and advance by 8, so a len that
+ * is not a multiple of 8 makes the last pass touch elements past len.
+ *
+ * @param dst destination array
+ * @param src source array
+ * @param min lower bound
+ * @param max upper bound
+ * @param len number of elements
+ */
+static void vector_clipf_c(float *dst, const float *src,
+                           float min, float max, int len)
+{
+    int i;
+
+    if (min < 0 && max > 0) {
+        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
+    } else {
+        for (i = 0; i < len; i += 8) {
+            dst[i]     = av_clipf(src[i], min, max);
+            dst[i + 1] = av_clipf(src[i + 1], min, max);
+            dst[i + 2] = av_clipf(src[i + 2], min, max);
+            dst[i + 3] = av_clipf(src[i + 3], min, max);
+            dst[i + 4] = av_clipf(src[i + 4], min, max);
+            dst[i + 5] = av_clipf(src[i + 5], min, max);
+            dst[i + 6] = av_clipf(src[i + 6], min, max);
+            dst[i + 7] = av_clipf(src[i + 7], min, max);
+        }
+    }
+}
+
+/**
+ * Portable implementation of AudioDSPContext.scalarproduct_int16: sum of
+ * the element-wise products of two int16_t vectors.
+ *
+ * NOTE(review): the running sum is a plain int, so inputs whose dot product
+ * does not fit in an int overflow it -- confirm that callers keep the
+ * operands small enough.
+ *
+ * @param v1    first vector
+ * @param v2    second vector
+ * @param order number of elements to multiply and accumulate
+ * @return accumulated sum of v1[i] * v2[i]
+ */
+static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
+                                     int order)
+{
+    int res = 0;
+
+    while (order--)
+        /* reads as (*v1++) * (*v2++): multiply the current pair, then step both pointers */
+        res += *v1++ **v2++;
+
+    return res;
+}
+
+/**
+ * Portable implementation of AudioDSPContext.vector_clip_int32: clip every
+ * element of src to [min, max] with av_clip() and write the result to dst.
+ *
+ * Eight elements are handled per pass, and the pass runs before len is
+ * tested, so at least eight elements are always processed. len is unsigned:
+ * if it is not a multiple of 8 the subtraction wraps around instead of
+ * reaching zero and the loop keeps going.
+ *
+ * @param dst destination array
+ * @param src source array
+ * @param min lower bound
+ * @param max upper bound
+ * @param len number of elements; has to be a non-zero multiple of 8
+ */
+static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
+                                int32_t max, unsigned int len)
+{
+    do {
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        *dst++ = av_clip(*src++, min, max);
+        len   -= 8;
+    } while (len > 0);
+}
+
+/**
+ * Fill an AudioDSPContext with working function pointers.
+ *
+ * The portable C implementations are installed first; afterwards the
+ * architecture-specific initialisers get a chance to replace them. Each
+ * initialiser is guarded by a plain if on its ARCH_* symbol.
+ * NOTE(review): ARCH_ARM / ARCH_PPC / ARCH_X86 are not defined in this file;
+ * presumably they are 0/1 constants from the build configuration -- confirm.
+ *
+ * @param c context to initialise
+ */
+av_cold void ff_audiodsp_init(AudioDSPContext *c)
+{
+    c->scalarproduct_int16 = scalarproduct_int16_c;
+    c->vector_clip_int32   = vector_clip_int32_c;
+    c->vector_clipf        = vector_clipf_c;
+
+    if (ARCH_ARM)
+        ff_audiodsp_init_arm(c);
+    if (ARCH_PPC)
+        ff_audiodsp_init_ppc(c);
+    if (ARCH_X86)
+        ff_audiodsp_init_x86(c);
+}
--- a/libavcodec/audiodsp.h
+++ b/libavcodec/audiodsp.h
@ -0,0 +1,59 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AUDIODSP_H
+#define AVCODEC_AUDIODSP_H
+
+#include <stdint.h>
+
+/**
+ * Table of function pointers for the audio DSP primitives.
+ *
+ * ff_audiodsp_init() fills in all three members, first with C versions and
+ * then, where available, with architecture-specific replacements.
+ */
+typedef struct AudioDSPContext {
+    /**
+     * Calculate scalar product of two vectors.
+     * @param len length of vectors, should be multiple of 16
+     */
+    int32_t (*scalarproduct_int16)(const int16_t *v1,
+                                   const int16_t *v2 /* align 16 */, int len);
+
+    /**
+     * Clip each element in an array of int32_t to a given minimum and
+     * maximum value.
+     * @param dst  destination array
+     *             constraints: 16-byte aligned
+     * @param src  source array
+     *             constraints: 16-byte aligned
+     * @param min  minimum value
+     *             constraints: must be in the range [-(1 << 24), 1 << 24]
+     * @param max  maximum value
+     *             constraints: must be in the range [-(1 << 24), 1 << 24]
+     * @param len  number of elements in the array
+     *             constraints: multiple of 32 greater than zero
+     */
+    void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
+                              int32_t max, unsigned int len);
+    /**
+     * Clip each element in an array of float to a given minimum and
+     * maximum value.
+     * Implementations assume len is a multiple of 8 and that both arrays
+     * are 16-byte aligned.
+     * NOTE(review): the inline "align 16" tag on len below does not match
+     * the "multiple of 8" requirement stated here -- confirm which one is
+     * intended.
+     */
+    void (*vector_clipf)(float *dst /* align 16 */,
+                         const float *src /* align 16 */,
+                         float min, float max, int len /* align 16 */);
+} AudioDSPContext;
+
+void ff_audiodsp_init(AudioDSPContext *c);
+void ff_audiodsp_init_arm(AudioDSPContext *c);
+void ff_audiodsp_init_ppc(AudioDSPContext *c);
+void ff_audiodsp_init_x86(AudioDSPContext *c);
+
+#endif /* AVCODEC_AUDIODSP_H */
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@ -44,9 +44,10 @@

 #include "libavutil/channel_layout.h"
 #include "libavutil/lfg.h"
+
+#include "audiodsp.h"
 #include "avcodec.h"
 #include "get_bits.h"
-#include "dsputil.h"
 #include "bytestream.h"
 #include "fft.h"
 #include "internal.h"
@ -123,7 +124,7 @@ typedef struct cook {
    void (*saturate_output)(struct cook *q, float *out);

    AVCodecContext*     avctx;
-    DSPContext          dsp;
+    AudioDSPContext     adsp;
    GetBitContext       gb;
    /* stream data */
    int                 num_vectors;
@ -873,8 +874,8 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
 */
 static void saturate_output_float(COOKContext *q, float *out)
 {
-    q->dsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
-                        -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8));
+    q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
+                         -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8));
 }


@ -1072,7 +1073,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
    /* Initialize RNG. */
    av_lfg_init(&q->random_state, 0);

-    ff_dsputil_init(&q->dsp, avctx);
+    ff_audiodsp_init(&q->adsp);

    while (edata_ptr < edata_ptr_end) {
        /* 8 for mono, 16 for stereo, ? for multichannel
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@ -1345,87 +1345,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
 WRAPPER8_16_SQ(bit8x8_c, bit16_c)

-static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
-                                   uint32_t maxi, uint32_t maxisign)
-{
-    if (a > mini)
-        return mini;
-    else if ((a ^ (1U << 31)) > maxisign)
-        return maxi;
-    else
-        return a;
-}
-
-static void vector_clipf_c_opposite_sign(float *dst, const float *src,
-                                         float *min, float *max, int len)
-{
-    int i;
-    uint32_t mini        = *(uint32_t *) min;
-    uint32_t maxi        = *(uint32_t *) max;
-    uint32_t maxisign    = maxi ^ (1U << 31);
-    uint32_t *dsti       = (uint32_t *) dst;
-    const uint32_t *srci = (const uint32_t *) src;
-
-    for (i = 0; i < len; i += 8) {
-        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
-        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
-        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
-        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
-        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
-        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
-        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
-        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
-    }
-}
-
-static void vector_clipf_c(float *dst, const float *src,
-                           float min, float max, int len)
-{
-    int i;
-
-    if (min < 0 && max > 0) {
-        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
-    } else {
-        for (i = 0; i < len; i += 8) {
-            dst[i]     = av_clipf(src[i], min, max);
-            dst[i + 1] = av_clipf(src[i + 1], min, max);
-            dst[i + 2] = av_clipf(src[i + 2], min, max);
-            dst[i + 3] = av_clipf(src[i + 3], min, max);
-            dst[i + 4] = av_clipf(src[i + 4], min, max);
-            dst[i + 5] = av_clipf(src[i + 5], min, max);
-            dst[i + 6] = av_clipf(src[i + 6], min, max);
-            dst[i + 7] = av_clipf(src[i + 7], min, max);
-        }
-    }
-}
-
-static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
-                                     int order)
-{
-    int res = 0;
-
-    while (order--)
-        res += *v1++ **v2++;
-
-    return res;
-}
-
-static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
-                                int32_t max, unsigned int len)
-{
-    do {
-        *dst++ = av_clip(*src++, min, max);
-        *dst++ = av_clip(*src++, min, max);
-        *dst++ = av_clip(*src++, min, max);
-        *dst++ = av_clip(*src++, min, max);
-        *dst++ = av_clip(*src++, min, max);
-        *dst++ = av_clip(*src++, min, max);
-        *dst++ = av_clip(*src++, min, max);
-        *dst++ = av_clip(*src++, min, max);
-        len   -= 8;
-    } while (len > 0);
-}
-
 static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
 {
    ff_j_rev_dct(block);
@ -1661,10 +1580,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
    c->try_8x8basis = try_8x8basis_c;
    c->add_8x8basis = add_8x8basis_c;

-    c->scalarproduct_int16 = scalarproduct_int16_c;
-    c->vector_clip_int32   = vector_clip_int32_c;
-    c->vector_clipf        = vector_clipf_c;
-
    c->shrink[0] = av_image_copy_plane;
    c->shrink[1] = ff_shrink22;
    c->shrink[2] = ff_shrink44;
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@ -140,11 +140,6 @@ typedef struct DSPContext {
    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
    void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);

-    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
-    void (*vector_clipf)(float *dst /* align 16 */,
-                         const float *src /* align 16 */,
-                         float min, float max, int len /* align 16 */);
-
    /* (I)DCT */
    void (*fdct)(int16_t *block /* align 16 */);
    void (*fdct248)(int16_t *block /* align 16 */);
@ -204,30 +199,6 @@ typedef struct DSPContext {

    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
                      int src_wrap, int width, int height);
-
-    /**
-     * Calculate scalar product of two vectors.
-     * @param len length of vectors, should be multiple of 16
-     */
-    int32_t (*scalarproduct_int16)(const int16_t *v1,
-                                   const int16_t *v2 /* align 16 */, int len);
-
-    /**
-     * Clip each element in an array of int32_t to a given minimum and
-     * maximum value.
-     * @param dst  destination array
-     *             constraints: 16-byte aligned
-     * @param src  source array
-     *             constraints: 16-byte aligned
-     * @param min  minimum value
-     *             constraints: must be in the range [-(1 << 24), 1 << 24]
-     * @param max  maximum value
-     *             constraints: must be in the range [-(1 << 24), 1 << 24]
-     * @param len  number of elements in the array
-     *             constraints: multiple of 32 greater than zero
-     */
-    void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
-                              int32_t max, unsigned int len);
 } DSPContext;

 void ff_dsputil_static_init(void);
--- a/libavcodec/g729dec.c
+++ b/libavcodec/g729dec.c
@ -25,7 +25,7 @@
 #include "avcodec.h"
 #include "libavutil/avutil.h"
 #include "get_bits.h"
-#include "dsputil.h"
+#include "audiodsp.h"
 #include "internal.h"


@ -100,7 +100,7 @@ typedef struct {
 } G729FormatDescription;

 typedef struct {
-    DSPContext dsp;
+    AudioDSPContext adsp;

    /// past excitation signal buffer
    int16_t exc_base[2*SUBFRAME_SIZE+PITCH_DELAY_MAX+INTERPOL_LEN];
@ -381,8 +381,8 @@ static av_cold int decoder_init(AVCodecContext * avctx)
    for(i=0; i<4; i++)
        ctx->quant_energy[i] = -14336; // -14 in (5.10)

-    ff_dsputil_init(&ctx->dsp, avctx);
-    ctx->dsp.scalarproduct_int16 = scalarproduct_int16_c;
+    ff_audiodsp_init(&ctx->adsp);
+    ctx->adsp.scalarproduct_int16 = scalarproduct_int16_c;

    return 0;
 }
@ -578,7 +578,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
            }

            /* Decode the fixed-codebook gain. */
-            ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&ctx->dsp, gain_corr_factor,
+            ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&ctx->adsp, gain_corr_factor,
                                                               fc, MR_ENERGY,
                                                               ctx->quant_energy,
                                                               ma_prediction_coeff,
@ -668,7 +668,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,

        /* Call postfilter and also update voicing decision for use in next frame. */
        ff_g729_postfilter(
-                &ctx->dsp,
+                &ctx->adsp,
                &ctx->ht_prev_data,
                &is_periodic,
                &lp[i][0],
--- a/libavcodec/g729postfilter.c
+++ b/libavcodec/g729postfilter.c
@ -107,7 +107,7 @@ static void residual_filter(int16_t* out, const int16_t* filter_coeffs, const in
 *
 * \return 0 if long-term prediction gain is less than 3dB, 1 -  otherwise
 */
-static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
+static int16_t long_term_filter(AudioDSPContext *adsp, int pitch_delay_int,
                                const int16_t* residual, int16_t *residual_filt,
                                int subframe_size)
 {
@ -161,7 +161,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
    /* Start of best delay searching code */
    gain_num = 0;

-    ener = dsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE,
+    ener = adsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE,
                                    sig_scaled + RES_PREV_DATA_SIZE,
                                    subframe_size);
    if (ener) {
@ -190,7 +190,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
        corr_int_num = 0;
        best_delay_int = pitch_delay_int - 1;
        for (i = pitch_delay_int - 1; i <= pitch_delay_int + 1; i++) {
-            sum = dsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE,
+            sum = adsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE,
                                           sig_scaled + RES_PREV_DATA_SIZE - i,
                                           subframe_size);
            if (sum > corr_int_num) {
@ -200,7 +200,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
        }
        if (corr_int_num) {
            /* Compute denominator of pseudo-normalized correlation R'(0). */
-            corr_int_den = dsp->scalarproduct_int16(sig_scaled - best_delay_int + RES_PREV_DATA_SIZE,
+            corr_int_den = adsp->scalarproduct_int16(sig_scaled - best_delay_int + RES_PREV_DATA_SIZE,
                                                    sig_scaled - best_delay_int + RES_PREV_DATA_SIZE,
                                                    subframe_size);

@ -227,7 +227,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
              Also compute maximum value of above denominators over all k. */
            tmp = corr_int_den;
            for (k = 0; k < ANALYZED_FRAC_DELAYS; k++) {
-                sum = dsp->scalarproduct_int16(&delayed_signal[k][1],
+                sum = adsp->scalarproduct_int16(&delayed_signal[k][1],
                                               &delayed_signal[k][1],
                                               subframe_size - 1);
                corr_den[k][0] = sum + delayed_signal[k][0            ] * delayed_signal[k][0            ];
@ -255,7 +255,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
                        int gain_num_short_square;
                        /* Compute numerator of pseudo-normalized
                           correlation R'(k). */
-                        sum = dsp->scalarproduct_int16(&delayed_signal[k][i],
+                        sum = adsp->scalarproduct_int16(&delayed_signal[k][i],
                                                       sig_scaled + RES_PREV_DATA_SIZE,
                                                       subframe_size);
                        gain_num_short = FFMAX(sum >> sh_gain_num, 0);
@ -312,7 +312,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
                             LONG_INT_FILT_LEN,
                             subframe_size + 1);
        /* Compute R'(k) correlation's numerator. */
-        sum = dsp->scalarproduct_int16(residual_filt,
+        sum = adsp->scalarproduct_int16(residual_filt,
                                       sig_scaled + RES_PREV_DATA_SIZE,
                                       subframe_size);

@ -327,7 +327,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
        }

        /* Compute R'(k) correlation's denominator. */
-        sum = dsp->scalarproduct_int16(residual_filt, residual_filt, subframe_size);
+        sum = adsp->scalarproduct_int16(residual_filt, residual_filt, subframe_size);

        tmp = FFMAX(av_log2(sum) - 14, 0);
        sum >>= tmp;
@ -421,7 +421,7 @@ static int16_t long_term_filter(DSPContext *dsp, int pitch_delay_int,
 *
 * \note All members of lp_gn, except 10-19 must be equal to zero.
 */
-static int16_t get_tilt_comp(DSPContext *dsp, int16_t *lp_gn,
+static int16_t get_tilt_comp(AudioDSPContext *adsp, int16_t *lp_gn,
                             const int16_t *lp_gd, int16_t* speech,
                             int subframe_size)
 {
@ -437,8 +437,8 @@ static int16_t get_tilt_comp(DSPContext *dsp, int16_t *lp_gn,
    /* Now lp_gn (starting with 10) contains impulse response
       of A(z/FORMANT_PP_FACTOR_NUM)/A(z/FORMANT_PP_FACTOR_DEN) filter. */

-    rh0 = dsp->scalarproduct_int16(lp_gn + 10, lp_gn + 10, 20);
-    rh1 = dsp->scalarproduct_int16(lp_gn + 10, lp_gn + 11, 20);
+    rh0 = adsp->scalarproduct_int16(lp_gn + 10, lp_gn + 10, 20);
+    rh1 = adsp->scalarproduct_int16(lp_gn + 10, lp_gn + 11, 20);

    /* downscale to avoid overflow */
    temp = av_log2(rh0) - 14;
@ -511,7 +511,7 @@ static int16_t apply_tilt_comp(int16_t* out, int16_t* res_pst, int refl_coeff,
    return tmp;
 }

-void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing,
+void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voicing,
                     const int16_t *lp_filter_coeffs, int pitch_delay_int,
                     int16_t* residual, int16_t* res_filter_data,
                     int16_t* pos_filter_data, int16_t *speech, int subframe_size)
@ -541,7 +541,7 @@ void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing,

    /* long-term filter. If long-term prediction gain is larger than 3dB (returned value is
       nonzero) then declare current subframe as periodic. */
-    *voicing = FFMAX(*voicing, long_term_filter(dsp, pitch_delay_int,
+    *voicing = FFMAX(*voicing, long_term_filter(adsp, pitch_delay_int,
                                                residual, residual_filt_buf + 10,
                                                subframe_size));

@ -549,7 +549,7 @@ void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing,
    memmove(residual, residual + subframe_size, RES_PREV_DATA_SIZE * sizeof(int16_t));

    /* short-term filter tilt compensation */
-    tilt_comp_coeff = get_tilt_comp(dsp, lp_gn, lp_gd, residual_filt_buf + 10, subframe_size);
+    tilt_comp_coeff = get_tilt_comp(adsp, lp_gn, lp_gd, residual_filt_buf + 10, subframe_size);

    /* Apply second half of short-term postfilter: 1/A(z/FORMANT_PP_FACTOR_DEN) */
    ff_celp_lp_synthesis_filter(pos_filter_data + 10, lp_gd + 1,
--- a/libavcodec/g729postfilter.h
+++ b/libavcodec/g729postfilter.h
@ -22,7 +22,7 @@
 #define FFMPEG_G729POSTFILTER_H

 #include <stdint.h>
-#include "dsputil.h"
+#include "audiodsp.h"

 /**
 * tilt compensation factor (G.729, k1>0)
@ -94,7 +94,7 @@
 *   Short-term postfilter (4.2.2).
 *   Tilt-compensation (4.2.3)
 */
-void ff_g729_postfilter(DSPContext *dsp, int16_t* ht_prev_data, int* voicing,
+void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voicing,
                     const int16_t *lp_filter_coeffs, int pitch_delay_int,
                     int16_t* residual, int16_t* res_filter_data,
                     int16_t* pos_filter_data, int16_t *speech,
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@ -1,5 +1,6 @@
 OBJS                                   += ppc/fmtconvert_altivec.o      \

+OBJS-$(CONFIG_AUDIODSP)                += ppc/audiodsp.o
 OBJS-$(CONFIG_BLOCKDSP)                += ppc/blockdsp.o
 OBJS-$(CONFIG_DSPUTIL)                 += ppc/dsputil_ppc.o
 OBJS-$(CONFIG_FFT)                     += ppc/fft_altivec.o
@ -24,7 +25,6 @@ ALTIVEC-OBJS-$(CONFIG_DSPUTIL)         += ppc/dsputil_altivec.o         \
                                          ppc/fdct_altivec.o            \
                                          ppc/gmc_altivec.o             \
                                          ppc/idct_altivec.o            \
-                                          ppc/int_altivec.o             \

 FFT-OBJS-$(HAVE_GNU_AS)                += ppc/fft_altivec_s.o
 FFT-OBJS-$(HAVE_VSX)                   += ppc/fft_vsx.o
--- a/libavcodec/ppc/int_altivec.c
+++ b/libavcodec/ppc/int_altivec.c
@ -20,7 +20,7 @@

 /**
 * @file
- * miscellaneous integer operations
+ * miscellaneous audio operations
 */

 #include "config.h"
@ -29,10 +29,13 @@
 #endif

 #include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
 #include "libavutil/ppc/types_altivec.h"
 #include "libavutil/ppc/util_altivec.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_altivec.h"
+#include "libavcodec/audiodsp.h"
+
+#if HAVE_ALTIVEC

 static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
                                           int order)
@ -56,7 +59,14 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
    return ires;
 }

-av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
+#endif /* HAVE_ALTIVEC */
+
+av_cold void ff_audiodsp_init_ppc(AudioDSPContext *c) /* PPC hook: may replace c->scalarproduct_int16 */
 {
+#if HAVE_ALTIVEC /* without AltiVec support compiled in, this function does nothing */
+    if (!PPC_ALTIVEC(av_get_cpu_flags())) /* runtime check: leave c untouched unless the CPU flags pass PPC_ALTIVEC() */
+        return;
+
    c->scalarproduct_int16 = scalarproduct_int16_altivec;
+#endif /* HAVE_ALTIVEC */
 }
--- a/libavcodec/ppc/dsputil_altivec.h
+++ b/libavcodec/ppc/dsputil_altivec.h
@ -36,6 +36,5 @@ void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);

 void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
                             unsigned high_bit_depth);
-void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx);

 #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@ -35,7 +35,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
    int mm_flags = av_get_cpu_flags();
    if (PPC_ALTIVEC(mm_flags)) {
        ff_dsputil_init_altivec(c, avctx, high_bit_depth);
-        ff_int_init_altivec(c, avctx);
+
        c->gmc1 = ff_gmc1_altivec;

        if (!high_bit_depth) {
--- a/libavcodec/ra144.c
+++ b/libavcodec/ra144.c
@ -1681,9 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy)
 }

 /** inverse root mean square */
-int ff_irms(DSPContext *dsp, const int16_t *data)
+int ff_irms(AudioDSPContext *adsp, const int16_t *data)
 {
-    unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE);
+    unsigned int sum = adsp->scalarproduct_int16(data, data, BLOCKSIZE);

    if (sum == 0)
        return 0; /* OOPS - division by zero */
@ -1701,7 +1701,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
    if (cba_idx) {
        cba_idx += BLOCKSIZE/2 - 1;
        ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx);
-        m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * gval) >> 12;
+        m[0] = (ff_irms(&ractx->adsp, ractx->buffer_a) * gval) >> 12;
    } else {
        m[0] = 0;
    }
--- a/libavcodec/ra144.h
+++ b/libavcodec/ra144.h
@ -25,7 +25,7 @@
 #include <stdint.h>
 #include "lpc.h"
 #include "audio_frame_queue.h"
-#include "dsputil.h"
+#include "audiodsp.h"

 #define NBLOCKS         4       ///< number of subblocks within a block
 #define BLOCKSIZE       40      ///< subblock size in 16-bit words
@ -36,7 +36,7 @@

 typedef struct RA144Context {
    AVCodecContext *avctx;
-    DSPContext dsp;
+    AudioDSPContext adsp;
    LPCContext lpc_ctx;
    AudioFrameQueue afq;
    int last_frame;
@ -72,7 +72,7 @@ unsigned int ff_rms(const int *data);
 int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold,
              int energy);
 unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy);
-int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/);
+int ff_irms(AudioDSPContext *adsp, const int16_t *data/*align 16*/);
 void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
                           int cba_idx, int cb1_idx, int cb2_idx,
                           int gval, int gain);
--- a/libavcodec/ra144dec.c
+++ b/libavcodec/ra144dec.c
@ -34,7 +34,7 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx)
    RA144Context *ractx = avctx->priv_data;

    ractx->avctx = avctx;
-    ff_dsputil_init(&ractx->dsp, avctx);
+    ff_audiodsp_init(&ractx->adsp);

    ractx->lpc_coef[0] = ractx->lpc_tables[0];
    ractx->lpc_coef[1] = ractx->lpc_tables[1];
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@ -61,7 +61,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
    ractx->lpc_coef[0] = ractx->lpc_tables[0];
    ractx->lpc_coef[1] = ractx->lpc_tables[1];
    ractx->avctx = avctx;
-    ff_dsputil_init(&ractx->dsp, avctx);
+    ff_audiodsp_init(&ractx->adsp);
    ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
                      FF_LPC_TYPE_LEVINSON);
    if (ret < 0)
@ -374,7 +374,7 @@ static void ra144_encode_subblock(RA144Context *ractx,
        memcpy(cba, work + LPC_ORDER, sizeof(cba));

        ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
-        m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * rms) >> 12;
+        m[0] = (ff_irms(&ractx->adsp, ractx->buffer_a) * rms) >> 12;
    }
    fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
    for (i = 0; i < BLOCKSIZE; i++) {
--- a/libavcodec/takdec.c
+++ b/libavcodec/takdec.c
@ -28,9 +28,9 @@
 #include "libavutil/internal.h"
 #include "libavutil/samplefmt.h"
 #include "tak.h"
+#include "audiodsp.h"
 #include "thread.h"
 #include "avcodec.h"
-#include "dsputil.h"
 #include "internal.h"
 #include "unary.h"

@ -46,7 +46,7 @@ typedef struct MCDParam {

 typedef struct TAKDecContext {
    AVCodecContext *avctx;                          ///< parent AVCodecContext
-    DSPContext      dsp;
+    AudioDSPContext adsp;
    TAKStreamInfo   ti;
    GetBitContext   gb;                             ///< bitstream reader initialized to start at the current frame

@ -171,7 +171,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
 {
    TAKDecContext *s = avctx->priv_data;

-    ff_dsputil_init(&s->dsp, avctx);
+    ff_audiodsp_init(&s->adsp);

    s->avctx = avctx;
    avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
@ -469,8 +469,8 @@ static int decode_subframe(TAKDecContext *s, int32_t *decoded,
            int v = 1 << (filter_quant - 1);

            if (filter_order & -16)
-                v += s->dsp.scalarproduct_int16(&s->residues[i], s->filter,
-                                                filter_order & -16);
+                v += s->adsp.scalarproduct_int16(&s->residues[i], s->filter,
+                                                 filter_order & -16);
            for (j = filter_order & -16; j < filter_order; j += 4) {
                v += s->residues[i + j + 3] * s->filter[j + 3] +
                     s->residues[i + j + 2] * s->filter[j + 2] +
@ -640,8 +640,8 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
                int v = 1 << 9;

                if (filter_order == 16) {
-                    v += s->dsp.scalarproduct_int16(&s->residues[i], s->filter,
-                                                    filter_order);
+                    v += s->adsp.scalarproduct_int16(&s->residues[i], s->filter,
+                                                     filter_order);
                } else {
                    v += s->residues[i + 7] * s->filter[7] +
                         s->residues[i + 6] * s->filter[6] +
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@ -2,6 +2,7 @@ OBJS                                   += x86/constants.o               \
                                          x86/fmtconvert_init.o         \

 OBJS-$(CONFIG_AC3DSP)                  += x86/ac3dsp_init.o
+OBJS-$(CONFIG_AUDIODSP)                += x86/audiodsp_init.o
 OBJS-$(CONFIG_BLOCKDSP)                += x86/blockdsp_mmx.o
 OBJS-$(CONFIG_DCT)                     += x86/dct_init.o
 OBJS-$(CONFIG_DSPUTIL)                 += x86/dsputil_init.o
@ -69,6 +70,7 @@ YASM-OBJS                              += x86/deinterlace.o             \
                                          x86/fmtconvert.o              \

 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
+YASM-OBJS-$(CONFIG_AUDIODSP)           += x86/audiodsp.o
 YASM-OBJS-$(CONFIG_BLOCKDSP)           += x86/blockdsp.o
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
 YASM-OBJS-$(CONFIG_DIRAC_DECODER)      += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@ -0,0 +1,133 @@
+;******************************************************************************
+;* optimized audio functions
+;* Copyright (c) 2008 Loren Merritt
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT
+
+%macro SCALARPRODUCT 0
+; int32_t ff_scalarproduct_int16_<cpu>(const int16_t *v1, const int16_t *v2, int order)
+cglobal scalarproduct_int16, 3,3,3, v1, v2, order
+    shl orderq, 1                       ; element count -> byte count (int16_t is 2 bytes)
+    add v1q, orderq                     ; point v1 just past its last element
+    add v2q, orderq                     ; same for v2
+    neg orderq                          ; negative byte offset that counts up towards 0
+    pxor    m2, m2                      ; m2 = running sum, starts at zero
+.loop:                                  ; bottom-tested: one iteration (2*mmsize bytes of each input) always runs
+    movu    m0, [v1q + orderq]          ; unaligned loads of two consecutive vectors from v1
+    movu    m1, [v1q + orderq + mmsize]
+    pmaddwd m0, [v2q + orderq]          ; multiply int16 pairs, add adjacent products into 32-bit lanes
+    pmaddwd m1, [v2q + orderq + mmsize] ; v2 is read directly as a memory operand
+    paddd   m2, m0                      ; accumulate both partial results
+    paddd   m2, m1
+    add     orderq, mmsize*2            ; 2*mmsize bytes consumed per iteration
+    jl .loop                            ; repeat while the offset is still negative
+    HADDD   m2, m0                      ; presumably folds the 32-bit lanes of m2 into one sum (m0 as scratch) - see x86util.asm
+    movd   eax, m2                      ; low 32 bits become the return value
+%if mmsize == 8
+    emms                                ; MMX build only: clear MMX state before returning
+%endif
+    RET
+%endmacro
+
+INIT_MMX mmxext
+SCALARPRODUCT
+INIT_XMM sse2
+SCALARPRODUCT
+
+
+;-----------------------------------------------------------------------------
+; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
+;                           int32_t max, unsigned int len)
+;-----------------------------------------------------------------------------
+
+; %1 = number of xmm registers used
+; %2 = number of unrolled load/clip/store groups per loop iteration
+; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per group
+; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
+; %5 = optional suffix appended to the function name
+%macro VECTOR_CLIP_INT32 4-5
+cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
+%if %4
+    cvtsi2ss  m4, minm                  ; float CLIPD: convert the integer bounds to float
+    cvtsi2ss  m5, maxm
+%else
+    movd      m4, minm                  ; integer CLIPD: load the bounds unchanged
+    movd      m5, maxm
+%endif
+    SPLATD    m4                        ; presumably replicates the low dword into every lane - see x86util.asm
+    SPLATD    m5
+.loop:                                  ; bottom-tested: one full iteration always runs
+%assign %%i 0
+%rep %2
+    mova      m0,  [srcq+mmsize*(0+%%i)] ; load the first 4 vectors of this group (mova = aligned-move alias)
+    mova      m1,  [srcq+mmsize*(1+%%i)]
+    mova      m2,  [srcq+mmsize*(2+%%i)]
+    mova      m3,  [srcq+mmsize*(3+%%i)]
+%if %3
+    mova      m7,  [srcq+mmsize*(4+%%i)] ; 8-vector group: 4 more loads into m7-m10
+    mova      m8,  [srcq+mmsize*(5+%%i)]
+    mova      m9,  [srcq+mmsize*(6+%%i)]
+    mova      m10, [srcq+mmsize*(7+%%i)]
+%endif
+    CLIPD  m0,  m4, m5, m6              ; presumably clamps m0 to [m4, m5] with m6 as scratch; CLIPD is bound by the %define preceding each instantiation
+    CLIPD  m1,  m4, m5, m6
+    CLIPD  m2,  m4, m5, m6
+    CLIPD  m3,  m4, m5, m6
+%if %3
+    CLIPD  m7,  m4, m5, m6
+    CLIPD  m8,  m4, m5, m6
+    CLIPD  m9,  m4, m5, m6
+    CLIPD  m10, m4, m5, m6
+%endif
+    mova  [dstq+mmsize*(0+%%i)], m0     ; store the clipped vectors at the same offsets in dst
+    mova  [dstq+mmsize*(1+%%i)], m1
+    mova  [dstq+mmsize*(2+%%i)], m2
+    mova  [dstq+mmsize*(3+%%i)], m3
+%if %3
+    mova  [dstq+mmsize*(4+%%i)], m7
+    mova  [dstq+mmsize*(5+%%i)], m8
+    mova  [dstq+mmsize*(6+%%i)], m9
+    mova  [dstq+mmsize*(7+%%i)], m10
+%endif
+%assign %%i %%i+4*(%3+1)
+%endrep
+    add     srcq, mmsize*4*(%2+%3)      ; advance by the bytes handled in this iteration
+    add     dstq, mmsize*4*(%2+%3)
+    sub     lend, mmsize*(%2+%3)        ; len counts int32 elements: bytes advanced / 4
+    jg .loop                            ; continue while the remaining count is > 0
+    REP_RET
+%endmacro
+
+INIT_MMX mmx
+%define CLIPD CLIPD_MMX
+VECTOR_CLIP_INT32 0, 1, 0, 0            ; declared in C as ff_vector_clip_int32_mmx
+INIT_XMM sse2
+VECTOR_CLIP_INT32 6, 1, 0, 0, _int      ; ff_vector_clip_int32_int_sse2: CLIPD is still CLIPD_MMX here, bounds stay integer
+%define CLIPD CLIPD_SSE2
+VECTOR_CLIP_INT32 6, 2, 0, 1            ; ff_vector_clip_int32_sse2: float-bound CLIPD_SSE2, two groups per iteration
+INIT_XMM sse4
+%define CLIPD CLIPD_SSE41
+%ifdef m8
+VECTOR_CLIP_INT32 11, 1, 1, 0           ; m8 available (presumably x86-64): one 8-vector group using m7-m10
+%else
+VECTOR_CLIP_INT32 6, 1, 0, 0            ; otherwise: one 4-vector group, as in the mmx build
+%endif
--- a/libavcodec/x86/audiodsp.h
+++ b/libavcodec/x86/audiodsp.h
@ -0,0 +1,25 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_AUDIODSP_H
+#define AVCODEC_X86_AUDIODSP_H
+
+void ff_vector_clipf_sse(float *dst, const float *src, /* SSE routine installed as AudioDSPContext.vector_clipf by ff_audiodsp_init_x86() */
+                         float min, float max, int len); /* NOTE(review): the generic vector_clipf hook was documented as "len is a multiple of 8, arrays 16-byte aligned" - presumably applies here, confirm */
+
+#endif /* AVCODEC_X86_AUDIODSP_H */
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@ -0,0 +1,66 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/audiodsp.h"
+#include "audiodsp.h"
+
+int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
+                                      int order); /* MMXEXT build of the SCALARPRODUCT macro in audiodsp.asm */
+int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
+                                    int order); /* SSE2 build of the same macro */
+
+void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
+                              int32_t min, int32_t max, unsigned int len); /* MMX build of VECTOR_CLIP_INT32 in audiodsp.asm */
+void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src,
+                               int32_t min, int32_t max, unsigned int len); /* SSE2 build that converts min/max to float (CLIPD_SSE2) */
+void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
+                                   int32_t min, int32_t max, unsigned int len); /* SSE2 build that keeps min/max as integers; chosen when AV_CPU_FLAG_ATOM is set */
+void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
+                               int32_t min, int32_t max, unsigned int len); /* SSE4 build (CLIPD_SSE41) */
+
+av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
+{ /* Installs x86 routines into c; checks run from oldest to newest instruction set so a later match overrides an earlier one. */
+    int cpu_flags = av_get_cpu_flags(); /* feature mask tested by the EXTERNAL_*() macros below */
+
+    if (EXTERNAL_MMX(cpu_flags)) /* baseline int32 clip; replaced below when SSE2/SSE4 match */
+        c->vector_clip_int32 = ff_vector_clip_int32_mmx;
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) /* replaced below when SSE2 matches */
+        c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
+
+    if (EXTERNAL_SSE(cpu_flags)) /* only x86 routine provided for vector_clipf */
+        c->vector_clipf = ff_vector_clipf_sse;
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
+        if (cpu_flags & AV_CPU_FLAG_ATOM) /* Atom-flagged CPUs get the integer-bound variant */
+            c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
+        else
+            c->vector_clip_int32 = ff_vector_clip_int32_sse2;
+    }
+
+    if (EXTERNAL_SSE4(cpu_flags)) /* overrides either SSE2 choice above, including the Atom one */
+        c->vector_clip_int32 = ff_vector_clip_int32_sse4;
+} /* pointers with no matching branch are left as the caller set them */
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@ -30,115 +30,6 @@ cextern pb_80

 SECTION_TEXT

-%macro SCALARPRODUCT 0
-; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
-cglobal scalarproduct_int16, 3,3,3, v1, v2, order
-    shl orderq, 1
-    add v1q, orderq
-    add v2q, orderq
-    neg orderq
-    pxor    m2, m2
-.loop:
-    movu    m0, [v1q + orderq]
-    movu    m1, [v1q + orderq + mmsize]
-    pmaddwd m0, [v2q + orderq]
-    pmaddwd m1, [v2q + orderq + mmsize]
-    paddd   m2, m0
-    paddd   m2, m1
-    add     orderq, mmsize*2
-    jl .loop
-    HADDD   m2, m0
-    movd   eax, m2
-%if mmsize == 8
-    emms
-%endif
-    RET
-%endmacro
-
-INIT_MMX mmxext
-SCALARPRODUCT
-INIT_XMM sse2
-SCALARPRODUCT
-
-
-;-----------------------------------------------------------------------------
-; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
-;                           int32_t max, unsigned int len)
-;-----------------------------------------------------------------------------
-
-; %1 = number of xmm registers used
-; %2 = number of inline load/process/store loops per asm loop
-; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
-; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
-; %5 = suffix
-%macro VECTOR_CLIP_INT32 4-5
-cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
-%if %4
-    cvtsi2ss  m4, minm
-    cvtsi2ss  m5, maxm
-%else
-    movd      m4, minm
-    movd      m5, maxm
-%endif
-    SPLATD    m4
-    SPLATD    m5
-.loop:
-%assign %%i 0
-%rep %2
-    mova      m0,  [srcq+mmsize*(0+%%i)]
-    mova      m1,  [srcq+mmsize*(1+%%i)]
-    mova      m2,  [srcq+mmsize*(2+%%i)]
-    mova      m3,  [srcq+mmsize*(3+%%i)]
-%if %3
-    mova      m7,  [srcq+mmsize*(4+%%i)]
-    mova      m8,  [srcq+mmsize*(5+%%i)]
-    mova      m9,  [srcq+mmsize*(6+%%i)]
-    mova      m10, [srcq+mmsize*(7+%%i)]
-%endif
-    CLIPD  m0,  m4, m5, m6
-    CLIPD  m1,  m4, m5, m6
-    CLIPD  m2,  m4, m5, m6
-    CLIPD  m3,  m4, m5, m6
-%if %3
-    CLIPD  m7,  m4, m5, m6
-    CLIPD  m8,  m4, m5, m6
-    CLIPD  m9,  m4, m5, m6
-    CLIPD  m10, m4, m5, m6
-%endif
-    mova  [dstq+mmsize*(0+%%i)], m0
-    mova  [dstq+mmsize*(1+%%i)], m1
-    mova  [dstq+mmsize*(2+%%i)], m2
-    mova  [dstq+mmsize*(3+%%i)], m3
-%if %3
-    mova  [dstq+mmsize*(4+%%i)], m7
-    mova  [dstq+mmsize*(5+%%i)], m8
-    mova  [dstq+mmsize*(6+%%i)], m9
-    mova  [dstq+mmsize*(7+%%i)], m10
-%endif
-%assign %%i %%i+4*(%3+1)
-%endrep
-    add     srcq, mmsize*4*(%2+%3)
-    add     dstq, mmsize*4*(%2+%3)
-    sub     lend, mmsize*(%2+%3)
-    jg .loop
-    REP_RET
-%endmacro
-
-INIT_MMX mmx
-%define CLIPD CLIPD_MMX
-VECTOR_CLIP_INT32 0, 1, 0, 0
-INIT_XMM sse2
-VECTOR_CLIP_INT32 6, 1, 0, 0, _int
-%define CLIPD CLIPD_SSE2
-VECTOR_CLIP_INT32 6, 2, 0, 1
-INIT_XMM sse4
-%define CLIPD CLIPD_SSE41
-%ifdef m8
-VECTOR_CLIP_INT32 11, 1, 1, 0
-%else
-VECTOR_CLIP_INT32 6, 1, 0, 0
-%endif
-
 ; %1 = aligned/unaligned
 %macro BSWAP_LOOPS  1
    mov      r3, r2
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@ -29,23 +29,9 @@
 #include "dsputil_x86.h"
 #include "idct_xvid.h"

-int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
-                                      int order);
-int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
-                                    int order);
-
 void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
 void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);

-void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
-                              int32_t min, int32_t max, unsigned int len);
-void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src,
-                               int32_t min, int32_t max, unsigned int len);
-void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
-                                   int32_t min, int32_t max, unsigned int len);
-void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
-                               int32_t min, int32_t max, unsigned int len);
-
 static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
                                     int cpu_flags, unsigned high_bit_depth)
 {
@ -81,7 +67,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
 #endif /* HAVE_MMX_INLINE */

 #if HAVE_MMX_EXTERNAL
-    c->vector_clip_int32 = ff_vector_clip_int32_mmx;
    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
 #endif /* HAVE_MMX_EXTERNAL */
 }
@ -96,19 +81,12 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
        c->idct     = ff_idct_xvid_mmxext;
    }
 #endif /* HAVE_MMXEXT_INLINE */
-
-#if HAVE_MMXEXT_EXTERNAL
-    c->scalarproduct_int16          = ff_scalarproduct_int16_mmxext;
-#endif /* HAVE_MMXEXT_EXTERNAL */
 }

 static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
                                     int cpu_flags, unsigned high_bit_depth)
 {
 #if HAVE_YASM
-#if HAVE_SSE_EXTERNAL
-    c->vector_clipf = ff_vector_clipf_sse;
-#endif
 #if HAVE_INLINE_ASM && CONFIG_VIDEODSP
    c->gmc = ff_gmc_sse;
 #endif
@ -128,12 +106,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
 #endif /* HAVE_SSE2_INLINE */

 #if HAVE_SSE2_EXTERNAL
-    c->scalarproduct_int16          = ff_scalarproduct_int16_sse2;
-    if (cpu_flags & AV_CPU_FLAG_ATOM) {
-        c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
-    } else {
-        c->vector_clip_int32 = ff_vector_clip_int32_sse2;
-    }
    c->bswap_buf = ff_bswap32_buf_sse2;
    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
 #endif /* HAVE_SSE2_EXTERNAL */
@ -147,14 +119,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
 #endif /* HAVE_SSSE3_EXTERNAL */
 }

-static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
-                                      int cpu_flags, unsigned high_bit_depth)
-{
-#if HAVE_SSE4_EXTERNAL
-    c->vector_clip_int32 = ff_vector_clip_int32_sse4;
-#endif /* HAVE_SSE4_EXTERNAL */
-}
-
 av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
 {
@ -175,9 +139,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
    if (EXTERNAL_SSSE3(cpu_flags))
        dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth);

-    if (EXTERNAL_SSE4(cpu_flags))
-        dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth);
-
    if (CONFIG_ENCODERS)
        ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
 }
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@ -28,7 +28,6 @@
 #include "libavutil/x86/asm.h"
 #include "libavcodec/pixels.h"
 #include "libavcodec/videodsp.h"
-#include "constants.h"
 #include "dsputil_x86.h"
 #include "inline_asm.h"

--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@ -53,10 +53,6 @@ void ff_gmc_sse(uint8_t *dst, uint8_t *src,
                int dxx, int dxy, int dyx, int dyy,
                int shift, int r, int width, int height);

-void ff_vector_clipf_sse(float *dst, const float *src,
-                         float min, float max, int len);
-
-
 void ff_mmx_idct(int16_t *block);
 void ff_mmxext_idct(int16_t *block);