diff --git a/libavcodec/Makefile b/libavcodec/Makefile index f1a07d0a98..024e5cfb5f 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -388,7 +388,7 @@ OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o OBJS-$(CONFIG_VMNC_DECODER) += vmnc.o -OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbis.o \ +OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \ vorbis_data.o xiph.o OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ vorbis_data.o diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index ac486f47dc..71048f9c4c 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o +OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o @@ -86,6 +87,8 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \ arm/rv40dsp_neon.o \ arm/h264cmc_neon.o \ +NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o + NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \ diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 34bb6191f1..ee0e9afa88 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -154,8 +154,6 @@ void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); -void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); - int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); int32_t 
ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul); @@ -307,9 +305,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->vector_clipf = ff_vector_clipf_neon; c->vector_clip_int32 = ff_vector_clip_int32_neon; - if (CONFIG_VORBIS_DECODER) - c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; - c->scalarproduct_int16 = ff_scalarproduct_int16_neon; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon; diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index a0d201cd85..ebc70ac109 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -19,7 +19,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "config.h" #include "libavutil/arm/asm.S" function ff_clear_block_neon, export=1 @@ -532,69 +531,6 @@ function ff_add_pixels_clamped_neon, export=1 bx lr endfunc -#if CONFIG_VORBIS_DECODER -function ff_vorbis_inverse_coupling_neon, export=1 - vmov.i32 q10, #1<<31 - subs r2, r2, #4 - mov r3, r0 - mov r12, r1 - beq 3f - - vld1.32 {d24-d25},[r1,:128]! - vld1.32 {d22-d23},[r0,:128]! - vcle.s32 q8, q12, #0 - vand q9, q11, q10 - veor q12, q12, q9 - vand q2, q12, q8 - vbic q3, q12, q8 - vadd.f32 q12, q11, q2 - vsub.f32 q11, q11, q3 -1: vld1.32 {d2-d3}, [r1,:128]! - vld1.32 {d0-d1}, [r0,:128]! - vcle.s32 q8, q1, #0 - vand q9, q0, q10 - veor q1, q1, q9 - vst1.32 {d24-d25},[r3, :128]! - vst1.32 {d22-d23},[r12,:128]! - vand q2, q1, q8 - vbic q3, q1, q8 - vadd.f32 q1, q0, q2 - vsub.f32 q0, q0, q3 - subs r2, r2, #8 - ble 2f - vld1.32 {d24-d25},[r1,:128]! - vld1.32 {d22-d23},[r0,:128]! - vcle.s32 q8, q12, #0 - vand q9, q11, q10 - veor q12, q12, q9 - vst1.32 {d2-d3}, [r3, :128]! - vst1.32 {d0-d1}, [r12,:128]! - vand q2, q12, q8 - vbic q3, q12, q8 - vadd.f32 q12, q11, q2 - vsub.f32 q11, q11, q3 - b 1b - -2: vst1.32 {d2-d3}, [r3, :128]! - vst1.32 {d0-d1}, [r12,:128]! 
- it lt - bxlt lr - -3: vld1.32 {d2-d3}, [r1,:128] - vld1.32 {d0-d1}, [r0,:128] - vcle.s32 q8, q1, #0 - vand q9, q0, q10 - veor q1, q1, q9 - vand q2, q1, q8 - vbic q3, q1, q8 - vadd.f32 q1, q0, q2 - vsub.f32 q0, q0, q3 - vst1.32 {d2-d3}, [r0,:128]! - vst1.32 {d0-d1}, [r1,:128]! - bx lr -endfunc -#endif - function ff_butterflies_float_neon, export=1 1: vld1.32 {q0},[r0,:128] vld1.32 {q1},[r1,:128] diff --git a/libavcodec/arm/vorbisdsp_init_arm.c b/libavcodec/arm/vorbisdsp_init_arm.c new file mode 100644 index 0000000000..ec8fbd5bc9 --- /dev/null +++ b/libavcodec/arm/vorbisdsp_init_arm.c @@ -0,0 +1,36 @@ +/* + * ARM NEON optimised DSP functions + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavcodec/vorbisdsp.h" + +void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); + +void ff_vorbisdsp_init_arm(VorbisDSPContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) { + c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; + } +} diff --git a/libavcodec/arm/vorbisdsp_neon.S b/libavcodec/arm/vorbisdsp_neon.S new file mode 100644 index 0000000000..7df876c2bc --- /dev/null +++ b/libavcodec/arm/vorbisdsp_neon.S @@ -0,0 +1,83 @@ +/* + * ARM NEON optimised DSP functions + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +function ff_vorbis_inverse_coupling_neon, export=1 + vmov.i32 q10, #1<<31 + subs r2, r2, #4 + mov r3, r0 + mov r12, r1 + beq 3f + + vld1.32 {d24-d25},[r1,:128]! + vld1.32 {d22-d23},[r0,:128]! 
+ vcle.s32 q8, q12, #0 + vand q9, q11, q10 + veor q12, q12, q9 + vand q2, q12, q8 + vbic q3, q12, q8 + vadd.f32 q12, q11, q2 + vsub.f32 q11, q11, q3 +1: vld1.32 {d2-d3}, [r1,:128]! + vld1.32 {d0-d1}, [r0,:128]! + vcle.s32 q8, q1, #0 + vand q9, q0, q10 + veor q1, q1, q9 + vst1.32 {d24-d25},[r3, :128]! + vst1.32 {d22-d23},[r12,:128]! + vand q2, q1, q8 + vbic q3, q1, q8 + vadd.f32 q1, q0, q2 + vsub.f32 q0, q0, q3 + subs r2, r2, #8 + ble 2f + vld1.32 {d24-d25},[r1,:128]! + vld1.32 {d22-d23},[r0,:128]! + vcle.s32 q8, q12, #0 + vand q9, q11, q10 + veor q12, q12, q9 + vst1.32 {d2-d3}, [r3, :128]! + vst1.32 {d0-d1}, [r12,:128]! + vand q2, q12, q8 + vbic q3, q12, q8 + vadd.f32 q12, q11, q2 + vsub.f32 q11, q11, q3 + b 1b + +2: vst1.32 {d2-d3}, [r3, :128]! + vst1.32 {d0-d1}, [r12,:128]! + it lt + bxlt lr + +3: vld1.32 {d2-d3}, [r1,:128] + vld1.32 {d0-d1}, [r0,:128] + vcle.s32 q8, q1, #0 + vand q9, q0, q10 + veor q1, q1, q9 + vand q2, q1, q8 + vbic q3, q1, q8 + vadd.f32 q1, q0, q2 + vsub.f32 q0, q0, q3 + vst1.32 {d2-d3}, [r0,:128]! + vst1.32 {d0-d1}, [r1,:128]! 
+ bx lr +endfunc diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 91a4da5532..b747e0a629 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -36,7 +36,6 @@ #include "mathops.h" #include "mpegvideo.h" #include "config.h" -#include "vorbis.h" uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; uint32_t ff_squareTbl[512] = {0, }; @@ -2817,9 +2816,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->try_8x8basis= try_8x8basis_c; c->add_8x8basis= add_8x8basis_c; -#if CONFIG_VORBIS_DECODER - c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling; -#endif c->vector_fmul_reverse = vector_fmul_reverse_c; c->vector_fmul_add = vector_fmul_add_c; c->vector_clipf = vector_clipf_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 3a5c94a952..2de14f1b41 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -346,8 +346,6 @@ typedef struct DSPContext { void (*h261_loop_filter)(uint8_t *src, int stride); - /* assume len is a multiple of 4, and arrays are 16-byte aligned */ - void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); /* assume len is a multiple of 16, and arrays are 32-byte aligned */ void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index c8c3025bf0..e1ebf26d0f 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -1,6 +1,7 @@ OBJS += ppc/dsputil_ppc.o \ ppc/videodsp_ppc.o \ +OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index 9ad73ef5d5..1007fbd2b7 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -1283,29 +1283,6 @@ static int hadamard8_diff16_altivec(/*MpegEncContext*/ void 
*s, uint8_t *dst, ui return score; } -static void vorbis_inverse_coupling_altivec(float *mag, float *ang, - int blocksize) -{ - int i; - vector float m, a; - vector bool int t0, t1; - const vector unsigned int v_31 = //XXX - vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1)); - for (i = 0; i < blocksize; i += 4) { - m = vec_ld(0, mag+i); - a = vec_ld(0, ang+i); - t0 = vec_cmple(m, (vector float)vec_splat_u32(0)); - t1 = vec_cmple(a, (vector float)vec_splat_u32(0)); - a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31)); - t0 = (vector bool int)vec_and(a, t1); - t1 = (vector bool int)vec_andc(a, t1); - a = vec_sub(m, (vector float)t1); - m = vec_add(m, (vector float)t0); - vec_stl(a, 0, ang+i); - vec_stl(m, 0, mag+i); - } -} - /* next one assumes that ((line_size % 8) == 0) */ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) { @@ -1403,6 +1380,4 @@ void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) c->hadamard8_diff[0] = hadamard8_diff16_altivec; c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; - if (CONFIG_VORBIS_DECODER) - c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec; } diff --git a/libavcodec/ppc/vorbisdsp_altivec.c b/libavcodec/ppc/vorbisdsp_altivec.c new file mode 100644 index 0000000000..cd8bbb90fc --- /dev/null +++ b/libavcodec/ppc/vorbisdsp_altivec.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006 Luca Barbato + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#if HAVE_ALTIVEC_H +#include <altivec.h> +#endif +#include "libavutil/cpu.h" +#include "libavutil/ppc/types_altivec.h" +#include "libavutil/ppc/util_altivec.h" +#include "libavcodec/vorbisdsp.h" + +#if HAVE_ALTIVEC +static void vorbis_inverse_coupling_altivec(float *mag, float *ang, + int blocksize) +{ + int i; + vector float m, a; + vector bool int t0, t1; + const vector unsigned int v_31 = //XXX + vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1)); + for (i = 0; i < blocksize; i += 4) { + m = vec_ld(0, mag+i); + a = vec_ld(0, ang+i); + t0 = vec_cmple(m, (vector float)vec_splat_u32(0)); + t1 = vec_cmple(a, (vector float)vec_splat_u32(0)); + a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31)); + t0 = (vector bool int)vec_and(a, t1); + t1 = (vector bool int)vec_andc(a, t1); + a = vec_sub(m, (vector float)t1); + m = vec_add(m, (vector float)t0); + vec_stl(a, 0, ang+i); + vec_stl(m, 0, mag+i); + } +} +#endif /* HAVE_ALTIVEC */ + +void ff_vorbisdsp_init_ppc(VorbisDSPContext* c) +{ +#if HAVE_ALTIVEC + if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { + c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec; + } +#endif /* HAVE_ALTIVEC */ +} diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c index 9bea908cc1..6b75ae393a 100644 --- a/libavcodec/vorbisdec.c +++ b/libavcodec/vorbisdec.c @@ -29,12 +29,12 @@ #include "libavutil/float_dsp.h" #include "avcodec.h" #include "get_bits.h" -#include "dsputil.h" #include "fft.h" #include "fmtconvert.h" #include "internal.h" #include "vorbis.h" +#include "vorbisdsp.h" #include "xiph.h" #define V_NB_BITS 8 @@ -125,7 +125,7 @@ typedef struct vorbis_context_s { AVCodecContext *avccontext;
AVFrame frame; GetBitContext gb; - DSPContext dsp; + VorbisDSPContext dsp; AVFloatDSPContext fdsp; FmtConvertContext fmt_conv; @@ -981,7 +981,7 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext) int hdr_type, ret; vc->avccontext = avccontext; - ff_dsputil_init(&vc->dsp, avccontext); + ff_vorbisdsp_init(&vc->dsp); avpriv_float_dsp_init(&vc->fdsp, avccontext->flags & CODEC_FLAG_BITEXACT); ff_fmt_convert_init(&vc->fmt_conv, avccontext); diff --git a/libavcodec/vorbisdsp.c b/libavcodec/vorbisdsp.c new file mode 100644 index 0000000000..1764438001 --- /dev/null +++ b/libavcodec/vorbisdsp.c @@ -0,0 +1,33 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "vorbisdsp.h" +#include "vorbis.h" + +void ff_vorbisdsp_init(VorbisDSPContext *dsp) +{ + dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling; + + if (ARCH_X86) + ff_vorbisdsp_init_x86(dsp); + if (ARCH_PPC) + ff_vorbisdsp_init_ppc(dsp); + if (ARCH_ARM) + ff_vorbisdsp_init_arm(dsp); +} diff --git a/libavcodec/vorbisdsp.h b/libavcodec/vorbisdsp.h new file mode 100644 index 0000000000..b6b2f29fd8 --- /dev/null +++ b/libavcodec/vorbisdsp.h @@ -0,0 +1,34 @@ +/* + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VORBISDSP_H +#define AVCODEC_VORBISDSP_H + +typedef struct VorbisDSPContext { + /* assume len is a multiple of 4, and arrays are 16-byte aligned */ + void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); +} VorbisDSPContext; + +void ff_vorbisdsp_init(VorbisDSPContext *dsp); + +/* for internal use only */ +void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp); +void ff_vorbisdsp_init_arm(VorbisDSPContext *dsp); +void ff_vorbisdsp_init_ppc(VorbisDSPContext *dsp); + +#endif /* AVCODEC_VORBISDSP_H */ diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index b5a7694bcf..6069968a09 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -20,6 +20,7 @@ OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \ OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o +OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o OBJS-$(CONFIG_VP5_DECODER) += x86/vp56dsp_init.o OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 13f215135a..74f7df5002 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ 
b/libavcodec/x86/dsputil_mmx.c @@ -1829,65 +1829,6 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, avg_pixels8_mmxext(dst, src, stride, 8); } -static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) -{ - int i; - __asm__ volatile ("pxor %%mm7, %%mm7":); - for (i = 0; i < blocksize; i += 2) { - __asm__ volatile ( - "movq %0, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0 - "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0 - "pslld $31, %%mm2 \n\t" // keep only the sign bit - "pxor %%mm2, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm1, %%mm3 \n\t" - "pandn %%mm1, %%mm4 \n\t" - "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m))) - "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m))) - "movq %%mm3, %1 \n\t" - "movq %%mm0, %0 \n\t" - : "+m"(mag[i]), "+m"(ang[i]) - :: "memory" - ); - } - __asm__ volatile ("femms"); -} - -static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) -{ - int i; - - __asm__ volatile ( - "movaps %0, %%xmm5 \n\t" - :: "m"(ff_pdw_80000000[0]) - ); - for (i = 0; i < blocksize; i += 4) { - __asm__ volatile ( - "movaps %0, %%xmm0 \n\t" - "movaps %1, %%xmm1 \n\t" - "xorps %%xmm2, %%xmm2 \n\t" - "xorps %%xmm3, %%xmm3 \n\t" - "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0 - "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0 - "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit - "xorps %%xmm2, %%xmm1 \n\t" - "movaps %%xmm3, %%xmm4 \n\t" - "andps %%xmm1, %%xmm3 \n\t" - "andnps %%xmm1, %%xmm4 \n\t" - "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m))) - "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m))) - "movaps %%xmm3, %1 \n\t" - "movaps %%xmm0, %0 \n\t" - : "+m"(mag[i]), "+m"(ang[i]) - :: "memory" - ); - } -} - static void vector_clipf_sse(float *dst, const float *src, float min, float max, int len) { @@ -2238,8 +2179,6 @@ static void dsputil_init_3dnow(DSPContext *c, 
AVCodecContext *avctx, c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow; c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow; } - - c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; #endif /* HAVE_INLINE_ASM */ #if HAVE_YASM @@ -2263,8 +2202,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) } } - c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; - c->vector_clipf = vector_clipf_sse; #endif /* HAVE_INLINE_ASM */ diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c new file mode 100644 index 0000000000..5243095003 --- /dev/null +++ b/libavcodec/x86/vorbisdsp_init.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2006 Loren Merritt + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/cpu.h" +#include "libavcodec/vorbisdsp.h" +#include "dsputil_mmx.h" // for ff_pdw_80000000 + +#if HAVE_INLINE_ASM +#if ARCH_X86_32 +static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) +{ + int i; + __asm__ volatile ("pxor %%mm7, %%mm7":); + for (i = 0; i < blocksize; i += 2) { + __asm__ volatile ( + "movq %0, %%mm0 \n\t" + "movq %1, %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0 + "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0 + "pslld $31, %%mm2 \n\t" // keep only the sign bit + "pxor %%mm2, %%mm1 \n\t" + "movq %%mm3, %%mm4 \n\t" + "pand %%mm1, %%mm3 \n\t" + "pandn %%mm1, %%mm4 \n\t" + "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m))) + "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m))) + "movq %%mm3, %1 \n\t" + "movq %%mm0, %0 \n\t" + : "+m"(mag[i]), "+m"(ang[i]) + :: "memory" + ); + } + __asm__ volatile ("femms"); +} +#endif + +static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) +{ + int i; + + __asm__ volatile ( + "movaps %0, %%xmm5 \n\t" + :: "m"(ff_pdw_80000000[0]) + ); + for (i = 0; i < blocksize; i += 4) { + __asm__ volatile ( + "movaps %0, %%xmm0 \n\t" + "movaps %1, %%xmm1 \n\t" + "xorps %%xmm2, %%xmm2 \n\t" + "xorps %%xmm3, %%xmm3 \n\t" + "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0 + "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0 + "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit + "xorps %%xmm2, %%xmm1 \n\t" + "movaps %%xmm3, %%xmm4 \n\t" + "andps %%xmm1, %%xmm3 \n\t" + "andnps %%xmm1, %%xmm4 \n\t" + "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m))) + "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m))) + "movaps %%xmm3, %1 \n\t" + 
"movaps %%xmm0, %0 \n\t" + : "+m"(mag[i]), "+m"(ang[i]) + :: "memory" + ); + } +} +#endif + +void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp) +{ +#if HAVE_INLINE_ASM + int mm_flags = av_get_cpu_flags(); + +#if ARCH_X86_32 + if (mm_flags & AV_CPU_FLAG_3DNOW) + dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; +#endif /* ARCH_X86_32 */ + if (mm_flags & AV_CPU_FLAG_SSE) + dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; +#endif /* HAVE_INLINE_ASM */ +}