# Summary
# -------
# Move the SSE float-clipping routine (vector_clipf) from
# libavcodec/x86/dsputil.asm to libavcodec/x86/audiodsp.asm. The 44 lines
# removed from dsputil.asm are the same 44 lines added to audiodsp.asm, so
# this is a pure code move with no change to the routine itself.
#
# The header libavcodec/x86/audiodsp.h is deleted. Its only declaration was
# the ff_vector_clipf_sse() prototype; that prototype is now declared
# directly in libavcodec/x86/audiodsp_init.c next to the other ff_* asm
# prototypes, and the '#include "audiodsp.h"' line there is dropped.
#
# Notes on the moved routine (as written in the hunks below)
# ----------------------------------------------------------
# * Argument setup differs per target:
#     - UNIX64: cglobal declares only dst, src, len; min/max are used straight
#       from m0/m1 (presumably where the calling convention already places
#       the two float arguments - confirm against the ABI).
#     - WIN64:  SWAP 0,2 and SWAP 1,3 rename registers so that min/max end up
#       as m0/m1 (presumably because they arrive in xmm2/xmm3 - confirm).
#     - x86_32: min/max are loaded from their argument slots with movss.
# * SPLATD then broadcasts min (m0) and max (m1) across the register.
# * len is scaled by 4 (shl lend, 2) to a byte count, src/dst are advanced
#   to the end of the buffers, and len is negated, so the loop indexes with
#   a negative offset that counts up towards zero.
# * Each iteration loads four registers (mmsize*0 .. mmsize*3), applies
#   maxps against min and minps against max, stores the results, and adds
#   mmsize*4 to the offset; 'jl .loop' repeats while the offset is negative.
# * The loop body executes before the first test and there is no tail
#   handling, and all loads/stores use mova.
#   NOTE(review): this presumably requires len > 0, len a multiple of
#   mmsize*4/4 floats, and suitably aligned src/dst - confirm with callers.
#
# NOTE(review): intra-line whitespace in the hunk lines looks collapsed to
# single spaces in this copy of the patch; context and removed lines may not
# match the tree byte-for-byte. Regenerate the patch or apply with
# whitespace-insensitive matching - confirm before use.
#
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 83f9bb6f45..273b9ef660 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -131,3 +131,47 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
 %else
 VECTOR_CLIP_INT32 6, 1, 0, 0
 %endif
+
+;-----------------------------------------------------
+;void ff_vector_clipf(float *dst, const float *src,
+; float min, float max, int len)
+;-----------------------------------------------------
+INIT_XMM sse
+%if UNIX64
+cglobal vector_clipf, 3,3,6, dst, src, len
+%else
+cglobal vector_clipf, 5,5,6, dst, src, min, max, len
+%endif
+%if WIN64
+ SWAP 0, 2
+ SWAP 1, 3
+%elif ARCH_X86_32
+ movss m0, minm
+ movss m1, maxm
+%endif
+ SPLATD m0
+ SPLATD m1
+ shl lend, 2
+ add srcq, lenq
+ add dstq, lenq
+ neg lenq
+.loop:
+ mova m2, [srcq+lenq+mmsize*0]
+ mova m3, [srcq+lenq+mmsize*1]
+ mova m4, [srcq+lenq+mmsize*2]
+ mova m5, [srcq+lenq+mmsize*3]
+ maxps m2, m0
+ maxps m3, m0
+ maxps m4, m0
+ maxps m5, m0
+ minps m2, m1
+ minps m3, m1
+ minps m4, m1
+ minps m5, m1
+ mova [dstq+lenq+mmsize*0], m2
+ mova [dstq+lenq+mmsize*1], m3
+ mova [dstq+lenq+mmsize*2], m4
+ mova [dstq+lenq+mmsize*3], m5
+ add lenq, mmsize*4
+ jl .loop
+ REP_RET
diff --git a/libavcodec/x86/audiodsp.h b/libavcodec/x86/audiodsp.h
deleted file mode 100644
index 35f9f1485b..0000000000
--- a/libavcodec/x86/audiodsp.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_X86_AUDIODSP_H
-#define AVCODEC_X86_AUDIODSP_H
-
-void ff_vector_clipf_sse(float *dst, const float *src,
- float min, float max, int len);
-
-#endif /* AVCODEC_X86_AUDIODSP_H */
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index d586bf6c04..a2ce231f32 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -24,7 +24,6 @@
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/audiodsp.h"
-#include "audiodsp.h"
 
 int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
 int order);
@@ -39,6 +38,8 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
 int32_t min, int32_t max, unsigned int len);
 void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
 int32_t min, int32_t max, unsigned int len);
+void ff_vector_clipf_sse(float *dst, const float *src,
+ float min, float max, int len);
 
 av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
 {
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index e261c0fcc7..a0b2a3a045 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -188,47 +188,3 @@ INIT_MMX mmx
 PUT_SIGNED_PIXELS_CLAMPED 0
 INIT_XMM sse2
 PUT_SIGNED_PIXELS_CLAMPED 3
-
-;-----------------------------------------------------
-;void ff_vector_clipf(float *dst, const float *src,
-; float min, float max, int len)
-;-----------------------------------------------------
-INIT_XMM sse
-%if UNIX64
-cglobal vector_clipf, 3,3,6, dst, src, len
-%else
-cglobal vector_clipf, 5,5,6, dst, src, min, max, len
-%endif
-%if WIN64
- SWAP 0, 2
- SWAP 1, 3
-%elif ARCH_X86_32
- movss m0, minm
- movss m1, maxm
-%endif
- SPLATD m0
- SPLATD m1
- shl lend, 2
- add srcq, lenq
- add dstq, lenq
- neg lenq
-.loop:
- mova m2, [srcq+lenq+mmsize*0]
- mova m3, [srcq+lenq+mmsize*1]
- mova m4, [srcq+lenq+mmsize*2]
- mova m5, [srcq+lenq+mmsize*3]
- maxps m2, m0
- maxps m3, m0
- maxps m4, m0
- maxps m5, m0
- minps m2, m1
- minps m3, m1
- minps m4, m1
- minps m5, m1
- mova [dstq+lenq+mmsize*0], m2
- mova [dstq+lenq+mmsize*1], m3
- mova [dstq+lenq+mmsize*2], m4
- mova [dstq+lenq+mmsize*3], m5
- add lenq, mmsize*4
- jl .loop
- REP_RET