From de4bc44abb31d79bf2576dbcdb76606d5f7e971d Mon Sep 17 00:00:00 2001 From: Vitor Sessak Date: Sat, 31 Jul 2010 14:50:51 +0000 Subject: [PATCH] Convert deinterlacing MMX code to YASM Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/imgconvert.c | 99 ++++++---------------------------- libavcodec/x86/Makefile | 1 + libavcodec/x86/deinterlace.asm | 81 ++++++++++++++++++++++++++++ libavcodec/x86/dsputil_mmx.h | 13 +++++ 4 files changed, 110 insertions(+), 84 deletions(-) create mode 100644 libavcodec/x86/deinterlace.asm diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c index d0fc1ce6d7..1fb8f7e5d7 100644 --- a/libavcodec/imgconvert.c +++ b/libavcodec/imgconvert.c @@ -39,7 +39,6 @@ #include "libavcore/imgutils.h" #if HAVE_MMX -#include "x86/mmx.h" #include "x86/dsputil_mmx.h" #endif @@ -55,6 +54,14 @@ #define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */ #define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */ +#if HAVE_MMX +#define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx +#define deinterlace_line ff_deinterlace_line_mmx +#else +#define deinterlace_line_inplace deinterlace_line_inplace_c +#define deinterlace_line deinterlace_line_c +#endif + typedef struct PixFmtInfo { uint8_t nb_channels; /**< number of channels (including alpha) */ uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */ @@ -1119,61 +1126,14 @@ int img_get_alpha_info(const AVPicture *src, return ret; } -#if HAVE_MMX -#define DEINT_INPLACE_LINE_LUM \ - movd_m2r(lum_m4[0],mm0);\ - movd_m2r(lum_m3[0],mm1);\ - movd_m2r(lum_m2[0],mm2);\ - movd_m2r(lum_m1[0],mm3);\ - movd_m2r(lum[0],mm4);\ - punpcklbw_r2r(mm7,mm0);\ - movd_r2m(mm2,lum_m4[0]);\ - punpcklbw_r2r(mm7,mm1);\ - punpcklbw_r2r(mm7,mm2);\ - punpcklbw_r2r(mm7,mm3);\ - punpcklbw_r2r(mm7,mm4);\ - paddw_r2r(mm3,mm1);\ - psllw_i2r(1,mm2);\ - paddw_r2r(mm4,mm0);\ - psllw_i2r(2,mm1);\ - paddw_r2r(mm6,mm2);\ - 
paddw_r2r(mm2,mm1);\ - psubusw_r2r(mm0,mm1);\ - psrlw_i2r(3,mm1);\ - packuswb_r2r(mm7,mm1);\ - movd_r2m(mm1,lum_m2[0]); - -#define DEINT_LINE_LUM \ - movd_m2r(lum_m4[0],mm0);\ - movd_m2r(lum_m3[0],mm1);\ - movd_m2r(lum_m2[0],mm2);\ - movd_m2r(lum_m1[0],mm3);\ - movd_m2r(lum[0],mm4);\ - punpcklbw_r2r(mm7,mm0);\ - punpcklbw_r2r(mm7,mm1);\ - punpcklbw_r2r(mm7,mm2);\ - punpcklbw_r2r(mm7,mm3);\ - punpcklbw_r2r(mm7,mm4);\ - paddw_r2r(mm3,mm1);\ - psllw_i2r(1,mm2);\ - paddw_r2r(mm4,mm0);\ - psllw_i2r(2,mm1);\ - paddw_r2r(mm6,mm2);\ - paddw_r2r(mm2,mm1);\ - psubusw_r2r(mm0,mm1);\ - psrlw_i2r(3,mm1);\ - packuswb_r2r(mm7,mm1);\ - movd_r2m(mm1,dst[0]); -#endif - +#if !HAVE_MMX /* filter parameters: [-1 4 2 4 -1] // 8 */ -static void deinterlace_line(uint8_t *dst, +static void deinterlace_line_c(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size) { -#if !HAVE_MMX uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int sum; @@ -1191,27 +1151,12 @@ static void deinterlace_line(uint8_t *dst, lum++; dst++; } -#else - - { - pxor_r2r(mm7,mm7); - movq_m2r(ff_pw_4,mm6); - } - for (;size > 3; size-=4) { - DEINT_LINE_LUM - lum_m4+=4; - lum_m3+=4; - lum_m2+=4; - lum_m1+=4; - lum+=4; - dst+=4; - } -#endif } -static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum, - int size) + +static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3, + uint8_t *lum_m2, uint8_t *lum_m1, + uint8_t *lum, int size) { -#if !HAVE_MMX uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int sum; @@ -1229,22 +1174,8 @@ static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t * lum_m1++; lum++; } -#else - - { - pxor_r2r(mm7,mm7); - movq_m2r(ff_pw_4,mm6); - } - for (;size > 3; size-=4) { - DEINT_INPLACE_LINE_LUM - lum_m4+=4; - lum_m3+=4; - lum_m2+=4; - lum_m1+=4; - lum+=4; - } -#endif } +#endif /* deinterlacing : 2 temporal taps, 3 spatial taps linear 
filter. The top field is copied as is, but the bottom field is deinterlaced diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 3f19f06b88..ea53e33580 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -35,6 +35,7 @@ MMX-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp_mmx.o \ YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \ + x86/deinterlace.o \ $(YASM-OBJS-yes) MMX-OBJS-$(CONFIG_FFT) += x86/fft.o diff --git a/libavcodec/x86/deinterlace.asm b/libavcodec/x86/deinterlace.asm new file mode 100644 index 0000000000..5db94644fb --- /dev/null +++ b/libavcodec/x86/deinterlace.asm @@ -0,0 +1,81 @@ +;****************************************************************************** +;* MMX optimized deinterlacing functions +;* Copyright (c) 2010 Vitor Sessak +;* Copyright (c) 2002 Michael Niedermayer +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA + +cextern pw_4 + +%macro DEINTERLACE 1 +%ifidn %1, inplace +;void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size) +cglobal deinterlace_line_inplace_mmx, 6,6,7, lum_m4, lum_m3, lum_m2, lum_m1, lum, size +%else +;void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size) +cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size +%endif + pxor mm7, mm7 + movq mm6, [pw_4] +.nextrow + movd mm0, [lum_m4q] + movd mm1, [lum_m3q] + movd mm2, [lum_m2q] +%ifidn %1, inplace + movd [lum_m4q], mm2 +%endif + movd mm3, [lum_m1q] + movd mm4, [lumq] + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 + punpcklbw mm2, mm7 + punpcklbw mm3, mm7 + punpcklbw mm4, mm7 + paddw mm1, mm3 + psllw mm2, 1 + paddw mm0, mm4 + psllw mm1, 2 + paddw mm2, mm6 + paddw mm1, mm2 + psubusw mm1, mm0 + psrlw mm1, 3 + packuswb mm1, mm7 +%ifidn %1, inplace + movd [lum_m2q], mm1 +%else + movd [dstq], mm1 + add dstq, 4 +%endif + add lum_m4q, 4 + add lum_m3q, 4 + add lum_m2q, 4 + add lum_m1q, 4 + add lumq, 4 + sub sized, 4 + jg .nextrow + REP_RET +%endmacro + +DEINTERLACE "" + +DEINTERLACE inplace diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index 13067dffc3..5de1c908f1 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -179,4 +179,17 @@ void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag, void ff_mmx_idct(DCTELEM *block); void
ff_mmxext_idct(DCTELEM *block); + +void ff_deinterlace_line_mmx(uint8_t *dst, + const uint8_t *lum_m4, const uint8_t *lum_m3, + const uint8_t *lum_m2, const uint8_t *lum_m1, + const uint8_t *lum, + int size); + +void ff_deinterlace_line_inplace_mmx(uint8_t *lum_m4, /* overwritten with the old lum_m2 pixels */ + const uint8_t *lum_m3, + uint8_t *lum_m2, /* overwritten with the filtered result */ + const uint8_t *lum_m1, + const uint8_t *lum, int size); + #endif /* AVCODEC_X86_DSPUTIL_MMX_H */