1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-08 13:22:53 +02:00

Convert deinterlacing MMX code to YASM

Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Vitor Sessak 2010-07-31 14:50:51 +00:00
parent c2eae137e9
commit de4bc44abb
4 changed files with 110 additions and 84 deletions

View File

@ -39,7 +39,6 @@
#include "libavcore/imgutils.h" #include "libavcore/imgutils.h"
#if HAVE_MMX #if HAVE_MMX
#include "x86/mmx.h"
#include "x86/dsputil_mmx.h" #include "x86/dsputil_mmx.h"
#endif #endif
@ -55,6 +54,14 @@
#define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */ #define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */
#define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */ #define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */
/* Map the generic names deinterlace_line / deinterlace_line_inplace onto
 * either the external ff_*_mmx routines or the C fallbacks (*_c),
 * depending on HAVE_MMX.
 * NOTE(review): the build rule for x86/deinterlace.o is keyed on HAVE_YASM,
 * not HAVE_MMX -- confirm that a HAVE_MMX build without YASM still links. */
#if HAVE_MMX
#define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx
#define deinterlace_line ff_deinterlace_line_mmx
#else
#define deinterlace_line_inplace deinterlace_line_inplace_c
#define deinterlace_line deinterlace_line_c
#endif
typedef struct PixFmtInfo { typedef struct PixFmtInfo {
uint8_t nb_channels; /**< number of channels (including alpha) */ uint8_t nb_channels; /**< number of channels (including alpha) */
uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */ uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */
@ -1119,61 +1126,14 @@ int img_get_alpha_info(const AVPicture *src,
return ret; return ret;
} }
#if HAVE_MMX #if !HAVE_MMX
#define DEINT_INPLACE_LINE_LUM \
movd_m2r(lum_m4[0],mm0);\
movd_m2r(lum_m3[0],mm1);\
movd_m2r(lum_m2[0],mm2);\
movd_m2r(lum_m1[0],mm3);\
movd_m2r(lum[0],mm4);\
punpcklbw_r2r(mm7,mm0);\
movd_r2m(mm2,lum_m4[0]);\
punpcklbw_r2r(mm7,mm1);\
punpcklbw_r2r(mm7,mm2);\
punpcklbw_r2r(mm7,mm3);\
punpcklbw_r2r(mm7,mm4);\
paddw_r2r(mm3,mm1);\
psllw_i2r(1,mm2);\
paddw_r2r(mm4,mm0);\
psllw_i2r(2,mm1);\
paddw_r2r(mm6,mm2);\
paddw_r2r(mm2,mm1);\
psubusw_r2r(mm0,mm1);\
psrlw_i2r(3,mm1);\
packuswb_r2r(mm7,mm1);\
movd_r2m(mm1,lum_m2[0]);
#define DEINT_LINE_LUM \
movd_m2r(lum_m4[0],mm0);\
movd_m2r(lum_m3[0],mm1);\
movd_m2r(lum_m2[0],mm2);\
movd_m2r(lum_m1[0],mm3);\
movd_m2r(lum[0],mm4);\
punpcklbw_r2r(mm7,mm0);\
punpcklbw_r2r(mm7,mm1);\
punpcklbw_r2r(mm7,mm2);\
punpcklbw_r2r(mm7,mm3);\
punpcklbw_r2r(mm7,mm4);\
paddw_r2r(mm3,mm1);\
psllw_i2r(1,mm2);\
paddw_r2r(mm4,mm0);\
psllw_i2r(2,mm1);\
paddw_r2r(mm6,mm2);\
paddw_r2r(mm2,mm1);\
psubusw_r2r(mm0,mm1);\
psrlw_i2r(3,mm1);\
packuswb_r2r(mm7,mm1);\
movd_r2m(mm1,dst[0]);
#endif
/* filter parameters: [-1 4 2 4 -1] // 8 */ /* filter parameters: [-1 4 2 4 -1] // 8 */
static void deinterlace_line(uint8_t *dst, static void deinterlace_line_c(uint8_t *dst,
const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m4, const uint8_t *lum_m3,
const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum_m2, const uint8_t *lum_m1,
const uint8_t *lum, const uint8_t *lum,
int size) int size)
{ {
#if !HAVE_MMX
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int sum; int sum;
@ -1191,27 +1151,12 @@ static void deinterlace_line(uint8_t *dst,
lum++; lum++;
dst++; dst++;
} }
#else }
static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3,
uint8_t *lum_m2, uint8_t *lum_m1,
uint8_t *lum, int size)
{ {
pxor_r2r(mm7,mm7);
movq_m2r(ff_pw_4,mm6);
}
for (;size > 3; size-=4) {
DEINT_LINE_LUM
lum_m4+=4;
lum_m3+=4;
lum_m2+=4;
lum_m1+=4;
lum+=4;
dst+=4;
}
#endif
}
static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum,
int size)
{
#if !HAVE_MMX
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int sum; int sum;
@ -1229,22 +1174,8 @@ static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *
lum_m1++; lum_m1++;
lum++; lum++;
} }
#else
{
pxor_r2r(mm7,mm7);
movq_m2r(ff_pw_4,mm6);
}
for (;size > 3; size-=4) {
DEINT_INPLACE_LINE_LUM
lum_m4+=4;
lum_m3+=4;
lum_m2+=4;
lum_m1+=4;
lum+=4;
} }
#endif #endif
}
/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
top field is copied as is, but the bottom field is deinterlaced top field is copied as is, but the bottom field is deinterlaced

View File

@ -35,6 +35,7 @@ MMX-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp_mmx.o \
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \ MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
x86/deinterlace.o \
$(YASM-OBJS-yes) $(YASM-OBJS-yes)
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o MMX-OBJS-$(CONFIG_FFT) += x86/fft.o

View File

@ -0,0 +1,81 @@
;******************************************************************************
;* MMX optimized deinterlacing functions
;* Copyright (c) 2010 Vitor Sessak
;* Copyright (c) 2002 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
; pw_4 is defined outside this file; it is loaded into mm6 below and added
; to the filter sum before the final shift (presumably four words of 4).
cextern pw_4

; This file declares no constants of its own. Everything that follows is
; code, so it must be assembled into the executable section instead of being
; left in read-only data.
SECTION .text
;------------------------------------------------------------------------------
; DEINTERLACE variant
;
; Emits one MMX line filter with taps [-1 4 2 4 -1] / 8 over five input lines.
;   variant == inplace : result is stored to lum_m2, and the previous contents
;                        of lum_m2 are first saved into lum_m4.
;   anything else      : result is stored to a separate dst line.
;
; Four pixels are handled per iteration. The loop body always runs once and
; repeats while the remaining size is still positive, so the byte count
; touched is size rounded up to a multiple of 4.
;------------------------------------------------------------------------------
%macro DEINTERLACE 1
%ifidn %1, inplace
;void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size)
; NOTE: this variant stores through lum_m4 and lum_m2.
cglobal deinterlace_line_inplace_mmx, 6,6,7, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
%else
;void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size)
cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
%endif
    pxor      mm7, mm7              ; mm7 = 0, used to widen bytes to words
    movq      mm6, [pw_4]           ; term added to the sum before the >> 3

.nextrow:
    ; load 4 pixels from each of the five source lines
    movd      mm0, [lum_m4q]
    movd      mm1, [lum_m3q]
    movd      mm2, [lum_m2q]
%ifidn %1, inplace
    movd      [lum_m4q], mm2        ; keep the unfiltered lum_m2 pixels in lum_m4
%endif
    movd      mm3, [lum_m1q]
    movd      mm4, [lumq]

    ; zero-extend every pixel to 16 bits
    punpcklbw mm0, mm7
    punpcklbw mm1, mm7
    punpcklbw mm2, mm7
    punpcklbw mm3, mm7
    punpcklbw mm4, mm7

    paddw     mm1, mm3              ; mm1 = lum_m3 + lum_m1
    psllw     mm2, 1                ; mm2 = 2 * lum_m2
    paddw     mm0, mm4              ; mm0 = lum_m4 + lum
    psllw     mm1, 2                ; mm1 = 4 * (lum_m3 + lum_m1)
    paddw     mm2, mm6              ; mm2 = 2 * lum_m2 + pw_4
    paddw     mm1, mm2              ; sum of the positive taps
    psubusw   mm1, mm0              ; subtract the outer taps, saturating at 0
    psrlw     mm1, 3                ; divide by 8
    packuswb  mm1, mm7              ; back to bytes with unsigned saturation

%ifidn %1, inplace
    movd      [lum_m2q], mm1
%else
    movd      [dstq], mm1
    add       dstq, 4
%endif
    ; advance every line pointer by the 4 pixels just processed
    add       lum_m4q, 4
    add       lum_m3q, 4
    add       lum_m2q, 4
    add       lum_m1q, 4
    add       lumq, 4
    sub       sized, 4
    jg        .nextrow
    REP_RET
%endmacro
; An argument other than "inplace" selects the variant that writes to dst
; (deinterlace_line_mmx).
DEINTERLACE ""
; "inplace" selects the variant that overwrites lum_m2 and saves its old
; contents into lum_m4 (deinterlace_line_inplace_mmx).
DEINTERLACE inplace

View File

@ -179,4 +179,17 @@ void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
void ff_mmx_idct(DCTELEM *block); void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block);
/**
 * MMX line filter with taps [-1 4 2 4 -1] / 8, implemented in assembly.
 *
 * Reads the five source lines lum_m4, lum_m3, lum_m2, lum_m1 and lum and
 * writes the filtered pixels to dst. Pixels are processed four at a time.
 *
 * @param dst    destination line
 * @param lum_m4 first source line (outer tap, weight -1)
 * @param lum_m3 second source line (weight 4)
 * @param lum_m2 third source line (centre tap, weight 2)
 * @param lum_m1 fourth source line (weight 4)
 * @param lum    fifth source line (outer tap, weight -1)
 * @param size   number of pixels to filter
 */
void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4,
                             const uint8_t *lum_m3, const uint8_t *lum_m2,
                             const uint8_t *lum_m1, const uint8_t *lum,
                             int size);
/**
 * In-place MMX line filter with taps [-1 4 2 4 -1] / 8, implemented in
 * assembly.
 *
 * The filtered pixels replace the contents of lum_m2, and the previous
 * contents of lum_m2 are saved into lum_m4. Both of those lines are therefore
 * written and are not const-qualified, which also matches the C fallback this
 * routine is interchangeable with. Pixels are processed four at a time.
 *
 * @param lum_m4 outer tap (weight -1); overwritten with the old lum_m2 pixels
 * @param lum_m3 source line (weight 4)
 * @param lum_m2 centre tap (weight 2); overwritten with the filter result
 * @param lum_m1 source line (weight 4)
 * @param lum    outer tap (weight -1)
 * @param size   number of pixels to filter
 */
void ff_deinterlace_line_inplace_mmx(uint8_t *lum_m4,
                                     const uint8_t *lum_m3,
                                     uint8_t *lum_m2,
                                     const uint8_t *lum_m1,
                                     const uint8_t *lum, int size);
#endif /* AVCODEC_X86_DSPUTIL_MMX_H */ #endif /* AVCODEC_X86_DSPUTIL_MMX_H */