mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-08 13:22:53 +02:00
Convert deinterlacing MMX code to YASM
Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
c2eae137e9
commit
de4bc44abb
@ -39,7 +39,6 @@
|
|||||||
#include "libavcore/imgutils.h"
|
#include "libavcore/imgutils.h"
|
||||||
|
|
||||||
#if HAVE_MMX
|
#if HAVE_MMX
|
||||||
#include "x86/mmx.h"
|
|
||||||
#include "x86/dsputil_mmx.h"
|
#include "x86/dsputil_mmx.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -55,6 +54,14 @@
|
|||||||
#define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */
|
#define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */
|
||||||
#define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */
|
#define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */
|
||||||
|
|
||||||
|
#if HAVE_MMX
|
||||||
|
#define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx
|
||||||
|
#define deinterlace_line ff_deinterlace_line_mmx
|
||||||
|
#else
|
||||||
|
#define deinterlace_line_inplace deinterlace_line_inplace_c
|
||||||
|
#define deinterlace_line deinterlace_line_c
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct PixFmtInfo {
|
typedef struct PixFmtInfo {
|
||||||
uint8_t nb_channels; /**< number of channels (including alpha) */
|
uint8_t nb_channels; /**< number of channels (including alpha) */
|
||||||
uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */
|
uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */
|
||||||
@ -1119,61 +1126,14 @@ int img_get_alpha_info(const AVPicture *src,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_MMX
|
#if !HAVE_MMX
|
||||||
#define DEINT_INPLACE_LINE_LUM \
|
|
||||||
movd_m2r(lum_m4[0],mm0);\
|
|
||||||
movd_m2r(lum_m3[0],mm1);\
|
|
||||||
movd_m2r(lum_m2[0],mm2);\
|
|
||||||
movd_m2r(lum_m1[0],mm3);\
|
|
||||||
movd_m2r(lum[0],mm4);\
|
|
||||||
punpcklbw_r2r(mm7,mm0);\
|
|
||||||
movd_r2m(mm2,lum_m4[0]);\
|
|
||||||
punpcklbw_r2r(mm7,mm1);\
|
|
||||||
punpcklbw_r2r(mm7,mm2);\
|
|
||||||
punpcklbw_r2r(mm7,mm3);\
|
|
||||||
punpcklbw_r2r(mm7,mm4);\
|
|
||||||
paddw_r2r(mm3,mm1);\
|
|
||||||
psllw_i2r(1,mm2);\
|
|
||||||
paddw_r2r(mm4,mm0);\
|
|
||||||
psllw_i2r(2,mm1);\
|
|
||||||
paddw_r2r(mm6,mm2);\
|
|
||||||
paddw_r2r(mm2,mm1);\
|
|
||||||
psubusw_r2r(mm0,mm1);\
|
|
||||||
psrlw_i2r(3,mm1);\
|
|
||||||
packuswb_r2r(mm7,mm1);\
|
|
||||||
movd_r2m(mm1,lum_m2[0]);
|
|
||||||
|
|
||||||
#define DEINT_LINE_LUM \
|
|
||||||
movd_m2r(lum_m4[0],mm0);\
|
|
||||||
movd_m2r(lum_m3[0],mm1);\
|
|
||||||
movd_m2r(lum_m2[0],mm2);\
|
|
||||||
movd_m2r(lum_m1[0],mm3);\
|
|
||||||
movd_m2r(lum[0],mm4);\
|
|
||||||
punpcklbw_r2r(mm7,mm0);\
|
|
||||||
punpcklbw_r2r(mm7,mm1);\
|
|
||||||
punpcklbw_r2r(mm7,mm2);\
|
|
||||||
punpcklbw_r2r(mm7,mm3);\
|
|
||||||
punpcklbw_r2r(mm7,mm4);\
|
|
||||||
paddw_r2r(mm3,mm1);\
|
|
||||||
psllw_i2r(1,mm2);\
|
|
||||||
paddw_r2r(mm4,mm0);\
|
|
||||||
psllw_i2r(2,mm1);\
|
|
||||||
paddw_r2r(mm6,mm2);\
|
|
||||||
paddw_r2r(mm2,mm1);\
|
|
||||||
psubusw_r2r(mm0,mm1);\
|
|
||||||
psrlw_i2r(3,mm1);\
|
|
||||||
packuswb_r2r(mm7,mm1);\
|
|
||||||
movd_r2m(mm1,dst[0]);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* filter parameters: [-1 4 2 4 -1] // 8 */
|
/* filter parameters: [-1 4 2 4 -1] // 8 */
|
||||||
static void deinterlace_line(uint8_t *dst,
|
static void deinterlace_line_c(uint8_t *dst,
|
||||||
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
||||||
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
||||||
const uint8_t *lum,
|
const uint8_t *lum,
|
||||||
int size)
|
int size)
|
||||||
{
|
{
|
||||||
#if !HAVE_MMX
|
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
||||||
int sum;
|
int sum;
|
||||||
|
|
||||||
@ -1191,27 +1151,12 @@ static void deinterlace_line(uint8_t *dst,
|
|||||||
lum++;
|
lum++;
|
||||||
dst++;
|
dst++;
|
||||||
}
|
}
|
||||||
#else
|
}
|
||||||
|
|
||||||
|
static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3,
|
||||||
|
uint8_t *lum_m2, uint8_t *lum_m1,
|
||||||
|
uint8_t *lum, int size)
|
||||||
{
|
{
|
||||||
pxor_r2r(mm7,mm7);
|
|
||||||
movq_m2r(ff_pw_4,mm6);
|
|
||||||
}
|
|
||||||
for (;size > 3; size-=4) {
|
|
||||||
DEINT_LINE_LUM
|
|
||||||
lum_m4+=4;
|
|
||||||
lum_m3+=4;
|
|
||||||
lum_m2+=4;
|
|
||||||
lum_m1+=4;
|
|
||||||
lum+=4;
|
|
||||||
dst+=4;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum,
|
|
||||||
int size)
|
|
||||||
{
|
|
||||||
#if !HAVE_MMX
|
|
||||||
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
|
||||||
int sum;
|
int sum;
|
||||||
|
|
||||||
@ -1229,22 +1174,8 @@ static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *
|
|||||||
lum_m1++;
|
lum_m1++;
|
||||||
lum++;
|
lum++;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
|
|
||||||
{
|
|
||||||
pxor_r2r(mm7,mm7);
|
|
||||||
movq_m2r(ff_pw_4,mm6);
|
|
||||||
}
|
|
||||||
for (;size > 3; size-=4) {
|
|
||||||
DEINT_INPLACE_LINE_LUM
|
|
||||||
lum_m4+=4;
|
|
||||||
lum_m3+=4;
|
|
||||||
lum_m2+=4;
|
|
||||||
lum_m1+=4;
|
|
||||||
lum+=4;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
|
|
||||||
/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
|
/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
|
||||||
top field is copied as is, but the bottom field is deinterlaced
|
top field is copied as is, but the bottom field is deinterlaced
|
||||||
|
@ -35,6 +35,7 @@ MMX-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp_mmx.o \
|
|||||||
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
|
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
|
||||||
MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
|
MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
|
||||||
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
|
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
|
||||||
|
x86/deinterlace.o \
|
||||||
$(YASM-OBJS-yes)
|
$(YASM-OBJS-yes)
|
||||||
|
|
||||||
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o
|
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o
|
||||||
|
81
libavcodec/x86/deinterlace.asm
Normal file
81
libavcodec/x86/deinterlace.asm
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
;******************************************************************************
|
||||||
|
;* MMX optimized deinterlacing functions
|
||||||
|
;* Copyright (c) 2010 Vitor Sessak
|
||||||
|
;* Copyright (c) 2002 Michael Niedermayer
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%include "x86inc.asm"
|
||||||
|
%include "x86util.asm"
|
||||||
|
|
||||||
|
SECTION_RODATA
|
||||||
|
|
||||||
|
cextern pw_4
|
||||||
|
|
||||||
|
%macro DEINTERLACE 1
|
||||||
|
%ifidn %1, inplace
|
||||||
|
;void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size)
|
||||||
|
cglobal deinterlace_line_inplace_mmx, 6,6,7, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
|
||||||
|
%else
|
||||||
|
;void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size)
|
||||||
|
cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
|
||||||
|
%endif
|
||||||
|
pxor mm7, mm7
|
||||||
|
movq mm6, [pw_4]
|
||||||
|
.nextrow
|
||||||
|
movd mm0, [lum_m4q]
|
||||||
|
movd mm1, [lum_m3q]
|
||||||
|
movd mm2, [lum_m2q]
|
||||||
|
%ifidn %1, inplace
|
||||||
|
movd [lum_m4q], mm2
|
||||||
|
%endif
|
||||||
|
movd mm3, [lum_m1q]
|
||||||
|
movd mm4, [lumq]
|
||||||
|
punpcklbw mm0, mm7
|
||||||
|
punpcklbw mm1, mm7
|
||||||
|
punpcklbw mm2, mm7
|
||||||
|
punpcklbw mm3, mm7
|
||||||
|
punpcklbw mm4, mm7
|
||||||
|
paddw mm1, mm3
|
||||||
|
psllw mm2, 1
|
||||||
|
paddw mm0, mm4
|
||||||
|
psllw mm1, 2
|
||||||
|
paddw mm2, mm6
|
||||||
|
paddw mm1, mm2
|
||||||
|
psubusw mm1, mm0
|
||||||
|
psrlw mm1, 3
|
||||||
|
packuswb mm1, mm7
|
||||||
|
%ifidn %1, inplace
|
||||||
|
movd [lum_m2q], mm1
|
||||||
|
%else
|
||||||
|
movd [dstq], mm1
|
||||||
|
add dstq, 4
|
||||||
|
%endif
|
||||||
|
add lum_m4q, 4
|
||||||
|
add lum_m3q, 4
|
||||||
|
add lum_m2q, 4
|
||||||
|
add lum_m1q, 4
|
||||||
|
add lumq, 4
|
||||||
|
sub sized, 4
|
||||||
|
jg .nextrow
|
||||||
|
REP_RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
DEINTERLACE ""
|
||||||
|
|
||||||
|
DEINTERLACE inplace
|
@ -179,4 +179,17 @@ void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
|
|||||||
void ff_mmx_idct(DCTELEM *block);
|
void ff_mmx_idct(DCTELEM *block);
|
||||||
void ff_mmxext_idct(DCTELEM *block);
|
void ff_mmxext_idct(DCTELEM *block);
|
||||||
|
|
||||||
|
|
||||||
|
void ff_deinterlace_line_mmx(uint8_t *dst,
|
||||||
|
const uint8_t *lum_m4, const uint8_t *lum_m3,
|
||||||
|
const uint8_t *lum_m2, const uint8_t *lum_m1,
|
||||||
|
const uint8_t *lum,
|
||||||
|
int size);
|
||||||
|
|
||||||
|
void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
|
||||||
|
const uint8_t *lum_m3,
|
||||||
|
const uint8_t *lum_m2,
|
||||||
|
const uint8_t *lum_m1,
|
||||||
|
const uint8_t *lum, int size);
|
||||||
|
|
||||||
#endif /* AVCODEC_X86_DSPUTIL_MMX_H */
|
#endif /* AVCODEC_X86_DSPUTIL_MMX_H */
|
||||||
|
Loading…
Reference in New Issue
Block a user