You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	avfilter/vf_bwdif: add x86 SIMD
Signed-off-by: Thomas Mundt <loudmax@yahoo.de>
This commit is contained in:
		
				
					committed by
					
						 Paul B Mahol
						Paul B Mahol
					
				
			
			
				
	
			
			
			
						parent
						
							d11d78facb
						
					
				
				
					commit
					5024a82e95
				
			
							
								
								
									
										72
									
								
								libavfilter/bwdif.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								libavfilter/bwdif.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| /* | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #ifndef AVFILTER_BWDIF_H | ||||
| #define AVFILTER_BWDIF_H | ||||
|  | ||||
| #include "libavutil/pixdesc.h" | ||||
| #include "avfilter.h" | ||||
|  | ||||
/** Selects whether one output frame is sent per input frame or per field; stored in BWDIFContext.mode. */
| enum BWDIFMode { | ||||
|     BWDIF_MODE_SEND_FRAME = 0, ///< send 1 frame for each frame | ||||
|     BWDIF_MODE_SEND_FIELD = 1, ///< send 1 frame for each field | ||||
| }; | ||||
|  | ||||
/** Field order of the input, or -1 to have it detected; stored in BWDIFContext.parity. */
| enum BWDIFParity { | ||||
|     BWDIF_PARITY_TFF  =  0, ///< top field first | ||||
|     BWDIF_PARITY_BFF  =  1, ///< bottom field first | ||||
|     BWDIF_PARITY_AUTO = -1, ///< auto detection | ||||
| }; | ||||
|  | ||||
/** Selects which frames get deinterlaced; stored in BWDIFContext.deint. */
| enum BWDIFDeint { | ||||
|     BWDIF_DEINT_ALL        = 0, ///< deinterlace all frames | ||||
|     BWDIF_DEINT_INTERLACED = 1, ///< only deinterlace frames marked as interlaced | ||||
| }; | ||||
|  | ||||
/**
 * Private state of the bwdif filter. Declared in this header so that both
 * vf_bwdif.c and the x86 init code (x86/vf_bwdif_init.c) can see it.
 */
| typedef struct BWDIFContext { | ||||
|     const AVClass *class; | ||||
|  | ||||
|     int mode;           ///< BWDIFMode | ||||
|     int parity;         ///< BWDIFParity | ||||
|     int deint;          ///< BWDIFDeint | ||||
|  | ||||
|     int frame_pending; | ||||
|  | ||||
    /* Frame references. Presumably the window of input frames around the one
     * being processed plus the frame being written - confirm in vf_bwdif.c. */
|     AVFrame *cur; | ||||
|     AVFrame *next; | ||||
|     AVFrame *prev; | ||||
|     AVFrame *out; | ||||
|  | ||||
    /*
     * Line kernels. config_props() points these at the 8-bit or the 16-bit C
     * implementations depending on csp->comp[0].depth; ff_bwdif_init_x86()
     * may afterwards replace filter_line (and only filter_line) with a SIMD
     * version. The void pointers are rows of samples of the selected depth,
     * w is the number of samples to produce, and the prefs / mrefs arguments
     * are offsets, counted in samples, applied to those row pointers.
     */
|     void (*filter_intra)(void *dst1, void *cur1, int w, int prefs, int mrefs, | ||||
|                          int prefs3, int mrefs3, int parity, int clip_max); | ||||
|     void (*filter_line)(void *dst, void *prev, void *cur, void *next, | ||||
|                         int w, int prefs, int mrefs, int prefs2, int mrefs2, | ||||
|                         int prefs3, int mrefs3, int prefs4, int mrefs4, | ||||
|                         int parity, int clip_max); | ||||
|     void (*filter_edge)(void *dst, void *prev, void *cur, void *next, | ||||
|                         int w, int prefs, int mrefs, int prefs2, int mrefs2, | ||||
|                         int parity, int clip_max, int spat); | ||||
|  | ||||
    /* Descriptor of the link's pixel format, set in config_props() via
     * av_pix_fmt_desc_get(); its comp[0].depth drives kernel selection. */
|     const AVPixFmtDescriptor *csp; | ||||
|     int inter_field; | ||||
|     int eof; | ||||
| } BWDIFContext; | ||||
|  | ||||
/**
 * Replace bwdif->filter_line with an x86 SIMD implementation when the CPU
 * flags and the sample depth allow it. Implemented in x86/vf_bwdif_init.c and
 * called from config_props() under ARCH_X86, after the C kernels are set.
 */
| void ff_bwdif_init_x86(BWDIFContext *bwdif); | ||||
|  | ||||
| #endif /* AVFILTER_BWDIF_H */ | ||||
| @@ -37,6 +37,7 @@ | ||||
| #include "formats.h" | ||||
| #include "internal.h" | ||||
| #include "video.h" | ||||
| #include "bwdif.h" | ||||
|  | ||||
| /* | ||||
|  * Filter coefficients coef_lf and coef_hf taken from BBC PH-2071 (Weston 3 Field Deinterlacer). | ||||
| @@ -48,51 +49,6 @@ static const uint16_t coef_lf[2] = { 4309, 213 }; | ||||
| static const uint16_t coef_hf[3] = { 5570, 3801, 1016 }; | ||||
| static const uint16_t coef_sp[2] = { 5077, 981 }; | ||||
|  | ||||
| enum BWDIFMode { | ||||
|     BWDIF_MODE_SEND_FRAME = 0, ///< send 1 frame for each frame | ||||
|     BWDIF_MODE_SEND_FIELD = 1, ///< send 1 frame for each field | ||||
| }; | ||||
|  | ||||
| enum BWDIFParity { | ||||
|     BWDIF_PARITY_TFF  =  0, ///< top field first | ||||
|     BWDIF_PARITY_BFF  =  1, ///< bottom field first | ||||
|     BWDIF_PARITY_AUTO = -1, ///< auto detection | ||||
| }; | ||||
|  | ||||
| enum BWDIFDeint { | ||||
|     BWDIF_DEINT_ALL        = 0, ///< deinterlace all frames | ||||
|     BWDIF_DEINT_INTERLACED = 1, ///< only deinterlace frames marked as interlaced | ||||
| }; | ||||
|  | ||||
| typedef struct BWDIFContext { | ||||
|     const AVClass *class; | ||||
|  | ||||
|     int mode;           ///< BWDIFMode | ||||
|     int parity;         ///< BWDIFParity | ||||
|     int deint;          ///< BWDIFDeint | ||||
|  | ||||
|     int frame_pending; | ||||
|  | ||||
|     AVFrame *cur; | ||||
|     AVFrame *next; | ||||
|     AVFrame *prev; | ||||
|     AVFrame *out; | ||||
|  | ||||
|     void (*filter_intra)(void *dst1, void *cur1, int w, int prefs, int mrefs, | ||||
|                          int prefs3, int mrefs3, int parity, int clip_max); | ||||
|     void (*filter_line)(void *dst, void *prev, void *cur, void *next, | ||||
|                         int w, int prefs, int mrefs, int prefs2, int mrefs2, | ||||
|                         int prefs3, int mrefs3, int prefs4, int mrefs4, | ||||
|                         int parity, int clip_max); | ||||
|     void (*filter_edge)(void *dst, void *prev, void *cur, void *next, | ||||
|                         int w, int prefs, int mrefs, int prefs2, int mrefs2, | ||||
|                         int parity, int clip_max, int spat); | ||||
|  | ||||
|     const AVPixFmtDescriptor *csp; | ||||
|     int inter_field; | ||||
|     int eof; | ||||
| } BWDIFContext; | ||||
|  | ||||
| typedef struct ThreadData { | ||||
|     AVFrame *frame; | ||||
|     int plane; | ||||
| @@ -177,10 +133,10 @@ static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, | ||||
|     FILTER_INTRA() | ||||
| } | ||||
|  | ||||
| static void filter_line(void *dst1, void *prev1, void *cur1, void *next1, | ||||
|                         int w, int prefs, int mrefs, int prefs2, int mrefs2, | ||||
|                         int prefs3, int mrefs3, int prefs4, int mrefs4, | ||||
|                         int parity, int clip_max) | ||||
| static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, | ||||
|                           int w, int prefs, int mrefs, int prefs2, int mrefs2, | ||||
|                           int prefs3, int mrefs3, int prefs4, int mrefs4, | ||||
|                           int parity, int clip_max) | ||||
| { | ||||
|     uint8_t *dst   = dst1; | ||||
|     uint8_t *prev  = prev1; | ||||
| @@ -222,10 +178,10 @@ static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mre | ||||
|     FILTER_INTRA() | ||||
| } | ||||
|  | ||||
| static void filter_line_16bit(void *dst1, void *prev1, void *cur1, void *next1, | ||||
|                               int w, int prefs, int mrefs, int prefs2, int mrefs2, | ||||
|                               int prefs3, int mrefs3, int prefs4, int mrefs4, | ||||
|                               int parity, int clip_max) | ||||
| static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1, | ||||
|                                 int w, int prefs, int mrefs, int prefs2, int mrefs2, | ||||
|                                 int prefs3, int mrefs3, int prefs4, int mrefs4, | ||||
|                                 int parity, int clip_max) | ||||
| { | ||||
|     uint16_t *dst   = dst1; | ||||
|     uint16_t *prev  = prev1; | ||||
| @@ -557,14 +513,17 @@ static int config_props(AVFilterLink *link) | ||||
|     s->csp = av_pix_fmt_desc_get(link->format); | ||||
|     if (s->csp->comp[0].depth > 8) { | ||||
|         s->filter_intra = filter_intra_16bit; | ||||
|         s->filter_line  = filter_line_16bit; | ||||
|         s->filter_line  = filter_line_c_16bit; | ||||
|         s->filter_edge  = filter_edge_16bit; | ||||
|     } else { | ||||
|         s->filter_intra = filter_intra; | ||||
|         s->filter_line  = filter_line; | ||||
|         s->filter_line  = filter_line_c; | ||||
|         s->filter_edge  = filter_edge; | ||||
|     } | ||||
|  | ||||
|     if (ARCH_X86) | ||||
|         ff_bwdif_init_x86(s); | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| OBJS-$(CONFIG_BLEND_FILTER)                  += x86/vf_blend_init.o | ||||
| OBJS-$(CONFIG_BWDIF_FILTER)                  += x86/vf_bwdif_init.o | ||||
| OBJS-$(CONFIG_EQ_FILTER)                     += x86/vf_eq.o | ||||
| OBJS-$(CONFIG_FSPP_FILTER)                   += x86/vf_fspp_init.o | ||||
| OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o | ||||
| @@ -21,6 +22,7 @@ OBJS-$(CONFIG_W3FDIF_FILTER)                 += x86/vf_w3fdif_init.o | ||||
| OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o | ||||
|  | ||||
| YASM-OBJS-$(CONFIG_BLEND_FILTER)             += x86/vf_blend.o | ||||
| YASM-OBJS-$(CONFIG_BWDIF_FILTER)             += x86/vf_bwdif.o | ||||
| YASM-OBJS-$(CONFIG_FSPP_FILTER)              += x86/vf_fspp.o | ||||
| YASM-OBJS-$(CONFIG_GRADFUN_FILTER)           += x86/vf_gradfun.o | ||||
| YASM-OBJS-$(CONFIG_HQDN3D_FILTER)            += x86/vf_hqdn3d.o | ||||
|   | ||||
							
								
								
									
										266
									
								
								libavfilter/x86/vf_bwdif.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										266
									
								
								libavfilter/x86/vf_bwdif.asm
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,266 @@ | ||||
| ;***************************************************************************** | ||||
| ;* x86-optimized functions for bwdif filter | ||||
| ;* | ||||
| ;* Copyright (C) 2016 Thomas Mundt <loudmax@yahoo.de> | ||||
| ;* | ||||
| ;* This file is part of FFmpeg. | ||||
| ;* | ||||
| ;* FFmpeg is free software; you can redistribute it and/or | ||||
| ;* modify it under the terms of the GNU Lesser General Public | ||||
| ;* License as published by the Free Software Foundation; either | ||||
| ;* version 2.1 of the License, or (at your option) any later version. | ||||
| ;* | ||||
| ;* FFmpeg is distributed in the hope that it will be useful, | ||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
| ;* Lesser General Public License for more details. | ||||
| ;* | ||||
| ;* You should have received a copy of the GNU Lesser General Public | ||||
| ;* License along with FFmpeg; if not, write to the Free Software | ||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
| ;****************************************************************************** | ||||
|  | ||||
| %include "libavutil/x86/x86util.asm" | ||||
|  | ||||
| SECTION_RODATA | ||||
|  | ||||
; Word constants consumed by FILTER.
;   pw_coefhf   (1016, 5570) pairs for pmaddwd against
;               (sum of the four samples at 4*t0 / 4*t1, m8)
;   pw_coefhf1  -3801, multiplied with m10 (sum of the four samples at
;               2*t0 / 2*t1) via pmullw + pmulhw
;   pw_coefsp   (5077, -981) pairs for pmaddwd against
;               (cur[t0] + cur[t1], cur[prefs3] + cur[mrefs3])
;   pw_splfdif  (-768, 768); added to pw_coefsp under a per-lane mask it
;               turns the pair into (4309, -213)
| pw_coefhf:  times 4 dw  1016, 5570 | ||||
| pw_coefhf1: times 8 dw -3801 | ||||
| pw_coefsp:  times 4 dw  5077, -981 | ||||
| pw_splfdif: times 4 dw  -768,  768 | ||||
|  | ||||
| SECTION .text | ||||
|  | ||||
; LOAD8 dst, src
; Half-register load of bytes from src (%2) into %1, then interleave with m7
; to widen them to words. m7 must be zero here; FILTER clears it before the
; loop and again at the end of every iteration.
| %macro LOAD8 2 | ||||
|     movh         %1, %2 | ||||
|     punpcklbw    %1, m7 | ||||
| %endmacro | ||||
|  | ||||
; LOAD12 dst, src
; Unaligned full-register load; used by the 12-bit functions, whose samples
; are already words, so no widening is done.
| %macro LOAD12 2 | ||||
|     movu         %1, %2 | ||||
| %endmacro | ||||
|  | ||||
; DISP8
; Pack the words in m2 to bytes with unsigned saturation and store the low
; half of the register at [dstq].
| %macro DISP8 0 | ||||
|     packuswb     m2, m2 | ||||
|     movh     [dstq], m2 | ||||
| %endmacro | ||||
|  | ||||
; DISP12
; Clamp the words in m2 to [m7, m12] and store the full register at [dstq].
; FILTER re-zeroes m7 right before invoking this; m12 holds clip_max
; broadcast to every word (set up in the 12-bit prologue of BWDIF).
| %macro DISP12 0 | ||||
|     CLIPW        m2, m7, m12 | ||||
|     movu     [dstq], m2 | ||||
| %endmacro | ||||
|  | ||||
; FILTER label, ref_a, ref_b, depth, bps
;   %1  suffix that keeps the .loop label unique per expansion
;   %2  row pointer of the first temporal reference  (PROC: prevq or curq)
;   %3  row pointer of the second temporal reference (PROC: curq or nextq)
;   %4  8 or 12; selects LOAD8/DISP8 or LOAD12/DISP12
;   %5  bytes per sample; scales the t0..t3 offsets into byte offsets
;
; Every iteration produces mmsize/2 output samples, processed as 16-bit words.
; t0/t1 hold the prefs/mrefs arguments and, on x86-64, t2/t3 hold
; prefs3/mrefs3 (see PROC). The rows two and four lines away are addressed as
; 2*t0, 2*t1, 4*t0 and 4*t1; the prefs2/mrefs2/prefs4/mrefs4 arguments are
; never read. Uses m0-m7 plus m8-m11 (stack slots on x86-32) as scratch.
| %macro FILTER 5 | ||||
|     pxor         m7, m7 | ||||
| .loop%1: | ||||
    ; m0 = cur[t0], m1 = cur[t1]
|     LOAD%4       m0, [curq+t0*%5] | ||||
|     LOAD%4       m1, [curq+t1*%5] | ||||
    ; m8 = %2[0] + %3[0], m9 = |%2[0] - %3[0]| (m2 keeps a copy of m9)
|     LOAD%4       m2, [%2] | ||||
|     LOAD%4       m3, [%3] | ||||
|     mova         m4, m3 | ||||
|     paddw        m3, m2 | ||||
|     psubw        m2, m4 | ||||
|     ABS1         m2, m4 | ||||
|     mova         m8, m3 | ||||
|     mova         m9, m2 | ||||
    ; m2 = max(m9 >> 1, (|prev[t0] - m0| + |prev[t1] - m1|) >> 1)
|     LOAD%4       m3, [prevq+t0*%5] | ||||
|     LOAD%4       m4, [prevq+t1*%5] | ||||
|     psubw        m3, m0 | ||||
|     psubw        m4, m1 | ||||
|     ABS2         m3, m4, m5, m6 | ||||
|     paddw        m3, m4 | ||||
|     psrlw        m2, 1 | ||||
|     psrlw        m3, 1 | ||||
|     pmaxsw       m2, m3 | ||||
    ; m2 = max(m2, (|next[t0] - m0| + |next[t1] - m1|) >> 1)
|     LOAD%4       m3, [nextq+t0*%5] | ||||
|     LOAD%4       m4, [nextq+t1*%5] | ||||
|     psubw        m3, m0 | ||||
|     psubw        m4, m1 | ||||
|     ABS2         m3, m4, m5, m6 | ||||
|     paddw        m3, m4 | ||||
|     psrlw        m3, 1 | ||||
|     pmaxsw       m2, m3 | ||||
|  | ||||
    ; m3 = %2[2*t0] + %3[2*t0], m5 = %2[2*t1] + %3[2*t1]
    ; m10 = m3 + m5, kept for the pw_coefhf1 term further down
|     LOAD%4       m3, [%2+t0*2*%5] | ||||
|     LOAD%4       m4, [%3+t0*2*%5] | ||||
|     LOAD%4       m5, [%2+t1*2*%5] | ||||
|     LOAD%4       m6, [%3+t1*2*%5] | ||||
|     paddw        m3, m4 | ||||
|     paddw        m5, m6 | ||||
|     mova         m6, m3 | ||||
|     paddw        m6, m5 | ||||
|     mova        m10, m6 | ||||
    ; x = (m3 >> 1) - m0, y = (m5 >> 1) - m1; then m3 = min(x, y), m5 = max(x, y)
|     psrlw        m3, 1 | ||||
|     psrlw        m5, 1 | ||||
|     psubw        m3, m0 | ||||
|     psubw        m5, m1 | ||||
|     mova         m6, m3 | ||||
|     pminsw       m3, m5 | ||||
|     pmaxsw       m5, m6 | ||||
    ; m6 = (m8 >> 1) - m0, m4 = (m8 >> 1) - m1
    ; m3 = max(m3, m6, m4), m5 = min(m5, m6, m4)
|     mova         m4, m8 | ||||
|     psraw        m4, 1 | ||||
|     mova         m6, m4 | ||||
|     psubw        m6, m0 | ||||
|     psubw        m4, m1 | ||||
|     pmaxsw       m3, m6 | ||||
|     pminsw       m5, m6 | ||||
|     pmaxsw       m3, m4 | ||||
|     pminsw       m5, m4 | ||||
    ; m6 = max(-m3, m5), forced to 0 in lanes where m2 is not > 0 (m7 is zero)
    ; m11 = max(m2, m6): half-width of the final clamp window
|     mova         m6, m7 | ||||
|     psubw        m6, m3 | ||||
|     pmaxsw       m6, m5 | ||||
|     mova         m3, m2 | ||||
|     pcmpgtw      m3, m7 | ||||
|     pand         m6, m3 | ||||
|     pmaxsw       m2, m6 | ||||
|     mova        m11, m2 | ||||
|  | ||||
    ; m2 = %2[4*t0] + %3[4*t0] + %2[4*t1] + %3[4*t1]
|     LOAD%4       m2, [%2+t0*4*%5] | ||||
|     LOAD%4       m3, [%3+t0*4*%5] | ||||
|     LOAD%4       m4, [%2+t1*4*%5] | ||||
|     LOAD%4       m5, [%3+t1*4*%5] | ||||
|     paddw        m2, m3 | ||||
|     paddw        m4, m5 | ||||
|     paddw        m2, m4 | ||||
    ; interleave with m8 and pmaddwd with (1016, 5570):
    ; m2 (low lanes) / m3 (high lanes) = 1016 * sum4 + 5570 * m8, as dwords
|     mova         m3, m2 | ||||
|     punpcklwd    m2, m8 | ||||
|     punpckhwd    m3, m8 | ||||
|     pmaddwd      m2, [pw_coefhf] | ||||
|     pmaddwd      m3, [pw_coefhf] | ||||
    ; 32-bit product m10 * -3801, assembled from pmullw (low words) and
    ; pmulhw (high words); m4 = low lanes, m5 = high lanes
|     mova         m4, m10 | ||||
|     mova         m6, m4 | ||||
|     pmullw       m4, [pw_coefhf1] | ||||
|     pmulhw       m6, [pw_coefhf1] | ||||
|     mova         m5, m4 | ||||
|     punpcklwd    m4, m6 | ||||
|     punpckhwd    m5, m6 | ||||
    ; m2/m3 = (pw_coefhf term + pw_coefhf1 term) >> 2 (arithmetic)
|     paddd        m2, m4 | ||||
|     paddd        m3, m5 | ||||
|     psrad        m2, 2 | ||||
|     psrad        m3, 2 | ||||
|  | ||||
    ; m4 = copy of cur[t0], m0 = cur[t0] + cur[t1]
|     mova         m4, m0 | ||||
|     paddw        m0, m1 | ||||
    ; m5/m6 = cur at the prefs3/mrefs3 offsets. x86-32 has no t2/t3, so r4/r5
    ; (= t0/t1) are temporarily reloaded with prefs3/mrefs3 and restored to
    ; prefs/mrefs right after the two loads.
| %if ARCH_X86_64 | ||||
|     LOAD%4       m5, [curq+t2*%5] | ||||
|     LOAD%4       m6, [curq+t3*%5] | ||||
| %else | ||||
|     mov          r4, prefs3mp | ||||
|     mov          r5, mrefs3mp | ||||
|     LOAD%4       m5, [curq+t0*%5] | ||||
|     LOAD%4       m6, [curq+t1*%5] | ||||
|     mov          r4, prefsmp | ||||
|     mov          r5, mrefsmp | ||||
| %endif | ||||
    ; m6 = cur[prefs3] + cur[mrefs3]
|     paddw        m6, m5 | ||||
    ; per-word mask: |cur[t1] - cur[t0]| > m9
|     psubw        m1, m4 | ||||
|     ABS1         m1, m4 | ||||
|     pcmpgtw      m1, m9 | ||||
    ; duplicate each mask word into a dword: m1 = low lanes, m4 = high lanes
|     mova         m4, m1 | ||||
|     punpcklwd    m1, m4 | ||||
|     punpckhwd    m4, m4 | ||||
    ; drop the m2/m3 term in lanes where the mask is clear
|     pand         m2, m1 | ||||
|     pand         m3, m4 | ||||
    ; per-lane coefficient pair = pw_coefsp + (pw_splfdif & mask):
    ; (4309, -213) where the mask is set, (5077, -981) elsewhere.
    ; m7 is used as a temporary here and is therefore no longer zero.
|     mova         m5, [pw_splfdif] | ||||
|     mova         m7, m5 | ||||
|     pand         m5, m1 | ||||
|     pand         m7, m4 | ||||
|     paddw        m5, [pw_coefsp] | ||||
|     paddw        m7, [pw_coefsp] | ||||
    ; pmaddwd of the (m0, m6) word pairs with the coefficient pairs, add to
    ; m2/m3, shift right by 13 and pack back to signed words in m2
|     mova         m4, m0 | ||||
|     punpcklwd    m0, m6 | ||||
|     punpckhwd    m4, m6 | ||||
|     pmaddwd      m0, m5 | ||||
|     pmaddwd      m4, m7 | ||||
|     paddd        m2, m0 | ||||
|     paddd        m3, m4 | ||||
|     psrad        m2, 13 | ||||
|     psrad        m3, 13 | ||||
|     packssdw     m2, m3 | ||||
|  | ||||
    ; clamp m2 to [(m8 >> 1) - m11, (m8 >> 1) + m11]
|     mova         m4, m8 | ||||
|     psraw        m4, 1 | ||||
|     mova         m0, m11 | ||||
|     mova         m3, m4 | ||||
|     psubw        m4, m0 | ||||
|     paddw        m3, m0 | ||||
|     CLIPW        m2, m4, m3 | ||||
    ; restore m7 = 0 (clobbered above); DISP12 and the next LOAD8 rely on it
|     pxor         m7, m7 | ||||
|     DISP%4 | ||||
|  | ||||
    ; advance all four row pointers by STEP bytes; the w argument is
    ; decremented in place by mmsize/2 samples and the loop runs while it
    ; stays positive, so whole vectors are always processed
|     add        dstq, STEP | ||||
|     add       prevq, STEP | ||||
|     add        curq, STEP | ||||
|     add       nextq, STEP | ||||
|     sub    DWORD wm, mmsize/2 | ||||
|     jg .loop%1 | ||||
| %endmacro | ||||
|  | ||||
; PROC depth, bps
;   %1  8 or 12, forwarded to FILTER as its LOAD/DISP selector
;   %2  bytes per sample, forwarded to FILTER
;
; Function body shared by the 8-bit and 12-bit entry points: sets up the
; temporaries, picks the FILTER expansion according to the parity argument
; and returns.
| %macro PROC 2 | ||||
| %if ARCH_X86_64 | ||||
    ; sign-extend the int arguments to full registers and expose them as
    ; t0 = prefs, t1 = mrefs, t2 = prefs3, t3 = mrefs3
|     movsxd       r5, DWORD prefsm | ||||
|     movsxd       r6, DWORD mrefsm | ||||
|     movsxd       r7, DWORD prefs3m | ||||
|     movsxd       r8, DWORD mrefs3m | ||||
|     DECLARE_REG_TMP 5, 6, 7, 8 | ||||
| %else | ||||
    ; the extra vector "registers" m8-m11 live in the stack area reserved by
    ; cglobal; only t0 = prefs and t1 = mrefs are kept in registers
|     %define m8  [rsp+ 0] | ||||
|     %define m9  [rsp+16] | ||||
|     %define m10 [rsp+32] | ||||
|     %define m11 [rsp+48] | ||||
|     mov          r4, prefsmp | ||||
|     mov          r5, mrefsmp | ||||
|     DECLARE_REG_TMP 4, 5 | ||||
| %endif | ||||
    ; parity != 0: temporal references are (prev, cur)
    ; parity == 0: temporal references are (cur, next)
|     cmp DWORD paritym, 0 | ||||
|     je .parity0 | ||||
|     FILTER 1, prevq, curq, %1, %2 | ||||
|     jmp .ret | ||||
| .parity0: | ||||
|     FILTER 0, curq, nextq, %1, %2 | ||||
| .ret: | ||||
|     RET | ||||
| %endmacro | ||||
|  | ||||
; BWDIF
; Emits bwdif_filter_line and bwdif_filter_line_12bit for the instruction set
; selected by the preceding INIT_* line. Both take the argument list of
; BWDIFContext.filter_line; dst, prev, cur and next are loaded into registers
; by cglobal, the remaining arguments are read through their <name>m aliases.
; x86-64 declares 12 (8-bit) or 13 (12-bit) vector registers and no stack;
; x86-32 declares 8 and reserves 64 or 80 bytes of stack for m8-m11 (and m12).
| %macro BWDIF 0 | ||||
| %if ARCH_X86_64 | ||||
| cglobal bwdif_filter_line, 4, 9, 12, 0, dst, prev, cur, next, w, prefs, \ | ||||
|                                         mrefs, prefs2, mrefs2, prefs3, mrefs3, \ | ||||
|                                         prefs4, mrefs4, parity, clip_max | ||||
| %else | ||||
| cglobal bwdif_filter_line, 4, 6, 8, 64, dst, prev, cur, next, w, prefs, \ | ||||
|                                         mrefs, prefs2, mrefs2, prefs3, mrefs3, \ | ||||
|                                         prefs4, mrefs4, parity, clip_max | ||||
| %endif | ||||
    ; 8-bit samples: mmsize/2 samples per iteration, one byte each
|     %define STEP mmsize/2 | ||||
|     PROC 8, 1 | ||||
|  | ||||
| %if ARCH_X86_64 | ||||
| cglobal bwdif_filter_line_12bit, 4, 9, 13, 0, dst, prev, cur, next, w, \ | ||||
|                                               prefs, mrefs, prefs2, mrefs2, \ | ||||
|                                               prefs3, mrefs3, prefs4, \ | ||||
|                                               mrefs4, parity, clip_max | ||||
    ; m12 = clip_max broadcast to every word (upper clamp used by DISP12)
|     movd        m12, DWORD clip_maxm | ||||
|     SPLATW      m12, m12, 0 | ||||
| %else | ||||
| cglobal bwdif_filter_line_12bit, 4, 6, 8, 80, dst, prev, cur, next, w, \ | ||||
|                                               prefs, mrefs, prefs2, mrefs2, \ | ||||
|                                               prefs3, mrefs3, prefs4, \ | ||||
|                                               mrefs4, parity, clip_max | ||||
    ; m12 is a fifth stack slot here, so the broadcast is built in m0 first
|     %define m12 [rsp+64] | ||||
|     movd         m0, DWORD clip_maxm | ||||
|     SPLATW       m0, m0, 0 | ||||
|     mova        m12, m0 | ||||
| %endif | ||||
    ; 16-bit samples: mmsize/2 samples per iteration, two bytes each
|     %define STEP mmsize | ||||
|     PROC 12, 2 | ||||
| %endmacro | ||||
|  | ||||
; Instantiate both functions for SSSE3 and SSE2, and for MMXEXT on 32-bit
; builds only. The resulting symbols are the ones declared in
; vf_bwdif_init.c (ff_bwdif_filter_line[_12bit]_{ssse3,sse2,mmxext}).
| INIT_XMM ssse3 | ||||
| BWDIF | ||||
| INIT_XMM sse2 | ||||
| BWDIF | ||||
| %if ARCH_X86_32 | ||||
| INIT_MMX mmxext | ||||
| BWDIF | ||||
| %endif | ||||
							
								
								
									
										78
									
								
								libavfilter/x86/vf_bwdif_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								libavfilter/x86/vf_bwdif_init.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| /* | ||||
|  * Copyright (C) 2016 Thomas Mundt <loudmax@yahoo.de> | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include "libavutil/attributes.h" | ||||
| #include "libavutil/cpu.h" | ||||
| #include "libavutil/mem.h" | ||||
| #include "libavutil/x86/asm.h" | ||||
| #include "libavutil/x86/cpu.h" | ||||
| #include "libavfilter/bwdif.h" | ||||
|  | ||||
/*
 * Assembly kernels emitted by the BWDIF macro in x86/vf_bwdif.asm. All share
 * the signature of BWDIFContext.filter_line. The mmxext variants are only
 * assembled on 32-bit x86 and are only referenced under ARCH_X86_32 below.
 */
| void ff_bwdif_filter_line_mmxext(void *dst, void *prev, void *cur, void *next, | ||||
|                                  int w, int prefs, int mrefs, int prefs2, | ||||
|                                  int mrefs2, int prefs3, int mrefs3, int prefs4, | ||||
|                                  int mrefs4, int parity, int clip_max); | ||||
| void ff_bwdif_filter_line_sse2(void *dst, void *prev, void *cur, void *next, | ||||
|                                int w, int prefs, int mrefs, int prefs2, | ||||
|                                int mrefs2, int prefs3, int mrefs3, int prefs4, | ||||
|                                int mrefs4, int parity, int clip_max); | ||||
| void ff_bwdif_filter_line_ssse3(void *dst, void *prev, void *cur, void *next, | ||||
|                                 int w, int prefs, int mrefs, int prefs2, | ||||
|                                 int mrefs2, int prefs3, int mrefs3, int prefs4, | ||||
|                                 int mrefs4, int parity, int clip_max); | ||||
|  | ||||
/* Variants for samples stored as 16-bit words; selected for depths 9..12. */
| void ff_bwdif_filter_line_12bit_mmxext(void *dst, void *prev, void *cur, void *next, | ||||
|                                        int w, int prefs, int mrefs, int prefs2, | ||||
|                                        int mrefs2, int prefs3, int mrefs3, int prefs4, | ||||
|                                        int mrefs4, int parity, int clip_max); | ||||
| void ff_bwdif_filter_line_12bit_sse2(void *dst, void *prev, void *cur, void *next, | ||||
|                                      int w, int prefs, int mrefs, int prefs2, | ||||
|                                      int mrefs2, int prefs3, int mrefs3, int prefs4, | ||||
|                                      int mrefs4, int parity, int clip_max); | ||||
| void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *next, | ||||
|                                       int w, int prefs, int mrefs, int prefs2, | ||||
|                                       int mrefs2, int prefs3, int mrefs3, int prefs4, | ||||
|                                       int mrefs4, int parity, int clip_max); | ||||
|  | ||||
| av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif) | ||||
| { | ||||
|     int cpu_flags = av_get_cpu_flags(); | ||||
|     int bit_depth = (!bwdif->csp) ? 8 : bwdif->csp->comp[0].depth; | ||||
|  | ||||
|     if (bit_depth <= 8) { | ||||
| #if ARCH_X86_32 | ||||
|         if (EXTERNAL_MMXEXT(cpu_flags)) | ||||
|             bwdif->filter_line = ff_bwdif_filter_line_mmxext; | ||||
| #endif /* ARCH_X86_32 */ | ||||
|         if (EXTERNAL_SSE2(cpu_flags)) | ||||
|             bwdif->filter_line = ff_bwdif_filter_line_sse2; | ||||
|         if (EXTERNAL_SSSE3(cpu_flags)) | ||||
|             bwdif->filter_line = ff_bwdif_filter_line_ssse3; | ||||
|     } else if (bit_depth <= 12) { | ||||
| #if ARCH_X86_32 | ||||
|         if (EXTERNAL_MMXEXT(cpu_flags)) | ||||
|             bwdif->filter_line = ff_bwdif_filter_line_12bit_mmxext; | ||||
| #endif /* ARCH_X86_32 */ | ||||
|         if (EXTERNAL_SSE2(cpu_flags)) | ||||
|             bwdif->filter_line = ff_bwdif_filter_line_12bit_sse2; | ||||
|         if (EXTERNAL_SSSE3(cpu_flags)) | ||||
|             bwdif->filter_line = ff_bwdif_filter_line_12bit_ssse3; | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user