You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	vp9/x86: add ff_vp9_loop_filter_[vh]_16_16_sse2().
Performance gains are similar to those of the SSSE3 version.
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
		
				
					committed by
					
						 Clément Bœsch
						Clément Bœsch
					
				
			
			
				
	
			
			
			
						parent
						
							3ca7085ae2
						
					
				
				
					commit
					d2a7314f1e
				
			| @@ -177,10 +177,17 @@ itxfm_func(idct, idct, 32, avx); | ||||
| #undef itxfm_func | ||||
| #undef itxfm_funcs | ||||
|  | ||||
| void ff_vp9_loop_filter_v_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H); | ||||
| void ff_vp9_loop_filter_v_16_16_avx  (uint8_t *dst, ptrdiff_t stride, int E, int I, int H); | ||||
| void ff_vp9_loop_filter_h_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H); | ||||
| void ff_vp9_loop_filter_h_16_16_avx  (uint8_t *dst, ptrdiff_t stride, int E, int I, int H); | ||||
| #define lpf_funcs(size1, size2, opt) \ | ||||
| void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ | ||||
|                                                     int E, int I, int H); \ | ||||
| void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ | ||||
|                                                     int E, int I, int H) | ||||
|  | ||||
| lpf_funcs(16, 16, sse2); | ||||
| lpf_funcs(16, 16, ssse3); | ||||
| lpf_funcs(16, 16, avx); | ||||
|  | ||||
| #undef lpf_funcs | ||||
|  | ||||
| #endif /* HAVE_YASM */ | ||||
|  | ||||
| @@ -230,6 +237,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) | ||||
|         init_fpel(2, 1, 16, avg, sse2); | ||||
|         init_fpel(1, 1, 32, avg, sse2); | ||||
|         init_fpel(0, 1, 64, avg, sse2); | ||||
|         if (ARCH_X86_64) { | ||||
|             dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2; | ||||
|             dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     if (EXTERNAL_SSSE3(cpu_flags)) { | ||||
|   | ||||
| @@ -284,11 +284,11 @@ SECTION .text | ||||
| %endif | ||||
|  | ||||
|     ; calc fm mask | ||||
| %if cpuflag(ssse3) | ||||
|     pxor                m0, m0 | ||||
|     movd                m2, Id | ||||
|     movd                m3, Ed | ||||
|     pshufb              m2, m0                          ; I I I I ... | ||||
|     pshufb              m3, m0                          ; E E E E ... | ||||
| %endif | ||||
|     SPLATB_REG          m2, I, m0                       ; I I I I ... | ||||
|     SPLATB_REG          m3, E, m0                       ; E E E E ... | ||||
|     mova                m0, [pb_80] | ||||
|     pxor                m2, m0 | ||||
|     pxor                m3, m0 | ||||
| @@ -340,9 +340,10 @@ SECTION .text | ||||
|     ABSSUB_CMP          m1, m9, m11, m6, m4, m5, m8     ; abs(p2 - p0) <= 1 | ||||
|     pand                m2, m1 | ||||
|     ABSSUB              m4, m10, m11, m5                ; abs(p1 - p0) | ||||
| %if cpuflag(ssse3) | ||||
|     pxor                m0, m0 | ||||
|     movd                m7, Hd | ||||
|     pshufb              m7, m0                          ; H H H H ... | ||||
| %endif | ||||
|     SPLATB_REG          m7, H, m0                       ; H H H H ... | ||||
|     pxor                m7, m8 | ||||
|     pxor                m4, m8 | ||||
|     pcmpgtb             m0, m4, m7                      ; abs(p1 - p0) > H (1/2 hev condition) | ||||
| @@ -665,6 +666,7 @@ cglobal vp9_loop_filter_h_16_16, 5,8,16, 256, dst, stride, E, I, H, mstride, dst | ||||
|     RET | ||||
| %endmacro | ||||
|  | ||||
| LPF_16_16_VH sse2 | ||||
| LPF_16_16_VH ssse3 | ||||
| LPF_16_16_VH avx | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user