You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	Merge commit 'f90ff772e7e35b4923c2de429d1fab9f2569b568'
* commit 'f90ff772e7e35b4923c2de429d1fab9f2569b568':
  Move H264/QPEL specific asm from dsputil.asm to h264_qpel_*.asm.
  doc: update the reference for the title

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		| @@ -68,7 +68,7 @@ $print_page_head = \&FFmpeg_print_page_head; | ||||
| sub FFmpeg_print_page_head($$) | ||||
| { | ||||
|     my $fh = shift; | ||||
|     my $longtitle = "$Texi2HTML::THISDOC{'title_no_texi'}"; | ||||
|     my $longtitle = "$Texi2HTML::THISDOC{'fulltitle_no_texi'}"; | ||||
|     $longtitle .= ": $Texi2HTML::NO_TEXI{'This'}" if exists $Texi2HTML::NO_TEXI{'This'}; | ||||
|     my $description = $DOCUMENT_DESCRIPTION; | ||||
|     $description = $longtitle if (!defined($description)); | ||||
|   | ||||
| @@ -649,194 +649,6 @@ BSWAP32_BUF | ||||
| INIT_XMM ssse3 | ||||
| BSWAP32_BUF | ||||
|  | ||||
| %macro op_avgh 3 | ||||
|     movh   %3, %2 | ||||
|     pavgb  %1, %3 | ||||
|     movh   %2, %1 | ||||
| %endmacro | ||||
|  | ||||
| %macro op_avg 2 | ||||
|     pavgb  %1, %2 | ||||
|     mova   %2, %1 | ||||
| %endmacro | ||||
|  | ||||
| %macro op_puth 2-3 | ||||
|     movh   %2, %1 | ||||
| %endmacro | ||||
|  | ||||
| %macro op_put 2 | ||||
|     mova   %2, %1 | ||||
| %endmacro | ||||
|  | ||||
| ; void {put,avg}_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| %macro PIXELS4_L2 1 | ||||
| %define OP op_%1h | ||||
| cglobal %1_pixels4_l2, 6,6 | ||||
|     movsxdifnidn r3, r3d | ||||
|     movsxdifnidn r4, r4d | ||||
|     test        r5d, 1 | ||||
|     je        .loop | ||||
|     movd         m0, [r1] | ||||
|     movd         m1, [r2] | ||||
|     add          r1, r4 | ||||
|     add          r2, 4 | ||||
|     pavgb        m0, m1 | ||||
|     OP           m0, [r0], m3 | ||||
|     add          r0, r3 | ||||
|     dec         r5d | ||||
| .loop: | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+r4] | ||||
|     lea          r1, [r1+2*r4] | ||||
|     pavgb        m0, [r2] | ||||
|     pavgb        m1, [r2+4] | ||||
|     OP           m0, [r0], m3 | ||||
|     OP           m1, [r0+r3], m3 | ||||
|     lea          r0, [r0+2*r3] | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+r4] | ||||
|     lea          r1, [r1+2*r4] | ||||
|     pavgb        m0, [r2+8] | ||||
|     pavgb        m1, [r2+12] | ||||
|     OP           m0, [r0], m3 | ||||
|     OP           m1, [r0+r3], m3 | ||||
|     lea          r0, [r0+2*r3] | ||||
|     add          r2, 16 | ||||
|     sub         r5d, 4 | ||||
|     jne       .loop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| INIT_MMX mmxext | ||||
| PIXELS4_L2 put | ||||
| PIXELS4_L2 avg | ||||
|  | ||||
| ; void {put,avg}_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| %macro PIXELS8_L2 1 | ||||
| %define OP op_%1 | ||||
| cglobal %1_pixels8_l2, 6,6 | ||||
|     movsxdifnidn r3, r3d | ||||
|     movsxdifnidn r4, r4d | ||||
|     test        r5d, 1 | ||||
|     je        .loop | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r2] | ||||
|     add          r1, r4 | ||||
|     add          r2, 8 | ||||
|     pavgb        m0, m1 | ||||
|     OP           m0, [r0] | ||||
|     add          r0, r3 | ||||
|     dec         r5d | ||||
| .loop: | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+r4] | ||||
|     lea          r1, [r1+2*r4] | ||||
|     pavgb        m0, [r2] | ||||
|     pavgb        m1, [r2+8] | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+r3] | ||||
|     lea          r0, [r0+2*r3] | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+r4] | ||||
|     lea          r1, [r1+2*r4] | ||||
|     pavgb        m0, [r2+16] | ||||
|     pavgb        m1, [r2+24] | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+r3] | ||||
|     lea          r0, [r0+2*r3] | ||||
|     add          r2, 32 | ||||
|     sub         r5d, 4 | ||||
|     jne       .loop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| INIT_MMX mmxext | ||||
| PIXELS8_L2 put | ||||
| PIXELS8_L2 avg | ||||
|  | ||||
| ; void {put,avg}_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| %macro PIXELS16_L2 1 | ||||
| %define OP op_%1 | ||||
| cglobal %1_pixels16_l2, 6,6 | ||||
|     movsxdifnidn r3, r3d | ||||
|     movsxdifnidn r4, r4d | ||||
|     test        r5d, 1 | ||||
|     je        .loop | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+8] | ||||
|     pavgb        m0, [r2] | ||||
|     pavgb        m1, [r2+8] | ||||
|     add          r1, r4 | ||||
|     add          r2, 16 | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+8] | ||||
|     add          r0, r3 | ||||
|     dec         r5d | ||||
| .loop: | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+8] | ||||
|     add          r1, r4 | ||||
|     pavgb        m0, [r2] | ||||
|     pavgb        m1, [r2+8] | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+8] | ||||
|     add          r0, r3 | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+8] | ||||
|     add          r1, r4 | ||||
|     pavgb        m0, [r2+16] | ||||
|     pavgb        m1, [r2+24] | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+8] | ||||
|     add          r0, r3 | ||||
|     add          r2, 32 | ||||
|     sub         r5d, 2 | ||||
|     jne       .loop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| INIT_MMX mmxext | ||||
| PIXELS16_L2 put | ||||
| PIXELS16_L2 avg | ||||
|  | ||||
| INIT_MMX mmxext | ||||
| ; void {put,avg}_pixels{4,8}_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| %macro PIXELS48 2 | ||||
| %if %2 == 4 | ||||
| %define OP movh | ||||
| %else | ||||
| %define OP mova | ||||
| %endif | ||||
| cglobal %1_pixels%2, 4,5 | ||||
|     movsxdifnidn r2, r2d | ||||
|     lea          r4, [r2*3] | ||||
| .loop: | ||||
|     OP           m0, [r1] | ||||
|     OP           m1, [r1+r2] | ||||
|     OP           m2, [r1+r2*2] | ||||
|     OP           m3, [r1+r4] | ||||
|     lea          r1, [r1+r2*4] | ||||
| %ifidn %1, avg | ||||
|     pavgb        m0, [r0] | ||||
|     pavgb        m1, [r0+r2] | ||||
|     pavgb        m2, [r0+r2*2] | ||||
|     pavgb        m3, [r0+r4] | ||||
| %endif | ||||
|     OP         [r0], m0 | ||||
|     OP      [r0+r2], m1 | ||||
|     OP    [r0+r2*2], m2 | ||||
|     OP      [r0+r4], m3 | ||||
|     sub         r3d, 4 | ||||
|     lea          r0, [r0+r2*4] | ||||
|     jne       .loop | ||||
|     RET | ||||
| %endmacro | ||||
|  | ||||
| PIXELS48 put, 4 | ||||
| PIXELS48 avg, 4 | ||||
| PIXELS48 put, 8 | ||||
| PIXELS48 avg, 8 | ||||
|  | ||||
| INIT_XMM sse2 | ||||
| ; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| cglobal put_pixels16, 4,5,4 | ||||
|   | ||||
| @@ -860,3 +860,172 @@ INIT_XMM ssse3 | ||||
| QPEL16_H_LOWPASS_L2_OP put | ||||
| QPEL16_H_LOWPASS_L2_OP avg | ||||
| %endif | ||||
|  | ||||
| ; void {put,avg}_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| %macro PIXELS4_L2 1 | ||||
| %define OP op_%1h | ||||
| cglobal %1_pixels4_l2, 6,6 | ||||
|     movsxdifnidn r3, r3d | ||||
|     movsxdifnidn r4, r4d | ||||
|     test        r5d, 1 | ||||
|     je        .loop | ||||
|     movd         m0, [r1] | ||||
|     movd         m1, [r2] | ||||
|     add          r1, r4 | ||||
|     add          r2, 4 | ||||
|     pavgb        m0, m1 | ||||
|     OP           m0, [r0], m3 | ||||
|     add          r0, r3 | ||||
|     dec         r5d | ||||
| .loop: | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+r4] | ||||
|     lea          r1, [r1+2*r4] | ||||
|     pavgb        m0, [r2] | ||||
|     pavgb        m1, [r2+4] | ||||
|     OP           m0, [r0], m3 | ||||
|     OP           m1, [r0+r3], m3 | ||||
|     lea          r0, [r0+2*r3] | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+r4] | ||||
|     lea          r1, [r1+2*r4] | ||||
|     pavgb        m0, [r2+8] | ||||
|     pavgb        m1, [r2+12] | ||||
|     OP           m0, [r0], m3 | ||||
|     OP           m1, [r0+r3], m3 | ||||
|     lea          r0, [r0+2*r3] | ||||
|     add          r2, 16 | ||||
|     sub         r5d, 4 | ||||
|     jne       .loop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| INIT_MMX mmxext | ||||
| PIXELS4_L2 put | ||||
| PIXELS4_L2 avg | ||||
|  | ||||
| ; void {put,avg}_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| %macro PIXELS8_L2 1 | ||||
| %define OP op_%1 | ||||
| cglobal %1_pixels8_l2, 6,6 | ||||
|     movsxdifnidn r3, r3d | ||||
|     movsxdifnidn r4, r4d | ||||
|     test        r5d, 1 | ||||
|     je        .loop | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r2] | ||||
|     add          r1, r4 | ||||
|     add          r2, 8 | ||||
|     pavgb        m0, m1 | ||||
|     OP           m0, [r0] | ||||
|     add          r0, r3 | ||||
|     dec         r5d | ||||
| .loop: | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+r4] | ||||
|     lea          r1, [r1+2*r4] | ||||
|     pavgb        m0, [r2] | ||||
|     pavgb        m1, [r2+8] | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+r3] | ||||
|     lea          r0, [r0+2*r3] | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+r4] | ||||
|     lea          r1, [r1+2*r4] | ||||
|     pavgb        m0, [r2+16] | ||||
|     pavgb        m1, [r2+24] | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+r3] | ||||
|     lea          r0, [r0+2*r3] | ||||
|     add          r2, 32 | ||||
|     sub         r5d, 4 | ||||
|     jne       .loop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| INIT_MMX mmxext | ||||
| PIXELS8_L2 put | ||||
| PIXELS8_L2 avg | ||||
|  | ||||
| ; void {put,avg}_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) | ||||
| %macro PIXELS16_L2 1 | ||||
| %define OP op_%1 | ||||
| cglobal %1_pixels16_l2, 6,6 | ||||
|     movsxdifnidn r3, r3d | ||||
|     movsxdifnidn r4, r4d | ||||
|     test        r5d, 1 | ||||
|     je        .loop | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+8] | ||||
|     pavgb        m0, [r2] | ||||
|     pavgb        m1, [r2+8] | ||||
|     add          r1, r4 | ||||
|     add          r2, 16 | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+8] | ||||
|     add          r0, r3 | ||||
|     dec         r5d | ||||
| .loop: | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+8] | ||||
|     add          r1, r4 | ||||
|     pavgb        m0, [r2] | ||||
|     pavgb        m1, [r2+8] | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+8] | ||||
|     add          r0, r3 | ||||
|     mova         m0, [r1] | ||||
|     mova         m1, [r1+8] | ||||
|     add          r1, r4 | ||||
|     pavgb        m0, [r2+16] | ||||
|     pavgb        m1, [r2+24] | ||||
|     OP           m0, [r0] | ||||
|     OP           m1, [r0+8] | ||||
|     add          r0, r3 | ||||
|     add          r2, 32 | ||||
|     sub         r5d, 2 | ||||
|     jne       .loop | ||||
|     REP_RET | ||||
| %endmacro | ||||
|  | ||||
| INIT_MMX mmxext | ||||
| PIXELS16_L2 put | ||||
| PIXELS16_L2 avg | ||||
|  | ||||
| INIT_MMX mmxext | ||||
| ; void {put,avg}_pixels{4,8}_mmxext(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
| %macro PIXELS48 2 | ||||
| %if %2 == 4 | ||||
| %define OP movh | ||||
| %else | ||||
| %define OP mova | ||||
| %endif | ||||
| cglobal %1_pixels%2, 4,5 | ||||
|     movsxdifnidn r2, r2d | ||||
|     lea          r4, [r2*3] | ||||
| .loop: | ||||
|     OP           m0, [r1] | ||||
|     OP           m1, [r1+r2] | ||||
|     OP           m2, [r1+r2*2] | ||||
|     OP           m3, [r1+r4] | ||||
|     lea          r1, [r1+r2*4] | ||||
| %ifidn %1, avg | ||||
|     pavgb        m0, [r0] | ||||
|     pavgb        m1, [r0+r2] | ||||
|     pavgb        m2, [r0+r2*2] | ||||
|     pavgb        m3, [r0+r4] | ||||
| %endif | ||||
|     OP         [r0], m0 | ||||
|     OP      [r0+r2], m1 | ||||
|     OP    [r0+r2*2], m2 | ||||
|     OP      [r0+r4], m3 | ||||
|     sub         r3d, 4 | ||||
|     lea          r0, [r0+r2*4] | ||||
|     jne       .loop | ||||
|     RET | ||||
| %endmacro | ||||
|  | ||||
| PIXELS48 put, 4 | ||||
| PIXELS48 avg, 4 | ||||
| PIXELS48 put, 8 | ||||
| PIXELS48 avg, 8 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user