You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	swscale/x86/output: Fix yuv2planeX_16* with unaligned destination
Reviewed-by: BBB
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
		| @@ -54,8 +54,8 @@ SECTION .text | |||||||
| ; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple | ; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple | ||||||
| ; of 2. $offset is either 0 or 3. $dither holds 8 values. | ; of 2. $offset is either 0 or 3. $dither holds 8 values. | ||||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||||
| %macro yuv2planeX_mainloop 1 | %macro yuv2planeX_mainloop 2 | ||||||
| .pixelloop: | .pixelloop_%2: | ||||||
| %assign %%i 0 | %assign %%i 0 | ||||||
|     ; the rep here is for the 8bit output mmx case, where dither covers |     ; the rep here is for the 8bit output mmx case, where dither covers | ||||||
|     ; 8 pixels but we can only handle 2 pixels per register, and thus 4 |     ; 8 pixels but we can only handle 2 pixels per register, and thus 4 | ||||||
| @@ -82,7 +82,7 @@ SECTION .text | |||||||
|     mova            m2,  m1 |     mova            m2,  m1 | ||||||
| %endif ; %1 == 8/9/10/16 | %endif ; %1 == 8/9/10/16 | ||||||
|     movsx     cntr_reg,  fltsizem |     movsx     cntr_reg,  fltsizem | ||||||
| .filterloop_ %+ %%i: | .filterloop_%2_ %+ %%i: | ||||||
|     ; input pixels |     ; input pixels | ||||||
|     mov             r6, [srcq+gprsize*cntr_reg-2*gprsize] |     mov             r6, [srcq+gprsize*cntr_reg-2*gprsize] | ||||||
| %if %1 == 16 | %if %1 == 16 | ||||||
| @@ -129,7 +129,7 @@ SECTION .text | |||||||
| %endif ; %1 == 8/9/10/16 | %endif ; %1 == 8/9/10/16 | ||||||
|  |  | ||||||
|     sub       cntr_reg,  2 |     sub       cntr_reg,  2 | ||||||
|     jg .filterloop_ %+ %%i |     jg .filterloop_%2_ %+ %%i | ||||||
|  |  | ||||||
| %if %1 == 16 | %if %1 == 16 | ||||||
|     psrad           m2,  31 - %1 |     psrad           m2,  31 - %1 | ||||||
| @@ -156,7 +156,7 @@ SECTION .text | |||||||
| %endif ; mmxext/sse2/sse4/avx | %endif ; mmxext/sse2/sse4/avx | ||||||
|     pminsw          m2, [yuv2yuvX_%1_upper] |     pminsw          m2, [yuv2yuvX_%1_upper] | ||||||
| %endif ; %1 == 9/10/16 | %endif ; %1 == 9/10/16 | ||||||
|     mova   [dstq+r5*2],  m2 |     mov%2   [dstq+r5*2],  m2 | ||||||
| %endif ; %1 == 8/9/10/16 | %endif ; %1 == 8/9/10/16 | ||||||
|  |  | ||||||
|     add             r5,  mmsize/2 |     add             r5,  mmsize/2 | ||||||
| @@ -164,7 +164,7 @@ SECTION .text | |||||||
|  |  | ||||||
| %assign %%i %%i+2 | %assign %%i %%i+2 | ||||||
| %endrep | %endrep | ||||||
|     jg .pixelloop |     jg .pixelloop_%2 | ||||||
| %endmacro | %endmacro | ||||||
|  |  | ||||||
| %macro yuv2planeX_fn 3 | %macro yuv2planeX_fn 3 | ||||||
| @@ -235,7 +235,16 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset | |||||||
|  |  | ||||||
|     xor             r5,  r5 |     xor             r5,  r5 | ||||||
|  |  | ||||||
| yuv2planeX_mainloop %1 | %if mmsize == 8 || %1 == 8 | ||||||
|  |     yuv2planeX_mainloop %1, a | ||||||
|  | %else ; mmsize == 16 | ||||||
|  |     test          dstq, 15 | ||||||
|  |     jnz .unaligned | ||||||
|  |     yuv2planeX_mainloop %1, a | ||||||
|  |     REP_RET | ||||||
|  | .unaligned: | ||||||
|  |     yuv2planeX_mainloop %1, u | ||||||
|  | %endif ; mmsize == 8/16 | ||||||
|  |  | ||||||
| %if %1 == 8 | %if %1 == 8 | ||||||
| %if ARCH_X86_32 | %if ARCH_X86_32 | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user