You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	sws/x86: improve rounding for yuv2yuvX
This tries to compensate for the errors introduced by the rounding of pmulhw.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		| @@ -226,10 +226,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, | ||||
|                          :: "r"(dither) | ||||
|                          ); | ||||
|     } | ||||
|     filterSize--; | ||||
|     __asm__ volatile( | ||||
|         "pxor      %%xmm0, %%xmm0\n\t" | ||||
|         "punpcklbw %%xmm0, %%xmm3\n\t" | ||||
|         "psraw        $4, %%xmm3\n\t" | ||||
|         "movd          %0, %%xmm1\n\t" | ||||
|         "punpcklwd %%xmm1, %%xmm1\n\t" | ||||
|         "punpckldq %%xmm1, %%xmm1\n\t" | ||||
|         "punpcklqdq %%xmm1, %%xmm1\n\t" | ||||
|         "psllw         $3, %%xmm1\n\t" | ||||
|         "paddw     %%xmm1, %%xmm3\n\t" | ||||
|         "psraw         $4, %%xmm3\n\t" | ||||
|         ::"m"(filterSize) | ||||
|      ); | ||||
|     __asm__ volatile( | ||||
|         "movdqa    %%xmm3, %%xmm4\n\t" | ||||
|         "movdqa    %%xmm3, %%xmm7\n\t" | ||||
|         "movl %3, %%ecx\n\t" | ||||
|   | ||||
| @@ -71,9 +71,20 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, | ||||
|                            const uint8_t *dither, int offset) | ||||
| { | ||||
|     dither_8to16(dither, offset); | ||||
|     __asm__ volatile(\ | ||||
|     filterSize--; | ||||
|     __asm__ volatile( | ||||
|         "movd %0, %%mm1\n\t" | ||||
|         "punpcklwd %%mm1, %%mm1\n\t" | ||||
|         "punpckldq %%mm1, %%mm1\n\t" | ||||
|         "psllw        $3, %%mm1\n\t" | ||||
|         "paddw     %%mm1, %%mm3\n\t" | ||||
|         "paddw     %%mm1, %%mm4\n\t" | ||||
|         "psraw        $4, %%mm3\n\t" | ||||
|         "psraw        $4, %%mm4\n\t" | ||||
|         ::"m"(filterSize) | ||||
|      ); | ||||
|  | ||||
|     __asm__ volatile(\ | ||||
|         "movq    %%mm3, %%mm6\n\t" | ||||
|         "movq    %%mm4, %%mm7\n\t" | ||||
|         "movl %3, %%ecx\n\t" | ||||
|   | ||||
		Reference in New Issue
	
	Block a user