Patch summary (free text ahead of the first "diff --git" header; patch tools
skip it).

Purpose: in libavcodec/x86/dsputilenc.asm, drop the helper macro DCT_SAD4 and
fold its body into SUM_ABS_DCTELEM as a NASM %rep loop.

What changes:
  * SUM_ABS_DCTELEM takes a second argument, the %rep repeat count.
  * Each repetition loads four mmsize-wide vectors from
    blockq + mmsize*(i+0 .. i+3) into m2..m5, feeds them through ABS1_SUM
    (m6 is passed as the second operand, m0 and m1 alternate as the third),
    then advances the macro-local counter %%i by 4.
  * The MMX and MMXEXT instantiations pass 4 repetitions; the SSE2 and SSSE3
    instantiations pass 2.

Coverage check: the removed code tested "mmsize == 8" for the MMX-only
steps. Assuming mmsize is 8 under INIT_MMX and 16 under INIT_XMM (x86inc
convention, TODO confirm), both variants read 4*4*8 = 2*4*16 = 128 bytes
starting at block, the same byte range (0..127) the removed DCT_SAD4 calls
read.

NOTE(review): on the 16-byte path the sequence of loads and the m0/m1
alternation are the same as before. On the 8-byte path the old code visited
offsets 0,16,32,48 and then 8,24,40,56, while the new code visits 0,8,16,24,...
so the 8-byte chunks are split differently between m0 and m1. ABS1_SUM is
defined outside this patch; if it accumulates with saturation, confirm the
different split cannot change the result.

NOTE(review): this patch text had been collapsed onto a single line. Line
breaks below were restored and cross-checked against the hunk headers
(34/28 and 10/10 lines). Column alignment inside lines could not be
recovered, so apply with whitespace tolerance or regenerate the patch from
the repository.

diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index c06f28b978..46330fe116 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -487,34 +487,28 @@ cglobal pix_norm1, 2, 4
     movd eax, m1
     RET
 
-%macro DCT_SAD4 1
-    mova m2, [blockq+%1+0 ]
-    mova m3, [blockq+%1+16]
-    mova m4, [blockq+%1+32]
-    mova m5, [blockq+%1+48]
-    ABS1_SUM m2, m6, m0
-    ABS1_SUM m3, m6, m1
-    ABS1_SUM m4, m6, m0
-    ABS1_SUM m5, m6, m1
-%endmacro
-
 ;-----------------------------------------------
 ;int ff_sum_abs_dctelem(int16_t *block)
 ;-----------------------------------------------
 ; %1 = number of xmm registers used
+; %2 = number of inline loops
 
-%macro SUM_ABS_DCTELEM 1
+%macro SUM_ABS_DCTELEM 2
 cglobal sum_abs_dctelem, 1, 1, %1, block
     pxor m0, m0
     pxor m1, m1
-    DCT_SAD4 0
-%if mmsize == 8
-    DCT_SAD4 8
-%endif
-    DCT_SAD4 64
-%if mmsize == 8
-    DCT_SAD4 72
-%endif
+%assign %%i 0
+%rep %2
+    mova m2, [blockq+mmsize*(0+%%i)]
+    mova m3, [blockq+mmsize*(1+%%i)]
+    mova m4, [blockq+mmsize*(2+%%i)]
+    mova m5, [blockq+mmsize*(3+%%i)]
+    ABS1_SUM m2, m6, m0
+    ABS1_SUM m3, m6, m1
+    ABS1_SUM m4, m6, m0
+    ABS1_SUM m5, m6, m1
+%assign %%i %%i+4
+%endrep
     paddusw m0, m1
     HSUM m0, m1, eax
     and eax, 0xFFFF
@@ -522,10 +516,10 @@ cglobal sum_abs_dctelem, 1, 1, %1, block
 %endmacro
 
 INIT_MMX mmx
-SUM_ABS_DCTELEM 0
+SUM_ABS_DCTELEM 0, 4
 INIT_MMX mmxext
-SUM_ABS_DCTELEM 0
+SUM_ABS_DCTELEM 0, 4
 INIT_XMM sse2
-SUM_ABS_DCTELEM 7
+SUM_ABS_DCTELEM 7, 2
 INIT_XMM ssse3
-SUM_ABS_DCTELEM 6
+SUM_ABS_DCTELEM 6, 2