diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index f29bfd715c..01a3d6f590 100644 --- a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -113,15 +113,15 @@ cglobal rv34_idct_dc_noround, 1, 2, 0 movd %1, %2 %endmacro INIT_MMX mmxext -cglobal rv34_idct_add, 3,3,0, d, s, b - ROW_TRANSFORM bq - COL_TRANSFORM [dq], mm0, [pw_col_coeffs+ 0], [pw_col_coeffs+ 8] - mova mm0, [pw_col_coeffs+ 0] - COL_TRANSFORM [dq+sq], mm4, mm0, [pw_col_coeffs+ 8] - mova mm4, [pw_col_coeffs+ 8] - lea dq, [dq + 2*sq] - COL_TRANSFORM [dq], mm6, mm0, mm4 - COL_TRANSFORM [dq+sq], mm7, mm0, mm4 +cglobal rv34_idct_add, 3, 3, 0, dst, s, b + ROW_TRANSFORM bq + COL_TRANSFORM [dstq], mm0, [pw_col_coeffs+ 0], [pw_col_coeffs+ 8] + mova mm0, [pw_col_coeffs+ 0] + COL_TRANSFORM [dstq+sq], mm4, mm0, [pw_col_coeffs+ 8] + mova mm4, [pw_col_coeffs+ 8] + lea dstq, [dstq + 2*sq] + COL_TRANSFORM [dstq], mm6, mm0, mm4 + COL_TRANSFORM [dstq+sq], mm7, mm0, mm4 ret ; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc); diff --git a/libavfilter/x86/vf_overlay.asm b/libavfilter/x86/vf_overlay.asm index 14ec60ca34..8eecbbd2b2 100644 --- a/libavfilter/x86/vf_overlay.asm +++ b/libavfilter/x86/vf_overlay.asm @@ -33,7 +33,7 @@ pw_257: times 8 dw 257 SECTION .text INIT_XMM sse4 -cglobal overlay_row_44, 5, 7, 6, 0, d, da, s, a, w, r, x +cglobal overlay_row_44, 5, 7, 6, 0, dst, da, s, a, w, r, x xor xq, xq movsxdifnidn wq, wd mov rq, wq @@ -47,7 +47,7 @@ cglobal overlay_row_44, 5, 7, 6, 0, d, da, s, a, w, r, x .loop: pmovzxbw m0, [sq+xq] pmovzxbw m2, [aq+xq] - pmovzxbw m1, [dq+xq] + pmovzxbw m1, [dstq+xq] pmullw m0, m2 pxor m2, m3 pmullw m1, m2 @@ -55,7 +55,7 @@ cglobal overlay_row_44, 5, 7, 6, 0, d, da, s, a, w, r, x paddw m0, m1 pmulhuw m0, m5 packuswb m0, m0 - movq [dq+xq], m0 + movq [dstq+xq], m0 add xq, mmsize/2 cmp xq, wq jl .loop @@ -65,7 +65,7 @@ cglobal overlay_row_44, 5, 7, 6, 0, d, da, s, a, w, r, x RET INIT_XMM sse4 -cglobal overlay_row_22, 5, 7, 6, 0, d, da, s, a, w, r, x +cglobal overlay_row_22, 5, 7, 6, 0, dst, da, s, a, w, r, x xor xq, xq movsxdifnidn wq, wd sub wq, 1 @@ -85,7 +85,7 @@ cglobal overlay_row_22, 5, 7, 6, 0, d, da, s, a, w, r, x pavgw m2, m1 pavgw m2, m1 psrlw m2, 8 - pmovzxbw m1, [dq+xq] + pmovzxbw m1, [dstq+xq] pmullw m0, m2 pxor m2, m3 pmullw m1, m2 @@ -93,7 +93,7 @@ cglobal overlay_row_22, 5, 7, 6, 0, d, da, s, a, w, r, x paddw m0, m1 pmulhuw m0, m5 packuswb m0, m0 - movq [dq+xq], m0 + movq [dstq+xq], m0 add xq, mmsize/2 cmp xq, wq jl .loop @@ -103,7 +103,7 @@ cglobal overlay_row_22, 5, 7, 6, 0, d, da, s, a, w, r, x RET INIT_XMM sse4 -cglobal overlay_row_20, 6, 7, 7, 0, d, da, s, a, w, r, x +cglobal overlay_row_20, 6, 7, 7, 0, dst, da, s, a, w, r, x mov daq, aq add daq, rmp xor xq, xq @@ -126,7 +126,7 @@ cglobal overlay_row_20, 6, 7, 7, 0, d, da, s, a, w, r, x pmaddubsw m1, m6 paddw m2, m1 psrlw m2, 2 - pmovzxbw m1, [dq+xq] + pmovzxbw m1, [dstq+xq] pmullw m0, m2 pxor m2, m3 pmullw m1, m2 @@ -134,7 +134,7 @@ cglobal overlay_row_20, 6, 7, 7, 0, d, da, s, a, w, r, x paddw m0, m1 pmulhuw m0, m5 packuswb m0, m0 - movq [dq+xq], m0 + movq [dstq+xq], m0 add xq, mmsize/2 cmp xq, wq jl .loop