diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S index 539899dae0..97226d1180 100644 --- a/libavcodec/aarch64/vp9itxfm_neon.S +++ b/libavcodec/aarch64/vp9itxfm_neon.S @@ -380,7 +380,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1 .ifc \txfm1\()_\txfm2,idct_idct movrel x4, idct_coeffs .else - movrel x4, iadst8_coeffs + movrel x4, iadst8_coeffs ld1 {v1.8h}, [x4], #16 .endif ld1 {v0.8h}, [x4] @@ -480,23 +480,23 @@ itxfm_func8x8 iadst, iadst function idct16x16_dc_add_neon - movrel x4, idct_coeffs + movrel x4, idct_coeffs ld1 {v0.4h}, [x4] - movi v1.4h, #0 + movi v1.4h, #0 ld1 {v2.h}[0], [x2] - smull v2.4s, v2.4h, v0.h[0] - rshrn v2.4h, v2.4s, #14 - smull v2.4s, v2.4h, v0.h[0] - rshrn v2.4h, v2.4s, #14 + smull v2.4s, v2.4h, v0.h[0] + rshrn v2.4h, v2.4s, #14 + smull v2.4s, v2.4h, v0.h[0] + rshrn v2.4h, v2.4s, #14 dup v2.8h, v2.h[0] st1 {v1.h}[0], [x2] - srshr v2.8h, v2.8h, #6 + srshr v2.8h, v2.8h, #6 - mov x3, x0 - mov x4, #16 + mov x3, x0 + mov x4, #16 1: // Loop to add the constant from v2 into all 16x16 outputs subs x4, x4, #2 @@ -869,7 +869,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1 .ifc \txfm1,idct ld1 {v0.8h,v1.8h}, [x10] .endif - mov x9, #32 + mov x9, #32 .ifc \txfm1\()_\txfm2,idct_idct cmp w3, #10 @@ -1046,10 +1046,10 @@ idct16_partial quarter idct16_partial half function idct32x32_dc_add_neon - movrel x4, idct_coeffs + movrel x4, idct_coeffs ld1 {v0.4h}, [x4] - movi v1.4h, #0 + movi v1.4h, #0 ld1 {v2.h}[0], [x2] smull v2.4s, v2.4h, v0.h[0] @@ -1059,10 +1059,10 @@ function idct32x32_dc_add_neon dup v2.8h, v2.h[0] st1 {v1.h}[0], [x2] - srshr v0.8h, v2.8h, #6 + srshr v0.8h, v2.8h, #6 - mov x3, x0 - mov x4, #32 + mov x3, x0 + mov x4, #32 1: // Loop to add the constant v0 into all 32x32 outputs subs x4, x4, #2 @@ -1230,7 +1230,7 @@ endfunc // x9 = double input stride function idct32_1d_8x32_pass1\suffix\()_neon mov x14, x30 - movi v2.8h, #0 + movi v2.8h, #0 // v16 = IN(0), v17 = IN(2) ... v31 = IN(30) .ifb \suffix @@ -1295,7 +1295,7 @@ function idct32_1d_8x32_pass1\suffix\()_neon .endif add x2, x2, #64 - movi v2.8h, #0 + movi v2.8h, #0 // v16 = IN(1), v17 = IN(3) ... v31 = IN(31) .ifb \suffix .irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S index ab9433ff0f..67a4754ce7 100644 --- a/libavcodec/arm/vp9itxfm_neon.S +++ b/libavcodec/arm/vp9itxfm_neon.S @@ -530,7 +530,7 @@ function idct16x16_dc_add_neon movrel r12, idct_coeffs vld1.16 {d0}, [r12,:64] - vmov.i16 q2, #0 + vmov.i16 q2, #0 vld1.16 {d16[]}, [r2,:16] vmull.s16 q8, d16, d0[0] @@ -793,7 +793,7 @@ function \txfm\()16_1d_4x16_pass1_neon push {lr} mov r12, #32 - vmov.s16 q2, #0 + vmov.s16 q2, #0 .irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 vld1.16 {d\i}, [r2,:64] vst1.16 {d4}, [r2,:64], r12 @@ -1142,7 +1142,7 @@ function idct32x32_dc_add_neon movrel r12, idct_coeffs vld1.16 {d0}, [r12,:64] - vmov.i16 q2, #0 + vmov.i16 q2, #0 vld1.16 {d16[]}, [r2,:16] vmull.s16 q8, d16, d0[0] @@ -1330,7 +1330,7 @@ function idct32_1d_4x32_pass1\suffix\()_neon @ Double stride of the input, since we only read every other line mov r12, #128 - vmov.s16 d4, #0 + vmov.s16 d4, #0 @ d16 = IN(0), d17 = IN(2) ... d31 = IN(30) .ifb \suffix @@ -1394,7 +1394,7 @@ function idct32_1d_4x32_pass1\suffix\()_neon .endif add r2, r2, #64 - vmov.s16 d8, #0 + vmov.s16 d8, #0 @ d16 = IN(1), d17 = IN(3) ... d31 = IN(31) .ifb \suffix .irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 @@ -1533,9 +1533,9 @@ function idct32_1d_4x32_pass2\suffix\()_neon .endif vld1.32 {d12[]}, [r0,:32], r1 vld1.32 {d12[1]}, [r0,:32], r1 - vrshr.s16 q4, q4, #6 + vrshr.s16 q4, q4, #6 vld1.32 {d13[]}, [r0,:32], r1 - vrshr.s16 q5, q5, #6 + vrshr.s16 q5, q5, #6 vld1.32 {d13[1]}, [r0,:32], r1 sub r0, r0, r1, lsl #2 vaddw.u8 q4, q4, d12 diff --git a/libavcodec/arm/vp9lpf_neon.S b/libavcodec/arm/vp9lpf_neon.S index 12984a900c..ae782b2ed0 100644 --- a/libavcodec/arm/vp9lpf_neon.S +++ b/libavcodec/arm/vp9lpf_neon.S @@ -828,7 +828,7 @@ function ff_vp9_loop_filter_v_16_16_neon, export=1 endfunc function vp9_loop_filter_h_16_neon - sub r12, r0, #8 + sub r12, r0, #8 vld1.8 {d16}, [r12,:64], r1 vld1.8 {d24}, [r0, :64], r1 vld1.8 {d17}, [r12,:64], r1