diff --git a/ffmpeg.c b/ffmpeg.c index a9c8ad63a6..926ebe27c7 100644 --- a/ffmpeg.c +++ b/ffmpeg.c @@ -1980,11 +1980,11 @@ static void do_video_out(AVFormatContext *s, OutputStream *ost, if (delta <= -0.6) nb_frames = 0; else if (delta > 0.6) - ost->sync_opts = lrintf(sync_ipts); + ost->sync_opts = lrint(sync_ipts); break; case VSYNC_DROP: case VSYNC_PASSTHROUGH: - ost->sync_opts = lrintf(sync_ipts); + ost->sync_opts = lrint(sync_ipts); break; default: av_assert0(0); diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index 7b301707d9..344f28bea9 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -44,22 +44,22 @@ endfunc .if \avg mov r12, r0 .endif -1: vld1.64 {q0}, [r1], r2 - vld1.64 {q1}, [r1], r2 - vld1.64 {q2}, [r1], r2 +1: vld1.8 {q0}, [r1], r2 + vld1.8 {q1}, [r1], r2 + vld1.8 {q2}, [r1], r2 pld [r1, r2, lsl #2] - vld1.64 {q3}, [r1], r2 + vld1.8 {q3}, [r1], r2 pld [r1] pld [r1, r2] pld [r1, r2, lsl #1] .if \avg - vld1.64 {q8}, [r12,:128], r2 + vld1.8 {q8}, [r12,:128], r2 vrhadd.u8 q0, q0, q8 - vld1.64 {q9}, [r12,:128], r2 + vld1.8 {q9}, [r12,:128], r2 vrhadd.u8 q1, q1, q9 - vld1.64 {q10}, [r12,:128], r2 + vld1.8 {q10}, [r12,:128], r2 vrhadd.u8 q2, q2, q10 - vld1.64 {q11}, [r12,:128], r2 + vld1.8 {q11}, [r12,:128], r2 vrhadd.u8 q3, q3, q11 .endif subs r3, r3, #4 @@ -72,8 +72,8 @@ endfunc .endm .macro pixels16_x2 rnd=1, avg=0 -1: vld1.64 {d0-d2}, [r1], r2 - vld1.64 {d4-d6}, [r1], r2 +1: vld1.8 {d0-d2}, [r1], r2 + vld1.8 {d4-d6}, [r1], r2 pld [r1] pld [r1, r2] subs r3, r3, #2 @@ -88,21 +88,21 @@ endfunc vrhadd.u8 q2, q2, q3 sub r0, r0, r2 .endif - vst1.64 {q0}, [r0,:128], r2 - vst1.64 {q2}, [r0,:128], r2 + vst1.8 {q0}, [r0,:128], r2 + vst1.8 {q2}, [r0,:128], r2 bne 1b bx lr .endm .macro pixels16_y2 rnd=1, avg=0 sub r3, r3, #2 - vld1.64 {q0}, [r1], r2 - vld1.64 {q1}, [r1], r2 + vld1.8 {q0}, [r1], r2 + vld1.8 {q1}, [r1], r2 1: subs r3, r3, #2 avg q2, q0, q1 - vld1.64 {q0}, [r1], r2 + vld1.8 {q0}, [r1], r2 avg q3, q0, q1 - vld1.64 {q1}, [r1], r2 + vld1.8 {q1}, [r1], r2 pld [r1] pld [r1, r2] .if \avg @@ -112,12 +112,12 @@ endfunc vrhadd.u8 q3, q3, q9 sub r0, r0, r2 .endif - vst1.64 {q2}, [r0,:128], r2 - vst1.64 {q3}, [r0,:128], r2 + vst1.8 {q2}, [r0,:128], r2 + vst1.8 {q3}, [r0,:128], r2 bne 1b avg q2, q0, q1 - vld1.64 {q0}, [r1], r2 + vld1.8 {q0}, [r1], r2 avg q3, q0, q1 .if \avg vld1.8 {q8}, [r0,:128], r2 @@ -126,19 +126,17 @@ endfunc vrhadd.u8 q3, q3, q9 sub r0, r0, r2 .endif - vst1.64 {q2}, [r0,:128], r2 - vst1.64 {q3}, [r0,:128], r2 + vst1.8 {q2}, [r0,:128], r2 + vst1.8 {q3}, [r0,:128], r2 bx lr .endm .macro pixels16_xy2 rnd=1, avg=0 sub r3, r3, #2 - vld1.64 {d0-d2}, [r1], r2 - vld1.64 {d4-d6}, [r1], r2 - .ifeq \rnd - vmov.i16 q13, #1 - .endif + vld1.8 {d0-d2}, [r1], r2 + vld1.8 {d4-d6}, [r1], r2 +NRND vmov.i16 q13, #1 pld [r1] pld [r1, r2] vext.8 q1, q0, q1, #1 @@ -148,38 +146,30 @@ endfunc vaddl.u8 q9, d4, d6 vaddl.u8 q11, d5, d7 1: subs r3, r3, #2 - vld1.64 {d0-d2}, [r1], r2 + vld1.8 {d0-d2}, [r1], r2 vadd.u16 q12, q8, q9 pld [r1] - .ifeq \rnd - vadd.u16 q12, q12, q13 - .endif +NRND vadd.u16 q12, q12, q13 vext.8 q15, q0, q1, #1 vadd.u16 q1 , q10, q11 shrn d28, q12, #2 - .ifeq \rnd - vadd.u16 q1, q1, q13 - .endif +NRND vadd.u16 q1, q1, q13 shrn d29, q1, #2 .if \avg vld1.8 {q8}, [r0,:128] vrhadd.u8 q14, q14, q8 .endif vaddl.u8 q8, d0, d30 - vld1.64 {d2-d4}, [r1], r2 + vld1.8 {d2-d4}, [r1], r2 vaddl.u8 q10, d1, d31 - vst1.64 {q14}, [r0,:128], r2 + vst1.8 {q14}, [r0,:128], r2 vadd.u16 q12, q8, q9 pld [r1, r2] - .ifeq \rnd - vadd.u16 q12, q12, q13 - .endif +NRND vadd.u16 q12, q12, q13 vext.8 q2, q1, q2, #1 vadd.u16 q0, q10, q11 shrn d30, q12, #2 - .ifeq \rnd - vadd.u16 q0, q0, q13 - .endif +NRND vadd.u16 q0, q0, q13 shrn d31, q0, #2 .if \avg vld1.8 {q9}, [r0,:128] @@ -187,20 +177,16 @@ endfunc .endif vaddl.u8 q9, d2, d4 vaddl.u8 q11, d3, d5 - vst1.64 {q15}, [r0,:128], r2 + vst1.8 {q15}, [r0,:128], r2 bgt 1b - vld1.64 {d0-d2}, [r1], r2 + vld1.8 {d0-d2}, [r1], r2 vadd.u16 q12, q8, q9 - .ifeq \rnd - vadd.u16 q12, q12, q13 - .endif +NRND vadd.u16 q12, q12, q13 vext.8 q15, q0, q1, #1 vadd.u16 q1 , q10, q11 shrn d28, q12, #2 - .ifeq \rnd - vadd.u16 q1, q1, q13 - .endif +NRND vadd.u16 q1, q1, q13 shrn d29, q1, #2 .if \avg vld1.8 {q8}, [r0,:128] @@ -208,59 +194,55 @@ endfunc .endif vaddl.u8 q8, d0, d30 vaddl.u8 q10, d1, d31 - vst1.64 {q14}, [r0,:128], r2 + vst1.8 {q14}, [r0,:128], r2 vadd.u16 q12, q8, q9 - .ifeq \rnd - vadd.u16 q12, q12, q13 - .endif +NRND vadd.u16 q12, q12, q13 vadd.u16 q0, q10, q11 shrn d30, q12, #2 - .ifeq \rnd - vadd.u16 q0, q0, q13 - .endif +NRND vadd.u16 q0, q0, q13 shrn d31, q0, #2 .if \avg vld1.8 {q9}, [r0,:128] vrhadd.u8 q15, q15, q9 .endif - vst1.64 {q15}, [r0,:128], r2 + vst1.8 {q15}, [r0,:128], r2 bx lr .endm .macro pixels8 rnd=1, avg=0 -1: vld1.64 {d0}, [r1], r2 - vld1.64 {d1}, [r1], r2 - vld1.64 {d2}, [r1], r2 +1: vld1.8 {d0}, [r1], r2 + vld1.8 {d1}, [r1], r2 + vld1.8 {d2}, [r1], r2 pld [r1, r2, lsl #2] - vld1.64 {d3}, [r1], r2 + vld1.8 {d3}, [r1], r2 pld [r1] pld [r1, r2] pld [r1, r2, lsl #1] .if \avg - vld1.64 {d4}, [r0,:64], r2 + vld1.8 {d4}, [r0,:64], r2 vrhadd.u8 d0, d0, d4 - vld1.64 {d5}, [r0,:64], r2 + vld1.8 {d5}, [r0,:64], r2 vrhadd.u8 d1, d1, d5 - vld1.64 {d6}, [r0,:64], r2 + vld1.8 {d6}, [r0,:64], r2 vrhadd.u8 d2, d2, d6 - vld1.64 {d7}, [r0,:64], r2 + vld1.8 {d7}, [r0,:64], r2 vrhadd.u8 d3, d3, d7 sub r0, r0, r2, lsl #2 .endif subs r3, r3, #4 - vst1.64 {d0}, [r0,:64], r2 - vst1.64 {d1}, [r0,:64], r2 - vst1.64 {d2}, [r0,:64], r2 - vst1.64 {d3}, [r0,:64], r2 + vst1.8 {d0}, [r0,:64], r2 + vst1.8 {d1}, [r0,:64], r2 + vst1.8 {d2}, [r0,:64], r2 + vst1.8 {d3}, [r0,:64], r2 bne 1b bx lr .endm .macro pixels8_x2 rnd=1, avg=0 -1: vld1.64 {q0}, [r1], r2 +1: vld1.8 {q0}, [r1], r2 vext.8 d1, d0, d1, #1 - vld1.64 {q1}, [r1], r2 + vld1.8 {q1}, [r1], r2 vext.8 d3, d2, d3, #1 pld [r1] pld [r1, r2] @@ -273,21 +255,21 @@ endfunc vrhadd.u8 q0, q0, q2 sub r0, r0, r2 .endif - vst1.64 {d0}, [r0,:64], r2 - vst1.64 {d1}, [r0,:64], r2 + vst1.8 {d0}, [r0,:64], r2 + vst1.8 {d1}, [r0,:64], r2 bne 1b bx lr .endm .macro pixels8_y2 rnd=1, avg=0 sub r3, r3, #2 - vld1.64 {d0}, [r1], r2 - vld1.64 {d1}, [r1], r2 + vld1.8 {d0}, [r1], r2 + vld1.8 {d1}, [r1], r2 1: subs r3, r3, #2 avg d4, d0, d1 - vld1.64 {d0}, [r1], r2 + vld1.8 {d0}, [r1], r2 avg d5, d0, d1 - vld1.64 {d1}, [r1], r2 + vld1.8 {d1}, [r1], r2 pld [r1] pld [r1, r2] .if \avg @@ -296,12 +278,12 @@ endfunc vrhadd.u8 q2, q2, q1 sub r0, r0, r2 .endif - vst1.64 {d4}, [r0,:64], r2 - vst1.64 {d5}, [r0,:64], r2 + vst1.8 {d4}, [r0,:64], r2 + vst1.8 {d5}, [r0,:64], r2 bne 1b avg d4, d0, d1 - vld1.64 {d0}, [r1], r2 + vld1.8 {d0}, [r1], r2 avg d5, d0, d1 .if \avg vld1.8 {d2}, [r0,:64], r2 @@ -309,19 +291,17 @@ endfunc vrhadd.u8 q2, q2, q1 sub r0, r0, r2 .endif - vst1.64 {d4}, [r0,:64], r2 - vst1.64 {d5}, [r0,:64], r2 + vst1.8 {d4}, [r0,:64], r2 + vst1.8 {d5}, [r0,:64], r2 bx lr .endm .macro pixels8_xy2 rnd=1, avg=0 sub r3, r3, #2 - vld1.64 {q0}, [r1], r2 - vld1.64 {q1}, [r1], r2 - .ifeq \rnd - vmov.i16 q11, #1 - .endif + vld1.8 {q0}, [r1], r2 + vld1.8 {q1}, [r1], r2 +NRND vmov.i16 q11, #1 pld [r1] pld [r1, r2] vext.8 d4, d0, d1, #1 @@ -329,26 +309,22 @@ endfunc vaddl.u8 q8, d0, d4 vaddl.u8 q9, d2, d6 1: subs r3, r3, #2 - vld1.64 {q0}, [r1], r2 + vld1.8 {q0}, [r1], r2 pld [r1] vadd.u16 q10, q8, q9 vext.8 d4, d0, d1, #1 - .ifeq \rnd - vadd.u16 q10, q10, q11 - .endif +NRND vadd.u16 q10, q10, q11 vaddl.u8 q8, d0, d4 shrn d5, q10, #2 - vld1.64 {q1}, [r1], r2 + vld1.8 {q1}, [r1], r2 vadd.u16 q10, q8, q9 pld [r1, r2] .if \avg vld1.8 {d7}, [r0,:64] vrhadd.u8 d5, d5, d7 .endif - .ifeq \rnd - vadd.u16 q10, q10, q11 - .endif - vst1.64 {d5}, [r0,:64], r2 +NRND vadd.u16 q10, q10, q11 + vst1.8 {d5}, [r0,:64], r2 shrn d7, q10, #2 .if \avg vld1.8 {d5}, [r0,:64] @@ -356,15 +332,13 @@ endfunc .endif vext.8 d6, d2, d3, #1 vaddl.u8 q9, d2, d6 - vst1.64 {d7}, [r0,:64], r2 + vst1.8 {d7}, [r0,:64], r2 bgt 1b - vld1.64 {q0}, [r1], r2 + vld1.8 {q0}, [r1], r2 vadd.u16 q10, q8, q9 vext.8 d4, d0, d1, #1 - .ifeq \rnd - vadd.u16 q10, q10, q11 - .endif +NRND vadd.u16 q10, q10, q11 vaddl.u8 q8, d0, d4 shrn d5, q10, #2 vadd.u16 q10, q8, q9 @@ -372,16 +346,14 @@ endfunc vld1.8 {d7}, [r0,:64] vrhadd.u8 d5, d5, d7 .endif - .ifeq \rnd - vadd.u16 q10, q10, q11 - .endif - vst1.64 {d5}, [r0,:64], r2 +NRND vadd.u16 q10, q10, q11 + vst1.8 {d5}, [r0,:64], r2 shrn d7, q10, #2 .if \avg vld1.8 {d5}, [r0,:64] vrhadd.u8 d7, d7, d5 .endif - vst1.64 {d7}, [r0,:64], r2 + vst1.8 {d7}, [r0,:64], r2 bx lr .endm @@ -394,6 +366,8 @@ endfunc .macro shrn rd, rn, rm vrshrn.u16 \rd, \rn, \rm .endm + .macro NRND insn:vararg + .endm .else .macro avg rd, rn, rm vhadd.u8 \rd, \rn, \rm @@ -401,12 +375,16 @@ endfunc .macro shrn rd, rn, rm vshrn.u16 \rd, \rn, \rm .endm + .macro NRND insn:vararg + \insn + .endm .endif function ff_\pfx\name\suf\()_neon, export=1 \name \rnd, \avg endfunc .purgem avg .purgem shrn + .purgem NRND .endm .macro pixfunc2 pfx, name, avg=0 @@ -451,147 +429,147 @@ endfunc pixfunc2 avg_, pixels8_xy2, avg=1 function ff_put_pixels_clamped_neon, export=1 - vld1.64 {d16-d19}, [r0,:128]! + vld1.16 {d16-d19}, [r0,:128]! vqmovun.s16 d0, q8 - vld1.64 {d20-d23}, [r0,:128]! + vld1.16 {d20-d23}, [r0,:128]! vqmovun.s16 d1, q9 - vld1.64 {d24-d27}, [r0,:128]! + vld1.16 {d24-d27}, [r0,:128]! vqmovun.s16 d2, q10 - vld1.64 {d28-d31}, [r0,:128]! + vld1.16 {d28-d31}, [r0,:128]! vqmovun.s16 d3, q11 - vst1.64 {d0}, [r1,:64], r2 + vst1.8 {d0}, [r1,:64], r2 vqmovun.s16 d4, q12 - vst1.64 {d1}, [r1,:64], r2 + vst1.8 {d1}, [r1,:64], r2 vqmovun.s16 d5, q13 - vst1.64 {d2}, [r1,:64], r2 + vst1.8 {d2}, [r1,:64], r2 vqmovun.s16 d6, q14 - vst1.64 {d3}, [r1,:64], r2 + vst1.8 {d3}, [r1,:64], r2 vqmovun.s16 d7, q15 - vst1.64 {d4}, [r1,:64], r2 - vst1.64 {d5}, [r1,:64], r2 - vst1.64 {d6}, [r1,:64], r2 - vst1.64 {d7}, [r1,:64], r2 + vst1.8 {d4}, [r1,:64], r2 + vst1.8 {d5}, [r1,:64], r2 + vst1.8 {d6}, [r1,:64], r2 + vst1.8 {d7}, [r1,:64], r2 bx lr endfunc function ff_put_signed_pixels_clamped_neon, export=1 vmov.u8 d31, #128 - vld1.64 {d16-d17}, [r0,:128]! + vld1.16 {d16-d17}, [r0,:128]! vqmovn.s16 d0, q8 - vld1.64 {d18-d19}, [r0,:128]! + vld1.16 {d18-d19}, [r0,:128]! vqmovn.s16 d1, q9 - vld1.64 {d16-d17}, [r0,:128]! + vld1.16 {d16-d17}, [r0,:128]! vqmovn.s16 d2, q8 - vld1.64 {d18-d19}, [r0,:128]! + vld1.16 {d18-d19}, [r0,:128]! vadd.u8 d0, d0, d31 - vld1.64 {d20-d21}, [r0,:128]! + vld1.16 {d20-d21}, [r0,:128]! vadd.u8 d1, d1, d31 - vld1.64 {d22-d23}, [r0,:128]! + vld1.16 {d22-d23}, [r0,:128]! vadd.u8 d2, d2, d31 - vst1.64 {d0}, [r1,:64], r2 + vst1.8 {d0}, [r1,:64], r2 vqmovn.s16 d3, q9 - vst1.64 {d1}, [r1,:64], r2 + vst1.8 {d1}, [r1,:64], r2 vqmovn.s16 d4, q10 - vst1.64 {d2}, [r1,:64], r2 + vst1.8 {d2}, [r1,:64], r2 vqmovn.s16 d5, q11 - vld1.64 {d24-d25}, [r0,:128]! + vld1.16 {d24-d25}, [r0,:128]! vadd.u8 d3, d3, d31 - vld1.64 {d26-d27}, [r0,:128]! + vld1.16 {d26-d27}, [r0,:128]! vadd.u8 d4, d4, d31 vadd.u8 d5, d5, d31 - vst1.64 {d3}, [r1,:64], r2 + vst1.8 {d3}, [r1,:64], r2 vqmovn.s16 d6, q12 - vst1.64 {d4}, [r1,:64], r2 + vst1.8 {d4}, [r1,:64], r2 vqmovn.s16 d7, q13 - vst1.64 {d5}, [r1,:64], r2 + vst1.8 {d5}, [r1,:64], r2 vadd.u8 d6, d6, d31 vadd.u8 d7, d7, d31 - vst1.64 {d6}, [r1,:64], r2 - vst1.64 {d7}, [r1,:64], r2 + vst1.8 {d6}, [r1,:64], r2 + vst1.8 {d7}, [r1,:64], r2 bx lr endfunc function ff_add_pixels_clamped_neon, export=1 mov r3, r1 - vld1.64 {d16}, [r1,:64], r2 - vld1.64 {d0-d1}, [r0,:128]! + vld1.8 {d16}, [r1,:64], r2 + vld1.16 {d0-d1}, [r0,:128]! vaddw.u8 q0, q0, d16 - vld1.64 {d17}, [r1,:64], r2 - vld1.64 {d2-d3}, [r0,:128]! + vld1.8 {d17}, [r1,:64], r2 + vld1.16 {d2-d3}, [r0,:128]! vqmovun.s16 d0, q0 - vld1.64 {d18}, [r1,:64], r2 + vld1.8 {d18}, [r1,:64], r2 vaddw.u8 q1, q1, d17 - vld1.64 {d4-d5}, [r0,:128]! + vld1.16 {d4-d5}, [r0,:128]! vaddw.u8 q2, q2, d18 - vst1.64 {d0}, [r3,:64], r2 + vst1.8 {d0}, [r3,:64], r2 vqmovun.s16 d2, q1 - vld1.64 {d19}, [r1,:64], r2 - vld1.64 {d6-d7}, [r0,:128]! + vld1.8 {d19}, [r1,:64], r2 + vld1.16 {d6-d7}, [r0,:128]! vaddw.u8 q3, q3, d19 vqmovun.s16 d4, q2 - vst1.64 {d2}, [r3,:64], r2 - vld1.64 {d16}, [r1,:64], r2 + vst1.8 {d2}, [r3,:64], r2 + vld1.8 {d16}, [r1,:64], r2 vqmovun.s16 d6, q3 - vld1.64 {d0-d1}, [r0,:128]! + vld1.16 {d0-d1}, [r0,:128]! vaddw.u8 q0, q0, d16 - vst1.64 {d4}, [r3,:64], r2 - vld1.64 {d17}, [r1,:64], r2 - vld1.64 {d2-d3}, [r0,:128]! + vst1.8 {d4}, [r3,:64], r2 + vld1.8 {d17}, [r1,:64], r2 + vld1.16 {d2-d3}, [r0,:128]! vaddw.u8 q1, q1, d17 - vst1.64 {d6}, [r3,:64], r2 + vst1.8 {d6}, [r3,:64], r2 vqmovun.s16 d0, q0 - vld1.64 {d18}, [r1,:64], r2 - vld1.64 {d4-d5}, [r0,:128]! + vld1.8 {d18}, [r1,:64], r2 + vld1.16 {d4-d5}, [r0,:128]! vaddw.u8 q2, q2, d18 - vst1.64 {d0}, [r3,:64], r2 + vst1.8 {d0}, [r3,:64], r2 vqmovun.s16 d2, q1 - vld1.64 {d19}, [r1,:64], r2 + vld1.8 {d19}, [r1,:64], r2 vqmovun.s16 d4, q2 - vld1.64 {d6-d7}, [r0,:128]! + vld1.16 {d6-d7}, [r0,:128]! vaddw.u8 q3, q3, d19 - vst1.64 {d2}, [r3,:64], r2 + vst1.8 {d2}, [r3,:64], r2 vqmovun.s16 d6, q3 - vst1.64 {d4}, [r3,:64], r2 - vst1.64 {d6}, [r3,:64], r2 + vst1.8 {d4}, [r3,:64], r2 + vst1.8 {d6}, [r3,:64], r2 bx lr endfunc function ff_vector_fmul_neon, export=1 subs r3, r3, #8 - vld1.64 {d0-d3}, [r1,:128]! - vld1.64 {d4-d7}, [r2,:128]! + vld1.32 {d0-d3}, [r1,:128]! + vld1.32 {d4-d7}, [r2,:128]! vmul.f32 q8, q0, q2 vmul.f32 q9, q1, q3 beq 3f bics ip, r3, #15 beq 2f 1: subs ip, ip, #16 - vld1.64 {d0-d1}, [r1,:128]! - vld1.64 {d4-d5}, [r2,:128]! + vld1.32 {d0-d1}, [r1,:128]! + vld1.32 {d4-d5}, [r2,:128]! vmul.f32 q10, q0, q2 - vld1.64 {d2-d3}, [r1,:128]! - vld1.64 {d6-d7}, [r2,:128]! + vld1.32 {d2-d3}, [r1,:128]! + vld1.32 {d6-d7}, [r2,:128]! vmul.f32 q11, q1, q3 - vst1.64 {d16-d19},[r0,:128]! - vld1.64 {d0-d1}, [r1,:128]! - vld1.64 {d4-d5}, [r2,:128]! + vst1.32 {d16-d19},[r0,:128]! + vld1.32 {d0-d1}, [r1,:128]! + vld1.32 {d4-d5}, [r2,:128]! vmul.f32 q8, q0, q2 - vld1.64 {d2-d3}, [r1,:128]! - vld1.64 {d6-d7}, [r2,:128]! + vld1.32 {d2-d3}, [r1,:128]! + vld1.32 {d6-d7}, [r2,:128]! vmul.f32 q9, q1, q3 - vst1.64 {d20-d23},[r0,:128]! + vst1.32 {d20-d23},[r0,:128]! bne 1b ands r3, r3, #15 beq 3f -2: vld1.64 {d0-d1}, [r1,:128]! - vld1.64 {d4-d5}, [r2,:128]! - vst1.64 {d16-d17},[r0,:128]! +2: vld1.32 {d0-d1}, [r1,:128]! + vld1.32 {d4-d5}, [r2,:128]! + vst1.32 {d16-d17},[r0,:128]! vmul.f32 q8, q0, q2 - vld1.64 {d2-d3}, [r1,:128]! - vld1.64 {d6-d7}, [r2,:128]! - vst1.64 {d18-d19},[r0,:128]! + vld1.32 {d2-d3}, [r1,:128]! + vld1.32 {d6-d7}, [r2,:128]! + vst1.32 {d18-d19},[r0,:128]! vmul.f32 q9, q1, q3 -3: vst1.64 {d16-d19},[r0,:128]! +3: vst1.32 {d16-d19},[r0,:128]! bx lr endfunc @@ -604,10 +582,10 @@ function ff_vector_fmul_window_neon, export=1 add r4, r3, r5, lsl #3 add ip, r0, r5, lsl #3 mov r5, #-16 - vld1.64 {d0,d1}, [r1,:128]! - vld1.64 {d2,d3}, [r2,:128], r5 - vld1.64 {d4,d5}, [r3,:128]! - vld1.64 {d6,d7}, [r4,:128], r5 + vld1.32 {d0,d1}, [r1,:128]! + vld1.32 {d2,d3}, [r2,:128], r5 + vld1.32 {d4,d5}, [r3,:128]! + vld1.32 {d6,d7}, [r4,:128], r5 1: subs lr, lr, #4 vmul.f32 d22, d0, d4 vrev64.32 q3, q3 @@ -617,19 +595,19 @@ function ff_vector_fmul_window_neon, export=1 vmul.f32 d21, d1, d6 beq 2f vmla.f32 d22, d3, d7 - vld1.64 {d0,d1}, [r1,:128]! + vld1.32 {d0,d1}, [r1,:128]! vmla.f32 d23, d2, d6 - vld1.64 {d18,d19},[r2,:128], r5 + vld1.32 {d18,d19},[r2,:128], r5 vmls.f32 d20, d3, d4 - vld1.64 {d24,d25},[r3,:128]! + vld1.32 {d24,d25},[r3,:128]! vmls.f32 d21, d2, d5 - vld1.64 {d6,d7}, [r4,:128], r5 + vld1.32 {d6,d7}, [r4,:128], r5 vmov q1, q9 vrev64.32 q11, q11 vmov q2, q12 vswp d22, d23 - vst1.64 {d20,d21},[r0,:128]! - vst1.64 {d22,d23},[ip,:128], r5 + vst1.32 {d20,d21},[r0,:128]! + vst1.32 {d22,d23},[ip,:128], r5 b 1b 2: vmla.f32 d22, d3, d7 vmla.f32 d23, d2, d6 @@ -637,8 +615,8 @@ function ff_vector_fmul_window_neon, export=1 vmls.f32 d21, d2, d5 vrev64.32 q11, q11 vswp d22, d23 - vst1.64 {d20,d21},[r0,:128]! - vst1.64 {d22,d23},[ip,:128], r5 + vst1.32 {d20,d21},[r0,:128]! + vst1.32 {d22,d23},[ip,:128], r5 pop {r4,r5,pc} endfunc diff --git a/libavcodec/vqavideo.c b/libavcodec/vqavideo.c index 6874b75201..a93f4322ab 100644 --- a/libavcodec/vqavideo.c +++ b/libavcodec/vqavideo.c @@ -473,11 +473,9 @@ static int vqa_decode_chunk(VqaContext *s) index_shift = 4; else index_shift = 3; - for (y = 0; y < s->frame.linesize[0] * s->height; - y += s->frame.linesize[0] * s->vector_height) { - - for (x = y; x < y + s->width; x += 4, lobytes++, hibytes++) { - pixel_ptr = x; + for (y = 0; y < s->height; y += s->vector_height) { + for (x = 0; x < s->width; x += 4, lobytes++, hibytes++) { + pixel_ptr = y * s->frame.linesize[0] + x; /* get the vector index, the method for which varies according to * VQA file version */ diff --git a/tests/fate/vpx.mak b/tests/fate/vpx.mak index e468735e21..c66eb4624e 100644 --- a/tests/fate/vpx.mak +++ b/tests/fate/vpx.mak @@ -1,12 +1,3 @@ -FATE_EA_VP6 += fate-ea-vp60 -fate-ea-vp60: CMD = framecrc -i $(SAMPLES)/ea-vp6/g36.vp6 - -FATE_EA_VP6 += fate-ea-vp61 -fate-ea-vp61: CMD = framecrc -i $(SAMPLES)/ea-vp6/MovieSkirmishGondor.vp6 -t 4 - -FATE_AVCONV += $(FATE_EA_VP6) -fate-ea-vp6: $(FATE_EA_VP6) - FATE_VP3 += fate-vp31 fate-vp31: CMD = framecrc -i $(SAMPLES)/vp3/vp31.avi @@ -19,12 +10,21 @@ fate-vp3: $(FATE_VP3) FATE_AVCONV += fate-vp5 fate-vp5: CMD = framecrc -i $(SAMPLES)/vp5/potter512-400-partial.avi -an -FATE_AVCONV += fate-vp6a +FATE_VP6 += fate-vp60 +fate-vp60: CMD = framecrc -i $(SAMPLES)/ea-vp6/g36.vp6 + +FATE_VP6 += fate-vp61 +fate-vp61: CMD = framecrc -i $(SAMPLES)/ea-vp6/MovieSkirmishGondor.vp6 -t 4 + +FATE_VP6 += fate-vp6a fate-vp6a: CMD = framecrc -i $(SAMPLES)/flash-vp6/300x180-Scr-f8-056alpha.flv -FATE_AVCONV += fate-vp6f +FATE_VP6 += fate-vp6f fate-vp6f: CMD = framecrc -i $(SAMPLES)/flash-vp6/clip1024.flv +FATE_AVCONV += $(FATE_VP6) +fate-vp6: $(FATE_VP6) + VP8_SUITE = 001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 define FATE_VP8_SUITE diff --git a/tests/ref/fate/ea-vp60 b/tests/ref/fate/vp60 similarity index 100% rename from tests/ref/fate/ea-vp60 rename to tests/ref/fate/vp60 diff --git a/tests/ref/fate/ea-vp61 b/tests/ref/fate/vp61 similarity index 100% rename from tests/ref/fate/ea-vp61 rename to tests/ref/fate/vp61