Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2024-11-21 10:55:51 +02:00)
vp8: Use 2 registers for dst_stride and src_stride in neon bilin filter

Based on a patch by Ronald S. Bultje.

Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
Parent: 5a0bccd281
Commit: 49ec551595
@ -1576,18 +1576,18 @@ endconst
|
||||
/* Bilinear MC */
|
||||
|
||||
function ff_put_vp8_bilin16_h_neon, export=1
|
||||
ldr r3, [sp, #4] @ mx
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #4] @ mx
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r12, [sp] @ h
|
||||
1:
|
||||
subs r12, r12, #2
|
||||
vld1.8 {d2-d4}, [r2], r1
|
||||
vld1.8 {d2-d4}, [r2], r3
|
||||
vext.8 q2, q1, q2, #1
|
||||
vmull.u8 q8, d2, d1
|
||||
vmlal.u8 q8, d4, d0
|
||||
vld1.8 {d18-d20},[r2], r1
|
||||
vld1.8 {d18-d20},[r2], r3
|
||||
vmull.u8 q3, d3, d1
|
||||
vmlal.u8 q3, d5, d0
|
||||
vext.8 q10, q9, q10, #1
|
||||
@ -1607,20 +1607,20 @@ function ff_put_vp8_bilin16_h_neon, export=1
|
||||
endfunc
|
||||
|
||||
function ff_put_vp8_bilin16_v_neon, export=1
|
||||
ldr r3, [sp, #8] @ my
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #8] @ my
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r12, [sp] @ h
|
||||
vld1.8 {q1}, [r2], r1
|
||||
vld1.8 {q1}, [r2], r3
|
||||
1:
|
||||
subs r12, r12, #2
|
||||
vld1.8 {q2}, [r2], r1
|
||||
vld1.8 {q2}, [r2], r3
|
||||
vmull.u8 q3, d2, d1
|
||||
vmlal.u8 q3, d4, d0
|
||||
vmull.u8 q8, d3, d1
|
||||
vmlal.u8 q8, d5, d0
|
||||
vld1.8 {q1}, [r2], r1
|
||||
vld1.8 {q1}, [r2], r3
|
||||
vmull.u8 q9, d4, d1
|
||||
vmlal.u8 q9, d2, d0
|
||||
vmull.u8 q10, d5, d1
|
||||
@ -1637,17 +1637,17 @@ function ff_put_vp8_bilin16_v_neon, export=1
|
||||
endfunc
|
||||
|
||||
function ff_put_vp8_bilin16_hv_neon, export=1
|
||||
ldr r3, [sp, #4] @ mx
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #4] @ mx
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r3, [sp, #8] @ my
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d2, r3
|
||||
ldr r12, [sp, #8] @ my
|
||||
vdup.8 d2, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d3, r12
|
||||
ldr r12, [sp] @ h
|
||||
|
||||
vld1.8 {d4-d6}, [r2], r1
|
||||
vld1.8 {d4-d6}, [r2], r3
|
||||
vext.8 q3, q2, q3, #1
|
||||
vmull.u8 q8, d4, d1
|
||||
vmlal.u8 q8, d6, d0
|
||||
@ -1657,11 +1657,11 @@ function ff_put_vp8_bilin16_hv_neon, export=1
|
||||
vrshrn.u16 d5, q9, #3
|
||||
1:
|
||||
subs r12, r12, #2
|
||||
vld1.8 {d18-d20},[r2], r1
|
||||
vld1.8 {d18-d20},[r2], r3
|
||||
vext.8 q10, q9, q10, #1
|
||||
vmull.u8 q11, d18, d1
|
||||
vmlal.u8 q11, d20, d0
|
||||
vld1.8 {d26-d28},[r2], r1
|
||||
vld1.8 {d26-d28},[r2], r3
|
||||
vmull.u8 q12, d19, d1
|
||||
vmlal.u8 q12, d21, d0
|
||||
vext.8 q14, q13, q14, #1
|
||||
@ -1693,18 +1693,18 @@ function ff_put_vp8_bilin16_hv_neon, export=1
|
||||
endfunc
|
||||
|
||||
function ff_put_vp8_bilin8_h_neon, export=1
|
||||
ldr r3, [sp, #4] @ mx
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #4] @ mx
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r12, [sp] @ h
|
||||
1:
|
||||
subs r12, r12, #2
|
||||
vld1.8 {q1}, [r2], r1
|
||||
vld1.8 {q1}, [r2], r3
|
||||
vext.8 d3, d2, d3, #1
|
||||
vmull.u8 q2, d2, d1
|
||||
vmlal.u8 q2, d3, d0
|
||||
vld1.8 {q3}, [r2], r1
|
||||
vld1.8 {q3}, [r2], r3
|
||||
vext.8 d7, d6, d7, #1
|
||||
vmull.u8 q8, d6, d1
|
||||
vmlal.u8 q8, d7, d0
|
||||
@ -1718,18 +1718,18 @@ function ff_put_vp8_bilin8_h_neon, export=1
|
||||
endfunc
|
||||
|
||||
function ff_put_vp8_bilin8_v_neon, export=1
|
||||
ldr r3, [sp, #8] @ my
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #8] @ my
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r12, [sp] @ h
|
||||
vld1.8 {d2}, [r2], r1
|
||||
vld1.8 {d2}, [r2], r3
|
||||
1:
|
||||
subs r12, r12, #2
|
||||
vld1.8 {d3}, [r2], r1
|
||||
vld1.8 {d3}, [r2], r3
|
||||
vmull.u8 q2, d2, d1
|
||||
vmlal.u8 q2, d3, d0
|
||||
vld1.8 {d2}, [r2], r1
|
||||
vld1.8 {d2}, [r2], r3
|
||||
vmull.u8 q3, d3, d1
|
||||
vmlal.u8 q3, d2, d0
|
||||
vrshrn.u16 d4, q2, #3
|
||||
@ -1742,28 +1742,28 @@ function ff_put_vp8_bilin8_v_neon, export=1
|
||||
endfunc
|
||||
|
||||
function ff_put_vp8_bilin8_hv_neon, export=1
|
||||
ldr r3, [sp, #4] @ mx
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #4] @ mx
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r3, [sp, #8] @ my
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d2, r3
|
||||
ldr r12, [sp, #8] @ my
|
||||
vdup.8 d2, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d3, r12
|
||||
ldr r12, [sp] @ h
|
||||
|
||||
vld1.8 {q2}, [r2], r1
|
||||
vld1.8 {q2}, [r2], r3
|
||||
vext.8 d5, d4, d5, #1
|
||||
vmull.u8 q9, d4, d1
|
||||
vmlal.u8 q9, d5, d0
|
||||
vrshrn.u16 d22, q9, #3
|
||||
1:
|
||||
subs r12, r12, #2
|
||||
vld1.8 {q3}, [r2], r1
|
||||
vld1.8 {q3}, [r2], r3
|
||||
vext.8 d7, d6, d7, #1
|
||||
vmull.u8 q8, d6, d1
|
||||
vmlal.u8 q8, d7, d0
|
||||
vld1.8 {q2}, [r2], r1
|
||||
vld1.8 {q2}, [r2], r3
|
||||
vext.8 d5, d4, d5, #1
|
||||
vmull.u8 q9, d4, d1
|
||||
vmlal.u8 q9, d5, d0
|
||||
@ -1783,16 +1783,16 @@ function ff_put_vp8_bilin8_hv_neon, export=1
|
||||
endfunc
|
||||
|
||||
function ff_put_vp8_bilin4_h_neon, export=1
|
||||
ldr r3, [sp, #4] @ mx
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #4] @ mx
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r12, [sp] @ h
|
||||
1:
|
||||
subs r12, r12, #2
|
||||
vld1.8 {d2}, [r2], r1
|
||||
vld1.8 {d2}, [r2], r3
|
||||
vext.8 d3, d2, d3, #1
|
||||
vld1.8 {d6}, [r2], r1
|
||||
vld1.8 {d6}, [r2], r3
|
||||
vext.8 d7, d6, d7, #1
|
||||
vtrn.32 q1, q3
|
||||
vmull.u8 q2, d2, d1
|
||||
@ -1806,16 +1806,16 @@ function ff_put_vp8_bilin4_h_neon, export=1
|
||||
endfunc
|
||||
|
||||
function ff_put_vp8_bilin4_v_neon, export=1
|
||||
ldr r3, [sp, #8] @ my
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #8] @ my
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r12, [sp] @ h
|
||||
vld1.32 {d2[]}, [r2], r1
|
||||
vld1.32 {d2[]}, [r2], r3
|
||||
1:
|
||||
vld1.32 {d3[]}, [r2]
|
||||
vld1.32 {d2[1]}, [r2], r1
|
||||
vld1.32 {d3[1]}, [r2], r1
|
||||
vld1.32 {d2[1]}, [r2], r3
|
||||
vld1.32 {d3[1]}, [r2], r3
|
||||
vmull.u8 q2, d2, d1
|
||||
vmlal.u8 q2, d3, d0
|
||||
vtrn.32 d3, d2
|
||||
@ -1829,26 +1829,26 @@ function ff_put_vp8_bilin4_v_neon, export=1
|
||||
endfunc
|
||||
|
||||
function ff_put_vp8_bilin4_hv_neon, export=1
|
||||
ldr r3, [sp, #4] @ mx
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d0, r3
|
||||
ldr r12, [sp, #4] @ mx
|
||||
vdup.8 d0, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d1, r12
|
||||
ldr r3, [sp, #8] @ my
|
||||
rsb r12, r3, #8
|
||||
vdup.8 d2, r3
|
||||
ldr r12, [sp, #8] @ my
|
||||
vdup.8 d2, r12
|
||||
rsb r12, r12, #8
|
||||
vdup.8 d3, r12
|
||||
ldr r12, [sp] @ h
|
||||
|
||||
vld1.8 {d4}, [r2], r1
|
||||
vld1.8 {d4}, [r2], r3
|
||||
vext.8 d5, d4, d4, #1
|
||||
vmull.u8 q9, d4, d1
|
||||
vmlal.u8 q9, d5, d0
|
||||
vrshrn.u16 d22, q9, #3
|
||||
1:
|
||||
subs r12, r12, #2
|
||||
vld1.8 {d6}, [r2], r1
|
||||
vld1.8 {d6}, [r2], r3
|
||||
vext.8 d7, d6, d6, #1
|
||||
vld1.8 {d4}, [r2], r1
|
||||
vld1.8 {d4}, [r2], r3
|
||||
vext.8 d5, d4, d4, #1
|
||||
vtrn.32 q3, q2
|
||||
vmull.u8 q8, d6, d1
|
||||
|
Loading…
Reference in New Issue
Block a user