1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-26 19:01:44 +02:00

aarch64: vp9mc: Calculate less unused data in the 4 pixel wide horizontal filter

No measured speedup on a Cortex A53, but other cores might benefit.

This is cherry-picked from libav commit
388e0d2515.

Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
Martin Storsjö 2016-12-17 13:14:38 +02:00
parent bff0771590
commit 045e33ae3f

View File

@ -202,9 +202,12 @@ endfunc
ext v23.16b, \src5\().16b, \src6\().16b, #(2*\offset) ext v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
mla \dst2\().8h, v21.8h, v0.h[\offset] mla \dst2\().8h, v21.8h, v0.h[\offset]
mla \dst4\().8h, v23.8h, v0.h[\offset] mla \dst4\().8h, v23.8h, v0.h[\offset]
.else .elseif \size == 8
mla \dst1\().8h, v20.8h, v0.h[\offset] mla \dst1\().8h, v20.8h, v0.h[\offset]
mla \dst3\().8h, v22.8h, v0.h[\offset] mla \dst3\().8h, v22.8h, v0.h[\offset]
.else
mla \dst1\().4h, v20.4h, v0.h[\offset]
mla \dst3\().4h, v22.4h, v0.h[\offset]
.endif .endif
.endm .endm
// The same as above, but don't accumulate straight into the // The same as above, but don't accumulate straight into the
@ -219,16 +222,24 @@ endfunc
ext v23.16b, \src5\().16b, \src6\().16b, #(2*\offset) ext v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
mul v21.8h, v21.8h, v0.h[\offset] mul v21.8h, v21.8h, v0.h[\offset]
mul v23.8h, v23.8h, v0.h[\offset] mul v23.8h, v23.8h, v0.h[\offset]
.else .elseif \size == 8
mul v20.8h, v20.8h, v0.h[\offset] mul v20.8h, v20.8h, v0.h[\offset]
mul v22.8h, v22.8h, v0.h[\offset] mul v22.8h, v22.8h, v0.h[\offset]
.else
mul v20.4h, v20.4h, v0.h[\offset]
mul v22.4h, v22.4h, v0.h[\offset]
.endif .endif
.if \size == 4
sqadd \dst1\().4h, \dst1\().4h, v20.4h
sqadd \dst3\().4h, \dst3\().4h, v22.4h
.else
sqadd \dst1\().8h, \dst1\().8h, v20.8h sqadd \dst1\().8h, \dst1\().8h, v20.8h
sqadd \dst3\().8h, \dst3\().8h, v22.8h sqadd \dst3\().8h, \dst3\().8h, v22.8h
.if \size >= 16 .if \size >= 16
sqadd \dst2\().8h, \dst2\().8h, v21.8h sqadd \dst2\().8h, \dst2\().8h, v21.8h
sqadd \dst4\().8h, \dst4\().8h, v23.8h sqadd \dst4\().8h, \dst4\().8h, v23.8h
.endif .endif
.endif
.endm .endm