mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
aarch64: vp9mc: Calculate less unused data in the 4 pixel wide horizontal filter
No measured speedup on a Cortex A53, but other cores might benefit.

Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
fea92a4b57
commit
388e0d2515
@@ -202,9 +202,12 @@ endfunc
|
||||
ext v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
|
||||
mla \dst2\().8h, v21.8h, v0.h[\offset]
|
||||
mla \dst4\().8h, v23.8h, v0.h[\offset]
|
||||
.else
|
||||
.elseif \size == 8
|
||||
mla \dst1\().8h, v20.8h, v0.h[\offset]
|
||||
mla \dst3\().8h, v22.8h, v0.h[\offset]
|
||||
.else
|
||||
mla \dst1\().4h, v20.4h, v0.h[\offset]
|
||||
mla \dst3\().4h, v22.4h, v0.h[\offset]
|
||||
.endif
|
||||
.endm
|
||||
// The same as above, but don't accumulate straight into the
|
||||
@@ -219,16 +222,24 @@ endfunc
|
||||
ext v23.16b, \src5\().16b, \src6\().16b, #(2*\offset)
|
||||
mul v21.8h, v21.8h, v0.h[\offset]
|
||||
mul v23.8h, v23.8h, v0.h[\offset]
|
||||
.else
|
||||
.elseif \size == 8
|
||||
mul v20.8h, v20.8h, v0.h[\offset]
|
||||
mul v22.8h, v22.8h, v0.h[\offset]
|
||||
.else
|
||||
mul v20.4h, v20.4h, v0.h[\offset]
|
||||
mul v22.4h, v22.4h, v0.h[\offset]
|
||||
.endif
|
||||
.if \size == 4
|
||||
sqadd \dst1\().4h, \dst1\().4h, v20.4h
|
||||
sqadd \dst3\().4h, \dst3\().4h, v22.4h
|
||||
.else
|
||||
sqadd \dst1\().8h, \dst1\().8h, v20.8h
|
||||
sqadd \dst3\().8h, \dst3\().8h, v22.8h
|
||||
.if \size >= 16
|
||||
sqadd \dst2\().8h, \dst2\().8h, v21.8h
|
||||
sqadd \dst4\().8h, \dst4\().8h, v23.8h
|
||||
.endif
|
||||
.endif
|
||||
.endm
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user