mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
arm: vp9mc: Load only 12 pixels in the 4 pixel wide horizontal filter
This reduces the amount the horizontal filters read beyond the filter width to a consistent 1 pixel. The data is not used so this is usually not noticeable. It becomes a problem when the application allocates frame buffers only for the aligned picture size and the end of it is at a page boundary. This happens for picture sizes which are a multiple of the page size like 1280x640. The frame buffer allocation is based on its most likely done via mmap + MAP_ANONYMOUS so start and end of the buffer are page aligned and the previous and next page are not necessarily mapped. This mirrors the aarch64 change. Signed-off-by: Janne Grunau <janne-ffmpeg@jannau.net> Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
parent
430c38f698
commit
f366256215
@ -279,11 +279,13 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
sub r1, r1, r5
|
||||
.endif
|
||||
@ size >= 16 loads two qwords and increments r2,
|
||||
@ for size 4/8 it's enough with one qword and no
|
||||
@ postincrement
|
||||
@ size 4 loads 1 d word, increments r2 and loads 1 32-bit lane
|
||||
@ for size 8 it's enough with one qword and no postincrement
|
||||
.if \size >= 16
|
||||
sub r3, r3, r5
|
||||
sub r3, r3, #8
|
||||
.elseif \size == 4
|
||||
sub r3, r3, #8
|
||||
.endif
|
||||
@ Load the filter vector
|
||||
vld1.16 {q0}, [r12,:128]
|
||||
@ -295,9 +297,14 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
.if \size >= 16
|
||||
vld1.8 {d18, d19, d20}, [r2]!
|
||||
vld1.8 {d24, d25, d26}, [r7]!
|
||||
.else
|
||||
.elseif \size == 8
|
||||
vld1.8 {q9}, [r2]
|
||||
vld1.8 {q12}, [r7]
|
||||
.else @ size == 4
|
||||
vld1.8 {d18}, [r2]!
|
||||
vld1.8 {d24}, [r7]!
|
||||
vld1.32 {d19[0]}, [r2]
|
||||
vld1.32 {d25[0]}, [r7]
|
||||
.endif
|
||||
vmovl.u8 q8, d18
|
||||
vmovl.u8 q9, d19
|
||||
|
Loading…
x
Reference in New Issue
Block a user