You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2026-04-01 01:30:21 +02:00
aarch64: hevcdsp: Make returns match the call site
For cases where we return early without updating any pixels, we
previously returned to the return address in the caller's scope,
bypassing one function entirely. While this may seem like a neat
optimization, it makes the return stack predictor mispredict
the returns - which can potentially cost more performance than
it gains.
Secondly, if the Armv9.3 feature GCS (Guarded Control Stack) is
enabled, then returns _must_ match the expected value; this feature
is being enabled across Linux distributions, and by fixing the
HEVC assembly, we can enable the security feature in FFmpeg as well.
(cherry picked from commit 1f7ed8a78d)
This commit is contained in:
@@ -511,8 +511,11 @@ function hevc_loop_filter_luma_body_\bitdepth\()_neon, export=0
|
||||
sqxtun v6.8b, v6.8h
|
||||
sqxtun v7.8b, v7.8h
|
||||
.endif
|
||||
// Use x15 to signal whether any pixels should be updated or not.
|
||||
mov x15, #1
|
||||
ret
|
||||
3: mov x15, #0
|
||||
ret
|
||||
3: ret x6
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
@@ -562,6 +565,7 @@ function ff_hevc_\dir\()_loop_filter_luma_\bitdepth\()_neon, export=1
|
||||
.endif
|
||||
.endif
|
||||
bl hevc_loop_filter_luma_body_\bitdepth\()_neon
|
||||
cbz x15, 9f
|
||||
.if \bitdepth > 8
|
||||
.ifc \dir, v
|
||||
transpose_8x8H v0, v1, v2, v3, v4, v5, v6, v7, v16, v17
|
||||
@@ -587,6 +591,7 @@ function ff_hevc_\dir\()_loop_filter_luma_\bitdepth\()_neon, export=1
|
||||
st1 {v6.8b}, [x10], x1
|
||||
st1 {v7.8b}, [x10]
|
||||
.endif
|
||||
9:
|
||||
ret x6
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
Reference in New Issue
Block a user