From cacf854fe7ad20a4eff5b2e9ef4e5a37f59d6e56 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 26 Sep 2025 19:31:49 +0200
Subject: [PATCH] avcodec/x86/qpel: Remove always-false branches

The ff_avg_pixels{4,8,16}_l2_mmxext() functions are only ever
used in the last step (the one that actually writes to the dst buffer)
where the number of lines to process is always equal to the
dimensions of the block, whereas ff_put_pixels{8,16}_l2_mmxext()
are also used in intermediate calculations where the number of
lines can be 9 or 17.

The code in qpel.asm uses common macros for both and processes
more than one line per loop iteration; it therefore checks
whether the number of lines is odd and, if so, handles the extra
line separately before entering the loop. Yet this special handling
is only needed for the put functions, not the avg functions. It has
therefore been %if'ed away for the latter.

The check is also not needed for ff_put_pixels4_l2_mmxext(), which
is only used by H.264, which always processes four lines. Because
ff_{avg,put}_pixels4_l2_mmxext() process four lines in a single loop
iteration, not only the odd-height handling but the whole loop
could be removed.

Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/x86/qpel.asm | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm
index 481251314a..241ed27b8b 100644
--- a/libavcodec/x86/qpel.asm
+++ b/libavcodec/x86/qpel.asm
@@ -51,17 +51,6 @@ SECTION .text
 cglobal %1_pixels4_l2, 6,6
     movsxdifnidn r3, r3d
     movsxdifnidn r4, r4d
-    test        r5d, 1
-    je        .loop
-    movd         m0, [r1]
-    movd         m1, [r2]
-    add          r1, r4
-    add          r2, 4
-    pavgb        m0, m1
-    OP           m0, [r0], m3
-    add          r0, r3
-    dec         r5d
-.loop:
     mova         m0, [r1]
     mova         m1, [r1+r4]
     lea          r1, [r1+2*r4]
@@ -72,15 +61,10 @@ cglobal %1_pixels4_l2, 6,6
     lea          r0, [r0+2*r3]
     mova         m0, [r1]
     mova         m1, [r1+r4]
-    lea          r1, [r1+2*r4]
     pavgb        m0, [r2+8]
     pavgb        m1, [r2+12]
     OP           m0, [r0], m3
     OP           m1, [r0+r3], m3
-    lea          r0, [r0+2*r3]
-    add          r2, 16
-    sub         r5d, 4
-    jne       .loop
     RET
 %endmacro
 
@@ -95,6 +79,7 @@ PIXELS4_L2 avg
 cglobal %1_pixels8_l2, 6,6
     movsxdifnidn r3, r3d
     movsxdifnidn r4, r4d
+%ifidn %1, put
     test        r5d, 1
     je        .loop
     mova         m0, [r1]
@@ -105,6 +90,7 @@ cglobal %1_pixels8_l2, 6,6
     OP           m0, [r0]
     add          r0, r3
     dec         r5d
+%endif
 .loop:
     mova         m0, [r1]
     mova         m1, [r1+r4]
@@ -139,6 +125,7 @@ PIXELS8_L2 avg
 cglobal %1_pixels16_l2, 6,6
     movsxdifnidn r3, r3d
     movsxdifnidn r4, r4d
+%ifidn %1, put
     test        r5d, 1
     je        .loop
     mova         m0, [r1]
@@ -151,6 +138,7 @@ cglobal %1_pixels16_l2, 6,6
     OP           m1, [r0+8]
     add          r0, r3
     dec         r5d
+%endif
 .loop:
     mova         m0, [r1]
     mova         m1, [r1+8]