avcodec/x86/qpel: Remove always-false branches

The ff_avg_pixels{4,8,16}_l2_mmxext() functions are only ever used in the last step (the one that actually writes to the dst buffer), where the number of lines to process is always equal to the dimensions of the block, whereas ff_put_pixels{8,16}_l2_mmxext() are also used in intermediate calculations where the number of lines can be 9 or 17. The code in qpel.asm uses common macros for both and processes more than one line per loop iteration; it therefore checks whether the number of lines is odd and handles the odd line separately. Yet this special handling is only needed for the put functions, not the avg functions; it has therefore been %if'ed away for the latter. The check is also not needed for ff_put_pixels4_l2_mmxext(), which is only used by H.264, which always processes four lines. Because ff_{avg,put}_pixels4_l2_mmxext() process four lines in a single loop iteration, not only the odd-height handling but the whole loop could be removed. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-10-06 05:47:18 +02:00 · 2025-09-26 19:31:49 +02:00
parent 8820e2205c
commit cacf854fe7
1 changed files with 4 additions and 16 deletions
--- a/libavcodec/x86/qpel.asm
+++ b/libavcodec/x86/qpel.asm
@@ -51,17 +51,6 @@ SECTION .text
 cglobal %1_pixels4_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
-    test        r5d, 1
-    je        .loop
-    movd         m0, [r1]
-    movd         m1, [r2]
-    add          r1, r4
-    add          r2, 4
-    pavgb        m0, m1
-    OP           m0, [r0], m3
-    add          r0, r3
-    dec         r5d
-.loop:
    mova         m0, [r1]
    mova         m1, [r1+r4]
    lea          r1, [r1+2*r4]
@@ -72,15 +61,10 @@ cglobal %1_pixels4_l2, 6,6
    lea          r0, [r0+2*r3]
    mova         m0, [r1]
    mova         m1, [r1+r4]
-    lea          r1, [r1+2*r4]
    pavgb        m0, [r2+8]
    pavgb        m1, [r2+12]
    OP           m0, [r0], m3
    OP           m1, [r0+r3], m3
-    lea          r0, [r0+2*r3]
-    add          r2, 16
-    sub         r5d, 4
-    jne       .loop
    RET
 %endmacro

@@ -95,6 +79,7 @@ PIXELS4_L2 avg
 cglobal %1_pixels8_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
+%ifidn %1, put
    test        r5d, 1
    je        .loop
    mova         m0, [r1]
@@ -105,6 +90,7 @@ cglobal %1_pixels8_l2, 6,6
    OP           m0, [r0]
    add          r0, r3
    dec         r5d
+%endif
 .loop:
    mova         m0, [r1]
    mova         m1, [r1+r4]
@@ -139,6 +125,7 @@ PIXELS8_L2 avg
 cglobal %1_pixels16_l2, 6,6
    movsxdifnidn r3, r3d
    movsxdifnidn r4, r4d
+%ifidn %1, put
    test        r5d, 1
    je        .loop
    mova         m0, [r1]
@@ -151,6 +138,7 @@ cglobal %1_pixels16_l2, 6,6
    OP           m1, [r0+8]
    add          r0, r3
    dec         r5d
+%endif
 .loop:
    mova         m0, [r1]
    mova         m1, [r1+8]