From cacf854fe7ad20a4eff5b2e9ef4e5a37f59d6e56 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Fri, 26 Sep 2025 19:31:49 +0200 Subject: [PATCH] avcodec/x86/qpel: Remove always-false branches The ff_avg_pixels{4,8,16}_l2_mmxext() functions are only ever used in the last step (the one that actually writes to the dst buffer), where the number of lines to process is always equal to the dimensions of the block, whereas ff_put_pixels{8,16}_mmxext() are also used in intermediate calculations where the number of lines can be 9 or 17. The code in qpel.asm uses common macros for both and processes more than one line per loop iteration; it therefore checks whether the number of lines is odd and, if so, handles the extra line separately; yet this special handling is only needed for the put functions, not the avg functions. It has therefore been %if'ed away for these. The check is also not needed for ff_put_pixels4_l2_mmxext(), which is only used by H.264, which always processes four lines. Because ff_{avg,put}_pixels4_l2_mmxext() process four lines in a single loop iteration, not only the odd-height handling but also the whole loop could be removed. 
Reviewed-by: James Almer Signed-off-by: Andreas Rheinhardt --- libavcodec/x86/qpel.asm | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm index 481251314a..241ed27b8b 100644 --- a/libavcodec/x86/qpel.asm +++ b/libavcodec/x86/qpel.asm @@ -51,17 +51,6 @@ SECTION .text cglobal %1_pixels4_l2, 6,6 movsxdifnidn r3, r3d movsxdifnidn r4, r4d - test r5d, 1 - je .loop - movd m0, [r1] - movd m1, [r2] - add r1, r4 - add r2, 4 - pavgb m0, m1 - OP m0, [r0], m3 - add r0, r3 - dec r5d -.loop: mova m0, [r1] mova m1, [r1+r4] lea r1, [r1+2*r4] @@ -72,15 +61,10 @@ cglobal %1_pixels4_l2, 6,6 lea r0, [r0+2*r3] mova m0, [r1] mova m1, [r1+r4] - lea r1, [r1+2*r4] pavgb m0, [r2+8] pavgb m1, [r2+12] OP m0, [r0], m3 OP m1, [r0+r3], m3 - lea r0, [r0+2*r3] - add r2, 16 - sub r5d, 4 - jne .loop RET %endmacro @@ -95,6 +79,7 @@ PIXELS4_L2 avg cglobal %1_pixels8_l2, 6,6 movsxdifnidn r3, r3d movsxdifnidn r4, r4d +%ifidn %1, put test r5d, 1 je .loop mova m0, [r1] @@ -105,6 +90,7 @@ cglobal %1_pixels8_l2, 6,6 OP m0, [r0] add r0, r3 dec r5d +%endif .loop: mova m0, [r1] mova m1, [r1+r4] @@ -139,6 +125,7 @@ PIXELS8_L2 avg cglobal %1_pixels16_l2, 6,6 movsxdifnidn r3, r3d movsxdifnidn r4, r4d +%ifidn %1, put test r5d, 1 je .loop mova m0, [r1] @@ -151,6 +138,7 @@ cglobal %1_pixels16_l2, 6,6 OP m1, [r0+8] add r0, r3 dec r5d +%endif .loop: mova m0, [r1] mova m1, [r1+8]