1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-10-06 05:47:18 +02:00

avcodec/x86/qpel: Remove always-false branches

The ff_avg_pixels{4,8,16}_l2_mmxext() functions are only ever
used in the last step (the one that actually writes to the dst buffer)
where the number of lines to process is always equal to the
dimensions of the block, whereas ff_put_pixels{8,16}_l2_mmxext()
are also used in intermediate calculations where the number of
lines can be 9 or 17.

The code in qpel.asm uses common macros for both and processes
more than one line per loop iteration; it therefore checks
whether the number of lines is odd and treats that line separately;
yet this special handling is only needed for the put functions,
not the avg functions. It has therefore been %if'ed away for these.

The check is also not needed for ff_put_pixels4_l2_mmxext(), which
is only used by H.264, which always processes four lines. Because
ff_{avg,put}_pixels4_l2_mmxext() process four lines in a single loop
iteration, not only the odd-height handling but also the whole loop
could be removed.

Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2025-09-26 19:31:49 +02:00
parent 8820e2205c
commit cacf854fe7

View File

@@ -51,17 +51,6 @@ SECTION .text
cglobal %1_pixels4_l2, 6,6 cglobal %1_pixels4_l2, 6,6
movsxdifnidn r3, r3d movsxdifnidn r3, r3d
movsxdifnidn r4, r4d movsxdifnidn r4, r4d
test r5d, 1
je .loop
movd m0, [r1]
movd m1, [r2]
add r1, r4
add r2, 4
pavgb m0, m1
OP m0, [r0], m3
add r0, r3
dec r5d
.loop:
mova m0, [r1] mova m0, [r1]
mova m1, [r1+r4] mova m1, [r1+r4]
lea r1, [r1+2*r4] lea r1, [r1+2*r4]
@@ -72,15 +61,10 @@ cglobal %1_pixels4_l2, 6,6
lea r0, [r0+2*r3] lea r0, [r0+2*r3]
mova m0, [r1] mova m0, [r1]
mova m1, [r1+r4] mova m1, [r1+r4]
lea r1, [r1+2*r4]
pavgb m0, [r2+8] pavgb m0, [r2+8]
pavgb m1, [r2+12] pavgb m1, [r2+12]
OP m0, [r0], m3 OP m0, [r0], m3
OP m1, [r0+r3], m3 OP m1, [r0+r3], m3
lea r0, [r0+2*r3]
add r2, 16
sub r5d, 4
jne .loop
RET RET
%endmacro %endmacro
@@ -95,6 +79,7 @@ PIXELS4_L2 avg
cglobal %1_pixels8_l2, 6,6 cglobal %1_pixels8_l2, 6,6
movsxdifnidn r3, r3d movsxdifnidn r3, r3d
movsxdifnidn r4, r4d movsxdifnidn r4, r4d
%ifidn %1, put
test r5d, 1 test r5d, 1
je .loop je .loop
mova m0, [r1] mova m0, [r1]
@@ -105,6 +90,7 @@ cglobal %1_pixels8_l2, 6,6
OP m0, [r0] OP m0, [r0]
add r0, r3 add r0, r3
dec r5d dec r5d
%endif
.loop: .loop:
mova m0, [r1] mova m0, [r1]
mova m1, [r1+r4] mova m1, [r1+r4]
@@ -139,6 +125,7 @@ PIXELS8_L2 avg
cglobal %1_pixels16_l2, 6,6 cglobal %1_pixels16_l2, 6,6
movsxdifnidn r3, r3d movsxdifnidn r3, r3d
movsxdifnidn r4, r4d movsxdifnidn r4, r4d
%ifidn %1, put
test r5d, 1 test r5d, 1
je .loop je .loop
mova m0, [r1] mova m0, [r1]
@@ -151,6 +138,7 @@ cglobal %1_pixels16_l2, 6,6
OP m1, [r0+8] OP m1, [r0+8]
add r0, r3 add r0, r3
dec r5d dec r5d
%endif
.loop: .loop:
mova m0, [r1] mova m0, [r1]
mova m1, [r1+8] mova m1, [r1+8]