You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-10-06 05:47:18 +02:00
avcodec/x86/qpel: Remove always-false branches
The ff_avg_pixels{4,8,16}_l2_mmxext() functions are only ever used in the last step (the one that actually writes to the dst buffer), where the number of lines to process is always equal to the dimensions of the block, whereas the ff_put_pixels{8,16}_l2_mmxext() functions are also used in intermediate calculations, where the number of lines can be 9 or 17. The code in qpel.asm uses common macros for both and processes more than one line per loop iteration; it therefore checks whether the number of lines is odd and, if so, treats the extra line separately. Yet this special handling is only needed for the put functions, not the avg functions, so it has been %if'ed away for the avg functions. The check is also not needed for ff_put_pixels4_l2_mmxext(), which is only used by H.264, which always processes four lines. Because ff_{avg,put}_pixels4_l2_mmxext() process four lines in a single loop iteration, not only the odd-height handling but the whole loop could be removed. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -51,17 +51,6 @@ SECTION .text
|
||||
cglobal %1_pixels4_l2, 6,6
|
||||
movsxdifnidn r3, r3d
|
||||
movsxdifnidn r4, r4d
|
||||
test r5d, 1
|
||||
je .loop
|
||||
movd m0, [r1]
|
||||
movd m1, [r2]
|
||||
add r1, r4
|
||||
add r2, 4
|
||||
pavgb m0, m1
|
||||
OP m0, [r0], m3
|
||||
add r0, r3
|
||||
dec r5d
|
||||
.loop:
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+r4]
|
||||
lea r1, [r1+2*r4]
|
||||
@@ -72,15 +61,10 @@ cglobal %1_pixels4_l2, 6,6
|
||||
lea r0, [r0+2*r3]
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+r4]
|
||||
lea r1, [r1+2*r4]
|
||||
pavgb m0, [r2+8]
|
||||
pavgb m1, [r2+12]
|
||||
OP m0, [r0], m3
|
||||
OP m1, [r0+r3], m3
|
||||
lea r0, [r0+2*r3]
|
||||
add r2, 16
|
||||
sub r5d, 4
|
||||
jne .loop
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
@@ -95,6 +79,7 @@ PIXELS4_L2 avg
|
||||
cglobal %1_pixels8_l2, 6,6
|
||||
movsxdifnidn r3, r3d
|
||||
movsxdifnidn r4, r4d
|
||||
%ifidn %1, put
|
||||
test r5d, 1
|
||||
je .loop
|
||||
mova m0, [r1]
|
||||
@@ -105,6 +90,7 @@ cglobal %1_pixels8_l2, 6,6
|
||||
OP m0, [r0]
|
||||
add r0, r3
|
||||
dec r5d
|
||||
%endif
|
||||
.loop:
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+r4]
|
||||
@@ -139,6 +125,7 @@ PIXELS8_L2 avg
|
||||
cglobal %1_pixels16_l2, 6,6
|
||||
movsxdifnidn r3, r3d
|
||||
movsxdifnidn r4, r4d
|
||||
%ifidn %1, put
|
||||
test r5d, 1
|
||||
je .loop
|
||||
mova m0, [r1]
|
||||
@@ -151,6 +138,7 @@ cglobal %1_pixels16_l2, 6,6
|
||||
OP m1, [r0+8]
|
||||
add r0, r3
|
||||
dec r5d
|
||||
%endif
|
||||
.loop:
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+8]
|
||||
|
Reference in New Issue
Block a user