You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-10-06 05:47:18 +02:00
avcodec/x86/h264_qpel_8bit: Optimize branch away
ff_{avg,put}_h264_qpel8or16_hv2_lowpass_ssse3() is currently almost the disjoint union of the codepaths for sizes 8 and 16. This size is a compile-time constant at every callsite, so split the function into separate size-8 and size-16 versions and avoid the runtime branch.

Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -66,7 +66,8 @@ void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_
|
||||
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\
|
||||
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\
|
||||
void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int h);\
|
||||
void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int size);\
|
||||
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride);\
|
||||
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride);\
|
||||
void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\
|
||||
void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\
|
||||
void ff_ ## OPNAME ## _pixels16_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\
|
||||
@@ -172,6 +173,18 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uin
|
||||
ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, srcStride, 16);\
|
||||
}\
|
||||
|
||||
/*
 * SSSE3_HV2_LOWPASS_WRAPPER(OPNAME)
 *
 * Defines a static, always-inline C function named
 * ff_<OPNAME>_h264_qpel8or16_hv2_lowpass_ssse3(dst, tmp, dstStride, size)
 * that forwards to one of two size-specific functions:
 *   - size == 8   -> ff_<OPNAME>_h264_qpel8_hv2_lowpass_ssse3(dst, tmp, dstStride)
 *   - otherwise   -> ff_<OPNAME>_h264_qpel16_hv2_lowpass_ssse3(dst, tmp, dstStride)
 * Any size other than 8 is routed to the 16 variant; the size argument
 * itself is not passed on.
 *
 * Per the accompanying commit message, size is a compile-time constant at
 * every callsite, so the intent is that the branch is resolved at compile
 * time once the wrapper is inlined.
 *
 * The macro is instantiated below for OPNAME = avg and OPNAME = put.
 */
#define SSSE3_HV2_LOWPASS_WRAPPER(OPNAME) \
|
||||
static av_always_inline void \
|
||||
ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int size) \
|
||||
{\
|
||||
if (size == 8)\
|
||||
ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(dst, tmp, dstStride);\
|
||||
else\
|
||||
ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(dst, tmp, dstStride);\
|
||||
}
|
||||
SSSE3_HV2_LOWPASS_WRAPPER(avg)
|
||||
SSSE3_HV2_LOWPASS_WRAPPER(put)
|
||||
|
||||
#define ff_put_h264_qpel8_h_lowpass_l2_sse2 ff_put_h264_qpel8_h_lowpass_l2_mmxext
|
||||
#define ff_avg_h264_qpel8_h_lowpass_l2_sse2 ff_avg_h264_qpel8_h_lowpass_l2_mmxext
|
||||
#define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext
|
||||
|
@@ -608,11 +608,14 @@ QPEL8OR16_HV2_LOWPASS_OP put
|
||||
QPEL8OR16_HV2_LOWPASS_OP avg
|
||||
|
||||
%macro QPEL8OR16_HV2_LOWPASS_OP_XMM 1
|
||||
cglobal %1_h264_qpel8or16_hv2_lowpass, 4,4,8 ; dst, tmp, dstStride, size
|
||||
%ifidn %1, avg
|
||||
cglobal %1_h264_qpel8_hv2_lowpass, 3,4,7 ; dst, tmp, dstStride
|
||||
%else
|
||||
cglobal %1_h264_qpel8_hv2_lowpass, 3,4,6 ; dst, tmp, dstStride
|
||||
%endif
|
||||
movsxdifnidn r2, r2d
|
||||
cmp r3d, 16
|
||||
je .op16
|
||||
.loop8:
|
||||
mov r3d, 8
|
||||
.loop:
|
||||
mova m1, [r1+16]
|
||||
mova m0, [r1]
|
||||
mova m2, m1
|
||||
@@ -635,13 +638,17 @@ cglobal %1_h264_qpel8or16_hv2_lowpass, 4,4,8 ; dst, tmp, dstStride, size
|
||||
paddw m0, m2
|
||||
psraw m0, 6
|
||||
packuswb m0, m0
|
||||
op_%1h m0, [r0], m7
|
||||
op_%1h m0, [r0], m6
|
||||
add r1, 48
|
||||
add r0, r2
|
||||
dec r3d
|
||||
jne .loop8
|
||||
jmp .done
|
||||
.op16:
|
||||
jne .loop
|
||||
RET
|
||||
|
||||
cglobal %1_h264_qpel16_hv2_lowpass, 3,4,8 ; dst, tmp, dstStride
|
||||
movsxdifnidn r2, r2d
|
||||
mov r3d, 16
|
||||
.loop:
|
||||
mova m4, [r1+32]
|
||||
mova m5, [r1+16]
|
||||
mova m7, [r1]
|
||||
@@ -688,8 +695,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass, 4,4,8 ; dst, tmp, dstStride, size
|
||||
add r1, 48
|
||||
add r0, r2
|
||||
dec r3d
|
||||
jne .op16
|
||||
.done:
|
||||
jne .loop
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
|
Reference in New Issue
Block a user