You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-10-06 05:47:18 +02:00
avcodec/fpel: Avoid loop in ff_avg_pixels4_mmxext()
ff_avg_pixels4_mmxext() is only used by h264_qpel.c, and only with a height of four. The function unrolls four rows and wraps them in a loop so that it can handle any height that is a multiple of four. Remove the loop and the height parameter and move the function to h264_qpel_8bit.asm. This leads to a bit of code duplication, but it is simpler than all the %if checks that would be necessary to achieve the same outcome in fpel.asm. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -63,7 +63,6 @@ INIT_MMX mmx
|
||||
OP_PIXELS put, 8
|
||||
|
||||
INIT_MMX mmxext
|
||||
OP_PIXELS avg, 4
|
||||
OP_PIXELS avg, 8
|
||||
|
||||
INIT_XMM sse2
|
||||
|
@@ -22,8 +22,6 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
|
@@ -30,6 +30,7 @@
|
||||
#include "fpel.h"
|
||||
|
||||
#if HAVE_X86ASM
|
||||
void ff_avg_pixels4_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t stride);
|
||||
void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
@@ -52,7 +53,6 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t
|
||||
#define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
|
||||
#define ff_avg_pixels16_l2_sse2(dst, src1, src2, dststride, src1stride, h) \
|
||||
ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride))
|
||||
#define ff_put_pixels4_mmxext(...)
|
||||
|
||||
#define DEF_QPEL(OPNAME)\
|
||||
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
|
||||
@@ -191,8 +191,7 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uin
|
||||
#define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext
|
||||
#define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext
|
||||
|
||||
#define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \
|
||||
H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
|
||||
#define H264_MC_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \
|
||||
H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
|
||||
H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
|
||||
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
|
||||
@@ -208,11 +207,11 @@ static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
|
||||
ff_avg_pixels16_sse2(dst, src, stride, 16);
|
||||
}
|
||||
|
||||
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
|
||||
av_unused static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
|
||||
{\
|
||||
ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
|
||||
}\
|
||||
/* Three-argument wrapper around ff_avg_pixels8_mmxext(): forwards dst, src
 * and stride unchanged and fixes the height argument at 8, so the function
 * can be stored in the avg_h264_qpel_pixels_tab function table. */
static void avg_h264_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride)
|
||||
{
|
||||
ff_avg_pixels8_mmxext(dst, src, stride, 8);
|
||||
}
|
||||
|
||||
#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
|
||||
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
|
||||
@@ -346,8 +345,7 @@ QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
|
||||
QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
|
||||
QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
|
||||
|
||||
H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8)
|
||||
H264_MC_C(avg_, 8, mmxext, 8)
|
||||
H264_MC(H264_MC_V_H_HV, 4, mmxext, 8)
|
||||
H264_MC_816(H264_MC_V, sse2)
|
||||
H264_MC_816(H264_MC_HV, sse2)
|
||||
H264_MC_816(H264_MC_H, ssse3)
|
||||
@@ -461,7 +459,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
||||
if (!high_bit_depth) {
|
||||
SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, );
|
||||
c->avg_h264_qpel_pixels_tab[1][0] = avg_h264_qpel8_mc00_mmxext;
|
||||
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
|
||||
SET_QPEL_FUNCS_1PP(avg_h264_qpel, 2, 4, mmxext, );
|
||||
c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_pixels4_mmxext;
|
||||
} else if (bit_depth == 10) {
|
||||
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
|
||||
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
|
||||
|
@@ -25,14 +25,30 @@
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION_RODATA 32
|
||||
|
||||
cextern pw_16
|
||||
cextern pw_5
|
||||
cextern pb_0
|
||||
|
||||
SECTION .text
|
||||
|
||||
; void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
; ptrdiff_t line_size)
; Averages four rows of pixels into block in place with pavgb; the four rows
; are handled by straight-line code, there is no loop and no height argument.
; r0 = block, r1 = pixels, r2 = line_size (used as the row stride for both
; pointers), r3 = scratch register.
|
||||
INIT_MMX mmxext
|
||||
cglobal avg_pixels4, 3,4
|
||||
; r3 = 3 * line_size, the offset of the fourth row
lea r3, [r2*3]
|
||||
; load the four source rows from pixels
movh m0, [r1]
|
||||
movh m1, [r1+r2]
|
||||
movh m2, [r1+r2*2]
|
||||
movh m3, [r1+r3]
|
||||
; byte-wise average of each source row with the matching row of block
; NOTE(review): with MMX registers the memory operand of pavgb is 64 bits
; wide, so each of these presumably reads 8 bytes from block although only
; the movh-sized part is stored back below -- confirm the over-read is acceptable
pavgb m0, [r0]
|
||||
pavgb m1, [r0+r2]
|
||||
pavgb m2, [r0+r2*2]
|
||||
pavgb m3, [r0+r3]
|
||||
; write the four averaged rows back to block
movh [r0], m0
|
||||
movh [r0+r2], m1
|
||||
movh [r0+r2*2], m2
|
||||
movh [r0+r3], m3
|
||||
RET
|
||||
|
||||
%macro op_avgh 3
|
||||
movh %3, %2
|
||||
|
Reference in New Issue
Block a user