Add x86 asm functions for VP8 put_pixels

Originally committed as revision 23858 to svn://svn.ffmpeg.org/ffmpeg/trunk
2025-07-06 06:27:36 +02:00 · 2010-06-28 19:14:40 +00:00
parent a173aa8940
commit 0fecad09fe
2 changed files with 59 additions and 0 deletions
--- a/libavcodec/x86/vp8dsp-init.c
+++ b/libavcodec/x86/vp8dsp-init.c
@ -87,6 +87,16 @@ extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
 /*
  * Plain block-copy routines ("put pixels") implemented in vp8dsp.asm.
  * They share the signature of the other VP8 motion-compensation functions
  * declared in this file so they can be stored in the same function tables.
  * The assembly only loads the first five arguments (dst, dststride, src,
  * srcstride, height); mx and my are not read.
  * The assembly copies two rows per loop iteration, so height is presumably
  * always even -- NOTE(review): confirm against callers.
  * The _sse variant stores with aligned 16-byte moves, so every dst row must
  * be 16-byte aligned.
  */
 extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
 extern void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
 extern void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
 #define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
 static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst,  int dststride, uint8_t *src, \
@ -218,6 +228,10 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
 #if HAVE_YASM
    /* Baseline MMX implementations, installed when the CPU reports MMX. */
    if (mm_flags & FF_MM_MMX) {
        c->vp8_idct_dc_add                  = ff_vp8_idct_dc_add_mmx;
        /* The [..][0][0] slot of both the epel and the bilinear table gets
         * the same plain copy routine: 16-wide for first index 0, 8-wide for
         * first index 1. Presumably [0][0] is the "no subpel offset" case --
         * NOTE(review): confirm against the table definition. */
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
        c->put_vp8_epel_pixels_tab[1][0][0]     =
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
    }
    /* note that 4-tap width=16 functions are missing because w=16
@ -231,6 +245,11 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
        VP8_BILINEAR_MC_FUNC(1, 4, mmxext);
    }
    /* When SSE is available, replace the 16-wide plain copy that the MMX
     * branch earlier in this function stored in the same two table slots.
     * There is no 8-wide SSE copy, so the [1][0][0] entries are left as is. */
    if (mm_flags & FF_MM_SSE) {
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
    }
    if (mm_flags & FF_MM_SSE2) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@ -810,6 +810,46 @@ cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
    jg .nextrow
    REP_RET
 ;-----------------------------------------------------------------------------
 ; put_vp8_pixels8_mmx: copy an 8-byte-wide block from src to dst.
 ; Register use, following the C prototype's argument order:
 ;   r0 = dst, r1 = dststride, r2 = src, r3 = srcstride, r4d = height
 ; The trailing mx/my arguments of the prototype are not loaded.
 ; Two rows are copied per iteration and the loop runs while the remaining
 ; row count is > 0, so an odd height would copy one extra row.
 ;-----------------------------------------------------------------------------
 cglobal put_vp8_pixels8_mmx, 5,5
 .nextrow:
    ; load two consecutive source rows (8 bytes each)
    movq  mm0, [r2+r3*0]
    movq  mm1, [r2+r3*1]
    ; advance src by two rows
    lea    r2, [r2+r3*2]
    ; store both rows to the destination
    movq [r0+r1*0], mm0
    movq [r0+r1*1], mm1
    ; advance dst by two rows
    lea    r0, [r0+r1*2]
    ; two rows done; loop while rows remain
    sub   r4d, 2
    jg .nextrow
    REP_RET
 ;-----------------------------------------------------------------------------
 ; put_vp8_pixels16_mmx: copy a 16-byte-wide block from src to dst using
 ; 8-byte MMX moves (two per row).
 ; Register use, following the C prototype's argument order:
 ;   r0 = dst, r1 = dststride, r2 = src, r3 = srcstride, r4d = height
 ; The trailing mx/my arguments of the prototype are not loaded.
 ; Two rows are copied per iteration and the loop runs while the remaining
 ; row count is > 0, so an odd height would copy one extra row.
 ;-----------------------------------------------------------------------------
 cglobal put_vp8_pixels16_mmx, 5,5
 .nextrow:
    ; row 0: left and right 8-byte halves
    movq  mm0, [r2+r3*0+0]
    movq  mm1, [r2+r3*0+8]
    ; row 1: left and right 8-byte halves
    movq  mm2, [r2+r3*1+0]
    movq  mm3, [r2+r3*1+8]
    ; advance src by two rows
    lea    r2, [r2+r3*2]
    ; store the four halves in the same layout
    movq [r0+r1*0+0], mm0
    movq [r0+r1*0+8], mm1
    movq [r0+r1*1+0], mm2
    movq [r0+r1*1+8], mm3
    ; advance dst by two rows
    lea    r0, [r0+r1*2]
    ; two rows done; loop while rows remain
    sub   r4d, 2
    jg .nextrow
    REP_RET
 ;-----------------------------------------------------------------------------
 ; put_vp8_pixels16_sse: copy a 16-byte-wide block from src to dst using one
 ; 16-byte SSE move per row.
 ; Register use, following the C prototype's argument order:
 ;   r0 = dst, r1 = dststride, r2 = src, r3 = srcstride, r4d = height
 ; The trailing mx/my arguments of the prototype are not loaded.
 ; Loads are unaligned (movups); stores are aligned (movaps), so every dst
 ; row address must be 16-byte aligned, i.e. both dst and dststride.
 ; Two rows are copied per iteration and the loop runs while the remaining
 ; row count is > 0, so an odd height would copy one extra row.
 ;-----------------------------------------------------------------------------
 cglobal put_vp8_pixels16_sse, 5,5,2
 .nextrow:
    ; load two source rows; src may be unaligned
    movups xmm0, [r2+r3*0]
    movups xmm1, [r2+r3*1]
    ; advance src by two rows
    lea     r2, [r2+r3*2]
    ; aligned stores to the destination rows
    movaps [r0+r1*0], xmm0
    movaps [r0+r1*1], xmm1
    ; advance dst by two rows
    lea     r0, [r0+r1*2]
    ; two rows done; loop while rows remain
    sub    r4d, 2
    jg .nextrow
    REP_RET
 ;-----------------------------------------------------------------------------
 ; IDCT functions:
 ;