FFmpeg/libavcodec/x86/dsputil_mmx.h

/*
 * MMX optimized DSP utils
 * Copyright (c) 2007  Aurelien Jacobs <aurel@gnuage.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_X86_DSPUTIL_MMX_H
#define AVCODEC_X86_DSPUTIL_MMX_H

#include <stdint.h>
#include "libavcodec/dsputil.h"

/*
 * 128-bit value expressed as two 64-bit halves.  This is the type of the
 * 128-bit constants declared in this header.
 */
typedef struct {
    uint64_t a;
    uint64_t b;
} xmm_reg;

/*
 * Shared constants: only declared here, defined in another translation unit.
 * Constants typed uint64_t are 64 bits wide, those typed xmm_reg 128 bits.
 *
 * NOTE(review): judging by the names only, ff_pw_N / ff_pb_XX / ff_pd_N look
 * like vectors whose 16-bit word / byte / double lanes all hold the value in
 * the name (decimal for pw and pd, hexadecimal for pb) -- confirm against the
 * definitions before relying on it.
 */
extern const uint64_t ff_bone;
extern const uint64_t ff_wtwo;

extern const uint64_t ff_pdw_80000000[2];

/* "pw" constants (presumably packed words, see note above) */
extern const uint64_t ff_pw_3;
extern const xmm_reg  ff_pw_4;
extern const xmm_reg  ff_pw_5;
extern const xmm_reg  ff_pw_8;
extern const uint64_t ff_pw_15;
extern const xmm_reg  ff_pw_16;
extern const xmm_reg  ff_pw_18;
extern const uint64_t ff_pw_20;
extern const xmm_reg  ff_pw_27;
extern const xmm_reg  ff_pw_28;
extern const xmm_reg  ff_pw_32;
extern const uint64_t ff_pw_42;
extern const uint64_t ff_pw_53;
extern const xmm_reg  ff_pw_63;
extern const xmm_reg  ff_pw_64;
extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128;
extern const uint64_t ff_pw_255;

/* "pb" constants (presumably packed bytes, see note above) */
extern const xmm_reg  ff_pb_1;
extern const xmm_reg  ff_pb_3;
extern const uint64_t ff_pb_7;
extern const uint64_t ff_pb_1F;
extern const uint64_t ff_pb_3F;
extern const uint64_t ff_pb_81;
extern const uint64_t ff_pb_A1;
extern const xmm_reg  ff_pb_F8;
extern const uint64_t ff_pb_FC;
extern const xmm_reg  ff_pb_FE;

/* "pd" constants: arrays of two doubles */
extern const double ff_pd_1[2];
extern const double ff_pd_2[2];

/*
 * Emit four movq loads: the k-th destination (k = 0..3) is loaded from the
 * operand spelled "k*<step>+<src>".  Both <step> and <src> are pasted
 * textually into the asm string, so <src> must be an operand that is still
 * valid with that displacement written in front of it.
 */
#define LOAD4(step, src, r0, r1, r2, r3) \
    "movq 0*" #step "+" #src ", " #r0 "\n\t" \
    "movq 1*" #step "+" #src ", " #r1 "\n\t" \
    "movq 2*" #step "+" #src ", " #r2 "\n\t" \
    "movq 3*" #step "+" #src ", " #r3 "\n\t"

/*
 * Emit four movq stores, the mirror image of LOAD4: the k-th source
 * (k = 0..3) is written to the operand spelled "k*<step>+<dst>".  <step> and
 * <dst> are pasted textually into the asm string.
 */
#define STORE4(step, dst, r0, r1, r2, r3) \
    "movq " #r0 ", 0*" #step "+" #dst "\n\t" \
    "movq " #r1 ", 1*" #step "+" #dst "\n\t" \
    "movq " #r2 ", 2*" #step "+" #dst "\n\t" \
    "movq " #r3 ", 3*" #step "+" #dst "\n\t"

/*
 * In-place sum/difference butterfly on packed 16-bit words:
 *     first  <- first + second
 *     second <- second - first      (using the original value of first)
 * The second operand is doubled before the subtraction, so that removing the
 * freshly computed sum leaves (second - first) without a scratch register.
 */
#define SUMSUB_BA(first, second) \
    "paddw " #second ", " #first  " \n\t" \
    "paddw " #second ", " #second " \n\t" \
    "psubw " #first  ", " #second " \n\t"

/*
 * Interleave ("butterfly") step used as the building block of the transpose
 * macros in this file.
 *
 *   a, t : registers, both overwritten
 *   b    : second input, only ever read
 *   n    : punpck suffix selecting the element width (bw, wd, dq, qdq)
 *   m    : mov suffix matching the register width (q, dqa)
 *
 * Emits:
 *   mov<m>     a, t    t = copy of a
 *   punpckl<n> b, a    a = low  halves of a and b interleaved
 *   punpckh<n> b, t    t = high halves of (old) a and b interleaved
 *
 * The last line deliberately has no line continuation, so the definition
 * ends here whatever follows it in the file.
 */
#define SBUTTERFLY(a,b,t,n,m)\
    "mov" #m " " #a ", " #t "         \n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a "     \n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t "     \n\t" /* cgdh */

/*
 * 4x4 transpose of 16-bit words, four words (64 bits, moved with movq) per
 * row, built from SBUTTERFLY: two word interleaves followed by two dword
 * interleaves.
 *
 *   in : r0 = a b c d   r1 = e f g h   r2  = i j k l   r3 = m n o p
 *   out: r0 = a e i m   r3 = b f j n   tmp = c g k o   r2 = d h l p
 *
 * The transposed rows therefore come back in r0, r3, tmp, r2 (in that
 * order); r1 is left holding an intermediate value.
 */
#define TRANSPOSE4(r0, r1, r2, r3, tmp) \
    SBUTTERFLY(r0,  r1, tmp, wd, q) /* r0  = a e b f   tmp = c g d h */ \
    SBUTTERFLY(r2,  r3, r1,  wd, q) /* r2  = i m j n   r1  = k o l p */ \
    SBUTTERFLY(r0,  r2, r3,  dq, q) /* r0  = a e i m   r3  = b f j n */ \
    SBUTTERFLY(tmp, r1, r2,  dq, q) /* tmp = c g k o   r2  = d h l p */

/*
 * Byte transpose of an 8x4 block: the low four bytes of each of the eight
 * inputs a..h are rearranged into four rows of eight bytes.
 *
 * a, b, c, d and t are written and must be registers.  e, f, g, h only ever
 * appear as the source operand of punpcklbw, so they can be memory.
 *
 * out: rows 0..3 end up in a, d, t, c (in that order); b is left holding an
 * intermediate value.
 */
#define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
    "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\
    "punpcklbw " #f ", " #b " \n\t" /* b0 f0 b1 f1 b2 f2 b3 f3 */\
    "punpcklbw " #g ", " #c " \n\t" /* c0 g0 c1 g1 c2 g2 c3 g3 */\
    "punpcklbw " #h ", " #d " \n\t" /* d0 h0 d1 h1 d2 h2 d3 h3 */\
    SBUTTERFLY(a, b, t, bw, q)   /* a= a0 b0 e0 f0 a1 b1 e1 f1 */\
                                 /* t= a2 b2 e2 f2 a3 b3 e3 f3 */\
    SBUTTERFLY(c, d, b, bw, q)   /* c= c0 d0 g0 h0 c1 d1 g1 h1 */\
                                 /* b= c2 d2 g2 h2 c3 d3 g3 h3 */\
    SBUTTERFLY(a, c, d, wd, q)   /* a= a0 b0 c0 d0 e0 f0 g0 h0 */\
                                 /* d= a1 b1 c1 d1 e1 f1 g1 h1 */\
    SBUTTERFLY(t, b, c, wd, q)   /* t= a2 b2 c2 d2 e2 f2 g2 h2 */\
                                 /* c= a3 b3 c3 d3 e3 f3 g3 h3 */

/*
 * TRANSPOSE8(a,b,c,d,e,f,g,h,t): transpose built from twelve SBUTTERFLY
 * steps on eight 128-bit registers (movdqa moves; word, dword and then qword
 * interleaves).  Both variants run the same SBUTTERFLY sequence and differ
 * only in where the ninth, temporary value lives.
 */
#if ARCH_X86_64
/*
 * x86-64 variant: the temporary is kept in %xmm8, so the t argument is
 * unused.  %xmm8 is overwritten; it is spelled "%%xmm8", i.e. the macro is
 * meant for an asm statement where '%' has to be escaped.
 * NOTE(review): callers presumably must not pass xmm8 as one of a..h.
 */
// permutes 01234567 -> 05736421
#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
    SBUTTERFLY(a,b,%%xmm8,wd,dqa)\
    SBUTTERFLY(c,d,b,wd,dqa)\
    SBUTTERFLY(e,f,d,wd,dqa)\
    SBUTTERFLY(g,h,f,wd,dqa)\
    SBUTTERFLY(a,c,h,dq,dqa)\
    SBUTTERFLY(%%xmm8,b,c,dq,dqa)\
    SBUTTERFLY(e,g,b,dq,dqa)\
    SBUTTERFLY(d,f,g,dq,dqa)\
    SBUTTERFLY(a,e,f,qdq,dqa)\
    SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\
    SBUTTERFLY(h,b,d,qdq,dqa)\
    SBUTTERFLY(c,g,b,qdq,dqa)\
    "movdqa %%xmm8, "#g"              \n\t"
#else
/*
 * Variant without a ninth register: t is a memory scratch area used as two
 * slots, "t" and "16t" (the literal 16 is pasted directly in front of the
 * operand text, so t has to be an operand that accepts a leading
 * displacement, e.g. a (%reg)-style address).  Register h stands in for the
 * temporary and is spilled to / reloaded from those slots around the steps
 * that need the real h.  Both slots are accessed with movdqa.
 * NOTE(review): movdqa requires 16-byte aligned memory operands, so t
 * presumably has to be 16-byte aligned and at least 32 bytes -- confirm at
 * the call sites.
 */
#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
    "movdqa "#h", "#t"                \n\t"\
    SBUTTERFLY(a,b,h,wd,dqa)\
    "movdqa "#h", 16"#t"              \n\t"\
    "movdqa "#t", "#h"                \n\t"\
    SBUTTERFLY(c,d,b,wd,dqa)\
    SBUTTERFLY(e,f,d,wd,dqa)\
    SBUTTERFLY(g,h,f,wd,dqa)\
    SBUTTERFLY(a,c,h,dq,dqa)\
    "movdqa "#h", "#t"                \n\t"\
    "movdqa 16"#t", "#h"              \n\t"\
    SBUTTERFLY(h,b,c,dq,dqa)\
    SBUTTERFLY(e,g,b,dq,dqa)\
    SBUTTERFLY(d,f,g,dq,dqa)\
    SBUTTERFLY(a,e,f,qdq,dqa)\
    SBUTTERFLY(h,d,e,qdq,dqa)\
    "movdqa "#h", 16"#t"              \n\t"\
    "movdqa "#t", "#h"                \n\t"\
    SBUTTERFLY(h,b,d,qdq,dqa)\
    SBUTTERFLY(c,g,b,qdq,dqa)\
    "movdqa 16"#t", "#g"              \n\t"
#endif

/*
 * Set every 16-bit word of register regd to 1.
 *
 * pcmpeqd of a register with itself sets all of its bits; the logical right
 * shift of each word by 15 then leaves only bit 0 of every word set.
 *
 * regd is the bare register name (the macro supplies the "%%" prefix).
 * Expands to a complete __asm__ volatile statement without a trailing
 * semicolon.  Its operand and clobber lists are empty, so the compiler is
 * not told that regd is modified.
 */
#define MOVQ_WONE(regd) \
    __asm__ volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd ::)

/*
 * Init entry points taking the DSPContext and the codec context.
 * NOTE(review): presumably they install the x86-optimized routines into *c;
 * the definitions are not visible from this header.
 */
void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);

/*
 * Pixel routines taking a read-only block of DCTELEM, a destination pixel
 * pointer and a line size.
 * NOTE(review): going by the names, they add / store the block into pixels
 * with clamping, line_size being the distance between rows -- confirm
 * against the implementations.
 */
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);

/*
 * CAVS: init entry points (MMX2 and 3DNow! flavours) and the mc00 put/avg
 * routines for 8- and 16-wide blocks.
 * NOTE(review): "mc00" presumably denotes the zero-offset (plain copy or
 * average) motion-compensation case -- confirm against the implementation.
 */
void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);

/*
 * VC-1: init entry point and the mc00 put/avg routines.  Unlike the CAVS
 * ones above, these take a const source pointer and an extra rnd argument.
 */
void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);

/*
 * SSE2 autocorrelation routine for LPC: reads len 32-bit samples from data
 * and writes its results through autoc.
 * NOTE(review): autoc presumably needs room for lag + 1 values -- confirm
 * against the implementation.
 */
void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
                                   double *autoc);

/* IDCT routines (MMX and MMXEXT flavours) taking a DCTELEM block that is not
 * const, i.e. presumably transformed in place. */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);


/*
 * Deinterlace one line: writes size pixels to dst from five input lines.
 * NOTE(review): by their names, lum_m4 .. lum_m1 are the lines 4..1 rows
 * before lum -- confirm against the implementation.
 */
void ff_deinterlace_line_mmx(uint8_t *dst,
                             const uint8_t *lum_m4, const uint8_t *lum_m3,
                             const uint8_t *lum_m2, const uint8_t *lum_m1,
                             const uint8_t *lum,
                             int size);

/*
 * In-place variant: same five line pointers, no separate destination.
 * NOTE(review): every pointer here is const-qualified although the routine
 * is named "inplace" and so presumably writes through one of them -- check
 * the prototype against the implementation.
 */
void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
                                     const uint8_t *lum_m3,
                                     const uint8_t *lum_m2,
                                     const uint8_t *lum_m1,
                                     const uint8_t *lum, int size);

#endif /* AVCODEC_X86_DSPUTIL_MMX_H */
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`/*`
			`* MMX optimized DSP utils`
			`* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>`
			`*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with FFmpeg; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

Rename libavcodec/i386/ --> libavcodec/x86/. It contains optimizations that are not specific to i386 and libavutil uses this naming scheme already. Originally committed as revision 16270 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-22 11:12:42 +02:00			`#ifndef AVCODEC_X86_DSPUTIL_MMX_H`
			`#define AVCODEC_X86_DSPUTIL_MMX_H`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00
add required include to make this file self-contained Originally committed as revision 11211 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-12-13 00:45:03 +02:00			`#include <stdint.h>`
Use full path for #includes from another directory. Originally committed as revision 13098 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-05-09 14:56:36 +03:00			`#include "libavcodec/dsputil.h"`
add required include to make this file self-contained Originally committed as revision 11211 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-12-13 00:45:03 +02:00
avoid POSIX reserved _t suffix Originally committed as revision 16117 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-14 02:48:16 +02:00			`typedef struct { uint64_t a, b; } xmm_reg;`
clean up an ugliness introduced in r11826. this syntax will require fewer changes when adding future sse2 code. Originally committed as revision 11868 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-05 03:16:48 +02:00
use ff_ prefix for extern vars Originally committed as revision 11101 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:36:15 +02:00			`extern const uint64_t ff_bone;`
			`extern const uint64_t ff_wtwo;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00
			`extern const uint64_t ff_pdw_80000000[2];`

			`extern const uint64_t ff_pw_3;`
Make ff_pw_4 128 bits Originally committed as revision 24207 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-12 01:52:55 +03:00			`extern const xmm_reg ff_pw_4;`
avoid POSIX reserved _t suffix Originally committed as revision 16117 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-14 02:48:16 +02:00			`extern const xmm_reg ff_pw_5;`
			`extern const xmm_reg ff_pw_8;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`extern const uint64_t ff_pw_15;`
avoid POSIX reserved _t suffix Originally committed as revision 16117 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-14 02:48:16 +02:00			`extern const xmm_reg ff_pw_16;`
Add header declarations for mmx/sse constants missing them Originally committed as revision 24381 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-21 13:02:07 +03:00			`extern const xmm_reg ff_pw_18;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`extern const uint64_t ff_pw_20;`
Add header declarations for mmx/sse constants missing them Originally committed as revision 24381 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-21 13:02:07 +03:00			`extern const xmm_reg ff_pw_27;`
avoid POSIX reserved _t suffix Originally committed as revision 16117 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-14 02:48:16 +02:00			`extern const xmm_reg ff_pw_28;`
			`extern const xmm_reg ff_pw_32;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`extern const uint64_t ff_pw_42;`
Add header declarations for mmx/sse constants missing them Originally committed as revision 24381 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-21 13:02:07 +03:00			`extern const uint64_t ff_pw_53;`
			`extern const xmm_reg ff_pw_63;`
convert ff_pw_64 into an xmm_reg for future use in vp6 sse code Originally committed as revision 17192 to svn://svn.ffmpeg.org/ffmpeg/trunk 2009-02-13 01:48:07 +02:00			`extern const xmm_reg ff_pw_64;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`extern const uint64_t ff_pw_96;`
			`extern const uint64_t ff_pw_128;`
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-26 01:14:22 +02:00			`extern const uint64_t ff_pw_255;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00
VP8 H/V inner loopfilter MMX/MMXEXT/SSE2 optimizations. Originally committed as revision 24250 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-16 02:02:34 +03:00			`extern const xmm_reg ff_pb_1;`
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264 Originally committed as revision 23783 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-06-25 21:25:49 +03:00			`extern const xmm_reg ff_pb_3;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`extern const uint64_t ff_pb_7;`
MMX VP3 Loop Filter Originally committed as revision 15630 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-10-17 06:18:08 +03:00			`extern const uint64_t ff_pb_1F;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`extern const uint64_t ff_pb_3F;`
MMX VP3 Loop Filter Originally committed as revision 15630 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-10-17 06:18:08 +03:00			`extern const uint64_t ff_pb_81;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`extern const uint64_t ff_pb_A1;`
Add header declarations for mmx/sse constants missing them Originally committed as revision 24381 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-21 13:02:07 +03:00			`extern const xmm_reg ff_pb_F8;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00			`extern const uint64_t ff_pb_FC;`
Add header declarations for mmx/sse constants missing them Originally committed as revision 24381 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-21 13:02:07 +03:00			`extern const xmm_reg ff_pb_FE;`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-28 00:23:34 +02:00
			`extern const double ff_pd_1[2];`
			`extern const double ff_pd_2[2];`

Make LOAD4/STORE4 macros more generic. Patch by Victor Pollex victor pollex web de Original thread: [PATCH] mmx implementation of vc-1 inverse transformations Date: 06/21/2008 03:37 PM Originally committed as revision 14108 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-07-08 12:24:11 +03:00			`#define LOAD4(stride,in,a,b,c,d)\`
			`"movq 0*"#stride"+"#in", "#a"\n\t"\`
			`"movq 1*"#stride"+"#in", "#b"\n\t"\`
			`"movq 2*"#stride"+"#in", "#c"\n\t"\`
			`"movq 3*"#stride"+"#in", "#d"\n\t"`

			`#define STORE4(stride,out,a,b,c,d)\`
			`"movq "#a", 0*"#stride"+"#out"\n\t"\`
			`"movq "#b", 1*"#stride"+"#out"\n\t"\`
			`"movq "#c", 2*"#stride"+"#out"\n\t"\`
			`"movq "#d", 3*"#stride"+"#out"\n\t"`

Factorize some duplicated code from CAVS and H.264 into a common file. patch by Christophe Gisquet, christophe.gisquet free fr Originally committed as revision 11504 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-01-11 10:29:58 +02:00			`/* in/out: mma=mma+mmb, mmb=mmb-mma */`
			`#define SUMSUB_BA( a, b ) \`
			`"paddw "#b", "#a" \n\t"\`
			`"paddw "#b", "#b" \n\t"\`
			`"psubw "#a", "#b" \n\t"`

			`#define SBUTTERFLY(a,b,t,n,m)\`
			`"mov" #m " " #a ", " #t " \n\t" /* abcd */\`
			`"punpckl" #n " " #b ", " #a " \n\t" /* aebf */\`
			`"punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\`

			`#define TRANSPOSE4(a,b,c,d,t)\`
			`SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\`
			`SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\`
			`SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\`
			`SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */`

MMX VP3 Loop Filter Originally committed as revision 15630 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-10-17 06:18:08 +03:00			`// e,f,g,h can be memory`
			`// out: a,d,t,c`
			`#define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\`
			`"punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\`
			`"punpcklbw " #f ", " #b " \n\t" /* b0 f0 b1 f1 b2 f2 b3 f3 */\`
			`"punpcklbw " #g ", " #c " \n\t" /* c0 g0 c1 g1 c2 g2 d3 g3 */\`
			`"punpcklbw " #h ", " #d " \n\t" /* d0 h0 d1 h1 d2 h2 d3 h3 */\`
			`SBUTTERFLY(a, b, t, bw, q) /* a= a0 b0 e0 f0 a1 b1 e1 f1 */\`
			`/* t= a2 b2 e2 f2 a3 b3 e3 f3 */\`
			`SBUTTERFLY(c, d, b, bw, q) /* c= c0 d0 g0 h0 c1 d1 g1 h1 */\`
			`/* b= c2 d2 g2 h2 c3 d3 g3 h3 */\`
			`SBUTTERFLY(a, c, d, wd, q) /* a= a0 b0 c0 d0 e0 f0 g0 h0 */\`
			`/* d= a1 b1 c1 d1 e1 f1 g1 h1 */\`
			`SBUTTERFLY(t, b, c, wd, q) /* t= a2 b2 c2 d2 e2 f2 g2 h2 */\`
			`/* c= a3 b3 c3 d3 e3 f3 g3 h3 */`

Change semantic of CONFIG_, HAVE_ and ARCH_*. They are now always defined to either 0 or 1. Originally committed as revision 16590 to svn://svn.ffmpeg.org/ffmpeg/trunk 2009-01-14 01:44:16 +02:00			`#if ARCH_X86_64`
Factorize some duplicated code from CAVS and H.264 into a common file. patch by Christophe Gisquet, christophe.gisquet free fr Originally committed as revision 11504 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-01-11 10:29:58 +02:00			`// permutes 01234567 -> 05736421`
			`#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\`
			`SBUTTERFLY(a,b,%%xmm8,wd,dqa)\`
			`SBUTTERFLY(c,d,b,wd,dqa)\`
			`SBUTTERFLY(e,f,d,wd,dqa)\`
			`SBUTTERFLY(g,h,f,wd,dqa)\`
			`SBUTTERFLY(a,c,h,dq,dqa)\`
			`SBUTTERFLY(%%xmm8,b,c,dq,dqa)\`
			`SBUTTERFLY(e,g,b,dq,dqa)\`
			`SBUTTERFLY(d,f,g,dq,dqa)\`
			`SBUTTERFLY(a,e,f,qdq,dqa)\`
			`SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\`
			`SBUTTERFLY(h,b,d,qdq,dqa)\`
			`SBUTTERFLY(c,g,b,qdq,dqa)\`
			`"movdqa %%xmm8, "#g" \n\t"`
			`#else`
			`#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\`
			`"movdqa "#h", "#t" \n\t"\`
			`SBUTTERFLY(a,b,h,wd,dqa)\`
			`"movdqa "#h", 16"#t" \n\t"\`
			`"movdqa "#t", "#h" \n\t"\`
			`SBUTTERFLY(c,d,b,wd,dqa)\`
			`SBUTTERFLY(e,f,d,wd,dqa)\`
			`SBUTTERFLY(g,h,f,wd,dqa)\`
			`SBUTTERFLY(a,c,h,dq,dqa)\`
			`"movdqa "#h", "#t" \n\t"\`
			`"movdqa 16"#t", "#h" \n\t"\`
			`SBUTTERFLY(h,b,c,dq,dqa)\`
			`SBUTTERFLY(e,g,b,dq,dqa)\`
			`SBUTTERFLY(d,f,g,dq,dqa)\`
			`SBUTTERFLY(a,e,f,qdq,dqa)\`
			`SBUTTERFLY(h,d,e,qdq,dqa)\`
			`"movdqa "#h", 16"#t" \n\t"\`
			`"movdqa "#t", "#h" \n\t"\`
			`SBUTTERFLY(h,b,d,qdq,dqa)\`
			`SBUTTERFLY(c,g,b,qdq,dqa)\`
			`"movdqa 16"#t", "#g" \n\t"`
			`#endif`

split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-26 01:14:22 +02:00			`#define MOVQ_WONE(regd) \`
Convert asm keyword into __asm__. Neither the asm() nor the __asm__() keyword is part of the C99 standard, but while GCC accepts the former in C89 syntax, it is not accepted in C99 unless GNU extensions are turned on (with -fasm). The latter form is accepted in any syntax as an extension (without requiring further command-line options). Sun Studio C99 compiler also does not accept asm() while accepting __asm__(), albeit reporting warnings that it's not valid C99 syntax. Originally committed as revision 15627 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-10-16 16:34:09 +03:00			`__asm__ volatile ( \`
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-26 01:14:22 +02:00			`"pcmpeqd %%" #regd ", %%" #regd " \n\t" \`
			`"psrlw $15, %%" #regd ::)`

			`void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);`
Move declarations of some mmx functions to dsputil_mmx.h Originally committed as revision 19739 to svn://svn.ffmpeg.org/ffmpeg/trunk 2009-08-29 19:55:50 +03:00			`void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);`

			`void add_pixels_clamped_mmx(const DCTELEM block, uint8_t pixels, int line_size);`
			`void put_pixels_clamped_mmx(const DCTELEM block, uint8_t pixels, int line_size);`
			`void put_signed_pixels_clamped_mmx(const DCTELEM block, uint8_t pixels, int line_size);`
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-26 01:14:22 +02:00
x86: move function prototypes to header files Originally committed as revision 22266 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-03-07 00:37:08 +02:00			`void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);`
			`void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);`
			`void ff_put_cavs_qpel8_mc00_mmx2(uint8_t dst, uint8_t src, int stride);`
			`void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t dst, uint8_t src, int stride);`
			`void ff_put_cavs_qpel16_mc00_mmx2(uint8_t dst, uint8_t src, int stride);`
			`void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t dst, uint8_t src, int stride);`

			`void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);`
			`void ff_put_vc1_mspel_mc00_mmx(uint8_t dst, const uint8_t src, int stride, int rnd);`
			`void ff_avg_vc1_mspel_mc00_mmx2(uint8_t dst, const uint8_t src, int stride, int rnd);`

			`void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,`
			`double *autoc);`

			`void ff_mmx_idct(DCTELEM *block);`
			`void ff_mmxext_idct(DCTELEM *block);`

Convert deinterlacing MMX code to YASM Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-31 17:50:51 +03:00
			`void ff_deinterlace_line_mmx(uint8_t *dst,`
			`const uint8_t lum_m4, const uint8_t lum_m3,`
			`const uint8_t lum_m2, const uint8_t lum_m1,`
			`const uint8_t *lum,`
			`int size);`

			`void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,`
			`const uint8_t *lum_m3,`
			`const uint8_t *lum_m2,`
			`const uint8_t *lum_m1,`
			`const uint8_t *lum, int size);`

Rename libavcodec/i386/ --> libavcodec/x86/. It contains optimizations that are not specific to i386 and libavutil uses this naming scheme already. Originally committed as revision 16270 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-22 11:12:42 +02:00			`#endif /* AVCODEC_X86_DSPUTIL_MMX_H */`