From 2734ba787b4a2cbc44bbc6499ae82013c790f453 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 26 Jul 2012 22:07:29 -0700 Subject: [PATCH 01/35] vp56: port x86 simd to cpuflags. --- libavcodec/x86/vp56dsp.asm | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/libavcodec/x86/vp56dsp.asm b/libavcodec/x86/vp56dsp.asm index 66a97f1593..27a82bccab 100644 --- a/libavcodec/x86/vp56dsp.asm +++ b/libavcodec/x86/vp56dsp.asm @@ -27,7 +27,8 @@ cextern pw_64 SECTION .text -%macro DIAG4_MMX 6 +%macro DIAG4 6 +%if mmsize == 8 movq m0, [%1+%2] movq m1, [%1+%3] movq m3, m0 @@ -64,9 +65,7 @@ SECTION .text psraw m3, 7 packuswb m0, m3 movq [%6], m0 -%endmacro - -%macro DIAG4_SSE2 6 +%else ; mmsize == 16 movq m0, [%1+%2] movq m1, [%1+%3] punpcklbw m0, m7 @@ -86,9 +85,11 @@ SECTION .text psraw m0, 7 packuswb m0, m0 movq [%6], m0 +%endif ; mmsize == 8/16 %endmacro -%macro SPLAT4REGS_MMX 0 +%macro SPLAT4REGS 0 +%if mmsize == 8 movq m5, m3 punpcklwd m3, m3 movq m4, m3 @@ -102,9 +103,7 @@ SECTION .text movq [rsp+8*12], m4 movq [rsp+8*13], m5 movq [rsp+8*14], m2 -%endmacro - -%macro SPLAT4REGS_SSE2 0 +%else ; mmsize == 16 pshuflw m4, m3, 0x0 pshuflw m5, m3, 0x55 pshuflw m6, m3, 0xAA @@ -113,15 +112,16 @@ SECTION .text punpcklqdq m5, m5 punpcklqdq m6, m6 punpcklqdq m3, m3 +%endif ; mmsize == 8/16 %endmacro -%macro vp6_filter_diag4 2 +%macro vp6_filter_diag4 0 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride, ; const int16_t h_weight[4], const int16_t v_weights[4]) -cglobal vp6_filter_diag4_%1, 5, 7, %2 +cglobal vp6_filter_diag4, 5, 7, 8 mov r5, rsp ; backup stack pointer and rsp, ~(mmsize-1) ; align stack -%ifidn %1, sse2 +%if mmsize == 16 sub rsp, 8*11 %else sub rsp, 8*15 @@ -162,12 +162,8 @@ cglobal vp6_filter_diag4_%1, 5, 7, %2 RET %endmacro -INIT_MMX -%define DIAG4 DIAG4_MMX -%define SPLAT4REGS SPLAT4REGS_MMX -vp6_filter_diag4 mmx, 0 +INIT_MMX mmx +vp6_filter_diag4 -INIT_XMM -%define DIAG4 DIAG4_SSE2 
-%define SPLAT4REGS SPLAT4REGS_SSE2 -vp6_filter_diag4 sse2, 8 +INIT_XMM sse2 +vp6_filter_diag4 From 158744a4cd63a8dce2060b366ae7b6509351d6c8 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 26 Jul 2012 22:09:46 -0700 Subject: [PATCH 02/35] vp56: only compile MMX SIMD on x86-32. All x86-64 CPUs have SSE2, so the MMX version will never be used. This leads to smaller binaries. --- libavcodec/x86/vp56dsp.asm | 2 ++ libavcodec/x86/vp56dsp_init.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/libavcodec/x86/vp56dsp.asm b/libavcodec/x86/vp56dsp.asm index 27a82bccab..ca4d97ec15 100644 --- a/libavcodec/x86/vp56dsp.asm +++ b/libavcodec/x86/vp56dsp.asm @@ -162,8 +162,10 @@ cglobal vp6_filter_diag4, 5, 7, 8 RET %endmacro +%if ARCH_X86_32 INIT_MMX mmx vp6_filter_diag4 +%endif INIT_XMM sse2 vp6_filter_diag4 diff --git a/libavcodec/x86/vp56dsp_init.c b/libavcodec/x86/vp56dsp_init.c index 29892812ac..ae04440611 100644 --- a/libavcodec/x86/vp56dsp_init.c +++ b/libavcodec/x86/vp56dsp_init.c @@ -36,9 +36,11 @@ av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec) int mm_flags = av_get_cpu_flags(); if (CONFIG_VP6_DECODER && codec == CODEC_ID_VP6) { +#if ARCH_X86_32 if (mm_flags & AV_CPU_FLAG_MMX) { c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; } +#endif if (mm_flags & AV_CPU_FLAG_SSE2) { c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; From 76888c64b008bc3acf6e5fe5117a360f2c87aae4 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 26 Jul 2012 22:19:19 -0700 Subject: [PATCH 03/35] rv34: port x86 SIMD to cpuflags. 
--- libavcodec/x86/rv34dsp.asm | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index 32bcdced8a..c43b77abd2 100644 --- a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -46,7 +46,7 @@ SECTION .text %endmacro %macro rv34_idct 1 -cglobal rv34_idct_%1_mmx2, 1, 2, 0 +cglobal rv34_idct_%1, 1, 2, 0 movsx r1, word [r0] IDCT_DC r1 movd m0, r1 @@ -58,14 +58,15 @@ cglobal rv34_idct_%1_mmx2, 1, 2, 0 REP_RET %endmacro -INIT_MMX +INIT_MMX mmx2 %define IDCT_DC IDCT_DC_ROUND rv34_idct dc %define IDCT_DC IDCT_DC_NOROUND rv34_idct dc_noround ; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc); -cglobal rv34_idct_dc_add_mmx, 3, 3 +INIT_MMX mmx +cglobal rv34_idct_dc_add, 3, 3 ; calculate DC IDCT_DC_ROUND r2 pxor m1, m1 @@ -167,8 +168,8 @@ cglobal rv34_idct_add, 3,3,0, d, s, b ret ; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc); -INIT_XMM -cglobal rv34_idct_dc_add_sse4, 3, 3, 6 +INIT_XMM sse4 +cglobal rv34_idct_dc_add, 3, 3, 6 ; load data IDCT_DC_ROUND r2 pxor m1, m1 From 41ecbbc7aa3d491cc9d8d425634c5901eefa088a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 27 Jul 2012 20:22:51 +0300 Subject: [PATCH 04/35] tls: Return AVERROR_EOF if the TLS_read/write functions return 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenSSL returns 0 when the peer has closed the connection. GnuTLS doesn't return that though, but returns GNUTLS_E_UNEXPECTED_PACKET_LENGTH if the connection simply is closed without a clean close notify packet. 
Tested-by: Antti Seppälä Signed-off-by: Martin Storsjö --- libavformat/tls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavformat/tls.c b/libavformat/tls.c index fb84fa82b6..866e55f2ba 100644 --- a/libavformat/tls.c +++ b/libavformat/tls.c @@ -209,7 +209,7 @@ static int tls_read(URLContext *h, uint8_t *buf, int size) if (ret > 0) return ret; if (ret == 0) - return AVERROR(EIO); + return AVERROR_EOF; if ((ret = do_tls_poll(h, ret)) < 0) return ret; } @@ -224,7 +224,7 @@ static int tls_write(URLContext *h, const uint8_t *buf, int size) if (ret > 0) return ret; if (ret == 0) - return AVERROR(EIO); + return AVERROR_EOF; if ((ret = do_tls_poll(h, ret)) < 0) return ret; } From 8ebacfb598933f66ff34ad1b12cb1aa14928ac73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 27 Jul 2012 20:24:53 +0300 Subject: [PATCH 05/35] hls: Proceed to the next segment at any error code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, we returned any error code except AVERROR_EOF to the caller - only if AVERROR_EOF or 0 was returned, we proceeded to the next segment. With some setups of web servers, using Connection: close in https and GnuTLS, we don't get a clean error code at the end of segments. In those cases, just proceed to the next segment. 
Tested-by: Antti Seppälä Signed-off-by: Martin Storsjö --- libavformat/hls.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavformat/hls.c b/libavformat/hls.c index 253463edf6..cda65cd362 100644 --- a/libavformat/hls.c +++ b/libavformat/hls.c @@ -420,8 +420,6 @@ reload: ret = ffurl_read(v->input, buf, buf_size); if (ret > 0) return ret; - if (ret < 0 && ret != AVERROR_EOF) - return ret; ffurl_close(v->input); v->input = NULL; v->cur_seq_no++; From 755834e94f46eb1c0cdba28cccf2475ecec3662b Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 25 Jul 2012 13:25:58 +0200 Subject: [PATCH 06/35] build: Rename YASMDEP variable to DEPYASM for consistency --- Makefile | 2 +- configure | 2 +- library.mak | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 3716e9ee84..37faa76fda 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ ifndef V Q = @ ECHO = printf "$(1)\t%s\n" $(2) BRIEF = CC AS YASM AR LD HOSTCC -SILENT = DEPCC DEPAS DEPHOSTCC YASMDEP RM RANLIB +SILENT = DEPCC DEPAS DEPHOSTCC DEPYASM RM RANLIB MSG = $@ M = @$(call ECHO,$(TAG),$@); $(foreach VAR,$(BRIEF), \ diff --git a/configure b/configure index 715e49b020..6945152dae 100755 --- a/configure +++ b/configure @@ -3411,7 +3411,7 @@ DEPCCFLAGS=$DEPCCFLAGS \$(CPPFLAGS) DEPAS=$as DEPASFLAGS=$DEPASFLAGS \$(CPPFLAGS) YASM=$yasmexe -YASMDEP=$yasmexe +DEPYASM=$yasmexe AR=$ar RANLIB=$ranlib LN_S=$ln_s diff --git a/library.mak b/library.mak index 6159799c5d..cd3fdc28f9 100644 --- a/library.mak +++ b/library.mak @@ -17,7 +17,7 @@ $(SUBDIR)%-test.o: $(SUBDIR)%.c $(COMPILE_C) $(SUBDIR)x86/%.o: $(SUBDIR)x86/%.asm - $(YASMDEP) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d) + $(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d) $(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $< Date: Sat, 21 Jul 2012 21:28:54 +0200 Subject: [PATCH 07/35] x11grab: cosmetics: consistent naming for x11grab-related things --- configure | 2 +- libavdevice/Makefile | 2 +- libavdevice/alldevices.c | 2 +- libavdevice/avdevice.h | 2 +- 
libavdevice/x11grab.c | 25 ++++++++++++------------- 5 files changed, 16 insertions(+), 17 deletions(-) diff --git a/configure b/configure index 6945152dae..7e2f6baebc 100755 --- a/configure +++ b/configure @@ -1545,7 +1545,7 @@ sndio_outdev_deps="sndio_h" v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h" vfwcap_indev_deps="capCreateCaptureWindow vfwcap_defines" vfwcap_indev_extralibs="-lavicap32" -x11_grab_device_indev_deps="x11grab XShmCreateImage" +x11grab_indev_deps="x11grab XShmCreateImage" # protocols ffrtmpcrypt_protocol_deps="!librtmp_protocol" diff --git a/libavdevice/Makefile b/libavdevice/Makefile index 7f142176e6..76d11c1f1c 100644 --- a/libavdevice/Makefile +++ b/libavdevice/Makefile @@ -23,7 +23,7 @@ OBJS-$(CONFIG_SNDIO_INDEV) += sndio_common.o sndio_dec.o OBJS-$(CONFIG_SNDIO_OUTDEV) += sndio_common.o sndio_enc.o OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o -OBJS-$(CONFIG_X11_GRAB_DEVICE_INDEV) += x11grab.o +OBJS-$(CONFIG_X11GRAB_INDEV) += x11grab.o # external libraries OBJS-$(CONFIG_LIBCDIO_INDEV) += libcdio.o diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c index f1ff79046c..9ec9fdfc49 100644 --- a/libavdevice/alldevices.c +++ b/libavdevice/alldevices.c @@ -49,7 +49,7 @@ void avdevice_register_all(void) REGISTER_INOUTDEV (SNDIO, sndio); REGISTER_INDEV (V4L2, v4l2); REGISTER_INDEV (VFWCAP, vfwcap); - REGISTER_INDEV (X11_GRAB_DEVICE, x11_grab_device); + REGISTER_INDEV (X11GRAB, x11grab); /* external libraries */ REGISTER_INDEV (LIBCDIO, libcdio); diff --git a/libavdevice/avdevice.h b/libavdevice/avdevice.h index 0efe2a7c38..39166a570a 100644 --- a/libavdevice/avdevice.h +++ b/libavdevice/avdevice.h @@ -36,7 +36,7 @@ * (de)muxers in libavdevice are of the AVFMT_NOFILE type (they use their own * I/O functions). The filename passed to avformat_open_input() often does not * refer to an actually existing file, but has some special device-specific - * meaning - e.g. 
for the x11grab device it is the display name. + * meaning - e.g. for x11grab it is the display name. * * To use libavdevice, simply call avdevice_register_all() to register all * compiled muxers and demuxers. They all use standard libavformat API. diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c index 06cec9daef..dd432b3fd5 100644 --- a/libavdevice/x11grab.c +++ b/libavdevice/x11grab.c @@ -56,8 +56,7 @@ /** * X11 Device Demuxer context */ -struct x11_grab -{ +struct x11grab { const AVClass *class; /**< Class for private options. */ int frame_size; /**< Size in bytes of a grabbed frame */ AVRational time_base; /**< Time base */ @@ -85,10 +84,10 @@ struct x11_grab /** * Draw grabbing region window * - * @param s x11_grab context + * @param s x11grab context */ static void -x11grab_draw_region_win(struct x11_grab *s) +x11grab_draw_region_win(struct x11grab *s) { Display *dpy = s->dpy; int screen; @@ -110,10 +109,10 @@ x11grab_draw_region_win(struct x11_grab *s) /** * Initialize grabbing region window * - * @param s x11_grab context + * @param s x11grab context */ static void -x11grab_region_win_init(struct x11_grab *s) +x11grab_region_win_init(struct x11grab *s) { Display *dpy = s->dpy; int screen; @@ -155,7 +154,7 @@ x11grab_region_win_init(struct x11_grab *s) static int x11grab_read_header(AVFormatContext *s1) { - struct x11_grab *x11grab = s1->priv_data; + struct x11grab *x11grab = s1->priv_data; Display *dpy; AVStream *st = NULL; enum PixelFormat input_pixfmt; @@ -334,7 +333,7 @@ out: * coordinates */ static void -paint_mouse_pointer(XImage *image, struct x11_grab *s) +paint_mouse_pointer(XImage *image, struct x11grab *s) { int x_off = s->x_off; int y_off = s->y_off; @@ -448,7 +447,7 @@ xget_zpixmap(Display *dpy, Drawable d, XImage *image, int x, int y) static int x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt) { - struct x11_grab *s = s1->priv_data; + struct x11grab *s = s1->priv_data; Display *dpy = s->dpy; XImage *image = s->image; int x_off 
= s->x_off; @@ -558,7 +557,7 @@ x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt) static int x11grab_read_close(AVFormatContext *s1) { - struct x11_grab *x11grab = s1->priv_data; + struct x11grab *x11grab = s1->priv_data; /* Detach cleanly from shared mem */ if (x11grab->use_shm) { @@ -582,7 +581,7 @@ x11grab_read_close(AVFormatContext *s1) return 0; } -#define OFFSET(x) offsetof(struct x11_grab, x) +#define OFFSET(x) offsetof(struct x11grab, x) #define DEC AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), AV_OPT_TYPE_STRING, {.str = "vga"}, 0, 0, DEC }, @@ -603,10 +602,10 @@ static const AVClass x11_class = { }; /** x11 grabber device demuxer declaration */ -AVInputFormat ff_x11_grab_device_demuxer = { +AVInputFormat ff_x11grab_demuxer = { .name = "x11grab", .long_name = NULL_IF_CONFIG_SMALL("X11grab"), - .priv_data_size = sizeof(struct x11_grab), + .priv_data_size = sizeof(struct x11grab), .read_header = x11grab_read_header, .read_packet = x11grab_read_packet, .read_close = x11grab_read_close, From 42ade117dd8b5ab76d5331bd193fcfefe0b96385 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Fri, 27 Jul 2012 12:06:01 +0200 Subject: [PATCH 08/35] build: Use portable compiler flag constructs in header compilation rule --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 37faa76fda..5a7d5917e2 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ COMPILE_S = $(call COMPILE,AS) $(COMPILE_S) %.ho: %.h - $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ -x c $< + $(CC) $(CCFLAGS) -c $(CC_O) -x c $< %.ver: %.v $(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@ From 4a26fdd8520d5ad7ea6458854610521bbda880d5 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 27 Jul 2012 15:17:27 -0700 Subject: [PATCH 09/35] vp3: port x86 SIMD to cpuflags. 
--- libavcodec/x86/vp3dsp.asm | 94 +++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm index af2f60c6ae..5877520c6c 100644 --- a/libavcodec/x86/vp3dsp.asm +++ b/libavcodec/x86/vp3dsp.asm @@ -102,8 +102,8 @@ SECTION .text mov [r0+r3 -1], r2w %endmacro -INIT_MMX -cglobal vp3_v_loop_filter_mmx2, 3, 4 +INIT_MMX mmx2 +cglobal vp3_v_loop_filter, 3, 4 %if ARCH_X86_64 movsxd r1, r1d %endif @@ -120,7 +120,7 @@ cglobal vp3_v_loop_filter_mmx2, 3, 4 movq [r0 ], m3 RET -cglobal vp3_h_loop_filter_mmx2, 3, 4 +cglobal vp3_h_loop_filter, 3, 4 %if ARCH_X86_64 movsxd r1, r1d %endif @@ -354,38 +354,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 movq I(2), m2 %endmacro -%macro VP3_IDCT_mmx 1 - ; eax = quantized input - ; ebx = dequantizer matrix - ; ecx = IDCT constants - ; M(I) = ecx + MaskOffset(0) + I * 8 - ; C(I) = ecx + CosineOffset(32) + (I-1) * 8 - ; edx = output - ; r0..r7 = mm0..mm7 -%define OC_8 [pw_8] -%define C(x) [vp3_idct_data+16*(x-1)] - - ; at this point, function has completed dequantization + dezigzag + - ; partial transposition; now do the idct itself -%define I(x) [%1+16* x ] -%define J(x) [%1+16*(x-4)+8] - RowIDCT - Transpose - -%define I(x) [%1+16* x +64] -%define J(x) [%1+16*(x-4)+72] - RowIDCT - Transpose - -%define I(x) [%1+16*x] -%define J(x) [%1+16*x] - ColumnIDCT - -%define I(x) [%1+16*x+8] -%define J(x) [%1+16*x+8] - ColumnIDCT -%endmacro - %macro VP3_1D_IDCT_SSE2 0 movdqa m2, I(3) ; xmm2 = i3 movdqa m6, C(3) ; xmm6 = c3 @@ -501,7 +469,8 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 movdqa O(7), m%8 %endmacro -%macro VP3_IDCT_sse2 1 +%macro VP3_IDCT 1 +%if mmsize == 16 %define I(x) [%1+16*x] %define O(x) [%1+16*x] %define C(x) [vp3_idct_data+16*(x-1)] @@ -519,11 +488,42 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 %define ADD(x) paddsw x, [pw_8] VP3_1D_IDCT_SSE2 PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7 +%else ; mmsize == 8 + ; eax = quantized input + ; ebx = dequantizer matrix 
+ ; ecx = IDCT constants + ; M(I) = ecx + MaskOffset(0) + I * 8 + ; C(I) = ecx + CosineOffset(32) + (I-1) * 8 + ; edx = output + ; r0..r7 = mm0..mm7 +%define OC_8 [pw_8] +%define C(x) [vp3_idct_data+16*(x-1)] + + ; at this point, function has completed dequantization + dezigzag + + ; partial transposition; now do the idct itself +%define I(x) [%1+16* x ] +%define J(x) [%1+16*(x-4)+8] + RowIDCT + Transpose + +%define I(x) [%1+16* x +64] +%define J(x) [%1+16*(x-4)+72] + RowIDCT + Transpose + +%define I(x) [%1+16*x] +%define J(x) [%1+16*x] + ColumnIDCT + +%define I(x) [%1+16*x+8] +%define J(x) [%1+16*x+8] + ColumnIDCT +%endif ; mmsize == 16/8 %endmacro -%macro vp3_idct_funcs 1 -cglobal vp3_idct_put_%1, 3, 4, 9 - VP3_IDCT_%1 r2 +%macro vp3_idct_funcs 0 +cglobal vp3_idct_put, 3, 4, 9 + VP3_IDCT r2 movsxdifnidn r1, r1d mova m4, [pb_80] @@ -565,8 +565,8 @@ cglobal vp3_idct_put_%1, 3, 4, 9 %endrep RET -cglobal vp3_idct_add_%1, 3, 4, 9 - VP3_IDCT_%1 r2 +cglobal vp3_idct_add, 3, 4, 9 + VP3_IDCT r2 mov r3, 4 pxor m4, m4 @@ -607,10 +607,10 @@ cglobal vp3_idct_add_%1, 3, 4, 9 RET %endmacro -INIT_MMX -vp3_idct_funcs mmx -INIT_XMM -vp3_idct_funcs sse2 +INIT_MMX mmx +vp3_idct_funcs +INIT_XMM sse2 +vp3_idct_funcs %macro DC_ADD 0 movq m2, [r0 ] @@ -631,8 +631,8 @@ vp3_idct_funcs sse2 movq [r0+r3 ], m5 %endmacro -INIT_MMX -cglobal vp3_idct_dc_add_mmx2, 3, 4 +INIT_MMX mmx2 +cglobal vp3_idct_dc_add, 3, 4 %if ARCH_X86_64 movsxd r1, r1d %endif From d07ff3cd5a31fad25e2fc89ce8ef98da144c0ee6 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 26 Jul 2012 20:43:50 -0700 Subject: [PATCH 10/35] h264_chromamc_10bit: port x86 simd to cpuflags. 
--- libavcodec/x86/dsputil_mmx.c | 16 +++++------ libavcodec/x86/h264_chromamc_10bit.asm | 40 +++++++++++++------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index e91ede531e..afbb5312b8 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2117,10 +2117,10 @@ void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \ (uint8_t *dst, uint8_t *src, \ int stride, int h, int x, int y); -CHROMA_MC(put, 2, 10, mmxext) -CHROMA_MC(avg, 2, 10, mmxext) -CHROMA_MC(put, 4, 10, mmxext) -CHROMA_MC(avg, 4, 10, mmxext) +CHROMA_MC(put, 2, 10, mmx2) +CHROMA_MC(avg, 2, 10, mmx2) +CHROMA_MC(put, 4, 10, mmx2) +CHROMA_MC(avg, 4, 10, mmx2) CHROMA_MC(put, 8, 10, sse2) CHROMA_MC(avg, 8, 10, sse2) CHROMA_MC(put, 8, 10, avx) @@ -2740,10 +2740,10 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmx2; } if (bit_depth == 10 && CONFIG_H264CHROMA) { - c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext; - c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext; - c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext; - c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext; + c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmx2; + c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmx2; + c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmx2; + c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmx2; } c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm index 3f7c513069..370c7b5a46 100644 --- a/libavcodec/x86/h264_chromamc_10bit.asm +++ b/libavcodec/x86/h264_chromamc_10bit.asm @@ -60,10 +60,10 @@ SECTION .text ;----------------------------------------------------------------------------- ; 
void put/avg_h264_chroma_mc8(pixel *dst, pixel *src, int stride, int h, int mx, int my) ;----------------------------------------------------------------------------- -%macro CHROMA_MC8 2 +%macro CHROMA_MC8 1 ; put/avg_h264_chroma_mc8_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, ; int stride, int h, int mx, int my) -cglobal %1_h264_chroma_mc8_10_%2, 6,7,8 +cglobal %1_h264_chroma_mc8_10, 6,7,8 movsxdifnidn r2, r2d mov r6d, r5d or r6d, r4d @@ -173,8 +173,8 @@ cglobal %1_h264_chroma_mc8_10_%2, 6,7,8 add r0, r2 %endmacro -%macro CHROMA_MC4 2 -cglobal %1_h264_chroma_mc4_10_%2, 6,6,7 +%macro CHROMA_MC4 1 +cglobal %1_h264_chroma_mc4_10, 6,6,7 movsxdifnidn r2, r2d movd m2, r4m ; x movd m3, r5m ; y @@ -203,8 +203,8 @@ cglobal %1_h264_chroma_mc4_10_%2, 6,6,7 ;----------------------------------------------------------------------------- ; void put/avg_h264_chroma_mc2(pixel *dst, pixel *src, int stride, int h, int mx, int my) ;----------------------------------------------------------------------------- -%macro CHROMA_MC2 2 -cglobal %1_h264_chroma_mc2_10_%2, 6,7 +%macro CHROMA_MC2 1 +cglobal %1_h264_chroma_mc2_10, 6,7 movsxdifnidn r2, r2d mov r6d, r4d shl r4d, 16 @@ -250,24 +250,24 @@ cglobal %1_h264_chroma_mc2_10_%2, 6,7 %endmacro %define CHROMAMC_AVG NOTHING -INIT_XMM -CHROMA_MC8 put, sse2 +INIT_XMM sse2 +CHROMA_MC8 put %if HAVE_AVX -INIT_AVX -CHROMA_MC8 put, avx +INIT_XMM avx +CHROMA_MC8 put %endif -INIT_MMX -CHROMA_MC4 put, mmxext -CHROMA_MC2 put, mmxext +INIT_MMX mmx2 +CHROMA_MC4 put +CHROMA_MC2 put %define CHROMAMC_AVG AVG %define PAVG pavgw -INIT_XMM -CHROMA_MC8 avg, sse2 +INIT_XMM sse2 +CHROMA_MC8 avg %if HAVE_AVX -INIT_AVX -CHROMA_MC8 avg, avx +INIT_XMM avx +CHROMA_MC8 avg %endif -INIT_MMX -CHROMA_MC4 avg, mmxext -CHROMA_MC2 avg, mmxext +INIT_MMX mmx2 +CHROMA_MC4 avg +CHROMA_MC2 avg From 18031c49dcf0317b1f0690b3bb2941a6a94db38f Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 20 Jul 2012 22:22:41 +0100 Subject: [PATCH 11/35] build: support non-standard 
replacements for -E flag This allows using non-standard flags for running the C preprocessor. The -o flag must be included in this setting due to strange syntax required by some compilers. Set the correct flags for tms470. Signed-off-by: Mans Rullgard --- configure | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 7e2f6baebc..42a53e5e8b 100755 --- a/configure +++ b/configure @@ -622,6 +622,10 @@ cc_o(){ eval printf '%s\\n' $CC_O } +cc_e(){ + eval printf '%s\\n' $CC_E +} + check_cc(){ log check_cc "$@" cat > $TMPC @@ -633,7 +637,7 @@ check_cpp(){ log check_cpp "$@" cat > $TMPC log_file $TMPC - check_cmd $cc $CPPFLAGS $CFLAGS "$@" -E -o $TMPO $TMPC + check_cmd $cc $CPPFLAGS $CFLAGS "$@" $(cc_e $TMPO) $TMPC } check_as(){ @@ -1733,6 +1737,7 @@ cflags_filter=echo ldflags_filter=echo AS_O='-o $@' +CC_E='-E -o $@' CC_O='-o $@' LD_O='-o $@' HOSTCC_O='-o $@' @@ -2051,7 +2056,8 @@ probe_cc(){ pfx=$1 _cc=$2 - unset _type _ident _cc_o _flags _cflags _ldflags _depflags _DEPCMD _DEPFLAGS + unset _type _ident _cc_e _cc_o _flags _cflags _ldflags + unset _depflags _DEPCMD _DEPFLAGS _flags_filter=echo if $_cc -v 2>&1 | grep -q '^gcc.*LLVM'; then @@ -2114,6 +2120,7 @@ probe_cc(){ _ident=$($_cc -version | head -n1 | tr -s ' ') _flags='--gcc --abi=eabi -me' _cflags='-D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__=' + _cc_e='-ppl -fe=$@' _cc_o='-fe=$@' as_default="${cross_prefix}gcc" ld_default="${cross_prefix}gcc" @@ -2165,6 +2172,7 @@ probe_cc(){ } set_ccvars(){ + eval ${1}_E=\${_cc_e-\${${1}_E}} eval ${1}_O=\${_cc_o-\${${1}_O}} if [ -n "$_depflags" ]; then From 23565c26415f0015b5ad235709dc44cac3939864 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 20 Jul 2012 23:43:27 +0100 Subject: [PATCH 12/35] build: support non-standard replacements for -c flag This allows non-standard replacements for the -c compiler flag. Some compilers use other flags or no flag at all in place of the usual one. 
Signed-off-by: Mans Rullgard --- Makefile | 2 +- configure | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 5a7d5917e2..e9580d2164 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ LDFLAGS := $(ALLFFLIBS:%=-Llib%) $(LDFLAGS) define COMPILE $(call $(1)DEP,$(1)) - $($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $< + $($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) $($(1)_C) $($(1)_O) $< endef COMPILE_C = $(call COMPILE,CC) diff --git a/configure b/configure index 42a53e5e8b..afd9c5ae09 100755 --- a/configure +++ b/configure @@ -630,7 +630,7 @@ check_cc(){ log check_cc "$@" cat > $TMPC log_file $TMPC - check_cmd $cc $CPPFLAGS $CFLAGS "$@" -c $(cc_o $TMPO) $TMPC + check_cmd $cc $CPPFLAGS $CFLAGS "$@" $CC_C $(cc_o $TMPO) $TMPC } check_cpp(){ @@ -644,7 +644,7 @@ check_as(){ log check_as "$@" cat > $TMPC log_file $TMPC - check_cmd $as $CPPFLAGS $ASFLAGS "$@" -c -o $TMPO $TMPC + check_cmd $as $CPPFLAGS $ASFLAGS "$@" $AS_C -o $TMPO $TMPC } check_asm(){ @@ -1736,10 +1736,13 @@ asflags_filter=echo cflags_filter=echo ldflags_filter=echo +AS_C='-c' AS_O='-o $@' +CC_C='-c' CC_E='-E -o $@' CC_O='-o $@' LD_O='-o $@' +HOSTCC_C='-c' HOSTCC_O='-o $@' host_cflags='-D_ISOC99_SOURCE -D_XOPEN_SOURCE=600 -O3 -g' @@ -2056,7 +2059,7 @@ probe_cc(){ pfx=$1 _cc=$2 - unset _type _ident _cc_e _cc_o _flags _cflags _ldflags + unset _type _ident _cc_c _cc_e _cc_o _flags _cflags _ldflags unset _depflags _DEPCMD _DEPFLAGS _flags_filter=echo @@ -2172,6 +2175,7 @@ probe_cc(){ } set_ccvars(){ + eval ${1}_C=\${_cc_c-\${${1}_C}} eval ${1}_E=\${_cc_e-\${${1}_E}} eval ${1}_O=\${_cc_o-\${${1}_O}} @@ -3426,7 +3430,9 @@ LN_S=$ln_s CPPFLAGS=$CPPFLAGS CFLAGS=$CFLAGS ASFLAGS=$ASFLAGS +AS_C=$AS_C AS_O=$AS_O +CC_C=$CC_C CC_O=$CC_O LD_O=$LD_O DLLTOOL=$dlltool @@ -3459,6 +3465,7 @@ DEPHOSTCCFLAGS=$DEPHOSTCCFLAGS \$(HOSTCCFLAGS) HOSTCCDEP=$HOSTCCDEP HOSTCCDEP_FLAGS=$HOSTCCDEP_FLAGS HOSTCC_DEPFLAGS=$HOSTCC_DEPFLAGS +HOSTCC_C=$HOSTCC_C HOSTCC_O=$HOSTCC_O 
TARGET_EXEC=$target_exec TARGET_PATH=$target_path From a5bbb1242c494fad504f2b6ab2816f0268adb03a Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 27 Jul 2012 17:45:30 -0700 Subject: [PATCH 13/35] h264_loopfilter: port x86 simd to cpuflags. --- libavcodec/x86/h264_deblock.asm | 104 +++++++++++++------------- libavcodec/x86/h264_deblock_10bit.asm | 77 ++++++++++--------- libavcodec/x86/h264dsp_mmx.c | 60 +++++++-------- 3 files changed, 120 insertions(+), 121 deletions(-) diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index 1982dc4bd3..0891ef33da 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -282,8 +282,8 @@ cextern pb_A1 ;----------------------------------------------------------------------------- ; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -%macro DEBLOCK_LUMA 1 -cglobal deblock_v_luma_8_%1, 5,5,10 +%macro DEBLOCK_LUMA 0 +cglobal deblock_v_luma_8, 5,5,10 movd m8, [r4] ; tc0 lea r4, [r1*3] dec r2d ; alpha-1 @@ -327,8 +327,8 @@ cglobal deblock_v_luma_8_%1, 5,5,10 ;----------------------------------------------------------------------------- ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -INIT_MMX -cglobal deblock_h_luma_8_%1, 5,9 +INIT_MMX cpuname +cglobal deblock_h_luma_8, 5,9 movsxd r7, r1d lea r8, [r7+r7*2] lea r6, [r0-4] @@ -355,7 +355,7 @@ cglobal deblock_h_luma_8_%1, 5,9 %if WIN64 mov [rsp+0x20], r4 %endif - call deblock_v_luma_8_%1 + call deblock_v_luma_8 ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter) add r6, 2 @@ -384,24 +384,24 @@ cglobal deblock_h_luma_8_%1, 5,9 RET %endmacro -INIT_XMM -DEBLOCK_LUMA sse2 -INIT_AVX -DEBLOCK_LUMA avx +INIT_XMM sse2 +DEBLOCK_LUMA +INIT_XMM avx +DEBLOCK_LUMA %else -%macro 
DEBLOCK_LUMA 3 +%macro DEBLOCK_LUMA 2 ;----------------------------------------------------------------------------- ; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -cglobal deblock_%2_luma_8_%1, 5,5 +cglobal deblock_%1_luma_8, 5,5 lea r4, [r1*3] dec r2 ; alpha-1 neg r4 dec r3 ; beta-1 add r4, r0 ; pix-3*stride - %assign pad 2*%3+12-(stack_offset&15) + %assign pad 2*%2+12-(stack_offset&15) SUB esp, pad mova m0, [r4+r1] ; p1 @@ -415,7 +415,7 @@ cglobal deblock_%2_luma_8_%1, 5,5 movd m4, [r3] ; tc0 punpcklbw m4, m4 punpcklbw m4, m4 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0] - mova [esp+%3], m4 ; tc + mova [esp+%2], m4 ; tc pcmpgtb m4, m3 mova m3, [r4] ; p2 pand m4, m7 @@ -423,7 +423,7 @@ cglobal deblock_%2_luma_8_%1, 5,5 DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 pand m6, m4 - pand m4, [esp+%3] ; tc + pand m4, [esp+%2] ; tc psubb m7, m4, m6 pand m6, m4 LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 @@ -431,7 +431,7 @@ cglobal deblock_%2_luma_8_%1, 5,5 mova m4, [r0+2*r1] ; q2 DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1 pand m6, [esp] ; mask - mova m5, [esp+%3] ; tc + mova m5, [esp+%2] ; tc psubb m7, m6 pand m5, m6 mova m3, [r0+r1] @@ -446,8 +446,8 @@ cglobal deblock_%2_luma_8_%1, 5,5 ;----------------------------------------------------------------------------- ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -INIT_MMX -cglobal deblock_h_luma_8_%1, 0,5 +INIT_MMX cpuname +cglobal deblock_h_luma_8, 0,5 mov r0, r0mp mov r3, r1m lea r4, [r3*3] @@ -470,11 +470,11 @@ cglobal deblock_h_luma_8_%1, 0,5 PUSH dword r2m PUSH dword 16 PUSH dword r0 - call deblock_%2_luma_8_%1 -%ifidn %2, v8 + call deblock_%1_luma_8 +%ifidn %1, v8 add dword [esp ], 8 ; pix_tmp+0x38 add dword [esp+16], 2 ; tc0+2 - call deblock_%2_luma_8_%1 + call deblock_%1_luma_8 
%endif ADD esp, 20 @@ -501,12 +501,12 @@ cglobal deblock_h_luma_8_%1, 0,5 RET %endmacro ; DEBLOCK_LUMA -INIT_MMX -DEBLOCK_LUMA mmxext, v8, 8 -INIT_XMM -DEBLOCK_LUMA sse2, v, 16 -INIT_AVX -DEBLOCK_LUMA avx, v, 16 +INIT_MMX mmx2 +DEBLOCK_LUMA v8, 8 +INIT_XMM sse2 +DEBLOCK_LUMA v, 16 +INIT_XMM avx +DEBLOCK_LUMA v, 16 %endif ; ARCH @@ -608,7 +608,7 @@ DEBLOCK_LUMA avx, v, 16 %define mask1p mask1q %endmacro -%macro DEBLOCK_LUMA_INTRA 2 +%macro DEBLOCK_LUMA_INTRA 1 %define p1 m0 %define p0 m1 %define q0 m2 @@ -643,7 +643,7 @@ DEBLOCK_LUMA avx, v, 16 ;----------------------------------------------------------------------------- ; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_%2_luma_intra_8_%1, 4,6,16 +cglobal deblock_%1_luma_intra_8, 4,6,16 %if ARCH_X86_64 == 0 sub esp, 0x60 %endif @@ -700,12 +700,12 @@ cglobal deblock_%2_luma_intra_8_%1, 4,6,16 %endif RET -INIT_MMX +INIT_MMX cpuname %if ARCH_X86_64 ;----------------------------------------------------------------------------- ; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_8_%1, 4,9 +cglobal deblock_h_luma_intra_8, 4,9 movsxd r7, r1d lea r8, [r7*3] lea r6, [r0-4] @@ -721,7 +721,7 @@ cglobal deblock_h_luma_intra_8_%1, 4,9 lea r0, [pix_tmp+0x40] mov r1, 0x10 - call deblock_v_luma_intra_8_%1 + call deblock_v_luma_intra_8 ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8) lea r5, [r6+r8] @@ -734,7 +734,7 @@ cglobal deblock_h_luma_intra_8_%1, 4,9 add rsp, 0x88 RET %else -cglobal deblock_h_luma_intra_8_%1, 2,4 +cglobal deblock_h_luma_intra_8, 2,4 lea r3, [r1*3] sub r0, 4 lea r2, [r0+r3] @@ -753,10 +753,10 @@ cglobal deblock_h_luma_intra_8_%1, 2,4 PUSH dword r2m PUSH dword 16 PUSH r0 - call deblock_%2_luma_intra_8_%1 -%ifidn %2, v8 + 
call deblock_%1_luma_intra_8 +%ifidn %1, v8 add dword [rsp], 8 ; pix_tmp+8 - call deblock_%2_luma_intra_8_%1 + call deblock_%1_luma_intra_8 %endif ADD esp, 16 @@ -775,16 +775,16 @@ cglobal deblock_h_luma_intra_8_%1, 2,4 %endif ; ARCH_X86_64 %endmacro ; DEBLOCK_LUMA_INTRA -INIT_XMM -DEBLOCK_LUMA_INTRA sse2, v -INIT_AVX -DEBLOCK_LUMA_INTRA avx , v +INIT_XMM sse2 +DEBLOCK_LUMA_INTRA v +INIT_XMM avx +DEBLOCK_LUMA_INTRA v %if ARCH_X86_64 == 0 -INIT_MMX -DEBLOCK_LUMA_INTRA mmxext, v8 +INIT_MMX mmx2 +DEBLOCK_LUMA_INTRA v8 %endif -INIT_MMX +INIT_MMX mmx2 %macro CHROMA_V_START 0 dec r2d ; alpha-1 @@ -809,13 +809,13 @@ INIT_MMX ;----------------------------------------------------------------------------- ; void ff_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -cglobal deblock_v_chroma_8_mmxext, 5,6 +cglobal deblock_v_chroma_8, 5,6 CHROMA_V_START movq m0, [t5] movq m1, [t5+r1] movq m2, [r0] movq m3, [r0+r1] - call ff_chroma_inter_body_mmxext + call ff_chroma_inter_body_mmx2 movq [t5+r1], m1 movq [r0], m2 RET @@ -823,7 +823,7 @@ cglobal deblock_v_chroma_8_mmxext, 5,6 ;----------------------------------------------------------------------------- ; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -cglobal deblock_h_chroma_8_mmxext, 5,7 +cglobal deblock_h_chroma_8, 5,7 %if UNIX64 %define buf0 [rsp-24] %define buf1 [rsp-16] @@ -839,7 +839,7 @@ cglobal deblock_h_chroma_8_mmxext, 5,7 TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6) movq buf0, m0 movq buf1, m3 - call ff_chroma_inter_body_mmxext + call ff_chroma_inter_body_mmx2 movq m0, buf0 movq m3, buf1 TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) @@ -849,7 +849,7 @@ cglobal deblock_h_chroma_8_mmxext, 5,7 RET ALIGN 16 -ff_chroma_inter_body_mmxext: +ff_chroma_inter_body_mmx2: LOAD_MASK r2d, r3d movd 
m6, [r4] ; tc0 punpcklbw m6, m6 @@ -876,13 +876,13 @@ ff_chroma_inter_body_mmxext: ;----------------------------------------------------------------------------- ; void ff_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_v_chroma_intra_8_mmxext, 4,5 +cglobal deblock_v_chroma_intra_8, 4,5 CHROMA_V_START movq m0, [t5] movq m1, [t5+r1] movq m2, [r0] movq m3, [r0+r1] - call ff_chroma_intra_body_mmxext + call ff_chroma_intra_body_mmx2 movq [t5+r1], m1 movq [r0], m2 RET @@ -890,15 +890,15 @@ cglobal deblock_v_chroma_intra_8_mmxext, 4,5 ;----------------------------------------------------------------------------- ; void ff_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_h_chroma_intra_8_mmxext, 4,6 +cglobal deblock_h_chroma_intra_8, 4,6 CHROMA_H_START TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6) - call ff_chroma_intra_body_mmxext + call ff_chroma_intra_body_mmx2 TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6) RET ALIGN 16 -ff_chroma_intra_body_mmxext: +ff_chroma_intra_body_mmx2: LOAD_MASK r2d, r3d movq m5, m1 movq m6, m2 diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index ae385e0224..ba2f91490e 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -151,11 +151,11 @@ cextern pw_4 %endif %endmacro -%macro DEBLOCK_LUMA 1 +%macro DEBLOCK_LUMA 0 ;----------------------------------------------------------------------------- ; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16) +cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) %assign pad 5*mmsize+12-(stack_offset&15) %define tcm [rsp] %define 
ms1 [rsp+mmsize] @@ -210,7 +210,7 @@ cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16) ADD rsp, pad RET -cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16) +cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) %assign pad 7*mmsize+12-(stack_offset&15) %define tcm [rsp] %define ms1 [rsp+mmsize] @@ -301,7 +301,6 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16) RET %endmacro -INIT_XMM %if ARCH_X86_64 ; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2 ; m12=alpha, m13=beta @@ -339,8 +338,8 @@ INIT_XMM SWAP 3, 9 %endmacro -%macro DEBLOCK_LUMA_64 1 -cglobal deblock_v_luma_10_%1, 5,5,15 +%macro DEBLOCK_LUMA_64 0 +cglobal deblock_v_luma_10, 5,5,15 %define p2 m8 %define p1 m0 %define p0 m1 @@ -377,7 +376,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15 jg .loop REP_RET -cglobal deblock_h_luma_10_%1, 5,7,15 +cglobal deblock_h_luma_10, 5,7,15 shl r2d, 2 shl r3d, 2 LOAD_AB m12, m13, r2, r3 @@ -417,10 +416,10 @@ cglobal deblock_h_luma_10_%1, 5,7,15 REP_RET %endmacro -INIT_XMM -DEBLOCK_LUMA_64 sse2 -INIT_AVX -DEBLOCK_LUMA_64 avx +INIT_XMM sse2 +DEBLOCK_LUMA_64 +INIT_XMM avx +DEBLOCK_LUMA_64 %endif %macro SWAPMOVA 2 @@ -602,8 +601,8 @@ DEBLOCK_LUMA_64 avx ;----------------------------------------------------------------------------- ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -%macro DEBLOCK_LUMA_INTRA_64 1 -cglobal deblock_v_luma_intra_10_%1, 4,7,16 +%macro DEBLOCK_LUMA_INTRA_64 0 +cglobal deblock_v_luma_intra_10, 4,7,16 %define t0 m1 %define t1 m2 %define t2 m4 @@ -653,7 +652,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,16 ;----------------------------------------------------------------------------- ; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_10_%1, 4,7,16 +cglobal deblock_h_luma_intra_10, 4,7,16 %define t0 m15 %define t1 m14 %define t2 m2 @@ 
-712,18 +711,18 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,16 RET %endmacro -INIT_XMM -DEBLOCK_LUMA_INTRA_64 sse2 -INIT_AVX -DEBLOCK_LUMA_INTRA_64 avx +INIT_XMM sse2 +DEBLOCK_LUMA_INTRA_64 +INIT_XMM avx +DEBLOCK_LUMA_INTRA_64 %endif -%macro DEBLOCK_LUMA_INTRA 1 +%macro DEBLOCK_LUMA_INTRA 0 ;----------------------------------------------------------------------------- ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16) +cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16) LUMA_INTRA_INIT 3 lea r4, [r1*4] lea r5, [r1*3] @@ -751,7 +750,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16) ;----------------------------------------------------------------------------- ; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16) +cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16) LUMA_INTRA_INIT 8 %if mmsize == 8 lea r4, [r1*3] @@ -793,15 +792,15 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16) %endmacro %if ARCH_X86_64 == 0 -INIT_MMX -DEBLOCK_LUMA mmxext -DEBLOCK_LUMA_INTRA mmxext -INIT_XMM -DEBLOCK_LUMA sse2 -DEBLOCK_LUMA_INTRA sse2 -INIT_AVX -DEBLOCK_LUMA avx -DEBLOCK_LUMA_INTRA avx +INIT_MMX mmx2 +DEBLOCK_LUMA +DEBLOCK_LUMA_INTRA +INIT_XMM sse2 +DEBLOCK_LUMA +DEBLOCK_LUMA_INTRA +INIT_XMM avx +DEBLOCK_LUMA +DEBLOCK_LUMA_INTRA %endif ; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp @@ -843,11 +842,11 @@ DEBLOCK_LUMA_INTRA avx psraw %1, 6 %endmacro -%macro DEBLOCK_CHROMA 1 +%macro DEBLOCK_CHROMA 0 ;----------------------------------------------------------------------------- ; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- 
-cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16) +cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) mov r5, r0 sub r0, r1 sub r0, r1 @@ -881,7 +880,7 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16) ;----------------------------------------------------------------------------- ; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16) +cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16) mov r4, r0 sub r0, r1 sub r0, r1 @@ -908,10 +907,10 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16) %endmacro %if ARCH_X86_64 == 0 -INIT_MMX -DEBLOCK_CHROMA mmxext +INIT_MMX mmx2 +DEBLOCK_CHROMA %endif -INIT_XMM -DEBLOCK_CHROMA sse2 -INIT_AVX -DEBLOCK_CHROMA avx +INIT_XMM sse2 +DEBLOCK_CHROMA +INIT_XMM avx +DEBLOCK_CHROMA diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c index c0a40c42d7..3f18f64f4b 100644 --- a/libavcodec/x86/h264dsp_mmx.c +++ b/libavcodec/x86/h264dsp_mmx.c @@ -249,12 +249,12 @@ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, in int alpha, int beta); #define LF_FUNCS(type, depth)\ -LF_FUNC (h, chroma, depth, mmxext)\ -LF_IFUNC(h, chroma_intra, depth, mmxext)\ -LF_FUNC (v, chroma, depth, mmxext)\ -LF_IFUNC(v, chroma_intra, depth, mmxext)\ -LF_FUNC (h, luma, depth, mmxext)\ -LF_IFUNC(h, luma_intra, depth, mmxext)\ +LF_FUNC (h, chroma, depth, mmx2)\ +LF_IFUNC(h, chroma_intra, depth, mmx2)\ +LF_FUNC (v, chroma, depth, mmx2)\ +LF_IFUNC(v, chroma_intra, depth, mmx2)\ +LF_FUNC (h, luma, depth, mmx2)\ +LF_IFUNC(h, luma_intra, depth, mmx2)\ LF_FUNC (h, luma, depth, sse2)\ LF_IFUNC(h, luma_intra, depth, sse2)\ LF_FUNC (v, luma, depth, sse2)\ @@ -276,24 +276,24 @@ LF_FUNCS( uint8_t, 8) LF_FUNCS(uint16_t, 10) #if ARCH_X86_32 -LF_FUNC (v8, luma, 8, mmxext) -static void 
ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +LF_FUNC (v8, luma, 8, mmx2) +static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { if((tc0[0] & tc0[1]) >= 0) - ff_deblock_v8_luma_8_mmxext(pix+0, stride, alpha, beta, tc0); + ff_deblock_v8_luma_8_mmx2(pix+0, stride, alpha, beta, tc0); if((tc0[2] & tc0[3]) >= 0) - ff_deblock_v8_luma_8_mmxext(pix+8, stride, alpha, beta, tc0+2); + ff_deblock_v8_luma_8_mmx2(pix+8, stride, alpha, beta, tc0+2); } -LF_IFUNC(v8, luma_intra, 8, mmxext) -static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta) +LF_IFUNC(v8, luma_intra, 8, mmx2) +static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride, int alpha, int beta) { - ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta); - ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta); + ff_deblock_v8_luma_intra_8_mmx2(pix+0, stride, alpha, beta); + ff_deblock_v8_luma_intra_8_mmx2(pix+8, stride, alpha, beta); } #endif /* ARCH_X86_32 */ -LF_FUNC (v, luma, 10, mmxext) -LF_IFUNC(v, luma_intra, 10, mmxext) +LF_FUNC (v, luma, 10, mmx2) +LF_IFUNC(v, luma_intra, 10, mmx2) /***********************************/ /* weighted prediction */ @@ -373,17 +373,17 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2; - c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; - c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext; + c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmx2; + c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmx2; if (chroma_format_idc == 1) { - c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; - c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; + c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmx2; + 
c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmx2; } #if ARCH_X86_32 - c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext; - c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext; - c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext; - c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext; + c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmx2; + c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmx2; + c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2; + c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2; #endif c->weight_h264_pixels_tab[0]= ff_h264_weight_16_mmx2; c->weight_h264_pixels_tab[1]= ff_h264_weight_8_mmx2; @@ -436,12 +436,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom if (mm_flags & AV_CPU_FLAG_MMX) { if (mm_flags & AV_CPU_FLAG_MMX2) { #if ARCH_X86_32 - c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmxext; - c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmxext; - c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext; - c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext; - c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext; - c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext; + c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmx2; + c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmx2; + c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmx2; + c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmx2; + c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmx2; + c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2; #endif c->h264_idct_dc_add= ff_h264_idct_dc_add_10_mmx2; if (mm_flags&AV_CPU_FLAG_SSE2) { From 4d777eedfd339e431d73a3787cc9587775f1ba9c Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 26 Jul 2012 22:16:37 -0700 Subject: [PATCH 14/35] vp3: don't compile mmx IDCT functions on x86-64. 
64-bit CPUs always have SSE2, and a SSE2 version exists, thus the MMX version will never be used. --- libavcodec/x86/vp3dsp.asm | 3 +++ libavcodec/x86/vp3dsp_init.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm index 5877520c6c..7a88892c11 100644 --- a/libavcodec/x86/vp3dsp.asm +++ b/libavcodec/x86/vp3dsp.asm @@ -607,8 +607,11 @@ cglobal vp3_idct_add, 3, 4, 9 RET %endmacro +%if ARCH_X86_32 INIT_MMX mmx vp3_idct_funcs +%endif + INIT_XMM sse2 vp3_idct_funcs diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c index cd8e206a2c..704d4a6927 100644 --- a/libavcodec/x86/vp3dsp_init.c +++ b/libavcodec/x86/vp3dsp_init.c @@ -41,11 +41,13 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) #if HAVE_YASM int cpuflags = av_get_cpu_flags(); +#if ARCH_X86_32 if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) { c->idct_put = ff_vp3_idct_put_mmx; c->idct_add = ff_vp3_idct_add_mmx; c->idct_perm = FF_PARTTRANS_IDCT_PERM; } +#endif if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) { c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; From c4ef6a3e4ba50b7e3746a46b51c2f8d16e8cba7b Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 27 Jul 2012 16:28:36 +0200 Subject: [PATCH 15/35] Add missing libavutil/time.h includes. 
--- libavdevice/fbdev.c | 1 + libavdevice/jack_audio.c | 1 + libavdevice/oss_audio.c | 1 + libavdevice/x11grab.c | 1 + libavformat/rtpdec.c | 1 + libavformat/rtsp.c | 1 + libavformat/rtspdec.c | 1 + libavformat/rtspenc.c | 1 + libavformat/sapenc.c | 1 + libavformat/utils.c | 1 + 10 files changed, 10 insertions(+) diff --git a/libavdevice/fbdev.c b/libavdevice/fbdev.c index d789d41afa..9de1a3b555 100644 --- a/libavdevice/fbdev.c +++ b/libavdevice/fbdev.c @@ -39,6 +39,7 @@ #include "libavutil/log.h" #include "libavutil/mem.h" #include "libavutil/opt.h" +#include "libavutil/time.h" #include "libavutil/parseutils.h" #include "libavutil/pixdesc.h" #include "libavformat/avformat.h" diff --git a/libavdevice/jack_audio.c b/libavdevice/jack_audio.c index 85f9f09740..7b2ceef1bb 100644 --- a/libavdevice/jack_audio.c +++ b/libavdevice/jack_audio.c @@ -27,6 +27,7 @@ #include "libavutil/log.h" #include "libavutil/fifo.h" #include "libavutil/opt.h" +#include "libavutil/time.h" #include "libavcodec/avcodec.h" #include "libavformat/avformat.h" #include "libavformat/internal.h" diff --git a/libavdevice/oss_audio.c b/libavdevice/oss_audio.c index 60432c5fc5..b5fbb318dc 100644 --- a/libavdevice/oss_audio.c +++ b/libavdevice/oss_audio.c @@ -36,6 +36,7 @@ #include "libavutil/log.h" #include "libavutil/opt.h" +#include "libavutil/time.h" #include "libavcodec/avcodec.h" #include "libavformat/avformat.h" #include "libavformat/internal.h" diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c index dd432b3fd5..d7c8c8f548 100644 --- a/libavdevice/x11grab.c +++ b/libavdevice/x11grab.c @@ -42,6 +42,7 @@ #include "libavutil/log.h" #include "libavutil/opt.h" #include "libavutil/parseutils.h" +#include "libavutil/time.h" #include #include #include diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c index 87d92557db..2c5e6c8176 100644 --- a/libavformat/rtpdec.c +++ b/libavformat/rtpdec.c @@ -21,6 +21,7 @@ #include "libavutil/mathematics.h" #include "libavutil/avstring.h" +#include 
"libavutil/time.h" #include "libavcodec/get_bits.h" #include "avformat.h" #include "mpegts.h" diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c index f6f5ac7146..4cf7a90329 100644 --- a/libavformat/rtsp.c +++ b/libavformat/rtsp.c @@ -27,6 +27,7 @@ #include "libavutil/random_seed.h" #include "libavutil/dict.h" #include "libavutil/opt.h" +#include "libavutil/time.h" #include "avformat.h" #include "avio_internal.h" diff --git a/libavformat/rtspdec.c b/libavformat/rtspdec.c index a3565557d4..a125cc6e9e 100644 --- a/libavformat/rtspdec.c +++ b/libavformat/rtspdec.c @@ -23,6 +23,7 @@ #include "libavutil/intreadwrite.h" #include "libavutil/mathematics.h" #include "libavutil/random_seed.h" +#include "libavutil/time.h" #include "avformat.h" #include "internal.h" diff --git a/libavformat/rtspenc.c b/libavformat/rtspenc.c index c7fb2fa9d9..902076d25d 100644 --- a/libavformat/rtspenc.c +++ b/libavformat/rtspenc.c @@ -31,6 +31,7 @@ #include "avio_internal.h" #include "libavutil/intreadwrite.h" #include "libavutil/avstring.h" +#include "libavutil/time.h" #include "url.h" #define SDP_MAX_SIZE 16384 diff --git a/libavformat/sapenc.c b/libavformat/sapenc.c index 7e84a3fb99..235c71105c 100644 --- a/libavformat/sapenc.c +++ b/libavformat/sapenc.c @@ -24,6 +24,7 @@ #include "libavutil/random_seed.h" #include "libavutil/avstring.h" #include "libavutil/intreadwrite.h" +#include "libavutil/time.h" #include "internal.h" #include "network.h" #include "os_support.h" diff --git a/libavformat/utils.c b/libavformat/utils.c index 156c527e25..4d5f4ccaaf 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -35,6 +35,7 @@ #include "libavutil/avstring.h" #include "libavutil/mathematics.h" #include "libavutil/parseutils.h" +#include "libavutil/time.h" #include "riff.h" #include "audiointerleave.h" #include "url.h" From efd34918badd466161a2a2e170ca6d24a7b9b06f Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Thu, 28 Jun 2012 13:33:12 +0200 Subject: [PATCH 16/35] lavf: remove commented 
out cruft in avformat_find_stream_info() --- libavformat/utils.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/libavformat/utils.c b/libavformat/utils.c index 4d5f4ccaaf..4ec70b7f51 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -2404,8 +2404,6 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options) int64_t duration= pkt->dts - last; double dur= duration * av_q2d(st->time_base); -// if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO) -// av_log(NULL, AV_LOG_ERROR, "%f\n", dur); if (st->info->duration_count < 2) memset(st->info->duration_error, 0, sizeof(st->info->duration_error)); for (i=1; i<FF_ARRAY_ELEMS(st->info->duration_error); i++) { @@ -2467,17 +2465,13 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options) if (tb_unreliable(st->codec) && st->info->duration_count > 15 && st->info->duration_gcd > 1 && !st->r_frame_rate.num) av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, st->time_base.den, st->time_base.num * st->info->duration_gcd, INT_MAX); if (st->info->duration_count && !st->r_frame_rate.num - && tb_unreliable(st->codec) /*&& - //FIXME we should not special-case MPEG-2, but this needs testing with non-MPEG-2 ... 
- st->time_base.num*duration_sum[i]/st->info->duration_count*101LL > st->time_base.den*/){ + && tb_unreliable(st->codec)) { int num = 0; double best_error= 2*av_q2d(st->time_base); best_error = best_error*best_error*st->info->duration_count*1000*12*30; for (j=1; j<FF_ARRAY_ELEMS(st->info->duration_error); j++) { double error = st->info->duration_error[j] * get_std_framerate(j); -// if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO) -// av_log(NULL, AV_LOG_ERROR, "%f %f\n", get_std_framerate(j) / 12.0/1001, error); if(error < best_error){ best_error= error; num = get_std_framerate(j); From bfe9f48ad7f215fd1468689f0d1184f4b7ce38e6 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Fri, 27 Jul 2012 02:06:29 +0200 Subject: [PATCH 17/35] configure: Move parts that should not be user-selectable to CONFIG_EXTRA --- configure | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configure b/configure index afd9c5ae09..2be5b98a9b 100755 --- a/configure +++ b/configure @@ -940,7 +940,6 @@ PROGRAM_LIST=" CONFIG_LIST=" $COMPONENT_LIST $PROGRAM_LIST - ac3dsp avcodec avdevice avfilter @@ -955,7 +954,6 @@ CONFIG_LIST=" fastdiv fft frei0r - gcrypt gnutls gpl gray @@ -987,18 +985,14 @@ CONFIG_LIST=" lsp mdct memalign_hack - mpegaudiodsp - nettle network nonfree openssl pic rdft - rtpdec runtime_cpudetect safe_bitstream_reader shared - sinewin small sram static @@ -1204,7 +1198,9 @@ HAVE_LIST=" # options emitted with CONFIG_ prefix but not available on command line CONFIG_EXTRA=" aandcttables + ac3dsp avutil + gcrypt golomb gplv3 h264chroma @@ -1214,6 +1210,10 @@ CONFIG_EXTRA=" huffman lgplv3 lpc + mpegaudiodsp + nettle + rtpdec + sinewin vp3dsp " From 0aa907cfb1bbc647ee4b6da62fac5c89d7b4d318 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 28 Jul 2012 17:14:48 +0600 Subject: [PATCH 18/35] vc1dec: Do not ignore ff_vc1_parse_frame_header_adv return value Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind Signed-off-by: Michael Niedermayer Signed-off-by: Kostya 
Shishkov --- libavcodec/vc1dec.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index 51124cf7cd..f49fff8b98 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -5579,11 +5579,17 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, } if (i) { v->pic_header_flag = 0; - if (v->field_mode && i == n_slices1 + 2) - ff_vc1_parse_frame_header_adv(v, &s->gb); - else if (get_bits1(&s->gb)) { + if (v->field_mode && i == n_slices1 + 2) { + if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) { + av_log(v->s.avctx, AV_LOG_ERROR, "Field header damaged\n"); + continue; + } + } else if (get_bits1(&s->gb)) { v->pic_header_flag = 1; - ff_vc1_parse_frame_header_adv(v, &s->gb); + if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) { + av_log(v->s.avctx, AV_LOG_ERROR, "Slice header damaged\n"); + continue; + } } } s->start_mb_y = (i == 0) ? 0 : FFMAX(0, slices[i-1].mby_start % mb_height); From 1100acbab26883007898c53efeb289f562c6e514 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 28 Jul 2012 17:14:50 +0600 Subject: [PATCH 19/35] vc1dec: check that coded slice positions and interlacing match. 
This fixes out of array writes Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind Signed-off-by: Michael Niedermayer Signed-off-by: Kostya Shishkov --- libavcodec/vc1dec.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index f49fff8b98..5726c0adbc 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -5569,6 +5569,12 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, mb_height = s->mb_height >> v->field_mode; for (i = 0; i <= n_slices; i++) { if (i > 0 && slices[i - 1].mby_start >= mb_height) { + if (v->field_mode <= 0) { + av_log(v->s.avctx, AV_LOG_ERROR, "Slice %d starts beyond " + "picture boundary (%d >= %d)\n", i, + slices[i - 1].mby_start, mb_height); + continue; + } v->second_field = 1; v->blocks_off = s->mb_width * s->mb_height << 1; v->mb_off = s->mb_stride * s->mb_height >> 1; From 2bf369b60c7d56dd73887a0156c37676d0fa7e29 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 28 Jul 2012 16:27:55 +0600 Subject: [PATCH 20/35] vc1: avoid reading beyond the last line in vc1_draw_sprites() Fixes overread Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind Signed-off-by: Michael Niedermayer Signed-off-by: Kostya Shishkov --- libavcodec/vc1dec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index 5726c0adbc..fd515c754e 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -4938,15 +4938,17 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd) int iline = s->current_picture.f.linesize[plane]; int ycoord = yoff[sprite] + yadv[sprite] * row; int yline = ycoord >> 16; + int next_line; ysub[sprite] = ycoord & 0xFFFF; if (sprite) { iplane = s->last_picture.f.data[plane]; iline = s->last_picture.f.linesize[plane]; } + next_line = FFMIN(yline + 1, (v->sprite_height >> !!plane) - 1) * iline; if (!(xoff[sprite] & 0xFFFF) && xadv[sprite] == 1 << 16) { src_h[sprite][0] = iplane + (xoff[sprite] >> 16) 
+ yline * iline; if (ysub[sprite]) - src_h[sprite][1] = iplane + (xoff[sprite] >> 16) + (yline + 1) * iline; + src_h[sprite][1] = iplane + (xoff[sprite] >> 16) + next_line; } else { if (sr_cache[sprite][0] != yline) { if (sr_cache[sprite][1] == yline) { @@ -4958,7 +4960,9 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd) } } if (ysub[sprite] && sr_cache[sprite][1] != yline + 1) { - v->vc1dsp.sprite_h(v->sr_rows[sprite][1], iplane + (yline + 1) * iline, xoff[sprite], xadv[sprite], width); + v->vc1dsp.sprite_h(v->sr_rows[sprite][1], + iplane + next_line, xoff[sprite], + xadv[sprite], width); sr_cache[sprite][1] = yline + 1; } src_h[sprite][0] = v->sr_rows[sprite][0]; From 45838561f2f14339acdf53ffa3adbfe8e6db7514 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 28 Jul 2012 18:07:45 +0600 Subject: [PATCH 21/35] vc1dec: Override invalid macroblock quantizer Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind Signed-off-by: Michael Niedermayer Signed-off-by: Kostya Shishkov --- libavcodec/vc1dec.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index fd515c754e..c6cbfc1270 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -1048,6 +1048,11 @@ static void vc1_mc_4mv_chroma4(VC1Context *v) mquant = v->altpq; \ if ((edges&8) && s->mb_y == (s->mb_height - 1)) \ mquant = v->altpq; \ + if (!mquant || mquant > 31) { \ + av_log(v->s.avctx, AV_LOG_ERROR, \ + "Overriding invalid mquant %d\n", mquant); \ + mquant = 1; \ + } \ } /** From 67ef5f4eb54297cb0448653ee59993c790c4cd3e Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 4 Jul 2012 06:32:30 +0200 Subject: [PATCH 22/35] audiogen: allow specifying random seed as a commandline parameter. 
--- tests/audiogen.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/audiogen.c b/tests/audiogen.c index 8d27dc2632..acb380da50 100644 --- a/tests/audiogen.c +++ b/tests/audiogen.c @@ -144,8 +144,8 @@ int main(int argc, char **argv) int nb_channels = 2; char *ext; - if (argc < 2 || argc > 4) { - printf("usage: %s file [<sample rate> [<channels>]]\n" + if (argc < 2 || argc > 5) { + printf("usage: %s file [<sample rate> [<channels>] [<random seed>]]\n" "generate a test raw 16 bit audio stream\n" "If the file extension is .wav a WAVE header will be added.\n" "default: 44100 Hz stereo\n", argv[0]); @@ -168,6 +168,9 @@ int main(int argc, char **argv) } } + if (argc > 4) + seed = atoi(argv[4]); + outfile = fopen(argv[1], "wb"); if (!outfile) { perror(argv[1]); From 7339340787aaefaaa1519f6cc0ead1e1259878f9 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Tue, 29 May 2012 12:08:58 +0200 Subject: [PATCH 23/35] FATE: add a test for amix audio filter. --- tests/fate/filter.mak | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/fate/filter.mak b/tests/fate/filter.mak index 41916c9b35..2ba51ed9e1 100644 --- a/tests/fate/filter.mak +++ b/tests/fate/filter.mak @@ -1,3 +1,25 @@ +FATE_AMIX += fate-filter-amix-simple +fate-filter-amix-simple: CMD = avconv -filter_complex amix -i $(SRC) -ss 3 -i $(SRC1) -f f32le - +fate-filter-amix-simple: REF = $(SAMPLES)/filter/amix_simple.pcm + +FATE_AMIX += fate-filter-amix-first +fate-filter-amix-first: CMD = avconv -filter_complex amix=duration=first -ss 4 -i $(SRC) -i $(SRC1) -f f32le - +fate-filter-amix-first: REF = $(SAMPLES)/filter/amix_first.pcm + +FATE_AMIX += fate-filter-amix-transition +fate-filter-amix-transition: tests/data/asynth-44100-2-3.wav +fate-filter-amix-transition: SRC2 = $(TARGET_PATH)/tests/data/asynth-44100-2-3.wav +fate-filter-amix-transition: CMD = avconv -filter_complex amix=inputs=3:dropout_transition=0.5 -i $(SRC) -ss 2 -i $(SRC1) -ss 4 -i $(SRC2) -f f32le - +fate-filter-amix-transition: REF = 
$(SAMPLES)/filter/amix_transition.pcm + +$(FATE_AMIX): tests/data/asynth-44100-2.wav tests/data/asynth-44100-2-2.wav +$(FATE_AMIX): SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav +$(FATE_AMIX): SRC1 = $(TARGET_PATH)/tests/data/asynth-44100-2-2.wav +$(FATE_AMIX): CMP = oneoff + +FATE_FILTER += $(FATE_AMIX) +FATE_SAMPLES_AVCONV += $(FATE_AMIX) + FATE_ASYNCTS += fate-filter-asyncts fate-filter-asyncts: SRC = $(SAMPLES)/nellymoser/nellymoser-discont.flv fate-filter-asyncts: CMD = pcm -i $(SRC) -af asyncts From 8112710f17b5ad0eda2798b4d9ddcc5d774bad02 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 9 Jul 2012 12:09:58 +0200 Subject: [PATCH 24/35] FATE: add a test for delogo video filter. --- tests/fate/filter.mak | 5 ++ tests/ref/fate/filter-delogo | 110 +++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 tests/ref/fate/filter-delogo diff --git a/tests/fate/filter.mak b/tests/fate/filter.mak index 2ba51ed9e1..f1185ed425 100644 --- a/tests/fate/filter.mak +++ b/tests/fate/filter.mak @@ -29,4 +29,9 @@ fate-filter-asyncts: REF = $(SAMPLES)/nellymoser/nellymoser-discont.pcm FATE_FILTER += $(FATE_ASYNCTS) FATE_SAMPLES_AVCONV += $(FATE_ASYNCTS) +fate-filter-delogo: CMD = framecrc -i $(SAMPLES)/real/rv30.rm -vf delogo=show=0:x=290:y=25:w=26:h=16 -an + +FATE_FILTER += fate-filter-delogo +FATE_SAMPLES_AVCONV += fate-filter-delogo + fate-filter: $(FATE_FILTER) diff --git a/tests/ref/fate/filter-delogo b/tests/ref/fate/filter-delogo new file mode 100644 index 0000000000..00226982b2 --- /dev/null +++ b/tests/ref/fate/filter-delogo @@ -0,0 +1,110 @@ +#tb 0: 1/1000 +0, 1, 1, 0, 126720, 0x689de87e +0, 33, 33, 0, 126720, 0x3db9e91c +0, 66, 66, 0, 126720, 0x3db9e91c +0, 100, 100, 0, 126720, 0x3db9e91c +0, 133, 133, 0, 126720, 0xfa6ae95e +0, 166, 166, 0, 126720, 0x5bcbf0e6 +0, 200, 200, 0, 126720, 0x94a0f126 +0, 233, 233, 0, 126720, 0x0250f106 +0, 266, 266, 0, 126720, 0xcf6ab4bc +0, 300, 300, 0, 126720, 0x44aeb57c +0, 333, 333, 0, 126720, 
0x33b0b5bc +0, 367, 367, 0, 126720, 0xc4bab591 +0, 400, 400, 0, 126720, 0xa492b5ec +0, 433, 433, 0, 126720, 0x1459b85c +0, 467, 467, 0, 126720, 0x806fb8dc +0, 500, 500, 0, 126720, 0xd241b871 +0, 533, 533, 0, 126720, 0x698eb5cc +0, 567, 567, 0, 126720, 0x4719aa98 +0, 600, 600, 0, 126720, 0x9ca1962c +0, 633, 633, 0, 126720, 0x18cda460 +0, 667, 667, 0, 126720, 0xc230b716 +0, 700, 700, 0, 126720, 0x8451a4e2 +0, 734, 734, 0, 126720, 0x59e9a7ea +0, 767, 767, 0, 126720, 0xc77ca73d +0, 800, 800, 0, 126720, 0x725fb976 +0, 834, 834, 0, 126720, 0xb30da3b3 +0, 867, 867, 0, 126720, 0x7af2ea86 +0, 900, 900, 0, 126720, 0x40d4b4eb +0, 934, 934, 0, 126720, 0x49d00307 +0, 967, 967, 0, 126720, 0x44c8848e +0, 1000, 1000, 0, 126720, 0xc6990101 +0, 1034, 1034, 0, 126720, 0x2e01b963 +0, 1067, 1067, 0, 126720, 0xd0e903f0 +0, 1101, 1101, 0, 126720, 0x3457d592 +0, 1134, 1134, 0, 126720, 0x4f1ddb3c +0, 1167, 1167, 0, 126720, 0x3980ace5 +0, 1201, 1201, 0, 126720, 0xb1e37954 +0, 1234, 1234, 0, 126720, 0x619fc554 +0, 1267, 1267, 0, 126720, 0x945fb39e +0, 1301, 1301, 0, 126720, 0xb1d5e0ce +0, 1334, 1334, 0, 126720, 0xf26e1dcc +0, 1368, 1368, 0, 126720, 0x04d5783e +0, 1401, 1401, 0, 126720, 0xbaa0479e +0, 1434, 1434, 0, 126720, 0x20d88b01 +0, 1468, 1468, 0, 126720, 0x59d99901 +0, 1501, 1501, 0, 126720, 0x1c6e09f6 +0, 1534, 1534, 0, 126720, 0xeec50fc5 +0, 1568, 1568, 0, 126720, 0xb3a92827 +0, 1601, 1601, 0, 126720, 0xf62dd2b6 +0, 1634, 1634, 0, 126720, 0x75b1e619 +0, 1668, 1668, 0, 126720, 0x6bbce2c0 +0, 1701, 1701, 0, 126720, 0xd93e023c +0, 1735, 1735, 0, 126720, 0xbbe8e7c2 +0, 1768, 1768, 0, 126720, 0x2272ec17 +0, 1801, 1801, 0, 126720, 0xf5e4ee6e +0, 1835, 1835, 0, 126720, 0x751d2607 +0, 1868, 1868, 0, 126720, 0x44c499c9 +0, 1901, 1901, 0, 126720, 0xddccd842 +0, 1935, 1935, 0, 126720, 0x508dd214 +0, 1968, 1968, 0, 126720, 0x8eb10272 +0, 2001, 2001, 0, 126720, 0x7224b1c6 +0, 2035, 2035, 0, 126720, 0x50ff456c +0, 2068, 2068, 0, 126720, 0xa81e2731 +0, 2102, 2102, 0, 126720, 0x7e50456d +0, 2135, 
2135, 0, 126720, 0x44802978 +0, 2168, 2168, 0, 126720, 0x86e88743 +0, 2202, 2202, 0, 126720, 0x0b1087d6 +0, 2235, 2235, 0, 126720, 0xb0227d21 +0, 2268, 2268, 0, 126720, 0x29d10bd2 +0, 2302, 2302, 0, 126720, 0x04b43afa +0, 2335, 2335, 0, 126720, 0xb48e9698 +0, 2369, 2369, 0, 126720, 0x75d760fb +0, 2402, 2402, 0, 126720, 0xa2ab1fdb +0, 2435, 2435, 0, 126720, 0xec30a5ee +0, 2469, 2469, 0, 126720, 0xbdab7c8c +0, 2502, 2502, 0, 126720, 0xac5c3f2c +0, 2535, 2535, 0, 126720, 0xce6350be +0, 2569, 2569, 0, 126720, 0xb109657a +0, 2602, 2602, 0, 126720, 0x723865a4 +0, 2635, 2635, 0, 126720, 0xa9869124 +0, 2669, 2669, 0, 126720, 0xc41af558 +0, 2702, 2702, 0, 126720, 0xcbe6a402 +0, 2736, 2736, 0, 126720, 0xb6735ecb +0, 2769, 2769, 0, 126720, 0xba3059f2 +0, 2802, 2802, 0, 126720, 0xe7d63b8d +0, 2836, 2836, 0, 126720, 0x8f115906 +0, 2869, 2869, 0, 126720, 0xaf6a8dcb +0, 2902, 2902, 0, 126720, 0xb73e846e +0, 2936, 2936, 0, 126720, 0xedd6380f +0, 2969, 2969, 0, 126720, 0xd9026acf +0, 3002, 3002, 0, 126720, 0xa03a650b +0, 3036, 3036, 0, 126720, 0x262765bc +0, 3069, 3069, 0, 126720, 0xaaa9ded1 +0, 3103, 3103, 0, 126720, 0xe4f42665 +0, 3136, 3136, 0, 126720, 0x78daf760 +0, 3169, 3169, 0, 126720, 0x3b0c6ef8 +0, 3203, 3203, 0, 126720, 0xb745df80 +0, 3236, 3236, 0, 126720, 0x08e57b90 +0, 3269, 3269, 0, 126720, 0x6f883ab0 +0, 3303, 3303, 0, 126720, 0x934b4dd5 +0, 3336, 3336, 0, 126720, 0x762f108f +0, 3370, 3370, 0, 126720, 0x91ee0f2b +0, 3403, 3403, 0, 126720, 0x9af6e5e8 +0, 3436, 3436, 0, 126720, 0xdcd95e0a +0, 3470, 3470, 0, 126720, 0x22c33a6e +0, 3503, 3503, 0, 126720, 0x21c1b7f4 +0, 3536, 3536, 0, 126720, 0x0a66a1ed +0, 3570, 3570, 0, 126720, 0x53fea81b +0, 3603, 3603, 0, 126720, 0x597f5567 From 8f9537f3145bf245d3f1e8b47c2c7567f537d416 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 25 Jul 2012 08:25:38 +0200 Subject: [PATCH 25/35] FATE: add tests for yadif. 
--- tests/fate/filter.mak | 9 +++++ tests/ref/fate/filter-yadif-mode0 | 32 ++++++++++++++++ tests/ref/fate/filter-yadif-mode1 | 63 +++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100644 tests/ref/fate/filter-yadif-mode0 create mode 100644 tests/ref/fate/filter-yadif-mode1 diff --git a/tests/fate/filter.mak b/tests/fate/filter.mak index f1185ed425..a4c125b5d7 100644 --- a/tests/fate/filter.mak +++ b/tests/fate/filter.mak @@ -34,4 +34,13 @@ fate-filter-delogo: CMD = framecrc -i $(SAMPLES)/real/rv30.rm -vf delogo=show=0: FATE_FILTER += fate-filter-delogo FATE_SAMPLES_AVCONV += fate-filter-delogo +FATE_YADIF += fate-filter-yadif-mode0 +fate-filter-yadif-mode0: CMD = framecrc -i $(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=0 + +FATE_YADIF += fate-filter-yadif-mode1 +fate-filter-yadif-mode1: CMD = framecrc -i $(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=1 + +FATE_FILTER += $(FATE_YADIF) +FATE_SAMPLES_AVCONV += $(FATE_YADIF) + fate-filter: $(FATE_FILTER) diff --git a/tests/ref/fate/filter-yadif-mode0 b/tests/ref/fate/filter-yadif-mode0 new file mode 100644 index 0000000000..ca950d0bd1 --- /dev/null +++ b/tests/ref/fate/filter-yadif-mode0 @@ -0,0 +1,32 @@ +#tb 0: 1/180000 +0, 64800, 64800, 0, 622080, 0x1511cae9 +0, 72000, 72000, 0, 622080, 0x6e77e746 +0, 79200, 79200, 0, 622080, 0x89aac777 +0, 86400, 86400, 0, 622080, 0x7e0a9335 +0, 93600, 93600, 0, 622080, 0x5f34759b +0, 100800, 100800, 0, 622080, 0xfac498a6 +0, 108000, 108000, 0, 622080, 0xe60e7a9e +0, 115200, 115200, 0, 622080, 0x44875bbd +0, 122400, 122400, 0, 622080, 0xfa761aab +0, 129600, 129600, 0, 622080, 0x59be119c +0, 136800, 136800, 0, 622080, 0x21316b36 +0, 144000, 144000, 0, 622080, 0x929fde5b +0, 151200, 151200, 0, 622080, 0xfca8990c +0, 158400, 158400, 0, 622080, 0x1ec87d02 +0, 165600, 165600, 0, 622080, 0x5768eea0 +0, 172800, 172800, 0, 622080, 0x1a0894ab +0, 180000, 180000, 0, 622080, 0xb4e61323 +0, 187200, 187200, 0, 622080, 0xb773341a +0, 194400, 194400, 0, 
622080, 0x8a914cf7 +0, 201600, 201600, 0, 622080, 0xf1cfbc7d +0, 208800, 208800, 0, 622080, 0xebaeb317 +0, 216000, 216000, 0, 622080, 0xbae9adf4 +0, 223200, 223200, 0, 622080, 0x593544fd +0, 230400, 230400, 0, 622080, 0x2cd8ec0b +0, 237600, 237600, 0, 622080, 0x8032d9d4 +0, 244800, 244800, 0, 622080, 0x5c67ace7 +0, 252000, 252000, 0, 622080, 0x95714528 +0, 259200, 259200, 0, 622080, 0xa11cbed2 +0, 266400, 266400, 0, 622080, 0x7389f8f1 +0, 273600, 273600, 0, 622080, 0xa694f3f2 +0, 280800, 280800, 0, 622080, 0xac3a3d09 diff --git a/tests/ref/fate/filter-yadif-mode1 b/tests/ref/fate/filter-yadif-mode1 new file mode 100644 index 0000000000..0a2f61cb29 --- /dev/null +++ b/tests/ref/fate/filter-yadif-mode1 @@ -0,0 +1,63 @@ +#tb 0: 1/180000 +0, 64800, 64800, 0, 622080, 0x1511cae9 +0, 68400, 68400, 0, 622080, 0xb88ca855 +0, 72000, 72000, 0, 622080, 0x6e77e746 +0, 75600, 75600, 0, 622080, 0x5da19198 +0, 79200, 79200, 0, 622080, 0x89aac777 +0, 82800, 82800, 0, 622080, 0xee31c8a8 +0, 86400, 86400, 0, 622080, 0x7e0a9335 +0, 90000, 90000, 0, 622080, 0xcbb7aac5 +0, 93600, 93600, 0, 622080, 0x5f34759b +0, 97200, 97200, 0, 622080, 0x19972f1a +0, 100800, 100800, 0, 622080, 0xfac498a6 +0, 104400, 104400, 0, 622080, 0xac7d34b9 +0, 108000, 108000, 0, 622080, 0xe60e7a9e +0, 111600, 111600, 0, 622080, 0x4adfe592 +0, 115200, 115200, 0, 622080, 0x44875bbd +0, 118800, 118800, 0, 622080, 0x5d738330 +0, 122400, 122400, 0, 622080, 0xfa761aab +0, 126000, 126000, 0, 622080, 0xb60b4447 +0, 129600, 129600, 0, 622080, 0x59be119c +0, 133200, 133200, 0, 622080, 0x1e11acf4 +0, 136800, 136800, 0, 622080, 0x21316b36 +0, 140400, 140400, 0, 622080, 0x5ed635d0 +0, 144000, 144000, 0, 622080, 0x929fde5b +0, 147600, 147600, 0, 622080, 0x939857af +0, 151200, 151200, 0, 622080, 0xfca8990c +0, 154800, 154800, 0, 622080, 0x530b28fd +0, 158400, 158400, 0, 622080, 0x1ec87d02 +0, 162000, 162000, 0, 622080, 0x3bc0d5d3 +0, 165600, 165600, 0, 622080, 0x5768eea0 +0, 169200, 169200, 0, 622080, 0x77e0fe99 +0, 172800, 
172800, 0, 622080, 0x1a0894ab +0, 176400, 176400, 0, 622080, 0xd2151c1e +0, 180000, 180000, 0, 622080, 0xb4e61323 +0, 183600, 183600, 0, 622080, 0xe021a815 +0, 187200, 187200, 0, 622080, 0xb773341a +0, 190800, 190800, 0, 622080, 0xceae4f12 +0, 194400, 194400, 0, 622080, 0x8a914cf7 +0, 198000, 198000, 0, 622080, 0x4c2f3330 +0, 201600, 201600, 0, 622080, 0xf1cfbc7d +0, 205200, 205200, 0, 622080, 0xf534c392 +0, 208800, 208800, 0, 622080, 0xebaeb317 +0, 212400, 212400, 0, 622080, 0x88f01c11 +0, 216000, 216000, 0, 622080, 0xbae9adf4 +0, 219600, 219600, 0, 622080, 0x654d5df2 +0, 223200, 223200, 0, 622080, 0x593544fd +0, 226800, 226800, 0, 622080, 0x89ef6f8a +0, 230400, 230400, 0, 622080, 0x2cd8ec0b +0, 234000, 234000, 0, 622080, 0x78a7b5f1 +0, 237600, 237600, 0, 622080, 0x8032d9d4 +0, 241200, 241200, 0, 622080, 0x8152d67f +0, 244800, 244800, 0, 622080, 0x5c67ace7 +0, 248400, 248400, 0, 622080, 0x6590ff5f +0, 252000, 252000, 0, 622080, 0x95714528 +0, 255600, 255600, 0, 622080, 0x51d2be96 +0, 259200, 259200, 0, 622080, 0xa11cbed2 +0, 262800, 262800, 0, 622080, 0x483f65f7 +0, 266400, 266400, 0, 622080, 0x7389f8f1 +0, 270000, 270000, 0, 622080, 0x7a69143d +0, 273600, 273600, 0, 622080, 0xa694f3f2 +0, 277200, 277200, 0, 622080, 0xeccc58ff +0, 280800, 280800, 0, 622080, 0xac3a3d09 +0, 284400, 284400, 0, 622080, 0xc4d2c370 From b3fa4788231b0e7bf73d6123e39a278d26d1c3e5 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 25 Jul 2012 08:34:44 +0200 Subject: [PATCH 26/35] lavfi: bump minor to mark stabilizing the ABI. 
--- libavfilter/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/version.h b/libavfilter/version.h index c1292bb70d..0e72a47916 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -29,7 +29,7 @@ #include "libavutil/avutil.h" #define LIBAVFILTER_VERSION_MAJOR 3 -#define LIBAVFILTER_VERSION_MINOR 0 +#define LIBAVFILTER_VERSION_MINOR 1 #define LIBAVFILTER_VERSION_MICRO 0 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ From 61f8bb74f33931c899482f5cca17ded571b07d4e Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 28 Jul 2012 12:18:51 +0200 Subject: [PATCH 27/35] mp3dec: remove commented out cruft. --- libavformat/mp3dec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c index 39875201e3..4344e8efcf 100644 --- a/libavformat/mp3dec.c +++ b/libavformat/mp3dec.c @@ -186,7 +186,6 @@ static int mp3_read_header(AVFormatContext *s) static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt) { int ret, size; - // AVStream *st = s->streams[0]; size= MP3_PACKET_SIZE; From 67b1156fe8d80c1858a222e85c5be75e0f040ea1 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 28 Jul 2012 12:20:08 +0200 Subject: [PATCH 28/35] mp3dec: remove a pointless local variable. 
--- libavformat/mp3dec.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c index 4344e8efcf..11e684d7f3 100644 --- a/libavformat/mp3dec.c +++ b/libavformat/mp3dec.c @@ -185,11 +185,9 @@ static int mp3_read_header(AVFormatContext *s) static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt) { - int ret, size; + int ret; - size= MP3_PACKET_SIZE; - - ret= av_get_packet(s->pb, pkt, size); + ret = av_get_packet(s->pb, pkt, MP3_PACKET_SIZE); pkt->stream_index = 0; if (ret <= 0) { From f73e3938ac70524826664855210446c3739c4a5e Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 28 Jul 2012 12:21:21 +0200 Subject: [PATCH 29/35] mp3dec: forward errors for av_get_packet(). Don't invent a bogus EIO error. The code now doesn't check for ret == 0, but that check is redundant, av_get_packet() never returns 0. --- libavformat/mp3dec.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c index 11e684d7f3..a208488930 100644 --- a/libavformat/mp3dec.c +++ b/libavformat/mp3dec.c @@ -188,11 +188,10 @@ static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt) int ret; ret = av_get_packet(s->pb, pkt, MP3_PACKET_SIZE); + if (ret < 0) + return ret; pkt->stream_index = 0; - if (ret <= 0) { - return AVERROR(EIO); - } if (ret > ID3v1_TAG_SIZE && memcmp(&pkt->data[ret - ID3v1_TAG_SIZE], "TAG", 3) == 0) From ccc10acb5b941973acab49d64459bb110cc0a529 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 28 Jul 2012 12:23:04 +0200 Subject: [PATCH 30/35] wv: return AVERROR_EOF on EOF, not EIO. 
--- libavformat/wv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/wv.c b/libavformat/wv.c index 49ca486a93..0b6a406654 100644 --- a/libavformat/wv.c +++ b/libavformat/wv.c @@ -254,7 +254,7 @@ static int wv_read_packet(AVFormatContext *s, uint32_t block_samples; if (s->pb->eof_reached) - return AVERROR(EIO); + return AVERROR_EOF; if(wc->block_parsed){ if(wv_read_block_header(s, s->pb, 0) < 0) return -1; From c1d865d5633e8e9eb9ee2bfa876fbcae4cd41cfd Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 28 Jul 2012 12:28:05 +0200 Subject: [PATCH 31/35] wv: return meaningful error codes. --- libavformat/wv.c | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/libavformat/wv.c b/libavformat/wv.c index 0b6a406654..b31144907f 100644 --- a/libavformat/wv.c +++ b/libavformat/wv.c @@ -90,17 +90,17 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen if(!append){ tag = avio_rl32(pb); if (tag != MKTAG('w', 'v', 'p', 'k')) - return -1; + return AVERROR_INVALIDDATA; size = avio_rl32(pb); if(size < 24 || size > WV_BLOCK_LIMIT){ av_log(ctx, AV_LOG_ERROR, "Incorrect block size %i\n", size); - return -1; + return AVERROR_INVALIDDATA; } wc->blksize = size; ver = avio_rl16(pb); if(ver < 0x402 || ver > 0x410){ av_log(ctx, AV_LOG_ERROR, "Unsupported version %03X\n", ver); - return -1; + return AVERROR_PATCHWELCOME; } avio_r8(pb); // track no avio_r8(pb); // track sub index @@ -128,7 +128,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen int64_t block_end = avio_tell(pb) + wc->blksize - 24; if(!pb->seekable){ av_log(ctx, AV_LOG_ERROR, "Cannot determine additional parameters\n"); - return -1; + return AVERROR_INVALIDDATA; } while(avio_tell(pb) < block_end){ int id, size; @@ -141,7 +141,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen case 0xD: if(size <= 1){ av_log(ctx, AV_LOG_ERROR, 
"Insufficient channel information\n"); - return -1; + return AVERROR_INVALIDDATA; } chan = avio_r8(pb); switch(size - 2){ @@ -164,7 +164,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen break; default: av_log(ctx, AV_LOG_ERROR, "Invalid channel info size %d\n", size); - return -1; + return AVERROR_INVALIDDATA; } break; case 0x27: @@ -178,7 +178,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen } if(rate == -1){ av_log(ctx, AV_LOG_ERROR, "Cannot determine custom sampling rate\n"); - return -1; + return AVERROR_INVALIDDATA; } avio_seek(pb, block_end - wc->blksize + 24, SEEK_SET); } @@ -189,15 +189,15 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen if(wc->flags && bpp != wc->bpp){ av_log(ctx, AV_LOG_ERROR, "Bits per sample differ, this block: %i, header block: %i\n", bpp, wc->bpp); - return -1; + return AVERROR_INVALIDDATA; } if(wc->flags && !wc->multichannel && chan != wc->chan){ av_log(ctx, AV_LOG_ERROR, "Channels differ, this block: %i, header block: %i\n", chan, wc->chan); - return -1; + return AVERROR_INVALIDDATA; } if(wc->flags && rate != -1 && rate != wc->rate){ av_log(ctx, AV_LOG_ERROR, "Sampling rate differ, this block: %i, header block: %i\n", rate, wc->rate); - return -1; + return AVERROR_INVALIDDATA; } wc->blksize = size - 24; return 0; @@ -208,11 +208,12 @@ static int wv_read_header(AVFormatContext *s) AVIOContext *pb = s->pb; WVContext *wc = s->priv_data; AVStream *st; + int ret; wc->block_parsed = 0; for(;;){ - if(wv_read_block_header(s, pb, 0) < 0) - return -1; + if ((ret = wv_read_block_header(s, pb, 0)) < 0) + return ret; if(!AV_RN32(wc->extra)) avio_skip(pb, wc->blksize - 24); else @@ -222,7 +223,7 @@ static int wv_read_header(AVFormatContext *s) /* now we are ready: build format streams */ st = avformat_new_stream(s, NULL); if (!st) - return -1; + return AVERROR(ENOMEM); st->codec->codec_type = AVMEDIA_TYPE_AUDIO; st->codec->codec_id = 
CODEC_ID_WAVPACK; st->codec->channels = wc->chan; @@ -256,8 +257,8 @@ static int wv_read_packet(AVFormatContext *s, if (s->pb->eof_reached) return AVERROR_EOF; if(wc->block_parsed){ - if(wv_read_block_header(s, s->pb, 0) < 0) - return -1; + if ((ret = wv_read_block_header(s, s->pb, 0)) < 0) + return ret; } pos = wc->pos; @@ -275,7 +276,7 @@ static int wv_read_packet(AVFormatContext *s, while(!(wc->flags & WV_END_BLOCK)){ if(avio_rl32(s->pb) != MKTAG('w', 'v', 'p', 'k')){ av_free_packet(pkt); - return -1; + return AVERROR_INVALIDDATA; } if((ret = av_append_packet(s->pb, pkt, 4)) < 0){ av_free_packet(pkt); @@ -285,14 +286,14 @@ static int wv_read_packet(AVFormatContext *s, if(size < 24 || size > WV_BLOCK_LIMIT){ av_free_packet(pkt); av_log(s, AV_LOG_ERROR, "Incorrect block size %d\n", size); - return -1; + return AVERROR_INVALIDDATA; } wc->blksize = size; ver = avio_rl16(s->pb); if(ver < 0x402 || ver > 0x410){ av_free_packet(pkt); av_log(s, AV_LOG_ERROR, "Unsupported version %03X\n", ver); - return -1; + return AVERROR_PATCHWELCOME; } avio_r8(s->pb); // track no avio_r8(s->pb); // track sub index @@ -304,9 +305,9 @@ static int wv_read_packet(AVFormatContext *s, } memcpy(wc->extra, pkt->data + pkt->size - WV_EXTRA_SIZE, WV_EXTRA_SIZE); - if(wv_read_block_header(s, s->pb, 1) < 0){ + if ((ret = wv_read_block_header(s, s->pb, 1)) < 0){ av_free_packet(pkt); - return -1; + return ret; } ret = av_append_packet(s->pb, pkt, wc->blksize); if(ret < 0){ @@ -345,14 +346,14 @@ static int wv_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp, } /* if timestamp is out of bounds, return error */ if(timestamp < 0 || timestamp >= s->duration) - return -1; + return AVERROR(EINVAL); pos = avio_tell(s->pb); do{ ret = av_read_frame(s, pkt); if (ret < 0){ avio_seek(s->pb, pos, SEEK_SET); - return -1; + return ret; } pts = pkt->pts; av_free_packet(pkt); From b3c5ae5607275f691289df737edaf47c72e6028c Mon Sep 17 00:00:00 2001 From: "Ronald S. 
Bultje" Date: Sat, 28 Jul 2012 08:20:19 -0700 Subject: [PATCH 32/35] fft: rename "z" to "zc" to prevent name collision. Without this, cglobal will expand "z" to "zh" to access the high byte in a register's word, which causes a name collision with the ZH(x) macro further up in this file. --- libavcodec/x86/fft_mmx.asm | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index 527e215522..5c6583b3b7 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -516,23 +516,23 @@ INIT_MMX 3dnow FFT48_3DN -%define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)] -%define Z2(x) [zq + o3q + mmsize*(x&1)] -%define ZH(x) [zq + o1q*(x&6) + mmsize*(x&1) + mmsize/2] -%define Z2H(x) [zq + o3q + mmsize*(x&1) + mmsize/2] +%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] +%define Z2(x) [zcq + o3q + mmsize*(x&1)] +%define ZH(x) [zcq + o1q*(x&6) + mmsize*(x&1) + mmsize/2] +%define Z2H(x) [zcq + o3q + mmsize*(x&1) + mmsize/2] %macro DECL_PASS 2+ ; name, payload align 16 %1: -DEFINE_ARGS z, w, n, o1, o3 +DEFINE_ARGS zc, w, n, o1, o3 lea o3q, [nq*3] lea o1q, [nq*8] shl o3q, 4 .loop: %2 - add zq, mmsize*2 - add wq, mmsize - sub nd, mmsize/8 + add zcq, mmsize*2 + add wq, mmsize + sub nd, mmsize/8 jg .loop rep ret %endmacro @@ -747,7 +747,7 @@ section .text ; On x86_32, this function does the register saving and restoring for all of fft. ; The others pass args in registers and don't spill anything. -cglobal fft_dispatch%2, 2,5,8, z, nbits +cglobal fft_dispatch%2, 2,5,8, zc, nbits FFT_DISPATCH fullsuffix, nbits RET %endmacro ; DECL_FFT From 96c9cc10947a374b99f56970959cdedbbb1e4331 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 26 Jul 2012 20:26:17 -0700 Subject: [PATCH 33/35] x86inc: sync to latest version from x264. 
--- libavutil/x86/x86inc.asm | 216 ++++++++++++++++++++++----------------- 1 file changed, 124 insertions(+), 92 deletions(-) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 4b4a19b208..1030f10a32 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -36,8 +36,8 @@ %define program_name ff -%define UNIX64 0 %define WIN64 0 +%define UNIX64 0 %if ARCH_X86_64 %ifidn __OUTPUT_FORMAT__,win32 %define WIN64 1 @@ -54,11 +54,6 @@ %define mangle(x) x %endif -; FIXME: All of the 64bit asm functions that take a stride as an argument -; via register, assume that the high dword of that register is filled with 0. -; This is true in practice (since we never do any 64bit arithmetic on strides, -; and x264's strides are all positive), but is not guaranteed by the ABI. - ; Name of the .rodata section. ; Kludge: Something on OS X fails to align .rodata even given an align attribute, ; so use a different read-only section. @@ -129,34 +124,38 @@ CPU amdnop ; registers: ; rN and rNq are the native-size register holding function argument N ; rNd, rNw, rNb are dword, word, and byte size +; rNh is the high 8 bits of the word size ; rNm is the original location of arg N (a register or on the stack), dword ; rNmp is native size -%macro DECLARE_REG 5-6 +%macro DECLARE_REG 2-3 %define r%1q %2 - %define r%1d %3 - %define r%1w %4 - %define r%1b %5 - %if %0 == 5 - %define r%1m %3 + %define r%1d %2d + %define r%1w %2w + %define r%1b %2b + %define r%1h %2h + %if %0 == 2 + %define r%1m %2d %define r%1mp %2 %elif ARCH_X86_64 ; memory - %define r%1m [rsp + stack_offset + %6] + %define r%1m [rsp + stack_offset + %3] %define r%1mp qword r %+ %1m %else - %define r%1m [esp + stack_offset + %6] + %define r%1m [esp + stack_offset + %3] %define r%1mp dword r %+ %1m %endif %define r%1 %2 %endmacro -%macro DECLARE_REG_SIZE 2 +%macro DECLARE_REG_SIZE 3 %define r%1q r%1 %define e%1q r%1 %define r%1d e%1 %define e%1d e%1 %define r%1w %1 %define e%1w %1 + %define r%1h %3 + 
%define e%1h %3 %define r%1b %2 %define e%1b %2 %if ARCH_X86_64 == 0 @@ -164,13 +163,13 @@ CPU amdnop %endif %endmacro -DECLARE_REG_SIZE ax, al -DECLARE_REG_SIZE bx, bl -DECLARE_REG_SIZE cx, cl -DECLARE_REG_SIZE dx, dl -DECLARE_REG_SIZE si, sil -DECLARE_REG_SIZE di, dil -DECLARE_REG_SIZE bp, bpl +DECLARE_REG_SIZE ax, al, ah +DECLARE_REG_SIZE bx, bl, bh +DECLARE_REG_SIZE cx, cl, ch +DECLARE_REG_SIZE dx, dl, dh +DECLARE_REG_SIZE si, sil, null +DECLARE_REG_SIZE di, dil, null +DECLARE_REG_SIZE bp, bpl, null ; t# defines for when per-arch register allocation is more complex than just function arguments @@ -188,6 +187,7 @@ DECLARE_REG_SIZE bp, bpl %define t%1q t%1 %+ q %define t%1d t%1 %+ d %define t%1w t%1 %+ w + %define t%1h t%1 %+ h %define t%1b t%1 %+ b %rotate 1 %endrep @@ -277,6 +277,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 CAT_UNDEF arg_name %+ %%i, q CAT_UNDEF arg_name %+ %%i, d CAT_UNDEF arg_name %+ %%i, w + CAT_UNDEF arg_name %+ %%i, h CAT_UNDEF arg_name %+ %%i, b CAT_UNDEF arg_name %+ %%i, m CAT_UNDEF arg_name %+ %%i, mp @@ -292,6 +293,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %xdefine %1q r %+ %%i %+ q %xdefine %1d r %+ %%i %+ d %xdefine %1w r %+ %%i %+ w + %xdefine %1h r %+ %%i %+ h %xdefine %1b r %+ %%i %+ b %xdefine %1m r %+ %%i %+ m %xdefine %1mp r %+ %%i %+ mp @@ -305,21 +307,21 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %if WIN64 ; Windows x64 ;================================================= -DECLARE_REG 0, rcx, ecx, cx, cl -DECLARE_REG 1, rdx, edx, dx, dl -DECLARE_REG 2, R8, R8D, R8W, R8B -DECLARE_REG 3, R9, R9D, R9W, R9B -DECLARE_REG 4, R10, R10D, R10W, R10B, 40 -DECLARE_REG 5, R11, R11D, R11W, R11B, 48 -DECLARE_REG 6, rax, eax, ax, al, 56 -DECLARE_REG 7, rdi, edi, di, dil, 64 -DECLARE_REG 8, rsi, esi, si, sil, 72 -DECLARE_REG 9, rbx, ebx, bx, bl, 80 -DECLARE_REG 10, rbp, ebp, bp, bpl, 88 -DECLARE_REG 11, R12, R12D, R12W, R12B, 96 -DECLARE_REG 12, R13, R13D, R13W, R13B, 104 -DECLARE_REG 13, R14, 
R14D, R14W, R14B, 112 -DECLARE_REG 14, R15, R15D, R15W, R15B, 120 +DECLARE_REG 0, rcx +DECLARE_REG 1, rdx +DECLARE_REG 2, R8 +DECLARE_REG 3, R9 +DECLARE_REG 4, R10, 40 +DECLARE_REG 5, R11, 48 +DECLARE_REG 6, rax, 56 +DECLARE_REG 7, rdi, 64 +DECLARE_REG 8, rsi, 72 +DECLARE_REG 9, rbx, 80 +DECLARE_REG 10, rbp, 88 +DECLARE_REG 11, R12, 96 +DECLARE_REG 12, R13, 104 +DECLARE_REG 13, R14, 112 +DECLARE_REG 14, R15, 120 %macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names... %assign num_args %1 @@ -366,6 +368,8 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120 %assign xmm_regs_used 0 %endmacro +%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 + %macro RET 0 WIN64_RESTORE_XMM_INTERNAL rsp POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 @@ -375,31 +379,23 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120 ret %endmacro -%macro REP_RET 0 - %if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 - RET - %else - rep ret - %endif -%endmacro - %elif ARCH_X86_64 ; *nix x64 ;============================================= -DECLARE_REG 0, rdi, edi, di, dil -DECLARE_REG 1, rsi, esi, si, sil -DECLARE_REG 2, rdx, edx, dx, dl -DECLARE_REG 3, rcx, ecx, cx, cl -DECLARE_REG 4, R8, R8D, R8W, R8B -DECLARE_REG 5, R9, R9D, R9W, R9B -DECLARE_REG 6, rax, eax, ax, al, 8 -DECLARE_REG 7, R10, R10D, R10W, R10B, 16 -DECLARE_REG 8, R11, R11D, R11W, R11B, 24 -DECLARE_REG 9, rbx, ebx, bx, bl, 32 -DECLARE_REG 10, rbp, ebp, bp, bpl, 40 -DECLARE_REG 11, R12, R12D, R12W, R12B, 48 -DECLARE_REG 12, R13, R13D, R13W, R13B, 56 -DECLARE_REG 13, R14, R14D, R14W, R14B, 64 -DECLARE_REG 14, R15, R15D, R15W, R15B, 72 +DECLARE_REG 0, rdi +DECLARE_REG 1, rsi +DECLARE_REG 2, rdx +DECLARE_REG 3, rcx +DECLARE_REG 4, R8 +DECLARE_REG 5, R9 +DECLARE_REG 6, rax, 8 +DECLARE_REG 7, R10, 16 +DECLARE_REG 8, R11, 24 +DECLARE_REG 9, rbx, 32 +DECLARE_REG 10, rbp, 40 +DECLARE_REG 11, R12, 48 +DECLARE_REG 12, R13, 56 +DECLARE_REG 13, R14, 64 +DECLARE_REG 14, R15, 72 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... 
%assign num_args %1 @@ -411,6 +407,8 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72 DEFINE_ARGS %4 %endmacro +%define has_epilogue regs_used > 9 || mmsize == 32 + %macro RET 0 POP_IF_USED 14, 13, 12, 11, 10, 9 %if mmsize == 32 @@ -419,23 +417,15 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72 ret %endmacro -%macro REP_RET 0 - %if regs_used > 9 || mmsize == 32 - RET - %else - rep ret - %endif -%endmacro - %else ; X86_32 ;============================================================== -DECLARE_REG 0, eax, eax, ax, al, 4 -DECLARE_REG 1, ecx, ecx, cx, cl, 8 -DECLARE_REG 2, edx, edx, dx, dl, 12 -DECLARE_REG 3, ebx, ebx, bx, bl, 16 -DECLARE_REG 4, esi, esi, si, null, 20 -DECLARE_REG 5, edi, edi, di, null, 24 -DECLARE_REG 6, ebp, ebp, bp, null, 28 +DECLARE_REG 0, eax, 4 +DECLARE_REG 1, ecx, 8 +DECLARE_REG 2, edx, 12 +DECLARE_REG 3, ebx, 16 +DECLARE_REG 4, esi, 20 +DECLARE_REG 5, edi, 24 +DECLARE_REG 6, ebp, 28 %define rsp esp %macro DECLARE_ARG 1-* @@ -460,6 +450,8 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 DEFINE_ARGS %4 %endmacro +%define has_epilogue regs_used > 3 || mmsize == 32 + %macro RET 0 POP_IF_USED 6, 5, 4, 3 %if mmsize == 32 @@ -468,14 +460,6 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 ret %endmacro -%macro REP_RET 0 - %if regs_used > 3 || mmsize == 32 - RET - %else - rep ret - %endif -%endmacro - %endif ;====================================================================== %if WIN64 == 0 @@ -485,6 +469,23 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 %endmacro %endif +%macro REP_RET 0 + %if has_epilogue + RET + %else + rep ret + %endif +%endmacro + +%macro TAIL_CALL 2 ; callee, is_nonadjacent + %if has_epilogue + call %1 + RET + %elif %2 + jmp %1 + %endif +%endmacro + ;============================================================================= ; arch-independent part ;============================================================================= @@ -564,6 +565,8 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %assign cpuflags_avx (1<<11)| cpuflags_sse42 
%assign cpuflags_xop (1<<12)| cpuflags_avx %assign cpuflags_fma4 (1<<13)| cpuflags_avx +%assign cpuflags_avx2 (1<<14)| cpuflags_avx +%assign cpuflags_fma3 (1<<15)| cpuflags_avx %assign cpuflags_cache32 (1<<16) %assign cpuflags_cache64 (1<<17) @@ -572,6 +575,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %assign cpuflags_misalign (1<<20) %assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant %assign cpuflags_atom (1<<22) +%assign cpuflags_bmi1 (1<<23) +%assign cpuflags_bmi2 (1<<24)|cpuflags_bmi1 +%assign cpuflags_tbm (1<<25)|cpuflags_bmi1 %define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x)) %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x)) @@ -833,25 +839,38 @@ INIT_XMM %endrep %undef i +%macro CHECK_AVX_INSTR_EMU 3-* + %xdefine %%opcode %1 + %xdefine %%dst %2 + %rep %0-2 + %ifidn %%dst, %3 + %error non-avx emulation of ``%%opcode'' is not supported + %endif + %rotate 1 + %endrep +%endmacro + ;%1 == instruction ;%2 == 1 if float, 0 if int ;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm) ;%4 == number of operands given ;%5+: operands %macro RUN_AVX_INSTR 6-7+ - %ifid %5 - %define %%size sizeof%5 + %ifid %6 + %define %%sizeofreg sizeof%6 + %elifid %5 + %define %%sizeofreg sizeof%5 %else - %define %%size mmsize + %define %%sizeofreg mmsize %endif - %if %%size==32 - %if %0 >= 7 + %if %%sizeofreg==32 + %if %4>=3 v%1 %5, %6, %7 %else v%1 %5, %6 %endif %else - %if %%size==8 + %if %%sizeofreg==8 %define %%regmov movq %elif %2 %define %%regmov movaps @@ -861,16 +880,17 @@ INIT_XMM %if %4>=3+%3 %ifnidn %5, %6 - %if avx_enabled && sizeof%5==16 + %if avx_enabled && %%sizeofreg==16 v%1 %5, %6, %7 %else + CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7 %%regmov %5, %6 %1 %5, %7 %endif %else %1 %5, %7 %endif - %elif %3 + %elif %4>=3 %1 %5, %6, %7 %else %1 %5, %6 @@ -901,7 +921,7 @@ INIT_XMM ;%1 == instruction ;%2 == 1 if float, 0 if int -;%3 == 1 if 4-operand (xmm, xmm, xmm, 
imm), 0 if 3-operand (xmm, xmm, xmm) +;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm) ;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not %macro AVX_INSTR 4 %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4 @@ -966,6 +986,9 @@ AVX_INSTR mulsd, 1, 0, 1 AVX_INSTR mulss, 1, 0, 1 AVX_INSTR orpd, 1, 0, 1 AVX_INSTR orps, 1, 0, 1 +AVX_INSTR pabsb, 0, 0, 0 +AVX_INSTR pabsw, 0, 0, 0 +AVX_INSTR pabsd, 0, 0, 0 AVX_INSTR packsswb, 0, 0, 0 AVX_INSTR packssdw, 0, 0, 0 AVX_INSTR packuswb, 0, 0, 0 @@ -1017,6 +1040,7 @@ AVX_INSTR pminsd, 0, 0, 1 AVX_INSTR pminub, 0, 0, 1 AVX_INSTR pminuw, 0, 0, 1 AVX_INSTR pminud, 0, 0, 1 +AVX_INSTR pmovmskb, 0, 0, 0 AVX_INSTR pmulhuw, 0, 0, 1 AVX_INSTR pmulhrsw, 0, 0, 1 AVX_INSTR pmulhw, 0, 0, 1 @@ -1027,6 +1051,9 @@ AVX_INSTR pmuldq, 0, 0, 1 AVX_INSTR por, 0, 0, 1 AVX_INSTR psadbw, 0, 0, 1 AVX_INSTR pshufb, 0, 0, 0 +AVX_INSTR pshufd, 0, 1, 0 +AVX_INSTR pshufhw, 0, 1, 0 +AVX_INSTR pshuflw, 0, 1, 0 AVX_INSTR psignb, 0, 0, 0 AVX_INSTR psignw, 0, 0, 0 AVX_INSTR psignd, 0, 0, 0 @@ -1048,6 +1075,7 @@ AVX_INSTR psubsb, 0, 0, 0 AVX_INSTR psubsw, 0, 0, 0 AVX_INSTR psubusb, 0, 0, 0 AVX_INSTR psubusw, 0, 0, 0 +AVX_INSTR ptest, 0, 0, 0 AVX_INSTR punpckhbw, 0, 0, 0 AVX_INSTR punpckhwd, 0, 0, 0 AVX_INSTR punpckhdq, 0, 0, 0 @@ -1112,3 +1140,7 @@ FMA_INSTR fmaddps, mulps, addps FMA_INSTR pmacsdd, pmulld, paddd FMA_INSTR pmacsww, pmullw, paddw FMA_INSTR pmadcswd, pmaddwd, paddd + +; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf. +; This lets us use tzcnt without bumping the yasm version requirement yet. +%define tzcnt rep bsf From f8d8fe255d15f3f4a5b793234ae1a59cf055ae7c Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Fri, 27 Jul 2012 09:33:41 -0700 Subject: [PATCH 34/35] x86inc: clip num_args to 7 on x86-32. This allows us to unconditionally set the cglobal num_args parameter to a bigger value, thus making writing yasm code even easier than before. Signed-off-by: Ronald S. 
Bultje --- libavutil/x86/x86inc.asm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 1030f10a32..7a75951cf6 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -441,6 +441,9 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... %assign num_args %1 %assign regs_used %2 + %if num_args > 7 + %assign num_args 7 + %endif %if regs_used > 7 %assign regs_used 7 %endif From c83f44dba11930744e167856b48fbc24a8ff0e63 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 28 Jul 2012 08:01:12 -0700 Subject: [PATCH 35/35] h264_idct_10bit: port x86 assembly to cpuflags. --- libavcodec/x86/h264_idct_10bit.asm | 254 ++++++++++++++--------------- 1 file changed, 127 insertions(+), 127 deletions(-) diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm index 934a7ff633..2aab9864d6 100644 --- a/libavcodec/x86/h264_idct_10bit.asm +++ b/libavcodec/x86/h264_idct_10bit.asm @@ -72,25 +72,25 @@ SECTION .text STORE_DIFFx2 m2, m3, m4, m5, %1, %3 %endmacro -%macro IDCT_ADD_10 1 -cglobal h264_idct_add_10_%1, 3,3 +%macro IDCT_ADD_10 0 +cglobal h264_idct_add_10, 3,3 IDCT4_ADD_10 r0, r1, r2 RET %endmacro -INIT_XMM -IDCT_ADD_10 sse2 +INIT_XMM sse2 +IDCT_ADD_10 %if HAVE_AVX -INIT_AVX -IDCT_ADD_10 avx +INIT_XMM avx +IDCT_ADD_10 %endif ;----------------------------------------------------------------------------- ; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) ;----------------------------------------------------------------------------- ;;;;;;; NO FATE SAMPLES TRIGGER THIS -%macro ADD4x4IDCT 1 -add4x4_idct_%1: +%macro ADD4x4IDCT 0 +add4x4_idct %+ SUFFIX: add r5, r0 mova m0, [r2+ 0] mova m1, [r2+16] @@ -107,52 +107,52 @@ add4x4_idct_%1: ret %endmacro -INIT_XMM +INIT_XMM sse2 ALIGN 16 -ADD4x4IDCT sse2 +ADD4x4IDCT %if HAVE_AVX -INIT_AVX +INIT_XMM avx ALIGN 16 -ADD4x4IDCT avx 
+ADD4x4IDCT %endif -%macro ADD16_OP 3 - cmp byte [r4+%3], 0 - jz .skipblock%2 - mov r5d, [r1+%2*4] - call add4x4_idct_%1 -.skipblock%2: -%if %2<15 +%macro ADD16_OP 2 + cmp byte [r4+%2], 0 + jz .skipblock%1 + mov r5d, [r1+%1*4] + call add4x4_idct %+ SUFFIX +.skipblock%1: +%if %1<15 add r2, 64 %endif %endmacro -%macro IDCT_ADD16_10 1 -cglobal h264_idct_add16_10_%1, 5,6 - ADD16_OP %1, 0, 4+1*8 - ADD16_OP %1, 1, 5+1*8 - ADD16_OP %1, 2, 4+2*8 - ADD16_OP %1, 3, 5+2*8 - ADD16_OP %1, 4, 6+1*8 - ADD16_OP %1, 5, 7+1*8 - ADD16_OP %1, 6, 6+2*8 - ADD16_OP %1, 7, 7+2*8 - ADD16_OP %1, 8, 4+3*8 - ADD16_OP %1, 9, 5+3*8 - ADD16_OP %1, 10, 4+4*8 - ADD16_OP %1, 11, 5+4*8 - ADD16_OP %1, 12, 6+3*8 - ADD16_OP %1, 13, 7+3*8 - ADD16_OP %1, 14, 6+4*8 - ADD16_OP %1, 15, 7+4*8 +%macro IDCT_ADD16_10 0 +cglobal h264_idct_add16_10, 5,6 + ADD16_OP 0, 4+1*8 + ADD16_OP 1, 5+1*8 + ADD16_OP 2, 4+2*8 + ADD16_OP 3, 5+2*8 + ADD16_OP 4, 6+1*8 + ADD16_OP 5, 7+1*8 + ADD16_OP 6, 6+2*8 + ADD16_OP 7, 7+2*8 + ADD16_OP 8, 4+3*8 + ADD16_OP 9, 5+3*8 + ADD16_OP 10, 4+4*8 + ADD16_OP 11, 5+4*8 + ADD16_OP 12, 6+3*8 + ADD16_OP 13, 7+3*8 + ADD16_OP 14, 6+4*8 + ADD16_OP 15, 7+4*8 REP_RET %endmacro -INIT_XMM -IDCT_ADD16_10 sse2 +INIT_XMM sse2 +IDCT_ADD16_10 %if HAVE_AVX -INIT_AVX -IDCT_ADD16_10 avx +INIT_XMM avx +IDCT_ADD16_10 %endif ;----------------------------------------------------------------------------- @@ -185,8 +185,8 @@ IDCT_ADD16_10 avx mova [%1+%3 ], m4 %endmacro -INIT_MMX -cglobal h264_idct_dc_add_10_mmx2,3,3 +INIT_MMX mmx2 +cglobal h264_idct_dc_add_10,3,3 movd m0, [r1] paddd m0, [pd_32] psrad m0, 6 @@ -199,8 +199,8 @@ cglobal h264_idct_dc_add_10_mmx2,3,3 ;----------------------------------------------------------------------------- ; void h264_idct8_dc_add(pixel *dst, dctcoef *block, int stride) ;----------------------------------------------------------------------------- -%macro IDCT8_DC_ADD 1 -cglobal h264_idct8_dc_add_10_%1,3,3,7 +%macro IDCT8_DC_ADD 0 +cglobal h264_idct8_dc_add_10,3,3,7 mov r1d, [r1] 
add r1, 32 sar r1, 6 @@ -214,45 +214,45 @@ cglobal h264_idct8_dc_add_10_%1,3,3,7 RET %endmacro -INIT_XMM -IDCT8_DC_ADD sse2 +INIT_XMM sse2 +IDCT8_DC_ADD %if HAVE_AVX -INIT_AVX -IDCT8_DC_ADD avx +INIT_XMM avx +IDCT8_DC_ADD %endif ;----------------------------------------------------------------------------- ; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) ;----------------------------------------------------------------------------- -%macro AC 2 -.ac%2 - mov r5d, [r1+(%2+0)*4] - call add4x4_idct_%1 - mov r5d, [r1+(%2+1)*4] +%macro AC 1 +.ac%1 + mov r5d, [r1+(%1+0)*4] + call add4x4_idct %+ SUFFIX + mov r5d, [r1+(%1+1)*4] add r2, 64 - call add4x4_idct_%1 + call add4x4_idct %+ SUFFIX add r2, 64 - jmp .skipadd%2 + jmp .skipadd%1 %endmacro %assign last_block 16 -%macro ADD16_OP_INTRA 3 - cmp word [r4+%3], 0 - jnz .ac%2 +%macro ADD16_OP_INTRA 2 + cmp word [r4+%2], 0 + jnz .ac%1 mov r5d, [r2+ 0] or r5d, [r2+64] - jz .skipblock%2 - mov r5d, [r1+(%2+0)*4] - call idct_dc_add_%1 -.skipblock%2: -%if %2