diff --git a/configure b/configure index 2ea8a02a43..0e7e94c0e7 100755 --- a/configure +++ b/configure @@ -2927,8 +2927,8 @@ enabled libvpx && { enabled libvpx_encoder && { check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VPX_CQ" -lvpx || die "ERROR: libvpx encoder version must be >=0.9.6"; } } enabled libx264 && require libx264 x264.h x264_encoder_encode -lx264 && - { check_cpp_condition x264.h "X264_BUILD >= 99" || - die "ERROR: libx264 version must be >= 0.99."; } + { check_cpp_condition x264.h "X264_BUILD >= 115" || + die "ERROR: libx264 version must be >= 0.115."; } enabled libxavs && require libxavs xavs.h xavs_encoder_encode -lxavs enabled libxvid && require libxvid xvid.h xvid_global -lxvidcore enabled mlib && require mediaLib mlib_types.h mlib_VectorSub_S16_U8_Mod -lmlib diff --git a/ffpresets/libx264-lossless_fast.ffpreset b/ffpresets/libx264-lossless_fast.ffpreset index b7696b5bcb..49b9ed1add 100644 --- a/ffpresets/libx264-lossless_fast.ffpreset +++ b/ffpresets/libx264-lossless_fast.ffpreset @@ -1,5 +1,5 @@ coder=0 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=-parti8x8+parti4x4+partp8x8-partp4x4-partb8x8 me_method=hex diff --git a/ffpresets/libx264-lossless_max.ffpreset b/ffpresets/libx264-lossless_max.ffpreset index 75c387f162..f32d7b40c6 100644 --- a/ffpresets/libx264-lossless_max.ffpreset +++ b/ffpresets/libx264-lossless_max.ffpreset @@ -1,5 +1,5 @@ coder=1 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8 me_method=esa diff --git a/ffpresets/libx264-lossless_medium.ffpreset b/ffpresets/libx264-lossless_medium.ffpreset index 116e3343ce..0b84612fcb 100644 --- a/ffpresets/libx264-lossless_medium.ffpreset +++ b/ffpresets/libx264-lossless_medium.ffpreset @@ -1,5 +1,5 @@ coder=1 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=-parti8x8+parti4x4+partp8x8+partp4x4-partb8x8 me_method=hex diff --git a/ffpresets/libx264-lossless_slow.ffpreset b/ffpresets/libx264-lossless_slow.ffpreset index 0d496f6e29..857d3d1986 100644 --- a/ffpresets/libx264-lossless_slow.ffpreset +++ b/ffpresets/libx264-lossless_slow.ffpreset @@ -1,5 +1,5 @@ coder=1 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8 me_method=umh diff --git a/ffpresets/libx264-lossless_slower.ffpreset b/ffpresets/libx264-lossless_slower.ffpreset index 672e0cd637..ef0609f1b6 100644 --- a/ffpresets/libx264-lossless_slower.ffpreset +++ b/ffpresets/libx264-lossless_slower.ffpreset @@ -1,5 +1,5 @@ coder=1 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8 me_method=umh diff --git a/ffpresets/libx264-lossless_ultrafast.ffpreset b/ffpresets/libx264-lossless_ultrafast.ffpreset index a2eda65edf..4cc84f1b4f 100644 --- a/ffpresets/libx264-lossless_ultrafast.ffpreset +++ b/ffpresets/libx264-lossless_ultrafast.ffpreset @@ -1,5 +1,5 @@ coder=0 -flags=+loop +flags=+loop+cgop cmp=+chroma partitions=-parti8x8-parti4x4-partp8x8-partp4x4-partb8x8 me_method=dia diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2b00575463..6723118693 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -28,7 +28,7 @@ OBJS-$(CONFIG_AANDCT) += aandcttab.o OBJS-$(CONFIG_AC3DSP) += ac3dsp.o OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o -OBJS-$(CONFIG_DCT) += dct.o +OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o OBJS-$(CONFIG_DWT) += dwt.o OBJS-$(CONFIG_DXVA2) += dxva2.o FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o diff --git a/libavcodec/dct.c b/libavcodec/dct.c index ef3cd50a79..c30cff664e 100644 --- a/libavcodec/dct.c +++ b/libavcodec/dct.c @@ -30,9 +30,7 @@ #include #include "libavutil/mathematics.h" #include "dct.h" - -#define DCT32_FLOAT -#include "dct32.c" +#include "dct32.h" /* sin((M_PI * x / (2*n)) */ #define SIN(s,n,x) (s->costab[(n) - (x)]) @@ -210,7 +208,7 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse) } } - s->dct32 = dct32; + s->dct32 = ff_dct32_float; if (HAVE_MMX) ff_dct_init_mmx(s); return 0; diff --git a/libavcodec/dct32.c b/libavcodec/dct32.c index 4e843ee832..fb53d53ab1 100644 --- a/libavcodec/dct32.c +++ b/libavcodec/dct32.c @@ -19,10 +19,19 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifdef DCT32_FLOAT +#include "dct32.h" +#include "mathops.h" + +#if DCT32_FLOAT +# define dct32 ff_dct32_float # define FIXHR(x) ((float)(x)) # define MULH3(x, y, s) ((s)*(y)*(x)) # define INTFLOAT float +#else +# define dct32 ff_dct32_fixed +# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) +# define MULH3(x, y, s) MULH((s)*(x), y) +# define INTFLOAT int #endif @@ -103,7 +112,7 @@ #define ADD(a, b) val##a += val##b /* DCT32 without 1/sqrt(2) coef zero scaling. */ -static void dct32(INTFLOAT *out, const INTFLOAT *tab) +void dct32(INTFLOAT *out, const INTFLOAT *tab) { INTFLOAT tmp0, tmp1; diff --git a/libavcodec/dct32.h b/libavcodec/dct32.h new file mode 100644 index 0000000000..110338d25c --- /dev/null +++ b/libavcodec/dct32.h @@ -0,0 +1,25 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DCT32_H +#define AVCODEC_DCT32_H + +void ff_dct32_float(float *dst, const float *src); +void ff_dct32_fixed(int *dst, const int *src); + +#endif diff --git a/libavcodec/dct32_fixed.c b/libavcodec/dct32_fixed.c new file mode 100644 index 0000000000..7eb9dc1a53 --- /dev/null +++ b/libavcodec/dct32_fixed.c @@ -0,0 +1,20 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define DCT32_FLOAT 0 +#include "dct32.c" diff --git a/libavcodec/dct32_float.c b/libavcodec/dct32_float.c new file mode 100644 index 0000000000..727ec3caca --- /dev/null +++ b/libavcodec/dct32_float.c @@ -0,0 +1,20 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define DCT32_FLOAT 1 +#include "dct32.c" diff --git a/libavcodec/iirfilter.c b/libavcodec/iirfilter.c index bc63c3991a..27461fb389 100644 --- a/libavcodec/iirfilter.c +++ b/libavcodec/iirfilter.c @@ -324,7 +324,7 @@ int main(void) int i; FILE* fd; - fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH, + fcoeffs = ff_iir_filter_init_coeffs(NULL, FF_FILTER_TYPE_BUTTERWORTH, FF_FILTER_MODE_LOWPASS, FILT_ORDER, cutoff_coeff, 0.0, 0.0); fstate = ff_iir_filter_init_state(FILT_ORDER); diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index 838cb703e8..eae21fe2bb 100644 --- a/libavcodec/libx264.c +++ b/libavcodec/libx264.c @@ -367,6 +367,8 @@ static av_cold int X264_init(AVCodecContext *avctx) x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT; + x4->params.b_open_gop = !(avctx->flags & CODEC_FLAG_CLOSED_GOP); + x4->params.i_slice_count = avctx->slices; x4->params.vui.b_fullrange = avctx->pix_fmt == PIX_FMT_YUVJ420P; diff --git a/libavcodec/mpc.c b/libavcodec/mpc.c index d9a1fb776a..ca4c3d0dcb 100644 --- a/libavcodec/mpc.c +++ b/libavcodec/mpc.c @@ -36,7 +36,7 @@ void ff_mpc_init(void) { - ff_mpa_synth_init(ff_mpa_synth_window); + ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed); } /** @@ -51,8 +51,8 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels) for(ch = 0; ch < channels; ch++){ samples_ptr = samples + ch; for(i = 0; i < SAMPLES_PER_BAND; i++) { - ff_mpa_synth_filter(c->synth_buf[ch], &(c->synth_buf_offset[ch]), - ff_mpa_synth_window, &dither_state, + ff_mpa_synth_filter_fixed(c->synth_buf[ch], &(c->synth_buf_offset[ch]), + ff_mpa_synth_window_fixed, &dither_state, samples_ptr, channels, c->sb_samples[ch][i]); samples_ptr += 32 * channels; diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h index 138085366f..005598797d 100644 --- a/libavcodec/mpegaudio.h +++ b/libavcodec/mpegaudio.h @@ -158,9 +158,9 @@ typedef struct HuffTable { int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf); int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate); -extern MPA_INT ff_mpa_synth_window[]; -void ff_mpa_synth_init(MPA_INT *window); -void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, +extern MPA_INT ff_mpa_synth_window_fixed[]; +void ff_mpa_synth_init_fixed(MPA_INT *window); +void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset, MPA_INT *window, int *dither_state, OUT_INT *samples, int incr, INTFLOAT sb_samples[SBLIMIT]); diff --git a/libavcodec/mpegaudio_tablegen.c b/libavcodec/mpegaudio_tablegen.c index 0888e78620..90c9de430a 100644 --- a/libavcodec/mpegaudio_tablegen.c +++ b/libavcodec/mpegaudio_tablegen.c @@ -33,9 +33,9 @@ int main(void) WRITE_ARRAY("static const", int8_t, table_4_3_exp); WRITE_ARRAY("static const", uint32_t, table_4_3_value); - WRITE_ARRAY("static const", uint32_t, exp_table); + WRITE_ARRAY("static const", uint32_t, exp_table_fixed); WRITE_ARRAY("static const", float, exp_table_float); - WRITE_2D_ARRAY("static const", uint32_t, expval_table); + WRITE_2D_ARRAY("static const", uint32_t, expval_table_fixed); WRITE_2D_ARRAY("static const", float, expval_table_float); return 0; diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h index 01c4174a60..214959348a 100644 --- a/libavcodec/mpegaudio_tablegen.h +++ b/libavcodec/mpegaudio_tablegen.h @@ -33,8 +33,8 @@ #else static int8_t table_4_3_exp[TABLE_4_3_SIZE]; static uint32_t table_4_3_value[TABLE_4_3_SIZE]; -static uint32_t exp_table[512]; -static uint32_t expval_table[512][16]; +static uint32_t exp_table_fixed[512]; +static uint32_t expval_table_fixed[512][16]; static float exp_table_float[512]; static float expval_table_float[512][16]; @@ -59,10 +59,10 @@ static void mpegaudio_tableinit(void) for (exponent = 0; exponent < 512; exponent++) { for (value = 0; value < 16; value++) { double f = (double)value * cbrtf(value) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5); - expval_table[exponent][value] = llrint(f); + expval_table_fixed[exponent][value] = llrint(f); expval_table_float[exponent][value] = f; } - exp_table[exponent] = expval_table[exponent][1]; + exp_table_fixed[exponent] = expval_table_fixed[exponent][1]; exp_table_float[exponent] = expval_table_float[exponent][1]; } } diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c index bdff815d4d..c2c822223e 100644 --- a/libavcodec/mpegaudiodec.c +++ b/libavcodec/mpegaudiodec.c @@ -29,6 +29,7 @@ #include "get_bits.h" #include "dsputil.h" #include "mathops.h" +#include "dct32.h" /* * TODO: @@ -57,7 +58,7 @@ # define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) # define MULH3(x, y, s) MULH((s)*(x), y) # define MULLx(x, y, s) MULL(x,y,s) -# define RENAME(a) a +# define RENAME(a) a ## _fixed # define OUT_FMT AV_SAMPLE_FMT_S16 #endif @@ -68,12 +69,6 @@ #include "mpegaudiodata.h" #include "mpegaudiodectab.h" -#if CONFIG_FLOAT -# include "fft.h" -#else -# include "dct32.c" -#endif - static void compute_antialias(MPADecodeContext *s, GranuleDef *g); static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window, int *dither_state, OUT_INT *samples, int incr); @@ -626,7 +621,7 @@ static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window, 32 samples. */ /* XXX: optimize by avoiding ring buffer usage */ #if !CONFIG_FLOAT -void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, +void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset, MPA_INT *window, int *dither_state, OUT_INT *samples, int incr, INTFLOAT sb_samples[SBLIMIT]) @@ -637,7 +632,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, offset = *synth_buf_offset; synth_buf = synth_buf_ptr + offset; - dct32(synth_buf, sb_samples); + ff_dct32_fixed(synth_buf, sb_samples); apply_window_mp3_c(synth_buf, window, dither_state, samples, incr); offset = (offset - 32) & 511; diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c index a64870a3f9..e1165074f7 100644 --- a/libavcodec/qdm2.c +++ b/libavcodec/qdm2.c @@ -38,7 +38,7 @@ #include "avcodec.h" #include "get_bits.h" #include "dsputil.h" -#include "fft.h" +#include "rdft.h" #include "mpegaudio.h" #include "qdm2data.h" @@ -1616,8 +1616,8 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index) OUT_INT *samples_ptr = samples + ch; for (i = 0; i < 8; i++) { - ff_mpa_synth_filter(q->synth_buf[ch], &(q->synth_buf_offset[ch]), - ff_mpa_synth_window, &dither_state, + ff_mpa_synth_filter_fixed(q->synth_buf[ch], &(q->synth_buf_offset[ch]), + ff_mpa_synth_window_fixed, &dither_state, samples_ptr, q->nb_channels, q->sb_samples[ch][(8 * index) + i]); samples_ptr += 32 * q->nb_channels; @@ -1646,7 +1646,7 @@ static av_cold void qdm2_init(QDM2Context *q) { initialized = 1; qdm2_init_vlc(); - ff_mpa_synth_init(ff_mpa_synth_window); + ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed); softclip_table_init(); rnd_table_init(); init_noise_samples(); diff --git a/libavcodec/x86/dsputilenc_yasm.asm b/libavcodec/x86/dsputilenc_yasm.asm index a4f2d0cf51..016b354d6c 100644 --- a/libavcodec/x86/dsputilenc_yasm.asm +++ b/libavcodec/x86/dsputilenc_yasm.asm @@ -59,12 +59,12 @@ SECTION .text %endmacro %macro HADAMARD8 0 - SUMSUB_BADC m0, m1, m2, m3 - SUMSUB_BADC m4, m5, m6, m7 - SUMSUB_BADC m0, m2, m1, m3 - SUMSUB_BADC m4, m6, m5, m7 - SUMSUB_BADC m0, m4, m1, m5 - SUMSUB_BADC m2, m6, m3, m7 + SUMSUB_BADC w, 0, 1, 2, 3 + SUMSUB_BADC w, 4, 5, 6, 7 + SUMSUB_BADC w, 0, 2, 1, 3 + SUMSUB_BADC w, 4, 6, 5, 7 + SUMSUB_BADC w, 0, 4, 1, 5 + SUMSUB_BADC w, 2, 6, 3, 7 %endmacro %macro ABS1_SUM 3 diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index e90b0b1186..4f6f1d7bf8 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -59,11 +59,11 @@ SECTION .text movq m2, [%2+16] movq m3, [%2+24] - IDCT4_1D 0, 1, 2, 3, 4, 5 + IDCT4_1D w, 0, 1, 2, 3, 4, 5 mova m6, [pw_32] TRANSPOSE4x4W 0, 1, 2, 3, 4 paddw m0, m6 - IDCT4_1D 0, 1, 2, 3, 4, 5 + IDCT4_1D w, 0, 1, 2, 3, 4, 5 pxor m7, m7 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3 @@ -118,13 +118,13 @@ cglobal h264_idct_add_mmx, 3, 3, 0 mova m2, %1 mova m5, %2 - SUMSUB_BA m5, m2 - SUMSUB_BA m6, m5 - SUMSUB_BA m4, m2 - SUMSUB_BA m7, m6 - SUMSUB_BA m0, m4 - SUMSUB_BA m3, m2 - SUMSUB_BA m1, m5 + SUMSUB_BA w, 5, 2 + SUMSUB_BA w, 6, 5 + SUMSUB_BA w, 4, 2 + SUMSUB_BA w, 7, 6 + SUMSUB_BA w, 0, 4 + SUMSUB_BA w, 3, 2 + SUMSUB_BA w, 1, 5 SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 %endmacro @@ -715,10 +715,10 @@ x264_add8x4_idct_sse2: movhps m1, [r2+40] movhps m2, [r2+48] movhps m3, [r2+56] - IDCT4_1D 0,1,2,3,4,5 + IDCT4_1D w,0,1,2,3,4,5 TRANSPOSE2x4x4W 0,1,2,3,4 paddw m0, [pw_32] - IDCT4_1D 0,1,2,3,4,5 + IDCT4_1D w,0,1,2,3,4,5 pxor m7, m7 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3 lea r0, [r0+r3*2] @@ -859,8 +859,8 @@ cglobal h264_idct_add8_sse2, 5, 7, 8 ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul) %macro WALSH4_1D 5 - SUMSUB_BADC m%4, m%3, m%2, m%1, m%5 - SUMSUB_BADC m%4, m%2, m%3, m%1, m%5 + SUMSUB_BADC w, %4, %3, %2, %1, %5 + SUMSUB_BADC w, %4, %2, %3, %1, %5 SWAP %1, %4, %3 %endmacro diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 9b175c1488..14b49705dc 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -1106,10 +1106,10 @@ cglobal vp8_idct_dc_add4uv_mmx, 3, 3 ; %5/%6 are temporary registers ; we assume m6/m7 have constant words 20091/17734 loaded in them %macro VP8_IDCT_TRANSFORM4x4_1D 6 - SUMSUB_BA m%3, m%1, m%5 ;t0, t1 + SUMSUB_BA w, %3, %1, %5 ;t0, t1 VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3 - SUMSUB_BA m%4, m%3, m%5 ;tmp0, tmp3 - SUMSUB_BA m%2, m%1, m%5 ;tmp1, tmp2 + SUMSUB_BA w, %4, %3, %5 ;tmp0, tmp3 + SUMSUB_BA w, %2, %1, %5 ;tmp1, tmp2 SWAP %4, %1 SWAP %4, %3 %endmacro @@ -1181,8 +1181,8 @@ VP8_IDCT_ADD sse %endmacro %macro HADAMARD4_1D 4 - SUMSUB_BADC m%2, m%1, m%4, m%3 - SUMSUB_BADC m%4, m%2, m%3, m%1 + SUMSUB_BADC w, %2, %1, %4, %3 + SUMSUB_BADC w, %4, %2, %3, %1 SWAP %1, %4, %3 %endmacro diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm index 7e5b67419a..18ba9d1ad2 100644 --- a/libavcodec/x86/x86util.asm +++ b/libavcodec/x86/x86util.asm @@ -208,6 +208,17 @@ pminub %2, %4 %endmacro +%macro ABSD2_MMX 4 + pxor %3, %3 + pxor %4, %4 + pcmpgtd %3, %1 + pcmpgtd %4, %2 + pxor %1, %3 + pxor %2, %4 + psubd %1, %3 + psubd %2, %4 +%endmacro + %macro ABSB_SSSE3 2 pabsb %1, %1 %endmacro @@ -230,12 +241,7 @@ %macro SPLATB_MMX 3 movd %1, [%2-3] ;to avoid crossing a cacheline punpcklbw %1, %1 -%if mmsize==16 - pshuflw %1, %1, 0xff - punpcklqdq %1, %1 -%else - pshufw %1, %1, 0xff -%endif + SPLATW %1, %1, 3 %endmacro %macro SPLATB_SSSE3 3 @@ -243,125 +249,169 @@ pshufb %1, %3 %endmacro -%macro PALIGNR_MMX 4 - %ifnidn %4, %2 +%macro PALIGNR_MMX 4-5 ; [dst,] src1, src2, imm, tmp + %define %%dst %1 +%if %0==5 +%ifnidn %1, %2 + mova %%dst, %2 +%endif + %rotate 1 +%endif +%ifnidn %4, %2 mova %4, %2 - %endif - %if mmsize == 8 - psllq %1, (8-%3)*8 +%endif +%if mmsize==8 + psllq %%dst, (8-%3)*8 psrlq %4, %3*8 - %else - pslldq %1, 16-%3 +%else + pslldq %%dst, 16-%3 psrldq %4, %3 - %endif - por %1, %4 +%endif + por %%dst, %4 %endmacro -%macro PALIGNR_SSSE3 4 +%macro PALIGNR_SSSE3 4-5 +%if %0==5 + palignr %1, %2, %3, %4 +%else palignr %1, %2, %3 +%endif %endmacro %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from %ifnum %5 - mova m%1, m%5 - mova m%3, m%5 + pand m%3, m%5, m%4 ; src .. y6 .. y4 + pand m%1, m%5, m%2 ; dst .. y6 .. y4 %else mova m%1, %5 - mova m%3, m%1 + pand m%3, m%1, m%4 ; src .. y6 .. y4 + pand m%1, m%1, m%2 ; dst .. y6 .. y4 %endif - pand m%1, m%2 ; dst .. y6 .. y4 - pand m%3, m%4 ; src .. y6 .. y4 - psrlw m%2, 8 ; dst .. y7 .. y5 - psrlw m%4, 8 ; src .. y7 .. y5 + psrlw m%2, 8 ; dst .. y7 .. y5 + psrlw m%4, 8 ; src .. y7 .. y5 %endmacro -%macro SUMSUB_BA 2-3 -%if %0==2 - paddw %1, %2 - paddw %2, %2 - psubw %2, %1 +%macro SUMSUB_BA 3-4 +%if %0==3 + padd%1 m%2, m%3 + padd%1 m%3, m%3 + psub%1 m%3, m%2 %else - mova %3, %1 - paddw %1, %2 - psubw %2, %3 -%endif -%endmacro - -%macro SUMSUB_BADC 4-5 -%if %0==5 - SUMSUB_BA %1, %2, %5 - SUMSUB_BA %3, %4, %5 +%if avx_enabled == 0 + mova m%4, m%2 + padd%1 m%2, m%3 + psub%1 m%3, m%4 %else - paddw %1, %2 - paddw %3, %4 - paddw %2, %2 - paddw %4, %4 - psubw %2, %1 - psubw %4, %3 + padd%1 m%4, m%2, m%3 + psub%1 m%3, m%2 + SWAP %2, %4 +%endif %endif %endmacro -%macro SUMSUB2_AB 3 - mova %3, %1 - paddw %1, %1 - paddw %1, %2 - psubw %3, %2 - psubw %3, %2 +%macro SUMSUB_BADC 5-6 +%if %0==6 + SUMSUB_BA %1, %2, %3, %6 + SUMSUB_BA %1, %4, %5, %6 +%else + padd%1 m%2, m%3 + padd%1 m%4, m%5 + padd%1 m%3, m%3 + padd%1 m%5, m%5 + psub%1 m%3, m%2 + psub%1 m%5, m%4 +%endif %endmacro -%macro SUMSUB2_BA 3 - mova m%3, m%1 - paddw m%1, m%2 - paddw m%1, m%2 - psubw m%2, m%3 - psubw m%2, m%3 +%macro SUMSUB2_AB 4 +%ifnum %3 + psub%1 m%4, m%2, m%3 + psub%1 m%4, m%3 + padd%1 m%2, m%2 + padd%1 m%2, m%3 +%else + mova m%4, m%2 + padd%1 m%2, m%2 + padd%1 m%2, %3 + psub%1 m%4, %3 + psub%1 m%4, %3 +%endif %endmacro -%macro SUMSUBD2_AB 4 - mova %4, %1 - mova %3, %2 - psraw %2, 1 ; %2: %2>>1 - psraw %1, 1 ; %1: %1>>1 - paddw %2, %4 ; %2: %2>>1+%1 - psubw %1, %3 ; %1: %1>>1-%2 +%macro SUMSUB2_BA 4 +%if avx_enabled == 0 + mova m%4, m%2 + padd%1 m%2, m%3 + padd%1 m%2, m%3 + psub%1 m%3, m%4 + psub%1 m%3, m%4 +%else + padd%1 m%4, m%2, m%3 + padd%1 m%4, m%3 + psub%1 m%3, m%2 + psub%1 m%3, m%2 + SWAP %2, %4 +%endif +%endmacro + +%macro SUMSUBD2_AB 5 +%ifnum %4 + psra%1 m%5, m%2, 1 ; %3: %3>>1 + psra%1 m%4, m%3, 1 ; %2: %2>>1 + padd%1 m%4, m%2 ; %3: %3>>1+%2 + psub%1 m%5, m%3 ; %2: %2>>1-%3 + SWAP %2, %5 + SWAP %3, %4 +%else + mova %5, m%2 + mova %4, m%3 + psra%1 m%3, 1 ; %3: %3>>1 + psra%1 m%2, 1 ; %2: %2>>1 + padd%1 m%3, %5 ; %3: %3>>1+%2 + psub%1 m%2, %4 ; %2: %2>>1-%3 +%endif %endmacro %macro DCT4_1D 5 %ifnum %5 - SUMSUB_BADC m%4, m%1, m%3, m%2; m%5 - SUMSUB_BA m%3, m%4, m%5 - SUMSUB2_AB m%1, m%2, m%5 + SUMSUB_BADC w, %4, %1, %3, %2, %5 + SUMSUB_BA w, %3, %4, %5 + SUMSUB2_AB w, %1, %2, %5 SWAP %1, %3, %4, %5, %2 %else - SUMSUB_BADC m%4, m%1, m%3, m%2 - SUMSUB_BA m%3, m%4 - mova [%5], m%2 - SUMSUB2_AB m%1, [%5], m%2 + SUMSUB_BADC w, %4, %1, %3, %2 + SUMSUB_BA w, %3, %4 + mova [%5], m%2 + SUMSUB2_AB w, %1, [%5], %2 SWAP %1, %3, %4, %2 %endif %endmacro -%macro IDCT4_1D 5-6 -%ifnum %5 - SUMSUBD2_AB m%2, m%4, m%6, m%5 - ; %2: %2>>1-%4 %4: %2+%4>>1 - SUMSUB_BA m%3, m%1, m%6 - ; %3: %1+%3 %1: %1-%3 - SUMSUB_BADC m%4, m%3, m%2, m%1, m%6 - ; %4: %1+%3 + (%2+%4>>1) - ; %3: %1+%3 - (%2+%4>>1) - ; %2: %1-%3 + (%2>>1-%4) - ; %1: %1-%3 - (%2>>1-%4) +%macro IDCT4_1D 6-7 +%ifnum %6 + SUMSUBD2_AB %1, %3, %5, %7, %6 + ; %3: %3>>1-%5 %5: %3+%5>>1 + SUMSUB_BA %1, %4, %2, %7 + ; %4: %2+%4 %2: %2-%4 + SUMSUB_BADC %1, %5, %4, %3, %2, %7 + ; %5: %2+%4 + (%3+%5>>1) + ; %4: %2+%4 - (%3+%5>>1) + ; %3: %2-%4 + (%3>>1-%5) + ; %2: %2-%4 - (%3>>1-%5) %else - SUMSUBD2_AB m%2, m%4, [%5], [%5+16] - SUMSUB_BA m%3, m%1 - SUMSUB_BADC m%4, m%3, m%2, m%1 +%ifidn %1, w + SUMSUBD2_AB %1, %3, %5, [%6], [%6+16] +%else + SUMSUBD2_AB %1, %3, %5, [%6], [%6+32] %endif - SWAP %1, %4, %3 - ; %1: %1+%3 + (%2+%4>>1) row0 - ; %2: %1-%3 + (%2>>1-%4) row1 - ; %3: %1-%3 - (%2>>1-%4) row2 - ; %4: %1+%3 - (%2+%4>>1) row3 + SUMSUB_BA %1, %4, %2 + SUMSUB_BADC %1, %5, %4, %3, %2 +%endif + SWAP %2, %5, %4 + ; %2: %2+%4 + (%3+%5>>1) row0 + ; %3: %2-%4 + (%3>>1-%5) row1 + ; %4: %2-%4 - (%3>>1-%5) row2 + ; %5: %2+%4 - (%3+%5>>1) row3 %endmacro diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c index a21af775de..84881592b6 100644 --- a/libavformat/asfdec.c +++ b/libavformat/asfdec.c @@ -848,7 +848,10 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){ } if (asf->packet_flags & 0x01) { DO_2BITS(asf->packet_segsizetype >> 6, asf->packet_frag_size, 0); // 0 is illegal - if(asf->packet_frag_size > asf->packet_size_left - rsize){ + if (rsize > asf->packet_size_left) { + av_log(s, AV_LOG_ERROR, "packet_replic_size is invalid\n"); + return -1; + } else if(asf->packet_frag_size > asf->packet_size_left - rsize){ if (asf->packet_frag_size > asf->packet_size_left - rsize + asf->packet_padsize) { av_log(s, AV_LOG_ERROR, "packet_frag_size is invalid (%d-%d)\n", asf->packet_size_left, rsize); return -1; @@ -1261,21 +1264,22 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, int64_t pts, int if (!asf->index_read) asf_build_simple_index(s, stream_index); - if(!(asf->index_read && st->index_entries)){ - if(av_seek_frame_binary(s, stream_index, pts, flags)<0) - return -1; - }else{ + if((asf->index_read && st->index_entries)){ index= av_index_search_timestamp(st, pts, flags); - if(index<0) - return -1; + if(index >= 0) { + /* find the position */ + pos = st->index_entries[index].pos; - /* find the position */ - pos = st->index_entries[index].pos; - - /* do the seek */ - av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos); - avio_seek(s->pb, pos, SEEK_SET); + /* do the seek */ + av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos); + avio_seek(s->pb, pos, SEEK_SET); + asf_reset_header(s); + return 0; + } } + /* no index or seeking by index failed */ + if(av_seek_frame_binary(s, stream_index, pts, flags)<0) + return -1; asf_reset_header(s); return 0; } @@ -1290,4 +1294,5 @@ AVInputFormat ff_asf_demuxer = { asf_read_close, asf_read_seek, asf_read_pts, + .flags = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH, }; diff --git a/libavformat/avformat.h b/libavformat/avformat.h index db48a57ec4..ec51a57ca8 100644 --- a/libavformat/avformat.h +++ b/libavformat/avformat.h @@ -258,6 +258,8 @@ typedef struct AVFormatParameters { #define AVFMT_VARIABLE_FPS 0x0400 /**< Format allows variable fps. */ #define AVFMT_NODIMENSIONS 0x0800 /**< Format does not need width/height */ #define AVFMT_NOSTREAMS 0x1000 /**< Format does not require any streams */ +#define AVFMT_NOBINSEARCH 0x2000 /**< Format does not allow to fallback to binary search via read_timestamp */ +#define AVFMT_NOGENSEARCH 0x4000 /**< Format does not allow to fallback to generic search */ typedef struct AVOutputFormat { const char *name; diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c index 44d21683ba..3a7465ef94 100644 --- a/libavformat/flvdec.c +++ b/libavformat/flvdec.c @@ -375,7 +375,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) size = avio_rb24(s->pb); dts = avio_rb24(s->pb); dts |= avio_r8(s->pb) << 24; -// av_log(s, AV_LOG_DEBUG, "type:%d, size:%d, dts:%d\n", type, size, dts); + av_dlog(s, "type:%d, size:%d, dts:%"PRId64"\n", type, size, dts); if (url_feof(s->pb)) return AVERROR_EOF; avio_skip(s->pb, 3); /* stream id, always 0 */ @@ -421,7 +421,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) st= create_stream(s, is_audio); s->ctx_flags &= ~AVFMTCTX_NOHEADER; } -// av_log(s, AV_LOG_DEBUG, "%d %X %d \n", is_audio, flags, st->discard); + av_dlog(s, "%d %X %d \n", is_audio, flags, st->discard); if( (st->discard >= AVDISCARD_NONKEY && !((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY || is_audio)) ||(st->discard >= AVDISCARD_BIDIR && ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_DISP_INTER && !is_audio)) || st->discard >= AVDISCARD_ALL diff --git a/libavformat/utils.c b/libavformat/utils.c index b6368edcd1..70429a7650 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -1742,10 +1742,12 @@ int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp, int f return 0; } - if(s->iformat->read_timestamp) + if(s->iformat->read_timestamp && !(s->iformat->flags & AVFMT_NOBINSEARCH)) return av_seek_frame_binary(s, stream_index, timestamp, flags); - else + else if (!(s->iformat->flags & AVFMT_NOGENSEARCH)) return av_seek_frame_generic(s, stream_index, timestamp, flags); + else + return -1; } int avformat_seek_file(AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags)