diff --git a/configure b/configure
index 2ea8a02a43..0e7e94c0e7 100755
--- a/configure
+++ b/configure
@@ -2927,8 +2927,8 @@ enabled libvpx     && {
     enabled libvpx_encoder && { check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VPX_CQ" -lvpx ||
                                 die "ERROR: libvpx encoder version must be >=0.9.6"; } }
 enabled libx264    && require  libx264 x264.h x264_encoder_encode -lx264 &&
-                      { check_cpp_condition x264.h "X264_BUILD >= 99" ||
-                        die "ERROR: libx264 version must be >= 0.99."; }
+                      { check_cpp_condition x264.h "X264_BUILD >= 115" ||
+                        die "ERROR: libx264 version must be >= 0.115."; }
 enabled libxavs    && require  libxavs xavs.h xavs_encoder_encode -lxavs
 enabled libxvid    && require  libxvid xvid.h xvid_global -lxvidcore
 enabled mlib       && require  mediaLib mlib_types.h mlib_VectorSub_S16_U8_Mod -lmlib
diff --git a/ffpresets/libx264-lossless_fast.ffpreset b/ffpresets/libx264-lossless_fast.ffpreset
index b7696b5bcb..49b9ed1add 100644
--- a/ffpresets/libx264-lossless_fast.ffpreset
+++ b/ffpresets/libx264-lossless_fast.ffpreset
@@ -1,5 +1,5 @@
 coder=0
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8+parti4x4+partp8x8-partp4x4-partb8x8
 me_method=hex
diff --git a/ffpresets/libx264-lossless_max.ffpreset b/ffpresets/libx264-lossless_max.ffpreset
index 75c387f162..f32d7b40c6 100644
--- a/ffpresets/libx264-lossless_max.ffpreset
+++ b/ffpresets/libx264-lossless_max.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
 me_method=esa
diff --git a/ffpresets/libx264-lossless_medium.ffpreset b/ffpresets/libx264-lossless_medium.ffpreset
index 116e3343ce..0b84612fcb 100644
--- a/ffpresets/libx264-lossless_medium.ffpreset
+++ b/ffpresets/libx264-lossless_medium.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
 me_method=hex
diff --git a/ffpresets/libx264-lossless_slow.ffpreset b/ffpresets/libx264-lossless_slow.ffpreset
index 0d496f6e29..857d3d1986 100644
--- a/ffpresets/libx264-lossless_slow.ffpreset
+++ b/ffpresets/libx264-lossless_slow.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
 me_method=umh
diff --git a/ffpresets/libx264-lossless_slower.ffpreset b/ffpresets/libx264-lossless_slower.ffpreset
index 672e0cd637..ef0609f1b6 100644
--- a/ffpresets/libx264-lossless_slower.ffpreset
+++ b/ffpresets/libx264-lossless_slower.ffpreset
@@ -1,5 +1,5 @@
 coder=1
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
 me_method=umh
diff --git a/ffpresets/libx264-lossless_ultrafast.ffpreset b/ffpresets/libx264-lossless_ultrafast.ffpreset
index a2eda65edf..4cc84f1b4f 100644
--- a/ffpresets/libx264-lossless_ultrafast.ffpreset
+++ b/ffpresets/libx264-lossless_ultrafast.ffpreset
@@ -1,5 +1,5 @@
 coder=0
-flags=+loop
+flags=+loop+cgop
 cmp=+chroma
 partitions=-parti8x8-parti4x4-partp8x8-partp4x4-partb8x8
 me_method=dia
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 2b00575463..6723118693 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -28,7 +28,7 @@ OBJS-$(CONFIG_AANDCT)                  += aandcttab.o
 OBJS-$(CONFIG_AC3DSP)                  += ac3dsp.o
 OBJS-$(CONFIG_CRYSTALHD)               += crystalhd.o
 OBJS-$(CONFIG_ENCODERS)                += faandct.o jfdctfst.o jfdctint.o
-OBJS-$(CONFIG_DCT)                     += dct.o
+OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
 OBJS-$(CONFIG_DWT)                     += dwt.o
 OBJS-$(CONFIG_DXVA2)                   += dxva2.o
 FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o cos_fixed_tables.o
diff --git a/libavcodec/dct.c b/libavcodec/dct.c
index ef3cd50a79..c30cff664e 100644
--- a/libavcodec/dct.c
+++ b/libavcodec/dct.c
@@ -30,9 +30,7 @@
 #include <math.h>
 #include "libavutil/mathematics.h"
 #include "dct.h"
-
-#define DCT32_FLOAT
-#include "dct32.c"
+#include "dct32.h"
 
 /* sin((M_PI * x / (2*n)) */
 #define SIN(s,n,x) (s->costab[(n) - (x)])
@@ -210,7 +208,7 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
         }
     }
 
-    s->dct32 = dct32;
+    s->dct32 = ff_dct32_float;
     if (HAVE_MMX)     ff_dct_init_mmx(s);
 
     return 0;
diff --git a/libavcodec/dct32.c b/libavcodec/dct32.c
index 4e843ee832..fb53d53ab1 100644
--- a/libavcodec/dct32.c
+++ b/libavcodec/dct32.c
@@ -19,10 +19,19 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifdef DCT32_FLOAT
+#include "dct32.h"
+#include "mathops.h"
+
+#if DCT32_FLOAT
+#   define dct32 ff_dct32_float
 #   define FIXHR(x)       ((float)(x))
 #   define MULH3(x, y, s) ((s)*(y)*(x))
 #   define INTFLOAT float
+#else
+#   define dct32 ff_dct32_fixed
+#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
+#   define MULH3(x, y, s) MULH((s)*(x), y)
+#   define INTFLOAT int
 #endif
 
 
@@ -103,7 +112,7 @@
 #define ADD(a, b) val##a += val##b
 
 /* DCT32 without 1/sqrt(2) coef zero scaling. */
-static void dct32(INTFLOAT *out, const INTFLOAT *tab)
+void dct32(INTFLOAT *out, const INTFLOAT *tab)
 {
     INTFLOAT tmp0, tmp1;
 
diff --git a/libavcodec/dct32.h b/libavcodec/dct32.h
new file mode 100644
index 0000000000..110338d25c
--- /dev/null
+++ b/libavcodec/dct32.h
@@ -0,0 +1,25 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCT32_H
+#define AVCODEC_DCT32_H
+
+void ff_dct32_float(float *dst, const float *src);
+void ff_dct32_fixed(int *dst, const int *src);
+
+#endif
diff --git a/libavcodec/dct32_fixed.c b/libavcodec/dct32_fixed.c
new file mode 100644
index 0000000000..7eb9dc1a53
--- /dev/null
+++ b/libavcodec/dct32_fixed.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DCT32_FLOAT 0
+#include "dct32.c"
diff --git a/libavcodec/dct32_float.c b/libavcodec/dct32_float.c
new file mode 100644
index 0000000000..727ec3caca
--- /dev/null
+++ b/libavcodec/dct32_float.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DCT32_FLOAT 1
+#include "dct32.c"
diff --git a/libavcodec/iirfilter.c b/libavcodec/iirfilter.c
index bc63c3991a..27461fb389 100644
--- a/libavcodec/iirfilter.c
+++ b/libavcodec/iirfilter.c
@@ -324,7 +324,7 @@ int main(void)
     int i;
     FILE* fd;
 
-    fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH,
+    fcoeffs = ff_iir_filter_init_coeffs(NULL, FF_FILTER_TYPE_BUTTERWORTH,
                                         FF_FILTER_MODE_LOWPASS, FILT_ORDER,
                                         cutoff_coeff, 0.0, 0.0);
     fstate  = ff_iir_filter_init_state(FILT_ORDER);
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 838cb703e8..eae21fe2bb 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -367,6 +367,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
 
     x4->params.b_interlaced   = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
 
+    x4->params.b_open_gop     = !(avctx->flags & CODEC_FLAG_CLOSED_GOP);
+
     x4->params.i_slice_count  = avctx->slices;
 
     x4->params.vui.b_fullrange = avctx->pix_fmt == PIX_FMT_YUVJ420P;
diff --git a/libavcodec/mpc.c b/libavcodec/mpc.c
index d9a1fb776a..ca4c3d0dcb 100644
--- a/libavcodec/mpc.c
+++ b/libavcodec/mpc.c
@@ -36,7 +36,7 @@
 
 void ff_mpc_init(void)
 {
-    ff_mpa_synth_init(ff_mpa_synth_window);
+    ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
 }
 
 /**
@@ -51,8 +51,8 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels)
     for(ch = 0;  ch < channels; ch++){
         samples_ptr = samples + ch;
         for(i = 0; i < SAMPLES_PER_BAND; i++) {
-            ff_mpa_synth_filter(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
-                                ff_mpa_synth_window, &dither_state,
+            ff_mpa_synth_filter_fixed(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
+                                ff_mpa_synth_window_fixed, &dither_state,
                                 samples_ptr, channels,
                                 c->sb_samples[ch][i]);
             samples_ptr += 32 * channels;
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 138085366f..005598797d 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -158,9 +158,9 @@ typedef struct HuffTable {
 
 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
 int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
-extern MPA_INT ff_mpa_synth_window[];
-void ff_mpa_synth_init(MPA_INT *window);
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
+extern MPA_INT ff_mpa_synth_window_fixed[];
+void ff_mpa_synth_init_fixed(MPA_INT *window);
+void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
                          MPA_INT *window, int *dither_state,
                          OUT_INT *samples, int incr,
                          INTFLOAT sb_samples[SBLIMIT]);
diff --git a/libavcodec/mpegaudio_tablegen.c b/libavcodec/mpegaudio_tablegen.c
index 0888e78620..90c9de430a 100644
--- a/libavcodec/mpegaudio_tablegen.c
+++ b/libavcodec/mpegaudio_tablegen.c
@@ -33,9 +33,9 @@ int main(void)
 
     WRITE_ARRAY("static const", int8_t, table_4_3_exp);
     WRITE_ARRAY("static const", uint32_t, table_4_3_value);
-    WRITE_ARRAY("static const", uint32_t, exp_table);
+    WRITE_ARRAY("static const", uint32_t, exp_table_fixed);
     WRITE_ARRAY("static const", float, exp_table_float);
-    WRITE_2D_ARRAY("static const", uint32_t, expval_table);
+    WRITE_2D_ARRAY("static const", uint32_t, expval_table_fixed);
     WRITE_2D_ARRAY("static const", float, expval_table_float);
 
     return 0;
diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h
index 01c4174a60..214959348a 100644
--- a/libavcodec/mpegaudio_tablegen.h
+++ b/libavcodec/mpegaudio_tablegen.h
@@ -33,8 +33,8 @@
 #else
 static int8_t   table_4_3_exp[TABLE_4_3_SIZE];
 static uint32_t table_4_3_value[TABLE_4_3_SIZE];
-static uint32_t exp_table[512];
-static uint32_t expval_table[512][16];
+static uint32_t exp_table_fixed[512];
+static uint32_t expval_table_fixed[512][16];
 static float exp_table_float[512];
 static float expval_table_float[512][16];
 
@@ -59,10 +59,10 @@ static void mpegaudio_tableinit(void)
     for (exponent = 0; exponent < 512; exponent++) {
         for (value = 0; value < 16; value++) {
             double f = (double)value * cbrtf(value) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5);
-            expval_table[exponent][value] = llrint(f);
+            expval_table_fixed[exponent][value] = llrint(f);
             expval_table_float[exponent][value] = f;
         }
-        exp_table[exponent] = expval_table[exponent][1];
+        exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
         exp_table_float[exponent] = expval_table_float[exponent][1];
     }
 }
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index bdff815d4d..c2c822223e 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -29,6 +29,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "mathops.h"
+#include "dct32.h"
 
 /*
  * TODO:
@@ -57,7 +58,7 @@
 #   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
 #   define MULH3(x, y, s) MULH((s)*(x), y)
 #   define MULLx(x, y, s) MULL(x,y,s)
-#   define RENAME(a)      a
+#   define RENAME(a)      a ## _fixed
 #   define OUT_FMT AV_SAMPLE_FMT_S16
 #endif
 
@@ -68,12 +69,6 @@
 #include "mpegaudiodata.h"
 #include "mpegaudiodectab.h"
 
-#if CONFIG_FLOAT
-#    include "fft.h"
-#else
-#    include "dct32.c"
-#endif
-
 static void compute_antialias(MPADecodeContext *s, GranuleDef *g);
 static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
                                int *dither_state, OUT_INT *samples, int incr);
@@ -626,7 +621,7 @@ static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
    32 samples. */
 /* XXX: optimize by avoiding ring buffer usage */
 #if !CONFIG_FLOAT
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
+void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
                          MPA_INT *window, int *dither_state,
                          OUT_INT *samples, int incr,
                          INTFLOAT sb_samples[SBLIMIT])
@@ -637,7 +632,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
     offset = *synth_buf_offset;
     synth_buf = synth_buf_ptr + offset;
 
-    dct32(synth_buf, sb_samples);
+    ff_dct32_fixed(synth_buf, sb_samples);
     apply_window_mp3_c(synth_buf, window, dither_state, samples, incr);
 
     offset = (offset - 32) & 511;
diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index a64870a3f9..e1165074f7 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -38,7 +38,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "fft.h"
+#include "rdft.h"
 #include "mpegaudio.h"
 
 #include "qdm2data.h"
@@ -1616,8 +1616,8 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
         OUT_INT *samples_ptr = samples + ch;
 
         for (i = 0; i < 8; i++) {
-            ff_mpa_synth_filter(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
-                ff_mpa_synth_window, &dither_state,
+            ff_mpa_synth_filter_fixed(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
+                ff_mpa_synth_window_fixed, &dither_state,
                 samples_ptr, q->nb_channels,
                 q->sb_samples[ch][(8 * index) + i]);
             samples_ptr += 32 * q->nb_channels;
@@ -1646,7 +1646,7 @@ static av_cold void qdm2_init(QDM2Context *q) {
     initialized = 1;
 
     qdm2_init_vlc();
-    ff_mpa_synth_init(ff_mpa_synth_window);
+    ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
     softclip_table_init();
     rnd_table_init();
     init_noise_samples();
diff --git a/libavcodec/x86/dsputilenc_yasm.asm b/libavcodec/x86/dsputilenc_yasm.asm
index a4f2d0cf51..016b354d6c 100644
--- a/libavcodec/x86/dsputilenc_yasm.asm
+++ b/libavcodec/x86/dsputilenc_yasm.asm
@@ -59,12 +59,12 @@ SECTION .text
 %endmacro
 
 %macro HADAMARD8 0
-    SUMSUB_BADC       m0, m1, m2, m3
-    SUMSUB_BADC       m4, m5, m6, m7
-    SUMSUB_BADC       m0, m2, m1, m3
-    SUMSUB_BADC       m4, m6, m5, m7
-    SUMSUB_BADC       m0, m4, m1, m5
-    SUMSUB_BADC       m2, m6, m3, m7
+    SUMSUB_BADC       w, 0, 1, 2, 3
+    SUMSUB_BADC       w, 4, 5, 6, 7
+    SUMSUB_BADC       w, 0, 2, 1, 3
+    SUMSUB_BADC       w, 4, 6, 5, 7
+    SUMSUB_BADC       w, 0, 4, 1, 5
+    SUMSUB_BADC       w, 2, 6, 3, 7
 %endmacro
 
 %macro ABS1_SUM 3
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index e90b0b1186..4f6f1d7bf8 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -59,11 +59,11 @@ SECTION .text
     movq         m2, [%2+16]
     movq         m3, [%2+24]
 
-    IDCT4_1D      0, 1, 2, 3, 4, 5
+    IDCT4_1D      w, 0, 1, 2, 3, 4, 5
     mova         m6, [pw_32]
     TRANSPOSE4x4W 0, 1, 2, 3, 4
     paddw        m0, m6
-    IDCT4_1D      0, 1, 2, 3, 4, 5
+    IDCT4_1D      w, 0, 1, 2, 3, 4, 5
     pxor         m7, m7
 
     STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3
@@ -118,13 +118,13 @@ cglobal h264_idct_add_mmx, 3, 3, 0
 
     mova         m2, %1
     mova         m5, %2
-    SUMSUB_BA    m5, m2
-    SUMSUB_BA    m6, m5
-    SUMSUB_BA    m4, m2
-    SUMSUB_BA    m7, m6
-    SUMSUB_BA    m0, m4
-    SUMSUB_BA    m3, m2
-    SUMSUB_BA    m1, m5
+    SUMSUB_BA    w, 5, 2
+    SUMSUB_BA    w, 6, 5
+    SUMSUB_BA    w, 4, 2
+    SUMSUB_BA    w, 7, 6
+    SUMSUB_BA    w, 0, 4
+    SUMSUB_BA    w, 3, 2
+    SUMSUB_BA    w, 1, 5
     SWAP          7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
 %endmacro
 
@@ -715,10 +715,10 @@ x264_add8x4_idct_sse2:
     movhps m1, [r2+40]
     movhps m2, [r2+48]
     movhps m3, [r2+56]
-    IDCT4_1D 0,1,2,3,4,5
+    IDCT4_1D w,0,1,2,3,4,5
     TRANSPOSE2x4x4W 0,1,2,3,4
     paddw m0, [pw_32]
-    IDCT4_1D 0,1,2,3,4,5
+    IDCT4_1D w,0,1,2,3,4,5
     pxor  m7, m7
     STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3
     lea   r0, [r0+r3*2]
@@ -859,8 +859,8 @@ cglobal h264_idct_add8_sse2, 5, 7, 8
 ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
 
 %macro WALSH4_1D 5
-    SUMSUB_BADC m%4, m%3, m%2, m%1, m%5
-    SUMSUB_BADC m%4, m%2, m%3, m%1, m%5
+    SUMSUB_BADC w, %4, %3, %2, %1, %5
+    SUMSUB_BADC w, %4, %2, %3, %1, %5
     SWAP %1, %4, %3
 %endmacro
 
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 9b175c1488..14b49705dc 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -1106,10 +1106,10 @@ cglobal vp8_idct_dc_add4uv_mmx, 3, 3
 ;           %5/%6 are temporary registers
 ;           we assume m6/m7 have constant words 20091/17734 loaded in them
 %macro VP8_IDCT_TRANSFORM4x4_1D 6
-    SUMSUB_BA           m%3, m%1, m%5     ;t0, t1
+    SUMSUB_BA         w, %3,  %1,  %5     ;t0, t1
     VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3
-    SUMSUB_BA           m%4, m%3, m%5     ;tmp0, tmp3
-    SUMSUB_BA           m%2, m%1, m%5     ;tmp1, tmp2
+    SUMSUB_BA         w, %4,  %3,  %5     ;tmp0, tmp3
+    SUMSUB_BA         w, %2,  %1,  %5     ;tmp1, tmp2
     SWAP                 %4,  %1
     SWAP                 %4,  %3
 %endmacro
@@ -1181,8 +1181,8 @@ VP8_IDCT_ADD sse
 %endmacro
 
 %macro HADAMARD4_1D 4
-    SUMSUB_BADC m%2, m%1, m%4, m%3
-    SUMSUB_BADC m%4, m%2, m%3, m%1
+    SUMSUB_BADC w, %2, %1, %4, %3
+    SUMSUB_BADC w, %4, %2, %3, %1
     SWAP %1, %4, %3
 %endmacro
 
diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm
index 7e5b67419a..18ba9d1ad2 100644
--- a/libavcodec/x86/x86util.asm
+++ b/libavcodec/x86/x86util.asm
@@ -208,6 +208,17 @@
     pminub  %2, %4
 %endmacro
 
+%macro ABSD2_MMX 4
+    pxor    %3, %3
+    pxor    %4, %4
+    pcmpgtd %3, %1
+    pcmpgtd %4, %2
+    pxor    %1, %3
+    pxor    %2, %4
+    psubd   %1, %3
+    psubd   %2, %4
+%endmacro
+
 %macro ABSB_SSSE3 2
     pabsb   %1, %1
 %endmacro
@@ -230,12 +241,7 @@
 %macro SPLATB_MMX 3
     movd      %1, [%2-3] ;to avoid crossing a cacheline
     punpcklbw %1, %1
-%if mmsize==16
-    pshuflw   %1, %1, 0xff
-    punpcklqdq %1, %1
-%else
-    pshufw    %1, %1, 0xff
-%endif
+    SPLATW    %1, %1, 3
 %endmacro
 
 %macro SPLATB_SSSE3 3
@@ -243,125 +249,169 @@
     pshufb    %1, %3
 %endmacro
 
-%macro PALIGNR_MMX 4
-    %ifnidn %4, %2
+%macro PALIGNR_MMX 4-5 ; [dst,] src1, src2, imm, tmp
+    %define %%dst %1
+%if %0==5
+%ifnidn %1, %2
+    mova    %%dst, %2
+%endif
+    %rotate 1
+%endif
+%ifnidn %4, %2
     mova    %4, %2
-    %endif
-    %if mmsize == 8
-    psllq   %1, (8-%3)*8
+%endif
+%if mmsize==8
+    psllq   %%dst, (8-%3)*8
     psrlq   %4, %3*8
-    %else
-    pslldq  %1, 16-%3
+%else
+    pslldq  %%dst, 16-%3
     psrldq  %4, %3
-    %endif
-    por     %1, %4
+%endif
+    por     %%dst, %4
 %endmacro
 
-%macro PALIGNR_SSSE3 4
+%macro PALIGNR_SSSE3 4-5
+%if %0==5
+    palignr %1, %2, %3, %4
+%else
     palignr %1, %2, %3
+%endif
 %endmacro
 
 %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
 %ifnum %5
-    mova   m%1, m%5
-    mova   m%3, m%5
+    pand   m%3, m%5, m%4 ; src .. y6 .. y4
+    pand   m%1, m%5, m%2 ; dst .. y6 .. y4
 %else
     mova   m%1, %5
-    mova   m%3, m%1
+    pand   m%3, m%1, m%4 ; src .. y6 .. y4
+    pand   m%1, m%1, m%2 ; dst .. y6 .. y4
 %endif
-    pand   m%1, m%2 ; dst .. y6 .. y4
-    pand   m%3, m%4 ; src .. y6 .. y4
-    psrlw  m%2, 8   ; dst .. y7 .. y5
-    psrlw  m%4, 8   ; src .. y7 .. y5
+    psrlw  m%2, 8        ; dst .. y7 .. y5
+    psrlw  m%4, 8        ; src .. y7 .. y5
 %endmacro
 
-%macro SUMSUB_BA 2-3
-%if %0==2
-    paddw   %1, %2
-    paddw   %2, %2
-    psubw   %2, %1
+%macro SUMSUB_BA 3-4
+%if %0==3
+    padd%1  m%2, m%3
+    padd%1  m%3, m%3
+    psub%1  m%3, m%2
 %else
-    mova    %3, %1
-    paddw   %1, %2
-    psubw   %2, %3
-%endif
-%endmacro
-
-%macro SUMSUB_BADC 4-5
-%if %0==5
-    SUMSUB_BA %1, %2, %5
-    SUMSUB_BA %3, %4, %5
+%if avx_enabled == 0
+    mova    m%4, m%2
+    padd%1  m%2, m%3
+    psub%1  m%3, m%4
 %else
-    paddw   %1, %2
-    paddw   %3, %4
-    paddw   %2, %2
-    paddw   %4, %4
-    psubw   %2, %1
-    psubw   %4, %3
+    padd%1  m%4, m%2, m%3
+    psub%1  m%3, m%2
+    SWAP    %2, %4
+%endif
 %endif
 %endmacro
 
-%macro SUMSUB2_AB 3
-    mova    %3, %1
-    paddw   %1, %1
-    paddw   %1, %2
-    psubw   %3, %2
-    psubw   %3, %2
+%macro SUMSUB_BADC 5-6
+%if %0==6
+    SUMSUB_BA %1, %2, %3, %6
+    SUMSUB_BA %1, %4, %5, %6
+%else
+    padd%1  m%2, m%3
+    padd%1  m%4, m%5
+    padd%1  m%3, m%3
+    padd%1  m%5, m%5
+    psub%1  m%3, m%2
+    psub%1  m%5, m%4
+%endif
 %endmacro
 
-%macro SUMSUB2_BA 3
-    mova    m%3, m%1
-    paddw   m%1, m%2
-    paddw   m%1, m%2
-    psubw   m%2, m%3
-    psubw   m%2, m%3
+%macro SUMSUB2_AB 4
+%ifnum %3
+    psub%1  m%4, m%2, m%3
+    psub%1  m%4, m%3
+    padd%1  m%2, m%2
+    padd%1  m%2, m%3
+%else
+    mova    m%4, m%2
+    padd%1  m%2, m%2
+    padd%1  m%2, %3
+    psub%1  m%4, %3
+    psub%1  m%4, %3
+%endif
 %endmacro
 
-%macro SUMSUBD2_AB 4
-    mova    %4, %1
-    mova    %3, %2
-    psraw   %2, 1  ; %2: %2>>1
-    psraw   %1, 1  ; %1: %1>>1
-    paddw   %2, %4 ; %2: %2>>1+%1
-    psubw   %1, %3 ; %1: %1>>1-%2
+%macro SUMSUB2_BA 4
+%if avx_enabled == 0
+    mova    m%4, m%2
+    padd%1  m%2, m%3
+    padd%1  m%2, m%3
+    psub%1  m%3, m%4
+    psub%1  m%3, m%4
+%else
+    padd%1  m%4, m%2, m%3
+    padd%1  m%4, m%3
+    psub%1  m%3, m%2
+    psub%1  m%3, m%2
+    SWAP     %2,  %4
+%endif
+%endmacro
+
+%macro SUMSUBD2_AB 5
+%ifnum %4
+    psra%1  m%5, m%2, 1  ; %3: %3>>1
+    psra%1  m%4, m%3, 1  ; %2: %2>>1
+    padd%1  m%4, m%2     ; %3: %3>>1+%2
+    psub%1  m%5, m%3     ; %2: %2>>1-%3
+    SWAP     %2, %5
+    SWAP     %3, %4
+%else
+    mova    %5, m%2
+    mova    %4, m%3
+    psra%1  m%3, 1  ; %3: %3>>1
+    psra%1  m%2, 1  ; %2: %2>>1
+    padd%1  m%3, %5 ; %3: %3>>1+%2
+    psub%1  m%2, %4 ; %2: %2>>1-%3
+%endif
 %endmacro
 
 %macro DCT4_1D 5
 %ifnum %5
-    SUMSUB_BADC m%4, m%1, m%3, m%2; m%5
-    SUMSUB_BA   m%3, m%4, m%5
-    SUMSUB2_AB  m%1, m%2, m%5
+    SUMSUB_BADC w, %4, %1, %3, %2, %5
+    SUMSUB_BA   w, %3, %4, %5
+    SUMSUB2_AB  w, %1, %2, %5
     SWAP %1, %3, %4, %5, %2
 %else
-    SUMSUB_BADC m%4, m%1, m%3, m%2
-    SUMSUB_BA   m%3, m%4
-    mova       [%5], m%2
-    SUMSUB2_AB m%1, [%5], m%2
+    SUMSUB_BADC w, %4, %1, %3, %2
+    SUMSUB_BA   w, %3, %4
+    mova     [%5], m%2
+    SUMSUB2_AB  w, %1, [%5], %2
     SWAP %1, %3, %4, %2
 %endif
 %endmacro
 
-%macro IDCT4_1D 5-6
-%ifnum %5
-    SUMSUBD2_AB m%2, m%4, m%6, m%5
-    ; %2: %2>>1-%4 %4: %2+%4>>1
-    SUMSUB_BA   m%3, m%1, m%6
-    ; %3: %1+%3 %1: %1-%3
-    SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
-    ; %4: %1+%3 + (%2+%4>>1)
-    ; %3: %1+%3 - (%2+%4>>1)
-    ; %2: %1-%3 + (%2>>1-%4)
-    ; %1: %1-%3 - (%2>>1-%4)
+%macro IDCT4_1D 6-7
+%ifnum %6
+    SUMSUBD2_AB %1, %3, %5, %7, %6
+    ; %3: %3>>1-%5 %5: %3+%5>>1
+    SUMSUB_BA   %1, %4, %2, %7
+    ; %4: %2+%4 %2: %2-%4
+    SUMSUB_BADC %1, %5, %4, %3, %2, %7
+    ; %5: %2+%4 + (%3+%5>>1)
+    ; %4: %2+%4 - (%3+%5>>1)
+    ; %3: %2-%4 + (%3>>1-%5)
+    ; %2: %2-%4 - (%3>>1-%5)
 %else
-    SUMSUBD2_AB m%2, m%4, [%5], [%5+16]
-    SUMSUB_BA   m%3, m%1
-    SUMSUB_BADC m%4, m%3, m%2, m%1
+%ifidn %1, w
+    SUMSUBD2_AB %1, %3, %5, [%6], [%6+16]
+%else
+    SUMSUBD2_AB %1, %3, %5, [%6], [%6+32]
 %endif
-    SWAP %1, %4, %3
-    ; %1: %1+%3 + (%2+%4>>1) row0
-    ; %2: %1-%3 + (%2>>1-%4) row1
-    ; %3: %1-%3 - (%2>>1-%4) row2
-    ; %4: %1+%3 - (%2+%4>>1) row3
+    SUMSUB_BA   %1, %4, %2
+    SUMSUB_BADC %1, %5, %4, %3, %2
+%endif
+    SWAP %2, %5, %4
+    ; %2: %2+%4 + (%3+%5>>1) row0
+    ; %3: %2-%4 + (%3>>1-%5) row1
+    ; %4: %2-%4 - (%3>>1-%5) row2
+    ; %5: %2+%4 - (%3+%5>>1) row3
 %endmacro
 
 
diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index a21af775de..84881592b6 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -848,7 +848,10 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){
     }
     if (asf->packet_flags & 0x01) {
         DO_2BITS(asf->packet_segsizetype >> 6, asf->packet_frag_size, 0); // 0 is illegal
-        if(asf->packet_frag_size > asf->packet_size_left - rsize){
+        if (rsize > asf->packet_size_left) {
+            av_log(s, AV_LOG_ERROR, "packet_replic_size is invalid\n");
+            return -1;
+        } else if(asf->packet_frag_size > asf->packet_size_left - rsize){
             if (asf->packet_frag_size > asf->packet_size_left - rsize + asf->packet_padsize) {
                 av_log(s, AV_LOG_ERROR, "packet_frag_size is invalid (%d-%d)\n", asf->packet_size_left, rsize);
                 return -1;
@@ -1261,21 +1264,22 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, int64_t pts, int
     if (!asf->index_read)
         asf_build_simple_index(s, stream_index);
 
-    if(!(asf->index_read && st->index_entries)){
-        if(av_seek_frame_binary(s, stream_index, pts, flags)<0)
-            return -1;
-    }else{
+    if((asf->index_read && st->index_entries)){
         index= av_index_search_timestamp(st, pts, flags);
-        if(index<0)
-            return -1;
+        if(index >= 0) {
+            /* find the position */
+            pos = st->index_entries[index].pos;
 
-        /* find the position */
-        pos = st->index_entries[index].pos;
-
-        /* do the seek */
-        av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos);
-        avio_seek(s->pb, pos, SEEK_SET);
+            /* do the seek */
+            av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos);
+            avio_seek(s->pb, pos, SEEK_SET);
+            asf_reset_header(s);
+            return 0;
+        }
     }
+    /* no index or seeking by index failed */
+    if(av_seek_frame_binary(s, stream_index, pts, flags)<0)
+        return -1;
     asf_reset_header(s);
     return 0;
 }
@@ -1290,4 +1294,5 @@ AVInputFormat ff_asf_demuxer = {
     asf_read_close,
     asf_read_seek,
     asf_read_pts,
+    .flags = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH,
 };
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index db48a57ec4..ec51a57ca8 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -258,6 +258,8 @@ typedef struct AVFormatParameters {
 #define AVFMT_VARIABLE_FPS  0x0400 /**< Format allows variable fps. */
 #define AVFMT_NODIMENSIONS  0x0800 /**< Format does not need width/height */
 #define AVFMT_NOSTREAMS     0x1000 /**< Format does not require any streams */
+#define AVFMT_NOBINSEARCH   0x2000 /**< Format does not allow to fallback to binary search via read_timestamp */
+#define AVFMT_NOGENSEARCH   0x4000 /**< Format does not allow to fallback to generic search */
 
 typedef struct AVOutputFormat {
     const char *name;
diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index 44d21683ba..3a7465ef94 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -375,7 +375,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
     size = avio_rb24(s->pb);
     dts = avio_rb24(s->pb);
     dts |= avio_r8(s->pb) << 24;
-//    av_log(s, AV_LOG_DEBUG, "type:%d, size:%d, dts:%d\n", type, size, dts);
+    av_dlog(s, "type:%d, size:%d, dts:%"PRId64"\n", type, size, dts);
     if (url_feof(s->pb))
         return AVERROR_EOF;
     avio_skip(s->pb, 3); /* stream id, always 0 */
@@ -421,7 +421,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
         st= create_stream(s, is_audio);
         s->ctx_flags &= ~AVFMTCTX_NOHEADER;
     }
-//    av_log(s, AV_LOG_DEBUG, "%d %X %d \n", is_audio, flags, st->discard);
+    av_dlog(s, "%d %X %d \n", is_audio, flags, st->discard);
     if(  (st->discard >= AVDISCARD_NONKEY && !((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY ||         is_audio))
        ||(st->discard >= AVDISCARD_BIDIR  &&  ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_DISP_INTER && !is_audio))
        || st->discard >= AVDISCARD_ALL
diff --git a/libavformat/utils.c b/libavformat/utils.c
index b6368edcd1..70429a7650 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1742,10 +1742,12 @@ int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp, int f
         return 0;
     }
 
-    if(s->iformat->read_timestamp)
+    if(s->iformat->read_timestamp && !(s->iformat->flags & AVFMT_NOBINSEARCH))
         return av_seek_frame_binary(s, stream_index, timestamp, flags);
-    else
+    else if (!(s->iformat->flags & AVFMT_NOGENSEARCH))
         return av_seek_frame_generic(s, stream_index, timestamp, flags);
+    else
+        return -1;
 }
 
 int avformat_seek_file(AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags)