From 3b636f21daa6996e20ef97131a1c3649c3043304 Mon Sep 17 00:00:00 2001
From: David Conrad <lessen42@gmail.com>
Date: Tue, 22 Jun 2010 19:24:09 +0000
Subject: [PATCH] Native VP8 decoder.

Patch by David Conrad <lessen42 gmail com> and myself.

Originally committed as revision 23719 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 doc/general.texi       |    4 +-
 libavcodec/Makefile    |    2 +
 libavcodec/allcodecs.c |    1 +
 libavcodec/avcodec.h   |    4 +-
 libavcodec/dsputil.c   |   13 +
 libavcodec/vp56.h      |   75 +++
 libavcodec/vp8.c       | 1412 ++++++++++++++++++++++++++++++++++++++++
 libavcodec/vp8data.h   |  752 +++++++++++++++++++++
 libavcodec/vp8dsp.c    |  373 +++++++++++
 libavcodec/vp8dsp.h    |   67 ++
 10 files changed, 2699 insertions(+), 4 deletions(-)
 create mode 100644 libavcodec/vp8.c
 create mode 100644 libavcodec/vp8data.h
 create mode 100644 libavcodec/vp8dsp.c
 create mode 100644 libavcodec/vp8dsp.h
diff --git a/doc/general.texi b/doc/general.texi
index 32d5375b25..561a8f12f6 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -441,8 +441,8 @@ following image formats are supported:
     @tab fourcc: VP50
 @item On2 VP6                @tab     @tab  X
     @tab fourcc: VP60,VP61,VP62
-@item VP8                    @tab  X  @tab  X
-    @tab fourcc: VP80, de/encoding supported through external library libvpx
+@item VP8                    @tab  E  @tab  X
+    @tab fourcc: VP80, encoding supported through external library libvpx
 @item planar RGB             @tab     @tab  X
     @tab fourcc: 8BPS
 @item Q-team QPEG            @tab     @tab  X
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 8850f19a6c..1f4ee8bd76 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -375,6 +375,8 @@ OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o vp56dsp.o \
                                           vp3dsp.o
 OBJS-$(CONFIG_VP6_DECODER)             += vp6.o vp56.o vp56data.o vp56dsp.o \
                                           vp3dsp.o vp6dsp.o huffman.o
+OBJS-$(CONFIG_VP8_DECODER)             += vp8.o vp8dsp.o vp56.o vp56data.o \
+                                          h264pred.o
 OBJS-$(CONFIG_VQA_DECODER)             += vqavideo.o
 OBJS-$(CONFIG_WAVPACK_DECODER)         += wavpack.o
 OBJS-$(CONFIG_WMAPRO_DECODER)          += wmaprodec.o wma.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 3f0b9752cf..0e3b9b3666 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -201,6 +201,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (VP6, vp6);
     REGISTER_DECODER (VP6A, vp6a);
     REGISTER_DECODER (VP6F, vp6f);
+    REGISTER_DECODER (VP8, vp8);
     REGISTER_DECODER (VQA, vqa);
     REGISTER_ENCDEC  (WMV1, wmv1);
     REGISTER_ENCDEC  (WMV2, wmv2);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 15b00922d3..0389915141 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,8 +30,8 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 77
-#define LIBAVCODEC_VERSION_MICRO  1
+#define LIBAVCODEC_VERSION_MINOR 78
+#define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index a09d19d750..fdf23f0a50 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -39,6 +39,7 @@
 #include "ac3dec.h"
 #include "vorbis.h"
 #include "png.h"
+#include "vp8dsp.h"
 
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
@@ -2656,6 +2657,18 @@ static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
 }
 #endif /* CONFIG_RV40_DECODER */
 
+#if CONFIG_VP8_DECODER
+void ff_put_vp8_pixels16_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) {
+    put_pixels16_c(dst, src, stride, h); // x and y are accepted but not used
+}
+void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) {
+    put_pixels8_c(dst, src, stride, h);  // x and y are accepted but not used
+}
+void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) {
+    put_pixels4_c(dst, src, stride, h);  // x and y are accepted but not used
+}
+#endif /* CONFIG_VP8_DECODER */
+
 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     int i;
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index 0b6f2a870c..4eb414bbbe 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -237,6 +237,12 @@ static inline int vp56_rac_get(VP56RangeCoder *c)
     return bit;
 }
 
+// one bit at prob 128; rounding is different than vp56_rac_get, is vp56_rac_get wrong?
+static inline int vp8_rac_get(VP56RangeCoder *c)
+{
+    return vp56_rac_get_prob(c, 128);
+}
+
 static inline int vp56_rac_gets(VP56RangeCoder *c, int bits)
 {
     int value = 0;
@@ -248,12 +254,46 @@ static inline int vp56_rac_gets(VP56RangeCoder *c, int bits)
     return value;
 }
 
+static inline int vp8_rac_get_uint(VP56RangeCoder *c, int bits)
+{
+    int acc = 0;
+
+    // bits arrive most-significant first; shift one in per iteration
+    for (; bits--; )
+        acc = vp8_rac_get(c) | (acc << 1);
+
+    return acc;
+}
+
+// fixme: add 1 bit to all the calls to this?
+// Read an optional signed value: one flag bit and, only if it is set,
+// a `bits`-wide magnitude followed by one sign bit.
+// Returns 0 when the flag bit is clear.
+static inline int vp8_rac_get_sint(VP56RangeCoder *c, int bits)
+{
+    int magnitude;
+
+    if (vp8_rac_get(c)) {
+        magnitude = vp8_rac_get_uint(c, bits);
+        return vp8_rac_get(c) ? -magnitude : magnitude;
+    }
+
+    return 0;
+}
+
+// P(7)
 static inline int vp56_rac_gets_nn(VP56RangeCoder *c, int bits)
 {
     int v = vp56_rac_gets(c, 7) << 1;
     return v + !v;
 }
 
+static inline int vp8_rac_get_nn(VP56RangeCoder *c)
+{
+    int doubled = vp8_rac_get_uint(c, 7) << 1;  // 7-bit value, doubled
+    return doubled ? doubled : 1;               // 0 is mapped to 1
+}
+
 static inline int vp56_rac_get_tree(VP56RangeCoder *c,
                                     const VP56Tree *tree,
                                     const uint8_t *probs)
@@ -267,4 +307,39 @@ static inline int vp56_rac_get_tree(VP56RangeCoder *c,
     return -tree->val;
 }
 
+/**
+ * Walk a token tree starting at node i and return the decoded value. Same as
+ * vp8_rac_get_tree() apart from the caller-chosen start node; coeff decode uses
+ * that to skip the EOB branch after a 0 token. Entries <= 0 end the walk.
+ */
+static inline int vp8_rac_get_tree_with_offset(VP56RangeCoder *c, const int8_t (*tree)[2],
+                                               const uint8_t *probs, int i)
+{
+    for (;;) {
+        i = tree[i][vp56_rac_get_prob(c, probs[i])];
+        if (i <= 0)
+            return -i;
+    }
+}
+
+// How probabilities are associated with decisions differs from vp56_rac_get_tree, I think.
+// The new scheme would fit in the old one, but this way has one fewer branch per decision.
+static inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2],
+                                   const uint8_t *probs)
+{
+    return vp8_rac_get_tree_with_offset(c, tree, probs, 0); // 0: start at the root node
+}
+
+// DCTextra: read one bit per entry of a 0-terminated probability list, MSB first
+static inline int vp8_rac_get_coeff(VP56RangeCoder *c, const uint8_t *prob)
+{
+    int extra = vp56_rac_get_prob(c, *prob++);  // the first bit is always read
+
+    while (*prob) {
+        extra = (extra<<1) + vp56_rac_get_prob(c, *prob++);
+    }
+
+    return extra;
+}
+
 #endif /* AVCODEC_VP56_H */
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
new file mode 100644
index 0000000000..b1c40e8492
--- /dev/null
+++ b/libavcodec/vp8.c
@@ -0,0 +1,1412 @@
+/**
+ * VP8 compatible video decoder
+ *
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "vp56.h"
+#include "vp8data.h"
+#include "vp8dsp.h"
+#include "h264pred.h"
+#include "rectangle.h"
+
+typedef struct {
+    uint8_t segment;        ///< segment id; only decoded when segmentation.update_map is set
+    uint8_t skip;           ///< mbskip flag; 0 when mbskip is not enabled for the frame
+    // todo: make it possible to check for at least (i4x4 or split_mv)
+    // in one op. are others needed?
+    uint8_t mode;           ///< 16x16 intra pred mode, or mv mode for inter macroblocks
+    uint8_t ref_frame;      ///< VP56_FRAME_CURRENT for intra macroblocks
+    uint8_t partitioning;   ///< split-mv partitioning index, set by decode_splitmvs()
+    VP56mv mv;              ///< for split-mv macroblocks this is a copy of bmv[15]
+    VP56mv bmv[16];         ///< per-4x4-block mvs; inter MBs without split mv fill all with mv
+} VP8Macroblock;
+
+typedef struct {
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    VP8DSPContext vp8dsp;
+    H264PredContext hpc;
+    AVFrame frames[4];
+    AVFrame *framep[4];
+    uint8_t *edge_emu_buffer;
+    VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
+    int profile;        ///< bits 1-3 of the first byte of the frame
+
+    int mb_width;   /* number of horizontal MB */
+    int mb_height;  /* number of vertical MB */
+    int linesize;
+    int uvlinesize;
+
+    int keyframe;       ///< set when bit 0 of the first byte of the frame is clear
+    int invisible;      ///< set when bit 4 of the first byte of the frame is clear
+    int update_last;    ///< update VP56_FRAME_PREVIOUS with the current one
+    int update_golden;  ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
+    int update_altref;  ///< same encoding as update_golden, for the altref frame
+
+    /**
+     * If this flag is not set, all the probability updates
+     * are discarded after this frame is decoded.
+     */
+    int update_probabilities;
+
+    /**
+     * All coefficients are contained in separate arith coding contexts.
+     * There can be 1, 2, 4, or 8 of these after the header context.
+     */
+    int num_coeff_partitions;
+    VP56RangeCoder coeff_partition[8];
+
+    VP8Macroblock *macroblocks;         ///< macroblocks_base + 1 + mb_stride (skips the top/left border)
+    VP8Macroblock *macroblocks_base;    ///< allocation of mb_stride * (mb_height+1) entries
+    int mb_stride;                      ///< mb_width + 1
+
+    uint8_t *intra4x4_pred_mode;        ///< intra4x4_pred_mode_base + 4 + b4_stride
+    uint8_t *intra4x4_pred_mode_base;   ///< allocation of b4_stride * (4*mb_height+1) bytes
+    int b4_stride;                      ///< 4 * mb_stride
+
+    /**
+     * For coeff decode, we need to know whether the above block had non-zero
+     * coefficients. This means for each macroblock, we need data for 4 luma
+     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
+     * per macroblock. We keep the last row in top_nnz.
+     */
+    uint8_t (*top_nnz)[9];
+    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+
+    /**
+     * This is the index plus one of the last non-zero coeff
+     * for each of the blocks in the current macroblock.
+     * So, 0 -> no coeffs
+     *     1 -> dc-only (special transform)
+     *     2+-> full transform
+     */
+    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
+    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
+
+    int chroma_pred_mode;    ///< 8x8c pred mode of the current macroblock
+
+    int mbskip_enabled;
+    int sign_bias[4]; ///< one state [0, 1] per ref frame type
+
+    /**
+     * Base parameters for segmentation, i.e. per-macroblock parameters.
+     * These must be kept unchanged even if segmentation is not used for
+     * a frame, since the values persist between interframes.
+     */
+    struct {
+        int enabled;
+        int absolute_vals;
+        int update_map;
+        int8_t base_quant[4];
+        int8_t filter_level[4];     ///< base loop filter level
+    } segmentation;
+
+    /**
+     * Macroblocks can have one of 4 different quants in a frame when
+     * segmentation is enabled.
+     * If segmentation is disabled, only the first segment's values are used.
+     */
+    struct {
+        // [0] - DC qmul  [1] - AC qmul
+        int16_t luma_qmul[2];
+        int16_t luma_dc_qmul[2];    ///< luma dc-only block quant
+        int16_t chroma_qmul[2];
+    } qmat[4];
+
+    struct {
+        int simple;
+        int level;
+        int sharpness;
+    } filter;
+
+    struct {
+        int enabled;    ///< whether each mb can have a different strength based on mode/ref
+
+        /**
+         * filter strength adjustment for the following macroblock modes:
+         * [0] - i4x4
+         * [1] - zero mv
+         * [2] - inter modes except for zero or split mv
+         * [3] - split mv
+         *  i16x16 modes never have any adjustment
+         */
+        int8_t mode[4];
+
+        /**
+         * filter strength adjustment for macroblocks that reference:
+         * [0] - intra / VP56_FRAME_CURRENT
+         * [1] - VP56_FRAME_PREVIOUS
+         * [2] - VP56_FRAME_GOLDEN
+         * [3] - altref / VP56_FRAME_GOLDEN2
+         */
+        int8_t ref[4];
+    } lf_delta;
+
+    /**
+     * These are all of the updatable probabilities for binary decisions.
+     * They are only implicitly reset on keyframes, making it quite likely
+     * for an interframe to desync if a prior frame's header was corrupt
+     * or missing outright!
+     */
+    struct {
+        uint8_t segmentid[3];
+        uint8_t mbskip;
+        uint8_t intra;
+        uint8_t last;
+        uint8_t golden;
+        uint8_t pred16x16[4];
+        uint8_t pred8x8c[3];
+        uint8_t token[4][8][3][NUM_DCT_TOKENS-1];
+        uint8_t mvc[2][19];
+    } prob[2];  ///< [0] is the working set; [0] is copied to [1] when update_probabilities is 0
+} VP8Context;
+
+#define RL24(p) (AV_RL16(p) + ((p)[2] << 16)) // 24-bit load: AV_RL16 low part, byte 2 on top; p is evaluated twice
+
+static void vp8_decode_flush(AVCodecContext *avctx)
+{
+    VP8Context *s = avctx->priv_data;
+    AVFrame *frame;
+
+    for (frame = s->frames; frame < s->frames + 4; frame++) // only frames that hold a buffer
+        if (frame->data[0])
+            avctx->release_buffer(avctx, frame);
+    memset(s->framep, 0, sizeof(s->framep));
+
+    // these point into the *_base allocations that are freed just below
+    s->macroblocks        = NULL;
+    s->intra4x4_pred_mode = NULL;
+    av_freep(&s->macroblocks_base);
+    av_freep(&s->intra4x4_pred_mode_base);
+    av_freep(&s->top_nnz);
+    av_freep(&s->edge_emu_buffer);
+}
+
+// Reallocate all size-dependent state; returns 0 or a negative AVERROR code.
+static int update_dimensions(VP8Context *s, int width, int height)
+{
+    int i;
+
+    if (avcodec_check_dimensions(s->avctx, width, height))
+        return AVERROR_INVALIDDATA;
+
+    vp8_decode_flush(s->avctx);
+    avcodec_set_dimensions(s->avctx, width, height);
+
+    s->mb_width  = (s->avctx->coded_width +15) / 16;
+    s->mb_height = (s->avctx->coded_height+15) / 16;
+    // top/left border: one MB, i.e. 4 blocks for intra4x4 to keep fill_rectangle 4-byte aligned
+    s->mb_stride = s->mb_width+1;
+    s->b4_stride = 4*s->mb_stride;
+
+    s->macroblocks_base        = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks));
+    s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1));
+    s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
+    if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz) {
+        vp8_decode_flush(s->avctx); // drop partial allocations so the next frame retries
+        return AVERROR(ENOMEM);
+    }
+
+    s->macroblocks        = s->macroblocks_base        + 1 + s->mb_stride;
+    s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride;
+    memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride);
+    for (i = 0; i < 4*s->mb_height; i++)
+        s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED; // left border column
+    return 0;
+}
+
+static void parse_segment_info(VP8Context *s)
+{
+    VP56RangeCoder *rc = &s->c;
+    int n;
+
+    s->segmentation.update_map = vp8_rac_get(rc);
+
+    if (vp8_rac_get(rc)) { // new per-segment quantizer and filter values follow
+        s->segmentation.absolute_vals = vp8_rac_get(rc);
+        for (n = 0; n < 4; n++)
+            s->segmentation.base_quant[n]   = vp8_rac_get_sint(rc, 7);
+        for (n = 0; n < 4; n++)
+            s->segmentation.filter_level[n] = vp8_rac_get_sint(rc, 6);
+    }
+
+    if (!s->segmentation.update_map)
+        return; // segment id probabilities are only coded along with a map update
+    for (n = 0; n < 3; n++)
+        s->prob->segmentid[n] = vp8_rac_get(rc) ? vp8_rac_get_uint(rc, 8) : 255;
+}
+
+static void update_lf_deltas(VP8Context *s)
+{
+    int n;
+
+    // all four ref-frame deltas are read first, then the four mode deltas
+    for (n = 0; n < 4; n++)
+        s->lf_delta.ref[n]  = vp8_rac_get_sint(&s->c, 6);
+
+    for (n = 0; n < 4; n++)
+        s->lf_delta.mode[n] = vp8_rac_get_sint(&s->c, 6);
+}
+
+static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
+{
+    const uint8_t *size_tab = buf;  // one 3-byte size per partition, except the last
+    int n, last;
+
+    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
+    last = s->num_coeff_partitions - 1;
+
+    if (buf_size < 3*last)
+        return -1;
+    buf      += 3*last;
+    buf_size -= 3*last;
+
+    for (n = 0; n < last; n++, size_tab += 3) {
+        int len = RL24(size_tab);
+        if (len > buf_size)
+            return -1;
+        vp56_init_range_decoder(&s->coeff_partition[n], buf, len);
+        buf      += len;
+        buf_size -= len;
+    }
+    // the last partition takes whatever remains
+    vp56_init_range_decoder(&s->coeff_partition[last], buf, buf_size);
+    return 0;
+}
+
+static void get_quants(VP8Context *s)
+{
+    VP56RangeCoder *rc = &s->c;
+    int seg;
+
+    int q_index = vp8_rac_get_uint(rc, 7);  // frame-level index; the deltas below are added to it
+    int dq_ydc  = vp8_rac_get_sint(rc, 4);
+    int dq_y2dc = vp8_rac_get_sint(rc, 4);
+    int dq_y2ac = vp8_rac_get_sint(rc, 4);
+    int dq_uvdc = vp8_rac_get_sint(rc, 4);
+    int dq_uvac = vp8_rac_get_sint(rc, 4);
+
+    for (seg = 0; seg < 4; seg++) {
+        int16_t *y  = s->qmat[seg].luma_qmul;
+        int16_t *y2 = s->qmat[seg].luma_dc_qmul;
+        int16_t *uv = s->qmat[seg].chroma_qmul;
+        int qi = q_index;   // without segmentation every segment gets the frame index
+
+        if (s->segmentation.enabled)
+            qi = s->segmentation.base_quant[seg] + (s->segmentation.absolute_vals ? 0 : q_index);
+
+        y[0]  =       vp8_dc_qlookup[av_clip(qi + dq_ydc , 0, 127)];
+        y[1]  =       vp8_ac_qlookup[av_clip(qi          , 0, 127)];
+        y2[0] =   2 * vp8_dc_qlookup[av_clip(qi + dq_y2dc, 0, 127)];
+        y2[1] = 155 * vp8_ac_qlookup[av_clip(qi + dq_y2ac, 0, 127)] / 100;
+        uv[0] =       vp8_dc_qlookup[av_clip(qi + dq_uvdc, 0, 127)];
+        uv[1] =       vp8_ac_qlookup[av_clip(qi + dq_uvac, 0, 127)];
+        y2[1] = FFMAX(y2[1], 8);
+        uv[0] = FFMIN(uv[0], 132);
+    }
+}
+
+/**
+ * Determine which buffers golden and altref should be updated with after this frame.
+ * The spec isn't clear here, so I'm going by my understanding of what libvpx does
+ *
+ * Intra frames update all 3 references
+ * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
+ * If the update (golden|altref) flag is set, it's updated with the current frame
+ *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
+ * If the flag is not set, the number read means:
+ *      0: no update
+ *      1: VP56_FRAME_PREVIOUS
+ *      2: update golden with altref, or update altref with golden
+ */
+static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
+{
+    int code;
+
+    if (update)
+        return VP56_FRAME_CURRENT;
+
+    // no refresh flag: a 2-bit code selects the frame to copy from, if any
+    code = vp8_rac_get_uint(&s->c, 2);
+    if (code == 1)
+        return VP56_FRAME_PREVIOUS;
+    if (code == 2)
+        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
+    return VP56_FRAME_NONE;
+}
+
+static void update_refs(VP8Context *s)
+{
+    // both refresh flags are read before either optional 2-bit copy code
+    int refresh_golden = vp8_rac_get(&s->c);
+    int refresh_altref = vp8_rac_get(&s->c);
+
+    // golden's copy code (if present) is read before altref's
+    s->update_golden = ref_to_update(s, refresh_golden, VP56_FRAME_GOLDEN);
+    s->update_altref = ref_to_update(s, refresh_altref, VP56_FRAME_GOLDEN2);
+}
+
+// Parse the frame header found at buf into s and set up the coefficient partitions.
+// Returns 0 on success, or a negative AVERROR code on failure.
+static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
+{
+    VP56RangeCoder *c = &s->c;
+    int header_size, i, j, k, l, ret;
+    int width  = s->avctx->width;
+    int height = s->avctx->height;
+
+    s->keyframe  = !(buf[0] & 1);
+    s->profile   =  (buf[0]>>1) & 7;
+    s->invisible = !(buf[0] & 0x10);
+    header_size  = RL24(buf) >> 5;  // size of the range-coded header that follows
+    buf      += 3;
+    buf_size -= 3;
+
+    if (s->profile)
+        av_log(s->avctx, AV_LOG_WARNING, "Profile %d not fully handled\n", s->profile);
+
+    if (header_size > buf_size - 7*s->keyframe) { // keyframes carry 7 extra bytes first
+        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (s->keyframe) {
+        if (RL24(buf) != 0x2a019d) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", RL24(buf));
+            return AVERROR_INVALIDDATA;
+        }
+        width  = AV_RL16(buf+3) & 0x3fff; // top 2 bits (scale fields) are ignored
+        height = AV_RL16(buf+5) & 0x3fff; // top 2 bits (scale fields) are ignored
+        buf      += 7;
+        buf_size -= 7;
+
+        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
+        memcpy(s->prob->token    , vp8_token_default_probs , sizeof(s->prob->token));
+        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
+        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
+        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
+        memset(&s->segmentation, 0, sizeof(s->segmentation));
+    }
+
+    if (!s->macroblocks_base || /* first frame */
+        width != s->avctx->width || height != s->avctx->height) {
+        if ((ret = update_dimensions(s, width, height)) < 0) // keep the real error code in ret
+            return ret;
+    }
+
+    vp56_init_range_decoder(c, buf, header_size);
+    buf      += header_size;
+    buf_size -= header_size;
+
+    if (s->keyframe) {
+        if (vp8_rac_get(c))
+            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
+        vp8_rac_get(c); // whether we can skip clamping in dsp functions
+    }
+
+    if ((s->segmentation.enabled = vp8_rac_get(c)))
+        parse_segment_info(s);
+    else
+        s->segmentation.update_map = 0; // FIXME: move this to some init function?
+
+    s->filter.simple    = vp8_rac_get(c);
+    s->filter.level     = vp8_rac_get_uint(c, 6);
+    s->filter.sharpness = vp8_rac_get_uint(c, 3);
+
+    if ((s->lf_delta.enabled = vp8_rac_get(c)))
+        if (vp8_rac_get(c))
+            update_lf_deltas(s);
+
+    if (setup_partitions(s, buf, buf_size)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    get_quants(s);
+
+    if (!s->keyframe) {
+        update_refs(s);
+        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
+        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
+    }
+
+    // if we aren't saving this frame's probabilities for future frames,
+    // make a copy of the current probabilities
+    if (!(s->update_probabilities = vp8_rac_get(c)))
+        s->prob[1] = s->prob[0];
+
+    s->update_last = s->keyframe || vp8_rac_get(c); // the bit is only read on interframes
+
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < 8; j++)
+            for (k = 0; k < 3; k++)
+                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
+                    if (vp56_rac_get_prob(c, vp8_token_update_probs[i][j][k][l]))
+                        s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8);
+
+    if ((s->mbskip_enabled = vp8_rac_get(c)))
+        s->prob->mbskip = vp8_rac_get_uint(c, 8);
+
+    if (!s->keyframe) {
+        s->prob->intra  = vp8_rac_get_uint(c, 8);
+        s->prob->last   = vp8_rac_get_uint(c, 8);
+        s->prob->golden = vp8_rac_get_uint(c, 8);
+
+        if (vp8_rac_get(c))
+            for (i = 0; i < 4; i++)
+                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
+        if (vp8_rac_get(c))
+            for (i = 0; i < 3; i++)
+                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
+
+        // 17.2 MV probability update
+        for (i = 0; i < 2; i++)
+            for (j = 0; j < 19; j++)
+                if (vp56_rac_get_prob(c, vp8_mv_update_prob[i][j]))
+                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
+    }
+
+    return 0;
+}
+
+static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src,
+                            int mb_x, int mb_y)
+{
+#define MARGIN (16 << 2)
+    int mbs_right = s->mb_width  - 1 - mb_x;    // macroblocks between this one and the right edge
+    int mbs_below = s->mb_height - 1 - mb_y;    // macroblocks between this one and the bottom edge
+    dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), (mbs_right << 6) + MARGIN);
+    dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), (mbs_below << 6) + MARGIN);
+}
+
+static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+                          VP56mv near[2], VP56mv *best, int cnt[4])
+{   // outputs: near[0]/near[1] (unclamped), *best (clamped), cnt[] (weights, added to the caller's values)
+    VP8Macroblock *mb_edge[3] = { mb - s->mb_stride     /* top */,
+                                  mb - 1                /* left */,
+                                  mb - s->mb_stride - 1 /* top-left */ };
+    enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
+    VP56mv near_mv[4]  = {{ 0 }};
+    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
+    int idx = CNT_ZERO, n;
+    int best_idx = CNT_ZERO;
+
+    /* Process MB on top, left and top-left; intra neighbours are ignored */
+    for (n = 0; n < 3; n++) {
+        VP8Macroblock *edge = mb_edge[n];
+        if (edge->ref_frame != VP56_FRAME_CURRENT) {
+            if (edge->mv.x | edge->mv.y) {
+                VP56mv tmp = edge->mv;
+                if (s->sign_bias[mb->ref_frame] != s->sign_bias[edge->ref_frame]) {
+                    tmp.x *= -1; // sign bias differs between the two refs: negate the mv
+                    tmp.y *= -1;
+                }
+                if ((tmp.x ^ near_mv[idx].x) | (tmp.y ^ near_mv[idx].y))
+                    near_mv[++idx] = tmp; // differs from the previous candidate: new slot
+                cnt[idx]       += 1 + (n != 2); // top and left weigh 2, top-left weighs 1
+            } else
+                cnt[CNT_ZERO] += 1 + (n != 2);
+        }
+    }
+
+    /* If we have three distinct MV's, merge first and last if they're the same */
+    if (cnt[CNT_SPLITMV] &&
+        !((near_mv[1+EDGE_TOP].x ^ near_mv[1+EDGE_TOPLEFT].x) |
+          (near_mv[1+EDGE_TOP].y ^ near_mv[1+EDGE_TOPLEFT].y)))
+        cnt[CNT_NEAREST] += 1;
+
+    cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode   == VP8_MVMODE_SPLIT) +
+                        (mb_edge[EDGE_TOP]->mode    == VP8_MVMODE_SPLIT)) * 2 +
+                       (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); // slot now counts split-mv neighbours
+
+    /* Swap near and nearest if necessary, so nearest has the larger weight */
+    if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
+        FFSWAP(int,    cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
+        FFSWAP(VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
+    }
+
+    /* Choose the best mv out of 0,0 and the nearest mv; ties go to nearest */
+    if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])
+        best_idx = CNT_NEAREST;
+
+    clamp_mv(s, best, &near_mv[best_idx], mb_x, mb_y);
+    near[0] = near_mv[CNT_NEAREST];
+    near[1] = near_mv[CNT_NEAR];
+}
+
+/**
+ * Motion vector coding, 17.1.
+ */
+static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
+{
+    int x = 0;
+
+    if (vp56_rac_get_prob(c, p[0])) { // long form: magnitude coded bit by bit
+        int i;
+
+        for (i = 0; i < 3; i++) // bits 0..2, lowest first
+            x += vp56_rac_get_prob(c, p[9 + i]) << i;
+        for (i = 9; i > 3; i--) // then bits 9..4, highest first
+            x += vp56_rac_get_prob(c, p[9 + i]) << i;
+        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) // bit 3: not read (taken as set) if no higher bit is set
+            x += 8;
+    } else // short form: magnitude comes from the small mv tree
+        x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]);
+
+    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; // sign bit is only read for non-zero x
+}
+
+static const uint8_t *get_submv_prob(const VP56mv *left, const VP56mv *top)
+{
+    int left_zero = left->x == 0 && left->y == 0;
+    int top_zero  = top->x  == 0 && top->y  == 0;
+    // context: 4 = both zero, 3 = equal and non-zero, 2 = only top zero,
+    //          1 = only left zero, 0 = two different non-zero vectors
+    if (left->x == top->x && left->y == top->y)
+        return vp8_submv_prob[left_zero ? 4 : 3];
+    if (top_zero)
+        return vp8_submv_prob[2];
+    return vp8_submv_prob[left_zero];
+}
+
+/**
+ * Split motion vector prediction, 16.4.
+ */
+static void decode_splitmvs(VP8Context    *s,  VP56RangeCoder *c,
+                            VP8Macroblock *mb, VP56mv         *base_mv)
+{
+    int part_idx = mb->partitioning =
+        vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob);
+    int n, num = vp8_mbsplit_count[part_idx];
+    VP56mv part_mv[16];
+
+    for (n = 0; n < num; n++) {
+        int k = vp8_mbfirstidx[part_idx][n]; // first 4x4 block of partition n
+        const VP56mv *left  = (k & 3) ? &mb->bmv[k - 1] : &mb[-1].bmv[k + 3],   // column 0: last column of the left MB
+                     *above = (k > 3) ? &mb->bmv[k - 4] : &mb[-s->mb_stride].bmv[k + 12]; // row 0: last row of the MB above
+        const uint8_t *submv_prob = get_submv_prob(left, above);
+
+        switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) {
+        case VP8_SUBMVMODE_NEW4X4: // delta relative to base_mv; y is read before x
+            part_mv[n].y = base_mv->y + read_mv_component(c, s->prob->mvc[0]);
+            part_mv[n].x = base_mv->x + read_mv_component(c, s->prob->mvc[1]);
+            break;
+        case VP8_SUBMVMODE_ZERO4X4:
+            part_mv[n].x = 0;
+            part_mv[n].y = 0;
+            break;
+        case VP8_SUBMVMODE_LEFT4X4:
+            part_mv[n] = *left;
+            break;
+        case VP8_SUBMVMODE_TOP4X4:
+            part_mv[n] = *above;
+            break;
+        }
+
+        /* fill out over the 4x4 blocks in MB; later partitions read these as left/above */
+        for (k = 0; k < 16; k++)
+            if (vp8_mbsplits[part_idx][k] == n) {
+                mb->bmv[k]      = part_mv[n];
+            }
+    }
+}
+
+// Decode the 16 sub-block intra modes of one macroblock, in raster order,
+// into intra4x4 (rows are stride bytes apart).
+static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
+                                         int stride, int keyframe)
+{
+    const uint8_t *probs = vp8_pred4x4_prob_inter;  // fixed set for interframes
+    int n;
+
+    for (n = 0; n < 16; n++) {
+        uint8_t *cur = intra4x4 + (n >> 2) * stride + (n & 3);
+
+        // keyframes condition the probabilities on the above and left modes
+        if (keyframe)
+            probs = vp8_pred4x4_prob_intra[cur[-stride]][cur[-1]];
+
+        *cur = vp8_rac_get_tree(c, vp8_pred4x4_tree, probs);
+    }
+}
+
+static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+                           uint8_t *intra4x4)
+{
+    VP56RangeCoder *c = &s->c;
+    int n;
+
+    if (s->segmentation.update_map) // otherwise mb->segment keeps its previous value
+        mb->segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
+
+    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
+
+    if (s->keyframe) { // keyframes: intra only, with the fixed intra probabilities
+        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
+
+        if (mb->mode == MODE_I4x4) {
+            decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1);
+        } else // record the matching 4x4 mode for all 16 sub-blocks
+            fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1);
+
+        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
+        mb->ref_frame = VP56_FRAME_CURRENT;
+    } else if (vp56_rac_get_prob(c, s->prob->intra)) { // bit set: inter macroblock
+        VP56mv near[2], best;
+        int cnt[4] = { 0 };
+        uint8_t p[4];
+
+        // inter MB, 16.2
+        if (vp56_rac_get_prob(c, s->prob->last))
+            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
+                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
+        else
+            mb->ref_frame = VP56_FRAME_PREVIOUS;
+
+        // motion vectors, 16.3
+        find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt);
+        for (n = 0; n < 4; n++)
+            p[n] = vp8_mode_contexts[cnt[n]][n]; // neighbour weights select the mode probabilities
+        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p);
+        switch (mb->mode) {
+        case VP8_MVMODE_SPLIT:
+            decode_splitmvs(s, c, mb, &best);
+            mb->mv = mb->bmv[15]; // the last sub-block's mv stands for the whole MB
+            break;
+        case VP8_MVMODE_ZERO:
+            mb->mv.x = 0;
+            mb->mv.y = 0;
+            break;
+        case VP8_MVMODE_NEAREST:
+            clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y);
+            break;
+        case VP8_MVMODE_NEAR:
+            clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y);
+            break;
+        case VP8_MVMODE_NEW: // delta relative to best; y is read before x
+            mb->mv.y = best.y + read_mv_component(c, s->prob->mvc[0]);
+            mb->mv.x = best.x + read_mv_component(c, s->prob->mvc[1]);
+            break;
+        }
+        if (mb->mode != VP8_MVMODE_SPLIT) { // split mode already filled bmv[] itself
+            for (n = 0; n < 16; n++)
+                mb->bmv[n] = mb->mv;
+        }
+    } else {
+        // intra MB, 16.1
+        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
+
+        if (mb->mode == MODE_I4x4) {
+            decode_intra4x4_modes(c, intra4x4, s->b4_stride, 0);
+        } else // record the matching 4x4 mode for all 16 sub-blocks
+            fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1);
+
+        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
+        mb->ref_frame = VP56_FRAME_CURRENT;
+    }
+}
+
+/**
+ * Decode the coefficient tokens of one 4x4 block.
+ *
+ * @param c          range coder the tokens are read from
+ * @param block      destination; only zigzag_scan[] positions of non-zero coeffs are written
+ * @param probs      token probabilities, indexed [vp8_coeff_band[i]][context]
+ * @param i          initial coeff index, 0 unless a separate DC block is coded
+ * @param zero_nhood initial context: sum of the left/top neighbours' non-zero
+ *                   flags as passed by decode_mb_coeffs() (0-2)
+ * @param qmul       dequant factors: qmul[0] for coeff index 0, qmul[1] otherwise
+ * @return 0 if no non-zero coeff was decoded
+ *         otherwise, the index of the last non-zero coeff decoded plus one
+ */
+static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
+                               uint8_t probs[8][3][NUM_DCT_TOKENS-1],
+                               int i, int zero_nhood, int16_t qmul[2])
+{
+    int last_plus_one = 0;
+    int tree_offset   = 0;  // 1 for the token that directly follows a zero coeff
+
+    while (i < 16) {
+        int val = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], tree_offset);
+
+        if (val == DCT_EOB)
+            break;
+
+        if (val >= DCT_CAT1) {
+            // category token: value comes from vp8_rac_get_coeff() plus a per-category base
+            int cat = val - DCT_CAT1;
+            val = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]) + vp8_dct_cat_offset[cat];
+        }
+
+        // after the first token, the context is based on the last decoded coeff
+        zero_nhood  = !val ? 0 : val == 1 ? 1 : 2;
+        tree_offset = !val;
+        if (val) {
+            // todo: full [16] qmat? load into register?
+            int negative = vp8_rac_get(c);
+            block[zigzag_scan[i]] = (negative ? -val : val) * qmul[!!i];
+            last_plus_one = i + 1;
+        }
+        i++;
+    }
+    return last_plus_one;
+}
+
+// Decode every coefficient block of one macroblock into s->block and store the
+// per-block counts in s->non_zero_count_cache. t_nnz/l_nnz hold the top/left
+// neighbours' non-zero flags: they select the token context and are updated.
+static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
+                             uint8_t t_nnz[9], uint8_t l_nnz[9])
+{
+    LOCAL_ALIGNED_16(DCTELEM, dc,[16]);
+    int seg = s->segmentation.enabled ? mb->segment : 0;
+    int plane, bx, by, ctx, count, total = 0, first_coeff = 0, y_probs = 3;
+
+    s->dsp.clear_blocks((DCTELEM *)s->block);
+
+    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
+        // separate luma DC block (context slot 8), fed to vp8_luma_dc_wht()
+        AV_ZERO128(dc);
+        AV_ZERO128(dc+8);
+        ctx   = t_nnz[8] + l_nnz[8];
+        count = decode_block_coeffs(c, dc, s->prob->token[1], 0, ctx,
+                                    s->qmat[seg].luma_dc_qmul);
+        l_nnz[8] = t_nnz[8] = !!count;
+        total += count;
+        s->vp8dsp.vp8_luma_dc_wht(s->block, dc);
+        first_coeff = 1;    // luma blocks then start at coeff index 1 ...
+        y_probs     = 0;    // ... and use token probability set 0 instead of 3
+    }
+
+    // luma blocks
+    for (by = 0; by < 4; by++)
+        for (bx = 0; bx < 4; bx++) {
+            ctx   = l_nnz[by] + t_nnz[bx];
+            count = decode_block_coeffs(c, s->block[by][bx], s->prob->token[y_probs],
+                                        first_coeff, ctx, s->qmat[seg].luma_qmul);
+            // count+first_coeff may be one more than the actual last index, but we don't care
+            s->non_zero_count_cache[by][bx] = count + first_coeff;
+            t_nnz[bx] = l_nnz[by] = !!count;
+            total += count;
+        }
+
+    // chroma blocks
+    // TODO: what to do about dimensions? 2nd dim for luma is x,
+    // but for chroma it's (y<<1)|x
+    for (plane = 4; plane < 6; plane++)
+        for (by = 0; by < 2; by++)
+            for (bx = 0; bx < 2; bx++) {
+                ctx   = l_nnz[plane+2*by] + t_nnz[plane+2*bx];
+                count = decode_block_coeffs(c, s->block[plane][(by<<1)+bx], s->prob->token[2], 0,
+                                            ctx, s->qmat[seg].chroma_qmul);
+                s->non_zero_count_cache[plane][(by<<1)+bx] = count;
+                t_nnz[plane+2*bx] = l_nnz[plane+2*by] = !!count;
+                total += count;
+            }
+
+    // nothing coded although the MB was not marked skip: set skip by hand so the
+    // inner loop filter and IDCT are bypassed (skip is not used for bitstream prediction)
+    if (!total)
+        mb->skip = 1;
+}
+
+// Replace DC_PRED8x8 by DC_128_PRED8x8 for the top-left macroblock, by
+// LEFT_DC_PRED8x8 in the first row and by TOP_DC_PRED8x8 in the first column.
+static int check_intra_pred_mode(int mode, int mb_x, int mb_y)
+{
+    if (mode != DC_PRED8x8 || (mb_x && mb_y))
+        return mode;    // other modes and interior macroblocks stay untouched
+
+    if (!mb_x && !mb_y)
+        return DC_128_PRED8x8;
+
+    return mb_y ? TOP_DC_PRED8x8 : LEFT_DC_PRED8x8;
+}
+
+// Intra-predict one macroblock into dst[]. In the MODE_I4x4 case luma is
+// predicted per 4x4 block (modes from bmode) and its residual is added here too.
+static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+                          uint8_t *bmode, int mb_x, int mb_y)
+{
+    int x, y, mode, nnz;
+    uint32_t tr;    // unsigned: 0xFF*0x01010101 would overflow a signed int
+
+    if (mb->mode < MODE_I4x4) {
+        mode = check_intra_pred_mode(mb->mode, mb_x, mb_y);
+        s->hpc.pred16x16[mode](dst[0], s->linesize);
+    } else {
+        uint8_t *ptr = dst[0];
+        // the rightmost column of 4x4 blocks takes its topright pixels from the
+        // row just above the macroblock, 16 pixels to the right of its origin
+        uint8_t *tr_right = ptr - s->linesize + 16;
+
+        // the last macroblock of a row has nothing above-right: replicate the
+        // pixel at tr_right[-1] four times into a local and point there instead
+        if (mb_x == s->mb_width-1) {
+            tr = tr_right[-1]*0x01010101u;
+            tr_right = (uint8_t *)&tr;
+        }
+
+        for (y = 0; y < 4; y++) {
+            uint8_t *topright = ptr + 4 - s->linesize;
+            for (x = 0; x < 4; x++) {
+                if (x == 3)
+                    topright = tr_right;
+
+                s->hpc.pred4x4[bmode[x]](ptr+4*x, topright, s->linesize);
+                nnz = s->non_zero_count_cache[y][x];
+                if (nnz) {
+                    if (nnz == 1)
+                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
+                    else
+                        s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
+                }
+                topright += 4;
+            }
+            ptr   += 4*s->linesize;
+            bmode += s->b4_stride;
+        }
+    }
+
+    mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y);
+    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
+    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
+}
+
+/**
+ * Motion-compensate a single block from a reference plane.
+ *
+ * @param s VP8 decoding context
+ * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
+ * @param dst target buffer for block data at block position
+ * @param src reference picture buffer at origin (0, 0)
+ * @param mv motion vector (relative to block position) to get pixel data from
+ * @param x_off,y_off position of block from origin (0, 0)
+ * @param block_w,block_h block dimensions (callers in this file pass 16, 8 or 4, square)
+ * @param width,height dimensions of src/dst plane data
+ * @param linesize size of a single line of plane data, including padding
+ * @param mc_func MC functions, indexed [vertical][horizontal] through idx[]
+ */
+static inline void vp8_mc(VP8Context *s, int luma,
+                          uint8_t *dst, uint8_t *src, const VP56mv *mv,
+                          int x_off, int y_off, int block_w, int block_h,
+                          int width, int height, int linesize,
+                          h264_chroma_mc_func mc_func[3][3])
+{
+    // maps the 3-bit fractional position to an mc_func index:
+    // 0 for no fraction, 1 for odd and 2 for even non-zero fractions
+    static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 };
+    const int frac_x = (mv->x << luma) & 7;
+    const int frac_y = (mv->y << luma) & 7;
+    const int px     = x_off + (mv->x >> (3 - luma));
+    const int py     = y_off + (mv->y >> (3 - luma));
+    const int inside = px >= 2 && px < width  - block_w - 3 &&
+                       py >= 2 && py < height - block_h - 3;
+
+    src += py * linesize + px;
+    if (!inside) {
+        // edge emulation: fetch the (block_w+5)x(block_h+5) source area via edge_emu_buffer
+        ff_emulated_edge_mc(s->edge_emu_buffer, src - 2 * linesize - 2, linesize,
+                            block_w + 5, block_h + 5,
+                            px - 2, py - 2, width, height);
+        src = s->edge_emu_buffer + 2 + linesize * 2;
+    }
+
+    mc_func[idx[frac_y]][idx[frac_x]](dst, src, linesize, block_h, frac_x, frac_y);
+}
+
+/**
+ * Build the inter prediction of one macroblock (chapter 18).
+ *
+ * Modes below VP8_MVMODE_SPLIT use mb->mv for the whole macroblock; in split
+ * mode every 4x4 luma block uses its own mb->bmv[] entry and every 4x4 chroma
+ * block the rounded average of the four co-located luma vectors.
+ *
+ * @param s    VP8 decoding context
+ * @param dst  Y, U and V destination pointers for this macroblock
+ * @param mb   macroblock providing mode, ref_frame, mv and bmv[]
+ * @param mb_x,mb_y macroblock position, in macroblock units
+ */
+static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
+                          int mb_x, int mb_y)
+{
+    AVFrame *ref = s->framep[mb->ref_frame];
+    int x_off = mb_x << 4, y_off = mb_y << 4;
+    int width = 16*s->mb_width, height = 16*s->mb_height;
+    VP56mv uvmv;
+    int plane, bx, by;
+
+    if (mb->mode < VP8_MVMODE_SPLIT) {
+        // one vector for the whole macroblock: a 16x16 luma block ...
+        vp8_mc(s, 1, dst[0], ref->data[0], &mb->mv,
+               x_off, y_off, 16, 16, width, height, s->linesize,
+               s->vp8dsp.put_vp8_epel_pixels_tab[0]);
+
+        // ... and an 8x8 block in each chroma plane
+        uvmv = mb->mv;
+        if (s->profile == 3) {
+            uvmv.x &= ~7;
+            uvmv.y &= ~7;
+        }
+        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
+        for (plane = 1; plane < 3; plane++)
+            vp8_mc(s, 0, dst[plane], ref->data[plane], &uvmv,
+                   x_off, y_off, 8, 8, width, height, s->uvlinesize,
+                   s->vp8dsp.put_vp8_epel_pixels_tab[1]);
+        return;
+    }
+
+    // split mode, luma: one vector per 4x4 block
+    for (by = 0; by < 4; by++)
+        for (bx = 0; bx < 4; bx++)
+            vp8_mc(s, 1, dst[0] + 4*by*s->linesize + bx*4,
+                   ref->data[0], &mb->bmv[4*by + bx],
+                   4*bx + x_off, 4*by + y_off, 4, 4,
+                   width, height, s->linesize,
+                   s->vp8dsp.put_vp8_epel_pixels_tab[2]);
+
+    // split mode, chroma: sum the 2x2 group of luma vectors, then divide by 4
+    // rounding half away from zero
+    x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
+    for (by = 0; by < 2; by++) {
+        for (bx = 0; bx < 2; bx++) {
+            const VP56mv *top = &mb->bmv[ 2*by    * 4 + 2*bx];
+            const VP56mv *bot = &mb->bmv[(2*by+1) * 4 + 2*bx];
+
+            uvmv.x = top[0].x + top[1].x + bot[0].x + bot[1].x;
+            uvmv.y = top[0].y + top[1].y + bot[0].y + bot[1].y;
+            uvmv.x = (uvmv.x + (uvmv.x < 0 ? -2 : 2)) / 4;
+            uvmv.y = (uvmv.y + (uvmv.y < 0 ? -2 : 2)) / 4;
+            if (s->profile == 3) {
+                uvmv.x &= ~7;
+                uvmv.y &= ~7;
+            }
+            for (plane = 1; plane < 3; plane++)
+                vp8_mc(s, 0, dst[plane] + 4*by*s->uvlinesize + bx*4,
+                       ref->data[plane], &uvmv,
+                       4*bx + x_off, 4*by + y_off, 4, 4,
+                       width, height, s->uvlinesize,
+                       s->vp8dsp.put_vp8_epel_pixels_tab[2]);
+        }
+    }
+}
+
+// Add the residual of all coded blocks to the prediction already in the
+// destination planes. non_zero_count_cache selects the transform per block:
+// 0 = nothing to add, 1 = vp8_idct_dc_add(), anything else = vp8_idct_add().
+static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst,
+                    VP8Macroblock *mb)
+{
+    uint8_t *uv_dst[2] = { u_dst, v_dst };
+    int bx, by, plane, count;
+
+    // MODE_I4x4 luma was already reconstructed block by block in intra_predict()
+    if (mb->mode != MODE_I4x4) {
+        for (by = 0; by < 4; by++) {
+            for (bx = 0; bx < 4; bx++) {
+                count = s->non_zero_count_cache[by][bx];
+                if (count == 1)
+                    s->vp8dsp.vp8_idct_dc_add(y_dst+4*bx, s->block[by][bx], s->linesize);
+                else if (count)
+                    s->vp8dsp.vp8_idct_add(y_dst+4*bx, s->block[by][bx], s->linesize);
+            }
+            y_dst += 4*s->linesize;
+        }
+    }
+
+    // chroma: cache/block rows 4 and 5 hold U and V, sub-block index (y<<1)+x
+    for (by = 0; by < 2; by++) {
+        for (bx = 0; bx < 2; bx++) {
+            const int sub = (by<<1)+bx;
+
+            for (plane = 0; plane < 2; plane++) {
+                uint8_t *out = uv_dst[plane] + 4*bx;
+                count = s->non_zero_count_cache[4+plane][sub];
+                if (count == 1)
+                    s->vp8dsp.vp8_idct_dc_add(out, s->block[4+plane][sub], s->uvlinesize);
+                else if (count)
+                    s->vp8dsp.vp8_idct_add(out, s->block[4+plane][sub], s->uvlinesize);
+            }
+        }
+        uv_dst[0] += 4*s->uvlinesize;
+        uv_dst[1] += 4*s->uvlinesize;
+    }
+}
+
+// Derive the loop filter strength of one macroblock.
+// *level receives the filter level clipped to 0..63, *inner the interior limit
+// (at least 1) and, when hev_thresh is non-NULL, *hev_thresh a 0..3 threshold.
+static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, int *level, int *inner, int *hev_thresh)
+{
+    int lvl = s->filter.level;
+    int limit, thresh;
+
+    // a segment value either replaces the frame level or is added to it
+    if (s->segmentation.enabled) {
+        int seg_lvl = s->segmentation.filter_level[mb->segment];
+        lvl = s->segmentation.absolute_vals ? seg_lvl : seg_lvl + lvl;
+    }
+
+    // per-reference-frame and per-mode adjustments
+    if (s->lf_delta.enabled) {
+        lvl += s->lf_delta.ref[mb->ref_frame];
+
+        if (mb->ref_frame != VP56_FRAME_CURRENT) {
+            switch (mb->mode) {
+            case VP8_MVMODE_ZERO:  lvl += s->lf_delta.mode[1]; break;
+            case VP8_MVMODE_SPLIT: lvl += s->lf_delta.mode[3]; break;
+            default:               lvl += s->lf_delta.mode[2]; break;
+            }
+        } else if (mb->mode == MODE_I4x4) {
+            lvl += s->lf_delta.mode[0];
+        }
+    }
+    lvl = av_clip(lvl, 0, 63);
+
+    // sharpness shrinks the interior limit, which never drops below 1
+    limit = lvl;
+    if (s->filter.sharpness) {
+        limit >>= s->filter.sharpness > 4 ? 2 : 1;
+        limit = FFMIN(limit, 9 - s->filter.sharpness);
+    }
+
+    *level = lvl;
+    *inner = FFMAX(limit, 1);
+    if (!hev_thresh)
+        return;
+
+    // threshold steps: level >= 15 -> 1, >= 20 -> 2 (non-keyframes only),
+    // >= 40 -> 2 on keyframes and 3 otherwise
+    if (lvl >= 40)
+        thresh = s->keyframe ? 2 : 3;
+    else if (lvl >= 20 && !s->keyframe)
+        thresh = 2;
+    else
+        thresh = lvl >= 15;
+    *hev_thresh = thresh;
+}
+
+// TODO: look at backup_mb_border / xchg_mb_border in h264.c
+// Normal (non-simple) loop filter for the luma and chroma edges of one macroblock.
+static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y)
+{
+    int lvl, ilim, hev, off, inner_edges;
+
+    filter_level_for_mb(s, mb, &lvl, &ilim, &hev);
+    if (!lvl)
+        return;
+
+    // skipped MBs get their inner edges filtered only for MODE_I4x4 / VP8_MVMODE_SPLIT
+    inner_edges = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
+
+    if (mb_x) {     // left macroblock edge; none in the first column
+        s->vp8dsp.vp8_h_loop_filter16(dst[0], s->linesize,   lvl+2, ilim, hev);
+        s->vp8dsp.vp8_h_loop_filter8 (dst[1], s->uvlinesize, lvl+2, ilim, hev);
+        s->vp8dsp.vp8_h_loop_filter8 (dst[2], s->uvlinesize, lvl+2, ilim, hev);
+    }
+    if (inner_edges) {
+        for (off = 4; off < 16; off += 4)
+            s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+off, s->linesize, lvl, ilim, hev);
+        s->vp8dsp.vp8_h_loop_filter8_inner(dst[1]+4, s->uvlinesize, lvl, ilim, hev);
+        s->vp8dsp.vp8_h_loop_filter8_inner(dst[2]+4, s->uvlinesize, lvl, ilim, hev);
+    }
+
+    if (mb_y) {     // top macroblock edge; none in the first row
+        s->vp8dsp.vp8_v_loop_filter16(dst[0], s->linesize,   lvl+2, ilim, hev);
+        s->vp8dsp.vp8_v_loop_filter8 (dst[1], s->uvlinesize, lvl+2, ilim, hev);
+        s->vp8dsp.vp8_v_loop_filter8 (dst[2], s->uvlinesize, lvl+2, ilim, hev);
+    }
+    if (inner_edges) {
+        for (off = 4; off < 16; off += 4)
+            s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+off*s->linesize, s->linesize, lvl, ilim, hev);
+        s->vp8dsp.vp8_v_loop_filter8_inner(dst[1]+4*s->uvlinesize, s->uvlinesize, lvl, ilim, hev);
+        s->vp8dsp.vp8_v_loop_filter8_inner(dst[2]+4*s->uvlinesize, s->uvlinesize, lvl, ilim, hev);
+    }
+}
+
+// Simple loop filter for one macroblock; only the plane passed as dst is
+// filtered, with a single edge limit per call instead of level/limit/threshold.
+static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y)
+{
+    int lvl, ilim, mb_lim, blk_lim, off, inner_edges;
+
+    filter_level_for_mb(s, mb, &lvl, &ilim, NULL);
+    if (!lvl)
+        return;
+
+    // macroblock edges use level+2, i.e. a limit 4 above that of inner edges
+    blk_lim = 2*lvl + ilim;
+    mb_lim  = blk_lim + 4;
+    inner_edges = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
+
+    if (mb_x)
+        s->vp8dsp.vp8_h_loop_filter_simple(dst, s->linesize, mb_lim);
+    if (inner_edges)
+        for (off = 4; off < 16; off += 4)
+            s->vp8dsp.vp8_h_loop_filter_simple(dst+off, s->linesize, blk_lim);
+
+    if (mb_y)
+        s->vp8dsp.vp8_v_loop_filter_simple(dst, s->linesize, mb_lim);
+    if (inner_edges)
+        for (off = 4; off < 16; off += 4)
+            s->vp8dsp.vp8_v_loop_filter_simple(dst+off*s->linesize, s->linesize, blk_lim);
+}
+
+// Apply filter_mb() to each macroblock of row mb_y of the current frame.
+static void filter_mb_row(VP8Context *s, int mb_y)
+{
+    AVFrame *cur = s->framep[VP56_FRAME_CURRENT];
+    VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
+    uint8_t *dst[3];
+    int mb_x;
+
+    dst[0] = cur->data[0] + 16*mb_y*s->linesize;
+    dst[1] = cur->data[1] +  8*mb_y*s->uvlinesize;
+    dst[2] = cur->data[2] +  8*mb_y*s->uvlinesize;
+
+    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
+        filter_mb(s, dst, mb, mb_x, mb_y);
+        dst[0] += 16; dst[1] += 8; dst[2] += 8;
+    }
+}
+
+// Apply filter_mb_simple() to each macroblock of row mb_y of the current
+// frame; only plane 0 is passed on.
+static void filter_mb_row_simple(VP8Context *s, int mb_y)
+{
+    VP8Macroblock *row = s->macroblocks + mb_y*s->mb_stride;
+    uint8_t *luma = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
+    int col;
+
+    for (col = 0; col < s->mb_width; col++)
+        filter_mb_simple(s, luma + 16*col, row + col, col, mb_y);
+}
+
+static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+                            AVPacket *avpkt)
+{
+    VP8Context *s = avctx->priv_data;
+    int ret, mb_x, mb_y, i, y, referenced;
+    enum AVDiscard skip_thresh;
+    AVFrame *curframe;
+
+    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
+        return ret;
+
+    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
+                                || s->update_altref == VP56_FRAME_CURRENT;
+
+    skip_thresh = !referenced ? AVDISCARD_NONREF :
+                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
+
+    if (avctx->skip_frame >= skip_thresh) {
+        s->invisible = 1;
+        goto skip_decode;
+    }
+
+    for (i = 0; i < 4; i++)
+        if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
+            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
+            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
+            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
+            break;
+        }
+    if (curframe->data[0])
+        avctx->release_buffer(avctx, curframe);
+
+    curframe->key_frame = s->keyframe;
+    curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE;
+    curframe->reference = referenced ? 3 : 0;
+    if ((ret = avctx->get_buffer(avctx, curframe))) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
+        return ret;
+    }
+
+    // Given that arithmetic probabilities are updated every frame, it's quite likely
+    // that the values we have on a random interframe are complete junk if we didn't
+    // start decode on a keyframe. So just don't display anything rather than junk.
+    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
+                         !s->framep[VP56_FRAME_GOLDEN] ||
+                         !s->framep[VP56_FRAME_GOLDEN2])) {
+        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->linesize   = curframe->linesize[0];
+    s->uvlinesize = curframe->linesize[1];
+
+    if (!s->edge_emu_buffer)
+        s->edge_emu_buffer = av_malloc(21*s->linesize);
+
+    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
+
+    // top edge of 127 for intra prediction
+    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
+        memset(curframe->data[0] - s->linesize  -1, 127, s->linesize  +1);
+        memset(curframe->data[1] - s->uvlinesize-1, 127, s->uvlinesize+1);
+        memset(curframe->data[2] - s->uvlinesize-1, 127, s->uvlinesize+1);
+    }
+
+    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+        VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
+        VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
+        uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride;
+        uint8_t *dst[3] = {
+            curframe->data[0] + 16*mb_y*s->linesize,
+            curframe->data[1] +  8*mb_y*s->uvlinesize,
+            curframe->data[2] +  8*mb_y*s->uvlinesize
+        };
+
+        memset(s->left_nnz, 0, sizeof(s->left_nnz));
+
+        // left edge of 129 for intra prediction
+        if (!(avctx->flags & CODEC_FLAG_EMU_EDGE))
+            for (i = 0; i < 3; i++)
+                for (y = 0; y < 16>>!!i; y++)
+                    dst[i][y*curframe->linesize[i]-1] = 129;
+
+        for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
+            decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x);
+
+            if (!mb->skip)
+                decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
+            else {
+                AV_ZERO128(s->non_zero_count_cache);    // luma
+                AV_ZERO64(s->non_zero_count_cache[4]);  // chroma
+            }
+
+            if (mb->mode <= MODE_I4x4) {
+                intra_predict(s, dst, mb, intra4x4 + 4*mb_x, mb_x, mb_y);
+                memset(mb->bmv, 0, sizeof(mb->bmv));
+            } else {
+                inter_predict(s, dst, mb, mb_x, mb_y);
+            }
+
+            if (!mb->skip) {
+                idct_mb(s, dst[0], dst[1], dst[2], mb);
+            } else {
+                AV_ZERO64(s->left_nnz);
+                AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
+
+                // Reset DC block predictors if they would exist if the mb had coefficients
+                if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
+                    s->left_nnz[8]      = 0;
+                    s->top_nnz[mb_x][8] = 0;
+                }
+            }
+
+            dst[0] += 16;
+            dst[1] += 8;
+            dst[2] += 8;
+            mb++;
+        }
+        if (mb_y && s->filter.level && avctx->skip_loop_filter < skip_thresh) {
+            if (s->filter.simple)
+                filter_mb_row_simple(s, mb_y-1);
+            else
+                filter_mb_row(s, mb_y-1);
+        }
+    }
+    if (s->filter.level && avctx->skip_loop_filter < skip_thresh) {
+        if (s->filter.simple)
+            filter_mb_row_simple(s, mb_y-1);
+        else
+            filter_mb_row(s, mb_y-1);
+    }
+
+skip_decode:
+    // if future frames don't use the updated probabilities,
+    // reset them to the values we saved
+    if (!s->update_probabilities)
+        s->prob[0] = s->prob[1];
+
+    // check if golden and altref are swapped
+    if (s->update_altref == VP56_FRAME_GOLDEN &&
+        s->update_golden == VP56_FRAME_GOLDEN2)
+        FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
+    else {
+        if (s->update_altref != VP56_FRAME_NONE)
+            s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
+
+        if (s->update_golden != VP56_FRAME_NONE)
+            s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
+    }
+
+    if (s->update_last) // move cur->prev
+        s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];
+
+    // release no longer referenced frames
+    for (i = 0; i < 4; i++)
+        if (s->frames[i].data[0] &&
+            &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
+            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
+            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
+            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
+            avctx->release_buffer(avctx, &s->frames[i]);
+
+    if (!s->invisible) {
+        *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
+        *data_size = sizeof(AVFrame);
+    }
+
+    return avpkt->size;
+}
+
+// Codec init: set the pixel format and initialise the DSP / prediction contexts.
+static av_cold int vp8_decode_init(AVCodecContext *avctx)
+{
+    VP8Context *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    avctx->pix_fmt = PIX_FMT_YUV420P;
+
+    dsputil_init(&s->dsp, avctx);
+    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
+    ff_vp8dsp_init(&s->vp8dsp);
+
+    // decoding writes intra prediction edge pixels outside the picture area
+    // (see vp8_decode_frame), so CODEC_FLAG_EMU_EDGE is rejected
+    if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
+        av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n");
+        return AVERROR_PATCHWELCOME;
+    }
+    return 0;
+}
+
+static av_cold int vp8_decode_free(AVCodecContext *avctx)
+{
+    vp8_decode_flush(avctx);    // teardown is delegated entirely to the flush handler
+    return 0;                   // always reports success
+}
+
+AVCodec vp8_decoder = {     // positional fields; roles below are presumed from the values - confirm against AVCodec
+    "vp8",                  // short codec name
+    AVMEDIA_TYPE_VIDEO,     // media type
+    CODEC_ID_VP8,           // codec id
+    sizeof(VP8Context),     // private context size (avctx->priv_data is used as a VP8Context)
+    vp8_decode_init,        // init callback
+    NULL,                   // presumably the encode callback slot (decoder only) - TODO confirm
+    vp8_decode_free,        // close callback
+    vp8_decode_frame,       // decode callback
+    CODEC_CAP_DR1,          // capability flags
+    .flush = vp8_decode_flush,
+    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
+};
diff --git a/libavcodec/vp8data.h b/libavcodec/vp8data.h
new file mode 100644
index 0000000000..bf776dbc3e
--- /dev/null
+++ b/libavcodec/vp8data.h
@@ -0,0 +1,752 @@
+/**
+ * VP8 compatible video decoder
+ *
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+// TODO: move these #defines and enums to a better header...
+
+#define VP8_MAX_QUANT 127
+
+enum dct_token {            // token alphabet read by decode_block_coeffs() in vp8.c
+    DCT_0,                  // DCT_0..DCT_4: the token value itself is the coefficient magnitude
+    DCT_1,
+    DCT_2,
+    DCT_3,
+    DCT_4,
+    DCT_CAT1,               // DCT_CAT1..DCT_CAT6: magnitude is read separately, using
+    DCT_CAT2,               // vp8_dct_cat_prob[] / vp8_dct_cat_offset[] at index token-DCT_CAT1
+    DCT_CAT3,
+    DCT_CAT4,
+    DCT_CAT5,
+    DCT_CAT6,
+    DCT_EOB,                // end of block: decoding of the block stops
+
+    NUM_DCT_TOKENS          // token count; NUM_DCT_TOKENS-1 sizes the probability arrays
+};
+
+#include "h264pred.h"
+
+// used to signal 4x4 intra pred in luma MBs
+#define MODE_I4x4 4
+
+enum inter_mvmode {         // whole-macroblock inter modes, stored in mb->mode
+    VP8_MVMODE_NEAREST = MODE_I4x4 + 1,     // numbered after MODE_I4x4: vp8.c treats mode <= MODE_I4x4 as intra
+    VP8_MVMODE_NEAR,
+    VP8_MVMODE_ZERO,
+    VP8_MVMODE_NEW,
+    VP8_MVMODE_SPLIT        // must stay last: vp8.c tests mode < VP8_MVMODE_SPLIT for "one MV per MB"
+};
+
+enum inter_submvmode {      // leaf values of vp8_submv_ref_tree
+    VP8_SUBMVMODE_LEFT4X4,
+    VP8_SUBMVMODE_TOP4X4,
+    VP8_SUBMVMODE_ZERO4X4,
+    VP8_SUBMVMODE_NEW4X4
+};
+
+static const uint8_t vp8_pred4x4_mode[] =  // 16x16 mode -> 4x4 mode stored in the intra4x4 array for non-I4x4 intra MBs
+{
+    [DC_PRED8x8]    = DC_PRED,
+    [VERT_PRED8x8]  = VERT_PRED,
+    [HOR_PRED8x8]   = HOR_PRED,
+    [PLANE_PRED8x8] = TM_VP8_PRED,
+};
+
+static const int8_t vp8_pred16x16_tree_intra[4][2] =   // rows are tree nodes; judging by the bit-string comments,
+{                                                       // entries > 0 name the next row, entries <= 0 are negated modes
+    { -MODE_I4x4, 1 },                      // '0'
+     { 2, 3 },
+      {  -DC_PRED8x8,  -VERT_PRED8x8 },     // '100', '101'
+      { -HOR_PRED8x8, -PLANE_PRED8x8 },     // '110', '111'
+};
+
+static const int8_t vp8_pred16x16_tree_inter[4][2] =   // luma mode tree read with s->prob->pred16x16 for intra MBs
+{                                                       // in the "intra MB, 16.1" branch of decode_mb_mode()
+    { -DC_PRED8x8, 1 },                     // '0'
+     { 2, 3 },
+      {  -VERT_PRED8x8, -HOR_PRED8x8 },     // '100', '101'
+      { -PLANE_PRED8x8, -MODE_I4x4 },       // '110', '111'
+};
+
+static const int vp8_mode_contexts[6][4] = {    // [cnt[n] from find_near_mvs()][n] -> n-th probability for vp8_pred16x16_tree_mvinter
+    {   7,   1,   1, 143 },
+    {  14,  18,  14, 107 },
+    { 135,  64,  57,  68 },
+    {  60,  56, 128,  65 },
+    { 159, 134, 128,  34 },
+    { 234, 188, 128,  28 },
+};
+
+static const int8_t vp8_pred16x16_tree_mvinter[4][2] = {   // inter MB mode tree; its probabilities come from vp8_mode_contexts
+    { -VP8_MVMODE_ZERO,      1 },           // '0'
+     { -VP8_MVMODE_NEAREST,  2 },           // '10'
+      { -VP8_MVMODE_NEAR,    3 },           // '110'
+       { -VP8_MVMODE_NEW, -VP8_MVMODE_SPLIT } // '1110', '1111'
+};
+
+static const int8_t vp8_small_mvtree[7][2] = {  // balanced 3-bit tree with leaves 0..7; NOTE(review): its reader is outside this chunk
+    {  1, 4 },
+     {  2, 3 },
+      { -0, -1 },                           // '000', '001'
+      { -2, -3 },                           // '010', '011'
+     {  5,  6 },
+      { -4, -5 },                           // '100', '101'
+      { -6, -7 }                            // '110', '111'
+};
+
+static const uint8_t vp8_mbsplits[4][16] = {    // 4 split layouts x 16 entries; values look like the partition number of
+    {  0,  0,  0,  0,  0,  0,  0,  0,           // each 4x4 block in raster order - confirm in decode_splitmvs()
+       1,  1,  1,  1,  1,  1,  1,  1  },
+    {  0,  0,  1,  1,  0,  0,  1,  1,
+       0,  0,  1,  1,  0,  0,  1,  1  },
+    {  0,  0,  1,  1,  0,  0,  1,  1,
+       2,  2,  3,  3,  2,  2,  3,  3  },
+    {  0,  1,  2,  3,  4,  5,  6,  7,
+       8,  9, 10, 11, 12, 13, 14, 15  }
+};
+
+static const uint8_t vp8_mbfirstidx[4][16] = {  // per layout: presumably the first 4x4 block index of each partition - TODO confirm
+    {  0,  8 }, {  0,  2 }, {  0,  2,  8,  10 },    // entries not listed here are zero-initialised
+    {  0,  1,  2,  3,  4,  5,  6,  7,
+       8,  9, 10, 11, 12, 13, 14, 15 }
+};
+
+static const int8_t vp8_mbsplit_tree[3][2] = {  // leaves are layout numbers 0..3 (negated)
+    { -3,  1 },                             // '0' - 16 individual MVs
+     { -2,  2 },                            // '10' - quarter-based MVs
+      { -0, -1 }                            // '110' - top/bottom MVs,
+                                            // '111' - left/right MVs
+};
+static const uint8_t vp8_mbsplit_count[4] = {   2,   2,   4,  16 };   // partitions per layout; equals the distinct values per vp8_mbsplits row
+static const uint8_t vp8_mbsplit_prob[3]  = { 110, 111, 150 };        // presumably one probability per vp8_mbsplit_tree row
+
+static const uint8_t vp8_submv_prob[5][3] = {   // 5 sets of 3 probabilities, presumably one per vp8_submv_ref_tree row;
+    { 147, 136,  18 },                          // how the set is selected is not visible in this chunk
+    { 106, 145,   1 },
+    { 179, 121,   1 },
+    { 223,   1,  34 },
+    { 208,   1,   1 }
+};
+
+static const int8_t vp8_submv_ref_tree[3][2] = {    // leaves are negated enum inter_submvmode values
+    { -VP8_SUBMVMODE_LEFT4X4, 1 },          // '0'
+     { -VP8_SUBMVMODE_TOP4X4, 2 },          // '10'
+      { -VP8_SUBMVMODE_ZERO4X4, -VP8_SUBMVMODE_NEW4X4 } // '110', '111'
+};
+
+static const uint8_t vp8_pred16x16_prob_intra[4] = { 145, 156, 163, 128 };  // presumably paired with vp8_pred16x16_tree_intra - confirm
+static const uint8_t vp8_pred16x16_prob_inter[4] = { 112,  86, 140,  37 };  // presumably the initial s->prob->pred16x16 values - confirm
+
+static const int8_t vp8_pred4x4_tree[9][2] =    // 4x4 intra mode tree: 9 internal nodes, 10 leaf modes
+{
+    { -DC_PRED, 1 },                                    // '0'
+     { -TM_VP8_PRED, 2 },                               // '10'
+      { -VERT_PRED, 3 },                                // '110'
+       { 4, 6 },
+        { -HOR_PRED, 5 },                               // '11100'
+         { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED },   // '111010', '111011'
+        { -DIAG_DOWN_LEFT_PRED, 7 },                    // '11110'
+         { -VERT_LEFT_PRED, 8 },                        // '111110'
+          { -HOR_DOWN_PRED, -HOR_UP_PRED },             // '1111110', '1111111'
+};
+
+static const int8_t vp8_pred8x8c_tree[3][2] =   // chroma intra mode tree; decode_mb_mode() stores the result in s->chroma_pred_mode
+{
+    { -DC_PRED8x8, 1 },                 // '0'
+     { -VERT_PRED8x8, 2 },              // '10'
+      { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111'
+};
+
+static const uint8_t vp8_pred8x8c_prob_intra[3] = { 142, 114, 183 }; // 3 entries, presumably one per vp8_pred8x8c_tree node; by name, the intra-frame set
+static const uint8_t vp8_pred8x8c_prob_inter[3] = { 162, 101, 204 }; // by name, the inter-frame set (TODO confirm usage in vp8.c)
+
+static const uint8_t vp8_pred4x4_prob_inter[9] = // 9 entries, presumably one per vp8_pred4x4_tree node
+{                                                // by name, the set used in inter frames (TODO confirm)
+    120, 90, 79, 133, 87, 85, 80, 111, 151
+};
+
+static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = // 9 probabilities per entry, presumably one per vp8_pred4x4_tree node
+{ // the two [10] indices are contexts chosen by the decoder (presumably neighbouring block modes) - TODO confirm
+    {
+        {  39,  53, 200,  87,  26,  21,  43, 232, 171 },
+        {  56,  34,  51, 104, 114, 102,  29,  93,  77 },
+        {  88,  88, 147, 150,  42,  46,  45, 196, 205 },
+        { 107,  54,  32,  26,  51,   1,  81,  43,  31 },
+        {  39,  28,  85, 171,  58, 165,  90,  98,  64 },
+        {  34,  22, 116, 206,  23,  34,  43, 166,  73 },
+        {  34,  19,  21, 102, 132, 188,  16,  76, 124 },
+        {  68,  25, 106,  22,  64, 171,  36, 225, 114 },
+        {  62,  18,  78,  95,  85,  57,  50,  48,  51 },
+        {  43,  97, 183, 117,  85,  38,  35, 179,  61 },
+    },
+    {
+        { 112, 113,  77,  85, 179, 255,  38, 120, 114 },
+        {  40,  42,   1, 196, 245, 209,  10,  25, 109 },
+        { 193, 101,  35, 159, 215, 111,  89,  46, 111 },
+        { 100,  80,   8,  43, 154,   1,  51,  26,  71 },
+        {  88,  43,  29, 140, 166, 213,  37,  43, 154 },
+        {  61,  63,  30, 155,  67,  45,  68,   1, 209 },
+        {  41,  40,   5, 102, 211, 183,   4,   1, 221 },
+        { 142,  78,  78,  16, 255, 128,  34, 197, 171 },
+        {  51,  50,  17, 168, 209, 192,  23,  25,  82 },
+        {  60, 148,  31, 172, 219, 228,  21,  18, 111 },
+    },
+    {
+        { 175,  69, 143,  80,  85,  82,  72, 155, 103 },
+        {  56,  58,  10, 171, 218, 189,  17,  13, 152 },
+        { 231, 120,  48,  89, 115, 113, 120, 152, 112 },
+        { 144,  71,  10,  38, 171, 213, 144,  34,  26 },
+        { 114,  26,  17, 163,  44, 195,  21,  10, 173 },
+        { 121,  24,  80, 195,  26,  62,  44,  64,  85 },
+        {  63,  20,   8, 114, 114, 208,  12,   9, 226 },
+        { 170,  46,  55,  19, 136, 160,  33, 206,  71 },
+        {  81,  40,  11,  96, 182,  84,  29,  16,  36 },
+        { 152, 179,  64, 126, 170, 118,  46,  70,  95 },
+    },
+    {
+        {  75,  79, 123,  47,  51, 128,  81, 171,   1 },
+        {  57,  17,   5,  71, 102,  57,  53,  41,  49 },
+        { 125,  98,  42,  88, 104,  85, 117, 175,  82 },
+        { 115,  21,   2,  10, 102, 255, 166,  23,   6 },
+        {  38,  33,  13, 121,  57,  73,  26,   1,  85 },
+        {  41,  10,  67, 138,  77, 110,  90,  47, 114 },
+        {  57,  18,  10, 102, 102, 213,  34,  20,  43 },
+        { 101,  29,  16,  10,  85, 128, 101, 196,  26 },
+        { 117,  20,  15,  36, 163, 128,  68,   1,  26 },
+        {  95,  84,  53,  89, 128, 100, 113, 101,  45 },
+    },
+    {
+        {  63,  59,  90, 180,  59, 166,  93,  73, 154 },
+        {  40,  40,  21, 116, 143, 209,  34,  39, 175 },
+        { 138,  31,  36, 171,  27, 166,  38,  44, 229 },
+        {  57,  46,  22,  24, 128,   1,  54,  17,  37 },
+        {  47,  15,  16, 183,  34, 223,  49,  45, 183 },
+        {  46,  17,  33, 183,   6,  98,  15,  32, 183 },
+        {  40,   3,   9, 115,  51, 192,  18,   6, 223 },
+        {  65,  32,  73, 115,  28, 128,  23, 128, 205 },
+        {  87,  37,   9, 115,  59,  77,  64,  21,  47 },
+        {  67,  87,  58, 169,  82, 115,  26,  59, 179 },
+    },
+    {
+        {  54,  57, 112, 184,   5,  41,  38, 166, 213 },
+        {  30,  34,  26, 133, 152, 116,  10,  32, 134 },
+        { 104,  55,  44, 218,   9,  54,  53, 130, 226 },
+        {  75,  32,  12,  51, 192, 255, 160,  43,  51 },
+        {  39,  19,  53, 221,  26, 114,  32,  73, 255 },
+        {  31,   9,  65, 234,   2,  15,   1, 118,  73 },
+        {  56,  21,  23, 111,  59, 205,  45,  37, 192 },
+        {  88,  31,  35,  67, 102,  85,  55, 186,  85 },
+        {  55,  38,  70, 124,  73, 102,   1,  34,  98 },
+        {  64,  90,  70, 205,  40,  41,  23,  26,  57 },
+    },
+    {
+        {  86,  40,  64, 135, 148, 224,  45, 183, 128 },
+        {  22,  26,  17, 131, 240, 154,  14,   1, 209 },
+        { 164,  50,  31, 137, 154, 133,  25,  35, 218 },
+        {  83,  12,  13,  54, 192, 255,  68,  47,  28 },
+        {  45,  16,  21,  91,  64, 222,   7,   1, 197 },
+        {  56,  21,  39, 155,  60, 138,  23, 102, 213 },
+        {  18,  11,   7,  63, 144, 171,   4,   4, 246 },
+        {  85,  26,  85,  85, 128, 128,  32, 146, 171 },
+        {  35,  27,  10, 146, 174, 171,  12,  26, 128 },
+        {  51, 103,  44, 131, 131, 123,  31,   6, 158 },
+    },
+    {
+        {  68,  45, 128,  34,   1,  47,  11, 245, 171 },
+        {  62,  17,  19,  70, 146,  85,  55,  62,  70 },
+        { 102,  61,  71,  37,  34,  53,  31, 243, 192 },
+        {  75,  15,   9,   9,  64, 255, 184, 119,  16 },
+        {  37,  43,  37, 154, 100, 163,  85, 160,   1 },
+        {  63,   9,  92, 136,  28,  64,  32, 201,  85 },
+        {  56,   8,  17, 132, 137, 255,  55, 116, 128 },
+        {  86,   6,  28,   5,  64, 255,  25, 248,   1 },
+        {  58,  15,  20,  82, 135,  57,  26, 121,  40 },
+        {  69,  60,  71,  38,  73, 119,  28, 222,  37 },
+    },
+    {
+        { 101,  75, 128, 139, 118, 146, 116, 128,  85 },
+        {  56,  41,  15, 176, 236,  85,  37,   9,  62 },
+        { 190,  80,  35,  99, 180,  80, 126,  54,  45 },
+        { 146,  36,  19,  30, 171, 255,  97,  27,  20 },
+        {  71,  30,  17, 119, 118, 255,  17,  18, 138 },
+        { 101,  38,  60, 138,  55,  70,  43,  26, 142 },
+        {  32,  41,  20, 117, 151, 142,  20,  21, 163 },
+        { 138,  45,  61,  62, 219,   1,  81, 188,  64 },
+        { 112,  19,  12,  61, 195, 128,  48,   4,  24 },
+        {  85, 126,  47,  87, 176,  51,  41,  20,  32 },
+    },
+    {
+        {  66, 102, 167,  99,  74,  62,  40, 234, 128 },
+        {  41,  53,   9, 178, 241, 141,  26,   8, 107 },
+        { 134, 183,  89, 137,  98, 101, 106, 165, 148 },
+        { 104,  79,  12,  27, 217, 255,  87,  17,   7 },
+        {  74,  43,  26, 146,  73, 166,  49,  23, 157 },
+        {  65,  38, 105, 160,  51,  52,  31, 115, 128 },
+        {  47,  41,  14, 110, 182, 183,  21,  17, 194 },
+        {  87,  68,  71,  44, 114,  51,  15, 186,  23 },
+        {  66,  45,  25, 102, 197, 189,  23,  18,  22 },
+        {  72, 187, 100, 130, 157, 111,  32,  75,  80 },
+    },
+};
+
+static const int8_t vp8_segmentid_tree[][2] = // two-level code tree; the four leaves are the negated values 0..3
+{                                             // row 0 selects row 1 or row 2, which then selects the leaf
+    { 1, 2 },
+     { -0, -1 },    // '00', '01'
+     { -2, -3 },    // '10', '11'
+};
+
+static const uint8_t vp8_coeff_band[16] = // maps each of 16 indices to a band number 0..7
+{                                         // presumably indexed by coefficient position within a 4x4 block - TODO confirm
+    0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
+};
+
+static const int8_t vp8_coeff_tree[NUM_DCT_TOKENS-1][2] = // code tree for the DCT tokens, one row per node
+{                                   // entries written as -DCT_* are leaves, the others index the next row
+    { -DCT_EOB, 1 },                // '0'
+     { -DCT_0, 2 },                 // '10'
+      { -DCT_1, 3 },                // '110'
+       { 4, 6 },                    // '111' - inner node, no leaf
+        { -DCT_2, 5 },              // '11100'
+         { -DCT_3, -DCT_4 },        // '111010', '111011'
+        { 7, 8 },                   // '1111' - inner node, no leaf
+         { -DCT_CAT1, -DCT_CAT2 },  // '111100', '111101'
+         { 9, 10 },                 // '11111' - inner node, no leaf
+          { -DCT_CAT3, -DCT_CAT4 }, // '1111100', '1111101'
+          { -DCT_CAT5, -DCT_CAT6 }, // '1111110', '1111111'
+};
+
+static const uint8_t vp8_dct_cat1_prob[] = { 159, 0 }; // per-category probability lists; every list ends with a 0 entry
+static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 }; // category N carries N non-zero entries (11 for category 6),
+static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 }; // presumably one per extra magnitude bit - TODO confirm
+static const uint8_t vp8_dct_cat4_prob[] = { 176, 155, 140, 135, 0 };
+static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 };
+static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+
+static const uint8_t * const vp8_dct_cat_prob[6] = // the six vp8_dct_catN_prob lists, entry i pointing at category i+1
+{
+    vp8_dct_cat1_prob,
+    vp8_dct_cat2_prob,
+    vp8_dct_cat3_prob,
+    vp8_dct_cat4_prob,
+    vp8_dct_cat5_prob,
+    vp8_dct_cat6_prob,
+};
+
+static const uint8_t vp8_dct_cat_offset[6] = { 5, 7, 11, 19, 35, 67 }; // presumably the smallest value of each category; the gaps 2,4,8,16,32 equal 2^N for categories 1..5
+
+static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS-1] = // initial token probabilities; NUM_DCT_TOKENS-1 per entry, matching vp8_coeff_tree's row count
+{ // [4][8][3] presumably = block type, band (cf. vp8_coeff_band), context - TODO confirm against vp8.c
+    {
+        {
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+            { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+            { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1,  98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+            { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+            {  78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+        },
+        {
+            {   1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+            { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+            {  77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+            { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+            {  37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+            { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+            { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+            { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+            {  80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 246,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+    },
+    {
+        {
+            { 198,  35, 237, 223, 193, 187, 162, 160, 145, 155,  62 },
+            { 131,  45, 198, 221, 172, 176, 220, 157, 252, 221,   1 },
+            {  68,  47, 146, 208, 149, 167, 221, 162, 255, 223, 128 },
+        },
+        {
+            {   1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+            { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+            {  81,  99, 181, 242, 176, 190, 249, 202, 255, 255, 128 },
+        },
+        {
+            {   1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+            {  99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+            {  23,  91, 163, 242, 170, 187, 247, 210, 255, 255, 128 },
+        },
+        {
+            {   1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+            { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+            {  44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+            {  94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+            {  22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 },
+        },
+        {
+            {   1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+            { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+            {  35,  77, 181, 251, 193, 211, 255, 205, 128, 128, 128 },
+        },
+        {
+            {   1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+            { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+            {  45,  99, 188, 251, 195, 217, 255, 224, 128, 128, 128 },
+        },
+        {
+            {   1,   1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+            { 203,   1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 137,   1, 177, 255, 224, 255, 128, 128, 128, 128, 128 },
+        },
+    },
+    {
+        {
+            { 253,   9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+            { 175,  13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+            {  73,  17, 171, 221, 161, 179, 236, 167, 255, 234, 128 },
+        },
+        {
+            {   1,  95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+            { 239,  90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+            { 155,  77, 195, 248, 188, 195, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1,  24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+            { 201,  51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+            {  69,  46, 190, 239, 201, 218, 255, 228, 128, 128, 128 },
+        },
+        {
+            {   1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+            { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1,  16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 190,  36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+            { 149,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            {   1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 213,  62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+            {  55,  93, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+        {
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+    },
+    {
+        {
+            { 202,  24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+            { 126,  38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+            {  61,  46, 138, 219, 151, 178, 240, 170, 255, 216, 128 },
+        },
+        {
+            {   1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+            { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+            {  39,  77, 162, 232, 172, 180, 245, 178, 255, 255, 128 },
+        },
+        {
+            {   1,  52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+            { 124,  74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+            {  24,  71, 130, 219, 154, 170, 243, 182, 255, 255, 128 },
+        },
+        {
+            {   1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+            { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+            {  28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 },
+        },
+        {
+            {   1,  81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+            { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+            {  20,  95, 153, 243, 164, 173, 255, 203, 128, 128, 128 },
+        },
+        {
+            {   1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+            { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+            {  47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 },
+        },
+        {
+            {   1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+            { 141,  84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+            {  42,  80, 160, 240, 162, 185, 255, 205, 128, 128, 128 },
+        },
+        {
+            {   1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 244,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 238,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+        },
+    },
+};
+
+static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS-1] = // same shape as vp8_token_default_probs
+{ // presumably the probability used when reading each entry's "update" flag - TODO confirm against vp8.c
+    {
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
+            { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+    },
+    {
+        {
+            { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
+            { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+    },
+    {
+        {
+            { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
+            { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+    },
+    {
+        {
+            { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
+            { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+        {
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+            { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+        },
+    },
+};
+
+// FIXME: copied from h264data.h. Entry n is written as a+b*4 (a, b in 0..3), an index into a 16-element 4x4 block.
+static const uint8_t zigzag_scan[16]={
+    0+0*4, 1+0*4, 0+1*4, 0+2*4,
+    1+1*4, 2+0*4, 3+0*4, 2+1*4,
+    1+2*4, 0+3*4, 1+3*4, 2+2*4,
+    3+1*4, 3+2*4, 2+3*4, 3+3*4,
+};
+
+static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT+1] = // by name, the DC quantizer value for each index 0..VP8_MAX_QUANT
+{ // 8 rows of 16 = 128 entries, non-decreasing, all within uint8_t range (max 157)
+      4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
+     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
+     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
+     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
+     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
+     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
+    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
+};
+
+static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT+1] = // by name, the AC quantizer value for each index 0..VP8_MAX_QUANT
+{ // 8 rows of 16 = 128 entries; uint16_t because the last entries exceed 255 (max 284)
+      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
+     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
+     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
+     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
+     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
+    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
+    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
+    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
+};
+
+static const uint8_t vp8_mv_update_prob[2][19] = { // same [2][19] shape as vp8_mv_default_prob; by name, probabilities for updating those entries
+    { 237,                                         // the line breaks group each row as 1 + 1 + 7 + 10 entries
+      246,
+      253, 253, 254, 254, 254, 254, 254,
+      254, 254, 254, 254, 254, 250, 250, 252, 254, 254 },
+    { 231,                                         // second row: presumably the other MV component - TODO confirm
+      243,
+      245, 253, 254, 254, 254, 254, 254,
+      254, 254, 254, 254, 254, 251, 251, 254, 254, 254 }
+};
+
+static const uint8_t vp8_mv_default_prob[2][19] = { // by name, the initial MV probabilities; presumably one row per MV component
+    { 162,                                          // the line breaks group each row as 1 + 1 + 7 + 10 entries
+      128,                                          // (the meaning of each group is defined by the reader in vp8.c - TODO confirm)
+      225, 146, 172, 147, 214, 39, 156,
+      128, 129, 132,  75, 145, 178, 206, 239, 254, 254 },
+    { 164,
+      128,
+      204, 170, 119, 235, 140, 230, 228,
+      128, 130, 130,  74, 148, 180, 203, 236, 254, 254 }
+};
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
new file mode 100644
index 0000000000..50b029776c
--- /dev/null
+++ b/libavcodec/vp8dsp.c
@@ -0,0 +1,373 @@
+/**
+ * VP8 compatible video decoder
+ *
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dsputil.h"
+#include "vp8dsp.h"
+
+// TODO: Maybe add dequant
+static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
+{
+    int n, s03, s12, d12, d03;
+    // Two-pass 4x4 butterfly transform of dc[]. Pass 1 runs down each column, in place.
+    for (n = 0; n < 4; n++) {
+        DCTELEM *col = dc + n;          // col[4*r] is row r of column n
+        s03 = col[0] + col[12];
+        s12 = col[4] + col[8];
+        d12 = col[4] - col[8];
+        d03 = col[0] - col[12];
+        col[0]  = s03 + s12;
+        col[4]  = d03 + d12;
+        col[8]  = s03 - s12;
+        col[12] = d03 - d12;
+    }
+    // Pass 2 runs along each row; results go to coefficient 0 of block[row][0..3].
+    for (n = 0; n < 4; n++) {
+        const DCTELEM *row = dc + 4*n;
+        s03 = row[0] + row[3] + 3;      // rounding
+        s12 = row[1] + row[2];
+        d12 = row[1] - row[2];
+        d03 = row[0] - row[3] + 3;      // rounding
+        block[n][0][0] = (s03 + s12) >> 3;
+        block[n][1][0] = (d03 + d12) >> 3;
+        block[n][2][0] = (s03 - s12) >> 3;
+        block[n][3][0] = (d03 - d12) >> 3;
+    }
+}
+
+
+#define MUL_20091(a) ((((a)*20091) >> 16) + (a)) // a * (1 + 20091/65536), about 1.3066*a; note that a is evaluated twice
+#define MUL_35468(a)  (((a)*35468) >> 16)        // a * 35468/65536, about 0.5412*a
+
+static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride)
+{
+    int k, e_sum, e_dif, o_dif, o_sum;
+    DCTELEM mid[16];
+
+    // Pass 1: 1-D transform down column k of block[], stored as row k of mid[].
+    for (k = 0; k < 4; k++) {
+        e_sum = block[0*4+k] + block[2*4+k];
+        e_dif = block[0*4+k] - block[2*4+k];
+        o_dif = MUL_35468(block[1*4+k]) - MUL_20091(block[3*4+k]);
+        o_sum = MUL_20091(block[1*4+k]) + MUL_35468(block[3*4+k]);
+        mid[k*4+0] = e_sum + o_sum;
+        mid[k*4+1] = e_dif + o_dif;
+        mid[k*4+2] = e_dif - o_dif;
+        mid[k*4+3] = e_sum - o_sum;
+    }
+
+    // Pass 2: same transform down column k of mid[]; each result is rounded
+    // ((x + 4) >> 3), added to one 4-pixel row of dst and clipped to 8 bits.
+    for (k = 0; k < 4; k++, dst += stride) {
+        e_sum = mid[0*4+k] + mid[2*4+k];
+        e_dif = mid[0*4+k] - mid[2*4+k];
+        o_dif = MUL_35468(mid[1*4+k]) - MUL_20091(mid[3*4+k]);
+        o_sum = MUL_20091(mid[1*4+k]) + MUL_35468(mid[3*4+k]);
+        dst[0] = av_clip_uint8(dst[0] + ((e_sum + o_sum + 4) >> 3));
+        dst[1] = av_clip_uint8(dst[1] + ((e_dif + o_dif + 4) >> 3));
+        dst[2] = av_clip_uint8(dst[2] + ((e_dif - o_dif + 4) >> 3));
+        dst[3] = av_clip_uint8(dst[3] + ((e_sum - o_sum + 4) >> 3));
+    }
+}
+
+static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride)
+{
+    // Only block[0] is read: one rounded offset is applied to every pixel.
+    const int offset = (block[0] + 4) >> 3;
+    int row, col;
+
+    // Add the offset to a 4x4 area of dst, clipping each result to 8 bits.
+    for (row = 0; row < 4; row++, dst += stride) {
+        for (col = 0; col < 4; col++)
+            dst[col] = av_clip_uint8(dst[col] + offset);
+    }
+}
+
+
+// Declares p3..p0 and q0..q3 from the 8 samples around p (4 before, 4 from p on, `stride` apart) so helpers only need p and stride as parameters.
+#define LOAD_PIXELS\
+    int av_unused p3 = p[-4*stride];\
+    int av_unused p2 = p[-3*stride];\
+    int av_unused p1 = p[-2*stride];\
+    int av_unused p0 = p[-1*stride];\
+    int av_unused q0 = p[ 0*stride];\
+    int av_unused q1 = p[ 1*stride];\
+    int av_unused q2 = p[ 2*stride];\
+    int av_unused q3 = p[ 3*stride];
+
+static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap)
+{
+    LOAD_PIXELS
+    int delta, adj_q, adj_p;
+
+    // Filter value: 3*(q0-p0), plus the clamped outer difference p1-q1
+    // when is4tap is set; the total is clamped to the int8 range.
+    delta = 3*(q0 - p0);
+    if (is4tap)
+        delta += av_clip_int8(p1 - q1);
+    delta = av_clip_int8(delta);
+
+    // This departs from the spec by using c(a+3) >> 3,
+    // because that is what libvpx does.
+    adj_q = FFMIN(delta+4, 127) >> 3;
+    adj_p = FFMIN(delta+3, 127) >> 3;
+
+    // Whatever the spec says, clamping is required here to
+    // stay bitexact with libvpx.
+    p[-stride] = av_clip_uint8(p0 + adj_p);
+    p[0]       = av_clip_uint8(q0 - adj_q);
+
+    // only used for _inner on blocks without high edge variance
+    if (!is4tap) {
+        delta = (adj_q+1)>>1;
+        p[-2*stride] = av_clip_uint8(p1 + delta);
+        p[   stride] = av_clip_uint8(q1 - delta);
+    }
+}
+
+static av_always_inline int simple_limit(uint8_t *p, int stride, int flim)
+{
+    LOAD_PIXELS // p0/q0 are the two samples at the edge, p1/q1 the next ones outwards
+    return flim >= (FFABS(p1-q1) >> 1) + 2*FFABS(p0-q0);
+}
+
+/** E - limit at the macroblock edge, passed on to simple_limit() as 2*E+I;
+ *  I - limit for every step between neighbouring samples on one side of the edge.
+ *  Returns nonzero only when all of these limits hold. */
+static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I)
+{
+    LOAD_PIXELS
+    if (!simple_limit(p, stride, 2*E+I))
+        return 0;
+    return FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I
+        && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I;
+}
+
+// high edge variance: nonzero when either step next to the edge exceeds thresh
+static av_always_inline int hev(uint8_t *p, int stride, int thresh)
+{
+    LOAD_PIXELS
+    return !(FFABS(p1-p0) <= thresh && FFABS(q1-q0) <= thresh);
+}
+
+static av_always_inline void filter_mbedge(uint8_t *p, int stride)
+{
+    int w, adj0, adj1, adj2;
+
+    LOAD_PIXELS
+
+    // w: clamped p1-q1 plus 3*(q0-p0), clamped to the int8 range again.
+    w = av_clip_int8(av_clip_int8(p1-q1) + 3*(q0-p0));
+
+    adj0 = (27*w + 63) >> 7;    // applied to p0 / q0
+    adj1 = (18*w + 63) >> 7;    // applied to p1 / q1
+    adj2 = ( 9*w + 63) >> 7;    // applied to p2 / q2
+
+    p[-3*stride] = av_clip_uint8(p2 + adj2);
+    p[-2*stride] = av_clip_uint8(p1 + adj1);
+    p[  -stride] = av_clip_uint8(p0 + adj0);
+    p[        0] = av_clip_uint8(q0 - adj0);
+    p[   stride] = av_clip_uint8(q1 - adj1);
+    p[ 2*stride] = av_clip_uint8(q2 - adj2);
+}
+
+#define LOOP_FILTER(dir, size, stridea, strideb) /* emits vp8_<dir>_loop_filter<size>_c and its _inner_c variant; stridea steps between the size filtered positions, strideb is the sample step handed to the helpers */ \
+static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
+                                     int flim_E, int flim_I, int hev_thresh)\
+{\
+    int i;\
+    /* where normal_limit() passes: filter_common(.., 1) if hev() is set, filter_mbedge() otherwise */\
+    for (i = 0; i < size; i++)\
+        if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
+            if (hev(dst+i*stridea, strideb, hev_thresh))\
+                filter_common(dst+i*stridea, strideb, 1);\
+            else\
+                filter_mbedge(dst+i*stridea, strideb);\
+        }\
+}\
+/* _inner_c: where normal_limit() passes, filter_common() with is4tap set to the hev() result */\
+static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
+                                      int flim_E, int flim_I, int hev_thresh)\
+{\
+    int i, hv;\
+\
+    for (i = 0; i < size; i++)\
+        if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
+            hv = hev(dst+i*stridea, strideb, hev_thresh);\
+            filter_common(dst+i*stridea, strideb, hv);\
+        }\
+}
+
+LOOP_FILTER(v, 16, 1, stride) // defines vp8_v_loop_filter16_c and vp8_v_loop_filter16_inner_c
+LOOP_FILTER(h, 16, stride, 1) // defines vp8_h_loop_filter16_c and vp8_h_loop_filter16_inner_c
+LOOP_FILTER(v,  8, 1, stride) // defines vp8_v_loop_filter8_c and vp8_v_loop_filter8_inner_c
+LOOP_FILTER(h,  8, stride, 1) // defines vp8_h_loop_filter8_c and vp8_h_loop_filter8_inner_c
+
+static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
+{
+    int i;
+    // 16 consecutive positions dst+i; the helpers step by stride, and filter_common() gets 1 as its last argument
+    for (i = 0; i < 16; i++)
+        if (simple_limit(dst+i, stride, flim))
+            filter_common(dst+i, stride, 1);
+}
+
+static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
+{
+    int i;
+    // 16 positions one stride apart (dst+i*stride); the helpers step by 1, and filter_common() gets 1 as its last argument
+    for (i = 0; i < 16; i++)
+        if (simple_limit(dst+i*stride, 1, flim))
+            filter_common(dst+i*stride, 1, 1);
+}
+
+static const uint8_t subpel_filters[7][6] = { // indexed by mx-1 / my-1; magnitudes only, FILTER_6TAP/FILTER_4TAP subtract the F[1] and F[4] terms
+    { 0,   6, 123,  12,   1,   0 },
+    { 2,  11, 108,  36,   8,   1 },
+    { 0,   9,  93,  50,   6,   0 },
+    { 3,  16,  77,  77,  16,   3 },
+    { 0,   6,  50,  93,   9,   0 },
+    { 1,   8,  36, 108,  11,   2 },
+    { 0,   1,  12, 123,   6,   0 },
+};
+
+// FILTER_*TAP(src, F, stride): taps F applied around src[x] in steps of stride, +64 then >>7, then av_clip_uint8; x must exist where the macro expands
+#define FILTER_6TAP(src, F, stride) \
+    av_clip_uint8((F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + F[0]*src[x-2*stride] + \
+                   F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7)
+// 4-tap form: the F[0] and F[5] terms are left out
+#define FILTER_4TAP(src, F, stride) \
+    av_clip_uint8((F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + \
+                   F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7)
+
+#define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \
+static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int mx, int my) \
+{ \
+    const uint8_t *filter = subpel_filters[mx-1]; \
+    int x, y; \
+    /* h rows of SIZE pixels, each filtered along x (step 1) with the taps selected by mx; my is not used */ \
+    for (y = 0; y < h; y++) { \
+        for (x = 0; x < SIZE; x++) \
+            dst[x] = FILTER(src, filter, 1); \
+        dst += stride; \
+        src += stride; \
+    } \
+}
+#define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \
+static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int mx, int my) \
+{ \
+    const uint8_t *filter = subpel_filters[my-1]; \
+    int x, y; \
+    /* h rows of SIZE pixels, each filtered across rows (step stride) with the taps selected by my; mx is not used */ \
+    for (y = 0; y < h; y++) { \
+        for (x = 0; x < SIZE; x++) \
+            dst[x] = FILTER(src, filter, stride); \
+        dst += stride; \
+        src += stride; \
+    } \
+}
+#define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \
+static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int mx, int my) \
+{ \
+    const uint8_t *filter = subpel_filters[mx-1]; \
+    int x, y; \
+    uint8_t tmp_array[(2*SIZE+5)*SIZE]; /* 2*SIZE+5 rows of SIZE bytes; pass 1 writes h+5 rows -- NOTE(review): needs h <= 2*SIZE, confirm callers */ \
+    uint8_t *tmp = tmp_array; \
+    src -= 2*stride; \
+    /* pass 1: horizontal filter (taps from mx) into tmp, h+5 rows beginning two rows above the block */ \
+    for (y = 0; y < h+5; y++) { \
+        for (x = 0; x < SIZE; x++) \
+            tmp[x] = FILTERX(src, filter, 1); \
+        tmp += SIZE; \
+        src += stride; \
+    } \
+    /* pass 2: vertical filter (taps from my) over tmp with row stride SIZE, beginning at tmp row 2 */ \
+    tmp = tmp_array + 2*SIZE; \
+    filter = subpel_filters[my-1]; \
+\
+    for (y = 0; y < h; y++) { \
+        for (x = 0; x < SIZE; x++) \
+            dst[x] = FILTERY(tmp, filter, SIZE); \
+        dst += stride; \
+        tmp += SIZE; \
+    } \
+}
+
+VP8_EPEL_H(16, FILTER_4TAP, h4) // h4/h6: horizontal-only functions put_vp8_epel<SIZE>_h<taps>_c
+VP8_EPEL_H(8,  FILTER_4TAP, h4)
+VP8_EPEL_H(4,  FILTER_4TAP, h4)
+VP8_EPEL_H(16, FILTER_6TAP, h6)
+VP8_EPEL_H(8,  FILTER_6TAP, h6)
+VP8_EPEL_H(4,  FILTER_6TAP, h6)
+VP8_EPEL_V(16, FILTER_4TAP, v4) // v4/v6: vertical-only functions put_vp8_epel<SIZE>_v<taps>_c
+VP8_EPEL_V(8,  FILTER_4TAP, v4)
+VP8_EPEL_V(4,  FILTER_4TAP, v4)
+VP8_EPEL_V(16, FILTER_6TAP, v6)
+VP8_EPEL_V(8,  FILTER_6TAP, v6)
+VP8_EPEL_V(4,  FILTER_6TAP, v6)
+VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4) // hXvY: FILTERX horizontally, then FILTERY vertically
+VP8_EPEL_HV(8,  FILTER_4TAP, FILTER_4TAP, h4v4)
+VP8_EPEL_HV(4,  FILTER_4TAP, FILTER_4TAP, h4v4)
+VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6)
+VP8_EPEL_HV(8,  FILTER_4TAP, FILTER_6TAP, h4v6)
+VP8_EPEL_HV(4,  FILTER_4TAP, FILTER_6TAP, h4v6)
+VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4)
+VP8_EPEL_HV(8,  FILTER_6TAP, FILTER_4TAP, h6v4)
+VP8_EPEL_HV(4,  FILTER_6TAP, FILTER_4TAP, h6v4)
+VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6)
+VP8_EPEL_HV(8,  FILTER_6TAP, FILTER_6TAP, h6v6)
+VP8_EPEL_HV(4,  FILTER_6TAP, FILTER_6TAP, h6v6)
+
+#define VP8_MC_FUNC(IDX, SIZE) /* fills put_vp8_epel_pixels_tab[IDX][vertical][horizontal]; per direction 0 = none, 1 = 4-tap, 2 = 6-tap */ \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][0] = ff_put_vp8_pixels ## SIZE ## _c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c
+
+av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
+{ // stores the *_c implementations into every function pointer of *dsp
+    dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
+    dsp->vp8_idct_add    = vp8_idct_add_c;
+    dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
+    // loop filters generated by LOOP_FILTER: edge variants
+    dsp->vp8_v_loop_filter16 = vp8_v_loop_filter16_c;
+    dsp->vp8_h_loop_filter16 = vp8_h_loop_filter16_c;
+    dsp->vp8_v_loop_filter8  = vp8_v_loop_filter8_c;
+    dsp->vp8_h_loop_filter8  = vp8_h_loop_filter8_c;
+    // loop filters generated by LOOP_FILTER: inner variants
+    dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c;
+    dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c;
+    dsp->vp8_v_loop_filter8_inner  = vp8_v_loop_filter8_inner_c;
+    dsp->vp8_h_loop_filter8_inner  = vp8_h_loop_filter8_inner_c;
+    // simple loop filters
+    dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
+    dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
+    // motion compensation table: first index 0 holds the 16-wide, 1 the 8-wide, 2 the 4-wide functions
+    VP8_MC_FUNC(0, 16);
+    VP8_MC_FUNC(1, 8);
+    VP8_MC_FUNC(2, 4);
+}
diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h
new file mode 100644
index 0000000000..fa7bf76d29
--- /dev/null
+++ b/libavcodec/vp8dsp.h
@@ -0,0 +1,67 @@
+/**
+ * VP8 compatible video decoder
+ *
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVCODEC_VP8DSP_H
+#define AVCODEC_VP8DSP_H
+
+#include "dsputil.h"
+
+typedef struct VP8DSPContext {
+    void (*vp8_luma_dc_wht)(DCTELEM block[4][4][16], DCTELEM dc[16]);
+    void (*vp8_idct_add)(uint8_t *dst, DCTELEM block[16], int stride);
+    void (*vp8_idct_dc_add)(uint8_t *dst, DCTELEM block[16], int stride);
+
+    // loop filter applied to edges between macroblocks
+    void (*vp8_v_loop_filter16)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_h_loop_filter16)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_v_loop_filter8)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_h_loop_filter8)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+
+    // loop filter applied to inner macroblock edges
+    void (*vp8_v_loop_filter16_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_h_loop_filter16_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_v_loop_filter8_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+    void (*vp8_h_loop_filter8_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+
+    void (*vp8_v_loop_filter_simple)(uint8_t *dst, int stride, int flim);
+    void (*vp8_h_loop_filter_simple)(uint8_t *dst, int stride, int flim);
+
+    /**
+     * first dimension: 4-log2(width), i.e. 0 = 16, 1 = 8, 2 = 4 pixels wide; the number of rows is passed separately as h
+     * second dimension: 0 if no vertical interpolation is needed;
+     *                   1 4-tap vertical interpolation filter (my & 1)
+     *                   2 6-tap vertical interpolation filter (!(my & 1))
+     * third dimension: same as second dimension, for horizontal interpolation
+     * so something like put_vp8_epel_pixels_tab[4-log2(width)][2*!!my-(my&1)][2*!!mx-(mx&1)](..., mx, my)
+     */
+    h264_chroma_mc_func put_vp8_epel_pixels_tab[3][3][3];
+} VP8DSPContext;
+
+void ff_put_vp8_pixels16_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); // stored at put_vp8_epel_pixels_tab[0][0][0]
+void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); // stored at put_vp8_epel_pixels_tab[1][0][0]
+void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); // stored at put_vp8_epel_pixels_tab[2][0][0]
+
+void ff_vp8dsp_init(VP8DSPContext *c); // fills every function pointer of *c with the C implementations
+
+#endif /* AVCODEC_VP8DSP_H */