From 09e8163625ec985fa4388000fe90b9a3550122aa Mon Sep 17 00:00:00 2001
From: Stefano Sabatini <stefano.sabatini-lala@poste.it>
Date: Tue, 19 Jul 2011 14:23:47 +0200
Subject: [PATCH 01/14] imgconvert: remove unused glue and xglue macros

Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/imgconvert.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 351ed7ada7..c446aaca38 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -42,9 +42,6 @@
 #include "x86/dsputil_mmx.h"
 #endif
 
-#define xglue(x, y) x ## y
-#define glue(x, y) xglue(x, y)
-
 #define FF_COLOR_RGB      0 /**< RGB color space */
 #define FF_COLOR_GRAY     1 /**< gray color space */
 #define FF_COLOR_YUV      2 /**< YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */

From 364d64275c66ac3b6dff699e80c167b6815e8f57 Mon Sep 17 00:00:00 2001
From: Thierry Foucu <tfoucu@gmail.com>
Date: Wed, 20 Jul 2011 20:33:22 -0700
Subject: [PATCH 02/14] ffprobe: display bitstream level.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 ffprobe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ffprobe.c b/ffprobe.c
index cb4a4c3106..2457b063a5 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -201,6 +201,7 @@ static void show_stream(AVFormatContext *fmt_ctx, int stream_idx)
             }
             printf("pix_fmt=%s\n",                 dec_ctx->pix_fmt != PIX_FMT_NONE ?
                    av_pix_fmt_descriptors[dec_ctx->pix_fmt].name : "unknown");
+            printf("level=%d\n",                   dec_ctx->level);
             break;
 
         case AVMEDIA_TYPE_AUDIO:

From 0637e50579c77ce16373a45dc173aa449e3a8011 Mon Sep 17 00:00:00 2001
From: Thierry Foucu <tfoucu@gmail.com>
Date: Wed, 20 Jul 2011 20:33:23 -0700
Subject: [PATCH 03/14] mpeg4: decode Level Profile for MPEG4 Part 2.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/mpeg4videodec.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index df0191dc9f..b48f44d998 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -1523,6 +1523,22 @@ static int mpeg4_decode_gop_header(MpegEncContext * s, GetBitContext *gb){
     return 0;
 }
 
+static int mpeg4_decode_profile_level(MpegEncContext * s, GetBitContext *gb){
+  int profile_and_level_indication;
+
+  profile_and_level_indication = get_bits(gb, 8);
+
+  s->avctx->profile = (profile_and_level_indication & 0xf0) >> 4;
+  s->avctx->level   = (profile_and_level_indication & 0x0f);
+
+  // for Simple profile, level 0
+  if (s->avctx->profile == 0 && s->avctx->level == 8) {
+      s->avctx->level = 0;
+  }
+
+  return 0;
+}
+
 static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
     int width, height, vo_ver_id;
 
@@ -2177,6 +2193,9 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
         else if(startcode == GOP_STARTCODE){
             mpeg4_decode_gop_header(s, gb);
         }
+        else if(startcode == VOS_STARTCODE){
+            mpeg4_decode_profile_level(s, gb);
+        }
         else if(startcode == VOP_STARTCODE){
             break;
         }

From c4ab43ff3c65de1d3d565a12e1ac8fae176bbd1a Mon Sep 17 00:00:00 2001
From: Thierry Foucu <tfoucu@gmail.com>
Date: Wed, 20 Jul 2011 20:33:24 -0700
Subject: [PATCH 04/14] mpeg4: add Mpeg4 Profiles names.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/avcodec.h       | 17 +++++++++++++++++
 libavcodec/mpeg4videodec.c | 20 ++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 2d4515519a..b6cac7c77d 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2231,6 +2231,23 @@ typedef struct AVCodecContext {
 #define FF_PROFILE_VC1_COMPLEX  2
 #define FF_PROFILE_VC1_ADVANCED 3
 
+#define FF_PROFILE_MPEG4_SIMPLE                     0
+#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE            1
+#define FF_PROFILE_MPEG4_CORE                       2
+#define FF_PROFILE_MPEG4_MAIN                       3
+#define FF_PROFILE_MPEG4_N_BIT                      4
+#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE           5
+#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION      6
+#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE     7
+#define FF_PROFILE_MPEG4_HYBRID                     8
+#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME         9
+#define FF_PROFILE_MPEG4_CORE_SCALABLE             10
+#define FF_PROFILE_MPEG4_ADVANCED_CODING           11
+#define FF_PROFILE_MPEG4_ADVANCED_CORE             12
+#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13
+#define FF_PROFILE_MPEG4_SIMPLE_STUDIO             14
+#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE           15
+
     /**
      * level
      * - encoding: Set by user.
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index b48f44d998..b1abb618ef 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -2256,6 +2256,25 @@ static av_cold int decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+static const AVProfile mpeg4_video_profiles[] = {
+    { FF_PROFILE_MPEG4_SIMPLE,                    "Simple Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_SCALABLE,           "Simple Scalable Profile" },
+    { FF_PROFILE_MPEG4_CORE,                      "Core Profile" },
+    { FF_PROFILE_MPEG4_MAIN,                      "Main Profile" },
+    { FF_PROFILE_MPEG4_N_BIT,                     "N-bit Profile" },
+    { FF_PROFILE_MPEG4_SCALABLE_TEXTURE,          "Scalable Texture Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION,     "Simple Face Animation Profile" },
+    { FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE,    "Basic Animated Texture Profile" },
+    { FF_PROFILE_MPEG4_HYBRID,                    "Hybrid Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_REAL_TIME,        "Advanced Real Time Simple Profile" },
+    { FF_PROFILE_MPEG4_CORE_SCALABLE,             "Code Scalable Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_CODING,           "Advanced Coding Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_CORE,             "Advanced Core Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_STUDIO,             "Simple Studio Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_SIMPLE,           "Advanced Simple Profile" },
+};
+
 AVCodec ff_mpeg4_decoder = {
     "mpeg4",
     AVMEDIA_TYPE_VIDEO,
@@ -2270,6 +2289,7 @@ AVCodec ff_mpeg4_decoder = {
     .max_lowres= 3,
     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
     .pix_fmts= ff_hwaccel_pixfmt_list_420,
+    .profiles = NULL_IF_CONFIG_SMALL(mpeg4_video_profiles),
     .update_thread_context= ONLY_IF_THREADS_ENABLED(ff_mpeg_update_thread_context)
 };
 

From 73c0dd939d503aaf83d3266181a156615eeb77d5 Mon Sep 17 00:00:00 2001
From: Thierry Foucu <tfoucu@gmail.com>
Date: Wed, 20 Jul 2011 20:33:25 -0700
Subject: [PATCH 05/14] mov: add clcp type track as Subtitle stream.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavformat/mov.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 0de2bfc325..92b1b14f15 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -451,7 +451,7 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
     else if(type == MKTAG('m','1','a',' '))
         st->codec->codec_id = CODEC_ID_MP2;
-    else if(type == MKTAG('s','u','b','p'))
+    else if((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p')))
         st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
 
     avio_rb32(pb); /* component  manufacture */

From 0a72533e9854aa615bb6d1569dd5f0c4cd031429 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Wed, 20 Jul 2011 20:01:56 +0100
Subject: [PATCH 06/14] jfdctint: add 10-bit version

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/bfin/dsputil_bfin.c  |   6 +-
 libavcodec/dct-test.c           |   2 +-
 libavcodec/dsputil.c            |  27 ++-
 libavcodec/dsputil.h            |   6 +-
 libavcodec/jfdctint.c           | 413 ++------------------------------
 libavcodec/jfdctint_template.c  | 405 +++++++++++++++++++++++++++++++
 libavcodec/mpegvideo_enc.c      |   3 +-
 libavcodec/ppc/dsputil_ppc.c    |   5 +-
 libavcodec/x86/dsputilenc_mmx.c |   3 +-
 9 files changed, 454 insertions(+), 416 deletions(-)
 create mode 100644 libavcodec/jfdctint_template.c

diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index d06bd8e4fd..691c060733 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -253,10 +253,10 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
 /*     c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */
     }
 
-    if (avctx->dct_algo == FF_DCT_AUTO)
-        c->fdct               = ff_bfin_fdct;
-
     if (avctx->bits_per_raw_sample <= 8) {
+        if (avctx->dct_algo == FF_DCT_AUTO)
+            c->fdct                  = ff_bfin_fdct;
+
         if (avctx->idct_algo == FF_IDCT_VP3) {
             c->idct_permutation_type = FF_NO_IDCT_PERM;
             c->idct                  = ff_bfin_vp3_idct;
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index 136f5c4742..962b370c2b 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -88,7 +88,7 @@ static const struct algo fdct_tab[] = {
     { "REF-DBL",        ff_ref_fdct,           NO_PERM    },
     { "FAAN",           ff_faandct,            FAAN_SCALE },
     { "IJG-AAN-INT",    fdct_ifast,            SCALE_PERM },
-    { "IJG-LLM-INT",    ff_jpeg_fdct_islow,    NO_PERM    },
+    { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
 
 #if HAVE_MMX
     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 4008389a9d..a99be55eac 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2848,17 +2848,22 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     ff_check_alignment();
 
 #if CONFIG_ENCODERS
-    if(avctx->dct_algo==FF_DCT_FASTINT) {
-        c->fdct = fdct_ifast;
-        c->fdct248 = fdct_ifast248;
-    }
-    else if(avctx->dct_algo==FF_DCT_FAAN) {
-        c->fdct = ff_faandct;
-        c->fdct248 = ff_faandct248;
-    }
-    else {
-        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
-        c->fdct248 = ff_fdct248_islow;
+    if (avctx->bits_per_raw_sample == 10) {
+        c->fdct    = ff_jpeg_fdct_islow_10;
+        c->fdct248 = ff_fdct248_islow_10;
+    } else {
+        if(avctx->dct_algo==FF_DCT_FASTINT) {
+            c->fdct    = fdct_ifast;
+            c->fdct248 = fdct_ifast248;
+        }
+        else if(avctx->dct_algo==FF_DCT_FAAN) {
+            c->fdct    = ff_faandct;
+            c->fdct248 = ff_faandct248;
+        }
+        else {
+            c->fdct    = ff_jpeg_fdct_islow_8; //slow/accurate/default
+            c->fdct248 = ff_fdct248_islow_8;
+        }
     }
 #endif //CONFIG_ENCODERS
 
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 8cd3af60ca..47c13a1adb 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -40,8 +40,10 @@ typedef short DCTELEM;
 
 void fdct_ifast (DCTELEM *data);
 void fdct_ifast248 (DCTELEM *data);
-void ff_jpeg_fdct_islow (DCTELEM *data);
-void ff_fdct248_islow (DCTELEM *data);
+void ff_jpeg_fdct_islow_8(DCTELEM *data);
+void ff_jpeg_fdct_islow_10(DCTELEM *data);
+void ff_fdct248_islow_8(DCTELEM *data);
+void ff_fdct248_islow_10(DCTELEM *data);
 
 void j_rev_dct (DCTELEM *data);
 void j_rev_dct4 (DCTELEM *data);
diff --git a/libavcodec/jfdctint.c b/libavcodec/jfdctint.c
index 072c7440b5..0482bc5643 100644
--- a/libavcodec/jfdctint.c
+++ b/libavcodec/jfdctint.c
@@ -1,402 +1,25 @@
-/*
- * jfdctint.c
- *
- * This file is part of the Independent JPEG Group's software.
- *
- * The authors make NO WARRANTY or representation, either express or implied,
- * with respect to this software, its quality, accuracy, merchantability, or
- * fitness for a particular purpose.  This software is provided "AS IS", and
- * you, its user, assume the entire risk as to its quality and accuracy.
- *
- * This software is copyright (C) 1991-1996, Thomas G. Lane.
- * All Rights Reserved except as specified below.
- *
- * Permission is hereby granted to use, copy, modify, and distribute this
- * software (or portions thereof) for any purpose, without fee, subject to
- * these conditions:
- * (1) If any part of the source code for this software is distributed, then
- * this README file must be included, with this copyright and no-warranty
- * notice unaltered; and any additions, deletions, or changes to the original
- * files must be clearly indicated in accompanying documentation.
- * (2) If only executable code is distributed, then the accompanying
- * documentation must state that "this software is based in part on the work
- * of the Independent JPEG Group".
- * (3) Permission for use of this software is granted only if the user accepts
- * full responsibility for any undesirable consequences; the authors accept
- * NO LIABILITY for damages of any kind.
- *
- * These conditions apply to any software derived from or based on the IJG
- * code, not just to the unmodified library.  If you use our work, you ought
- * to acknowledge us.
- *
- * Permission is NOT granted for the use of any IJG author's name or company
- * name in advertising or publicity relating to this software or products
- * derived from it.  This software may be referred to only as "the Independent
- * JPEG Group's software".
- *
- * We specifically permit and encourage the use of this software as the basis
- * of commercial products, provided that all warranty or liability claims are
- * assumed by the product vendor.
- *
- * This file contains a slow-but-accurate integer implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on an algorithm described in
- *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- * The primary algorithm described there uses 11 multiplies and 29 adds.
- * We use their alternate method with 12 multiplies and 32 adds.
- * The advantage of this method is that no data path contains more than one
- * multiplication; this allows a very simple and accurate implementation in
- * scaled fixed-point arithmetic, with a minimal number of shifts.
- */
-
 /**
- * @file
- * Independent JPEG Group's slow & accurate dct.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "libavutil/common.h"
-#include "dsputil.h"
-
-#define DCTSIZE 8
-#define BITS_IN_JSAMPLE 8
-#define GLOBAL(x) x
-#define RIGHT_SHIFT(x, n) ((x) >> (n))
-#define MULTIPLY16C16(var,const) ((var)*(const))
-
-#if 1 //def USE_ACCURATE_ROUNDING
-#define DESCALE(x,n)  RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
-#else
-#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
-#endif
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
-#endif
-
-
-/*
- * The poop on this scaling stuff is as follows:
+ * This file is part of Libav.
  *
- * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
- * larger than the true DCT outputs.  The final outputs are therefore
- * a factor of N larger than desired; since N=8 this can be cured by
- * a simple right shift at the end of the algorithm.  The advantage of
- * this arrangement is that we save two multiplications per 1-D DCT,
- * because the y0 and y4 outputs need not be divided by sqrt(N).
- * In the IJG code, this factor of 8 is removed by the quantization step
- * (in jcdctmgr.c), NOT in this module.
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
  *
- * We have to do addition and subtraction of the integer inputs, which
- * is no problem, and multiplication by fractional constants, which is
- * a problem to do in integer arithmetic.  We multiply all the constants
- * by CONST_SCALE and convert them to integer constants (thus retaining
- * CONST_BITS bits of precision in the constants).  After doing a
- * multiplication we have to divide the product by CONST_SCALE, with proper
- * rounding, to produce the correct output.  This division can be done
- * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
- * as long as possible so that partial sums can be added together with
- * full fractional precision.
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
  *
- * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- * they are represented to better-than-integral precision.  These outputs
- * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
- * with the recommended scaling.  (For 12-bit sample data, the intermediate
- * array is int32_t anyway.)
- *
- * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
- * shows that the values given below are the most effective.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  13
-#define PASS1_BITS  4   /* set this to 2 if 16x16 multiplies are faster */
-#else
-#define CONST_BITS  13
-#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
-#endif
+#define BIT_DEPTH 8
+#include "jfdctint_template.c"
+#undef BIT_DEPTH
 
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 13
-#define FIX_0_298631336  ((int32_t)  2446)      /* FIX(0.298631336) */
-#define FIX_0_390180644  ((int32_t)  3196)      /* FIX(0.390180644) */
-#define FIX_0_541196100  ((int32_t)  4433)      /* FIX(0.541196100) */
-#define FIX_0_765366865  ((int32_t)  6270)      /* FIX(0.765366865) */
-#define FIX_0_899976223  ((int32_t)  7373)      /* FIX(0.899976223) */
-#define FIX_1_175875602  ((int32_t)  9633)      /* FIX(1.175875602) */
-#define FIX_1_501321110  ((int32_t)  12299)     /* FIX(1.501321110) */
-#define FIX_1_847759065  ((int32_t)  15137)     /* FIX(1.847759065) */
-#define FIX_1_961570560  ((int32_t)  16069)     /* FIX(1.961570560) */
-#define FIX_2_053119869  ((int32_t)  16819)     /* FIX(2.053119869) */
-#define FIX_2_562915447  ((int32_t)  20995)     /* FIX(2.562915447) */
-#define FIX_3_072711026  ((int32_t)  25172)     /* FIX(3.072711026) */
-#else
-#define FIX_0_298631336  FIX(0.298631336)
-#define FIX_0_390180644  FIX(0.390180644)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_765366865  FIX(0.765366865)
-#define FIX_0_899976223  FIX(0.899976223)
-#define FIX_1_175875602  FIX(1.175875602)
-#define FIX_1_501321110  FIX(1.501321110)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_1_961570560  FIX(1.961570560)
-#define FIX_2_053119869  FIX(2.053119869)
-#define FIX_2_562915447  FIX(2.562915447)
-#define FIX_3_072711026  FIX(3.072711026)
-#endif
-
-
-/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
- * For 8-bit samples with the recommended scaling, all the variable
- * and constant values involved are no more than 16 bits wide, so a
- * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
- * For 12-bit samples, a full 32-bit multiplication will be needed.
- */
-
-#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
-#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
-#else
-#define MULTIPLY(var,const)  ((var) * (const))
-#endif
-
-
-static av_always_inline void row_fdct(DCTELEM * data){
-  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  int tmp10, tmp11, tmp12, tmp13;
-  int z1, z2, z3, z4, z5;
-  DCTELEM *dataptr;
-  int ctr;
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    tmp0 = dataptr[0] + dataptr[7];
-    tmp7 = dataptr[0] - dataptr[7];
-    tmp1 = dataptr[1] + dataptr[6];
-    tmp6 = dataptr[1] - dataptr[6];
-    tmp2 = dataptr[2] + dataptr[5];
-    tmp5 = dataptr[2] - dataptr[5];
-    tmp3 = dataptr[3] + dataptr[4];
-    tmp4 = dataptr[3] - dataptr[4];
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
-     */
-
-    tmp10 = tmp0 + tmp3;
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-                                   CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-                                   CONST_BITS-PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * cK represents cos(K*pi/16).
-     * i0..i3 in the paper are tmp4..tmp7 here.
-     */
-
-    z1 = tmp4 + tmp7;
-    z2 = tmp5 + tmp6;
-    z3 = tmp4 + tmp6;
-    z4 = tmp5 + tmp7;
-    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-
-    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
-    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
-    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-
-    z3 += z5;
-    z4 += z5;
-
-    dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
-    dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;         /* advance pointer to next row */
-  }
-}
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-
-GLOBAL(void)
-ff_jpeg_fdct_islow (DCTELEM * data)
-{
-  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  int tmp10, tmp11, tmp12, tmp13;
-  int z1, z2, z3, z4, z5;
-  DCTELEM *dataptr;
-  int ctr;
-
-  row_fdct(data);
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
-     */
-
-    tmp10 = tmp0 + tmp3;
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-                                           CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-                                           CONST_BITS+PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * cK represents cos(K*pi/16).
-     * i0..i3 in the paper are tmp4..tmp7 here.
-     */
-
-    z1 = tmp4 + tmp7;
-    z2 = tmp5 + tmp6;
-    z3 = tmp4 + tmp6;
-    z4 = tmp5 + tmp7;
-    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-
-    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
-    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
-    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-
-    z3 += z5;
-    z4 += z5;
-
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
-                                           CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
-                                           CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
-                                           CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
-                                           CONST_BITS+PASS1_BITS);
-
-    dataptr++;                  /* advance pointer to next column */
-  }
-}
-
-/*
- * The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT
- * on the rows and then, instead of doing even and odd, part on the colums
- * you do even part two times.
- */
-GLOBAL(void)
-ff_fdct248_islow (DCTELEM * data)
-{
-  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  int tmp10, tmp11, tmp12, tmp13;
-  int z1;
-  DCTELEM *dataptr;
-  int ctr;
-
-  row_fdct(data);
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
-     tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-     tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
-     tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
-     tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
-     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-     tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
-     tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-
-     tmp10 = tmp0 + tmp3;
-     tmp11 = tmp1 + tmp2;
-     tmp12 = tmp1 - tmp2;
-     tmp13 = tmp0 - tmp3;
-
-     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
-     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-
-     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-                                            CONST_BITS+PASS1_BITS);
-     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-                                            CONST_BITS+PASS1_BITS);
-
-     tmp10 = tmp4 + tmp7;
-     tmp11 = tmp5 + tmp6;
-     tmp12 = tmp5 - tmp6;
-     tmp13 = tmp4 - tmp7;
-
-     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
-     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-
-     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-                                            CONST_BITS+PASS1_BITS);
-     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-                                            CONST_BITS+PASS1_BITS);
-
-     dataptr++;                 /* advance pointer to next column */
-  }
-}
+#define BIT_DEPTH 10
+#include "jfdctint_template.c"
+#undef BIT_DEPTH
diff --git a/libavcodec/jfdctint_template.c b/libavcodec/jfdctint_template.c
new file mode 100644
index 0000000000..e60e72a412
--- /dev/null
+++ b/libavcodec/jfdctint_template.c
@@ -0,0 +1,405 @@
+/*
+ * jfdctint.c
+ *
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or
+ * fitness for a particular purpose.  This software is provided "AS IS", and
+ * you, its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1991-1996, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to
+ * these conditions:
+ * (1) If any part of the source code for this software is distributed, then
+ * this README file must be included, with this copyright and no-warranty
+ * notice unaltered; and any additions, deletions, or changes to the original
+ * files must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work
+ * of the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ *
+ * These conditions apply to any software derived from or based on the IJG
+ * code, not just to the unmodified library.  If you use our work, you ought
+ * to acknowledge us.
+ *
+ * Permission is NOT granted for the use of any IJG author's name or company
+ * name in advertising or publicity relating to this software or products
+ * derived from it.  This software may be referred to only as "the Independent
+ * JPEG Group's software".
+ *
+ * We specifically permit and encourage the use of this software as the basis
+ * of commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+/**
+ * @file
+ * Independent JPEG Group's slow & accurate dct.
+ */
+
+#include "libavutil/common.h"
+#include "dsputil.h"
+
+#include "bit_depth_template.c"
+
+#define DCTSIZE 8
+#define BITS_IN_JSAMPLE BIT_DEPTH
+#define GLOBAL(x) x
+#define RIGHT_SHIFT(x, n) ((x) >> (n))
+#define MULTIPLY16C16(var,const) ((var)*(const))
+
+#if 1 //def USE_ACCURATE_ROUNDING
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
+#else
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+#error  "Sorry, this code only copes with 8x8 DCTs."
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is int32_t anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#undef CONST_BITS
+#undef PASS1_BITS
+#undef OUT_SHIFT
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  4   /* set this to 2 if 16x16 multiplies are faster */
+#define OUT_SHIFT   PASS1_BITS
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
+#define OUT_SHIFT   (PASS1_BITS + 1)
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((int32_t)  2446)      /* FIX(0.298631336) */
+#define FIX_0_390180644  ((int32_t)  3196)      /* FIX(0.390180644) */
+#define FIX_0_541196100  ((int32_t)  4433)      /* FIX(0.541196100) */
+#define FIX_0_765366865  ((int32_t)  6270)      /* FIX(0.765366865) */
+#define FIX_0_899976223  ((int32_t)  7373)      /* FIX(0.899976223) */
+#define FIX_1_175875602  ((int32_t)  9633)      /* FIX(1.175875602) */
+#define FIX_1_501321110  ((int32_t)  12299)     /* FIX(1.501321110) */
+#define FIX_1_847759065  ((int32_t)  15137)     /* FIX(1.847759065) */
+#define FIX_1_961570560  ((int32_t)  16069)     /* FIX(1.961570560) */
+#define FIX_2_053119869  ((int32_t)  16819)     /* FIX(2.053119869) */
+#define FIX_2_562915447  ((int32_t)  20995)     /* FIX(2.562915447) */
+#define FIX_3_072711026  ((int32_t)  25172)     /* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+static av_always_inline void FUNC(row_fdct)(DCTELEM *data)
+{
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1, z2, z3, z4, z5;
+  DCTELEM *dataptr;
+  int ctr;
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                   CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                   CONST_BITS-PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+}
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+FUNC(ff_jpeg_fdct_islow)(DCTELEM *data)
+{
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1, z2, z3, z4, z5;
+  DCTELEM *dataptr;
+  int ctr;
+
+  FUNC(row_fdct)(data);
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+    dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                 CONST_BITS + OUT_SHIFT);
+    dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                 CONST_BITS + OUT_SHIFT);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    dataptr[DCTSIZE*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS + OUT_SHIFT);
+    dataptr[DCTSIZE*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS + OUT_SHIFT);
+    dataptr[DCTSIZE*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS + OUT_SHIFT);
+    dataptr[DCTSIZE*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS + OUT_SHIFT);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+/*
+ * The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT
+ * on the rows and then, instead of doing even and odd, part on the colums
+ * you do even part two times.
+ */
+GLOBAL(void)
+FUNC(ff_fdct248_islow)(DCTELEM *data)
+{
+  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  int tmp10, tmp11, tmp12, tmp13;
+  int z1;
+  DCTELEM *dataptr;
+  int ctr;
+
+  FUNC(row_fdct)(data);
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
+     tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+     tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+     tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+     tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
+     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+     tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+     tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+     tmp10 = tmp0 + tmp3;
+     tmp11 = tmp1 + tmp2;
+     tmp12 = tmp1 - tmp2;
+     tmp13 = tmp0 - tmp3;
+
+     dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+     dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+     dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                  CONST_BITS+OUT_SHIFT);
+     dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                  CONST_BITS+OUT_SHIFT);
+
+     tmp10 = tmp4 + tmp7;
+     tmp11 = tmp5 + tmp6;
+     tmp12 = tmp5 - tmp6;
+     tmp13 = tmp4 - tmp7;
+
+     dataptr[DCTSIZE*1] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+     dataptr[DCTSIZE*5] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+     dataptr[DCTSIZE*3] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                  CONST_BITS + OUT_SHIFT);
+     dataptr[DCTSIZE*7] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+                                  CONST_BITS + OUT_SHIFT);
+
+     dataptr++;                 /* advance pointer to next column */
+  }
+}
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 4b4636b32b..c4ca7b30a9 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -69,7 +69,8 @@ void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][6
 
     for(qscale=qmin; qscale<=qmax; qscale++){
         int i;
-        if (dsp->fdct == ff_jpeg_fdct_islow
+        if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
+            dsp->fdct == ff_jpeg_fdct_islow_10
 #ifdef FAAN_POSTSCALE
             || dsp->fdct == ff_faandct
 #endif
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index c1f68fc5f7..b6de39fae7 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -172,8 +172,9 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
         c->gmc1 = gmc1_altivec;
 
 #if CONFIG_ENCODERS
-        if (avctx->dct_algo == FF_DCT_AUTO ||
-            avctx->dct_algo == FF_DCT_ALTIVEC) {
+        if (avctx->bits_per_raw_sample <= 8 &&
+            (avctx->dct_algo == FF_DCT_AUTO ||
+             avctx->dct_algo == FF_DCT_ALTIVEC)) {
             c->fdct = fdct_altivec;
         }
 #endif //CONFIG_ENCODERS
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 037389137b..ea03e9232f 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -1101,7 +1101,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
 
     if (mm_flags & AV_CPU_FLAG_MMX) {
         const int dct_algo = avctx->dct_algo;
-        if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
+        if (avctx->bits_per_raw_sample <= 8 &&
+            (dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
             if(mm_flags & AV_CPU_FLAG_SSE2){
                 c->fdct = ff_fdct_sse2;
             }else if(mm_flags & AV_CPU_FLAG_MMX2){

From 5cc2600964c72dad995efb18c918a63e0965f8db Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 21 Jul 2011 12:39:41 +0100
Subject: [PATCH 07/14] dsputil: create 16/32-bit dctcoef versions of some
 functions

High bitdepth H.264 needs 32-bit transform coefficients, whereas
dnxhd does not.  This creates a conflict with the templated
functions operating on DCTELEM data.  This patch adds a field
allowing the caller to choose the element size in dsputil_init()
and adds the required functions.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/dsputil.c          |  24 +++++---
 libavcodec/dsputil.h          |   5 ++
 libavcodec/dsputil_template.c | 106 +++++++++++++++++++---------------
 libavcodec/h264.c             |   1 +
 4 files changed, 80 insertions(+), 56 deletions(-)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index a99be55eac..b18164065d 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -3159,13 +3159,13 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
 
 
-#define BIT_DEPTH_FUNCS(depth)\
+#define BIT_DEPTH_FUNCS(depth, dct)\
     c->draw_edges                    = FUNCC(draw_edges            , depth);\
     c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
-    c->clear_block                   = FUNCC(clear_block           , depth);\
-    c->clear_blocks                  = FUNCC(clear_blocks          , depth);\
-    c->add_pixels8                   = FUNCC(add_pixels8           , depth);\
-    c->add_pixels4                   = FUNCC(add_pixels4           , depth);\
+    c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
+    c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
+    c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
+    c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
     c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
     c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
 \
@@ -3199,15 +3199,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 
     switch (avctx->bits_per_raw_sample) {
     case 9:
-        BIT_DEPTH_FUNCS(9);
+        if (c->dct_bits == 32) {
+            BIT_DEPTH_FUNCS(9, _32);
+        } else {
+            BIT_DEPTH_FUNCS(9, _16);
+        }
         break;
     case 10:
-        BIT_DEPTH_FUNCS(10);
+        if (c->dct_bits == 32) {
+            BIT_DEPTH_FUNCS(10, _32);
+        } else {
+            BIT_DEPTH_FUNCS(10, _16);
+        }
         break;
     default:
         av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
     case 8:
-        BIT_DEPTH_FUNCS(8);
+        BIT_DEPTH_FUNCS(8, _16);
         break;
     }
 
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 47c13a1adb..0ba36d77ec 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -219,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin
  * DSPContext.
  */
 typedef struct DSPContext {
+    /**
+     * Size of DCT coefficients.
+     */
+    int dct_bits;
+
     /* pixel ops : interface with DCT */
     void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
     void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index 9f8cf557c8..5863275c00 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -192,43 +192,66 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i
     }
 }
 
-static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
-{
-    int i;
-    pixel *restrict pixels = (pixel *restrict)_pixels;
-    dctcoef *block = (dctcoef*)_block;
-    line_size /= sizeof(pixel);
-
-    for(i=0;i<8;i++) {
-        pixels[0] += block[0];
-        pixels[1] += block[1];
-        pixels[2] += block[2];
-        pixels[3] += block[3];
-        pixels[4] += block[4];
-        pixels[5] += block[5];
-        pixels[6] += block[6];
-        pixels[7] += block[7];
-        pixels += line_size;
-        block += 8;
-    }
+#define DCTELEM_FUNCS(dctcoef, suffix)                                  \
+static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels,     \
+                                         DCTELEM *_block,               \
+                                         int line_size)                 \
+{                                                                       \
+    int i;                                                              \
+    pixel *restrict pixels = (pixel *restrict)_pixels;                  \
+    dctcoef *block = (dctcoef*)_block;                                  \
+    line_size /= sizeof(pixel);                                         \
+                                                                        \
+    for(i=0;i<8;i++) {                                                  \
+        pixels[0] += block[0];                                          \
+        pixels[1] += block[1];                                          \
+        pixels[2] += block[2];                                          \
+        pixels[3] += block[3];                                          \
+        pixels[4] += block[4];                                          \
+        pixels[5] += block[5];                                          \
+        pixels[6] += block[6];                                          \
+        pixels[7] += block[7];                                          \
+        pixels += line_size;                                            \
+        block += 8;                                                     \
+    }                                                                   \
+}                                                                       \
+                                                                        \
+static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels,     \
+                                         DCTELEM *_block,               \
+                                         int line_size)                 \
+{                                                                       \
+    int i;                                                              \
+    pixel *restrict pixels = (pixel *restrict)_pixels;                  \
+    dctcoef *block = (dctcoef*)_block;                                  \
+    line_size /= sizeof(pixel);                                         \
+                                                                        \
+    for(i=0;i<4;i++) {                                                  \
+        pixels[0] += block[0];                                          \
+        pixels[1] += block[1];                                          \
+        pixels[2] += block[2];                                          \
+        pixels[3] += block[3];                                          \
+        pixels += line_size;                                            \
+        block += 4;                                                     \
+    }                                                                   \
+}                                                                       \
+                                                                        \
+static void FUNCC(clear_block ## suffix)(DCTELEM *block)                \
+{                                                                       \
+    memset(block, 0, sizeof(dctcoef)*64);                               \
+}                                                                       \
+                                                                        \
+/**                                                                     \
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64)                              \
+ */                                                                     \
+static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks)              \
+{                                                                       \
+    memset(blocks, 0, sizeof(dctcoef)*6*64);                            \
 }
 
-static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
-{
-    int i;
-    pixel *restrict pixels = (pixel *restrict)_pixels;
-    dctcoef *block = (dctcoef*)_block;
-    line_size /= sizeof(pixel);
-
-    for(i=0;i<4;i++) {
-        pixels[0] += block[0];
-        pixels[1] += block[1];
-        pixels[2] += block[2];
-        pixels[3] += block[3];
-        pixels += line_size;
-        block += 4;
-    }
-}
+DCTELEM_FUNCS(DCTELEM, _16)
+#if BIT_DEPTH > 8
+DCTELEM_FUNCS(dctcoef, _32)
+#endif
 
 #define PIXOP2(OPNAME, OP) \
 static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
@@ -1231,16 +1254,3 @@ void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
 void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
     FUNCC(avg_pixels16)(dst, src, stride, 16);
 }
-
-static void FUNCC(clear_block)(DCTELEM *block)
-{
-    memset(block, 0, sizeof(dctcoef)*64);
-}
-
-/**
- * memset(blocks, 0, sizeof(DCTELEM)*6*64)
- */
-static void FUNCC(clear_blocks)(DCTELEM *blocks)
-{
-    memset(blocks, 0, sizeof(dctcoef)*6*64);
-}
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 5adce242ce..928a96ab35 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3702,6 +3702,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
 
                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
+                    s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
                     dsputil_init(&s->dsp, s->avctx);
                 } else {
                     av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);

From 874f1a901d9f2d1d6825be309ed046abc7672b03 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 21 Jul 2011 02:31:57 +0100
Subject: [PATCH 08/14] dsputil: template get_pixels() for different bit depths

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/alpha/dsputil_alpha.c    |  3 ++-
 libavcodec/arm/dsputil_init_armv6.c |  3 ++-
 libavcodec/bfin/dsputil_bfin.c      |  2 +-
 libavcodec/dsputil.c                | 21 +--------------------
 libavcodec/dsputil_template.c       | 23 +++++++++++++++++++++++
 libavcodec/mlib/dsputil_mlib.c      |  3 ++-
 libavcodec/ppc/dsputil_altivec.c    |  5 ++---
 libavcodec/ps2/dsputil_mmi.c        |  2 +-
 libavcodec/x86/dsputilenc_mmx.c     |  7 +++++--
 9 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 039608b9ed..3efcf096b4 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -321,7 +321,8 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
         c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
         c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
 
-        c->get_pixels       = get_pixels_mvi;
+        if (!high_bit_depth)
+            c->get_pixels   = get_pixels_mvi;
         c->diff_pixels      = diff_pixels_mvi;
         c->sad[0]           = pix_abs16x16_mvi_asm;
         c->sad[1]           = pix_abs8x8_mvi;
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index d442415aec..07124c2773 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -106,8 +106,9 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
     }
 
+    if (!high_bit_depth)
+        c->get_pixels = ff_get_pixels_armv6;
     c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
-    c->get_pixels = ff_get_pixels_armv6;
     c->diff_pixels = ff_diff_pixels_armv6;
 
     c->pix_abs[0][0] = ff_pix_abs16_armv6;
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index 691c060733..f88ae66b15 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -199,12 +199,12 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
 {
     const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
-    c->get_pixels         = ff_bfin_get_pixels;
     c->diff_pixels        = ff_bfin_diff_pixels;
     c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
     c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
 
     if (!high_bit_depth)
+    c->get_pixels         = ff_bfin_get_pixels;
     c->clear_blocks       = bfin_clear_blocks;
     c->pix_sum            = ff_bfin_pix_sum;
     c->pix_norm1          = ff_bfin_pix_norm1;
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index b18164065d..09e58f46d2 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -307,25 +307,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     return s;
 }
 
-static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
-{
-    int i;
-
-    /* read the pixels */
-    for(i=0;i<8;i++) {
-        block[0] = pixels[0];
-        block[1] = pixels[1];
-        block[2] = pixels[2];
-        block[3] = pixels[3];
-        block[4] = pixels[4];
-        block[5] = pixels[5];
-        block[6] = pixels[6];
-        block[7] = pixels[7];
-        pixels += line_size;
-        block += 8;
-    }
-}
-
 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                           const uint8_t *s2, int stride){
     int i;
@@ -2927,7 +2908,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
         }
     }
 
-    c->get_pixels = get_pixels_c;
     c->diff_pixels = diff_pixels_c;
     c->put_pixels_clamped = ff_put_pixels_clamped_c;
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
@@ -3160,6 +3140,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 
 
 #define BIT_DEPTH_FUNCS(depth, dct)\
+    c->get_pixels                    = FUNCC(get_pixels   ## dct   , depth);\
     c->draw_edges                    = FUNCC(draw_edges            , depth);\
     c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
     c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index 5863275c00..72ed6bfd2a 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -193,6 +193,29 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i
 }
 
 #define DCTELEM_FUNCS(dctcoef, suffix)                                  \
+static void FUNCC(get_pixels ## suffix)(DCTELEM *restrict _block,       \
+                                        const uint8_t *_pixels,         \
+                                        int line_size)                  \
+{                                                                       \
+    const pixel *pixels = (const pixel *) _pixels;                      \
+    dctcoef *restrict block = (dctcoef *) _block;                       \
+    int i;                                                              \
+                                                                        \
+    /* read the pixels */                                               \
+    for(i=0;i<8;i++) {                                                  \
+        block[0] = pixels[0];                                           \
+        block[1] = pixels[1];                                           \
+        block[2] = pixels[2];                                           \
+        block[3] = pixels[3];                                           \
+        block[4] = pixels[4];                                           \
+        block[5] = pixels[5];                                           \
+        block[6] = pixels[6];                                           \
+        block[7] = pixels[7];                                           \
+        pixels += line_size / sizeof(pixel);                            \
+        block += 8;                                                     \
+    }                                                                   \
+}                                                                       \
+                                                                        \
 static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels,     \
                                          DCTELEM *_block,               \
                                          int line_size)                 \
diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c
index c0f2c036c6..b915ffeb20 100644
--- a/libavcodec/mlib/dsputil_mlib.c
+++ b/libavcodec/mlib/dsputil_mlib.c
@@ -423,11 +423,12 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
 {
     const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
 
-    c->get_pixels  = get_pixels_mlib;
     c->diff_pixels = diff_pixels_mlib;
     c->add_pixels_clamped = add_pixels_clamped_mlib;
 
     if (!high_bit_depth) {
+    c->get_pixels  = get_pixels_mlib;
+
     c->put_pixels_tab[0][0] = put_pixels16_mlib;
     c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
     c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 7bdd5705b9..f0526722d6 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1387,11 +1387,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
     c->sse[0]= sse16_altivec;
     c->pix_sum = pix_sum_altivec;
     c->diff_pixels = diff_pixels_altivec;
-    c->get_pixels = get_pixels_altivec;
-    if (!high_bit_depth)
-    c->clear_block = clear_block_altivec;
     c->add_bytes= add_bytes_altivec;
     if (!high_bit_depth) {
+    c->get_pixels = get_pixels_altivec;
+    c->clear_block = clear_block_altivec;
     c->put_pixels_tab[0][0] = put_pixels16_altivec;
     /* the two functions do the same thing, so use the same code */
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c
index 585709679b..94a5a8cb48 100644
--- a/libavcodec/ps2/dsputil_mmi.c
+++ b/libavcodec/ps2/dsputil_mmi.c
@@ -152,9 +152,9 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
 
     c->put_pixels_tab[0][0] = put_pixels16_mmi;
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
-    }
 
     c->get_pixels = get_pixels_mmi;
+    }
 
     if (avctx->bits_per_raw_sample <= 8 &&
         (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2)) {
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index ea03e9232f..7362234c86 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -1098,6 +1098,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si
 void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
+    int bit_depth = avctx->bits_per_raw_sample;
 
     if (mm_flags & AV_CPU_FLAG_MMX) {
         const int dct_algo = avctx->dct_algo;
@@ -1112,7 +1113,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
             }
         }
 
-        c->get_pixels = get_pixels_mmx;
+        if (bit_depth <= 8)
+            c->get_pixels = get_pixels_mmx;
         c->diff_pixels = diff_pixels_mmx;
         c->pix_sum = pix_sum16_mmx;
 
@@ -1159,7 +1161,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
         }
 
         if(mm_flags & AV_CPU_FLAG_SSE2){
-            c->get_pixels = get_pixels_sse2;
+            if (bit_depth <= 8)
+                c->get_pixels = get_pixels_sse2;
             c->sum_abs_dctelem= sum_abs_dctelem_sse2;
 #if HAVE_YASM && HAVE_ALIGNED_STACK
             c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2;

From a617c6aaa3e6c0201c9ea31d319808273efd78b0 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 21 Jul 2011 10:05:15 +0100
Subject: [PATCH 09/14] dsputil: update per-arch init funcs for non-h264 high
 bit depth

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/alpha/dsputil_alpha.c    | 2 +-
 libavcodec/arm/dsputil_init_arm.c   | 2 +-
 libavcodec/arm/dsputil_init_armv6.c | 2 +-
 libavcodec/arm/dsputil_init_neon.c  | 2 +-
 libavcodec/arm/dsputil_iwmmxt.c     | 2 +-
 libavcodec/bfin/dsputil_bfin.c      | 2 +-
 libavcodec/mlib/dsputil_mlib.c      | 2 +-
 libavcodec/ppc/dsputil_altivec.c    | 2 +-
 libavcodec/ppc/dsputil_ppc.c        | 2 +-
 libavcodec/ppc/h264_altivec.c       | 2 +-
 libavcodec/ps2/dsputil_mmi.c        | 2 +-
 libavcodec/sh4/dsputil_align.c      | 2 +-
 libavcodec/sh4/dsputil_sh4.c        | 2 +-
 libavcodec/sparc/dsputil_vis.c      | 2 +-
 libavcodec/x86/dsputil_mmx.c        | 2 +-
 15 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 3efcf096b4..acd8e03f2b 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -270,7 +270,7 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
 
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
 {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     if (!high_bit_depth) {
     c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 5b4b24a43e..743be1a239 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -75,7 +75,7 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
 
 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
 {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     ff_put_pixels_clamped = c->put_pixels_clamped;
     ff_add_pixels_clamped = c->add_pixels_clamped;
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 07124c2773..eff4ee38c2 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -72,7 +72,7 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
 
 void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
 {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
         (avctx->idct_algo == FF_IDCT_AUTO ||
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index ce45caf53e..238fdae41f 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -175,7 +175,7 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
 
 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) {
         if (avctx->idct_algo == FF_IDCT_AUTO ||
diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c
index 86f8fddfcf..dec5d77472 100644
--- a/libavcodec/arm/dsputil_iwmmxt.c
+++ b/libavcodec/arm/dsputil_iwmmxt.c
@@ -155,7 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     if (avctx->dsp_mask) {
         if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index f88ae66b15..58ec394d7b 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -197,7 +197,7 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
 
 void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
 {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     c->diff_pixels        = ff_bfin_diff_pixels;
     c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c
index b915ffeb20..bfa5153def 100644
--- a/libavcodec/mlib/dsputil_mlib.c
+++ b/libavcodec/mlib/dsputil_mlib.c
@@ -421,7 +421,7 @@ static void ff_fdct_mlib(DCTELEM *data)
 
 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
 {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     c->diff_pixels = diff_pixels_mlib;
     c->add_pixels_clamped = add_pixels_clamped_mlib;
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index f0526722d6..7ab533600d 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1373,7 +1373,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
 
 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
 {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     c->pix_abs[0][1] = sad16_x2_altivec;
     c->pix_abs[0][2] = sad16_y2_altivec;
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index b6de39fae7..24b12ee2ae 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -145,7 +145,7 @@ static void prefetch_ppc(void *mem, int stride, int h)
 
 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     // Common optimizations whether AltiVec is available or not
     c->prefetch = prefetch_ppc;
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 05fae831c9..8dd4ea392e 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -967,7 +967,7 @@ H264_WEIGHT( 8, 8)
 H264_WEIGHT( 8, 4)
 
 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
     if (!high_bit_depth) {
diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c
index 94a5a8cb48..77755b0d4c 100644
--- a/libavcodec/ps2/dsputil_mmi.c
+++ b/libavcodec/ps2/dsputil_mmi.c
@@ -142,7 +142,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
 {
     const int idct_algo= avctx->idct_algo;
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
     if (!high_bit_depth) {
     c->clear_blocks = clear_blocks_mmi;
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index db40ece670..0c293a1248 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -333,7 +333,7 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
 
 void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
 {
-        const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+        const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
         if (!high_bit_depth) {
         c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c
index bf58a9cbbd..51c1a53d7d 100644
--- a/libavcodec/sh4/dsputil_sh4.c
+++ b/libavcodec/sh4/dsputil_sh4.c
@@ -92,7 +92,7 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
 {
         const int idct_algo= avctx->idct_algo;
-        const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+        const int high_bit_depth = avctx->bits_per_raw_sample > 8;
         dsputil_init_align(c,avctx);
 
         if (!high_bit_depth)
diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c
index 9811b2622e..e45bfd28e6 100644
--- a/libavcodec/sparc/dsputil_vis.c
+++ b/libavcodec/sparc/dsputil_vis.c
@@ -3953,7 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
 {
   /* VIS-specific optimizations */
   int accel = vis_level ();
-  const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+  const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
   if (accel & ACCEL_SPARC_VIS) {
       if (avctx->bits_per_raw_sample <= 8 &&
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 84ae47b04d..dd318a1399 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2437,7 +2437,7 @@ void ff_vector_clip_int32_sse41   (int32_t *dst, const int32_t *src, int32_t min
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
-    const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
     const int bit_depth = avctx->bits_per_raw_sample;
 
     if (avctx->dsp_mask) {

From 5ab21439fdfb8e239eb778128590d95967067b46 Mon Sep 17 00:00:00 2001
From: Joseph Artsimovich <joseph@mirriad.com>
Date: Wed, 20 Jul 2011 18:58:27 +0100
Subject: [PATCH 10/14] dnxhd: 10-bit support

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/dnxhddata.c     | 202 ++++++++++++++++++++++++++++++-
 libavcodec/dnxhddata.h     |   2 +-
 libavcodec/dnxhddec.c      |  57 +++++----
 libavcodec/dnxhdenc.c      | 240 ++++++++++++++++++++++++++++---------
 libavcodec/dnxhdenc.h      |   4 +
 libavcodec/x86/dnxhd_mmx.c |   3 +-
 6 files changed, 427 insertions(+), 81 deletions(-)

diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c
index 677fb8f5be..576e85e05c 100644
--- a/libavcodec/dnxhddata.c
+++ b/libavcodec/dnxhddata.c
@@ -22,6 +22,28 @@
 #include "avcodec.h"
 #include "dnxhddata.h"
 
+static const uint8_t dnxhd_1235_luma_weight[] = {
+     0, 32, 32, 32, 33, 35, 38, 39,
+    32, 33, 32, 33, 36, 36, 39, 42,
+    32, 32, 33, 36, 35, 37, 41, 43,
+    31, 33, 34, 36, 36, 40, 42, 48,
+    32, 34, 36, 37, 39, 42, 46, 51,
+    36, 37, 37, 39, 41, 46, 51, 55,
+    37, 39, 41, 41, 47, 50, 55, 56,
+    41, 42, 41, 44, 50, 53, 60, 60
+};
+
+static const uint8_t dnxhd_1235_chroma_weight[] = {
+     0, 32, 33, 34, 39, 41, 54, 59,
+    33, 34, 35, 38, 43, 49, 58, 84,
+    34, 37, 39, 44, 46, 55, 74, 87,
+    40, 42, 47, 48, 58, 70, 87, 86,
+    43, 50, 56, 63, 72, 94, 91, 82,
+    55, 63, 65, 75, 93, 89, 85, 73,
+    61, 67, 82, 81, 83, 90, 79, 73,
+    74, 84, 75, 78, 90, 85, 73, 73
+};
+
 static const uint8_t dnxhd_1237_luma_weight[] = {
      0,  32,  33,  34, 34, 36, 37, 36,
     36,  37,  38,  38, 38, 39, 41, 44,
@@ -132,6 +154,28 @@ static const uint8_t dnxhd_1243_chroma_weight[] = {
     46, 45, 46, 47, 47, 48, 47, 47,
 };
 
+static const uint8_t dnxhd_1250_luma_weight[] = {
+     0, 32, 35, 35, 36, 36, 41, 43,
+    32, 34, 35, 36, 37, 39, 43, 47,
+    33, 34, 36, 38, 38, 42, 42, 50,
+    34, 36, 38, 38, 41, 40, 47, 54,
+    35, 38, 39, 40, 39, 45, 49, 58,
+    38, 39, 40, 39, 46, 47, 54, 60,
+    38, 39, 41, 46, 46, 48, 57, 62,
+    40, 41, 44, 45, 49, 54, 63, 63
+};
+
+static const uint8_t dnxhd_1250_chroma_weight[] = {
+     0, 32, 35, 36, 40, 42, 51, 51,
+    35, 36, 39, 39, 43, 51, 52, 55,
+    36, 41, 41, 43, 51, 53, 54, 56,
+    43, 44, 45, 50, 54, 54, 55, 57,
+    45, 48, 50, 51, 55, 58, 59, 58,
+    49, 52, 49, 57, 58, 62, 58, 60,
+    51, 51, 56, 58, 62, 61, 59, 62,
+    52, 52, 60, 61, 59, 59, 63, 63
+};
+
 static const uint8_t dnxhd_1251_luma_weight[] = {
      0, 32, 32, 34, 34, 34, 34, 35,
     35, 35, 36, 37, 36, 36, 35, 36,
@@ -604,6 +648,146 @@ static const uint8_t dnxhd_1235_1241_run[62] = {
     49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
 };
 
+static const uint8_t dnxhd_1250_dc_codes[14] = {
+    10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127
+};
+static const uint8_t dnxhd_1250_dc_bits[14] = {
+    4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7
+};
+static const uint16_t dnxhd_1250_ac_codes[257] = {
+        0,     1,     4,    10,    11,    24,    25,    26,
+       54,    55,    56,    57,   116,   117,   118,   119,
+      240,   241,   242,   243,   244,   245,   492,   493,
+      494,   495,   496,   497,   498,   998,   999,  1000,
+     1001,  1002,  1003,  1004,  1005,  1006,  2014,  2015,
+     2016,  2017,  2018,  2019,  2020,  2021,  2022,  2023,
+     2024,  2025,  4052,  4053,  4054,  4055,  4056,  4057,
+     4058,  4059,  4060,  4061,  4062,  4063,  4064,  4065,
+     4066,  4067,  8136,  8137,  8138,  8139,  8140,  8141,
+     8142,  8143,  8144,  8145,  8146,  8147,  8148,  8149,
+     8150,  8151,  8152,  8153,  8154,  8155,  8156, 16314,
+    16315, 16316, 16317, 16318, 16319, 16320, 16321, 16322,
+    16323, 16324, 16325, 16326, 16327, 16328, 16329, 16330,
+    16331, 16332, 16333, 16334, 16335, 16336, 16337, 16338,
+    32678, 32679, 32680, 32681, 32682, 32683, 32684, 32685,
+    32686, 32687, 32688, 32689, 32690, 32691, 32692, 32693,
+    32694, 32695, 32696, 32697, 32698, 32699, 32700, 32701,
+    32702, 32703, 32704, 32705, 32706, 32707, 32708, 32709,
+    32710, 32711, 32712, 65426, 65427, 65428, 65429, 65430,
+    65431, 65432, 65433, 65434, 65435, 65436, 65437, 65438,
+    65439, 65440, 65441, 65442, 65443, 65444, 65445, 65446,
+    65447, 65448, 65449, 65450, 65451, 65452, 65453, 65454,
+    65455, 65456, 65457, 65458, 65459, 65460, 65461, 65462,
+    65463, 65464, 65465, 65466, 65467, 65468, 65469, 65470,
+    65471, 65472, 65473, 65474, 65475, 65476, 65477, 65478,
+    65479, 65480, 65481, 65482, 65483, 65484, 65485, 65486,
+    65487, 65488, 65489, 65490, 65491, 65492, 65493, 65494,
+    65495, 65496, 65497, 65498, 65499, 65500, 65501, 65502,
+    65503, 65504, 65505, 65506, 65507, 65508, 65509, 65510,
+    65511, 65512, 65513, 65514, 65515, 65516, 65517, 65518,
+    65519, 65520, 65521, 65522, 65523, 65524, 65525, 65526,
+    65527, 65528, 65529, 65530, 65531, 65532, 65533, 65534,
+    65535
+};
+static const uint8_t dnxhd_1250_ac_bits[257] = {
+     2,  2,  3,  4,  4,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
+     8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16
+};
+static const uint8_t dnxhd_1250_ac_level[257] = {
+     1,  1,  2,  3,  0,  4,  5,  2,  6,  7,  8,  3,  9, 10, 11,  4,
+    12, 13, 14, 15, 16,  5, 17, 18, 19, 20, 21, 22,  6, 23, 24, 25,
+    26, 27, 28, 29,  7,  8, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+     9, 10, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 11,
+    12, 13, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,  1,  2,
+     3,  4,  5, 14, 15, 16, 17,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 18, 19, 20, 21,
+    27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+    43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 22, 23, 24,
+    25, 26, 27, 54, 57, 58, 59, 60, 61, 62, 63, 64, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    64,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    64
+};
+static const uint8_t dnxhd_1250_ac_run_flag[257] = {
+    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
+    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+    0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
+    1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1
+};
+static const uint8_t dnxhd_1250_ac_index_flag[257] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+    1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
+    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1
+};
+static const uint16_t dnxhd_1250_run_codes[62] = {
+       0,    4,    5,   12,   26,   27,   28,   58,
+     118,  119,  120,  242,  486,  487,  976,  977,
+     978,  979,  980,  981,  982,  983,  984,  985,
+     986,  987,  988,  989,  990,  991,  992,  993,
+     994,  995,  996,  997,  998,  999, 1000, 1001,
+    1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
+    1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
+    1018, 1019, 1020, 1021, 1022, 1023
+};
+static const uint8_t dnxhd_1250_run_bits[62] = {
+     1,  3,  3,  4,  5,  5,  5,  6,  7,  7,  7,  8,  9,  9, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
+};
+static const uint8_t dnxhd_1250_run[62] = {
+     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62
+};
+
 static const uint8_t dnxhd_1251_dc_codes[12] = {
     0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63,
 };
@@ -878,6 +1062,13 @@ static const uint8_t dnxhd_1252_ac_index_flag[257] = {
 };
 
 const CIDEntry ff_dnxhd_cid_table[] = {
+    { 1235, 1920, 1080, 0, 917504, 917504, 6, 10,
+      dnxhd_1235_luma_weight, dnxhd_1235_chroma_weight,
+      dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
+      dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
+      dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag,
+      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
+      { 175, 185, 365, 440 } },
     { 1237, 1920, 1080, 0, 606208, 606208, 4, 8,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
@@ -913,6 +1104,13 @@ const CIDEntry ff_dnxhd_cid_table[] = {
       dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag,
       dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
       { 185, 220 } },
+    { 1250, 1280,  720, 0, 458752, 458752, 6, 10,
+      dnxhd_1250_luma_weight, dnxhd_1250_chroma_weight,
+      dnxhd_1250_dc_codes, dnxhd_1250_dc_bits,
+      dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_level,
+      dnxhd_1250_ac_run_flag, dnxhd_1250_ac_index_flag,
+      dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
+      { 90, 180, 220 } },
     { 1251, 1280,  720, 0, 458752, 458752, 4, 8,
       dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight,
       dnxhd_1251_dc_codes, dnxhd_1251_dc_bits,
@@ -945,7 +1143,7 @@ int ff_dnxhd_get_cid_table(int cid)
     return -1;
 }
 
-int ff_dnxhd_find_cid(AVCodecContext *avctx)
+int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth)
 {
     int i, j;
     int mbs = avctx->bit_rate/1000000;
@@ -955,7 +1153,7 @@ int ff_dnxhd_find_cid(AVCodecContext *avctx)
         const CIDEntry *cid = &ff_dnxhd_cid_table[i];
         if (cid->width == avctx->width && cid->height == avctx->height &&
             cid->interlaced == !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT) &&
-            cid->bit_depth == 8) { // until 10 bit is supported
+            cid->bit_depth == bit_depth) {
             for (j = 0; j < sizeof(cid->bit_rates); j++) {
                 if (cid->bit_rates[j] == mbs)
                     return cid->cid;
diff --git a/libavcodec/dnxhddata.h b/libavcodec/dnxhddata.h
index df841872fc..74cf0b522f 100644
--- a/libavcodec/dnxhddata.h
+++ b/libavcodec/dnxhddata.h
@@ -46,6 +46,6 @@ typedef struct {
 extern const CIDEntry ff_dnxhd_cid_table[];
 
 int ff_dnxhd_get_cid_table(int cid);
-int ff_dnxhd_find_cid(AVCodecContext *avctx);
+int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth);
 
 #endif /* AVCODEC_DNXHDDATA_H */
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index 6928b32263..7b32cd3ed4 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -1,6 +1,9 @@
 /*
  * VC3/DNxHD decoder.
  * Copyright (c) 2007 SmartJog S.A., Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
+ * Copyright (c) 2011 MirriAd Ltd
+ *
+ * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
  *
  * This file is part of Libav.
  *
@@ -43,6 +46,7 @@ typedef struct {
     DECLARE_ALIGNED(16, DCTELEM, blocks)[8][64];
     ScanTable scantable;
     const CIDEntry *cid_table;
+    int bit_depth; // 8, 10 or 0 if not initialized at all.
 } DNXHDContext;
 
 #define DNXHD_VLC_BITS 9
@@ -53,7 +57,6 @@ static av_cold int dnxhd_decode_init(AVCodecContext *avctx)
     DNXHDContext *ctx = avctx->priv_data;
 
     ctx->avctx = avctx;
-    dsputil_init(&ctx->dsp, avctx);
     avctx->coded_frame = &ctx->picture;
     ctx->picture.type = AV_PICTURE_TYPE_I;
     ctx->picture.key_frame = 1;
@@ -73,7 +76,7 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, int cid)
         init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257,
                  ctx->cid_table->ac_bits, 1, 1,
                  ctx->cid_table->ac_codes, 2, 2, 0);
-        init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->cid_table->bit_depth+4,
+        init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->bit_depth + 4,
                  ctx->cid_table->dc_bits, 1, 1,
                  ctx->cid_table->dc_codes, 1, 1, 0);
         init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62,
@@ -110,8 +113,19 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
     av_dlog(ctx->avctx, "width %d, heigth %d\n", ctx->width, ctx->height);
 
     if (buf[0x21] & 0x40) {
-        av_log(ctx->avctx, AV_LOG_ERROR, "10 bit per component\n");
-        return -1;
+        ctx->avctx->pix_fmt = PIX_FMT_YUV422P10;
+        ctx->avctx->bits_per_raw_sample = 10;
+        if (ctx->bit_depth != 10) {
+            dsputil_init(&ctx->dsp, ctx->avctx);
+            ctx->bit_depth = 10;
+        }
+    } else {
+        ctx->avctx->pix_fmt = PIX_FMT_YUV422P;
+        ctx->avctx->bits_per_raw_sample = 8;
+        if (ctx->bit_depth != 8) {
+            dsputil_init(&ctx->dsp, ctx->avctx);
+            ctx->bit_depth = 8;
+        }
     }
 
     ctx->cid = AV_RB32(buf + 0x28);
@@ -204,7 +218,7 @@ static void dnxhd_decode_dct_block(DNXHDContext *ctx, DCTELEM *block, int n, int
         //av_log(ctx->avctx, AV_LOG_DEBUG, "j %d\n", j);
         //av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weigth %d\n", level, weigth_matrix[i]);
         level = (2*level+1) * qscale * weigth_matrix[i];
-        if (ctx->cid_table->bit_depth == 10) {
+        if (ctx->bit_depth == 10) {
             if (weigth_matrix[i] != 8)
                 level += 8;
             level >>= 4;
@@ -220,10 +234,11 @@ static void dnxhd_decode_dct_block(DNXHDContext *ctx, DCTELEM *block, int n, int
 
 static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
 {
+    int shift1 = ctx->bit_depth == 10;
     int dct_linesize_luma   = ctx->picture.linesize[0];
     int dct_linesize_chroma = ctx->picture.linesize[1];
     uint8_t *dest_y, *dest_u, *dest_v;
-    int dct_offset;
+    int dct_y_offset, dct_x_offset;
     int qscale, i;
 
     qscale = get_bits(&ctx->gb, 11);
@@ -240,9 +255,9 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
         dct_linesize_chroma <<= 1;
     }
 
-    dest_y = ctx->picture.data[0] + ((y * dct_linesize_luma)   << 4) + (x << 4);
-    dest_u = ctx->picture.data[1] + ((y * dct_linesize_chroma) << 4) + (x << 3);
-    dest_v = ctx->picture.data[2] + ((y * dct_linesize_chroma) << 4) + (x << 3);
+    dest_y = ctx->picture.data[0] + ((y * dct_linesize_luma)   << 4) + (x << (4 + shift1));
+    dest_u = ctx->picture.data[1] + ((y * dct_linesize_chroma) << 4) + (x << (3 + shift1));
+    dest_v = ctx->picture.data[2] + ((y * dct_linesize_chroma) << 4) + (x << (3 + shift1));
 
     if (ctx->cur_field) {
         dest_y += ctx->picture.linesize[0];
@@ -250,18 +265,19 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
         dest_v += ctx->picture.linesize[2];
     }
 
-    dct_offset = dct_linesize_luma << 3;
-    ctx->dsp.idct_put(dest_y,                  dct_linesize_luma, ctx->blocks[0]);
-    ctx->dsp.idct_put(dest_y + 8,              dct_linesize_luma, ctx->blocks[1]);
-    ctx->dsp.idct_put(dest_y + dct_offset,     dct_linesize_luma, ctx->blocks[4]);
-    ctx->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize_luma, ctx->blocks[5]);
+    dct_y_offset = dct_linesize_luma << 3;
+    dct_x_offset = 8 << shift1;
+    ctx->dsp.idct_put(dest_y,                               dct_linesize_luma, ctx->blocks[0]);
+    ctx->dsp.idct_put(dest_y + dct_x_offset,                dct_linesize_luma, ctx->blocks[1]);
+    ctx->dsp.idct_put(dest_y + dct_y_offset,                dct_linesize_luma, ctx->blocks[4]);
+    ctx->dsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]);
 
     if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
-        dct_offset = dct_linesize_chroma << 3;
-        ctx->dsp.idct_put(dest_u,              dct_linesize_chroma, ctx->blocks[2]);
-        ctx->dsp.idct_put(dest_v,              dct_linesize_chroma, ctx->blocks[3]);
-        ctx->dsp.idct_put(dest_u + dct_offset, dct_linesize_chroma, ctx->blocks[6]);
-        ctx->dsp.idct_put(dest_v + dct_offset, dct_linesize_chroma, ctx->blocks[7]);
+        dct_y_offset = dct_linesize_chroma << 3;
+        ctx->dsp.idct_put(dest_u,                dct_linesize_chroma, ctx->blocks[2]);
+        ctx->dsp.idct_put(dest_v,                dct_linesize_chroma, ctx->blocks[3]);
+        ctx->dsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]);
+        ctx->dsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]);
     }
 
     return 0;
@@ -273,7 +289,7 @@ static int dnxhd_decode_macroblocks(DNXHDContext *ctx, const uint8_t *buf, int b
     for (y = 0; y < ctx->mb_height; y++) {
         ctx->last_dc[0] =
         ctx->last_dc[1] =
-        ctx->last_dc[2] = 1<<(ctx->cid_table->bit_depth+2); // for levels +2^(bitdepth-1)
+        ctx->last_dc[2] = 1 << (ctx->bit_depth + 2); // for levels +2^(bitdepth-1)
         init_get_bits(&ctx->gb, buf + ctx->mb_scan_index[y], (buf_size - ctx->mb_scan_index[y]) << 3);
         for (x = 0; x < ctx->mb_width; x++) {
             //START_TIMER;
@@ -306,7 +322,6 @@ static int dnxhd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
         first_field = 1;
     }
 
-    avctx->pix_fmt = PIX_FMT_YUV422P;
     if (av_image_check_size(ctx->width, ctx->height, 0, avctx))
         return -1;
     avcodec_set_dimensions(avctx, ctx->width, ctx->height);
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index b65d0bf669..3fb10d4380 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -1,8 +1,10 @@
 /*
  * VC3/DNxHD encoder
  * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
+ * Copyright (c) 2011 MirriAd Ltd
  *
  * VC-3 encoder funded by the British Broadcasting Corporation
+ * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
  *
  * This file is part of Libav.
  *
@@ -32,6 +34,7 @@
 #include "dnxhdenc.h"
 
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+#define DNX10BIT_QMAT_SHIFT 18 // The largest value that will not lead to overflow for 10bit samples.
 
 static const AVOption options[]={
     {"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, VE},
@@ -41,7 +44,7 @@ static const AVClass class = { "dnxhd", av_default_item_name, options, LIBAVUTIL
 
 #define LAMBDA_FRAC_BITS 10
 
-static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+static void dnxhd_8bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
 {
     int i;
     for (i = 0; i < 4; i++) {
@@ -58,6 +61,43 @@ static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels,
     memcpy(block + 24, block - 32, sizeof(*block) * 8);
 }
 
+static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+    int i;
+
+    block += 32;
+
+    for (i = 0; i < 4; i++) {
+        memcpy(block + i     * 8, pixels + i * line_size, 8 * sizeof(*block));
+        memcpy(block - (i+1) * 8, pixels + i * line_size, 8 * sizeof(*block));
+    }
+}
+
+static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, DCTELEM *block,
+                                    int n, int qscale, int *overflow)
+{
+    const uint8_t *scantable= ctx->intra_scantable.scantable;
+    const int *qmat = ctx->q_intra_matrix[qscale];
+    int last_non_zero = 0;
+
+    ctx->dsp.fdct(block);
+
+    // Divide by 4 with rounding, to compensate scaling of DCT coefficients
+    block[0] = (block[0] + 2) >> 2;
+
+    for (int i = 1; i < 64; ++i) {
+        int j = scantable[i];
+        int sign = block[j] >> 31;
+        int level = (block[j] ^ sign) - sign;
+        level = level * qmat[j] >> DNX10BIT_QMAT_SHIFT;
+        block[j] = (level ^ sign) - sign;
+        if (level)
+            last_non_zero = i;
+    }
+
+    return last_non_zero;
+}
+
 static int dnxhd_init_vlc(DNXHDEncContext *ctx)
 {
     int i, j, level, run;
@@ -118,31 +158,55 @@ static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
     // init first elem to 1 to avoid div by 0 in convert_matrix
     uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t*
     int qscale, i;
+    const uint8_t *luma_weight_table   = ctx->cid_table->luma_weight;
+    const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight;
 
     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l,   (ctx->m.avctx->qmax+1) * 64 *     sizeof(int),      fail);
     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c,   (ctx->m.avctx->qmax+1) * 64 *     sizeof(int),      fail);
     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
 
-    for (i = 1; i < 64; i++) {
-        int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
-        weight_matrix[j] = ctx->cid_table->luma_weight[i];
-    }
-    ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
-                      ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
-    for (i = 1; i < 64; i++) {
-        int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
-        weight_matrix[j] = ctx->cid_table->chroma_weight[i];
-    }
-    ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
-                      ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
-    for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
-        for (i = 0; i < 64; i++) {
-            ctx->qmatrix_l  [qscale]   [i] <<= 2; ctx->qmatrix_c  [qscale]   [i] <<= 2;
-            ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
-            ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
+    if (ctx->cid_table->bit_depth == 8) {
+        for (i = 1; i < 64; i++) {
+            int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+            weight_matrix[j] = ctx->cid_table->luma_weight[i];
+        }
+        ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
+                          ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
+        for (i = 1; i < 64; i++) {
+            int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+            weight_matrix[j] = ctx->cid_table->chroma_weight[i];
+        }
+        ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
+                          ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
+
+        for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
+            for (i = 0; i < 64; i++) {
+                ctx->qmatrix_l  [qscale]   [i] <<= 2; ctx->qmatrix_c  [qscale]   [i] <<= 2;
+                ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
+                ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
+            }
+        }
+    } else {
+        // 10-bit
+        for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
+            for (i = 1; i < 64; i++) {
+                int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+
+                // The quantization formula from the VC-3 standard is:
+                // quantized = sign(block[i]) * floor(abs(block[i]/s) * p / (qscale * weight_table[i]))
+                // Where p is 32 for 8-bit samples and 8 for 10-bit ones.
+                // The s factor compensates scaling of DCT coefficients done by the DCT routines,
+                // and therefore is not present in standard.  It's 8 for 8-bit samples and 4 for 10-bit ones.
+                // We want values of ctx->qtmatrix_l and ctx->qtmatrix_r to be:
+                // ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / (qscale * weight_table[i])
+                // For 10-bit samples, p / s == 2
+                ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * luma_weight_table[i]);
+                ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * chroma_weight_table[i]);
+            }
         }
     }
+
     return 0;
  fail:
     return -1;
@@ -165,10 +229,22 @@ static int dnxhd_init_rc(DNXHDEncContext *ctx)
 static int dnxhd_encode_init(AVCodecContext *avctx)
 {
     DNXHDEncContext *ctx = avctx->priv_data;
-    int i, index;
+    int i, index, bit_depth;
 
-    ctx->cid = ff_dnxhd_find_cid(avctx);
-    if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
+    switch (avctx->pix_fmt) {
+    case PIX_FMT_YUV422P:
+        bit_depth = 8;
+        break;
+    case PIX_FMT_YUV422P10:
+        bit_depth = 10;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "pixel format is incompatible with DNxHD\n");
+        return -1;
+    }
+
+    ctx->cid = ff_dnxhd_find_cid(avctx, bit_depth);
+    if (!ctx->cid) {
         av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
         return -1;
     }
@@ -181,15 +257,25 @@ static int dnxhd_encode_init(AVCodecContext *avctx)
     ctx->m.mb_intra = 1;
     ctx->m.h263_aic = 1;
 
-    ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;
+    avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
 
     dsputil_init(&ctx->m.dsp, avctx);
     ff_dct_common_init(&ctx->m);
+    if (!ctx->m.dct_quantize)
+        ctx->m.dct_quantize = dct_quantize_c;
+
+    if (ctx->cid_table->bit_depth == 10) {
+       ctx->m.dct_quantize = dnxhd_10bit_dct_quantize;
+       ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
+       ctx->block_width_l2 = 4;
+    } else {
+       ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym;
+       ctx->block_width_l2 = 3;
+    }
+
 #if HAVE_MMX
     ff_dnxhd_init_mmx(ctx);
 #endif
-    if (!ctx->m.dct_quantize)
-        ctx->m.dct_quantize = dct_quantize_c;
 
     ctx->m.mb_height = (avctx->height + 15) / 16;
     ctx->m.mb_width  = (avctx->width  + 15) / 16;
@@ -255,7 +341,7 @@ static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
     AV_WB16(buf + 0x1a, avctx->width);  // SPL
     AV_WB16(buf + 0x1d, avctx->height>>ctx->interlaced); // NAL
 
-    buf[0x21] = 0x38; // FIXME 8 bit per comp
+    buf[0x21] = ctx->cid_table->bit_depth == 10 ? 0x58 : 0x38;
     buf[0x22] = 0x88 + (ctx->interlaced<<2);
     AV_WB32(buf + 0x28, ctx->cid); // CID
     buf[0x2c] = ctx->interlaced ? 0 : 0x80;
@@ -321,15 +407,27 @@ static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *b
         if (level) {
             if (level < 0) {
                 level = (1-2*level) * qscale * weight_matrix[i];
-                if (weight_matrix[i] != 32)
-                    level += 32;
-                level >>= 6;
+                if (ctx->cid_table->bit_depth == 10) {
+                    if (weight_matrix[i] != 8)
+                        level += 8;
+                    level >>= 4;
+                } else {
+                    if (weight_matrix[i] != 32)
+                        level += 32;
+                    level >>= 6;
+                }
                 level = -level;
             } else {
                 level = (2*level+1) * qscale * weight_matrix[i];
-                if (weight_matrix[i] != 32)
-                    level += 32;
-                level >>= 6;
+                if (ctx->cid_table->bit_depth == 10) {
+                    if (weight_matrix[i] != 8)
+                        level += 8;
+                    level >>= 4;
+                } else {
+                    if (weight_matrix[i] != 32)
+                        level += 32;
+                    level >>= 6;
+                }
             }
             block[j] = level;
         }
@@ -364,22 +462,24 @@ static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *bl
 
 static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
 {
-    const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize)   + (mb_x << 4);
-    const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
-    const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
+    const int bs = ctx->block_width_l2;
+    const int bw = 1 << bs;
+    const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize)   + (mb_x << bs+1);
+    const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
+    const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
     DSPContext *dsp = &ctx->m.dsp;
 
-    dsp->get_pixels(ctx->blocks[0], ptr_y,     ctx->m.linesize);
-    dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
-    dsp->get_pixels(ctx->blocks[2], ptr_u,     ctx->m.uvlinesize);
-    dsp->get_pixels(ctx->blocks[3], ptr_v,     ctx->m.uvlinesize);
+    dsp->get_pixels(ctx->blocks[0], ptr_y,      ctx->m.linesize);
+    dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
+    dsp->get_pixels(ctx->blocks[2], ptr_u,      ctx->m.uvlinesize);
+    dsp->get_pixels(ctx->blocks[3], ptr_v,      ctx->m.uvlinesize);
 
     if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
         if (ctx->interlaced) {
-            ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset,     ctx->m.linesize);
-            ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
-            ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset,    ctx->m.uvlinesize);
-            ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset,    ctx->m.uvlinesize);
+            ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset,      ctx->m.linesize);
+            ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
+            ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset,     ctx->m.uvlinesize);
+            ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset,     ctx->m.uvlinesize);
         } else {
             dsp->clear_block(ctx->blocks[4]);
             dsp->clear_block(ctx->blocks[5]);
@@ -387,10 +487,10 @@ static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, in
             dsp->clear_block(ctx->blocks[7]);
         }
     } else {
-        dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset,     ctx->m.linesize);
-        dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
-        dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset,    ctx->m.uvlinesize);
-        dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset,    ctx->m.uvlinesize);
+        dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset,      ctx->m.linesize);
+        dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
+        dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset,     ctx->m.uvlinesize);
+        dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset,     ctx->m.uvlinesize);
     }
 }
 
@@ -417,7 +517,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
 
     ctx->m.last_dc[0] =
     ctx->m.last_dc[1] =
-    ctx->m.last_dc[2] = 1024;
+    ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);
 
     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
         unsigned mb = mb_y * ctx->m.mb_width + mb_x;
@@ -440,6 +540,8 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
             diff = block[0] - ctx->m.last_dc[n];
             if (diff < 0) nbits = av_log2_16bit(-2*diff);
             else          nbits = av_log2_16bit( 2*diff);
+
+            assert(nbits < ctx->cid_table->bit_depth + 4);
             dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
 
             ctx->m.last_dc[n] = block[0];
@@ -465,7 +567,7 @@ static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int
 
     ctx->m.last_dc[0] =
     ctx->m.last_dc[1] =
-    ctx->m.last_dc[2] = 1024;
+    ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);
     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
         unsigned mb = mb_y * ctx->m.mb_width + mb_x;
         int qscale = ctx->mb_qscale[mb];
@@ -515,13 +617,39 @@ static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int
     DNXHDEncContext *ctx = avctx->priv_data;
     int mb_y = jobnr, mb_x;
     ctx = ctx->thread[threadnr];
-    for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
-        unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
-        uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
-        int sum      = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
-        int varc     = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
-        ctx->mb_cmp[mb].value = varc;
-        ctx->mb_cmp[mb].mb = mb;
+    if (ctx->cid_table->bit_depth == 8) {
+        uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize);
+        for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) {
+            unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
+            int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
+            int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
+            ctx->mb_cmp[mb].value = varc;
+            ctx->mb_cmp[mb].mb = mb;
+        }
+    } else { // 10-bit
+        int const linesize = ctx->m.linesize >> 1;
+        for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) {
+            uint16_t *pix = (uint16_t*)ctx->thread[0]->src[0] + ((mb_y << 4) * linesize) + (mb_x << 4);
+            unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
+            int sum = 0;
+            int sqsum = 0;
+            int mean, sqmean;
+            // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8.
+            for (int i = 0; i < 16; ++i) {
+                for (int j = 0; j < 16; ++j) {
+                    // Turn 16-bit pixels into 10-bit ones.
+                    int const sample = (unsigned)pix[j] >> 6;
+                    sum += sample;
+                    sqsum += sample * sample;
+                    // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX
+                }
+                pix += linesize;
+            }
+            mean = sum >> 8; // 16*16 == 2^8
+            sqmean = sqsum >> 8;
+            ctx->mb_cmp[mb].value = sqmean - mean * mean;
+            ctx->mb_cmp[mb].mb = mb;
+        }
     }
     return 0;
 }
@@ -871,7 +999,7 @@ AVCodec ff_dnxhd_encoder = {
     dnxhd_encode_picture,
     dnxhd_encode_end,
     .capabilities = CODEC_CAP_SLICE_THREADS,
-    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
+    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_YUV422P10, PIX_FMT_NONE},
     .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
     .priv_class = &class,
 };
diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
index bb24540a9f..861546a54c 100644
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -52,8 +52,12 @@ typedef struct DNXHDEncContext {
 
     struct DNXHDEncContext *thread[MAX_THREADS];
 
+    // Because our samples are either 8 or 16 bits for 8-bit and 10-bit
+    // encoding respectively, these refer either to bytes or to two-byte words.
     unsigned dct_y_offset;
     unsigned dct_uv_offset;
+    unsigned block_width_l2;
+
     int interlaced;
     int cur_field;
 
diff --git a/libavcodec/x86/dnxhd_mmx.c b/libavcodec/x86/dnxhd_mmx.c
index f00ce9b188..e193d62e21 100644
--- a/libavcodec/x86/dnxhd_mmx.c
+++ b/libavcodec/x86/dnxhd_mmx.c
@@ -53,6 +53,7 @@ static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int l
 void ff_dnxhd_init_mmx(DNXHDEncContext *ctx)
 {
     if (av_get_cpu_flags() & AV_CPU_FLAG_SSE2) {
-        ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2;
+        if (ctx->cid_table->bit_depth == 8)
+            ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2;
     }
 }

From 4555874af19acd0cbd6e856e032468ff52ceaa63 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 21 Jul 2011 00:55:24 +0100
Subject: [PATCH 11/14] dnxhd: add regression test for 10-bit

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/codec-regression.sh          | 5 +++++
 tests/ref/vsynth1/dnxhd_720p_10bit | 4 ++++
 tests/ref/vsynth2/dnxhd_720p_10bit | 4 ++++
 3 files changed, 13 insertions(+)
 create mode 100644 tests/ref/vsynth1/dnxhd_720p_10bit
 create mode 100644 tests/ref/vsynth2/dnxhd_720p_10bit

diff --git a/tests/codec-regression.sh b/tests/codec-regression.sh
index ccedbdbfef..a420c573c9 100755
--- a/tests/codec-regression.sh
+++ b/tests/codec-regression.sh
@@ -230,6 +230,11 @@ do_video_encoding dnxhd-720p-rd.dnxhd "-threads 4 -mbd rd -s hd720 -b 90M -pix_f
 do_video_decoding "" "-s cif -pix_fmt yuv420p"
 fi
 
+if [ -n "$do_dnxhd_720p_10bit" ] ; then
+do_video_encoding dnxhd-720p-10bit.dnxhd "-s hd720 -b 90M -pix_fmt yuv422p10 -vframes 5 -an"
+do_video_decoding "" "-s cif -pix_fmt yuv420p"
+fi
+
 if [ -n "$do_svq1" ] ; then
 do_video_encoding svq1.mov "-an -vcodec svq1 -qscale 3 -pix_fmt yuv410p"
 do_video_decoding "" "-pix_fmt yuv420p"
diff --git a/tests/ref/vsynth1/dnxhd_720p_10bit b/tests/ref/vsynth1/dnxhd_720p_10bit
new file mode 100644
index 0000000000..4a815a973c
--- /dev/null
+++ b/tests/ref/vsynth1/dnxhd_720p_10bit
@@ -0,0 +1,4 @@
+3ed972af47641d39a19916b0cd119120 *./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd
+2293760 ./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd
+b64efb0b4eb934bb66f4530c12d5d7fa *./tests/data/dnxhd_720p_10bit.vsynth1.out.yuv
+stddev:    6.27 PSNR: 32.18 MAXDIFF:   65 bytes:   760320/  7603200
diff --git a/tests/ref/vsynth2/dnxhd_720p_10bit b/tests/ref/vsynth2/dnxhd_720p_10bit
new file mode 100644
index 0000000000..734df4ffe2
--- /dev/null
+++ b/tests/ref/vsynth2/dnxhd_720p_10bit
@@ -0,0 +1,4 @@
+0b8389955cce583bd2db7d2e727a6f15 *./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd
+2293760 ./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd
+bde04e992df2473e89aef4460265332d *./tests/data/dnxhd_720p_10bit.vsynth2.out.yuv
+stddev:    1.45 PSNR: 44.89 MAXDIFF:   22 bytes:   760320/  7603200

From be7bd626c4832c4bc5295a01bfd68156ac78ccba Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Thu, 14 Jul 2011 13:02:45 -0400
Subject: [PATCH 12/14] eac3enc: use different numbers of blocks per frame to
 allow higher bitrates

---
 libavcodec/ac3enc.c          | 179 ++++++++++++++++++++---------------
 libavcodec/ac3enc.h          |   2 +
 libavcodec/ac3enc_fixed.c    |   2 +-
 libavcodec/ac3enc_float.c    |   2 +-
 libavcodec/ac3enc_template.c |  24 ++---
 libavcodec/eac3enc.c         |  30 ++++--
 6 files changed, 142 insertions(+), 97 deletions(-)

diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index e7a5b1611e..017b1d4b93 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -186,7 +186,7 @@ void ff_ac3_adjust_frame_size(AC3EncodeContext *s)
     s->frame_size = s->frame_size_min +
                     2 * (s->bits_written * s->sample_rate < s->samples_written * s->bit_rate);
     s->bits_written    += s->frame_size * 8;
-    s->samples_written += AC3_FRAME_SIZE;
+    s->samples_written += AC3_BLOCK_SIZE * s->num_blocks;
 }
 
 
@@ -198,7 +198,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
 
     /* set coupling use flags for each block/channel */
     /* TODO: turn coupling on/off and adjust start band based on bit usage */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         for (ch = 1; ch <= s->fbw_channels; ch++)
             block->channel_in_cpl[ch] = s->cpl_on;
@@ -208,7 +208,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
        enabled for that block */
     got_cpl_snr = 0;
     num_cpl_blocks = 0;
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         block->num_cpl_channels = 0;
         for (ch = 1; ch <= s->fbw_channels; ch++)
@@ -244,7 +244,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
         s->cpl_on = 0;
 
     /* set bandwidth for each channel */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         for (ch = 1; ch <= s->fbw_channels; ch++) {
             if (block->channel_in_cpl[ch])
@@ -269,7 +269,7 @@ void ff_ac3_apply_rematrixing(AC3EncodeContext *s)
     if (!s->rematrixing_enabled)
         return;
 
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         if (block->new_rematrixing_strategy)
             flags = block->rematrixing_flags;
@@ -318,7 +318,7 @@ static av_cold void exponent_init(AC3EncodeContext *s)
 static void extract_exponents(AC3EncodeContext *s)
 {
     int ch        = !s->cpl_on;
-    int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS * (s->channels - ch + 1);
+    int chan_size = AC3_MAX_COEFS * s->num_blocks * (s->channels - ch + 1);
     AC3Block *block = &s->blocks[0];
 
     s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch], chan_size);
@@ -331,6 +331,15 @@ static void extract_exponents(AC3EncodeContext *s)
  */
 #define EXP_DIFF_THRESHOLD 500
 
+/**
+ * Table used to select exponent strategy based on exponent reuse block interval.
+ */
+static const uint8_t exp_strategy_reuse_tab[4][6] = {
+    { EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
+    { EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
+    { EXP_D25, EXP_D25, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
+    { EXP_D45, EXP_D25, EXP_D25, EXP_D15, EXP_D15, EXP_D15 }
+};
 
 /**
  * Calculate exponent strategies for all channels.
@@ -349,7 +358,7 @@ static void compute_exp_strategy(AC3EncodeContext *s)
            reused in the next frame */
         exp_strategy[0] = EXP_NEW;
         exp += AC3_MAX_COEFS;
-        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++, exp += AC3_MAX_COEFS) {
+        for (blk = 1; blk < s->num_blocks; blk++, exp += AC3_MAX_COEFS) {
             if (ch == CPL_CH) {
                 if (!s->blocks[blk-1].cpl_in_use) {
                     exp_strategy[blk] = EXP_NEW;
@@ -373,23 +382,18 @@ static void compute_exp_strategy(AC3EncodeContext *s)
         /* now select the encoding strategy type : if exponents are often
            recoded, we use a coarse encoding */
         blk = 0;
-        while (blk < AC3_MAX_BLOCKS) {
+        while (blk < s->num_blocks) {
             blk1 = blk + 1;
-            while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE)
+            while (blk1 < s->num_blocks && exp_strategy[blk1] == EXP_REUSE)
                 blk1++;
-            switch (blk1 - blk) {
-            case 1:  exp_strategy[blk] = EXP_D45; break;
-            case 2:
-            case 3:  exp_strategy[blk] = EXP_D25; break;
-            default: exp_strategy[blk] = EXP_D15; break;
-            }
+            exp_strategy[blk] = exp_strategy_reuse_tab[s->num_blks_code][blk1-blk-1];
             blk = blk1;
         }
     }
     if (s->lfe_on) {
         ch = s->lfe_channel;
         s->exp_strategy[ch][0] = EXP_D15;
-        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++)
+        for (blk = 1; blk < s->num_blocks; blk++)
             s->exp_strategy[ch][blk] = EXP_REUSE;
     }
 
@@ -487,7 +491,7 @@ static void encode_exponents(AC3EncodeContext *s)
 
         cpl = (ch == CPL_CH);
         blk = 0;
-        while (blk < AC3_MAX_BLOCKS) {
+        while (blk < s->num_blocks) {
             AC3Block *block = &s->blocks[blk];
             if (cpl && !block->cpl_in_use) {
                 exp += AC3_MAX_COEFS;
@@ -500,7 +504,7 @@ static void encode_exponents(AC3EncodeContext *s)
             /* count the number of EXP_REUSE blocks after the current block
                and set exponent reference block numbers */
             s->exp_ref_block[ch][blk] = blk;
-            while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) {
+            while (blk1 < s->num_blocks && exp_strategy[blk1] == EXP_REUSE) {
                 s->exp_ref_block[ch][blk1] = blk;
                 blk1++;
             }
@@ -536,7 +540,7 @@ static void group_exponents(AC3EncodeContext *s)
     int exp0, exp1;
 
     bit_count = 0;
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
             int exp_strategy = s->exp_strategy[ch][blk];
@@ -625,30 +629,38 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
     if (s->eac3) {
         /* bitstream info header */
         frame_bits += 35;
-        frame_bits += 1 + 1 + 1;
+        frame_bits += 1 + 1;
+        if (s->num_blocks != 0x6)
+            frame_bits++;
+        frame_bits++;
         /* audio frame header */
-        frame_bits += 2;
+        if (s->num_blocks == 6)
+            frame_bits += 2;
         frame_bits += 10;
         /* exponent strategy */
         if (s->use_frame_exp_strategy)
             frame_bits += 5 * s->fbw_channels;
         else
-            frame_bits += AC3_MAX_BLOCKS * 2 * s->fbw_channels;
+            frame_bits += s->num_blocks * 2 * s->fbw_channels;
         if (s->lfe_on)
-            frame_bits += AC3_MAX_BLOCKS;
+            frame_bits += s->num_blocks;
         /* converter exponent strategy */
-        frame_bits += s->fbw_channels * 5;
+        if (s->num_blks_code != 0x3)
+            frame_bits++;
+        else
+            frame_bits += s->fbw_channels * 5;
         /* snr offsets */
         frame_bits += 10;
         /* block start info */
-        frame_bits++;
+        if (s->num_blocks != 1)
+            frame_bits++;
     } else {
         frame_bits += 49;
         frame_bits += frame_bits_inc[s->channel_mode];
     }
 
     /* audio blocks */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         if (!s->eac3) {
             /* block switch flags */
             frame_bits += s->fbw_channels;
@@ -750,7 +762,7 @@ static void count_frame_bits(AC3EncodeContext *s)
         /* coupling */
         if (s->channel_mode > AC3_CHMODE_MONO) {
             frame_bits++;
-            for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+            for (blk = 1; blk < s->num_blocks; blk++) {
                 AC3Block *block = &s->blocks[blk];
                 frame_bits++;
                 if (block->new_cpl_strategy)
@@ -762,7 +774,7 @@ static void count_frame_bits(AC3EncodeContext *s)
             if (s->use_frame_exp_strategy) {
                 frame_bits += 5 * s->cpl_on;
             } else {
-                for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+                for (blk = 0; blk < s->num_blocks; blk++)
                     frame_bits += 2 * s->blocks[blk].cpl_in_use;
             }
         }
@@ -778,7 +790,7 @@ static void count_frame_bits(AC3EncodeContext *s)
     }
 
     /* audio blocks */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
 
         /* coupling strategy */
@@ -865,7 +877,7 @@ static void bit_alloc_masking(AC3EncodeContext *s)
 {
     int blk, ch;
 
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
             /* We only need psd and mask for calculating bap.
@@ -901,9 +913,9 @@ static void reset_block_bap(AC3EncodeContext *s)
 
     ref_bap = s->bap_buffer;
     for (ch = 0; ch <= s->channels; ch++) {
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (blk = 0; blk < s->num_blocks; blk++)
             s->ref_bap[ch][blk] = ref_bap + AC3_MAX_COEFS * s->exp_ref_block[ch][blk];
-        ref_bap += AC3_MAX_COEFS * AC3_MAX_BLOCKS;
+        ref_bap += AC3_MAX_COEFS * s->num_blocks;
     }
     s->ref_bap_set = 1;
 }
@@ -936,7 +948,7 @@ static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
 {
     int blk;
 
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         if (ch == CPL_CH && !block->cpl_in_use)
             continue;
@@ -980,7 +992,7 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
     snr_offset = (snr_offset - 240) << 2;
 
     reset_block_bap(s);
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
 
         for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
@@ -1194,7 +1206,7 @@ void ff_ac3_quantize_mantissas(AC3EncodeContext *s)
 {
     int blk, ch, ch0=0, got_cpl;
 
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         AC3Mant m = { 0 };
 
@@ -1557,7 +1569,7 @@ void ff_ac3_output_frame(AC3EncodeContext *s, unsigned char *frame)
 
     s->output_frame_header(s);
 
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+    for (blk = 0; blk < s->num_blocks; blk++)
         output_audio_block(s, blk);
 
     output_frame_end(s);
@@ -1585,6 +1597,7 @@ static void dprint_options(AC3EncodeContext *s)
     av_dlog(avctx, "channel_layout: %s\n", strbuf);
     av_dlog(avctx, "sample_rate: %d\n", s->sample_rate);
     av_dlog(avctx, "bit_rate: %d\n", s->bit_rate);
+    av_dlog(avctx, "blocks/frame: %d (code=%d)\n", s->num_blocks, s->num_blks_code);
     if (s->cutoff)
         av_dlog(avctx, "cutoff: %d\n", s->cutoff);
 
@@ -1851,7 +1864,7 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
     av_freep(&s->qmant_buffer);
     av_freep(&s->cpl_coord_exp_buffer);
     av_freep(&s->cpl_coord_mant_buffer);
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         av_freep(&block->mdct_coef);
         av_freep(&block->fixed_coef);
@@ -1958,18 +1971,30 @@ static av_cold int validate_options(AC3EncodeContext *s)
     /* validate bit rate */
     if (s->eac3) {
         int max_br, min_br, wpf, min_br_dist, min_br_code;
+        int num_blks_code, num_blocks, frame_samples;
 
         /* calculate min/max bitrate */
-        max_br = 2048 * s->sample_rate / AC3_FRAME_SIZE * 16;
-        min_br = ((s->sample_rate + (AC3_FRAME_SIZE-1)) / AC3_FRAME_SIZE) * 16;
+        /* TODO: More testing with 3 and 2 blocks. All E-AC-3 samples I've
+                 found use either 6 blocks or 1 block, even though 2 or 3 blocks
+                 would work as far as the bit rate is concerned. */
+        for (num_blks_code = 3; num_blks_code >= 0; num_blks_code--) {
+            num_blocks = ((int[]){ 1, 2, 3, 6 })[num_blks_code];
+            frame_samples  = AC3_BLOCK_SIZE * num_blocks;
+            max_br = 2048 * s->sample_rate / frame_samples * 16;
+            min_br = ((s->sample_rate + (frame_samples-1)) / frame_samples) * 16;
+            if (avctx->bit_rate <= max_br)
+                break;
+        }
         if (avctx->bit_rate < min_br || avctx->bit_rate > max_br) {
             av_log(avctx, AV_LOG_ERROR, "invalid bit rate. must be %d to %d "
                    "for this sample rate\n", min_br, max_br);
             return AVERROR(EINVAL);
         }
+        s->num_blks_code = num_blks_code;
+        s->num_blocks    = num_blocks;
 
         /* calculate words-per-frame for the selected bitrate */
-        wpf = (avctx->bit_rate / 16) * AC3_FRAME_SIZE / s->sample_rate;
+        wpf = (avctx->bit_rate / 16) * frame_samples / s->sample_rate;
         av_assert1(wpf > 0 && wpf <= 2048);
 
         /* find the closest AC-3 bitrate code to the selected bitrate.
@@ -2001,6 +2026,8 @@ static av_cold int validate_options(AC3EncodeContext *s)
         }
         s->frame_size_code = i << 1;
         s->frame_size_min  = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
+        s->num_blks_code   = 0x3;
+        s->num_blocks      = 6;
     }
     s->bit_rate   = avctx->bit_rate;
     s->frame_size = s->frame_size_min;
@@ -2065,13 +2092,13 @@ static av_cold void set_bandwidth(AC3EncodeContext *s)
     /* set number of coefficients for each channel */
     for (ch = 1; ch <= s->fbw_channels; ch++) {
         s->start_freq[ch] = 0;
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (blk = 0; blk < s->num_blocks; blk++)
             s->blocks[blk].end_freq[ch] = s->bandwidth_code * 3 + 73;
     }
     /* LFE channel always has 7 coefs */
     if (s->lfe_on) {
         s->start_freq[s->lfe_channel] = 0;
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (blk = 0; blk < s->num_blocks; blk++)
             s->blocks[blk].end_freq[ch] = 7;
     }
 
@@ -2108,7 +2135,7 @@ static av_cold void set_bandwidth(AC3EncodeContext *s)
 
         s->start_freq[CPL_CH] = cpl_start_band * 12 + 37;
         s->cpl_end_freq       = cpl_end_band   * 12 + 37;
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (blk = 0; blk < s->num_blocks; blk++)
             s->blocks[blk].end_freq[CPL_CH] = s->cpl_end_freq;
     }
 }
@@ -2119,35 +2146,37 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
     AVCodecContext *avctx = s->avctx;
     int blk, ch;
     int channels = s->channels + 1; /* includes coupling channel */
+    int channel_blocks = channels * s->num_blocks;
+    int total_coefs    = AC3_MAX_COEFS * channel_blocks;
 
     if (s->allocate_sample_buffers(s))
         goto alloc_fail;
 
-    FF_ALLOC_OR_GOTO(avctx, s->bap_buffer,  AC3_MAX_BLOCKS * channels *
-                     AC3_MAX_COEFS * sizeof(*s->bap_buffer),  alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels *
-                     AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail);
-    FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels *
-                      AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels *
-                     AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels *
-                     128 * sizeof(*s->grouped_exp_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * channels *
-                     AC3_MAX_COEFS * sizeof(*s->psd_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * channels *
-                     64 * sizeof(*s->band_psd_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * channels *
-                     64 * sizeof(*s->mask_buffer), alloc_fail);
-    FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * channels *
-                     AC3_MAX_COEFS * sizeof(*s->qmant_buffer), alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, total_coefs *
+                     sizeof(*s->bap_buffer), alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, total_coefs *
+                     sizeof(*s->bap1_buffer), alloc_fail);
+    FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, total_coefs *
+                      sizeof(*s->mdct_coef_buffer), alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, total_coefs *
+                     sizeof(*s->exp_buffer), alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, channel_blocks * 128 *
+                     sizeof(*s->grouped_exp_buffer), alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, total_coefs *
+                     sizeof(*s->psd_buffer), alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, channel_blocks * 64 *
+                     sizeof(*s->band_psd_buffer), alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, channel_blocks * 64 *
+                     sizeof(*s->mask_buffer), alloc_fail);
+    FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, total_coefs *
+                     sizeof(*s->qmant_buffer), alloc_fail);
     if (s->cpl_enabled) {
-        FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, AC3_MAX_BLOCKS * channels *
-                         16 * sizeof(*s->cpl_coord_exp_buffer), alloc_fail);
-        FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, AC3_MAX_BLOCKS * channels *
-                         16 * sizeof(*s->cpl_coord_mant_buffer), alloc_fail);
+        FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, channel_blocks * 16 *
+                         sizeof(*s->cpl_coord_exp_buffer), alloc_fail);
+        FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, channel_blocks * 16 *
+                         sizeof(*s->cpl_coord_mant_buffer), alloc_fail);
     }
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef),
                           alloc_fail);
@@ -2183,23 +2212,23 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
             }
 
             /* arrangement: channel, block, coeff */
-            block->exp[ch]         = &s->exp_buffer        [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
-            block->mdct_coef[ch]   = &s->mdct_coef_buffer  [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
+            block->exp[ch]         = &s->exp_buffer        [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
+            block->mdct_coef[ch]   = &s->mdct_coef_buffer  [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
         }
     }
 
     if (!s->fixed_point) {
-        FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels *
-                          AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, total_coefs *
+                          sizeof(*s->fixed_coef_buffer), alloc_fail);
+        for (blk = 0; blk < s->num_blocks; blk++) {
             AC3Block *block = &s->blocks[blk];
             FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
                               sizeof(*block->fixed_coef), alloc_fail);
             for (ch = 0; ch < channels; ch++)
-                block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
+                block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
         }
     } else {
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        for (blk = 0; blk < s->num_blocks; blk++) {
             AC3Block *block = &s->blocks[blk];
             FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
                               sizeof(*block->fixed_coef), alloc_fail);
@@ -2226,14 +2255,14 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
 
     s->eac3 = avctx->codec_id == CODEC_ID_EAC3;
 
-    avctx->frame_size = AC3_FRAME_SIZE;
-
     ff_ac3_common_init();
 
     ret = validate_options(s);
     if (ret)
         return ret;
 
+    avctx->frame_size = AC3_BLOCK_SIZE * s->num_blocks;
+
     s->bitstream_mode = avctx->audio_service_type;
     if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)
         s->bitstream_mode = 0x7;
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index a4a8fd4325..407e751b40 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -151,6 +151,8 @@ typedef struct AC3EncodeContext {
     int bit_rate;                           ///< target bit rate, in bits-per-second
     int sample_rate;                        ///< sampling frequency, in Hz
 
+    int num_blks_code;                      ///< number of blocks code                  (numblkscod)
+    int num_blocks;                         ///< number of blocks per frame
     int frame_size_min;                     ///< minimum frame size in case rounding is necessary
     int frame_size;                         ///< current frame size in bytes
     int frame_size_code;                    ///< frame size code                        (frmsizecod)
diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
index ed8992ebff..508ecb1efb 100644
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -93,7 +93,7 @@ static void scale_coefficients(AC3EncodeContext *s)
 {
     int blk, ch;
 
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         for (ch = 1; ch <= s->channels; ch++) {
             s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 776d185fa4..9f691a1e16 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -103,7 +103,7 @@ static int normalize_samples(AC3EncodeContext *s)
  */
 static void scale_coefficients(AC3EncodeContext *s)
 {
-    int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS;
+    int chan_size = AC3_MAX_COEFS * s->num_blocks;
     s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size,
                                s->mdct_coef_buffer  + chan_size,
                                chan_size * s->channels);
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 9b9151b3e0..c4e2a121bf 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -79,13 +79,13 @@ static void deinterleave_input_samples(AC3EncodeContext *s,
         int sinc;
 
         /* copy last 256 samples of previous frame to the start of the current frame */
-        memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE],
+        memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_BLOCK_SIZE * s->num_blocks],
                AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0]));
 
         /* deinterleave */
         sinc = s->channels;
         sptr = samples + s->channel_map[ch];
-        for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) {
+        for (i = AC3_BLOCK_SIZE; i < AC3_BLOCK_SIZE * (s->num_blocks + 1); i++) {
             s->planar_samples[ch][i] = *sptr;
             sptr += sinc;
         }
@@ -103,7 +103,7 @@ static void apply_mdct(AC3EncodeContext *s)
     int blk, ch;
 
     for (ch = 0; ch < s->channels; ch++) {
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        for (blk = 0; blk < s->num_blocks; blk++) {
             AC3Block *block = &s->blocks[blk];
             const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
 
@@ -159,7 +159,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
     cpl_start     = FFMIN(256, cpl_start + num_cpl_coefs) - num_cpl_coefs;
 
     /* calculate coupling channel from fbw channels */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         CoefType *cpl_coef = &block->mdct_coef[CPL_CH][cpl_start];
         if (!block->cpl_in_use)
@@ -188,7 +188,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
     while (i < s->cpl_end_freq) {
         int band_size = s->cpl_band_sizes[bnd];
         for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
-            for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+            for (blk = 0; blk < s->num_blocks; blk++) {
                 AC3Block *block = &s->blocks[blk];
                 if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch]))
                     continue;
@@ -203,7 +203,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
     }
 
     /* determine which blocks to send new coupling coordinates for */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block  = &s->blocks[blk];
         AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL;
         int new_coords = 0;
@@ -261,7 +261,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
        coordinates in successive blocks */
     for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
         blk = 0;
-        while (blk < AC3_MAX_BLOCKS) {
+        while (blk < s->num_blocks) {
             int blk1;
             CoefSumType energy_cpl;
             AC3Block *block  = &s->blocks[blk];
@@ -273,7 +273,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
 
             energy_cpl = energy[blk][CPL_CH][bnd];
             blk1 = blk+1;
-            while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
+            while (!s->blocks[blk1].new_cpl_coords && blk1 < s->num_blocks) {
                 if (s->blocks[blk1].cpl_in_use)
                     energy_cpl += energy[blk1][CPL_CH][bnd];
                 blk1++;
@@ -285,7 +285,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
                     continue;
                 energy_ch = energy[blk][ch][bnd];
                 blk1 = blk+1;
-                while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
+                while (!s->blocks[blk1].new_cpl_coords && blk1 < s->num_blocks) {
                     if (s->blocks[blk1].cpl_in_use)
                         energy_ch += energy[blk1][ch][bnd];
                     blk1++;
@@ -297,7 +297,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
     }
 
     /* calculate exponents/mantissas for coupling coordinates */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         if (!block->cpl_in_use || !block->new_cpl_coords)
             continue;
@@ -362,7 +362,7 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s)
     if (s->channel_mode != AC3_CHMODE_STEREO)
         return;
 
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         block = &s->blocks[blk];
         block->new_rematrixing_strategy = !blk;
 
@@ -440,7 +440,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
         scale_coefficients(s);
 
     clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
-                      AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
+                      AC3_MAX_COEFS * s->num_blocks * s->channels);
 
     s->cpl_on = s->cpl_enabled;
     ff_ac3_compute_coupling_strategy(s);
diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c
index 09fa80fcb9..038aa2a234 100644
--- a/libavcodec/eac3enc.c
+++ b/libavcodec/eac3enc.c
@@ -63,6 +63,11 @@ void ff_eac3_get_frame_exp_strategy(AC3EncodeContext *s)
 {
     int ch;
 
+    if (s->num_blocks < 6) {
+        s->use_frame_exp_strategy = 0;
+        return;
+    }
+
     s->use_frame_exp_strategy = 1;
     for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) {
         int expstr = eac3_frame_expstr_index_tab[s->exp_strategy[ch][0]-1]
@@ -89,7 +94,7 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s)
     /* set first cpl coords */
     for (ch = 1; ch <= s->fbw_channels; ch++)
         first_cpl_coords[ch] = 1;
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         for (ch = 1; ch <= s->fbw_channels; ch++) {
             if (block->channel_in_cpl[ch]) {
@@ -104,7 +109,7 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s)
     }
 
     /* set first cpl leak */
-    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+    for (blk = 0; blk < s->num_blocks; blk++) {
         AC3Block *block = &s->blocks[blk];
         if (block->cpl_in_use) {
             block->new_cpl_leak = 2;
@@ -130,7 +135,7 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
         put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
     } else {
         put_bits(&s->pb, 2, s->bit_alloc.sr_code);  /* sample rate code */
-        put_bits(&s->pb, 2, 0x3);                   /* number of blocks = 6 */
+        put_bits(&s->pb, 2, s->num_blks_code);      /* number of blocks */
     }
     put_bits(&s->pb, 3, s->channel_mode);           /* audio coding mode */
     put_bits(&s->pb, 1, s->lfe_on);                 /* LFE channel indicator */
@@ -141,11 +146,15 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
     /* TODO: mixing metadata */
     put_bits(&s->pb, 1, 0);                         /* no info metadata */
     /* TODO: info metadata */
+    if (s->num_blocks != 6)
+        put_bits(&s->pb, 1, !(s->avctx->frame_number % 6)); /* converter sync flag */
     put_bits(&s->pb, 1, 0);                         /* no additional bit stream info */
 
     /* frame header */
+    if (s->num_blocks == 6) {
     put_bits(&s->pb, 1, !s->use_frame_exp_strategy);/* exponent strategy syntax */
     put_bits(&s->pb, 1, 0);                         /* aht enabled = no */
+    }
     put_bits(&s->pb, 2, 0);                         /* snr offset strategy = 1 */
     put_bits(&s->pb, 1, 0);                         /* transient pre-noise processing enabled = no */
     put_bits(&s->pb, 1, 0);                         /* block switch syntax enabled = no */
@@ -158,7 +167,7 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
     /* coupling strategy use flags */
     if (s->channel_mode > AC3_CHMODE_MONO) {
         put_bits(&s->pb, 1, s->blocks[0].cpl_in_use);
-        for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+        for (blk = 1; blk < s->num_blocks; blk++) {
             AC3Block *block = &s->blocks[blk];
             put_bits(&s->pb, 1, block->new_cpl_strategy);
             if (block->new_cpl_strategy)
@@ -170,26 +179,31 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
         for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++)
             put_bits(&s->pb, 5, s->frame_exp_strategy[ch]);
     } else {
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (blk = 0; blk < s->num_blocks; blk++)
             for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++)
                 put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
     }
     if (s->lfe_on) {
-        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+        for (blk = 0; blk < s->num_blocks; blk++)
             put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
     }
-    /* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) */
+    /* E-AC-3 to AC-3 converter exponent strategy (not optional when num blocks == 6) */
+    if (s->num_blocks != 6) {
+        put_bits(&s->pb, 1, 0);
+    } else {
     for (ch = 1; ch <= s->fbw_channels; ch++) {
         if (s->use_frame_exp_strategy)
             put_bits(&s->pb, 5, s->frame_exp_strategy[ch]);
         else
             put_bits(&s->pb, 5, 0);
     }
+    }
     /* snr offsets */
     put_bits(&s->pb, 6, s->coarse_snr_offset);
     put_bits(&s->pb, 4, s->fine_snr_offset[1]);
     /* block start info */
-    put_bits(&s->pb, 1, 0);
+    if (s->num_blocks > 1)
+        put_bits(&s->pb, 1, 0);
 }
 
 

From 4cc843facdc8cd922c423a09c5fff911137e4b4c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 17 Jul 2011 13:27:46 +0200
Subject: [PATCH 13/14] rtp: remove disabled code

---
 libavformat/rtpdec.c   | 16 ----------------
 libavformat/rtpproto.c | 23 -----------------------
 2 files changed, 39 deletions(-)

diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index 2c262d9bd3..37122600a5 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -218,22 +218,6 @@ static int rtp_valid_packet_in_sequence(RTPStatistics *s, uint16_t seq)
     return 1;
 }
 
-#if 0
-/**
-* This function is currently unused; without a valid local ntp time, I don't see how we could calculate the
-* difference between the arrival and sent timestamp.  As a result, the jitter and transit statistics values
-* never change.  I left this in in case someone else can see a way. (rdm)
-*/
-static void rtcp_update_jitter(RTPStatistics *s, uint32_t sent_timestamp, uint32_t arrival_timestamp)
-{
-    uint32_t transit= arrival_timestamp - sent_timestamp;
-    int d;
-    s->transit= transit;
-    d= FFABS(transit - s->transit);
-    s->jitter += d - ((s->jitter + 8)>>4);
-}
-#endif
-
 int rtp_check_and_send_back_rr(RTPDemuxContext *s, int count)
 {
     AVIOContext *pb;
diff --git a/libavformat/rtpproto.c b/libavformat/rtpproto.c
index dba1d35b03..0367198f36 100644
--- a/libavformat/rtpproto.c
+++ b/libavformat/rtpproto.c
@@ -225,20 +225,6 @@ static int rtp_read(URLContext *h, uint8_t *buf, int size)
     int len, n;
     struct pollfd p[2] = {{s->rtp_fd, POLLIN, 0}, {s->rtcp_fd, POLLIN, 0}};
 
-#if 0
-    for(;;) {
-        from_len = sizeof(from);
-        len = recvfrom (s->rtp_fd, buf, size, 0,
-                        (struct sockaddr *)&from, &from_len);
-        if (len < 0) {
-            if (ff_neterrno() == AVERROR(EAGAIN) ||
-                ff_neterrno() == AVERROR(EINTR))
-                continue;
-            return AVERROR(EIO);
-        }
-        break;
-    }
-#else
     for(;;) {
         if (url_interrupt_cb())
             return AVERROR_EXIT;
@@ -277,7 +263,6 @@ static int rtp_read(URLContext *h, uint8_t *buf, int size)
             return AVERROR(EIO);
         }
     }
-#endif
     return len;
 }
 
@@ -296,14 +281,6 @@ static int rtp_write(URLContext *h, const uint8_t *buf, int size)
     }
 
     ret = ffurl_write(hd, buf, size);
-#if 0
-    {
-        struct timespec ts;
-        ts.tv_sec = 0;
-        ts.tv_nsec = 10 * 1000000;
-        nanosleep(&ts, NULL);
-    }
-#endif
     return ret;
 }
 

From bb32fded3623a20ff8999c2924315841c08c985c Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 21 Jul 2011 19:04:37 +0100
Subject: [PATCH 14/14] dnxhddec: optimise dnxhd_decode_dct_block()

Template the function for 8/10-bit and use lowlevel bitstream
macros.

6% faster overall on i7 gcc 4.5.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/dnxhddec.c | 108 +++++++++++++++++++++++++++---------------
 1 file changed, 69 insertions(+), 39 deletions(-)

diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index 7b32cd3ed4..426be2e985 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -31,7 +31,7 @@
 #include "dnxhddata.h"
 #include "dsputil.h"
 
-typedef struct {
+typedef struct DNXHDContext {
     AVCodecContext *avctx;
     AVFrame picture;
     GetBitContext gb;
@@ -47,11 +47,16 @@ typedef struct {
     ScanTable scantable;
     const CIDEntry *cid_table;
     int bit_depth; // 8, 10 or 0 if not initialized at all.
+    void (*decode_dct_block)(struct DNXHDContext *ctx, DCTELEM *block,
+                             int n, int qscale);
 } DNXHDContext;
 
 #define DNXHD_VLC_BITS 9
 #define DNXHD_DC_VLC_BITS 7
 
+static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block, int n, int qscale);
+static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block, int n, int qscale);
+
 static av_cold int dnxhd_decode_init(AVCodecContext *avctx)
 {
     DNXHDContext *ctx = avctx->priv_data;
@@ -118,6 +123,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
         if (ctx->bit_depth != 10) {
             dsputil_init(&ctx->dsp, ctx->avctx);
             ctx->bit_depth = 10;
+            ctx->decode_dct_block = dnxhd_decode_dct_block_10;
         }
     } else {
         ctx->avctx->pix_fmt = PIX_FMT_YUV422P;
@@ -125,6 +131,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
         if (ctx->bit_depth != 8) {
             dsputil_init(&ctx->dsp, ctx->avctx);
             ctx->bit_depth = 8;
+            ctx->decode_dct_block = dnxhd_decode_dct_block_8;
         }
     }
 
@@ -165,71 +172,94 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
     return 0;
 }
 
-static int dnxhd_decode_dc(DNXHDContext *ctx)
+static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx,
+                                                    DCTELEM *block, int n,
+                                                    int qscale,
+                                                    int index_bits,
+                                                    int level_bias,
+                                                    int level_shift)
 {
-    int len;
-
-    len = get_vlc2(&ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1);
-    return len ? get_xbits(&ctx->gb, len) : 0;
-}
-
-static void dnxhd_decode_dct_block(DNXHDContext *ctx, DCTELEM *block, int n, int qscale)
-{
-    int i, j, index, index2;
+    int i, j, index1, index2, len;
     int level, component, sign;
-    const uint8_t *weigth_matrix;
+    const uint8_t *weight_matrix;
+    OPEN_READER(bs, &ctx->gb);
 
     if (n&2) {
         component = 1 + (n&1);
-        weigth_matrix = ctx->cid_table->chroma_weight;
+        weight_matrix = ctx->cid_table->chroma_weight;
     } else {
         component = 0;
-        weigth_matrix = ctx->cid_table->luma_weight;
+        weight_matrix = ctx->cid_table->luma_weight;
     }
 
-    ctx->last_dc[component] += dnxhd_decode_dc(ctx);
+    UPDATE_CACHE(bs, &ctx->gb);
+    GET_VLC(len, bs, &ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1);
+    if (len) {
+        level = GET_CACHE(bs, &ctx->gb);
+        LAST_SKIP_BITS(bs, &ctx->gb, len);
+        sign  = ~level >> 31;
+        level = (NEG_USR32(sign ^ level, len) ^ sign) - sign;
+        ctx->last_dc[component] += level;
+    }
     block[0] = ctx->last_dc[component];
     //av_log(ctx->avctx, AV_LOG_DEBUG, "dc %d\n", block[0]);
+
     for (i = 1; ; i++) {
-        index = get_vlc2(&ctx->gb, ctx->ac_vlc.table, DNXHD_VLC_BITS, 2);
-        //av_log(ctx->avctx, AV_LOG_DEBUG, "index %d\n", index);
-        level = ctx->cid_table->ac_level[index];
+        UPDATE_CACHE(bs, &ctx->gb);
+        GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table,
+                DNXHD_VLC_BITS, 2);
+        //av_log(ctx->avctx, AV_LOG_DEBUG, "index %d\n", index1);
+        level = ctx->cid_table->ac_level[index1];
         if (!level) { /* EOB */
             //av_log(ctx->avctx, AV_LOG_DEBUG, "EOB\n");
-            return;
-        }
-        sign = get_sbits(&ctx->gb, 1);
-
-        if (ctx->cid_table->ac_index_flag[index]) {
-            level += get_bits(&ctx->gb, ctx->cid_table->index_bits)<<6;
+            break;
         }
 
-        if (ctx->cid_table->ac_run_flag[index]) {
-            index2 = get_vlc2(&ctx->gb, ctx->run_vlc.table, DNXHD_VLC_BITS, 2);
+        sign = SHOW_SBITS(bs, &ctx->gb, 1);
+        SKIP_BITS(bs, &ctx->gb, 1);
+
+        if (ctx->cid_table->ac_index_flag[index1]) {
+            level += SHOW_UBITS(bs, &ctx->gb, index_bits) << 6;
+            SKIP_BITS(bs, &ctx->gb, index_bits);
+        }
+
+        if (ctx->cid_table->ac_run_flag[index1]) {
+            UPDATE_CACHE(bs, &ctx->gb);
+            GET_VLC(index2, bs, &ctx->gb, ctx->run_vlc.table,
+                    DNXHD_VLC_BITS, 2);
             i += ctx->cid_table->run[index2];
         }
 
         if (i > 63) {
             av_log(ctx->avctx, AV_LOG_ERROR, "ac tex damaged %d, %d\n", n, i);
-            return;
+            break;
         }
 
         j = ctx->scantable.permutated[i];
         //av_log(ctx->avctx, AV_LOG_DEBUG, "j %d\n", j);
-        //av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weigth %d\n", level, weigth_matrix[i]);
-        level = (2*level+1) * qscale * weigth_matrix[i];
-        if (ctx->bit_depth == 10) {
-            if (weigth_matrix[i] != 8)
-                level += 8;
-            level >>= 4;
-        } else {
-            if (weigth_matrix[i] != 32)
-                level += 32;
-            level >>= 6;
-        }
+        //av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weight %d\n", level, weight_matrix[i]);
+        level = (2*level+1) * qscale * weight_matrix[i];
+        if (weight_matrix[i] != level_bias)
+            level += level_bias;
+        level >>= level_shift;
+
         //av_log(NULL, AV_LOG_DEBUG, "i %d, j %d, end level %d\n", i, j, level);
         block[j] = (level^sign) - sign;
     }
+
+    CLOSE_READER(bs, &ctx->gb);
+}
+
+static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block,
+                                     int n, int qscale)
+{
+    dnxhd_decode_dct_block(ctx, block, n, qscale, 4, 32, 6);
+}
+
+static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block,
+                                      int n, int qscale)
+{
+    dnxhd_decode_dct_block(ctx, block, n, qscale, 6, 8, 4);
 }
 
 static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
@@ -247,7 +277,7 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
 
     for (i = 0; i < 8; i++) {
         ctx->dsp.clear_block(ctx->blocks[i]);
-        dnxhd_decode_dct_block(ctx, ctx->blocks[i], i, qscale);
+        ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
     }
 
     if (ctx->picture.interlaced_frame) {