diff --git a/configure b/configure
index 38a7fbb8ef..4c9a299256 100755
--- a/configure
+++ b/configure
@@ -612,31 +612,29 @@ check_deps(){
     done
 }
 
-print_config_h(){
-    enabled $1 && v=1 || v=0
-    echo "#define $2 $v"
-}
-
-print_config_mak(){
-    enabled $1 && v= || v=!
-    echo "$v$2=yes"
-}
-
-print_config_asm(){
-    enabled $1 && v=1 || v=0
-    echo "%define $2 $v"
-}
-
 print_config(){
     pfx=$1
     files=$2
     shift 2
-    for cfg; do
-        ucname="$(toupper $cfg)"
-        for f in $files; do
-            "print_config_${f##*.}" $cfg ${pfx}${ucname} >>$f
-        done
-    done
+    map 'eval echo "$v \${$v:-no}"' "$@" |
+    awk "BEGIN { split(\"$files\", files) }
+        {
+            c = \"$pfx\" toupper(\$1);
+            v = \$2;
+            sub(/yes/, 1, v);
+            sub(/no/,  0, v);
+            for (f in files) {
+                file = files[f];
+                if (file ~ /\\.h\$/) {
+                    printf(\"#define %s %d\\n\", c, v) >>file;
+                } else if (file ~ /\\.asm\$/) {
+                    printf(\"%%define %s %d\\n\", c, v) >>file;
+                } else if (file ~ /\\.mak\$/) {
+                    n = +v ? \"\" : \"!\";
+                    printf(\"%s%s=yes\\n\", n, c) >>file;
+                }
+            }
+        }"
 }
 
 print_enabled(){
diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index bd2ed0a5df..31714d73d5 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -23,25 +23,25 @@
 #include "libavutil/arm/cpu.h"
 #include "libavcodec/h264pred.h"
 
-void ff_pred16x16_vert_neon(uint8_t *src, int stride);
-void ff_pred16x16_hor_neon(uint8_t *src, int stride);
-void ff_pred16x16_plane_neon(uint8_t *src, int stride);
-void ff_pred16x16_dc_neon(uint8_t *src, int stride);
-void ff_pred16x16_128_dc_neon(uint8_t *src, int stride);
-void ff_pred16x16_left_dc_neon(uint8_t *src, int stride);
-void ff_pred16x16_top_dc_neon(uint8_t *src, int stride);
+void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
 
-void ff_pred8x8_vert_neon(uint8_t *src, int stride);
-void ff_pred8x8_hor_neon(uint8_t *src, int stride);
-void ff_pred8x8_plane_neon(uint8_t *src, int stride);
-void ff_pred8x8_dc_neon(uint8_t *src, int stride);
-void ff_pred8x8_128_dc_neon(uint8_t *src, int stride);
-void ff_pred8x8_left_dc_neon(uint8_t *src, int stride);
-void ff_pred8x8_top_dc_neon(uint8_t *src, int stride);
-void ff_pred8x8_l0t_dc_neon(uint8_t *src, int stride);
-void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
-void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
-void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
+void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
 
 static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
 {
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 5619efd109..f83924da34 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -48,7 +48,9 @@
 #include "h264pred_template.c"
 #undef BIT_DEPTH
 
-static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright,
+                                   ptrdiff_t stride)
+{
     const unsigned lt = src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
@@ -63,7 +65,9 @@ static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int st
     AV_WN32A(src+3*stride, v);
 }
 
-static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
     const unsigned lt = src[-1-1*stride];
     LOAD_LEFT_EDGE
 
@@ -73,7 +77,9 @@ static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int
     AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101);
 }
 
-static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
 
@@ -95,7 +101,9 @@ static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int
     src[3+3*stride]=(l3 + t3)>>1;
 }
 
-static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
     LOAD_LEFT_EDGE
@@ -119,7 +127,10 @@ static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int
     src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
 }
 
-static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_down_left_rv40_nodown_c(uint8_t *src,
+                                            const uint8_t *topright,
+                                            ptrdiff_t stride)
+{
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
     LOAD_LEFT_EDGE
@@ -142,8 +153,11 @@ static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *toprigh
     src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
 }
 
-static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride,
-                                       const int l0, const int l1, const int l2, const int l3, const int l4){
+static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright,
+                                       ptrdiff_t stride,
+                                       const int l0, const int l1, const int l2,
+                                       const int l3, const int l4)
+{
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 
@@ -165,20 +179,27 @@ static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, in
     src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
 }
 
-static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright,
+                                         ptrdiff_t stride)
+{
     LOAD_LEFT_EDGE
     LOAD_DOWN_LEFT_EDGE
 
     pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
 }
 
-static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src,
+                                                const uint8_t *topright,
+                                                ptrdiff_t stride)
+{
     LOAD_LEFT_EDGE
 
     pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
 }
 
-static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright,
+                                        ptrdiff_t stride)
+{
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 
@@ -200,7 +221,9 @@ static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, i
     src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
 }
 
-static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
+                                         ptrdiff_t stride)
+{
     LOAD_LEFT_EDGE
     LOAD_DOWN_LEFT_EDGE
     LOAD_TOP_EDGE
@@ -224,7 +247,10 @@ static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
     src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
 }
 
-static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src,
+                                                const uint8_t *topright,
+                                                ptrdiff_t stride)
+{
     LOAD_LEFT_EDGE
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
@@ -247,7 +273,9 @@ static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *top
     src[3+3*stride]=l3;
 }
 
-static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright,
+                             ptrdiff_t stride)
+{
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
     uint8_t *top = src-stride;
     int y;
@@ -262,15 +290,18 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
     }
 }
 
-static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
+static void pred16x16_plane_svq3_c(uint8_t *src, ptrdiff_t stride)
+{
     pred16x16_plane_compat_8_c(src, stride, 1, 0);
 }
 
-static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
+static void pred16x16_plane_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
     pred16x16_plane_compat_8_c(src, stride, 0, 1);
 }
 
-static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
+static void pred16x16_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
+{
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
     uint8_t *top = src-stride;
     int y;
@@ -297,7 +328,8 @@ static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
+static void pred8x8_left_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
     int i;
     unsigned dc0;
 
@@ -312,7 +344,8 @@ static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
+static void pred8x8_top_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
     int i;
     unsigned dc0;
 
@@ -327,7 +360,8 @@ static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
+static void pred8x8_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
     int i;
     unsigned dc0 = 0;
 
@@ -348,7 +382,8 @@ static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
+static void pred8x8_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
+{
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
     uint8_t *top = src-stride;
     int y;
@@ -370,7 +405,9 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
 /**
  * Set the intra prediction function pointers.
  */
-void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth,
+                       const int chroma_format_idc)
+{
 //    MpegEncContext * const s = &h->s;
 
 #undef FUNC
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index d68f39bf8c..33f39447df 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -90,21 +90,23 @@
  * Context for storing H.264 prediction functions
  */
 typedef struct H264PredContext {
-    void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, int stride); //FIXME move to dsp?
-    void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, int stride);
-    void(*pred8x8[4 + 3 + 4])(uint8_t *src, int stride);
-    void(*pred16x16[4 + 3 + 2])(uint8_t *src, int stride);
+    void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright,
+                              ptrdiff_t stride);
+    void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright,
+                           ptrdiff_t stride);
+    void(*pred8x8[4 + 3 + 4])(uint8_t *src, ptrdiff_t stride);
+    void(*pred16x16[4 + 3 + 2])(uint8_t *src, ptrdiff_t stride);
 
     void(*pred4x4_add[2])(uint8_t *pix /*align  4*/,
-                          const DCTELEM *block /*align 16*/, int stride);
+                          const DCTELEM *block /*align 16*/, ptrdiff_t stride);
     void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
-                           const DCTELEM *block /*align 16*/, int stride);
+                           const DCTELEM *block /*align 16*/, ptrdiff_t stride);
     void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
                           const int *block_offset,
-                          const DCTELEM *block /*align 16*/, int stride);
+                          const DCTELEM *block /*align 16*/, ptrdiff_t stride);
     void(*pred16x16_add[3])(uint8_t *pix /*align 16*/,
                             const int *block_offset,
-                            const DCTELEM *block /*align 16*/, int stride);
+                            const DCTELEM *block /*align 16*/, ptrdiff_t stride);
 } H264PredContext;
 
 void ff_h264_pred_init(H264PredContext *h, int codec_id,
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 3a1b1cf94e..f08fdf4ba4 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -29,7 +29,9 @@
 
 #include "bit_depth_template.c"
 
-static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
+                                    ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a= AV_RN4PA(src-stride);
@@ -40,7 +42,9 @@ static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int
     AV_WN4PA(src+3*stride, a);
 }
 
-static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
+                                      ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
@@ -49,7 +53,9 @@ static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, in
     AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
 }
 
-static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
+                              ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
@@ -62,7 +68,9 @@ static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _strid
     AV_WN4PA(src+3*stride, a);
 }
 
-static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
+                                   ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
@@ -74,7 +82,9 @@ static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _
     AV_WN4PA(src+3*stride, a);
 }
 
-static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
+                                  ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
@@ -86,7 +96,9 @@ static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _s
     AV_WN4PA(src+3*stride, a);
 }
 
-static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
+                                  ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
@@ -97,7 +109,9 @@ static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _s
     AV_WN4PA(src+3*stride, a);
 }
 
-static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright,
+                                  ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
@@ -108,7 +122,9 @@ static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _s
     AV_WN4PA(src+3*stride, a);
 }
 
-static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright,
+                                  ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
@@ -144,7 +160,9 @@ static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _s
     const unsigned av_unused t2 = src[ 2-1*stride];\
     const unsigned av_unused t3 = src[ 3-1*stride];\
 
-static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
+                                      ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const int lt= src[-1-1*stride];
@@ -169,7 +187,9 @@ static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, in
     src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
 }
 
-static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
+static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
+                                     ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     const pixel *topright = (const pixel*)_topright;
     int stride = _stride>>(sizeof(pixel)-1);
@@ -195,7 +215,10 @@ static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, in
     src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
 }
 
-static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
+                                          const uint8_t *topright,
+                                          ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const int lt= src[-1-1*stride];
@@ -220,7 +243,10 @@ static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright
     src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
 }
 
-static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
+static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
+                                         const uint8_t *_topright,
+                                         ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     const pixel *topright = (const pixel*)_topright;
     int stride = _stride>>(sizeof(pixel)-1);
@@ -245,7 +271,9 @@ static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright
     src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
 }
 
-static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
+                                         ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     LOAD_LEFT_EDGE
@@ -268,7 +296,10 @@ static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
     src[3+3*stride]=l3;
 }
 
-static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){
+static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
+                                           const uint8_t *topright,
+                                           ptrdiff_t _stride)
+{
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
     const int lt= src[-1-1*stride];
@@ -293,7 +324,8 @@ static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *toprigh
     src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
 }
 
-static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
+static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
+{
     int i;
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
@@ -310,7 +342,8 @@ static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
     }
 }
 
-static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
+static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
+{
     int i;
     pixel *src = (pixel*)_src;
     stride >>= sizeof(pixel)-1;
@@ -334,7 +367,8 @@ static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
         src += stride;\
     }
 
-static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     int i, dc=0;
     pixel *src = (pixel*)_src;
     pixel4 dcsplat;
@@ -352,7 +386,8 @@ static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
     PREDICT_16x16_DC(dcsplat);
 }
 
-static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     int i, dc=0;
     pixel *src = (pixel*)_src;
     pixel4 dcsplat;
@@ -366,7 +401,8 @@ static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
     PREDICT_16x16_DC(dcsplat);
 }
 
-static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     int i, dc=0;
     pixel *src = (pixel*)_src;
     pixel4 dcsplat;
@@ -381,7 +417,8 @@ static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
 }
 
 #define PRED16x16_X(n, v) \
-static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\
+static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
+{\
     int i;\
     pixel *src = (pixel*)_src;\
     stride >>= sizeof(pixel)-1;\
@@ -392,12 +429,16 @@ PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
 
-static inline void FUNCC(pred16x16_plane_compat)(uint8_t *p_src, int p_stride, const int svq3, const int rv40){
+static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
+                                                 ptrdiff_t _stride,
+                                                 const int svq3,
+                                                 const int rv40)
+{
   int i, j, k;
   int a;
   INIT_CLIP
-  pixel *src = (pixel*)p_src;
-  int stride = p_stride>>(sizeof(pixel)-1);
+  pixel *src = (pixel*)_src;
+  int stride = _stride>>(sizeof(pixel)-1);
   const pixel * const src0 = src +7-stride;
   const pixel *       src1 = src +8*stride-1;
   const pixel *       src2 = src1-2*stride;    // == src+6*stride-1;
@@ -437,11 +478,13 @@ static inline void FUNCC(pred16x16_plane_compat)(uint8_t *p_src, int p_stride, c
   }
 }
 
-static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
+static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
 }
 
-static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
+static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
+{
     int i;
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
@@ -454,7 +497,8 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
     }
 }
 
-static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){
+static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
+{
     int i;
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
@@ -467,7 +511,8 @@ static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){
     }
 }
 
-static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
+static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
+{
     int i;
     pixel *src = (pixel*)_src;
     stride >>= sizeof(pixel)-1;
@@ -479,7 +524,8 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
     }
 }
 
-static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){
+static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
+{
     int i;
     pixel *src = (pixel*)_src;
     stride >>= sizeof(pixel)-1;
@@ -491,7 +537,8 @@ static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){
 }
 
 #define PRED8x8_X(n, v)\
-static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
+static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
+{\
     int i;\
     const pixel4 a = PIXEL_SPLAT_X4(v);\
     pixel *src = (pixel*)_src;\
@@ -506,12 +553,14 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
 
-static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     FUNCC(pred8x8_128_dc)(_src, stride);
     FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
 }
 
-static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     int i;
     int dc0, dc2;
     pixel4 dc0splat, dc2splat;
@@ -536,12 +585,14 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
     }
 }
 
-static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     FUNCC(pred8x8_left_dc)(_src, stride);
     FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
 }
 
-static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     int i;
     int dc0, dc1;
     pixel4 dc0splat, dc1splat;
@@ -566,7 +617,8 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
     }
 }
 
-static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     int i;
     int dc0, dc1;
     pixel4 dc0splat, dc1splat;
@@ -587,7 +639,8 @@ static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){
     }
 }
 
-static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     int i;
     int dc0, dc1, dc2;
     pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
@@ -615,7 +668,8 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
     }
 }
 
-static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){
+static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
+{
     int i;
     int dc0, dc1, dc2, dc3, dc4;
     pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
@@ -658,51 +712,60 @@ static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){
 }
 
 //the following 4 function should not be optimized!
-static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
+static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred8x8_top_dc)(src, stride);
     FUNCC(pred4x4_dc)(src, NULL, stride);
 }
 
-static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){
+static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred8x16_top_dc)(src, stride);
     FUNCC(pred4x4_dc)(src, NULL, stride);
 }
 
-static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
+static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred8x8_dc)(src, stride);
     FUNCC(pred4x4_top_dc)(src, NULL, stride);
 }
 
-static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){
+static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred8x16_dc)(src, stride);
     FUNCC(pred4x4_top_dc)(src, NULL, stride);
 }
 
-static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
+static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred8x8_left_dc)(src, stride);
     FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
     FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
 }
 
-static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){
+static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred8x16_left_dc)(src, stride);
     FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
     FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
 }
 
-static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
+static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred8x8_left_dc)(src, stride);
     FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
     FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
 }
 
-static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){
+static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
+{
     FUNCC(pred8x16_left_dc)(src, stride);
     FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
     FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
 }
 
-static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
+static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
+{
   int j, k;
   int a;
   INIT_CLIP
@@ -737,7 +800,8 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
   }
 }
 
-static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){
+static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
+{
   int j, k;
   int a;
   INIT_CLIP
@@ -816,14 +880,16 @@ static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){
         src += stride; \
     }
 
-static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
+                                   int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
 
     PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
 }
-static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
+                                    int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
     int stride = _stride>>(sizeof(pixel)-1);
@@ -832,19 +898,21 @@ static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topr
     const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
     PREDICT_8x8_DC(dc);
 }
-static void FUNCC(pred8x8l_top_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
+                                   int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
 
     PREDICT_8x8_LOAD_TOP;
     const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
     PREDICT_8x8_DC(dc);
 }
-static void FUNCC(pred8x8l_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
+                               int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
 
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOP;
@@ -852,10 +920,11 @@ static void FUNCC(pred8x8l_dc)(uint8_t *p_src, int has_topleft, int has_topright
                                      +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
     PREDICT_8x8_DC(dc);
 }
-static void FUNCC(pred8x8l_horizontal)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
+                                       int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
     pixel4 a;
 
     PREDICT_8x8_LOAD_LEFT;
@@ -865,7 +934,8 @@ static void FUNCC(pred8x8l_horizontal)(uint8_t *p_src, int has_topleft, int has_
     ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
 #undef ROW
 }
-static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
+static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
+                                     int has_topright, ptrdiff_t _stride)
 {
     int y;
     pixel *src = (pixel*)_src;
@@ -888,10 +958,11 @@ static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_top
         AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
     }
 }
-static void FUNCC(pred8x8l_down_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
+                                      int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_TOPRIGHT;
     SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
@@ -910,10 +981,11 @@ static void FUNCC(pred8x8l_down_left)(uint8_t *p_src, int has_topleft, int has_t
     SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
     SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
 }
-static void FUNCC(pred8x8l_down_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
+                                       int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -933,10 +1005,11 @@ static void FUNCC(pred8x8l_down_right)(uint8_t *p_src, int has_topleft, int has_
     SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
     SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
 }
-static void FUNCC(pred8x8l_vertical_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
+                                           int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -963,10 +1036,11 @@ static void FUNCC(pred8x8l_vertical_right)(uint8_t *p_src, int has_topleft, int
     SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
     SRC(7,0)= (t6 + t7 + 1) >> 1;
 }
-static void FUNCC(pred8x8l_horizontal_down)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
+                                            int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -993,10 +1067,11 @@ static void FUNCC(pred8x8l_horizontal_down)(uint8_t *p_src, int has_topleft, int
     SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
     SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
 }
-static void FUNCC(pred8x8l_vertical_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
+                                          int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_TOPRIGHT;
     SRC(0,0)= (t0 + t1 + 1) >> 1;
@@ -1022,10 +1097,11 @@ static void FUNCC(pred8x8l_vertical_left)(uint8_t *p_src, int has_topleft, int h
     SRC(7,6)= (t10 + t11 + 1) >> 1;
     SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
 }
-static void FUNCC(pred8x8l_horizontal_up)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
+static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
+                                          int has_topright, ptrdiff_t _stride)
 {
-    pixel *src = (pixel*)p_src;
-    int stride = p_stride>>(sizeof(pixel)-1);
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_LEFT;
     SRC(0,0)= (l0 + l1 + 1) >> 1;
     SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
@@ -1056,10 +1132,12 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *p_src, int has_topleft, int h
 #undef PL
 #undef SRC
 
-static void FUNCC(pred4x4_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
+static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block,
+                                        ptrdiff_t stride)
+{
     int i;
-    pixel *pix = (pixel*)p_pix;
-    const dctcoef *block = (const dctcoef*)p_block;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
     stride >>= sizeof(pixel)-1;
     pix -= stride;
     for(i=0; i<4; i++){
@@ -1073,10 +1151,12 @@ static void FUNCC(pred4x4_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block,
     }
 }
 
-static void FUNCC(pred4x4_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
+static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
+                                          ptrdiff_t stride)
+{
     int i;
-    pixel *pix = (pixel*)p_pix;
-    const dctcoef *block = (const dctcoef*)p_block;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
     stride >>= sizeof(pixel)-1;
     for(i=0; i<4; i++){
         pixel v = pix[-1];
@@ -1089,10 +1169,12 @@ static void FUNCC(pred4x4_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block
     }
 }
 
-static void FUNCC(pred8x8l_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
+static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block,
+                                         ptrdiff_t stride)
+{
     int i;
-    pixel *pix = (pixel*)p_pix;
-    const dctcoef *block = (const dctcoef*)p_block;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
     stride >>= sizeof(pixel)-1;
     pix -= stride;
     for(i=0; i<8; i++){
@@ -1110,10 +1192,12 @@ static void FUNCC(pred8x8l_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block,
     }
 }
 
-static void FUNCC(pred8x8l_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
+static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
+                                           ptrdiff_t stride)
+{
     int i;
-    pixel *pix = (pixel*)p_pix;
-    const dctcoef *block = (const dctcoef*)p_block;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
     stride >>= sizeof(pixel)-1;
     for(i=0; i<8; i++){
         pixel v = pix[-1];
@@ -1130,25 +1214,36 @@ static void FUNCC(pred8x8l_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_bloc
     }
 }
 
-static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
+                                          const DCTELEM *block,
+                                          ptrdiff_t stride)
+{
     int i;
     for(i=0; i<16; i++)
         FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
 }
 
-static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
+                                            const int *block_offset,
+                                            const DCTELEM *block,
+                                            ptrdiff_t stride)
+{
     int i;
     for(i=0; i<16; i++)
         FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
 }
 
-static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
+                                        const DCTELEM *block, ptrdiff_t stride)
+{
     int i;
     for(i=0; i<4; i++)
         FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
 }
 
-static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
+                                         const DCTELEM *block, ptrdiff_t stride)
+{
     int i;
     for(i=0; i<4; i++)
         FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
@@ -1156,13 +1251,19 @@ static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
         FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
 }
 
-static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
+                                          const DCTELEM *block,
+                                          ptrdiff_t stride)
+{
     int i;
     for(i=0; i<4; i++)
         FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
 }
 
-static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
+                                           const int *block_offset,
+                                           const DCTELEM *block, ptrdiff_t stride)
+{
     int i;
     for(i=0; i<4; i++)
         FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
diff --git a/libavcodec/raw.c b/libavcodec/raw.c
index 5f8bf33b79..e23dbeadf9 100644
--- a/libavcodec/raw.c
+++ b/libavcodec/raw.c
@@ -147,6 +147,25 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_YUVA444P,    MKTAG('Y', '4',  0 ,  8 ) },
     { AV_PIX_FMT_GRAY8A,      MKTAG('Y', '2',  0 ,  8 ) },
 
+    { AV_PIX_FMT_YUVA420P9LE,  MKTAG('Y', '4', 11 ,  9 ) },
+    { AV_PIX_FMT_YUVA420P9BE,  MKTAG( 9 , 11 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA422P9LE,  MKTAG('Y', '4', 10 ,  9 ) },
+    { AV_PIX_FMT_YUVA422P9BE,  MKTAG( 9 , 10 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA444P9LE,  MKTAG('Y', '4',  0 ,  9 ) },
+    { AV_PIX_FMT_YUVA444P9BE,  MKTAG( 9 ,  0 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA420P10LE, MKTAG('Y', '4', 11 , 10 ) },
+    { AV_PIX_FMT_YUVA420P10BE, MKTAG(10 , 11 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA422P10LE, MKTAG('Y', '4', 10 , 10 ) },
+    { AV_PIX_FMT_YUVA422P10BE, MKTAG(10 , 10 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA444P10LE, MKTAG('Y', '4',  0 , 10 ) },
+    { AV_PIX_FMT_YUVA444P10BE, MKTAG(10 ,  0 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA420P16LE, MKTAG('Y', '4', 11 , 16 ) },
+    { AV_PIX_FMT_YUVA420P16BE, MKTAG(16 , 11 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA422P16LE, MKTAG('Y', '4', 10 , 16 ) },
+    { AV_PIX_FMT_YUVA422P16BE, MKTAG(16 , 10 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA444P16LE, MKTAG('Y', '4',  0 , 16 ) },
+    { AV_PIX_FMT_YUVA444P16BE, MKTAG(16 ,  0 , '4', 'Y') },
+
     /* quicktime */
     { AV_PIX_FMT_YUV420P, MKTAG('R', '4', '2', '0') }, /* Radius DV YUV PAL */
     { AV_PIX_FMT_YUV411P, MKTAG('R', '4', '1', '1') }, /* Radius DV YUV NTSC */
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 609cb2303d..21dbf5d9cd 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -53,7 +53,7 @@ cextern pw_32
 ; void pred16x16_vertical(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 
-cglobal pred16x16_vertical_mmx, 2,3
+cglobal pred16x16_vertical_8_mmx, 2,3
     sub   r0, r1
     mov   r2, 8
     movq mm0, [r0+0]
@@ -68,7 +68,7 @@ cglobal pred16x16_vertical_mmx, 2,3
     jg .loop
     REP_RET
 
-cglobal pred16x16_vertical_sse, 2,3
+cglobal pred16x16_vertical_8_sse, 2,3
     sub   r0, r1
     mov   r2, 4
     movaps xmm0, [r0]
@@ -88,7 +88,7 @@ cglobal pred16x16_vertical_sse, 2,3
 ;-----------------------------------------------------------------------------
 
 %macro PRED16x16_H 0
-cglobal pred16x16_horizontal, 2,3
+cglobal pred16x16_horizontal_8, 2,3
     mov       r2, 8
 %if cpuflag(ssse3)
     mova      m2, [pb_3]
@@ -130,7 +130,7 @@ INIT_XMM
 ;-----------------------------------------------------------------------------
 
 %macro PRED16x16_DC 0
-cglobal pred16x16_dc, 2,7
+cglobal pred16x16_dc_8, 2,7
     mov       r4, r0
     sub       r0, r1
     pxor      mm0, mm0
@@ -193,7 +193,7 @@ INIT_XMM
 ;-----------------------------------------------------------------------------
 
 %macro PRED16x16_TM_MMX 0
-cglobal pred16x16_tm_vp8, 2,5
+cglobal pred16x16_tm_vp8_8, 2,5
     sub        r0, r1
     pxor      mm7, mm7
     movq      mm0, [r0+0]
@@ -234,7 +234,7 @@ INIT_MMX mmx2
 PRED16x16_TM_MMX
 INIT_MMX
 
-cglobal pred16x16_tm_vp8_sse2, 2,6,6
+cglobal pred16x16_tm_vp8_8_sse2, 2,6,6
     sub          r0, r1
     pxor       xmm2, xmm2
     movdqa     xmm0, [r0]
@@ -274,7 +274,7 @@ cglobal pred16x16_tm_vp8_sse2, 2,6,6
 ;-----------------------------------------------------------------------------
 
 %macro H264_PRED16x16_PLANE 1
-cglobal pred16x16_plane_%1, 2,9,7
+cglobal pred16x16_plane_%1_8, 2,9,7
     mov          r2, r1           ; +stride
     neg          r1               ; -stride
 
@@ -556,7 +556,7 @@ INIT_XMM
 ;-----------------------------------------------------------------------------
 
 %macro H264_PRED8x8_PLANE 0
-cglobal pred8x8_plane, 2,9,7
+cglobal pred8x8_plane_8, 2,9,7
     mov          r2, r1           ; +stride
     neg          r1               ; -stride
 
@@ -730,7 +730,7 @@ INIT_XMM
 ; void pred8x8_vertical(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 
-cglobal pred8x8_vertical_mmx, 2,2
+cglobal pred8x8_vertical_8_mmx, 2,2
     sub    r0, r1
     movq  mm0, [r0]
 %rep 3
@@ -747,7 +747,7 @@ cglobal pred8x8_vertical_mmx, 2,2
 ;-----------------------------------------------------------------------------
 
 %macro PRED8x8_H 0
-cglobal pred8x8_horizontal, 2,3
+cglobal pred8x8_horizontal_8, 2,3
     mov       r2, 4
 %if cpuflag(ssse3)
     mova      m2, [pb_3]
@@ -774,7 +774,7 @@ INIT_MMX
 ;-----------------------------------------------------------------------------
 ; void pred8x8_top_dc_mmxext(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
-cglobal pred8x8_top_dc_mmxext, 2,5
+cglobal pred8x8_top_dc_8_mmxext, 2,5
     sub         r0, r1
     movq       mm0, [r0]
     pxor       mm1, mm1
@@ -809,7 +809,7 @@ cglobal pred8x8_top_dc_mmxext, 2,5
 ;-----------------------------------------------------------------------------
 
 INIT_MMX
-cglobal pred8x8_dc_mmxext, 2,5
+cglobal pred8x8_dc_8_mmxext, 2,5
     sub       r0, r1
     pxor      m7, m7
     movd      m0, [r0+0]
@@ -869,7 +869,7 @@ cglobal pred8x8_dc_mmxext, 2,5
 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 
-cglobal pred8x8_dc_rv40_mmxext, 2,7
+cglobal pred8x8_dc_rv40_8_mmxext, 2,7
     mov       r4, r0
     sub       r0, r1
     pxor      mm0, mm0
@@ -906,7 +906,7 @@ cglobal pred8x8_dc_rv40_mmxext, 2,7
 ;-----------------------------------------------------------------------------
 
 %macro PRED8x8_TM_MMX 0
-cglobal pred8x8_tm_vp8, 2,6
+cglobal pred8x8_tm_vp8_8, 2,6
     sub        r0, r1
     pxor      mm7, mm7
     movq      mm0, [r0]
@@ -946,7 +946,7 @@ INIT_MMX mmx2
 PRED8x8_TM_MMX
 INIT_MMX
 
-cglobal pred8x8_tm_vp8_sse2, 2,6,4
+cglobal pred8x8_tm_vp8_8_sse2, 2,6,4
     sub          r0, r1
     pxor       xmm1, xmm1
     movq       xmm0, [r0]
@@ -974,7 +974,7 @@ cglobal pred8x8_tm_vp8_sse2, 2,6,4
     jg .loop
     REP_RET
 
-cglobal pred8x8_tm_vp8_ssse3, 2,3,6
+cglobal pred8x8_tm_vp8_8_ssse3, 2,3,6
     sub          r0, r1
     movdqa     xmm4, [tm_shuf]
     pxor       xmm1, xmm1
@@ -1016,7 +1016,7 @@ cglobal pred8x8_tm_vp8_ssse3, 2,3,6
 ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
 ;-----------------------------------------------------------------------------
 %macro PRED8x8L_TOP_DC 1
-cglobal pred8x8l_top_dc_%1, 4,4
+cglobal pred8x8l_top_dc_8_%1, 4,4
     sub          r0, r3
     pxor        mm7, mm7
     movq        mm0, [r0-8]
@@ -1073,7 +1073,7 @@ PRED8x8L_TOP_DC ssse3
 ;-----------------------------------------------------------------------------
 
 %macro PRED8x8L_DC 1
-cglobal pred8x8l_dc_%1, 4,5
+cglobal pred8x8l_dc_8_%1, 4,5
     sub          r0, r3
     lea          r4, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
@@ -1176,7 +1176,7 @@ PRED8x8L_DC ssse3
 ;-----------------------------------------------------------------------------
 
 %macro PRED8x8L_HORIZONTAL 1
-cglobal pred8x8l_horizontal_%1, 4,4
+cglobal pred8x8l_horizontal_8_%1, 4,4
     sub          r0, r3
     lea          r2, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
@@ -1248,7 +1248,7 @@ PRED8x8L_HORIZONTAL ssse3
 ;-----------------------------------------------------------------------------
 
 %macro PRED8x8L_VERTICAL 1
-cglobal pred8x8l_vertical_%1, 4,4
+cglobal pred8x8l_vertical_8_%1, 4,4
     sub          r0, r3
     movq        mm0, [r0-8]
     movq        mm3, [r0]
@@ -1300,7 +1300,7 @@ PRED8x8L_VERTICAL ssse3
 
 INIT_MMX
 %define PALIGNR PALIGNR_MMX
-cglobal pred8x8l_down_left_mmxext, 4,5
+cglobal pred8x8l_down_left_8_mmxext, 4,5
     sub          r0, r3
     movq        mm0, [r0-8]
     movq        mm3, [r0]
@@ -1408,7 +1408,7 @@ cglobal pred8x8l_down_left_mmxext, 4,5
     RET
 
 %macro PRED8x8L_DOWN_LEFT 1
-cglobal pred8x8l_down_left_%1, 4,4
+cglobal pred8x8l_down_left_8_%1, 4,4
     sub          r0, r3
     movq        mm0, [r0-8]
     movq        mm3, [r0]
@@ -1503,7 +1503,7 @@ PRED8x8L_DOWN_LEFT ssse3
 
 INIT_MMX
 %define PALIGNR PALIGNR_MMX
-cglobal pred8x8l_down_right_mmxext, 4,5
+cglobal pred8x8l_down_right_8_mmxext, 4,5
     sub          r0, r3
     lea          r4, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
@@ -1635,7 +1635,7 @@ cglobal pred8x8l_down_right_mmxext, 4,5
     RET
 
 %macro PRED8x8L_DOWN_RIGHT 1
-cglobal pred8x8l_down_right_%1, 4,5
+cglobal pred8x8l_down_right_8_%1, 4,5
     sub          r0, r3
     lea          r4, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
@@ -1757,7 +1757,7 @@ PRED8x8L_DOWN_RIGHT ssse3
 
 INIT_MMX
 %define PALIGNR PALIGNR_MMX
-cglobal pred8x8l_vertical_right_mmxext, 4,5
+cglobal pred8x8l_vertical_right_8_mmxext, 4,5
     sub          r0, r3
     lea          r4, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
@@ -1864,7 +1864,7 @@ cglobal pred8x8l_vertical_right_mmxext, 4,5
     RET
 
 %macro PRED8x8L_VERTICAL_RIGHT 1
-cglobal pred8x8l_vertical_right_%1, 4,5,7
+cglobal pred8x8l_vertical_right_8_%1, 4,5,7
     ; manually spill XMM registers for Win64 because
     ; the code here is initialized with INIT_MMX
     WIN64_SPILL_XMM 7
@@ -1986,7 +1986,7 @@ PRED8x8L_VERTICAL_RIGHT ssse3
 ;-----------------------------------------------------------------------------
 
 %macro PRED8x8L_VERTICAL_LEFT 1
-cglobal pred8x8l_vertical_left_%1, 4,4
+cglobal pred8x8l_vertical_left_8_%1, 4,4
     sub          r0, r3
     movq        mm0, [r0-8]
     movq        mm3, [r0]
@@ -2077,7 +2077,7 @@ PRED8x8L_VERTICAL_LEFT ssse3
 ;-----------------------------------------------------------------------------
 
 %macro PRED8x8L_HORIZONTAL_UP 1
-cglobal pred8x8l_horizontal_up_%1, 4,4
+cglobal pred8x8l_horizontal_up_8_%1, 4,4
     sub          r0, r3
     lea          r2, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
@@ -2166,7 +2166,7 @@ PRED8x8L_HORIZONTAL_UP ssse3
 
 INIT_MMX
 %define PALIGNR PALIGNR_MMX
-cglobal pred8x8l_horizontal_down_mmxext, 4,5
+cglobal pred8x8l_horizontal_down_8_mmxext, 4,5
     sub          r0, r3
     lea          r4, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
@@ -2281,7 +2281,7 @@ cglobal pred8x8l_horizontal_down_mmxext, 4,5
     RET
 
 %macro PRED8x8L_HORIZONTAL_DOWN 1
-cglobal pred8x8l_horizontal_down_%1, 4,5
+cglobal pred8x8l_horizontal_down_8_%1, 4,5
     sub          r0, r3
     lea          r4, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
@@ -2415,7 +2415,7 @@ PRED8x8L_HORIZONTAL_DOWN ssse3
 ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
 ;-----------------------------------------------------------------------------
 
-cglobal pred4x4_dc_mmxext, 3,5
+cglobal pred4x4_dc_8_mmxext, 3,5
     pxor   mm7, mm7
     mov     r4, r0
     sub     r0, r2
@@ -2445,7 +2445,7 @@ cglobal pred4x4_dc_mmxext, 3,5
 ;-----------------------------------------------------------------------------
 
 %macro PRED4x4_TM_MMX 0
-cglobal pred4x4_tm_vp8, 3,6
+cglobal pred4x4_tm_vp8_8, 3,6
     sub        r0, r2
     pxor      mm7, mm7
     movd      mm0, [r0]
@@ -2486,7 +2486,7 @@ INIT_MMX mmx2
 PRED4x4_TM_MMX
 INIT_MMX
 
-cglobal pred4x4_tm_vp8_ssse3, 3,3
+cglobal pred4x4_tm_vp8_8_ssse3, 3,3
     sub         r0, r2
     movq       mm6, [tm_shuf]
     pxor       mm1, mm1
@@ -2526,7 +2526,7 @@ cglobal pred4x4_tm_vp8_ssse3, 3,3
 ;-----------------------------------------------------------------------------
 
 INIT_MMX
-cglobal pred4x4_vertical_vp8_mmxext, 3,3
+cglobal pred4x4_vertical_vp8_8_mmxext, 3,3
     sub       r0, r2
     movd      m1, [r0-1]
     movd      m0, [r0]
@@ -2545,7 +2545,7 @@ cglobal pred4x4_vertical_vp8_mmxext, 3,3
 ; void pred4x4_down_left_mmxext(uint8_t *src, const uint8_t *topright, int stride)
 ;-----------------------------------------------------------------------------
 INIT_MMX
-cglobal pred4x4_down_left_mmxext, 3,3
+cglobal pred4x4_down_left_8_mmxext, 3,3
     sub       r0, r2
     movq      m1, [r0]
     punpckldq m1, [r1]
@@ -2572,7 +2572,7 @@ cglobal pred4x4_down_left_mmxext, 3,3
 ;-----------------------------------------------------------------------------
 
 INIT_MMX
-cglobal pred4x4_vertical_left_mmxext, 3,3
+cglobal pred4x4_vertical_left_8_mmxext, 3,3
     sub       r0, r2
     movq      m1, [r0]
     punpckldq m1, [r1]
@@ -2597,7 +2597,7 @@ cglobal pred4x4_vertical_left_mmxext, 3,3
 ;-----------------------------------------------------------------------------
 
 INIT_MMX
-cglobal pred4x4_horizontal_up_mmxext, 3,3
+cglobal pred4x4_horizontal_up_8_mmxext, 3,3
     sub       r0, r2
     lea       r1, [r0+r2*2]
     movd      m0, [r0+r2*1-4]
@@ -2631,7 +2631,7 @@ cglobal pred4x4_horizontal_up_mmxext, 3,3
 
 INIT_MMX
 %define PALIGNR PALIGNR_MMX
-cglobal pred4x4_horizontal_down_mmxext, 3,3
+cglobal pred4x4_horizontal_down_8_mmxext, 3,3
     sub       r0, r2
     lea       r1, [r0+r2*2]
     movh      m0, [r0-4]      ; lt ..
@@ -2667,7 +2667,7 @@ cglobal pred4x4_horizontal_down_mmxext, 3,3
 
 INIT_MMX
 %define PALIGNR PALIGNR_MMX
-cglobal pred4x4_vertical_right_mmxext, 3,3
+cglobal pred4x4_vertical_right_8_mmxext, 3,3
     sub     r0, r2
     lea     r1, [r0+r2*2]
     movh    m0, [r0]                    ; ........t3t2t1t0
@@ -2698,7 +2698,7 @@ cglobal pred4x4_vertical_right_mmxext, 3,3
 
 INIT_MMX
 %define PALIGNR PALIGNR_MMX
-cglobal pred4x4_down_right_mmxext, 3,3
+cglobal pred4x4_down_right_8_mmxext, 3,3
     sub       r0, r2
     lea       r1, [r0+r2*2]
     movq      m1, [r1-8]
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 2ce02f0f6c..83a4ca98c1 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -23,7 +23,9 @@
 #include "libavcodec/h264pred.h"
 
 #define PRED4x4(TYPE, DEPTH, OPT) \
-void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+                                                    const uint8_t *topright, \
+                                                    ptrdiff_t stride);
 
 PRED4x4(dc, 10, mmx2)
 PRED4x4(down_left, 10, sse2)
@@ -42,7 +44,8 @@ PRED4x4(horizontal_down, 10, ssse3)
 PRED4x4(horizontal_down, 10, avx)
 
 #define PRED8x8(TYPE, DEPTH, OPT) \
-void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride);
+void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+                                                    ptrdiff_t stride);
 
 PRED8x8(dc, 10, mmx2)
 PRED8x8(dc, 10, sse2)
@@ -52,7 +55,10 @@ PRED8x8(vertical, 10, sse2)
 PRED8x8(horizontal, 10, sse2)
 
 #define PRED8x8L(TYPE, DEPTH, OPT)\
-void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int has_topleft, int has_topright, int stride);
+void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+                                                     int has_topleft, \
+                                                     int has_topright, \
+                                                     ptrdiff_t stride);
 
 PRED8x8L(dc, 10, sse2)
 PRED8x8L(dc, 10, avx)
@@ -79,7 +85,8 @@ PRED8x8L(horizontal_up, 10, ssse3)
 PRED8x8L(horizontal_up, 10, avx)
 
 #define PRED16x16(TYPE, DEPTH, OPT)\
-void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride);
+void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+                                                      ptrdiff_t stride);
 
 PRED16x16(dc, 10, mmx2)
 PRED16x16(dc, 10, sse2)
@@ -94,79 +101,83 @@ PRED16x16(vertical, 10, sse2)
 PRED16x16(horizontal, 10, mmx2)
 PRED16x16(horizontal, 10, sse2)
 
-void ff_pred16x16_vertical_mmx     (uint8_t *src, int stride);
-void ff_pred16x16_vertical_sse     (uint8_t *src, int stride);
-void ff_pred16x16_horizontal_mmx   (uint8_t *src, int stride);
-void ff_pred16x16_horizontal_mmx2  (uint8_t *src, int stride);
-void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
-void ff_pred16x16_dc_mmx2          (uint8_t *src, int stride);
-void ff_pred16x16_dc_sse2          (uint8_t *src, int stride);
-void ff_pred16x16_dc_ssse3         (uint8_t *src, int stride);
-void ff_pred16x16_plane_h264_mmx   (uint8_t *src, int stride);
-void ff_pred16x16_plane_h264_mmx2  (uint8_t *src, int stride);
-void ff_pred16x16_plane_h264_sse2  (uint8_t *src, int stride);
-void ff_pred16x16_plane_h264_ssse3 (uint8_t *src, int stride);
-void ff_pred16x16_plane_rv40_mmx   (uint8_t *src, int stride);
-void ff_pred16x16_plane_rv40_mmx2  (uint8_t *src, int stride);
-void ff_pred16x16_plane_rv40_sse2  (uint8_t *src, int stride);
-void ff_pred16x16_plane_rv40_ssse3 (uint8_t *src, int stride);
-void ff_pred16x16_plane_svq3_mmx   (uint8_t *src, int stride);
-void ff_pred16x16_plane_svq3_mmx2  (uint8_t *src, int stride);
-void ff_pred16x16_plane_svq3_sse2  (uint8_t *src, int stride);
-void ff_pred16x16_plane_svq3_ssse3 (uint8_t *src, int stride);
-void ff_pred16x16_tm_vp8_mmx       (uint8_t *src, int stride);
-void ff_pred16x16_tm_vp8_mmx2      (uint8_t *src, int stride);
-void ff_pred16x16_tm_vp8_sse2      (uint8_t *src, int stride);
-void ff_pred8x8_top_dc_mmxext      (uint8_t *src, int stride);
-void ff_pred8x8_dc_rv40_mmxext     (uint8_t *src, int stride);
-void ff_pred8x8_dc_mmxext          (uint8_t *src, int stride);
-void ff_pred8x8_vertical_mmx       (uint8_t *src, int stride);
-void ff_pred8x8_horizontal_mmx     (uint8_t *src, int stride);
-void ff_pred8x8_horizontal_mmx2    (uint8_t *src, int stride);
-void ff_pred8x8_horizontal_ssse3   (uint8_t *src, int stride);
-void ff_pred8x8_plane_mmx          (uint8_t *src, int stride);
-void ff_pred8x8_plane_mmx2         (uint8_t *src, int stride);
-void ff_pred8x8_plane_sse2         (uint8_t *src, int stride);
-void ff_pred8x8_plane_ssse3        (uint8_t *src, int stride);
-void ff_pred8x8_tm_vp8_mmx         (uint8_t *src, int stride);
-void ff_pred8x8_tm_vp8_mmx2        (uint8_t *src, int stride);
-void ff_pred8x8_tm_vp8_sse2        (uint8_t *src, int stride);
-void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
-void ff_pred8x8l_top_dc_mmxext     (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_top_dc_ssse3      (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_dc_mmxext         (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_dc_ssse3          (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_horizontal_ssse3  (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_vertical_mmxext   (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_vertical_ssse3    (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_down_left_mmxext  (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_down_left_sse2    (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_down_left_ssse3   (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_down_right_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_down_right_sse2   (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_down_right_ssse3  (uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_vertical_right_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_vertical_right_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_vertical_left_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_horizontal_up_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_horizontal_down_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_horizontal_down_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred8x8l_horizontal_down_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride);
-void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_down_left_mmxext   (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_down_right_mmxext  (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_vertical_left_mmxext(uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_vertical_right_mmxext(uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_horizontal_up_mmxext(uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_horizontal_down_mmxext(uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_tm_vp8_mmx         (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_tm_vp8_mmx2        (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_tm_vp8_ssse3       (uint8_t *src, const uint8_t *topright, int stride);
-void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
+/* 8-bit versions */
+PRED16x16(vertical, 8, mmx)
+PRED16x16(vertical, 8, sse)
+PRED16x16(horizontal, 8, mmx)
+PRED16x16(horizontal, 8, mmx2)
+PRED16x16(horizontal, 8, ssse3)
+PRED16x16(dc, 8, mmx2)
+PRED16x16(dc, 8, sse2)
+PRED16x16(dc, 8, ssse3)
+PRED16x16(plane_h264, 8, mmx)
+PRED16x16(plane_h264, 8, mmx2)
+PRED16x16(plane_h264, 8, sse2)
+PRED16x16(plane_h264, 8, ssse3)
+PRED16x16(plane_rv40, 8, mmx)
+PRED16x16(plane_rv40, 8, mmx2)
+PRED16x16(plane_rv40, 8, sse2)
+PRED16x16(plane_rv40, 8, ssse3)
+PRED16x16(plane_svq3, 8, mmx)
+PRED16x16(plane_svq3, 8, mmx2)
+PRED16x16(plane_svq3, 8, sse2)
+PRED16x16(plane_svq3, 8, ssse3)
+PRED16x16(tm_vp8, 8, mmx)
+PRED16x16(tm_vp8, 8, mmx2)
+PRED16x16(tm_vp8, 8, sse2)
+
+PRED8x8(top_dc, 8, mmxext)
+PRED8x8(dc_rv40, 8, mmxext)
+PRED8x8(dc, 8, mmxext)
+PRED8x8(vertical, 8, mmx)
+PRED8x8(horizontal, 8, mmx)
+PRED8x8(horizontal, 8, mmx2)
+PRED8x8(horizontal, 8, ssse3)
+PRED8x8(plane, 8, mmx)
+PRED8x8(plane, 8, mmx2)
+PRED8x8(plane, 8, sse2)
+PRED8x8(plane, 8, ssse3)
+PRED8x8(tm_vp8, 8, mmx)
+PRED8x8(tm_vp8, 8, mmx2)
+PRED8x8(tm_vp8, 8, sse2)
+PRED8x8(tm_vp8, 8, ssse3)
+
+PRED8x8L(top_dc, 8, mmxext)
+PRED8x8L(top_dc, 8, ssse3)
+PRED8x8L(dc, 8, mmxext)
+PRED8x8L(dc, 8, ssse3)
+PRED8x8L(horizontal, 8, mmxext)
+PRED8x8L(horizontal, 8, ssse3)
+PRED8x8L(vertical, 8, mmxext)
+PRED8x8L(vertical, 8, ssse3)
+PRED8x8L(down_left, 8, mmxext)
+PRED8x8L(down_left, 8, sse2)
+PRED8x8L(down_left, 8, ssse3)
+PRED8x8L(down_right, 8, mmxext)
+PRED8x8L(down_right, 8, sse2)
+PRED8x8L(down_right, 8, ssse3)
+PRED8x8L(vertical_right, 8, mmxext)
+PRED8x8L(vertical_right, 8, sse2)
+PRED8x8L(vertical_right, 8, ssse3)
+PRED8x8L(vertical_left, 8, sse2)
+PRED8x8L(vertical_left, 8, ssse3)
+PRED8x8L(horizontal_up, 8, mmxext)
+PRED8x8L(horizontal_up, 8, ssse3)
+PRED8x8L(horizontal_down, 8, mmxext)
+PRED8x8L(horizontal_down, 8, sse2)
+PRED8x8L(horizontal_down, 8, ssse3)
+
+PRED4x4(dc, 8, mmxext)
+PRED4x4(down_left, 8, mmxext)
+PRED4x4(down_right, 8, mmxext)
+PRED4x4(vertical_left, 8, mmxext)
+PRED4x4(vertical_right, 8, mmxext)
+PRED4x4(horizontal_up, 8, mmxext)
+PRED4x4(horizontal_down, 8, mmxext)
+PRED4x4(tm_vp8, 8, mmx)
+PRED4x4(tm_vp8, 8, mmx2)
+PRED4x4(tm_vp8, 8, ssse3)
+PRED4x4(vertical_vp8, 8, mmxext)
 
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
 {
@@ -174,136 +185,136 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
 
     if (bit_depth == 8) {
         if (EXTERNAL_MMX(mm_flags)) {
-            h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_mmx;
-            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_mmx;
+            h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_8_mmx;
+            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_mmx;
             if (chroma_format_idc == 1) {
-                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_mmx;
-                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_mmx;
+                h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_mmx;
+                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_mmx;
             }
             if (codec_id == AV_CODEC_ID_VP8) {
-                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_mmx;
-                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_mmx;
-                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_mmx;
+                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_mmx;
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_mmx;
+                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_mmx;
             } else {
                 if (chroma_format_idc == 1)
-                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx;
                 if (codec_id == AV_CODEC_ID_SVQ3) {
                     if (mm_flags & AV_CPU_FLAG_CMOV)
-                        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx;
+                        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx;
                 } else if (codec_id == AV_CODEC_ID_RV40) {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx;
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx;
                 } else {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx;
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx;
                 }
             }
         }
 
         if (EXTERNAL_MMXEXT(mm_flags)) {
-            h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_mmx2;
-            h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_mmx2;
+            h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_8_mmx2;
+            h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_8_mmx2;
             if (chroma_format_idc == 1)
-                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_mmx2;
-            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_mmxext;
-            h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_mmxext;
-            h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_mmxext;
-            h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_mmxext;
-            h->pred8x8l [DIAG_DOWN_RIGHT_PRED   ] = ff_pred8x8l_down_right_mmxext;
-            h->pred8x8l [VERT_RIGHT_PRED        ] = ff_pred8x8l_vertical_right_mmxext;
-            h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_mmxext;
-            h->pred8x8l [DIAG_DOWN_LEFT_PRED    ] = ff_pred8x8l_down_left_mmxext;
-            h->pred8x8l [HOR_DOWN_PRED          ] = ff_pred8x8l_horizontal_down_mmxext;
-            h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_mmxext;
-            h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_mmxext;
-            h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_mmxext;
-            h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_mmxext;
+                h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_8_mmx2;
+            h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
+            h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
+            h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
+            h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED   ] = ff_pred8x8l_down_right_8_mmxext;
+            h->pred8x8l [VERT_RIGHT_PRED        ] = ff_pred8x8l_vertical_right_8_mmxext;
+            h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_8_mmxext;
+            h->pred8x8l [DIAG_DOWN_LEFT_PRED    ] = ff_pred8x8l_down_left_8_mmxext;
+            h->pred8x8l [HOR_DOWN_PRED          ] = ff_pred8x8l_horizontal_down_8_mmxext;
+            h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_8_mmxext;
+            h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_8_mmxext;
+            h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_8_mmxext;
+            h->pred4x4  [DC_PRED                ] = ff_pred4x4_dc_8_mmxext;
             if (codec_id == AV_CODEC_ID_VP8 || codec_id == AV_CODEC_ID_H264) {
-                h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_mmxext;
+                h->pred4x4  [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
             }
             if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
-                h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_mmxext;
+                h->pred4x4  [VERT_LEFT_PRED     ] = ff_pred4x4_vertical_left_8_mmxext;
             }
             if (codec_id != AV_CODEC_ID_RV40) {
-                h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_mmxext;
+                h->pred4x4  [HOR_UP_PRED        ] = ff_pred4x4_horizontal_up_8_mmxext;
             }
             if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
                 if (chroma_format_idc == 1) {
-                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_mmxext;
-                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_mmxext;
+                    h->pred8x8[TOP_DC_PRED8x8   ] = ff_pred8x8_top_dc_8_mmxext;
+                    h->pred8x8[DC_PRED8x8       ] = ff_pred8x8_dc_8_mmxext;
                 }
             }
             if (codec_id == AV_CODEC_ID_VP8) {
-                h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_mmx2;
-                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_mmxext;
-                h->pred8x8  [PLANE_PRED8x8      ] = ff_pred8x8_tm_vp8_mmx2;
-                h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_mmx2;
-                h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_mmxext;
+                h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_8_mmx2;
+                h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext;
+                h->pred8x8  [PLANE_PRED8x8      ] = ff_pred8x8_tm_vp8_8_mmx2;
+                h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmx2;
+                h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_8_mmxext;
             } else {
                 if (chroma_format_idc == 1)
-                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2;
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx2;
                 if (codec_id == AV_CODEC_ID_SVQ3) {
-                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_mmx2;
+                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_svq3_8_mmx2;
                 } else if (codec_id == AV_CODEC_ID_RV40) {
-                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_mmx2;
+                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_rv40_8_mmx2;
                 } else {
-                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_mmx2;
+                    h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmx2;
                 }
             }
         }
 
         if (EXTERNAL_SSE(mm_flags)) {
-            h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse;
+            h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
         }
 
         if (EXTERNAL_SSE2(mm_flags)) {
-            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_sse2;
-            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_sse2;
-            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2;
-            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_sse2;
-            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_sse2;
-            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_sse2;
+            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_sse2;
+            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_sse2;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
+            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_sse2;
+            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_sse2;
+            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_sse2;
             if (codec_id == AV_CODEC_ID_VP8) {
-                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_sse2;
-                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_sse2;
+                h->pred16x16[PLANE_PRED8x8    ] = ff_pred16x16_tm_vp8_8_sse2;
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_sse2;
             } else {
                 if (chroma_format_idc == 1)
-                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_sse2;
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
                 if (codec_id == AV_CODEC_ID_SVQ3) {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2;
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
                 } else if (codec_id == AV_CODEC_ID_RV40) {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_sse2;
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
                 } else {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_sse2;
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
                 }
             }
         }
 
         if (EXTERNAL_SSSE3(mm_flags)) {
-            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_ssse3;
-            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_ssse3;
+            h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_ssse3;
+            h->pred16x16[DC_PRED8x8           ] = ff_pred16x16_dc_8_ssse3;
             if (chroma_format_idc == 1)
-                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_ssse3;
-            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_ssse3;
-            h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_ssse3;
-            h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_ssse3;
-            h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_ssse3;
-            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_ssse3;
-            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_ssse3;
-            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_ssse3;
-            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_ssse3;
-            h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_ssse3;
-            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_ssse3;
+                h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_ssse3;
+            h->pred8x8l [TOP_DC_PRED          ] = ff_pred8x8l_top_dc_8_ssse3;
+            h->pred8x8l [DC_PRED              ] = ff_pred8x8l_dc_8_ssse3;
+            h->pred8x8l [HOR_PRED             ] = ff_pred8x8l_horizontal_8_ssse3;
+            h->pred8x8l [VERT_PRED            ] = ff_pred8x8l_vertical_8_ssse3;
+            h->pred8x8l [DIAG_DOWN_LEFT_PRED  ] = ff_pred8x8l_down_left_8_ssse3;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
+            h->pred8x8l [VERT_RIGHT_PRED      ] = ff_pred8x8l_vertical_right_8_ssse3;
+            h->pred8x8l [VERT_LEFT_PRED       ] = ff_pred8x8l_vertical_left_8_ssse3;
+            h->pred8x8l [HOR_UP_PRED          ] = ff_pred8x8l_horizontal_up_8_ssse3;
+            h->pred8x8l [HOR_DOWN_PRED        ] = ff_pred8x8l_horizontal_down_8_ssse3;
             if (codec_id == AV_CODEC_ID_VP8) {
-                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_ssse3;
-                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_ssse3;
+                h->pred8x8  [PLANE_PRED8x8    ] = ff_pred8x8_tm_vp8_8_ssse3;
+                h->pred4x4  [TM_VP8_PRED      ] = ff_pred4x4_tm_vp8_8_ssse3;
             } else {
                 if (chroma_format_idc == 1)
-                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3;
+                    h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
                 if (codec_id == AV_CODEC_ID_SVQ3) {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3;
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
                 } else if (codec_id == AV_CODEC_ID_RV40) {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_ssse3;
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
                 } else {
-                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_ssse3;
+                    h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
                 }
             }
         }
diff --git a/libavformat/nut.c b/libavformat/nut.c
index eaf32e73c5..13f0091ca1 100644
--- a/libavformat/nut.c
+++ b/libavformat/nut.c
@@ -115,6 +115,34 @@ const AVCodecTag ff_nut_video_tags[] = {
     { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4', 10 ,  8 ) },
     { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4',  0 ,  8 ) },
     { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '2',  0 ,  8 ) },
+
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '1',  0 ,  9 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG( 9 ,  0 , '1', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4', 11 ,  9 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG( 9 , 11 , '4', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4', 10 ,  9 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG( 9 , 10 , '4', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4',  0 ,  9 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG( 9 ,  0 , '4', 'Y') },
+
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '1',  0 , 10 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG(10 ,  0 , '1', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4', 11 , 10 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG(10 , 11 , '4', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4', 10 , 10 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG(10 , 10 , '4', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4',  0 , 10 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG(10 ,  0 , '4', 'Y') },
+
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '1',  0 , 16 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG(16 ,  0 , '1', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4', 11 , 16 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG(16 , 11 , '4', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4', 10 , 16 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG(16 , 10 , '4', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG('Y', '4',  0 , 16 ) },
+    { AV_CODEC_ID_RAWVIDEO, MKTAG(16 ,  0 , '4', 'Y') },
+
     { AV_CODEC_ID_NONE    , 0                         }
 };
 
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 3b5f2643c2..6ee7024d24 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -609,6 +609,240 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         },
         .flags = PIX_FMT_PLANAR,
     },
+    [AV_PIX_FMT_YUVA420P9BE] = {
+        .name = "yuva420p9be",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .comp = {
+            { 0, 1, 1, 0, 8 },        /* Y */
+            { 1, 1, 1, 0, 8 },        /* U */
+            { 2, 1, 1, 0, 8 },        /* V */
+            { 3, 1, 1, 0, 8 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA420P9LE] = {
+        .name = "yuva420p9le",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .comp = {
+            { 0, 1, 1, 0, 8 },        /* Y */
+            { 1, 1, 1, 0, 8 },        /* U */
+            { 2, 1, 1, 0, 8 },        /* V */
+            { 3, 1, 1, 0, 8 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA422P9BE] = {
+        .name = "yuva422p9be",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 8 },        /* Y */
+            { 1, 1, 1, 0, 8 },        /* U */
+            { 2, 1, 1, 0, 8 },        /* V */
+            { 3, 1, 1, 0, 8 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA422P9LE] = {
+        .name = "yuva422p9le",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 8 },        /* Y */
+            { 1, 1, 1, 0, 8 },        /* U */
+            { 2, 1, 1, 0, 8 },        /* V */
+            { 3, 1, 1, 0, 8 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA444P9BE] = {
+        .name = "yuva444p9be",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 8 },        /* Y */
+            { 1, 1, 1, 0, 8 },        /* U */
+            { 2, 1, 1, 0, 8 },        /* V */
+            { 3, 1, 1, 0, 8 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA444P9LE] = {
+        .name = "yuva444p9le",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 8 },        /* Y */
+            { 1, 1, 1, 0, 8 },        /* U */
+            { 2, 1, 1, 0, 8 },        /* V */
+            { 3, 1, 1, 0, 8 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA420P10BE] = {
+        .name = "yuva420p10be",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .comp = {
+            { 0, 1, 1, 0, 9 },        /* Y */
+            { 1, 1, 1, 0, 9 },        /* U */
+            { 2, 1, 1, 0, 9 },        /* V */
+            { 3, 1, 1, 0, 9 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA420P10LE] = {
+        .name = "yuva420p10le",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA422P10BE] = {
+        .name = "yuva422p10be",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA422P10LE] = {
+        .name = "yuva422p10le",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA444P10BE] = {
+        .name = "yuva444p10be",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA444P10LE] = {
+        .name = "yuva444p10le",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA420P16BE] = {
+        .name = "yuva420p16be",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA420P16LE] = {
+        .name = "yuva420p16le",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA422P16BE] = {
+        .name = "yuva422p16be",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA422P16LE] = {
+        .name = "yuva422p16le",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA444P16BE] = {
+        .name = "yuva444p16be",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_BE | PIX_FMT_PLANAR,
+    },
+    [AV_PIX_FMT_YUVA444P16LE] = {
+        .name = "yuva444p16le",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 1, 1, 0, 15 },        /* Y */
+            { 1, 1, 1, 0, 15 },        /* U */
+            { 2, 1, 1, 0, 15 },        /* V */
+            { 3, 1, 1, 0, 15 },        /* A */
+        },
+        .flags = PIX_FMT_PLANAR,
+    },
     [AV_PIX_FMT_VDPAU_H264] = {
         .name = "vdpau_h264",
         .log2_chroma_w = 1,
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index ef7a16ae06..8903b60921 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -181,6 +181,25 @@ enum AVPixelFormat {
     AV_PIX_FMT_YUVA422P_LIBAV,  ///< planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
     AV_PIX_FMT_YUVA444P_LIBAV,  ///< planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
 
+    AV_PIX_FMT_YUVA420P9BE,  ///< planar YUV 4:2:0 22.5bpp, (1 Cr & Cb sample per 2x2 Y & A samples), big endian
+    AV_PIX_FMT_YUVA420P9LE,  ///< planar YUV 4:2:0 22.5bpp, (1 Cr & Cb sample per 2x2 Y & A samples), little endian
+    AV_PIX_FMT_YUVA422P9BE,  ///< planar YUV 4:2:2 27bpp, (1 Cr & Cb sample per 2x1 Y & A samples), big endian
+    AV_PIX_FMT_YUVA422P9LE,  ///< planar YUV 4:2:2 27bpp, (1 Cr & Cb sample per 2x1 Y & A samples), little endian
+    AV_PIX_FMT_YUVA444P9BE,  ///< planar YUV 4:4:4 36bpp, (1 Cr & Cb sample per 1x1 Y & A samples), big endian
+    AV_PIX_FMT_YUVA444P9LE,  ///< planar YUV 4:4:4 36bpp, (1 Cr & Cb sample per 1x1 Y & A samples), little endian
+    AV_PIX_FMT_YUVA420P10BE, ///< planar YUV 4:2:0 25bpp, (1 Cr & Cb sample per 2x2 Y & A samples, big endian)
+    AV_PIX_FMT_YUVA420P10LE, ///< planar YUV 4:2:0 25bpp, (1 Cr & Cb sample per 2x2 Y & A samples, little endian)
+    AV_PIX_FMT_YUVA422P10BE, ///< planar YUV 4:2:2 30bpp, (1 Cr & Cb sample per 2x1 Y & A samples, big endian)
+    AV_PIX_FMT_YUVA422P10LE, ///< planar YUV 4:2:2 30bpp, (1 Cr & Cb sample per 2x1 Y & A samples, little endian)
+    AV_PIX_FMT_YUVA444P10BE, ///< planar YUV 4:4:4 40bpp, (1 Cr & Cb sample per 1x1 Y & A samples, big endian)
+    AV_PIX_FMT_YUVA444P10LE, ///< planar YUV 4:4:4 40bpp, (1 Cr & Cb sample per 1x1 Y & A samples, little endian)
+    AV_PIX_FMT_YUVA420P16BE, ///< planar YUV 4:2:0 40bpp, (1 Cr & Cb sample per 2x2 Y & A samples, big endian)
+    AV_PIX_FMT_YUVA420P16LE, ///< planar YUV 4:2:0 40bpp, (1 Cr & Cb sample per 2x2 Y & A samples, little endian)
+    AV_PIX_FMT_YUVA422P16BE, ///< planar YUV 4:2:2 48bpp, (1 Cr & Cb sample per 2x1 Y & A samples, big endian)
+    AV_PIX_FMT_YUVA422P16LE, ///< planar YUV 4:2:2 48bpp, (1 Cr & Cb sample per 2x1 Y & A samples, little endian)
+    AV_PIX_FMT_YUVA444P16BE, ///< planar YUV 4:4:4 64bpp, (1 Cr & Cb sample per 1x1 Y & A samples, big endian)
+    AV_PIX_FMT_YUVA444P16LE, ///< planar YUV 4:4:4 64bpp, (1 Cr & Cb sample per 1x1 Y & A samples, little endian)
+
 #ifndef AV_PIX_FMT_ABI_GIT_MASTER
     AV_PIX_FMT_RGBA64BE=0x123,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
     AV_PIX_FMT_RGBA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
@@ -274,6 +293,16 @@ enum AVPixelFormat {
 #define AV_PIX_FMT_GBRP14    AV_PIX_FMT_NE(GBRP14BE,    GBRP14LE)
 #define AV_PIX_FMT_GBRP16    AV_PIX_FMT_NE(GBRP16BE,    GBRP16LE)
 
+#define AV_PIX_FMT_YUVA420P9  AV_PIX_FMT_NE(YUVA420P9BE , YUVA420P9LE)
+#define AV_PIX_FMT_YUVA422P9  AV_PIX_FMT_NE(YUVA422P9BE , YUVA422P9LE)
+#define AV_PIX_FMT_YUVA444P9  AV_PIX_FMT_NE(YUVA444P9BE , YUVA444P9LE)
+#define AV_PIX_FMT_YUVA420P10 AV_PIX_FMT_NE(YUVA420P10BE, YUVA420P10LE)
+#define AV_PIX_FMT_YUVA422P10 AV_PIX_FMT_NE(YUVA422P10BE, YUVA422P10LE)
+#define AV_PIX_FMT_YUVA444P10 AV_PIX_FMT_NE(YUVA444P10BE, YUVA444P10LE)
+#define AV_PIX_FMT_YUVA420P16 AV_PIX_FMT_NE(YUVA420P16BE, YUVA420P16LE)
+#define AV_PIX_FMT_YUVA422P16 AV_PIX_FMT_NE(YUVA422P16BE, YUVA422P16LE)
+#define AV_PIX_FMT_YUVA444P16 AV_PIX_FMT_NE(YUVA444P16BE, YUVA444P16LE)
+
 #if FF_API_PIX_FMT
 #define PixelFormat AVPixelFormat
 
diff --git a/libswscale/input.c b/libswscale/input.c
index 6d76068f9a..d10b4f1062 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -935,6 +935,16 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUV420P14LE:
     case AV_PIX_FMT_YUV420P16LE:
     case AV_PIX_FMT_YUV422P16LE:
+
+    case AV_PIX_FMT_YUVA444P9LE:
+    case AV_PIX_FMT_YUVA422P9LE:
+    case AV_PIX_FMT_YUVA420P9LE:
+    case AV_PIX_FMT_YUVA444P10LE:
+    case AV_PIX_FMT_YUVA422P10LE:
+    case AV_PIX_FMT_YUVA420P10LE:
+    case AV_PIX_FMT_YUVA420P16LE:
+    case AV_PIX_FMT_YUVA422P16LE:
+    case AV_PIX_FMT_YUVA444P16LE:
     case AV_PIX_FMT_YUV444P16LE:
         c->chrToYV12 = bswap16UV_c;
         break;
@@ -954,6 +964,16 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUV420P16BE:
     case AV_PIX_FMT_YUV422P16BE:
     case AV_PIX_FMT_YUV444P16BE:
+
+    case AV_PIX_FMT_YUVA444P9BE:
+    case AV_PIX_FMT_YUVA422P9BE:
+    case AV_PIX_FMT_YUVA420P9BE:
+    case AV_PIX_FMT_YUVA444P10BE:
+    case AV_PIX_FMT_YUVA422P10BE:
+    case AV_PIX_FMT_YUVA420P10BE:
+    case AV_PIX_FMT_YUVA420P16BE:
+    case AV_PIX_FMT_YUVA422P16BE:
+    case AV_PIX_FMT_YUVA444P16BE:
         c->chrToYV12 = bswap16UV_c;
         break;
 #endif
@@ -1165,6 +1185,16 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUV420P16LE:
     case AV_PIX_FMT_YUV422P16LE:
     case AV_PIX_FMT_YUV444P16LE:
+
+    case AV_PIX_FMT_YUVA444P9LE:
+    case AV_PIX_FMT_YUVA422P9LE:
+    case AV_PIX_FMT_YUVA420P9LE:
+    case AV_PIX_FMT_YUVA444P10LE:
+    case AV_PIX_FMT_YUVA422P10LE:
+    case AV_PIX_FMT_YUVA420P10LE:
+    case AV_PIX_FMT_YUVA420P16LE:
+    case AV_PIX_FMT_YUVA422P16LE:
+    case AV_PIX_FMT_YUVA444P16LE:
     case AV_PIX_FMT_GRAY16LE:
         c->lumToYV12 = bswap16Y_c;
         break;
@@ -1184,6 +1214,16 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUV420P16BE:
     case AV_PIX_FMT_YUV422P16BE:
     case AV_PIX_FMT_YUV444P16BE:
+
+    case AV_PIX_FMT_YUVA444P9BE:
+    case AV_PIX_FMT_YUVA422P9BE:
+    case AV_PIX_FMT_YUVA420P9BE:
+    case AV_PIX_FMT_YUVA444P10BE:
+    case AV_PIX_FMT_YUVA422P10BE:
+    case AV_PIX_FMT_YUVA420P10BE:
+    case AV_PIX_FMT_YUVA420P16BE:
+    case AV_PIX_FMT_YUVA422P16BE:
+    case AV_PIX_FMT_YUVA444P16BE:
     case AV_PIX_FMT_GRAY16BE:
         c->lumToYV12 = bswap16Y_c;
         break;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 37b7712e22..71fb9899c4 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -62,6 +62,28 @@ static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
     }
 }
 
+static void fill_plane9or10(uint8_t *plane, int stride, int width,
+                            int height, int y, uint8_t val,
+                            const int dst_depth, const int big_endian)
+{
+    int i, j;
+    uint16_t *dst = (uint16_t *) (plane + stride * y);
+#define FILL8TO9_OR_10(wfunc) \
+    for (i = 0; i < height; i++) { \
+        for (j = 0; j < width; j++) { \
+            wfunc(&dst[j], (val << (dst_depth - 8)) |  \
+                               (val >> (16 - dst_depth))); \
+        } \
+        dst += stride / 2; \
+    }
+    if (big_endian) {
+        FILL8TO9_OR_10(AV_WB16);
+    } else {
+        FILL8TO9_OR_10(AV_WL16);
+    }
+}
+
+
 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
                            const uint8_t *_src, const int16_t *filter,
                            const int32_t *filterPos, int filterSize)
@@ -660,8 +682,20 @@ static int swScale(SwsContext *c, const uint8_t *src[],
         }
     }
 
-    if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
-        fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);
+    if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) {
+        int length = dstW;
+        int height = dstY - lastDstY;
+        if (is16BPS(c->dstFormat))
+            length *= 2;
+
+        if (is9_OR_10BPS(dstFormat)) {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
+            fill_plane9or10(dst[3], dstStride[3], length, height, lastDstY,
+                            255, desc->comp[3].depth_minus1 + 1,
+                            isBE(dstFormat));
+        } else
+            fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
+    }
 
 #if HAVE_MMXEXT_INLINE
     if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 0fe974f054..08cc2ed146 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -140,11 +140,11 @@ static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
 }
 
 static void fillPlane16(uint8_t *plane, int stride, int width, int height, int y,
-                      int alpha, int bits)
+                      int alpha, int bits, const int big_endian)
 {
     int i, j;
     uint8_t *ptr = plane + stride * y;
-    int v = alpha ? -1 : (1<<bits);
+    int v = alpha ? 0xFFFF>>(15-bits) : (1<<bits);
     for (i = 0; i < height; i++) {
         for (j = 0; j < width; j++) {
             AV_WN16(ptr+2*j, v);
@@ -153,6 +153,27 @@ static void fillPlane16(uint8_t *plane, int stride, int width, int height, int y
     }
 }
 
+static void fill_plane9or10(uint8_t *plane, int stride, int width,
+                            int height, int y, uint8_t val,
+                            const int dst_depth, const int big_endian)
+{
+    int i, j;
+    uint16_t *dst = (uint16_t *) (plane + stride * y);
+#define FILL8TO9_OR_10(wfunc) \
+    for (i = 0; i < height; i++) { \
+        for (j = 0; j < width; j++) { \
+            wfunc(&dst[j], (val << (dst_depth - 8)) |  \
+                               (val >> (16 - dst_depth))); \
+        } \
+        dst += stride / 2; \
+    }
+    if (big_endian) {
+        FILL8TO9_OR_10(AV_WB16);
+    } else {
+        FILL8TO9_OR_10(AV_WL16);
+    }
+}
+
 static void copyPlane(const uint8_t *src, int srcStride,
                       int srcSliceY, int srcSliceH, int width,
                       uint8_t *dst, int dstStride)
@@ -780,13 +801,28 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
         // ignore palette for GRAY8
         if (plane == 1 && !dst[2]) continue;
         if (!src[plane] || (plane == 1 && !src[2])) {
+#if 1
             if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) {
                 fillPlane16(dst[plane], dstStride[plane], length, height, y,
-                        plane == 3, desc_dst->comp[plane].depth_minus1);
+                        plane == 3, desc_dst->comp[plane].depth_minus1,
+                        isBE(c->dstFormat));
             } else {
                 fillPlane(dst[plane], dstStride[plane], length, height, y,
                         (plane == 3) ? 255 : 128);
             }
+#else
+            int val = (plane == 3) ? 255 : 128;
+            if (is16BPS(c->dstFormat))
+                length *= 2;
+            if (is9_OR_10BPS(c->dstFormat)) {
+                fill_plane9or10(dst[plane], dstStride[plane],
+                                length, height, y, val,
+                                desc_dst->comp[plane].depth_minus1 + 1,
+                                isBE(c->dstFormat));
+            } else
+                fillPlane(dst[plane], dstStride[plane], length, height, y,
+                          val);
+#endif
         } else {
             if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)
                || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat))
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 21fd718e4c..9ee4e824d1 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -116,6 +116,24 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_YUVA420P]    = { 1, 1 },
     [AV_PIX_FMT_YUVA422P]    = { 1, 1 },
     [AV_PIX_FMT_YUVA444P]    = { 1, 1 },
+    [AV_PIX_FMT_YUVA420P9BE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA420P9LE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA422P9BE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA422P9LE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA444P9BE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA444P9LE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA420P10BE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA420P10LE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA422P10BE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA422P10LE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA444P10BE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA444P10LE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA420P16BE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA420P16LE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA422P16BE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA422P16LE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA444P16BE]= { 1, 1 },
+    [AV_PIX_FMT_YUVA444P16LE]= { 1, 1 },
     [AV_PIX_FMT_RGB48BE]     = { 1, 1 },
     [AV_PIX_FMT_RGB48LE]     = { 1, 1 },
     [AV_PIX_FMT_RGBA64BE]    = { 1, 0 },
diff --git a/tests/ref/lavfi/pixfmts_copy b/tests/ref/lavfi/pixfmts_copy
index 042f2d5518..d13af3d591 100644
--- a/tests/ref/lavfi/pixfmts_copy
+++ b/tests/ref/lavfi/pixfmts_copy
@@ -74,8 +74,26 @@ yuv444p16le         96a857dba8dc6792c58daec872825b32
 yuv444p9be          07727e5c9040b7f0a17d591288ac330d
 yuv444p9le          4d12d20a68dc28618594c96c2ade4ff4
 yuva420p            3a8c5c142e051367c196f95696e0e2c3
+yuva420p10be        c15ea36665ebb6c6400b2817f8dc0959
+yuva420p10le        ad2d0424033e7acbafa6d58f59b4487e
+yuva420p16be        6afcf758f4b66c0b4173c942d42212d7
+yuva420p16le        13e195aa96329eb49921b6f9f07b875c
+yuva420p9be         bfbc86280eb417d53c3e09df1d6628b2
+yuva420p9le         78f5593bf51a31841ef83df41d0316eb
 yuva422p            45ae66d6f69fd5b77e6831e98d228bf4
+yuva422p10be        90ce250a517843b3e8a1ac0f4fdad733
+yuva422p10le        c74cfda8934e3bf86940b7a08c809b35
+yuva422p16be        c3f7354b6013b43439e02aa02be5fe69
+yuva422p16le        a7ccc43820683ab15061d14cf8efce6c
+yuva422p9be         14c55a16d19499b54b4341f135d3e558
+yuva422p9le         a8bf168e5d2709222192d0aff46b1373
 yuva444p            86b05da54db8c7e8cf5b6638e19c6fc5
+yuva444p10be        bea827ff82f229145a016954120b731f
+yuva444p10le        c51b0554cfba0fabacf979683dceee95
+yuva444p16be        52a9591ec0d5059e49b1b2803f8582aa
+yuva444p16le        a9272ac197e4a4195662ce90f533976c
+yuva444p9be         f72f646ef07cdab613420585aba041ac
+yuva444p9le         6d431b0a27bf4f86ea44ef5f14247a01
 yuvj420p            73661456012f20cda81207b14bb0c0a5
 yuvj422p            aa97862b57f47c5a6506156e9aaf129a
 yuvj440p            ff8b9884a49d546b035f5d2ac1e673df
diff --git a/tests/ref/lavfi/pixfmts_null b/tests/ref/lavfi/pixfmts_null
index 042f2d5518..d13af3d591 100644
--- a/tests/ref/lavfi/pixfmts_null
+++ b/tests/ref/lavfi/pixfmts_null
@@ -74,8 +74,26 @@ yuv444p16le         96a857dba8dc6792c58daec872825b32
 yuv444p9be          07727e5c9040b7f0a17d591288ac330d
 yuv444p9le          4d12d20a68dc28618594c96c2ade4ff4
 yuva420p            3a8c5c142e051367c196f95696e0e2c3
+yuva420p10be        c15ea36665ebb6c6400b2817f8dc0959
+yuva420p10le        ad2d0424033e7acbafa6d58f59b4487e
+yuva420p16be        6afcf758f4b66c0b4173c942d42212d7
+yuva420p16le        13e195aa96329eb49921b6f9f07b875c
+yuva420p9be         bfbc86280eb417d53c3e09df1d6628b2
+yuva420p9le         78f5593bf51a31841ef83df41d0316eb
 yuva422p            45ae66d6f69fd5b77e6831e98d228bf4
+yuva422p10be        90ce250a517843b3e8a1ac0f4fdad733
+yuva422p10le        c74cfda8934e3bf86940b7a08c809b35
+yuva422p16be        c3f7354b6013b43439e02aa02be5fe69
+yuva422p16le        a7ccc43820683ab15061d14cf8efce6c
+yuva422p9be         14c55a16d19499b54b4341f135d3e558
+yuva422p9le         a8bf168e5d2709222192d0aff46b1373
 yuva444p            86b05da54db8c7e8cf5b6638e19c6fc5
+yuva444p10be        bea827ff82f229145a016954120b731f
+yuva444p10le        c51b0554cfba0fabacf979683dceee95
+yuva444p16be        52a9591ec0d5059e49b1b2803f8582aa
+yuva444p16le        a9272ac197e4a4195662ce90f533976c
+yuva444p9be         f72f646ef07cdab613420585aba041ac
+yuva444p9le         6d431b0a27bf4f86ea44ef5f14247a01
 yuvj420p            73661456012f20cda81207b14bb0c0a5
 yuvj422p            aa97862b57f47c5a6506156e9aaf129a
 yuvj440p            ff8b9884a49d546b035f5d2ac1e673df
diff --git a/tests/ref/lavfi/pixfmts_pixdesctest b/tests/ref/lavfi/pixfmts_pixdesctest
index 918efbc2ef..67e5925ce4 100644
--- a/tests/ref/lavfi/pixfmts_pixdesctest
+++ b/tests/ref/lavfi/pixfmts_pixdesctest
@@ -74,8 +74,26 @@ yuv444p16le         96a857dba8dc6792c58daec872825b32
 yuv444p9be          07727e5c9040b7f0a17d591288ac330d
 yuv444p9le          4d12d20a68dc28618594c96c2ade4ff4
 yuva420p            3a8c5c142e051367c196f95696e0e2c3
+yuva420p10be        a3fd7193dc4abb551ab7f88b7e366d33
+yuva420p10le        ad2d0424033e7acbafa6d58f59b4487e
+yuva420p16be        6afcf758f4b66c0b4173c942d42212d7
+yuva420p16le        13e195aa96329eb49921b6f9f07b875c
+yuva420p9be         cb12ffe4ea54b118020b97d7bc0c7fe5
+yuva420p9le         78f5593bf51a31841ef83df41d0316eb
 yuva422p            45ae66d6f69fd5b77e6831e98d228bf4
+yuva422p10be        90ce250a517843b3e8a1ac0f4fdad733
+yuva422p10le        c74cfda8934e3bf86940b7a08c809b35
+yuva422p16be        c3f7354b6013b43439e02aa02be5fe69
+yuva422p16le        a7ccc43820683ab15061d14cf8efce6c
+yuva422p9be         14c55a16d19499b54b4341f135d3e558
+yuva422p9le         a8bf168e5d2709222192d0aff46b1373
 yuva444p            86b05da54db8c7e8cf5b6638e19c6fc5
+yuva444p10be        bea827ff82f229145a016954120b731f
+yuva444p10le        c51b0554cfba0fabacf979683dceee95
+yuva444p16be        52a9591ec0d5059e49b1b2803f8582aa
+yuva444p16le        a9272ac197e4a4195662ce90f533976c
+yuva444p9be         f72f646ef07cdab613420585aba041ac
+yuva444p9le         6d431b0a27bf4f86ea44ef5f14247a01
 yuvj420p            73661456012f20cda81207b14bb0c0a5
 yuvj422p            aa97862b57f47c5a6506156e9aaf129a
 yuvj440p            ff8b9884a49d546b035f5d2ac1e673df
diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale
index 3713142d86..76d7d34666 100644
--- a/tests/ref/lavfi/pixfmts_scale
+++ b/tests/ref/lavfi/pixfmts_scale
@@ -74,8 +74,26 @@ yuv444p16le         f8bac16bf1f2afbd3626e07bcc815a9f
 yuv444p9be          db739906e3ae3b8792cdc5a0c3387565
 yuv444p9le          693b8d30958ef1a37296b1690b4b36d2
 yuva420p            df46b738bdaf30d3a7f880b5ae45b092
+yuva420p10be        8e5f3b069fdf2f0c14f49ad827991092
+yuva420p10le        e6915376ad7d0f2be9d50099e5ee33ef
+yuva420p16be        aa8ca29a93936c62ef038ca8a57f47d7
+yuva420p16le        ee0cbb31f9cdb897c1982df0caa8ffa0
+yuva420p9be         b8ca5603123aeb953b9d4fc8ec8e35e1
+yuva420p9le         220212a81cda0f2d112f7ae96d532ff9
 yuva422p            650755270debb03d2c03b2e93b64c576
+yuva422p10be        fe0d8c2509d2d23f856093f9aea83cba
+yuva422p10le        c641064c6306c6eaf95b387e5ae08d67
+yuva422p16be        0b8f9385498d2449b18fb15237b0a448
+yuva422p16le        ce22b20deb93b4846a5043aa104f22d1
+yuva422p9be         62e767085855b9605513b39eed787e8e
+yuva422p9le         f69c09e872838fe392dfe1825263d3f1
 yuva444p            72083e0941cc45af9f97b89d3cd16112
+yuva444p10be        d8b6fe8cfb60c6e516563c0a4f5c75a8
+yuva444p10le        4a85b717f21975216144efc11e2f2cda
+yuva444p16be        36a8797545163c24fc0d73f669c3108c
+yuva444p16le        c017c229aacb832a12c2297fb080a7a0
+yuva444p9be         3826abd6dd62d42ad02c9bb610dec561
+yuva444p9le         2c7bfb90f7db9faab6862537801e1143
 yuvj420p            31386dce60a2dcc493da5d0ed9d880df
 yuvj422p            492452e50a3fe66724840cad29be4098
 yuvj440p            7632893e81d3f4f3ace3755f97479897
diff --git a/tests/ref/lavfi/pixfmts_vflip b/tests/ref/lavfi/pixfmts_vflip
index 50a459dec0..45082f79ac 100644
--- a/tests/ref/lavfi/pixfmts_vflip
+++ b/tests/ref/lavfi/pixfmts_vflip
@@ -74,8 +74,26 @@ yuv444p16le         201e3acfa6f7628dfe2a1663de1128b1
 yuv444p9be          6143c321929ade9e0bc93ddea926e936
 yuv444p9le          e43ba2026848ec803fabf74d77c10125
 yuva420p            dc8fd115eaf203a3eac351b92a7d8f18
+yuva420p10be        8dc20d4b654a82680dcb75391f1c8c28
+yuva420p10le        f43a991e8b5fe7e192cf4e0dfee2b6cb
+yuva420p16be        b5c54895e87521f65a298d33bab5eb08
+yuva420p16le        95e208bc6644e23a2126ac5fca085f06
+yuva420p9be         aa122367fc3fde822f812a4b878893f8
+yuva420p9le         4b57b8bd1534743f6740502c74bef385
 yuva422p            6091f9c62a121c09eadb02e9173b2da2
+yuva422p10be        93b6afb2b1d1538b1c87caa040c43f4e
+yuva422p10le        1c9359025ab8d257f0f5296e6b5a5787
+yuva422p16be        3fa019b3d5e6dcb7c8e338837b9cffe1
+yuva422p16le        871f244989e14f5d3eda45abb6b3dfd1
+yuva422p9be         923c816778f782ff8a56bcd1b14ef08d
+yuva422p9le         bd882d40af0369d9ef7891c8e7891024
 yuva444p            9a55e83047abcc7a709f20805070135e
+yuva444p10be        7ae88f488969b527c1348383c0d06ade
+yuva444p10le        79a8dab388eb4d6cd12949b456ef5975
+yuva444p16be        4699a802e8ea3e74e968122980c0b0b0
+yuva444p16le        6f54a8cff38c54a235b92a0f1314e0aa
+yuva444p9be         7472bb4b0c774d5d741035086d5e4330
+yuva444p9le         ae11ddd5a3e8d69a36989f6f2a8897a1
 yuvj420p            200b0332de9944e76c94d2e0699a5a2d
 yuvj422p            a19a89ef145305cf224ef5aa247d075a
 yuvj440p            4240c9348d28af5f3edd0e642002bd2c