From af2ea724951b4b12b4522b462047eebbf9566b84 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 00:46:23 +0100
Subject: [PATCH 01/40] aes: use direct assignments instead of memcpy() or
 loops

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/aes.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/libavutil/aes.c b/libavutil/aes.c
index fc6c4168cd..f3f13bfe41 100644
--- a/libavutil/aes.c
+++ b/libavutil/aes.c
@@ -127,7 +127,7 @@ void av_aes_crypt(AVAES *a, uint8_t *dst_, const uint8_t *src_,
             crypt(a, 0, inv_sbox, dec_multbl);
             if (iv) {
                 addkey(&a->state[0], &a->state[0], iv);
-                memcpy(iv, src, 16);
+                *iv = *src;
             }
             addkey(dst, &a->state[0], &a->round_key[0]);
         } else {
@@ -136,7 +136,7 @@ void av_aes_crypt(AVAES *a, uint8_t *dst_, const uint8_t *src_,
             crypt(a, 2, sbox, enc_multbl);
             addkey(dst, &a->state[0], &a->round_key[0]);
             if (iv)
-                memcpy(iv, dst, 16);
+                *iv = *dst;
         }
         src++;
         dst++;
@@ -221,15 +221,14 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
     if (decrypt) {
         for (i = 1; i < rounds; i++) {
             av_aes_block tmp[3];
-            memcpy(&tmp[2], &a->round_key[i], 16);
+            tmp[2] = a->round_key[i];
             subshift(&tmp[1], 0, sbox);
             mix(tmp, dec_multbl, 1, 3);
-            memcpy(&a->round_key[i], &tmp[0], 16);
+            a->round_key[i] = tmp[0];
         }
     } else {
         for (i = 0; i < (rounds + 1) >> 1; i++) {
-            for (j = 0; j < 16; j++)
-                FFSWAP(int, a->round_key[i].u8[j], a->round_key[rounds-i].u8[j]);
+            FFSWAP(av_aes_block, a->round_key[i], a->round_key[rounds-i]);
         }
     }
 

From 21accb3bb26c874d95796a77feeab049f6f6f80d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 02:37:31 +0100
Subject: [PATCH 02/40] h264: remove assert() immediately following return
 statement

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/h264_mvpred.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index 1359072dc8..f603e7ff38 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -64,7 +64,6 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
             if(!MB_FIELD
                && IS_INTERLACED(h->left_type[0])){
                 SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5));
-                assert(h->left_mb_xy[0] == h->left_mb_xy[1]);
             }
             if(MB_FIELD
                && !IS_INTERLACED(h->left_type[0])){

From 5d20f19be25c973fe10d0d17db9245002585710d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 2 Jul 2011 22:10:25 +0100
Subject: [PATCH 03/40] aes: fix invalid array indexing in init code

This makes the code work with clang/x86_32 and removes several warnings.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/aes.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/libavutil/aes.c b/libavutil/aes.c
index f3f13bfe41..8a8bfc2b25 100644
--- a/libavutil/aes.c
+++ b/libavutil/aes.c
@@ -54,6 +54,8 @@ static uint32_t enc_multbl[4][256];
 static uint32_t dec_multbl[4][256];
 #endif
 
+#define ROT(x, s) ((x << s) | (x >> (32-s)))
+
 static inline void addkey(av_aes_block *dst, const av_aes_block *src,
                           const av_aes_block *round_key)
 {
@@ -86,7 +88,6 @@ static void subshift(av_aes_block s0[2], int s, const uint8_t *box)
 
 static inline int mix_core(uint32_t multbl[][256], int a, int b, int c, int d){
 #if CONFIG_SMALL
-#define ROT(x,s) ((x<<s)|(x>>(32-s)))
     return multbl[0][a] ^ ROT(multbl[0][b], 8) ^ ROT(multbl[0][c], 16) ^ ROT(multbl[0][d], 24);
 #else
     return multbl[0][a] ^ multbl[1][b] ^ multbl[2][c] ^ multbl[3][d];
@@ -143,22 +144,29 @@ void av_aes_crypt(AVAES *a, uint8_t *dst_, const uint8_t *src_,
     }
 }
 
-static void init_multbl2(uint8_t tbl[1024], const int c[4],
+static void init_multbl2(uint32_t tbl[][256], const int c[4],
                          const uint8_t *log8, const uint8_t *alog8,
                          const uint8_t *sbox)
 {
-    int i, j;
+    int i;
 
-    for (i = 0; i < 1024; i++) {
-        int x = sbox[i >> 2];
-        if (x)
-            tbl[i] = alog8[log8[x] + log8[c[i & 3]]];
-    }
+    for (i = 0; i < 256; i++) {
+        int x = sbox[i];
+        if (x) {
+            int k, l, m, n;
+            x = log8[x];
+            k = alog8[x + log8[c[0]]];
+            l = alog8[x + log8[c[1]]];
+            m = alog8[x + log8[c[2]]];
+            n = alog8[x + log8[c[3]]];
+            tbl[0][i] = AV_NE(MKBETAG(k,l,m,n), MKTAG(k,l,m,n));
 #if !CONFIG_SMALL
-    for (j = 256; j < 1024; j++)
-        for (i = 0; i < 4; i++)
-            tbl[4*j + i] = tbl[4*j + ((i - 1) & 3) - 1024];
+            tbl[1][i] = ROT(tbl[0][i], 8);
+            tbl[2][i] = ROT(tbl[0][i], 16);
+            tbl[3][i] = ROT(tbl[0][i], 24);
 #endif
+        }
+    }
 }
 
 // this is based on the reference AES code by Paulo Barreto and Vincent Rijmen
@@ -187,9 +195,9 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
             inv_sbox[j] = i;
             sbox[i] = j;
         }
-        init_multbl2(dec_multbl[0], (const int[4]) { 0xe, 0x9, 0xd, 0xb },
+        init_multbl2(dec_multbl, (const int[4]) { 0xe, 0x9, 0xd, 0xb },
                      log8, alog8, inv_sbox);
-        init_multbl2(enc_multbl[0], (const int[4]) { 0x2, 0x1, 0x1, 0x3 },
+        init_multbl2(enc_multbl, (const int[4]) { 0x2, 0x1, 0x1, 0x3 },
                      log8, alog8, sbox);
     }
 

From 050f2b3e7f3a98bc824cfcaf4c825ed4c643e21c Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 02:34:27 +0100
Subject: [PATCH 04/40] mjpeg: remove pointless braces around block of code

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/mjpegdec.c | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 391d58de6b..3946df6472 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -1520,28 +1520,26 @@ eoi_parser:
                         av_log(avctx, AV_LOG_WARNING, "Found EOI before any SOF, ignoring\n");
                         break;
                     }
-                    {
-                        if (s->interlaced) {
-                            s->bottom_field ^= 1;
-                            /* if not bottom field, do not output image yet */
-                            if (s->bottom_field == !s->interlace_polarity)
-                                goto not_the_end;
-                        }
-                        *picture = *s->picture_ptr;
-                        *data_size = sizeof(AVFrame);
-
-                        if(!s->lossless){
-                            picture->quality= FFMAX3(s->qscale[0], s->qscale[1], s->qscale[2]);
-                            picture->qstride= 0;
-                            picture->qscale_table= s->qscale_table;
-                            memset(picture->qscale_table, picture->quality, (s->width+15)/16);
-                            if(avctx->debug & FF_DEBUG_QP)
-                                av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
-                            picture->quality*= FF_QP2LAMBDA;
-                        }
-
-                        goto the_end;
+                    if (s->interlaced) {
+                        s->bottom_field ^= 1;
+                        /* if not bottom field, do not output image yet */
+                        if (s->bottom_field == !s->interlace_polarity)
+                            goto not_the_end;
                     }
+                    *picture = *s->picture_ptr;
+                    *data_size = sizeof(AVFrame);
+
+                    if(!s->lossless){
+                        picture->quality= FFMAX3(s->qscale[0], s->qscale[1], s->qscale[2]);
+                        picture->qstride= 0;
+                        picture->qscale_table= s->qscale_table;
+                        memset(picture->qscale_table, picture->quality, (s->width+15)/16);
+                        if(avctx->debug & FF_DEBUG_QP)
+                            av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
+                        picture->quality*= FF_QP2LAMBDA;
+                    }
+
+                    goto the_end;
                     break;
                 case SOS:
                     if (!s->got_picture) {

From b27565b1432f8bb2c933afa184ace6582ad650ee Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 02:37:03 +0100
Subject: [PATCH 05/40] Remove statements immediately following unconditional
 jumps

This removes a number of compiler warnings.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/cook.c      | 1 -
 libavcodec/flicvideo.c | 1 -
 libavcodec/mjpegdec.c  | 1 -
 libavcodec/pcm.c       | 1 -
 libavcodec/shorten.c   | 3 ---
 libavformat/a64.c      | 1 -
 libavformat/idroqdec.c | 1 -
 libavformat/oma.c      | 1 -
 libavformat/psxstr.c   | 1 -
 libavformat/wc3movie.c | 1 -
 10 files changed, 12 deletions(-)

diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 84211a6d08..8d1078145e 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -1156,7 +1156,6 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
             default:
                 av_log_ask_for_sample(avctx, "Unknown Cook version.\n");
                 return -1;
-                break;
         }
 
         if(s > 1 && q->subpacket[s].samples_per_channel != q->samples_per_channel) {
diff --git a/libavcodec/flicvideo.c b/libavcodec/flicvideo.c
index ed9972c018..e8c0e14386 100644
--- a/libavcodec/flicvideo.c
+++ b/libavcodec/flicvideo.c
@@ -112,7 +112,6 @@ static av_cold int flic_decode_init(AVCodecContext *avctx)
         case 24 : avctx->pix_fmt = PIX_FMT_BGR24; /* Supposedly BGR, but havent any files to test with */
                   av_log(avctx, AV_LOG_ERROR, "24Bpp FLC/FLX is unsupported due to no test files.\n");
                   return -1;
-                  break;
         default :
                   av_log(avctx, AV_LOG_ERROR, "Unknown FLC/FLX depth of %d Bpp is unsupported.\n",depth);
                   return -1;
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 3946df6472..4684190e08 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -1540,7 +1540,6 @@ eoi_parser:
                     }
 
                     goto the_end;
-                    break;
                 case SOS:
                     if (!s->got_picture) {
                         av_log(avctx, AV_LOG_WARNING, "Can not process SOS before SOF, skipping\n");
diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index 9100646179..2ad395dd0e 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -440,7 +440,6 @@ static int pcm_decode_frame(AVCodecContext *avctx,
         default:
             av_log(avctx, AV_LOG_ERROR, "PCM DVD unsupported sample depth\n");
             return -1;
-            break;
         }
         samples = (short *) dst_int32_t;
         break;
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index a6e00750e9..e4dfa7c59f 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -471,7 +471,6 @@ static int shorten_decode_frame(AVCodecContext *avctx,
                         s->cur_chan = 0;
                         goto frame_done;
                     }
-                    break;
                 }
                 break;
             case FN_VERBATIM:
@@ -489,11 +488,9 @@ static int shorten_decode_frame(AVCodecContext *avctx,
             case FN_QUIT:
                 *data_size = 0;
                 return buf_size;
-                break;
             default:
                 av_log(avctx, AV_LOG_ERROR, "unknown shorten function %d\n", cmd);
                 return -1;
-                break;
         }
     }
 frame_done:
diff --git a/libavformat/a64.c b/libavformat/a64.c
index 0e5576b651..3d313e5fba 100644
--- a/libavformat/a64.c
+++ b/libavformat/a64.c
@@ -55,7 +55,6 @@ static int a64_write_header(struct AVFormatContext *s)
         break;
     default:
         return AVERROR(EINVAL);
-        break;
     }
     avio_write(s->pb, header, 2);
     c->prev_pkt.size = 0;
diff --git a/libavformat/idroqdec.c b/libavformat/idroqdec.c
index 8e991c57d9..d9315966ea 100644
--- a/libavformat/idroqdec.c
+++ b/libavformat/idroqdec.c
@@ -209,7 +209,6 @@ static int roq_read_packet(AVFormatContext *s,
         default:
             av_log(s, AV_LOG_ERROR, "  unknown RoQ chunk (%04X)\n", chunk_type);
             return AVERROR_INVALIDDATA;
-            break;
         }
     }
 
diff --git a/libavformat/oma.c b/libavformat/oma.c
index 6fdf75f17c..0ec81ebefb 100644
--- a/libavformat/oma.c
+++ b/libavformat/oma.c
@@ -149,7 +149,6 @@ static int oma_read_header(AVFormatContext *s,
         default:
             av_log(s, AV_LOG_ERROR, "Unsupported codec %d!\n",buf[32]);
             return -1;
-            break;
     }
 
     st->codec->block_align = framesize;
diff --git a/libavformat/psxstr.c b/libavformat/psxstr.c
index b7a9d3b250..646244238c 100644
--- a/libavformat/psxstr.c
+++ b/libavformat/psxstr.c
@@ -234,7 +234,6 @@ static int str_read_packet(AVFormatContext *s,
             pkt->stream_index =
                 str->channels[channel].audio_stream_index;
             return 0;
-            break;
         default:
             av_log(s, AV_LOG_WARNING, "Unknown sector type %02X\n", sector[0x12]);
             /* drop the sector and move on */
diff --git a/libavformat/wc3movie.c b/libavformat/wc3movie.c
index e57a9bf844..03483de737 100644
--- a/libavformat/wc3movie.c
+++ b/libavformat/wc3movie.c
@@ -152,7 +152,6 @@ static int wc3_read_header(AVFormatContext *s,
                 (uint8_t)fourcc_tag, (uint8_t)(fourcc_tag >> 8), (uint8_t)(fourcc_tag >> 16), (uint8_t)(fourcc_tag >> 24),
                 (uint8_t)fourcc_tag, (uint8_t)(fourcc_tag >> 8), (uint8_t)(fourcc_tag >> 16), (uint8_t)(fourcc_tag >> 24));
             return AVERROR_INVALIDDATA;
-            break;
         }
 
         fourcc_tag = avio_rl32(pb);

From 6728aaf6d8046ec0a82b6955b8bf27bf87358046 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 01:52:37 +0100
Subject: [PATCH 06/40] ffv1: remove unused tables

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/ffv1.c | 74 +----------------------------------------------
 1 file changed, 1 insertion(+), 73 deletions(-)

diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index 50f1062ad4..1f1d4d1c62 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -42,25 +42,6 @@
 
 extern const uint8_t ff_log2_run[41];
 
-static const int8_t quant3[256]={
- 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
-};
-
 static const int8_t quant5_10bit[256]={
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -98,42 +79,7 @@ static const int8_t quant5[256]={
 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
 };
-static const int8_t quant7[256]={
- 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
-};
-static const int8_t quant9[256]={
- 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
-};
+
 static const int8_t quant9_10bit[256]={
  0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
@@ -171,24 +117,6 @@ static const int8_t quant11[256]={
 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
 };
-static const int8_t quant13[256]={
- 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
-};
 
 static const uint8_t ver2_state[256]= {
    0,  10,  10,  10,  10,  16,  16,  16,  28,  16,  16,  29,  42,  49,  20,  49,

From c9403419b25e36328cd25dc0855fd946c261a233 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 02:54:58 +0100
Subject: [PATCH 07/40] avidec: simplify convoluted flow in avi_load_index()

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavformat/avidec.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index 871da0bb40..a00b9ced2d 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -1241,20 +1241,16 @@ static int avi_load_index(AVFormatContext *s)
                 (tag >> 16) & 0xff,
                 (tag >> 24) & 0xff,
                 size);
-        switch(tag) {
-        case MKTAG('i', 'd', 'x', '1'):
-            if (avi_read_idx1(s, size) < 0)
-                goto skip;
+
+        if (tag == MKTAG('i', 'd', 'x', '1') &&
+            avi_read_idx1(s, size) >= 0) {
             ret = 0;
-                goto the_end;
-            break;
-        default:
-        skip:
-            size += (size & 1);
-            if (avio_skip(pb, size) < 0)
-                goto the_end; // something is wrong here
             break;
         }
+
+        size += (size & 1);
+        if (avio_skip(pb, size) < 0)
+            break; // something is wrong here
     }
  the_end:
     avio_seek(pb, pos, SEEK_SET);

From 904b5d302d4e6b22226d2144c727f58793f3e547 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 12:54:57 +0100
Subject: [PATCH 08/40] fate: add DES test

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/fate2.mak | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/fate2.mak b/tests/fate2.mak
index 5603c314dd..47804c4d8a 100644
--- a/tests/fate2.mak
+++ b/tests/fate2.mak
@@ -231,6 +231,11 @@ FATE_TESTS += fate-base64
 fate-base64: libavutil/base64-test$(EXESUF)
 fate-base64: CMD = run libavutil/base64-test
 
+FATE_TESTS += fate-des
+fate-des: libavutil/des-test$(EXESUF)
+fate-des: CMD = run libavutil/des-test
+fate-des: REF = /dev/null
+
 FATE_TESTS += fate-musepack7
 fate-musepack7: CMD = pcm -i $(SAMPLES)/musepack/inside-mp7.mpc
 fate-musepack7: CMP = oneoff

From 66fe5970abdad0964d0e0d0e100b62e715922886 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 12:55:54 +0100
Subject: [PATCH 09/40] des: reduce number of iterations in test program

Testing a million random keys takes an annoyingly long time.
1000 iterations should be enough.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/des.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/des.c b/libavutil/des.c
index a30ef3ba7c..857ca91319 100644
--- a/libavutil/des.c
+++ b/libavutil/des.c
@@ -402,7 +402,7 @@ int main(void) {
         printf("Partial Monte-Carlo test failed\n");
         return 1;
     }
-    for (i = 0; i < 1000000; i++) {
+    for (i = 0; i < 1000; i++) {
         key[0] = rand64(); key[1] = rand64(); key[2] = rand64();
         data = rand64();
         av_des_init(&d, key, 192, 0);

From 8f175810beba0e4d7fb76c6eb747e5aab76071b4 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 11:20:39 +0100
Subject: [PATCH 10/40] aes: fix for big endian systems

This was missed in 5d20f19 since CONFIG_SMALL was always broken
for big endian.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/aes.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavutil/aes.c b/libavutil/aes.c
index 8a8bfc2b25..d1fe857914 100644
--- a/libavutil/aes.c
+++ b/libavutil/aes.c
@@ -54,7 +54,11 @@ static uint32_t enc_multbl[4][256];
 static uint32_t dec_multbl[4][256];
 #endif
 
-#define ROT(x, s) ((x << s) | (x >> (32-s)))
+#if HAVE_BIGENDIAN
+#   define ROT(x, s) ((x >> s) | (x << (32-s)))
+#else
+#   define ROT(x, s) ((x << s) | (x >> (32-s)))
+#endif
 
 static inline void addkey(av_aes_block *dst, const av_aes_block *src,
                           const av_aes_block *round_key)

From 10dde477c77e0ac0fecda49fdb1dc71329aa7513 Mon Sep 17 00:00:00 2001
From: Reinhard Tartler <siretart@tauware.de>
Date: Sun, 3 Jul 2011 11:54:24 +0200
Subject: [PATCH 11/40] Update Doxyfile to the format preferred by Doxygen
 1.7.1 (via 'doxygen -u').

This is the version available in Debian stable, so it should be a reasonable
baseline that can be expected to be present on all developer machines.

Moreover, this is the version that is used by the nightly cronjob that
generates the online html version.
---
 Doxyfile | 346 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 283 insertions(+), 63 deletions(-)

diff --git a/Doxyfile b/Doxyfile
index 2502548aed..a4beaba323 100644
--- a/Doxyfile
+++ b/Doxyfile
@@ -1,4 +1,4 @@
-# Doxyfile 1.5.6
+# Doxyfile 1.7.1
 
 # This file describes the settings to be used by the documentation system
 # doxygen (www.doxygen.org) for a project
@@ -54,11 +54,11 @@ CREATE_SUBDIRS         = NO
 # information to generate all constant output in the proper language.
 # The default language is English, other supported languages are:
 # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
-# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek,
-# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish,
-# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish,
-# and Ukrainian.
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
 
 OUTPUT_LANGUAGE        = English
 
@@ -155,13 +155,6 @@ QT_AUTOBRIEF           = NO
 
 MULTILINE_CPP_IS_BRIEF = NO
 
-# If the DETAILS_AT_TOP tag is set to YES then Doxygen
-# will output the detailed description near the top, like JavaDoc.
-# If set to NO, the detailed description appears after the member
-# documentation.
-
-DETAILS_AT_TOP         = NO
-
 # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
 # member inherits the documentation from any documented member that it
 # re-implements.
@@ -214,6 +207,18 @@ OPTIMIZE_FOR_FORTRAN   = NO
 
 OPTIMIZE_OUTPUT_VHDL   = NO
 
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this
+# tag. The format is ext=language, where ext is a file extension, and language
+# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
+# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
+# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
+# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
 # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
 # to include (a tag file for) the STL sources as input, then you should
 # set this tag to YES in order to let doxygen match functions declarations and
@@ -268,6 +273,22 @@ SUBGROUPING            = YES
 
 TYPEDEF_HIDES_STRUCT   = NO
 
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penality.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will rougly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE      = 0
+
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
@@ -366,6 +387,12 @@ HIDE_SCOPE_NAMES       = NO
 
 SHOW_INCLUDE_FILES     = YES
 
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES   = NO
+
 # If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
 # is inserted in the documentation for inline members.
 
@@ -385,6 +412,16 @@ SORT_MEMBER_DOCS       = YES
 
 SORT_BRIEF_DOCS        = NO
 
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
 # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
 # hierarchy of group names into alphabetical order. If set to NO (the default)
 # the group names will appear in their defined order.
@@ -459,7 +496,8 @@ SHOW_DIRECTORIES       = NO
 SHOW_FILES             = YES
 
 # Set the SHOW_NAMESPACES tag to NO to disable the generation of the
-# Namespaces page.  This will remove the Namespaces entry from the Quick Index
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
 # and from the Folder Tree View (if specified). The default is YES.
 
 SHOW_NAMESPACES        = YES
@@ -474,6 +512,15 @@ SHOW_NAMESPACES        = YES
 
 FILE_VERSION_FILTER    =
 
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. The create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE            =
+
 #---------------------------------------------------------------------------
 # configuration options related to warning and progress messages
 #---------------------------------------------------------------------------
@@ -577,7 +624,8 @@ EXCLUDE_SYMLINKS       = NO
 # against the file with absolute path, so to exclude all test directories
 # for example use the pattern */test/*
 
-EXCLUDE_PATTERNS       = *.git *.d
+EXCLUDE_PATTERNS       = *.git \
+                         *.d
 
 # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
 # (namespaces, classes, functions, etc.) that should be excluded from the
@@ -591,7 +639,8 @@ EXCLUDE_SYMBOLS        =
 # directories that contain example code fragments that are included (see
 # the \include command).
 
-EXAMPLE_PATH           = libavcodec/ libavformat/
+EXAMPLE_PATH           = libavcodec/ \
+                         libavformat/
 
 # If the value of the EXAMPLE_PATH tag contains directories, you can use the
 # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
@@ -618,14 +667,17 @@ IMAGE_PATH             =
 # by executing (via popen()) the command <filter> <input-file>, where <filter>
 # is the value of the INPUT_FILTER tag, and <input-file> is the name of an
 # input file. Doxygen will then use the output that the filter program writes
-# to standard output.  If FILTER_PATTERNS is specified, this tag will be
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
 # ignored.
 
 INPUT_FILTER           =
 
 # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis.  Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match.  The filters are a list of the form:
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
 # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
 # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
 # is applied to all files.
@@ -675,7 +727,8 @@ REFERENCES_RELATION    = NO
 # If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
 # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
 # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
-# link to the source code.  Otherwise they will link to the documentstion.
+# link to the source code.
+# Otherwise they will link to the documentation.
 
 REFERENCES_LINK_SOURCE = YES
 
@@ -758,18 +811,50 @@ HTML_FOOTER            =
 
 HTML_STYLESHEET        =
 
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
+# Doxygen will adjust the colors in the stylesheet and background images
+# according to this color. Hue is specified as an angle on a colorwheel,
+# see http://en.wikipedia.org/wiki/Hue for more information.
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
+# The allowed range is 0 to 359.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
+# the colors in the HTML output. For a value of 0 the output will use
+# grayscales only. A value of 255 will produce the most vivid colors.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
+# the luminance component of the colors in the HTML output. Values below
+# 100 gradually make the output lighter, whereas values above 100 make
+# the output darker. The value divided by 100 is the actual gamma applied,
+# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
+# and 100 does not change the gamma.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP         = YES
+
 # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
 # files or namespaces will be aligned in HTML using tables. If set to
 # NO a bullet list will be used.
 
 HTML_ALIGN_MEMBERS     = YES
 
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files
-# will be generated that can be used as input for tools like the
-# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
-# of the generated HTML documentation.
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
 
-GENERATE_HTMLHELP      = NO
+HTML_DYNAMIC_SECTIONS  = NO
 
 # If the GENERATE_DOCSET tag is set to YES, additional index files
 # will be generated that can be used as input for Apple's Xcode 3
@@ -779,6 +864,8 @@ GENERATE_HTMLHELP      = NO
 # directory and running "make install" will install the docset in
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
 # it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
 
 GENERATE_DOCSET        = NO
 
@@ -796,13 +883,22 @@ DOCSET_FEEDNAME        = "Doxygen generated docs"
 
 DOCSET_BUNDLE_ID       = org.doxygen.Project
 
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded. For this to work a browser that supports
-# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
-# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
 
-HTML_DYNAMIC_SECTIONS  = NO
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
 
 # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
 # be used to specify the file name of the resulting .chm file. You
@@ -841,6 +937,76 @@ BINARY_TOC             = NO
 
 TOC_EXPAND             = NO
 
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
+# that can be used as input for Qt's qhelpgenerator to generate a
+# Qt Compressed Help (.qch) of the generated HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
+# add. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
+# Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
+# Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+# will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
+# the help appears.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
 # The DISABLE_INDEX tag can be used to turn on/off the condensed index at
 # top of each HTML page. The value NO (the default) enables the index and
 # the value YES disables it.
@@ -854,27 +1020,30 @@ ENUM_VALUES_PER_LINE   = 4
 
 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
 # structure should be generated to display hierarchical information.
-# If the tag value is set to FRAME, a side panel will be generated
+# If the tag value is set to YES, a side panel will be generated
 # containing a tree-like index structure (just like the one that
 # is generated for HTML Help). For this to work a browser that supports
-# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
-# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
-# probably better off using the HTML help feature. Other possible values
-# for this tag are: HIERARCHIES, which will generate the Groups, Directories,
-# and Class Hiererachy pages using a tree view instead of an ordered list;
-# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which
-# disables this behavior completely. For backwards compatibility with previous
-# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE
-# respectively.
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
 
 GENERATE_TREEVIEW      = NO
 
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES       = NO
+
 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
 # used to set the initial width (in pixels) of the frame in which the tree
 # is shown.
 
 TREEVIEW_WIDTH         = 250
 
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW    = NO
+
 # Use this tag to change the font size of Latex formulas included
 # as images in the HTML documentation. The default is 10. Note that
 # when you change the font size after a successful doxygen run you need
@@ -883,6 +1052,34 @@ TREEVIEW_WIDTH         = 250
 
 FORMULA_FONTSIZE       = 10
 
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
+# in the HTML output before the changes have effect.
+
+FORMULA_TRANSPARENT    = YES
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE           = NO
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a PHP enabled web server instead of at the web client
+# using Javascript. Doxygen will generate the search PHP script and index
+# file to put on the web server. The advantage of the server
+# based approach is that it scales better to large projects and allows
+# full text search. The disadvantages are that it is more difficult to set up
+# and does not have live searching capabilities.
+
+SERVER_BASED_SEARCH    = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to the LaTeX output
 #---------------------------------------------------------------------------
@@ -900,6 +1097,9 @@ LATEX_OUTPUT           = latex
 
 # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
 # invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
 
 LATEX_CMD_NAME         = latex
 
@@ -959,6 +1159,13 @@ LATEX_BATCHMODE        = NO
 
 LATEX_HIDE_INDICES     = NO
 
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to the RTF output
 #---------------------------------------------------------------------------
@@ -1095,8 +1302,10 @@ GENERATE_PERLMOD       = NO
 PERLMOD_LATEX          = NO
 
 # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
-# nicely formatted so it can be parsed by a human reader.  This is useful
-# if you want to understand what is going on.  On the other hand, if this
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
 # tag is set to NO the size of the Perl module output will be much smaller
 # and Perl will parse it just the same.
 
@@ -1158,17 +1367,22 @@ INCLUDE_FILE_PATTERNS  =
 # undefined via #undef or recursively expanded use the := operator
 # instead of the = operator.
 
-PREDEFINED             = __attribute__(x)="" "RENAME(x)=x ## _TMPL" "DEF(x)=x ## _TMPL" \
-                         HAVE_AV_CONFIG_H HAVE_MMX HAVE_MMX2 HAVE_AMD3DNOW \
-                         "DECLARE_ALIGNED(a,t,n)=t n" "offsetof(x,y)=0x42" \
+PREDEFINED             = "__attribute__(x)=" \
+                         "RENAME(x)=x ## _TMPL" \
+                         "DEF(x)=x ## _TMPL" \
+                         HAVE_AV_CONFIG_H \
+                         HAVE_MMX \
+                         HAVE_MMX2 \
+                         HAVE_AMD3DNOW \
+                         "DECLARE_ALIGNED(a,t,n)=t n" \
+                         "offsetof(x,y)=0x42"
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
 # this tag can be used to specify a list of macro names that should be expanded.
 # The macro definition that is found in the sources will be used.
 # Use the PREDEFINED tag if you want to use a different macro definition.
 
-#EXPAND_AS_DEFINED      = FF_COMMON_FRAME
-EXPAND_AS_DEFINED      = declare_idct(idct, table, idct_row_head, idct_row, idct_row_tail, idct_row_mid)
+EXPAND_AS_DEFINED      = declare_idct
 
 # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
 # doxygen's preprocessor will remove all function-like macros that are alone
@@ -1186,9 +1400,11 @@ SKIP_FUNCTION_MACROS   = YES
 # Optionally an initial location of the external documentation
 # can be added for each tagfile. The format of a tag file without
 # this location is as follows:
-#   TAGFILES = file1 file2 ...
+#
+# TAGFILES = file1 file2 ...
 # Adding location for the tag files is done as follows:
-#   TAGFILES = file1=loc1 "file2 = loc2" ...
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
 # where "loc1" and "loc2" can be relative or absolute paths or
 # URLs. If a location is present for each tag, the installdox tool
 # does not have to be run to correct the links.
@@ -1256,6 +1472,14 @@ HIDE_UNDOC_RELATIONS   = YES
 
 HAVE_DOT               = NO
 
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS        = 0
+
 # By default doxygen will write a font called FreeSans.ttf to the output
 # directory and reference it in all dot files that doxygen generates. This
 # font does not include all possible unicode characters however, so when you need
@@ -1267,6 +1491,11 @@ HAVE_DOT               = NO
 
 DOT_FONTNAME           = FreeSans
 
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
 # By default doxygen will tell dot to use the output directory to look for the
 # FreeSans.ttf font (which doxygen will put there itself). If you specify a
 # different font using DOT_FONTNAME you can set the path where dot
@@ -1384,10 +1613,10 @@ DOT_GRAPH_MAX_NODES    = 50
 MAX_DOT_GRAPH_DEPTH    = 0
 
 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is enabled by default, which results in a transparent
-# background. Warning: Depending on the platform used, enabling this option
-# may lead to badly anti-aliased labels on the edges of a graph (i.e. they
-# become hard to read).
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
 
 DOT_TRANSPARENT        = YES
 
@@ -1409,12 +1638,3 @@ GENERATE_LEGEND        = YES
 # the various graphs.
 
 DOT_CLEANUP            = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to the search engine
-#---------------------------------------------------------------------------
-
-# The SEARCHENGINE tag specifies whether or not a search engine should be
-# used. If set to NO the values of all tags below this one will be ignored.
-
-SEARCHENGINE           = NO

From 9bfa5363da21a5ba7bed174a82c86f2a089fc917 Mon Sep 17 00:00:00 2001
From: Daniel Kang <daniel.d.kang@gmail.com>
Date: Sat, 2 Jul 2011 22:18:39 -0400
Subject: [PATCH 12/40] H.264: Add x86 assembly for 10-bit H.264 qpel
 functions.

Mainly ported from 8-bit H.264 qpel.

Some code ported from x264. LGPL ok by author.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/Makefile            |   1 +
 libavcodec/x86/dsputil_mmx.c       | 134 +++--
 libavcodec/x86/h264_qpel_10bit.asm | 891 +++++++++++++++++++++++++++++
 libavcodec/x86/h264_qpel_mmx.c     |  98 ++++
 4 files changed, 1075 insertions(+), 49 deletions(-)
 create mode 100644 libavcodec/x86/h264_qpel_10bit.asm

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 022ab27766..d3cf0da72b 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -46,6 +46,7 @@ MMX-OBJS-$(HAVE_YASM)                  += x86/dsputil_yasm.o            \
                                           x86/fmtconvert.o              \
                                           x86/h264_chromamc.o           \
                                           x86/h264_chromamc_10bit.o     \
+                                          x86/h264_qpel_10bit.o         \
                                           $(YASM-OBJS-yes)
 
 MMX-OBJS-$(CONFIG_FFT)                 += x86/fft.o
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 03c094533f..d6eed82dd8 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2627,44 +2627,56 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
             }
 
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
-            c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \
-            c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU
+#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
+            c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
+            c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU
 
-            SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2);
-            SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2);
-            SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2);
-            SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2);
-            SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
-            SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
+            SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
+            SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
+            SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
+            SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, );
+            SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, );
+            SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, );
 
             if (!high_bit_depth) {
-            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
-            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
-            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
-            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
-            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
-            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
+            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, );
+            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, );
             }
+#if HAVE_YASM
+            else if (bit_depth == 10) {
+#if !ARCH_X86_64
+                SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
+                SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
+                SET_QPEL_FUNCS(put_h264_qpel, 1, 8,  10_mmxext, ff_);
+                SET_QPEL_FUNCS(avg_h264_qpel, 1, 8,  10_mmxext, ff_);
+#endif
+                SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
+                SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
+            }
+#endif
 
-            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
-            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
-            SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
-            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
+            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
+            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
+            SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
+            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
 
 #if HAVE_YASM
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
@@ -2725,26 +2737,26 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
                 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
             }
 
-            SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
-            SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
-            SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
-            SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow);
-            SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
-            SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
+            SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
+            SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
+            SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
+            SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, );
+            SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, );
+            SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, );
 
             if (!high_bit_depth) {
-            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
-            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
-            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
-            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
-            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
-            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, );
+            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, );
+            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, );
+            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, );
             }
 
-            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
-            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
-            SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
-            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
+            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, );
+            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
+            SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
+            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
 
 #if HAVE_YASM
             if (!high_bit_depth) {
@@ -2788,7 +2800,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             H264_QPEL_FUNCS(3, 3, sse2);
             }
 #if HAVE_YASM
+#define H264_QPEL_FUNCS_10(x, y, CPU)\
+            c->put_h264_qpel_pixels_tab[0][x+y*4] = ff_put_h264_qpel16_mc##x##y##_10_##CPU;\
+            c->put_h264_qpel_pixels_tab[1][x+y*4] = ff_put_h264_qpel8_mc##x##y##_10_##CPU;\
+            c->avg_h264_qpel_pixels_tab[0][x+y*4] = ff_avg_h264_qpel16_mc##x##y##_10_##CPU;\
+            c->avg_h264_qpel_pixels_tab[1][x+y*4] = ff_avg_h264_qpel8_mc##x##y##_10_##CPU;
             if (bit_depth == 10) {
+                SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
+                SET_QPEL_FUNCS(put_h264_qpel, 1, 8,  10_sse2, ff_);
+                SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
+                SET_QPEL_FUNCS(avg_h264_qpel, 1, 8,  10_sse2, ff_);
+                H264_QPEL_FUNCS_10(1, 0, sse2_cache64)
+                H264_QPEL_FUNCS_10(2, 0, sse2_cache64)
+                H264_QPEL_FUNCS_10(3, 0, sse2_cache64)
+
                 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2;
                 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2;
             }
@@ -2810,6 +2835,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             H264_QPEL_FUNCS(3, 2, ssse3);
             H264_QPEL_FUNCS(3, 3, ssse3);
             }
+            else if (bit_depth == 10) {
+                H264_QPEL_FUNCS_10(1, 0, ssse3_cache64)
+                H264_QPEL_FUNCS_10(2, 0, ssse3_cache64)
+                H264_QPEL_FUNCS_10(3, 0, ssse3_cache64)
+            }
             c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
 #if HAVE_YASM
             if (!high_bit_depth) {
@@ -2906,6 +2936,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 #if HAVE_AVX && HAVE_YASM
         if (mm_flags & AV_CPU_FLAG_AVX) {
             if (bit_depth == 10) {
+                //AVX implies !cache64.
+                //TODO: Port cache(32|64) detection from x264.
+                H264_QPEL_FUNCS_10(1, 0, sse2)
+                H264_QPEL_FUNCS_10(2, 0, sse2)
+                H264_QPEL_FUNCS_10(3, 0, sse2)
+
                 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx;
                 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx;
             }
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
new file mode 100644
index 0000000000..15dd72ca36
--- /dev/null
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -0,0 +1,891 @@
+;*****************************************************************************
+;* MMX/SSE2/AVX-optimized 10-bit H.264 qpel code
+;*****************************************************************************
+;* Copyright (C) 2011 x264 project
+;*
+;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA 32
+
+cextern pw_16
+cextern pw_1
+cextern pb_0
+
+pw_pixel_max: times 8 dw ((1 << 10)-1) ; 1023, the largest 10-bit sample value
+
+pad10: times 8 dw 10*1023
+pad20: times 8 dw 20*1023 ; bias subtracted from the unrounded filter sums in put_hv and put_h
+pad30: times 8 dw 30*1023
+depad: times 4 dd 32*20*1023 + 512 ; added before the >>10 in the h_loop_op routines
+depad2: times 8 dw 20*1023 + 16*1022 + 16 ; added before the >>5 in the mc12 body
+unpad: times 8 dw 16*1022/32 ; needs to be mod 16
+
+tap1: times 4 dw  1, -5 ; word-pair coefficients used with pmaddwd in the h_loop_op routines
+tap2: times 4 dw 20, 20
+tap3: times 4 dw -5,  1
+pd_0f: times 4 dd 0xffff ; mask keeping the low word of each dword
+
+SECTION .text
+
+
+%macro AVG_MOV 2 ; dst, src: store pavgw(src, dst) to dst; src is overwritten with the same value
+    pavgw %2, %1
+    mova  %1, %2
+%endmacro
+
+%macro ADDW 3 ; dst, src, tmp: dst += src as packed words
+%if mmsize == 8
+    paddw %1, %2
+%else
+    movu  %3, %2 ; load src through tmp with an unaligned move before adding
+    paddw %1, %3
+%endif
+%endmacro
+
+%macro FILT_H 4 ; a (in/out), b, c, constant added to a before filtering; b and c are not modified
+    paddw  %1, %4  ; a += constant (the callers in this file pass pw_16)
+    psubw  %1, %2  ; a-b
+    psraw  %1, 2   ; (a-b)/4
+    psubw  %1, %2  ; (a-b)/4-b
+    paddw  %1, %3  ; (a-b)/4-b+c
+    psraw  %1, 2   ; ((a-b)/4-b+c)/4
+    paddw  %1, %3  ; ((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16
+%endmacro
+
+%macro PRELOAD_V 0 ; load five rows around r1 into m0-m4 (r2 = row pitch); clobbers r3
+    lea      r3, [r2*3]
+    sub      r1, r3
+    movu     m0, [r1+r2]   ; row at original r1 - 2*r2
+    movu     m1, [r1+r2*2] ; row at original r1 - r2
+    add      r1, r3
+    movu     m2, [r1]      ; row at original r1
+    movu     m3, [r1+r2]   ; row at original r1 + r2
+    movu     m4, [r1+r2*2] ; row at original r1 + 2*r2
+    add      r1, r3        ; leave r1 at original r1 + 3*r2, the next row to be read
+%endmacro
+
+%macro FILT_V 8 ; args 1-5: row registers, arg 6: receives the new row from [r1], args 7-8: temporaries; result in arg 1
+    movu     %6, [r1]
+    paddw    %1, %6 ; a = oldest row + newly loaded row
+    mova     %7, %2
+    paddw    %7, %5 ; b = second + fifth row
+    mova     %8, %3
+    paddw    %8, %4 ; c = third + fourth row
+    FILT_H   %1, %7, %8, [pw_16]
+    psraw    %1, 1
+    CLIPW    %1, [pb_0], [pw_pixel_max]
+%endmacro
+
+%macro MC 1 ; arg: macro to instantiate as (cpu, put/avg, size) for mmxext/4 and sse2/8, once with OP_MOV = mova and once with OP_MOV = AVG_MOV
+%define OP_MOV mova
+INIT_MMX
+%1 mmxext, put, 4
+INIT_XMM
+%1 sse2  , put, 8
+
+%define OP_MOV AVG_MOV
+INIT_MMX
+%1 mmxext, avg, 4
+INIT_XMM
+%1 sse2  , avg, 8
+%endmacro
+
+%macro MCAxA 8 ; forwards all eight arguments to MCAxA_OP, except that on x86_64 nothing is emitted when the cpu argument is mmxext
+%ifdef ARCH_X86_64
+%ifnidn %1,mmxext
+MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
+%endif
+%else
+MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8
+%endif
+%endmacro
+
+%macro MCAxA_OP 8 ; cpu, put/avg, mc, size, 2*size, three cglobal counts: builds the double-size function from four calls to the size-N stub
+cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8
+%ifdef ARCH_X86_32
+    call stub_%2_h264_qpel%4_%3_10_%1
+    mov  r0, r0m ; reload the original r0/r1 from r0m/r1m before each further quadrant
+    mov  r1, r1m
+    add  r0, %4*2 ; second quadrant: size*2 bytes further along the row
+    add  r1, %4*2
+    call stub_%2_h264_qpel%4_%3_10_%1
+    mov  r0, r0m
+    mov  r1, r1m
+    lea  r0, [r0+r2*%4] ; third quadrant: size rows further down (r2 = row pitch)
+    lea  r1, [r1+r2*%4]
+    call stub_%2_h264_qpel%4_%3_10_%1
+    mov  r0, r0m
+    mov  r1, r1m
+    lea  r0, [r0+r2*%4+%4*2] ; fourth quadrant: both offsets combined
+    lea  r1, [r1+r2*%4+%4*2]
+    call stub_%2_h264_qpel%4_%3_10_%1
+    RET
+%else ; ARCH_X86_64
+    mov r10, r0 ; keep the original r0/r1 in r10/r11 across the stub calls
+    mov r11, r1
+    call stub_%2_h264_qpel%4_%3_10_%1
+    lea  r0, [r10+%4*2]
+    lea  r1, [r11+%4*2]
+    call stub_%2_h264_qpel%4_%3_10_%1
+    lea  r0, [r10+r2*%4]
+    lea  r1, [r11+r2*%4]
+    call stub_%2_h264_qpel%4_%3_10_%1
+    lea  r0, [r10+r2*%4+%4*2]
+    lea  r1, [r11+r2*%4+%4*2]
+%ifndef UNIX64 ; fall through to function
+    call stub_%2_h264_qpel%4_%3_10_%1
+    RET
+%endif
+%endif
+%endmacro
+
+;cpu, put/avg, mc, 4/8, then the three counts handed on to cglobal; the code following each use of this macro becomes the stub body
+%macro cglobal_mc 7
+%assign i %4*2
+MCAxA %1, %2, %3, %4, i, %5,%6,%7
+
+cglobal %2_h264_qpel%4_%3_10_%1, %5,%6,%7
+%ifndef UNIX64 ; no prologue or epilogue for UNIX64
+    call stub_%2_h264_qpel%4_%3_10_%1
+    RET
+%endif
+
+stub_%2_h264_qpel%4_%3_10_%1:
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc00(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro COPY4 0 ; move four rows from r1 to r0 through OP_MOV; expects r3 = 3*r2, clobbers m0
+    movu          m0, [r1     ]
+    OP_MOV [r0     ], m0
+    movu          m0, [r1+r2  ]
+    OP_MOV [r0+r2  ], m0
+    movu          m0, [r1+r2*2]
+    OP_MOV [r0+r2*2], m0
+    movu          m0, [r1+r3  ]
+    OP_MOV [r0+r3  ], m0
+%endmacro
+
+%macro MC00 1 ; arg: put or avg (OP_MOV must be defined to match before the macro is used)
+INIT_MMX
+cglobal_mc mmxext, %1, mc00, 4, 3,4,0
+    lea           r3, [r2*3] ; COPY4 addresses the fourth row through r3
+    COPY4
+    ret
+
+INIT_XMM
+cglobal %1_h264_qpel8_mc00_10_sse2, 3,4
+    lea  r3, [r2*3]
+    COPY4
+    lea  r0, [r0+r2*4] ; advance both pointers four rows for the second COPY4
+    lea  r1, [r1+r2*4]
+    COPY4
+    RET
+
+cglobal %1_h264_qpel16_mc00_10_sse2, 3,4
+    mov r3d, 8 ; eight iterations of two rows each
+.loop:
+    movu           m0, [r1      ]
+    movu           m1, [r1   +16]
+    OP_MOV [r0      ], m0
+    OP_MOV [r0   +16], m1
+    movu           m0, [r1+r2   ]
+    movu           m1, [r1+r2+16]
+    OP_MOV [r0+r2   ], m0
+    OP_MOV [r0+r2+16], m1
+    lea            r0, [r0+r2*2]
+    lea            r1, [r1+r2*2]
+    dec r3d
+    jg .loop
+    REP_RET
+%endmacro
+
+%define OP_MOV mova
+MC00 put
+
+%define OP_MOV AVG_MOV
+MC00 avg
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc20(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC_CACHE 1 ; arg: macro to instantiate for mmxext/4, sse2_cache64/8, ssse3_cache64/8 and sse2/8 (the last with an extra 4th argument), as put and as avg
+%define OP_MOV mova
+%define PALIGNR PALIGNR_MMX
+INIT_MMX
+%1 mmxext       , put, 4
+INIT_XMM
+%1 sse2_cache64 , put, 8
+%define PALIGNR PALIGNR_SSSE3
+%1 ssse3_cache64, put, 8
+%1 sse2         , put, 8, 0
+
+%define OP_MOV AVG_MOV
+%define PALIGNR PALIGNR_MMX
+INIT_MMX
+%1 mmxext       , avg, 4
+INIT_XMM
+%1 sse2_cache64 , avg, 8
+%define PALIGNR PALIGNR_SSSE3
+%1 ssse3_cache64, avg, 8
+%1 sse2         , avg, 8, 0
+%endmacro
+
+%macro MC20 3-4 ; cpu, put/avg, size, [any 4th argument selects the path that uses plain unaligned loads]
+cglobal_mc %1, %2, mc20, %3, 3,4,9
+    mov     r3d, %3 ; row counter
+    mova     m1, [pw_pixel_max] ; upper clip bound
+%if num_mmregs > 8
+    mova     m8, [pw_16]
+    %define p16 m8
+%else
+    %define p16 [pw_16]
+%endif
+.nextrow
+%if %0 == 4
+    movu     m2, [r1-4] ; a = [r1-4] + [r1+6]
+    movu     m3, [r1-2] ; b = [r1-2] + [r1+4]
+    movu     m4, [r1+0] ; c = [r1+0] + [r1+2]
+    ADDW     m2, [r1+6], m5
+    ADDW     m3, [r1+4], m5
+    ADDW     m4, [r1+2], m5
+%else ; movu is slow on these processors
+%if mmsize==16
+    movu     m2, [r1-4]
+    movu     m0, [r1+6]
+    mova     m6, m0
+    psrldq   m0, 6
+
+    paddw    m6, m2 ; a = [r1-4] + [r1+6]; renamed to m2 by the SWAP below
+    PALIGNR  m3, m0, m2, 2, m5
+    PALIGNR  m7, m0, m2, 8, m5
+    paddw    m3, m7 ; presumably the same b sum as the movu path, assembled with PALIGNR
+    PALIGNR  m4, m0, m2, 4, m5
+    PALIGNR  m7, m0, m2, 6, m5
+    paddw    m4, m7 ; presumably the same c sum as the movu path
+    SWAP      2, 6
+%else
+    movu     m2, [r1-4]
+    movu     m6, [r1+4]
+    PALIGNR  m3, m6, m2, 2, m5
+    paddw    m3, m6
+    PALIGNR  m4, m6, m2, 4, m5
+    PALIGNR  m7, m6, m2, 6, m5
+    paddw    m4, m7
+    paddw    m2, [r1+6]
+%endif
+%endif
+
+    FILT_H   m2, m3, m4, p16
+    psraw    m2, 1
+    pxor     m0, m0 ; lower clip bound
+    CLIPW    m2, m0, m1
+    OP_MOV [r0], m2
+    add      r0, r2 ; advance both pointers by one row
+    add      r1, r2
+    dec     r3d
+    jg .nextrow
+    rep ret
+%endmacro
+
+MC_CACHE MC20
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc30(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC30 3-4 ; cpu, put/avg, size, [4th argument accepted so that MC_CACHE can drive this macro]
+cglobal_mc %1, %2, mc30, %3, 3,5,9
+    lea r4, [r1+2] ; mc10 uses r4 = r1; here the averaging source starts 2 bytes later
+    jmp stub_%2_h264_qpel%3_mc10_10_%1.body
+%endmacro
+
+MC_CACHE MC30
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc10(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC10 3-4 ; cpu, put/avg, size, [any 4th argument selects the path that uses plain unaligned loads]
+cglobal_mc %1, %2, mc10, %3, 3,5,9
+    mov      r4, r1 ; r4 = rows averaged with the filtered result (mc30 enters at .body with r4 = r1+2)
+.body
+    mov     r3d, %3 ; row counter
+    mova     m1, [pw_pixel_max] ; upper clip bound
+%if num_mmregs > 8
+    mova     m8, [pw_16]
+    %define p16 m8
+%else
+    %define p16 [pw_16]
+%endif
+.nextrow
+%if %0 == 4
+    movu     m2, [r1-4] ; a = [r1-4] + [r1+6]
+    movu     m3, [r1-2] ; b = [r1-2] + [r1+4]
+    movu     m4, [r1+0] ; c = [r1+0] + [r1+2]
+    ADDW     m2, [r1+6], m5
+    ADDW     m3, [r1+4], m5
+    ADDW     m4, [r1+2], m5
+%else ; movu is slow on these processors
+%if mmsize==16
+    movu     m2, [r1-4]
+    movu     m0, [r1+6]
+    mova     m6, m0
+    psrldq   m0, 6
+
+    paddw    m6, m2 ; a = [r1-4] + [r1+6]; renamed to m2 by the SWAP below
+    PALIGNR  m3, m0, m2, 2, m5
+    PALIGNR  m7, m0, m2, 8, m5
+    paddw    m3, m7
+    PALIGNR  m4, m0, m2, 4, m5
+    PALIGNR  m7, m0, m2, 6, m5
+    paddw    m4, m7
+    SWAP      2, 6
+%else
+    movu     m2, [r1-4]
+    movu     m6, [r1+4]
+    PALIGNR  m3, m6, m2, 2, m5
+    paddw    m3, m6
+    PALIGNR  m4, m6, m2, 4, m5
+    PALIGNR  m7, m6, m2, 6, m5
+    paddw    m4, m7
+    paddw    m2, [r1+6]
+%endif
+%endif
+
+    FILT_H   m2, m3, m4, p16
+    psraw    m2, 1
+    pxor     m0, m0 ; lower clip bound
+    CLIPW    m2, m0, m1
+    movu     m3, [r4]
+    pavgw    m2, m3 ; average the filtered row with the row at r4
+    OP_MOV [r0], m2
+    add      r0, r2 ; advance all three pointers by one row
+    add      r1, r2
+    add      r4, r2
+    dec     r3d
+    jg .nextrow
+    rep ret
+%endmacro
+
+MC_CACHE MC10
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc02(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro V_FILT 11 ; only args 9-11 (size, index, cpu) are referenced, to build the label; the body names m0-m7 directly
+v_filt%9_%10_10_%11:
+    add    r4, r2 ; the full entry point also advances r4 by one row
+.no_addr4:
+    FILT_V m0, m1, m2, m3, m4, m5, m6, m7
+    add    r1, r2
+    add    r0, r2
+    ret
+%endmacro
+
+INIT_MMX
+RESET_MM_PERMUTATION
+%assign i 0
+%rep 4
+V_FILT m0, m1, m2, m3, m4, m5, m6, m7, 4, i, mmxext
+SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+
+INIT_XMM
+RESET_MM_PERMUTATION
+%assign i 0
+%rep 6
+V_FILT m0, m1, m2, m3, m4, m5, m6, m7, 8, i, sse2
+SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+
+%macro MC02 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc02, %3, 3,4,8
+    PRELOAD_V
+
+    sub      r0, r2 ; v_filt adds r2 to r0 before returning, so start one row early
+%assign j 0
+%rep %3
+    %assign i (j % 6)
+    call v_filt%3_ %+ i %+ _10_%1.no_addr4
+    OP_MOV [r0], m0
+    SWAP 0,1,2,3,4,5
+    %assign j j+1
+%endrep
+    ret
+%endmacro
+
+MC MC02
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc01(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC01 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc01, %3, 3,5,8
+    mov      r4, r1 ; r4 = rows averaged with the filtered result (mc03 enters at .body with r4 = r1+r2)
+.body
+    PRELOAD_V
+
+    sub      r4, r2 ; v_filt adds r2 to r4 and r0 on every call, so start both one row early
+    sub      r0, r2
+%assign j 0
+%rep %3
+    %assign i (j % 6)
+    call v_filt%3_ %+ i %+ _10_%1
+    movu     m7, [r4]
+    pavgw    m0, m7 ; average the filtered row with the row at r4
+    OP_MOV [r0], m0
+    SWAP 0,1,2,3,4,5
+    %assign j j+1
+%endrep
+    ret
+%endmacro
+
+MC MC01
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc03(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC03 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc03, %3, 3,5,8
+    lea r4, [r1+r2] ; mc01 uses r4 = r1; here the averaging source starts one row later
+    jmp stub_%2_h264_qpel%3_mc01_10_%1.body
+%endmacro
+
+MC MC03
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc11(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro H_FILT_AVG 3-4 ; cpu, size, index, [any 4th argument omits the final reload of m5]
+h_filt%2_%3_10_%1:
+;FILT_H with fewer registers, averaged with the FILT_V result
+;m6 and m7 are temporaries and m0 holds the FILT_V result; the other registers are needed by the next iteration
+;three working registers are needed, so m5 is used as well and then re-read from memory
+    movu     m5, [r4-4]
+    ADDW     m5, [r4+6], m7
+    movu     m6, [r4-2]
+    ADDW     m6, [r4+4], m7
+    paddw    m5, [pw_16]
+    psubw    m5, m6  ; a-b
+    psraw    m5, 2   ; (a-b)/4
+    psubw    m5, m6  ; (a-b)/4-b
+    movu     m6, [r4+0]
+    ADDW     m6, [r4+2], m7
+    paddw    m5, m6  ; (a-b)/4-b+c
+    psraw    m5, 2   ; ((a-b)/4-b+c)/4
+    paddw    m5, m6  ; ((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16
+    psraw    m5, 1
+    CLIPW    m5, [pb_0], [pw_pixel_max]
+;avg FILT_V, FILT_H
+    pavgw    m0, m5
+%if %0!=4
+    movu     m5, [r1+r5] ; restore m5 from memory (mc11 sets r5 = -r2)
+%endif
+    ret
+%endmacro
+
+INIT_MMX
+RESET_MM_PERMUTATION
+%assign i 0
+%rep 3
+H_FILT_AVG mmxext, 4, i
+SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+H_FILT_AVG mmxext, 4, i, 0
+
+INIT_XMM
+RESET_MM_PERMUTATION
+%assign i 0
+%rep 6
+%if i==1
+H_FILT_AVG sse2,   8, i, 0
+%else
+H_FILT_AVG sse2,   8, i
+%endif
+SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+
+%macro MC11 3 ; cpu, put/avg, size
+; this REALLY needs x86_64
+cglobal_mc %1, %2, mc11, %3, 3,6,8
+    mov      r4, r1 ; r4 = rows read by the horizontal filter (mc31/mc13/mc33 enter at .body with other r1/r4)
+.body
+    PRELOAD_V
+
+    sub      r0, r2 ; v_filt adds r2 to r0 and r4 on every call, so start both one row early
+    sub      r4, r2
+    mov      r5, r2
+    neg      r5 ; r5 = -r2, used to re-read m5 from [r1+r5]
+%assign j 0
+%rep %3
+    %assign i (j % 6)
+    call v_filt%3_ %+ i %+ _10_%1
+    call h_filt%3_ %+ i %+ _10_%1
+%if %3==8 && i==1
+    movu     m5, [r1+r5] ; the size-8, index-1 h_filt is built without its own reload
+%endif
+    OP_MOV [r0], m0
+    SWAP 0,1,2,3,4,5
+    %assign j j+1
+%endrep
+    ret
+%endmacro
+
+MC MC11
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc31(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC31 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc31, %3, 3,6,8
+    mov r4, r1 ; horizontal filter source stays at the original r1
+    add r1, 2 ; vertical filter source starts 2 bytes later
+    jmp stub_%2_h264_qpel%3_mc11_10_%1.body
+%endmacro
+
+MC MC31
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc13(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC13 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc13, %3, 3,7,12
+    lea r4, [r1+r2] ; horizontal filter source starts one row later; r1 is unchanged
+    jmp stub_%2_h264_qpel%3_mc11_10_%1.body
+%endmacro
+
+MC MC13
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc33(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC33 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc33, %3, 3,6,8
+    lea r4, [r1+r2] ; horizontal filter source starts one row later
+    add r1, 2 ; vertical filter source starts 2 bytes later
+    jmp stub_%2_h264_qpel%3_mc11_10_%1.body
+%endmacro
+
+MC MC33
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc22(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro FILT_H2 3 ; a (in/out), b, c: a = a-5*b+20*c with no rounding or shift; b and c are clobbered
+    psubw  %1, %2  ; a-b
+    psubw  %2, %3  ; b-c
+    psllw  %2, 2   ; 4*(b-c)
+    psubw  %1, %2  ; a-5*b+4*c
+    psllw  %3, 4   ; 16*c
+    paddw  %1, %3  ; a-5*b+20*c
+%endmacro
+
+%macro FILT_VNRD 8 ; same operands as FILT_V, but ends in FILT_H2, so the result is neither rounded, shifted nor clipped
+    movu     %6, [r1]
+    paddw    %1, %6 ; a = oldest row + newly loaded row
+    mova     %7, %2
+    paddw    %7, %5 ; b = second + fifth row
+    mova     %8, %3
+    paddw    %8, %4 ; c = third + fourth row
+    FILT_H2  %1, %7, %8
+%endmacro
+
+%macro HV 2 ; cpu, size: emits put_hv, which writes unrounded vertical filter sums (minus pad20) into the caller's stack buffer
+%ifidn %1,sse2
+%define PAD 12
+%define COUNT 2
+%else
+%define PAD 0
+%define COUNT 3
+%endif
+put_hv%2_10_%1:
+    neg      r2           ; This actually saves instructions
+    lea      r1, [r1+r2*2-mmsize+PAD] ; r2 is negated here, so this starts two rows before r1
+    lea      r4, [rsp+PAD+gprsize] ; output pointer; gprsize presumably skips the return address
+    mov     r3d, COUNT ; number of mmsize-wide column passes
+.v_loop:
+    movu     m0, [r1]
+    sub      r1, r2 ; subtracting the negated r2 moves forward one row
+    movu     m1, [r1]
+    sub      r1, r2
+    movu     m2, [r1]
+    sub      r1, r2
+    movu     m3, [r1]
+    sub      r1, r2
+    movu     m4, [r1]
+    sub      r1, r2
+%assign i 0
+%rep %2-1
+    FILT_VNRD m0, m1, m2, m3, m4, m5, m6, m7
+    psubw    m0, [pad20]
+    movu     [r4+i*mmsize*3], m0 ; buffer rows are mmsize*3 bytes apart
+    sub      r1, r2
+    SWAP 0,1,2,3,4,5
+%assign i i+1
+%endrep
+    FILT_VNRD m0, m1, m2, m3, m4, m5, m6, m7
+    psubw    m0, [pad20]
+    movu     [r4+i*mmsize*3], m0
+    add      r4, mmsize ; next column of the buffer
+    lea      r1, [r1+r2*8+mmsize] ; rewind 8 rows and move mmsize bytes along the row
+%if %2==8
+    lea      r1, [r1+r2*4] ; size 8 advanced 12 rows in total, so rewind the remaining 4
+%endif
+    dec      r3d
+    jg .v_loop
+    neg      r2 ; restore the caller's r2
+    ret
+%endmacro
+
+INIT_MMX
+HV mmxext, 4
+INIT_XMM
+HV sse2  , 8
+
+%macro H_LOOP 2 ; cpu, size: emits the h_loop_op routine, one row of the horizontal pass over the buffer at r1, result in m1
+%if num_mmregs > 8
+    %define s1 m8
+    %define s2 m9
+    %define s3 m10
+    %define d1 m11
+%else
+    %define s1 [tap1]
+    %define s2 [tap2]
+    %define s3 [tap3]
+    %define d1 [depad]
+%endif
+h%2_loop_op_%1:
+    movu       m1, [r1+mmsize-4]
+    movu       m2, [r1+mmsize-2]
+    mova       m3, [r1+mmsize+0]
+    movu       m4, [r1+mmsize+2]
+    movu       m5, [r1+mmsize+4]
+    movu       m6, [r1+mmsize+6]
+%if num_mmregs > 8
+    pmaddwd    m1, s1
+    pmaddwd    m2, s1
+    pmaddwd    m3, s2
+    pmaddwd    m4, s2
+    pmaddwd    m5, s3
+    pmaddwd    m6, s3
+    paddd      m1, d1
+    paddd      m2, d1
+%else
+    mova       m0, s1 ; with 8 registers m0 is the scratch register for the constants
+    pmaddwd    m1, m0
+    pmaddwd    m2, m0
+    mova       m0, s2
+    pmaddwd    m3, m0
+    pmaddwd    m4, m0
+    mova       m0, s3
+    pmaddwd    m5, m0
+    pmaddwd    m6, m0
+    mova       m0, d1
+    paddd      m1, m0
+    paddd      m2, m0
+%endif
+    paddd      m3, m5
+    paddd      m4, m6
+    paddd      m1, m3 ; m1 = sums for the loads at offsets -4, +0, +4
+    paddd      m2, m4 ; m2 = sums for the loads at offsets -2, +2, +6
+    psrad      m1, 10
+    psrad      m2, 10
+    pslld      m2, 16 ; m2 results go to the high word of each dword
+    pand       m1, [pd_0f] ; keep only the low word of the m1 results
+    por        m1, m2 ; merge both sets into packed words
+%if num_mmregs <= 8
+    pxor       m0, m0 ; re-zero m0; in the other configuration the callers zero it up front
+%endif
+    CLIPW      m1, m0, m7
+    add        r1, mmsize*3 ; advance to the next buffer row
+    ret
+%endmacro
+
+INIT_MMX
+H_LOOP mmxext, 4
+INIT_XMM
+H_LOOP sse2  , 8
+
+%macro MC22 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc22, %3, 3,7,12
+%define PAD mmsize*8*4*2      ; SIZE*16*4*sizeof(pixel)
+    mov      r6, rsp          ; backup stack pointer
+    and     rsp, ~(mmsize-1)  ; align stack
+    sub     rsp, PAD
+
+    call put_hv%3_10_%1
+
+    mov       r3d, %3 ; row counter
+    mova       m7, [pw_pixel_max] ; upper clip bound used by the h_loop_op routine
+%if num_mmregs > 8
+    pxor       m0, m0 ; lower clip bound; h_loop_op leaves m0 alone in this configuration
+    mova       m8, [tap1]
+    mova       m9, [tap2]
+    mova      m10, [tap3]
+    mova      m11, [depad]
+%endif
+    mov        r1, rsp ; h_loop_op reads the buffer through r1
+.h_loop:
+    call h%3_loop_op_%1
+
+    OP_MOV   [r0], m1
+    add        r0, r2
+    dec       r3d
+    jg .h_loop
+
+    mov     rsp, r6          ; restore stack pointer
+    ret
+%endmacro
+
+MC MC22
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc12(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC12 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc12, %3, 3,7,12
+%define PAD mmsize*8*4*2        ; SIZE*16*4*sizeof(pixel)
+    mov        r6, rsp          ; backup stack pointer
+    and       rsp, ~(mmsize-1)  ; align stack
+    sub       rsp, PAD
+
+    call put_hv%3_10_%1
+
+    xor       r4d, r4d ; r4 = extra byte offset of the second operand (mc32 and mc21 enter at .body with other values)
+.body
+    mov       r3d, %3 ; row counter
+    pxor       m0, m0 ; lower clip bound
+    mova       m7, [pw_pixel_max] ; upper clip bound
+%if num_mmregs > 8
+    mova       m8, [tap1]
+    mova       m9, [tap2]
+    mova      m10, [tap3]
+    mova      m11, [depad]
+%endif
+    mov        r1, rsp ; h_loop_op reads the buffer through r1 and advances it by mmsize*3
+.h_loop:
+    call h%3_loop_op_%1
+
+    movu       m3, [r1+r4-2*mmsize] ; movu needed for mc32, etc
+    paddw      m3, [depad2]
+    psrlw      m3, 5
+    psubw      m3, [unpad]
+    CLIPW      m3, m0, m7
+    pavgw      m1, m3 ; average the h_loop_op result with the rescaled buffer value
+
+    OP_MOV   [r0], m1
+    add        r0, r2
+    dec       r3d
+    jg .h_loop
+
+    mov     rsp, r6          ; restore stack pointer
+    ret
+%endmacro
+
+MC MC12
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc32(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC32 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc32, %3, 3,7,12
+%define PAD mmsize*8*3*2  ; SIZE*16*3*sizeof(pixel)
+    mov  r6, rsp          ; backup stack pointer
+    and rsp, ~(mmsize-1)  ; align stack
+    sub rsp, PAD
+
+    call put_hv%3_10_%1
+
+    mov r4d, 2            ; sizeof(pixel)
+    jmp stub_%2_h264_qpel%3_mc12_10_%1.body
+%endmacro
+
+MC MC32
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc21(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro H_NRD 2 ; cpu, size: emits put_h, which writes unrounded horizontal filter sums (minus pad20) of the rows at r5 to the stack buffer
+put_h%2_10_%1:
+    add       rsp, gprsize ; presumably steps over the return address so that rsp+r4 addresses the caller's buffer
+    mov       r3d, %2 ; row counter
+    xor       r4d, r4d ; byte offset of the current output row
+    mova       m6, [pad20]
+.nextrow
+    movu       m2, [r5-4] ; a = [r5-4] + [r5+6]
+    movu       m3, [r5-2] ; b = [r5-2] + [r5+4]
+    movu       m4, [r5+0] ; c = [r5+0] + [r5+2]
+    ADDW       m2, [r5+6], m5
+    ADDW       m3, [r5+4], m5
+    ADDW       m4, [r5+2], m5
+
+    FILT_H2    m2, m3, m4
+    psubw      m2, m6
+    mova [rsp+r4], m2
+    add       r4d, mmsize*3 ; output rows are mmsize*3 bytes apart, as in put_hv
+    add        r5, r2
+    dec       r3d
+    jg .nextrow
+    sub       rsp, gprsize ; undo the adjustment before returning
+    ret
+%endmacro
+
+INIT_MMX
+H_NRD mmxext, 4
+INIT_XMM
+H_NRD sse2  , 8
+
+%macro MC21 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc21, %3, 3,7,12
+    mov   r5, r1 ; put_h reads its source rows through r5 (mc23 enters at .body with r5 = r1+r2)
+.body
+%define PAD mmsize*8*3*2   ; SIZE*16*3*sizeof(pixel)
+    mov   r6, rsp          ; backup stack pointer
+    and  rsp, ~(mmsize-1)  ; align stack
+
+    sub  rsp, PAD
+    call put_h%3_10_%1
+
+    sub  rsp, PAD
+    call put_hv%3_10_%1
+
+    mov r4d, PAD-mmsize    ; H buffer
+    jmp stub_%2_h264_qpel%3_mc12_10_%1.body
+%endmacro
+
+MC MC21
+
+;-----------------------------------------------------------------------------
+; void h264_qpel_mc23(uint8_t *dst, uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+%macro MC23 3 ; cpu, put/avg, size
+cglobal_mc %1, %2, mc23, %3, 3,7,12
+    lea   r5, [r1+r2] ; mc21 uses r5 = r1; here put_h reads rows starting one row later
+    jmp stub_%2_h264_qpel%3_mc21_10_%1.body
+%endmacro
+
+MC MC23
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index 066f794fb6..c313c0e079 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+ * Copyright (c) 2011 Daniel Kang
  *
  * This file is part of Libav.
  *
@@ -1199,3 +1200,100 @@ H264_MC_816(H264_MC_HV, sse2)
 H264_MC_816(H264_MC_H, ssse3)
 H264_MC_816(H264_MC_HV, ssse3)
 #endif
+
+
+
+// 10-bit luma qpel: prototypes for the assembly functions, plus 16x16 wrappers built from four 8x8 calls
+#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
+void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
+    (uint8_t *dst, uint8_t *src, int stride);
+
+#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(put,  4, DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+
+#define LUMA_MC_816(DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
+    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+
+LUMA_MC_ALL(10, mc00, mmxext)
+LUMA_MC_ALL(10, mc10, mmxext)
+LUMA_MC_ALL(10, mc20, mmxext)
+LUMA_MC_ALL(10, mc30, mmxext)
+LUMA_MC_ALL(10, mc01, mmxext)
+LUMA_MC_ALL(10, mc11, mmxext)
+LUMA_MC_ALL(10, mc21, mmxext)
+LUMA_MC_ALL(10, mc31, mmxext)
+LUMA_MC_ALL(10, mc02, mmxext)
+LUMA_MC_ALL(10, mc12, mmxext)
+LUMA_MC_ALL(10, mc22, mmxext)
+LUMA_MC_ALL(10, mc32, mmxext)
+LUMA_MC_ALL(10, mc03, mmxext)
+LUMA_MC_ALL(10, mc13, mmxext)
+LUMA_MC_ALL(10, mc23, mmxext)
+LUMA_MC_ALL(10, mc33, mmxext)
+
+LUMA_MC_816(10, mc00, sse2)
+LUMA_MC_816(10, mc10, sse2)
+LUMA_MC_816(10, mc10, sse2_cache64)
+LUMA_MC_816(10, mc10, ssse3_cache64)
+LUMA_MC_816(10, mc20, sse2)
+LUMA_MC_816(10, mc20, sse2_cache64)
+LUMA_MC_816(10, mc20, ssse3_cache64)
+LUMA_MC_816(10, mc30, sse2)
+LUMA_MC_816(10, mc30, sse2_cache64)
+LUMA_MC_816(10, mc30, ssse3_cache64)
+LUMA_MC_816(10, mc01, sse2)
+LUMA_MC_816(10, mc11, sse2)
+LUMA_MC_816(10, mc21, sse2)
+LUMA_MC_816(10, mc31, sse2)
+LUMA_MC_816(10, mc02, sse2)
+LUMA_MC_816(10, mc12, sse2)
+LUMA_MC_816(10, mc22, sse2)
+LUMA_MC_816(10, mc32, sse2)
+LUMA_MC_816(10, mc03, sse2)
+LUMA_MC_816(10, mc13, sse2)
+LUMA_MC_816(10, mc23, sse2)
+LUMA_MC_816(10, mc33, sse2)
+
+#define QPEL16_OPMC(OP, MC, MMX)\
+void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
+    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
+    src += 8*stride;\
+    dst += 8*stride;\
+    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst   , src   , stride);\
+    ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
+}
+
+#define QPEL16_OP(MC, MMX)\
+QPEL16_OPMC(put, MC, MMX)\
+QPEL16_OPMC(avg, MC, MMX)
+
+#define QPEL16(MMX)\
+QPEL16_OP(mc00, MMX)\
+QPEL16_OP(mc01, MMX)\
+QPEL16_OP(mc02, MMX)\
+QPEL16_OP(mc03, MMX)\
+QPEL16_OP(mc10, MMX)\
+QPEL16_OP(mc11, MMX)\
+QPEL16_OP(mc12, MMX)\
+QPEL16_OP(mc13, MMX)\
+QPEL16_OP(mc20, MMX)\
+QPEL16_OP(mc21, MMX)\
+QPEL16_OP(mc22, MMX)\
+QPEL16_OP(mc23, MMX)\
+QPEL16_OP(mc30, MMX)\
+QPEL16_OP(mc31, MMX)\
+QPEL16_OP(mc32, MMX)\
+QPEL16_OP(mc33, MMX)
+
+#if ARCH_X86_32 // ARCH_X86_64 implies sse2+
+QPEL16(mmxext)
+#endif

From c558122e4ee53dc4cb82f87749a9c28c38ca9401 Mon Sep 17 00:00:00 2001
From: Gavin Kinsey <gkinsey@ad-holdings.co.uk>
Date: Tue, 21 Jun 2011 13:13:37 +0100
Subject: [PATCH 13/40] Fix segmentation fault in ffprobe

---
 ffprobe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ffprobe.c b/ffprobe.c
index 711a17246d..edda454cde 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -393,6 +393,7 @@ int main(int argc, char **argv)
     int ret;
 
     av_register_all();
+    init_opts();
 #if CONFIG_AVDEVICE
     avdevice_register_all();
 #endif

From 8c94eab011b1d2d1c5f70c2deff3a05e876fbb7d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 17:05:22 +0100
Subject: [PATCH 14/40] configure: do not blank $LIBNAME if static libs
 disabled

Whatever reason this was done for no longer applies, and it causes
lots of make warnings.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 configure | 2 --
 1 file changed, 2 deletions(-)

diff --git a/configure b/configure
index 9cde17805d..e315afa1cb 100755
--- a/configure
+++ b/configure
@@ -2524,8 +2524,6 @@ EOF
     exit 1;
 fi
 
-disabled static && LIBNAME=""
-
 die_license_disabled() {
     enabled $1 || { enabled $2 && die "$2 is $1 and --enable-$1 is not specified."; }
 }

From 03256d96e42f9fba88cc64751da925d65aa2e3f2 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 16:07:37 +0100
Subject: [PATCH 15/40] fate: move libavutil test rules to a separate file

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/Makefile           |  1 +
 tests/fate/libavutil.mak | 22 ++++++++++++++++++++++
 tests/fate2.mak          | 23 -----------------------
 3 files changed, 23 insertions(+), 23 deletions(-)
 create mode 100644 tests/fate/libavutil.mak

diff --git a/tests/Makefile b/tests/Makefile
index e6640b35dd..f69acf7e23 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -43,6 +43,7 @@ include $(SRC_PATH)/tests/fate/amrnb.mak
 include $(SRC_PATH)/tests/fate/amrwb.mak
 include $(SRC_PATH)/tests/fate/fft.mak
 include $(SRC_PATH)/tests/fate/h264.mak
+include $(SRC_PATH)/tests/fate/libavutil.mak
 include $(SRC_PATH)/tests/fate/mp3.mak
 include $(SRC_PATH)/tests/fate/vorbis.mak
 include $(SRC_PATH)/tests/fate/vp8.mak
diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
new file mode 100644
index 0000000000..2f394ec2df
--- /dev/null
+++ b/tests/fate/libavutil.mak
@@ -0,0 +1,22 @@
+FATE_TESTS += fate-adler32
+fate-adler32: libavutil/adler32-test$(EXESUF)
+fate-adler32: CMD = run libavutil/adler32-test
+fate-adler32: REF = /dev/null
+
+FATE_TESTS += fate-aes
+fate-aes: libavutil/aes-test$(EXESUF)
+fate-aes: CMD = run libavutil/aes-test
+fate-aes: REF = /dev/null
+
+FATE_TESTS += fate-base64
+fate-base64: libavutil/base64-test$(EXESUF)
+fate-base64: CMD = run libavutil/base64-test
+
+FATE_TESTS += fate-des
+fate-des: libavutil/des-test$(EXESUF)
+fate-des: CMD = run libavutil/des-test
+fate-des: REF = /dev/null
+
+FATE_TESTS += fate-sha
+fate-sha: libavutil/sha-test$(EXESUF)
+fate-sha: CMD = run libavutil/sha-test
diff --git a/tests/fate2.mak b/tests/fate2.mak
index 47804c4d8a..ef8c3b8d5b 100644
--- a/tests/fate2.mak
+++ b/tests/fate2.mak
@@ -213,29 +213,6 @@ fate-mjpegb: CMD = framecrc -idct simple -flags +bitexact -i $(SAMPLES)/mjpegb/m
 FATE_TESTS += fate-rv30
 fate-rv30: CMD = framecrc -flags +bitexact -dct fastint -idct simple -i $(SAMPLES)/real/rv30.rm -an
 
-FATE_TESTS += fate-sha
-fate-sha: libavutil/sha-test$(EXESUF)
-fate-sha: CMD = run libavutil/sha-test
-
-FATE_TESTS += fate-adler32
-fate-adler32: libavutil/adler32-test$(EXESUF)
-fate-adler32: CMD = run libavutil/adler32-test
-fate-adler32: REF = /dev/null
-
-FATE_TESTS += fate-aes
-fate-aes: libavutil/aes-test$(EXESUF)
-fate-aes: CMD = run libavutil/aes-test
-fate-aes: REF = /dev/null
-
-FATE_TESTS += fate-base64
-fate-base64: libavutil/base64-test$(EXESUF)
-fate-base64: CMD = run libavutil/base64-test
-
-FATE_TESTS += fate-des
-fate-des: libavutil/des-test$(EXESUF)
-fate-des: CMD = run libavutil/des-test
-fate-des: REF = /dev/null
-
 FATE_TESTS += fate-musepack7
 fate-musepack7: CMD = pcm -i $(SAMPLES)/musepack/inside-mp7.mpc
 fate-musepack7: CMP = oneoff

From 14376a53b20bed4463a0998dfd25012f8a927838 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 16:12:51 +0100
Subject: [PATCH 16/40] crc: add fate test

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/fate/libavutil.mak | 4 ++++
 tests/ref/fate/crc       | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/ref/fate/crc

diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
index 2f394ec2df..9fdd450255 100644
--- a/tests/fate/libavutil.mak
+++ b/tests/fate/libavutil.mak
@@ -12,6 +12,10 @@ FATE_TESTS += fate-base64
 fate-base64: libavutil/base64-test$(EXESUF)
 fate-base64: CMD = run libavutil/base64-test
 
+FATE_TESTS += fate-crc
+fate-crc: libavutil/crc-test$(EXESUF)
+fate-crc: CMD = run libavutil/crc-test
+
 FATE_TESTS += fate-des
 fate-des: libavutil/des-test$(EXESUF)
 fate-des: CMD = run libavutil/des-test
diff --git a/tests/ref/fate/crc b/tests/ref/fate/crc
new file mode 100644
index 0000000000..4a82680490
--- /dev/null
+++ b/tests/ref/fate/crc
@@ -0,0 +1,4 @@
+crc EDB88320 =3D5CDD04
+crc 04C11DB7 =E0BAF5C0
+crc 00008005 =BB1F
+crc 00000007 =E3

From 91c9aa0941869531ce8cadbea8cd333a9c4b1244 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 3 Jul 2011 16:57:26 +0200
Subject: [PATCH 17/40] Move some conditionally used code below the appropriate
 #ifdef.

---
 libavformat/asfdec.c | 4 +---
 libavutil/des.c      | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 16bba93c37..4fafb26259 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -84,13 +84,11 @@ static const ff_asf_guid index_guid = {
     0x90, 0x08, 0x00, 0x33, 0xb1, 0xe5, 0xcf, 0x11, 0x89, 0xf4, 0x00, 0xa0, 0xc9, 0x03, 0x49, 0xcb
 };
 
+#ifdef DEBUG
 static const ff_asf_guid stream_bitrate_guid = { /* (http://get.to/sdp) */
     0xce, 0x75, 0xf8, 0x7b, 0x8d, 0x46, 0xd1, 0x11, 0x8d, 0x82, 0x00, 0x60, 0x97, 0xc9, 0xa2, 0xb2
 };
-/**********************************/
-/* decoding */
 
-#ifdef DEBUG
 #define PRINT_IF_GUID(g,cmp) \
 if (!ff_guidcmp(g, &cmp)) \
     av_dlog(NULL, "(GUID: %s) ", #cmp)
diff --git a/libavutil/des.c b/libavutil/des.c
index 857ca91319..5de816387f 100644
--- a/libavutil/des.c
+++ b/libavutil/des.c
@@ -39,6 +39,7 @@ static const uint8_t IP_shuffle[] = {
 };
 #undef T
 
+#if defined(CONFIG_SMALL) || defined(GENTABLES)
 #define T(a, b, c, d) 32-a,32-b,32-c,32-d
 static const uint8_t P_shuffle[] = {
     T(16,  7, 20, 21),
@@ -51,6 +52,7 @@ static const uint8_t P_shuffle[] = {
     T(22, 11,  4, 25)
 };
 #undef T
+#endif
 
 #define T(a, b, c, d, e, f, g) 64-a,64-b,64-c,64-d,64-e,64-f,64-g
 static const uint8_t PC1_shuffle[] = {

From 050f509065b4f328c4621ef4116848303f0d1db0 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 3 Jul 2011 16:56:21 +0200
Subject: [PATCH 18/40] snow: Remove unused code.

---
 libavcodec/snow.c | 342 ----------------------------------------------
 1 file changed, 342 deletions(-)

diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 28f04f119b..4ab029de5c 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -33,42 +33,6 @@
 #undef NDEBUG
 #include <assert.h>
 
-static const int8_t quant3[256]={
- 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
-};
-static const int8_t quant3b[256]={
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
--1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-};
 static const int8_t quant3bA[256]={
  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
@@ -87,153 +51,7 @@ static const int8_t quant3bA[256]={
  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
 };
-static const int8_t quant5[256]={
- 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
-};
-static const int8_t quant7[256]={
- 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
--2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
-};
-static const int8_t quant9[256]={
- 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
--3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
-};
-static const int8_t quant11[256]={
- 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
--4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
-};
-static const int8_t quant13[256]={
- 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
--6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
--4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
-};
 
-#if 0 //64*cubic
-static const uint8_t obmc32[1024]={
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
-  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
-  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
-  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
-  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
-  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
-  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
-  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
-  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
-  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
-  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
-  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
-  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
-  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
-  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
-  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
-  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
-  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
-  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
-  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
-  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
-  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
-  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
-  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
-  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
-  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
-  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
-  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
-  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
-  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-//error:0.000022
-};
-static const uint8_t obmc16[256]={
-  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
-  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
-  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
-  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
-  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
-  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
-  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
-  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
-  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
-  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
-  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
-  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
-  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
-  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
-  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
-  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
-//error:0.000033
-};
-#elif 1 // 64*linear
 static const uint8_t obmc32[1024]={
   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
@@ -288,62 +106,6 @@ static const uint8_t obmc16[256]={
   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 //error:0.000015
 };
-#else //64*cos
-static const uint8_t obmc32[1024]={
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
-  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
-  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
-  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
-  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
-  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
-  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
-  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
-  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
-  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
-  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
-  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
-  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
-  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
-  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
-  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
-  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
-  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
-  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
-  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
-  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
-  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
-  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
-  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
-  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
-  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
-  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
-  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-//error:0.000022
-};
-static const uint8_t obmc16[256]={
-  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
-  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
-  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
-  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
-  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
-  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
-  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
-  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
-  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
-  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
-  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
-  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
-  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
-  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
-  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
-  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
-//error:0.000022
-};
-#endif /* 0 */
 
 //linear *64
 static const uint8_t obmc8[64]={
@@ -509,7 +271,6 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
     if(v){
         const int a= FFABS(v);
         const int e= av_log2(a);
-#if 1
         const int el= FFMIN(e, 10);
         put_rac(c, state+0, 0);
 
@@ -530,35 +291,6 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
 
         if(is_signed)
             put_rac(c, state+11 + el, v < 0); //11..21
-#else
-
-        put_rac(c, state+0, 0);
-        if(e<=9){
-            for(i=0; i<e; i++){
-                put_rac(c, state+1+i, 1);  //1..10
-            }
-            put_rac(c, state+1+i, 0);
-
-            for(i=e-1; i>=0; i--){
-                put_rac(c, state+22+i, (a>>i)&1); //22..31
-            }
-
-            if(is_signed)
-                put_rac(c, state+11 + e, v < 0); //11..21
-        }else{
-            for(i=0; i<e; i++){
-                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
-            }
-            put_rac(c, state+1+9, 0);
-
-            for(i=e-1; i>=0; i--){
-                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
-            }
-
-            if(is_signed)
-                put_rac(c, state+11 + 10, v < 0); //11..21
-        }
-#endif /* 1 */
     }else{
         put_rac(c, state+0, 1);
     }
@@ -789,14 +521,6 @@ static int alloc_blocks(SnowContext *s){
     return 0;
 }
 
-static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
-    uint8_t *bytestream= d->bytestream;
-    uint8_t *bytestream_start= d->bytestream_start;
-    *d= *s;
-    d->bytestream= bytestream;
-    d->bytestream_start= bytestream_start;
-}
-
 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
     const int w= s->b_width << s->block_max_depth;
     const int rem_depth= s->block_max_depth - level;
@@ -1323,40 +1047,6 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer
         block[3]= ptmp;
         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
     }
-#if 0
-    for(y=0; y<b_h; y++){
-        for(x=0; x<b_w; x++){
-            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
-            if(add) dst[x + y*dst_stride] += v;
-            else    dst[x + y*dst_stride] -= v;
-        }
-    }
-    for(y=0; y<b_h; y++){
-        uint8_t *obmc2= obmc + (obmc_stride>>1);
-        for(x=0; x<b_w; x++){
-            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
-            if(add) dst[x + y*dst_stride] += v;
-            else    dst[x + y*dst_stride] -= v;
-        }
-    }
-    for(y=0; y<b_h; y++){
-        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
-        for(x=0; x<b_w; x++){
-            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
-            if(add) dst[x + y*dst_stride] += v;
-            else    dst[x + y*dst_stride] -= v;
-        }
-    }
-    for(y=0; y<b_h; y++){
-        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
-        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
-        for(x=0; x<b_w; x++){
-            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
-            if(add) dst[x + y*dst_stride] += v;
-            else    dst[x + y*dst_stride] -= v;
-        }
-    }
-#else
     if(sliced){
         s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
     }else{
@@ -1387,7 +1077,6 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer
             }
         }
     }
-#endif /* 0 */
 }
 
 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
@@ -4042,27 +3731,6 @@ int main(void){
     for(i=0; i<width*height; i++)
         if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
 
-#if 0
-    printf("testing AC coder\n");
-    memset(s.header_state, 0, sizeof(s.header_state));
-    ff_init_range_encoder(&s.c, buffer[0], 256*256);
-    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
-
-    for(i=-256; i<256; i++){
-        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
-    }
-    ff_rac_terminate(&s.c);
-
-    memset(s.header_state, 0, sizeof(s.header_state));
-    ff_init_range_decoder(&s.c, buffer[0], 256*256);
-    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
-
-    for(i=-256; i<256; i++){
-        int j;
-        j= get_symbol(&s.c, s.header_state, 1);
-        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
-    }
-#endif
     {
     int level, orientation, x, y;
     int64_t errors[8][4];
@@ -4120,7 +3788,6 @@ int main(void){
             buf+=stride>>1;
 
             memset(buffer[0], 0, sizeof(int)*width*height);
-#if 1
             for(y=0; y<height; y++){
                 for(x=0; x<width; x++){
                     int tab[4]={0,2,3,1};
@@ -4128,15 +3795,6 @@ int main(void){
                 }
             }
             ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
-#else
-            for(y=0; y<h; y++){
-                for(x=0; x<w; x++){
-                    buf[x + y*stride  ]=169;
-                    buf[x + y*stride-w]=64;
-                }
-            }
-            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
-#endif
             for(y=0; y<height; y++){
                 for(x=0; x<width; x++){
                     int64_t d= buffer[0][x + y*width];

From 3d85acc8f71581f403ea33711af76d97c4922310 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 3 Jul 2011 13:45:23 +0200
Subject: [PATCH 19/40] doxygen: Fix execute_code() @return documentation.

---
 libavcodec/ansi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/ansi.c b/libavcodec/ansi.c
index e46879dd6b..32c7ce4ecd 100644
--- a/libavcodec/ansi.c
+++ b/libavcodec/ansi.c
@@ -153,7 +153,7 @@ static void draw_char(AVCodecContext *avctx, int c)
 
 /**
  * Execute ANSI escape code
- * @param <0 error
+ * @return 0 on success, negative on error
  */
 static int execute_code(AVCodecContext * avctx, int c)
 {

From 01c17c88ede76f8321cf2c59a535dbbc5b5ff989 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 3 Jul 2011 16:11:16 +0200
Subject: [PATCH 20/40] doxygen: Remove spurious documentation for non-existing
 function parameters.

---
 libavcodec/h264idct_template.c | 1 -
 libswscale/swscale.h           | 1 -
 2 files changed, 2 deletions(-)

diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index e7f9af7fb0..e288f9bf1b 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -237,7 +237,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
 }
 /**
  * IDCT transforms the 16 dc values and dequantizes them.
- * @param qp quantization parameter
  */
 void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int qmul){
 #define stride 16
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 2aa5e50ab2..3899596983 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -235,7 +235,6 @@ int sws_scale(struct SwsContext *context, const uint8_t* const srcSlice[], const
 
 /**
  * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
- * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
  * @return -1 if not supported
  */
 int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],

From 24c9babaaf458f943ed1ef74628d157ac29c5d5f Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 2 Jul 2011 13:23:35 +0200
Subject: [PATCH 21/40] doxygen: Fix parameter names to match the function
 prototypes.

---
 libavcodec/aacps.c        | 2 +-
 libavcodec/lpc.c          | 2 +-
 libavcodec/vp8.c          | 2 +-
 libavfilter/graphparser.c | 2 +-
 libavformat/wtv.c         | 2 +-
 libavutil/imgutils.h      | 4 ++--
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavcodec/aacps.c b/libavcodec/aacps.c
index 724c13256a..d818790a6f 100644
--- a/libavcodec/aacps.c
+++ b/libavcodec/aacps.c
@@ -77,7 +77,7 @@ static VLC vlc_ps[10];
  * @param avctx contains the current codec context
  * @param gb    pointer to the input bitstream
  * @param ps    pointer to the Parametric Stereo context
- * @param par   pointer to the parameter to be read
+ * @param PAR   pointer to the parameter to be read
  * @param e     envelope to decode
  * @param dt    1: time delta-coded, 0: frequency delta-coded
  */
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index ed985d36ff..31fa324216 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -149,7 +149,7 @@ static int estimate_best_order(double *ref, int min_order, int max_order)
 /**
  * Calculate LPC coefficients for multiple orders
  *
- * @param use_lpc LPC method for determining coefficients
+ * @param lpc_type LPC method for determining coefficients
  * 0  = LPC with fixed pre-defined coeffs
  * 1  = LPC with coeffs determined by Levinson-Durbin recursion
  * 2+ = LPC with coeffs determined by Cholesky factorization using (use_lpc-1) passes.
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 282d2fdb4e..fdf63f095d 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1039,7 +1039,7 @@ static const uint8_t subpel_idx[3][8] = {
  * @param s VP8 decoding context
  * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
  * @param dst target buffer for block data at block position
- * @param src reference picture buffer at origin (0, 0)
+ * @param ref reference picture buffer at origin (0, 0)
  * @param mv motion vector (relative to block position) to get pixel data from
  * @param x_off horizontal position of block from origin (0, 0)
  * @param y_off vertical position of block from origin (0, 0)
diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c
index 00fb57ad57..c1624d8471 100644
--- a/libavfilter/graphparser.c
+++ b/libavfilter/graphparser.c
@@ -83,8 +83,8 @@ static char *parse_link_name(const char **buf, AVClass *log_ctx)
  * Create an instance of a filter, initialize and insert it in the
  * filtergraph in *ctx.
  *
+ * @param filt_ctx put here a filter context in case of successful creation and configuration, NULL otherwise.
  * @param ctx the filtergraph context
- * @param put here a filter context in case of successful creation and configuration, NULL otherwise.
  * @param index an index which is supposed to be unique for each filter instance added to the filtergraph
  * @param filt_name the name of the filter to create
  * @param args the arguments provided to the filter during its initialization
diff --git a/libavformat/wtv.c b/libavformat/wtv.c
index 0f9fdeff06..cc6fc8be48 100644
--- a/libavformat/wtv.c
+++ b/libavformat/wtv.c
@@ -766,7 +766,7 @@ enum {
  * Parse WTV chunks
  * @param mode SEEK_TO_DATA or SEEK_TO_PTS
  * @param seekts timestamp
- * @param[out] len Length of data chunk
+ * @param[out] len_ptr Length of data chunk
  * @return stream index of data chunk, or <0 on error
  */
 static int parse_chunks(AVFormatContext *s, int mode, int64_t seekts, int *len_ptr)
diff --git a/libavutil/imgutils.h b/libavutil/imgutils.h
index b569eb1ca4..0666a28ef5 100644
--- a/libavutil/imgutils.h
+++ b/libavutil/imgutils.h
@@ -106,8 +106,8 @@ void av_image_copy_plane(uint8_t       *dst, int dst_linesize,
 /**
  * Copy image in src_data to dst_data.
  *
- * @param dst_linesize linesizes for the image in dst_data
- * @param src_linesize linesizes for the image in src_data
+ * @param dst_linesizes linesizes for the image in dst_data
+ * @param src_linesizes linesizes for the image in src_data
  */
 void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4],
                    const uint8_t *src_data[4], const int src_linesizes[4],

From ff993cd7fcdfeffcac10337c0c6b69c599060c2b Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sat, 2 Jul 2011 14:42:27 +0200
Subject: [PATCH 22/40] doxygen: Drop array size declarations from Doxygen
 parameter names.

Adding [] to a Doxygen parameter name clashes with Doxygen syntax.
---
 libavcodec/cook.c                | 2 +-
 libavcodec/motion_est_template.c | 2 +-
 libavutil/imgutils.h             | 2 +-
 libavutil/lfg.h                  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 8d1078145e..b5492db27b 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -335,7 +335,7 @@ static av_cold int cook_decode_close(AVCodecContext *avctx)
  * Fill the gain array for the timedomain quantization.
  *
  * @param gb          pointer to the GetBitContext
- * @param gaininfo[9] array of gain indexes
+ * @param gaininfo    array[9] of gain indexes
  */
 
 static void decode_gain_info(GetBitContext *gb, int *gaininfo)
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index b451c76bb4..576bb3da43 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -991,7 +991,7 @@ static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dm
 }
 
 /**
-   @param P[10][2] a list of candidate mvs to check before starting the
+   @param P a list of candidate mvs to check before starting the
    iterative search. If one of the candidates is close to the optimal mv, then
    it takes fewer iterations. And it increases the chance that we find the
    optimal mv.
diff --git a/libavutil/imgutils.h b/libavutil/imgutils.h
index 0666a28ef5..6017a70f71 100644
--- a/libavutil/imgutils.h
+++ b/libavutil/imgutils.h
@@ -69,7 +69,7 @@ int av_image_fill_linesizes(int linesizes[4], enum PixelFormat pix_fmt, int widt
  *
  * @param data pointers array to be filled with the pointer for each image plane
  * @param ptr the pointer to a buffer which will contain the image
- * @param linesizes[4] the array containing the linesize for each
+ * @param linesizes the array containing the linesize for each
  * plane, should be filled by av_image_fill_linesizes()
  * @return the size in bytes required for the image buffer, a negative
  * error code in case of failure
diff --git a/libavutil/lfg.h b/libavutil/lfg.h
index 89a635a1b0..904d00a669 100644
--- a/libavutil/lfg.h
+++ b/libavutil/lfg.h
@@ -55,7 +55,7 @@ static inline unsigned int av_mlfg_get(AVLFG *c){
  * Get the next two numbers generated by a Box-Muller Gaussian
  * generator using the random numbers issued by lfg.
  *
- * @param out[2] array where the two generated numbers are placed
+ * @param out array where the two generated numbers are placed
  */
 void av_bmg_get(AVLFG *lfg, double out[2]);
 

From edf4dbff33d8ab5653ecc9124bf4333cf05bab2a Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 17:50:38 +0100
Subject: [PATCH 23/40] md5: fix test program

This makes the md5-test program print something meaningful and not
smash the stack.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/md5.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/libavutil/md5.c b/libavutil/md5.c
index 271d71f48f..b0c678bc27 100644
--- a/libavutil/md5.c
+++ b/libavutil/md5.c
@@ -162,21 +162,29 @@ void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len){
 }
 
 #ifdef TEST
-#include <stdio.h>
-#include <inttypes.h>
 #undef printf
+#include <stdio.h>
+
+static void print_md5(uint8_t *md5)
+{
+    int i;
+    for (i = 0; i < 16; i++)
+        printf("%02x", md5[i]);
+    printf("\n");
+}
+
 int main(void){
-    uint64_t md5val;
+    uint8_t md5val[16];
     int i;
     uint8_t in[1000];
 
     for(i=0; i<1000; i++) in[i]= i*i;
-    av_md5_sum( (uint8_t*)&md5val, in,  1000); printf("%"PRId64"\n", md5val);
-    av_md5_sum( (uint8_t*)&md5val, in,  63); printf("%"PRId64"\n", md5val);
-    av_md5_sum( (uint8_t*)&md5val, in,  64); printf("%"PRId64"\n", md5val);
-    av_md5_sum( (uint8_t*)&md5val, in,  65); printf("%"PRId64"\n", md5val);
+    av_md5_sum(md5val, in, 1000); print_md5(md5val);
+    av_md5_sum(md5val, in,   63); print_md5(md5val);
+    av_md5_sum(md5val, in,   64); print_md5(md5val);
+    av_md5_sum(md5val, in,   65); print_md5(md5val);
     for(i=0; i<1000; i++) in[i]= i % 127;
-    av_md5_sum( (uint8_t*)&md5val, in,  999); printf("%"PRId64"\n", md5val);
+    av_md5_sum(md5val, in,  999); print_md5(md5val);
 
     return 0;
 }

From 44496ff2d60d3f49b55ea794284504a16c2ba4c5 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 17:51:02 +0100
Subject: [PATCH 24/40] md5: include correct headers

This file needs stdint.h but not string.h

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/md5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/md5.c b/libavutil/md5.c
index b0c678bc27..74e109b72d 100644
--- a/libavutil/md5.c
+++ b/libavutil/md5.c
@@ -30,7 +30,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <string.h>
+#include <stdint.h>
 #include "bswap.h"
 #include "md5.h"
 

From 8b53755ebd8310bfa9d92013712d2abc07af59ef Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 17:51:34 +0100
Subject: [PATCH 25/40] md5: add fate test

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/fate/libavutil.mak | 4 ++++
 tests/ref/fate/md5       | 5 +++++
 2 files changed, 9 insertions(+)
 create mode 100644 tests/ref/fate/md5

diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
index 9fdd450255..d83ced8a98 100644
--- a/tests/fate/libavutil.mak
+++ b/tests/fate/libavutil.mak
@@ -21,6 +21,10 @@ fate-des: libavutil/des-test$(EXESUF)
 fate-des: CMD = run libavutil/des-test
 fate-des: REF = /dev/null
 
+FATE_TESTS += fate-md5
+fate-md5: libavutil/md5-test$(EXESUF)
+fate-md5: CMD = run libavutil/md5-test
+
 FATE_TESTS += fate-sha
 fate-sha: libavutil/sha-test$(EXESUF)
 fate-sha: CMD = run libavutil/sha-test
diff --git a/tests/ref/fate/md5 b/tests/ref/fate/md5
new file mode 100644
index 0000000000..af08a8477f
--- /dev/null
+++ b/tests/ref/fate/md5
@@ -0,0 +1,5 @@
+0bf1bcc8a1d72e2cf58d42182b637e56
+993a3eb298e52aca83ecfbb6a766b4d0
+07c01ca7c733475fad38c84c56f305c1
+9fc8404827cac26385f48f4f58fd32ce
+a22bfef14302c5ca46e0ae91092bc0e0

From fa49fc72d9f25c1b2dccf7a4c43aa9b027e80e4b Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 18:01:26 +0100
Subject: [PATCH 26/40] md5: use AV_WL32 to write result

This is simpler, safer, and removes the undocumented requirement of
an aligned output buffer.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/md5.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavutil/md5.c b/libavutil/md5.c
index 74e109b72d..a06e5eae81 100644
--- a/libavutil/md5.c
+++ b/libavutil/md5.c
@@ -32,6 +32,7 @@
 
 #include <stdint.h>
 #include "bswap.h"
+#include "intreadwrite.h"
 #include "md5.h"
 
 typedef struct AVMD5{
@@ -150,7 +151,7 @@ void av_md5_final(AVMD5 *ctx, uint8_t *dst){
     av_md5_update(ctx, (uint8_t*)&finalcount, 8);
 
     for(i=0; i<4; i++)
-        ((uint32_t*)dst)[i]= av_le2ne32(ctx->ABCD[3-i]);
+        AV_WL32(dst + 4*i, ctx->ABCD[3-i]);
 }
 
 void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len){

From 82494cad9db1f32f13b6643b7dce15f2688e3f27 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 18:18:26 +0100
Subject: [PATCH 27/40] md5: cosmetics

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/md5.c | 107 +++++++++++++++++++++++++++---------------------
 1 file changed, 60 insertions(+), 47 deletions(-)

diff --git a/libavutil/md5.c b/libavutil/md5.c
index a06e5eae81..ca0e598d2e 100644
--- a/libavutil/md5.c
+++ b/libavutil/md5.c
@@ -41,7 +41,7 @@ typedef struct AVMD5{
     uint32_t ABCD[4];
 } AVMD5;
 
-const int av_md5_size= sizeof(AVMD5);
+const int av_md5_size = sizeof(AVMD5);
 
 static const uint8_t S[4][4] = {
     { 7, 12, 17, 22 },  /* round 1 */
@@ -72,42 +72,49 @@ static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
     0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
 };
 
-#define CORE(i, a, b, c, d) \
-        t = S[i>>4][i&3];\
-        a += T[i];\
-\
-        if(i<32){\
-            if(i<16) a += (d ^ (b&(c^d))) + X[      i &15 ];\
-            else     a += (c ^ (d&(c^b))) + X[ (1+5*i)&15 ];\
-        }else{\
-            if(i<48) a += (b^c^d)         + X[ (5+3*i)&15 ];\
-            else     a += (c^(b|~d))      + X[ (  7*i)&15 ];\
-        }\
-        a = b + (( a << t ) | ( a >> (32 - t) ));
-
-static void body(uint32_t ABCD[4], uint32_t X[16]){
+#define CORE(i, a, b, c, d) do {                                        \
+        t = S[i >> 4][i & 3];                                           \
+        a += T[i];                                                      \
+                                                                        \
+        if (i < 32) {                                                   \
+            if (i < 16) a += (d ^ (b & (c ^ d))) + X[       i  & 15];   \
+            else        a += (c ^ (d & (c ^ b))) + X[(1 + 5*i) & 15];   \
+        } else {                                                        \
+            if (i < 48) a += (b ^ c ^ d)         + X[(5 + 3*i) & 15];   \
+            else        a += (c ^ (b | ~d))      + X[(    7*i) & 15];   \
+        }                                                               \
+        a = b + (a << t | a >> (32 - t));                               \
+    } while (0)
 
+static void body(uint32_t ABCD[4], uint32_t X[16])
+{
     int t;
     int i av_unused;
-    unsigned int a= ABCD[3];
-    unsigned int b= ABCD[2];
-    unsigned int c= ABCD[1];
-    unsigned int d= ABCD[0];
+    unsigned int a = ABCD[3];
+    unsigned int b = ABCD[2];
+    unsigned int c = ABCD[1];
+    unsigned int d = ABCD[0];
 
 #if HAVE_BIGENDIAN
-    for(i=0; i<16; i++)
-        X[i]= av_bswap32(X[i]);
+    for (i = 0; i < 16; i++)
+        X[i] = av_bswap32(X[i]);
 #endif
 
 #if CONFIG_SMALL
-    for( i = 0; i < 64; i++ ){
-        CORE(i,a,b,c,d)
-        t=d; d=c; c=b; b=a; a=t;
+    for (i = 0; i < 64; i++) {
+        CORE(i, a, b, c, d);
+        t = d;
+        d = c;
+        c = b;
+        b = a;
+        a = t;
     }
 #else
-#define CORE2(i) CORE(i,a,b,c,d) CORE((i+1),d,a,b,c) CORE((i+2),c,d,a,b) CORE((i+3),b,c,d,a)
-#define CORE4(i) CORE2(i) CORE2((i+4)) CORE2((i+8)) CORE2((i+12))
-CORE4(0) CORE4(16) CORE4(32) CORE4(48)
+#define CORE2(i)                                                        \
+    CORE( i,   a,b,c,d); CORE((i+1),d,a,b,c);                           \
+    CORE((i+2),c,d,a,b); CORE((i+3),b,c,d,a)
+#define CORE4(i) CORE2(i); CORE2((i+4)); CORE2((i+8)); CORE2((i+12))
+    CORE4(0); CORE4(16); CORE4(32); CORE4(48);
 #endif
 
     ABCD[0] += d;
@@ -116,8 +123,9 @@ CORE4(0) CORE4(16) CORE4(32) CORE4(48)
     ABCD[3] += a;
 }
 
-void av_md5_init(AVMD5 *ctx){
-    ctx->len    = 0;
+void av_md5_init(AVMD5 *ctx)
+{
+    ctx->len     = 0;
 
     ctx->ABCD[0] = 0x10325476;
     ctx->ABCD[1] = 0x98badcfe;
@@ -125,41 +133,44 @@ void av_md5_init(AVMD5 *ctx){
     ctx->ABCD[3] = 0x67452301;
 }
 
-void av_md5_update(AVMD5 *ctx, const uint8_t *src, const int len){
+void av_md5_update(AVMD5 *ctx, const uint8_t *src, const int len)
+{
     int i, j;
 
-    j= ctx->len & 63;
+    j = ctx->len & 63;
     ctx->len += len;
 
-    for( i = 0; i < len; i++ ){
+    for (i = 0; i < len; i++) {
         ctx->block[j++] = src[i];
-        if( 64 == j ){
-            body(ctx->ABCD, (uint32_t*) ctx->block);
+        if (j == 64) {
+            body(ctx->ABCD, (uint32_t *) ctx->block);
             j = 0;
         }
     }
 }
 
-void av_md5_final(AVMD5 *ctx, uint8_t *dst){
+void av_md5_final(AVMD5 *ctx, uint8_t *dst)
+{
     int i;
-    uint64_t finalcount= av_le2ne64(ctx->len<<3);
+    uint64_t finalcount = av_le2ne64(ctx->len << 3);
 
     av_md5_update(ctx, "\200", 1);
-    while((ctx->len & 63)!=56)
+    while ((ctx->len & 63) != 56)
         av_md5_update(ctx, "", 1);
 
-    av_md5_update(ctx, (uint8_t*)&finalcount, 8);
+    av_md5_update(ctx, (uint8_t *)&finalcount, 8);
 
-    for(i=0; i<4; i++)
-        AV_WL32(dst + 4*i, ctx->ABCD[3-i]);
+    for (i = 0; i < 4; i++)
+        AV_WL32(dst + 4*i, ctx->ABCD[3 - i]);
 }
 
-void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len){
-    AVMD5 ctx[1];
+void av_md5_sum(uint8_t *dst, const uint8_t *src, const int len)
+{
+    AVMD5 ctx;
 
-    av_md5_init(ctx);
-    av_md5_update(ctx, src, len);
-    av_md5_final(ctx, dst);
+    av_md5_init(&ctx);
+    av_md5_update(&ctx, src, len);
+    av_md5_final(&ctx, dst);
 }
 
 #ifdef TEST
@@ -179,12 +190,14 @@ int main(void){
     int i;
     uint8_t in[1000];
 
-    for(i=0; i<1000; i++) in[i]= i*i;
+    for (i = 0; i < 1000; i++)
+        in[i] = i * i;
     av_md5_sum(md5val, in, 1000); print_md5(md5val);
     av_md5_sum(md5val, in,   63); print_md5(md5val);
     av_md5_sum(md5val, in,   64); print_md5(md5val);
     av_md5_sum(md5val, in,   65); print_md5(md5val);
-    for(i=0; i<1000; i++) in[i]= i % 127;
+    for (i = 0; i < 1000; i++)
+        in[i] = i % 127;
     av_md5_sum(md5val, in,  999); print_md5(md5val);
 
     return 0;

From c81a2b9b4f5488c831dc27635152394ab632c46a Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 3 Jul 2011 16:33:25 +0200
Subject: [PATCH 28/40] doxygen: Escape '\' in Doxygen documentation.

---
 libavformat/internal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/internal.h b/libavformat/internal.h
index 30faa00e95..aba890def4 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -157,14 +157,14 @@ void ff_put_v(AVIOContext *bc, uint64_t val);
 
 /**
  * Read a whole line of text from AVIOContext. Stop reading after reaching
- * either a \n, a \0 or EOF. The returned string is always \0 terminated,
+ * either a \\n, a \\0 or EOF. The returned string is always \\0-terminated,
  * and may be truncated if the buffer is too small.
  *
  * @param s the read-only AVIOContext
  * @param buf buffer to store the read line
  * @param maxlen size of the buffer
  * @return the length of the string written in the buffer, not including the
- *         final \0
+ *         final \\0
  */
 int ff_get_line(AVIOContext *s, char *buf, int maxlen);
 

From f75e3da535f297ddbe501ce866e57ccca7645455 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 3 Jul 2011 16:35:10 +0200
Subject: [PATCH 29/40] RTSP: Doxygen comment cleanup

Do not use Doxygen for comments that apply to specific implementation
details; merge some duplicated Doxygen comment blocks.
---
 libavformat/rtsp.c | 18 +++++-------------
 libavformat/rtsp.h |  6 ++++--
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index eeea9be4a0..80cd587144 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -428,11 +428,6 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
     }
 }
 
-/**
- * Parse the sdp description and allocate the rtp streams and the
- * pollfd array used for udp ones.
- */
-
 int ff_sdp_parse(AVFormatContext *s, const char *content)
 {
     RTSPState *rt = s->priv_data;
@@ -1050,9 +1045,6 @@ retry:
     return 0;
 }
 
-/**
- * @return 0 on success, <0 on error, 1 if protocol is unavailable.
- */
 int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
                               int lower_transport, const char *real_challenge)
 {
@@ -1078,7 +1070,7 @@ int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
     for (j = RTSP_RTP_PORT_MIN, i = 0; i < rt->nb_rtsp_streams; ++i) {
         char transport[2048];
 
-        /**
+        /*
          * WMS serves all UDP data over a single connection, the RTX, which
          * isn't necessarily the first in the SDP but has to be the first
          * to be set up, else the second/third SETUP will fail with a 461.
@@ -1151,7 +1143,7 @@ int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
 
         /* RTP/TCP */
         else if (lower_transport == RTSP_LOWER_TRANSPORT_TCP) {
-            /** For WMS streams, the application streams are only used for
+            /* For WMS streams, the application streams are only used for
              * UDP. When trying to set it up for TCP streams, the server
              * will return an error. Therefore, we skip those streams. */
             if (rt->server_type == RTSP_SERVER_WMS &&
@@ -1482,14 +1474,14 @@ redirect:
         cmd[0] = 0;
         if (rt->server_type == RTSP_SERVER_REAL)
             av_strlcat(cmd,
-                       /**
+                       /*
                         * The following entries are required for proper
                         * streaming from a Realmedia server. They are
                         * interdependent in some way although we currently
                         * don't quite understand how. Values were copied
                         * from mplayer SVN r23589.
-                        * @param CompanyID is a 16-byte ID in base64
-                        * @param ClientChallenge is a 16-byte ID in hex
+                        *   ClientChallenge is a 16-byte ID in hex
+                        *   CompanyID is a 16-byte ID in base64
                         */
                        "ClientChallenge: 9e26d33f2984236010ef6253fb1887f7\r\n"
                        "PlayerStarttime: [28/03/2003:22:50:23 00:00]\r\n"
diff --git a/libavformat/rtsp.h b/libavformat/rtsp.h
index 5eae6bf4f3..7d2460fe2f 100644
--- a/libavformat/rtsp.h
+++ b/libavformat/rtsp.h
@@ -505,8 +505,9 @@ int ff_rtsp_setup_input_streams(AVFormatContext *s, RTSPMessageHeader *reply);
 int ff_rtsp_setup_output_streams(AVFormatContext *s, const char *addr);
 
 /**
- * Parse a SDP description of streams by populating an RTSPState struct
- * within the AVFormatContext.
+ * Parse an SDP description of streams by populating an RTSPState struct
+ * within the AVFormatContext; also allocate the RTP streams and the
+ * pollfd array used for UDP streams.
  */
 int ff_sdp_parse(AVFormatContext *s, const char *content);
 
@@ -525,6 +526,7 @@ int ff_rtsp_fetch_packet(AVFormatContext *s, AVPacket *pkt);
 /**
  * Do the SETUP requests for each stream for the chosen
  * lower transport mode.
+ * @return 0 on success, <0 on error, 1 if protocol is unavailable
  */
 int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
                                int lower_transport, const char *real_challenge);

From add41decd94b2d3581a3715ba10f27168b8cdb1b Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 3 Jul 2011 21:02:18 +0100
Subject: [PATCH 30/40] Remove return statements following infinite loops
 without break

These statements cannot be reached and are thus not needed.
This removes a number of compiler warnings.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/h264.c         | 1 -
 libavcodec/mpeg12.c       | 2 --
 libavformat/asfdec.c      | 2 --
 libavformat/bethsoftvid.c | 2 --
 libavformat/mm.c          | 2 --
 libavformat/mmsh.c        | 1 -
 libavformat/mov.c         | 1 -
 libavformat/rtmpproto.c   | 1 -
 libavformat/swfdec.c      | 1 -
 libavutil/opt.c           | 1 -
 libavutil/parseutils.c    | 1 -
 11 files changed, 15 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 2c000a3420..37dab73dc0 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3552,7 +3552,6 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
         ff_draw_horiz_band(s, 16*s->mb_y, 16);
     }
 #endif
-    return -1; //not reached
 }
 
 /**
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 03c95c191d..986cad5fd2 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -1928,8 +1928,6 @@ static int slice_decode_thread(AVCodecContext *c, void *arg){
         if(mb_y < 0 || mb_y >= s->end_mb_y)
             return -1;
     }
-
-    return 0; //not reached
 }
 
 /**
diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 4fafb26259..38fe4f9fa2 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -1099,8 +1099,6 @@ static int asf_read_packet(AVFormatContext *s, AVPacket *pkt)
             assert(asf->packet_size_left < FRAME_HEADER_SIZE || asf->packet_segments < 1);
         asf->packet_time_start = 0;
     }
-
-    return 0;
 }
 
 // Added to support seeking after packets have been read
diff --git a/libavformat/bethsoftvid.c b/libavformat/bethsoftvid.c
index 01e6f55cd4..5e6a776824 100644
--- a/libavformat/bethsoftvid.c
+++ b/libavformat/bethsoftvid.c
@@ -220,8 +220,6 @@ static int vid_read_packet(AVFormatContext *s,
             av_log(s, AV_LOG_ERROR, "unknown block (character = %c, decimal = %d, hex = %x)!!!\n",
                    block_type, block_type, block_type); return -1;
     }
-
-    return 0;
 }
 
 AVInputFormat ff_bethsoftvid_demuxer = {
diff --git a/libavformat/mm.c b/libavformat/mm.c
index bea6161315..dae659f3c6 100644
--- a/libavformat/mm.c
+++ b/libavformat/mm.c
@@ -184,8 +184,6 @@ static int read_packet(AVFormatContext *s,
             avio_skip(pb, length);
         }
     }
-
-    return 0;
 }
 
 AVInputFormat ff_mm_demuxer = {
diff --git a/libavformat/mmsh.c b/libavformat/mmsh.c
index af040e27a9..0ce282c906 100644
--- a/libavformat/mmsh.c
+++ b/libavformat/mmsh.c
@@ -208,7 +208,6 @@ static int get_http_header_data(MMSHContext *mmsh)
             }
         }
     }
-    return 0;
 }
 
 static int mmsh_open(URLContext *h, const char *uri, int flags)
diff --git a/libavformat/mov.c b/libavformat/mov.c
index c720440472..194c2f86dd 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2308,7 +2308,6 @@ static int mov_probe(AVProbeData *p)
             return score;
         }
     }
-    return score;
 }
 
 // must be done after parsing all trak because there's no order requirement
diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index f499bd3b71..e841f9b8d1 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -761,7 +761,6 @@ static int get_packet(URLContext *s, int for_header)
         }
         ff_rtmp_packet_destroy(&rpkt);
     }
-    return 0;
 }
 
 static int rtmp_close(URLContext *h)
diff --git a/libavformat/swfdec.c b/libavformat/swfdec.c
index eec9524ecf..c838fa8e61 100644
--- a/libavformat/swfdec.c
+++ b/libavformat/swfdec.c
@@ -204,7 +204,6 @@ static int swf_read_packet(AVFormatContext *s, AVPacket *pkt)
     skip:
         avio_skip(pb, len);
     }
-    return 0;
 }
 
 AVInputFormat ff_swf_demuxer = {
diff --git a/libavutil/opt.c b/libavutil/opt.c
index f65f31fcda..acb745cf3d 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -196,7 +196,6 @@ int av_set_string3(void *obj, const char *name, const char *val, int alloc, cons
                 return 0;
             notfirst=1;
         }
-        return AVERROR(EINVAL);
     }
 
     if (alloc) {
diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c
index 0272c3ef67..c8124c532e 100644
--- a/libavutil/parseutils.c
+++ b/libavutil/parseutils.c
@@ -462,7 +462,6 @@ const char *small_strptime(const char *p, const char *fmt,
             p++;
         }
     }
-    return p;
 }
 
 static time_t mktimegm(struct tm *tm)

From ee8aecd23a962914d7c264c2169eed8b69d031aa Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 12:42:16 +0100
Subject: [PATCH 31/40] Do not include intfloat_readwrite.h in avutil.h

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavformat/4xm.c         | 1 +
 libavformat/aiffenc.c     | 1 +
 libavformat/cafdec.c      | 1 +
 libavformat/ffmdec.c      | 1 +
 libavformat/ffmenc.c      | 1 +
 libavformat/flvdec.c      | 1 +
 libavformat/flvenc.c      | 2 ++
 libavformat/gxfenc.c      | 1 +
 libavformat/matroskaenc.c | 1 +
 libavformat/mov.c         | 1 +
 libavformat/movenc.c      | 1 +
 libavformat/nuv.c         | 1 +
 libavformat/rtmppkt.c     | 1 +
 libavformat/rtmpproto.c   | 1 +
 libavformat/soxdec.c      | 1 +
 libavformat/soxenc.c      | 1 +
 libavformat/thp.c         | 1 +
 libavutil/avutil.h        | 1 -
 18 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/libavformat/4xm.c b/libavformat/4xm.c
index 93c90e8cbc..32699da439 100644
--- a/libavformat/4xm.c
+++ b/libavformat/4xm.c
@@ -28,6 +28,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 
 #define     RIFF_TAG MKTAG('R', 'I', 'F', 'F')
diff --git a/libavformat/aiffenc.c b/libavformat/aiffenc.c
index 3bdb4f4f8c..5a64688483 100644
--- a/libavformat/aiffenc.c
+++ b/libavformat/aiffenc.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 #include "aiff.h"
 #include "avio_internal.h"
diff --git a/libavformat/cafdec.c b/libavformat/cafdec.c
index dff3b41281..68686cab97 100644
--- a/libavformat/cafdec.c
+++ b/libavformat/cafdec.c
@@ -29,6 +29,7 @@
 #include "riff.h"
 #include "isom.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "libavutil/dict.h"
 #include "caf.h"
 
diff --git a/libavformat/ffmdec.c b/libavformat/ffmdec.c
index dfd86cb28b..91ab2e4370 100644
--- a/libavformat/ffmdec.c
+++ b/libavformat/ffmdec.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 #include "ffm.h"
 #if CONFIG_FFSERVER
diff --git a/libavformat/ffmenc.c b/libavformat/ffmenc.c
index 71d93e5c43..9a3eb40ad7 100644
--- a/libavformat/ffmenc.c
+++ b/libavformat/ffmenc.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 #include "ffm.h"
 
diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index c6b386e28f..3b7db0e6ca 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -26,6 +26,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/dict.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/mpeg4audio.h"
 #include "avformat.h"
diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
index 487993cd9a..a3e7e25692 100644
--- a/libavformat/flvenc.c
+++ b/libavformat/flvenc.c
@@ -18,6 +18,8 @@
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 #include "flv.h"
 #include "internal.h"
diff --git a/libavformat/gxfenc.c b/libavformat/gxfenc.c
index 98126fa2c4..ac88475984 100644
--- a/libavformat/gxfenc.c
+++ b/libavformat/gxfenc.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 #include "gxf.h"
 #include "riff.h"
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index e485539a26..34c65d07a4 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -28,6 +28,7 @@
 #include "avlanguage.h"
 #include "libavutil/samplefmt.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/lfg.h"
 #include "libavutil/dict.h"
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 194c2f86dd..f0b87dd84c 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -26,6 +26,7 @@
 //#define MOV_EXPORT_ALL_METADATA
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "libavutil/avstring.h"
 #include "libavutil/dict.h"
 #include "avformat.h"
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index dcc5581443..2e2f869338 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -32,6 +32,7 @@
 #include "libavcodec/put_bits.h"
 #include "internal.h"
 #include "libavutil/avstring.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "libavutil/opt.h"
 #include "libavutil/dict.h"
 #include "rtpenc.h"
diff --git a/libavformat/nuv.c b/libavformat/nuv.c
index 4e1ee5702b..854aadd990 100644
--- a/libavformat/nuv.c
+++ b/libavformat/nuv.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 #include "riff.h"
 
diff --git a/libavformat/rtmppkt.c b/libavformat/rtmppkt.c
index 35ef7fdaae..6bf641a742 100644
--- a/libavformat/rtmppkt.c
+++ b/libavformat/rtmppkt.c
@@ -21,6 +21,7 @@
 
 #include "libavcodec/bytestream.h"
 #include "libavutil/avstring.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 
 #include "rtmppkt.h"
diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index e841f9b8d1..de4eb0fd23 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -26,6 +26,7 @@
 
 #include "libavcodec/bytestream.h"
 #include "libavutil/avstring.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "libavutil/lfg.h"
 #include "libavutil/sha.h"
 #include "avformat.h"
diff --git a/libavformat/soxdec.c b/libavformat/soxdec.c
index fb7b063f8c..b3b35b12fd 100644
--- a/libavformat/soxdec.c
+++ b/libavformat/soxdec.c
@@ -30,6 +30,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "libavutil/dict.h"
 #include "avformat.h"
 #include "pcm.h"
diff --git a/libavformat/soxenc.c b/libavformat/soxenc.c
index 01d0cda2cf..a0faa466df 100644
--- a/libavformat/soxenc.c
+++ b/libavformat/soxenc.c
@@ -30,6 +30,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "libavutil/dict.h"
 #include "avformat.h"
 #include "avio_internal.h"
diff --git a/libavformat/thp.c b/libavformat/thp.c
index 6cdcefd377..51dbd810cd 100644
--- a/libavformat/thp.c
+++ b/libavformat/thp.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat_readwrite.h"
 #include "avformat.h"
 
 typedef struct ThpDemuxContext {
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index d6e4668b88..b8882a705e 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -123,7 +123,6 @@ char av_get_picture_type_char(enum AVPictureType pict_type);
 #include "error.h"
 #include "mathematics.h"
 #include "rational.h"
-#include "intfloat_readwrite.h"
 #include "log.h"
 #include "pixfmt.h"
 

From 0ebcdf5cdad6bf20a5170735a7f77b23ecc081ac Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 12:58:23 +0100
Subject: [PATCH 32/40] Do not include mathematics.h in avutil.h

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 ffmpeg.c                       | 1 +
 ffplay.c                       | 1 +
 ffserver.c                     | 1 +
 libavcodec/acelp_pitch_delay.c | 1 +
 libavcodec/ituh263dec.c        | 1 +
 libavcodec/mpegvideo_enc.c     | 1 +
 libavcodec/snow.c              | 1 +
 libavcodec/utils.c             | 1 +
 libavcodec/xsubdec.c           | 2 ++
 libavfilter/vf_aspect.c        | 1 +
 libavfilter/vf_crop.c          | 1 +
 libavfilter/vf_overlay.c       | 1 +
 libavfilter/vf_pad.c           | 1 +
 libavfilter/vf_scale.c         | 1 +
 libavfilter/vf_setpts.c        | 1 +
 libavfilter/vf_settb.c         | 1 +
 libavfilter/vsrc_color.c       | 1 +
 libavfilter/vsrc_nullsrc.c     | 1 +
 libavformat/applehttp.c        | 1 +
 libavformat/asfdec.c           | 1 +
 libavformat/assdec.c           | 1 +
 libavformat/audiointerleave.c  | 1 +
 libavformat/avidec.c           | 1 +
 libavformat/dv.c               | 1 +
 libavformat/dvenc.c            | 1 +
 libavformat/ffmetadec.c        | 1 +
 libavformat/gxfenc.c           | 1 +
 libavformat/matroskaenc.c      | 1 +
 libavformat/mov.c              | 1 +
 libavformat/movenc.c           | 1 +
 libavformat/mp3dec.c           | 1 +
 libavformat/mpegenc.c          | 1 +
 libavformat/mpegtsenc.c        | 1 +
 libavformat/mxfdec.c           | 1 +
 libavformat/nsvdec.c           | 2 ++
 libavformat/nut.c              | 1 +
 libavformat/nutdec.c           | 1 +
 libavformat/nutenc.c           | 1 +
 libavformat/oggenc.c           | 1 +
 libavformat/output-example.c   | 1 +
 libavformat/pcm.c              | 1 +
 libavformat/r3d.c              | 1 +
 libavformat/riff.c             | 1 +
 libavformat/rl2.c              | 1 +
 libavformat/rtpdec.c           | 1 +
 libavformat/rtpenc.c           | 1 +
 libavformat/rtsp.c             | 1 +
 libavformat/rtspdec.c          | 1 +
 libavformat/seek.c             | 1 +
 libavformat/utils.c            | 1 +
 libavformat/vqf.c              | 1 +
 libavformat/wav.c              | 2 ++
 libavutil/avutil.h             | 1 -
 tests/seek_test.c              | 1 +
 54 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index b9309aed43..1a4d2a1204 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -40,6 +40,7 @@
 #include "libavutil/fifo.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/dict.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/avstring.h"
 #include "libavutil/libm.h"
diff --git a/ffplay.c b/ffplay.c
index 73b30c400b..706ee25d65 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -25,6 +25,7 @@
 #include <limits.h>
 #include "libavutil/avstring.h"
 #include "libavutil/colorspace.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/dict.h"
diff --git a/ffserver.c b/ffserver.c
index 65a97b20cc..f80ad972e5 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -37,6 +37,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/lfg.h"
 #include "libavutil/dict.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/opt.h"
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index 4e44bc9122..2b791b5aa3 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -20,6 +20,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/mathematics.h"
 #include "avcodec.h"
 #include "dsputil.h"
 #include "acelp_pitch_delay.h"
diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c
index 0bb92da833..7071b845fc 100644
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c
@@ -30,6 +30,7 @@
 //#define DEBUG
 #include <limits.h>
 
+#include "libavutil/mathematics.h"
 #include "dsputil.h"
 #include "avcodec.h"
 #include "mpegvideo.h"
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index cd13a5cd2b..73bcc5b229 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -28,6 +28,7 @@
  */
 
 #include "libavutil/intmath.h"
+#include "libavutil/mathematics.h"
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 4ab029de5c..6a63da7aa8 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -3697,6 +3697,7 @@ AVCodec ff_snow_encoder = {
 #undef printf
 
 #include "libavutil/lfg.h"
+#include "libavutil/mathematics.h"
 
 int main(void){
     int width=256;
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 44a6f8c232..722f758231 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -27,6 +27,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/crc.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/audioconvert.h"
 #include "libavutil/imgutils.h"
diff --git a/libavcodec/xsubdec.c b/libavcodec/xsubdec.c
index 097cbc6c1d..4afefac2c4 100644
--- a/libavcodec/xsubdec.c
+++ b/libavcodec/xsubdec.c
@@ -18,6 +18,8 @@
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
+#include "libavutil/mathematics.h"
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
 #include "get_bits.h"
diff --git a/libavfilter/vf_aspect.c b/libavfilter/vf_aspect.c
index 2ede0fd959..b43aa86bea 100644
--- a/libavfilter/vf_aspect.c
+++ b/libavfilter/vf_aspect.c
@@ -23,6 +23,7 @@
  * aspect ratio modification video filters
  */
 
+#include "libavutil/mathematics.h"
 #include "avfilter.h"
 
 typedef struct {
diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c
index 69e5a520c8..0880d4e5f9 100644
--- a/libavfilter/vf_crop.c
+++ b/libavfilter/vf_crop.c
@@ -30,6 +30,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/libm.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/mathematics.h"
 
 static const char *var_names[] = {
     "E",
diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index c8bdf51642..39b2375235 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -30,6 +30,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/mathematics.h"
 #include "internal.h"
 
 static const char *var_names[] = {
diff --git a/libavfilter/vf_pad.c b/libavfilter/vf_pad.c
index 18873b8837..851172c058 100644
--- a/libavfilter/vf_pad.c
+++ b/libavfilter/vf_pad.c
@@ -32,6 +32,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/mathematics.h"
 #include "drawutils.h"
 
 static const char *var_names[] = {
diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index 65fe01c9ae..9ec686f8f9 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -26,6 +26,7 @@
 #include "avfilter.h"
 #include "libavutil/avstring.h"
 #include "libavutil/eval.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/pixdesc.h"
 #include "libswscale/swscale.h"
 
diff --git a/libavfilter/vf_setpts.c b/libavfilter/vf_setpts.c
index bece3736b0..f2650923d0 100644
--- a/libavfilter/vf_setpts.c
+++ b/libavfilter/vf_setpts.c
@@ -27,6 +27,7 @@
 /* #define DEBUG */
 
 #include "libavutil/eval.h"
+#include "libavutil/mathematics.h"
 #include "avfilter.h"
 
 static const char *var_names[] = {
diff --git a/libavfilter/vf_settb.c b/libavfilter/vf_settb.c
index 9575483c59..eeb4353915 100644
--- a/libavfilter/vf_settb.c
+++ b/libavfilter/vf_settb.c
@@ -25,6 +25,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/eval.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/rational.h"
 #include "avfilter.h"
 #include "internal.h"
diff --git a/libavfilter/vsrc_color.c b/libavfilter/vsrc_color.c
index 6d41c8ab71..0fb08d7dc6 100644
--- a/libavfilter/vsrc_color.c
+++ b/libavfilter/vsrc_color.c
@@ -22,6 +22,7 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/colorspace.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/parseutils.h"
 #include "drawutils.h"
 
diff --git a/libavfilter/vsrc_nullsrc.c b/libavfilter/vsrc_nullsrc.c
index 629de78a18..dfd56fa495 100644
--- a/libavfilter/vsrc_nullsrc.c
+++ b/libavfilter/vsrc_nullsrc.c
@@ -23,6 +23,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/eval.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/parseutils.h"
 #include "avfilter.h"
 
diff --git a/libavformat/applehttp.c b/libavformat/applehttp.c
index 38f33a24f3..7e0c930271 100644
--- a/libavformat/applehttp.c
+++ b/libavformat/applehttp.c
@@ -27,6 +27,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/dict.h"
 #include "avformat.h"
diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 38fe4f9fa2..ac559a0edd 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -25,6 +25,7 @@
 #include "libavutil/common.h"
 #include "libavutil/avstring.h"
 #include "libavutil/dict.h"
+#include "libavutil/mathematics.h"
 #include "libavcodec/mpegaudio.h"
 #include "avformat.h"
 #include "avio_internal.h"
diff --git a/libavformat/assdec.c b/libavformat/assdec.c
index b270200af2..08b520e656 100644
--- a/libavformat/assdec.c
+++ b/libavformat/assdec.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "internal.h"
 
diff --git a/libavformat/audiointerleave.c b/libavformat/audiointerleave.c
index e4cb1b8edc..e48f826e14 100644
--- a/libavformat/audiointerleave.c
+++ b/libavformat/audiointerleave.c
@@ -21,6 +21,7 @@
  */
 
 #include "libavutil/fifo.h"
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "audiointerleave.h"
 #include "internal.h"
diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index a00b9ced2d..2ea156e9ec 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -21,6 +21,7 @@
 
 #include <strings.h>
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/bswap.h"
 #include "libavutil/dict.h"
 #include "avformat.h"
diff --git a/libavformat/dv.c b/libavformat/dv.c
index 4b41e0aa8e..f38b954605 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -32,6 +32,7 @@
 #include "avformat.h"
 #include "libavcodec/dvdata.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
 #include "dv.h"
 
 struct DVDemuxContext {
diff --git a/libavformat/dvenc.c b/libavformat/dvenc.c
index 537581ac3a..504e3ee26f 100644
--- a/libavformat/dvenc.c
+++ b/libavformat/dvenc.c
@@ -35,6 +35,7 @@
 #include "libavcodec/dvdata.h"
 #include "dv.h"
 #include "libavutil/fifo.h"
+#include "libavutil/mathematics.h"
 
 struct DVMuxContext {
     const DVprofile*  sys;           /* current DV profile, e.g.: 525/60, 625/50 */
diff --git a/libavformat/ffmetadec.c b/libavformat/ffmetadec.c
index e3d800d3c4..73d3b83ed7 100644
--- a/libavformat/ffmetadec.c
+++ b/libavformat/ffmetadec.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "ffmeta.h"
 #include "internal.h"
diff --git a/libavformat/gxfenc.c b/libavformat/gxfenc.c
index ac88475984..5a3ff39ab2 100644
--- a/libavformat/gxfenc.c
+++ b/libavformat/gxfenc.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intfloat_readwrite.h"
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "gxf.h"
 #include "riff.h"
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 34c65d07a4..d132b65f5b 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -29,6 +29,7 @@
 #include "libavutil/samplefmt.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/intfloat_readwrite.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/lfg.h"
 #include "libavutil/dict.h"
diff --git a/libavformat/mov.c b/libavformat/mov.c
index f0b87dd84c..acde35d97e 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -27,6 +27,7 @@
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/intfloat_readwrite.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/avstring.h"
 #include "libavutil/dict.h"
 #include "avformat.h"
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 2e2f869338..0de7c4d44d 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -33,6 +33,7 @@
 #include "internal.h"
 #include "libavutil/avstring.h"
 #include "libavutil/intfloat_readwrite.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/dict.h"
 #include "rtpenc.h"
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index a1db2323ac..70ced02218 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -22,6 +22,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/dict.h"
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "id3v2.h"
 #include "id3v1.h"
diff --git a/libavformat/mpegenc.c b/libavformat/mpegenc.c
index 820c5bd5e1..5859254492 100644
--- a/libavformat/mpegenc.c
+++ b/libavformat/mpegenc.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/fifo.h"
+#include "libavutil/mathematics.h"
 #include "libavcodec/put_bits.h"
 #include "avformat.h"
 #include "mpeg.h"
diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 26d2cb229f..83ede1d79b 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -22,6 +22,7 @@
 #include "libavutil/bswap.h"
 #include "libavutil/crc.h"
 #include "libavutil/dict.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavcodec/mpegvideo.h"
 #include "avformat.h"
diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index 82daa2a002..cf2bb6cc65 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -46,6 +46,7 @@
 //#define DEBUG
 
 #include "libavutil/aes.h"
+#include "libavutil/mathematics.h"
 #include "libavcodec/bytestream.h"
 #include "avformat.h"
 #include "mxf.h"
diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index 6e9cfe3a84..4787331def 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -18,6 +18,8 @@
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "riff.h"
 #include "libavutil/dict.h"
diff --git a/libavformat/nut.c b/libavformat/nut.c
index 9a2ee6b0b0..1ce048d645 100644
--- a/libavformat/nut.c
+++ b/libavformat/nut.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/mathematics.h"
 #include "libavutil/tree.h"
 #include "nut.h"
 #include "internal.h"
diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c
index db1b999c90..93888a2bd6 100644
--- a/libavformat/nutdec.c
+++ b/libavformat/nutdec.c
@@ -24,6 +24,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/bswap.h"
 #include "libavutil/dict.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/tree.h"
 #include "avio_internal.h"
 #include "nut.h"
diff --git a/libavformat/nutenc.c b/libavformat/nutenc.c
index 260a7607d8..412c670387 100644
--- a/libavformat/nutenc.c
+++ b/libavformat/nutenc.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/tree.h"
 #include "libavutil/dict.h"
 #include "libavcodec/mpegaudiodata.h"
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index bc4b3c10b2..2b2189c10e 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/crc.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/random_seed.h"
 #include "libavcodec/xiph.h"
 #include "libavcodec/bytestream.h"
diff --git a/libavformat/output-example.c b/libavformat/output-example.c
index 4453c03039..3b28b7c7c6 100644
--- a/libavformat/output-example.c
+++ b/libavformat/output-example.c
@@ -34,6 +34,7 @@
 #include <string.h>
 #include <math.h>
 
+#include "libavutil/mathematics.h"
 #include "libavformat/avformat.h"
 #include "libswscale/swscale.h"
 
diff --git a/libavformat/pcm.c b/libavformat/pcm.c
index 26ab1424b2..7d5fed5601 100644
--- a/libavformat/pcm.c
+++ b/libavformat/pcm.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "pcm.h"
 
diff --git a/libavformat/r3d.c b/libavformat/r3d.c
index 619c6a7b6c..5dd7f997e5 100644
--- a/libavformat/r3d.c
+++ b/libavformat/r3d.c
@@ -23,6 +23,7 @@
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/dict.h"
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 
 typedef struct {
diff --git a/libavformat/riff.c b/libavformat/riff.c
index 817349cb82..27f45b91d9 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/mathematics.h"
 #include "libavcodec/avcodec.h"
 #include "avformat.h"
 #include "avio_internal.h"
diff --git a/libavformat/rl2.c b/libavformat/rl2.c
index b4172c284a..12cb391ad8 100644
--- a/libavformat/rl2.c
+++ b/libavformat/rl2.c
@@ -34,6 +34,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 
 #define EXTRADATA1_SIZE (6 + 256 * 3) ///< video base, clr, palette
diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index a910cf08bb..2c262d9bd3 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/mathematics.h"
 #include "libavcodec/get_bits.h"
 #include "avformat.h"
 #include "mpegts.h"
diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
index 3da6dfb3a9..c412158778 100644
--- a/libavformat/rtpenc.c
+++ b/libavformat/rtpenc.c
@@ -22,6 +22,7 @@
 #include "avformat.h"
 #include "mpegts.h"
 #include "internal.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index 80cd587144..ce9bf1e1eb 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -22,6 +22,7 @@
 #include "libavutil/base64.h"
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/dict.h"
diff --git a/libavformat/rtspdec.c b/libavformat/rtspdec.c
index 9cd9be1d4b..2dff46d1de 100644
--- a/libavformat/rtspdec.c
+++ b/libavformat/rtspdec.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "avformat.h"
 
diff --git a/libavformat/seek.c b/libavformat/seek.c
index 71e2f8a6b4..6c4286bb8e 100644
--- a/libavformat/seek.c
+++ b/libavformat/seek.c
@@ -21,6 +21,7 @@
  */
 
 #include "seek.h"
+#include "libavutil/mathematics.h"
 #include "libavutil/mem.h"
 #include "internal.h"
 
diff --git a/libavformat/utils.c b/libavformat/utils.c
index de26a1886e..060e58e9b2 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -31,6 +31,7 @@
 #include "metadata.h"
 #include "id3v2.h"
 #include "libavutil/avstring.h"
+#include "libavutil/mathematics.h"
 #include "riff.h"
 #include "audiointerleave.h"
 #include "url.h"
diff --git a/libavformat/vqf.c b/libavformat/vqf.c
index 5be7dfea21..dd02abd70a 100644
--- a/libavformat/vqf.c
+++ b/libavformat/vqf.c
@@ -22,6 +22,7 @@
 #include "avformat.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/dict.h"
+#include "libavutil/mathematics.h"
 
 typedef struct VqfContext {
     int frame_bit_len;
diff --git a/libavformat/wav.c b/libavformat/wav.c
index 92c9bfcbc4..391461044b 100644
--- a/libavformat/wav.c
+++ b/libavformat/wav.c
@@ -22,6 +22,8 @@
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
+#include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "avio_internal.h"
 #include "pcm.h"
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index b8882a705e..53dba004e7 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -121,7 +121,6 @@ char av_get_picture_type_char(enum AVPictureType pict_type);
 
 #include "common.h"
 #include "error.h"
-#include "mathematics.h"
 #include "rational.h"
 #include "log.h"
 #include "pixfmt.h"
diff --git a/tests/seek_test.c b/tests/seek_test.c
index 5d4f41c528..71b2a2d6c0 100644
--- a/tests/seek_test.c
+++ b/tests/seek_test.c
@@ -25,6 +25,7 @@
 #include <string.h>
 
 #include "libavutil/common.h"
+#include "libavutil/mathematics.h"
 #include "libavformat/avformat.h"
 
 #undef exit

From e91709ca178800faf7f6ce228eb260a6efaa2451 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 13:05:35 +0100
Subject: [PATCH 33/40] Do not include rational.h in avutil.h

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/avcodec.h   | 1 +
 libavfilter/avfilter.h | 1 +
 libavutil/avutil.h     | 1 -
 3 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index fbd70395c4..9502a0018b 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,6 +30,7 @@
 #include "libavutil/samplefmt.h"
 #include "libavutil/avutil.h"
 #include "libavutil/cpu.h"
+#include "libavutil/rational.h"
 
 #include "libavcodec/version.h"
 
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 33e93e27fc..b84539c942 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -24,6 +24,7 @@
 
 #include "libavutil/avutil.h"
 #include "libavutil/samplefmt.h"
+#include "libavutil/rational.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
 #define LIBAVFILTER_VERSION_MINOR  4
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 53dba004e7..0cdeef2157 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -121,7 +121,6 @@ char av_get_picture_type_char(enum AVPictureType pict_type);
 
 #include "common.h"
 #include "error.h"
-#include "rational.h"
 #include "log.h"
 #include "pixfmt.h"
 

From d49ea4afb4e39e4af33435452dee8ef4c852d83d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 13:12:08 +0100
Subject: [PATCH 34/40] Do not include pixfmt.h in avutil.h

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/avcodec.h          | 1 +
 libavfilter/avfilter.h        | 1 +
 libavutil/avutil.h            | 1 -
 libswscale/swscale.h          | 1 +
 libswscale/swscale_internal.h | 1 +
 5 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 9502a0018b..a23c3cbf05 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,6 +30,7 @@
 #include "libavutil/samplefmt.h"
 #include "libavutil/avutil.h"
 #include "libavutil/cpu.h"
+#include "libavutil/pixfmt.h"
 #include "libavutil/rational.h"
 
 #include "libavcodec/version.h"
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index b84539c942..d3c977a7e2 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -24,6 +24,7 @@
 
 #include "libavutil/avutil.h"
 #include "libavutil/samplefmt.h"
+#include "libavutil/pixfmt.h"
 #include "libavutil/rational.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 0cdeef2157..782de7d2aa 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -122,6 +122,5 @@ char av_get_picture_type_char(enum AVPictureType pict_type);
 #include "common.h"
 #include "error.h"
 #include "log.h"
-#include "pixfmt.h"
 
 #endif /* AVUTIL_AVUTIL_H */
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 3899596983..451f07c72a 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -28,6 +28,7 @@
  */
 
 #include "libavutil/avutil.h"
+#include "libavutil/pixfmt.h"
 
 #define LIBSWSCALE_VERSION_MAJOR 2
 #define LIBSWSCALE_VERSION_MINOR 0
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index e4b93c595b..8d03cb629d 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -28,6 +28,7 @@
 #endif
 
 #include "libavutil/avutil.h"
+#include "libavutil/pixfmt.h"
 
 #define STR(s)         AV_TOSTRING(s) //AV_STRINGIFY is too long
 

From abc78a5a7c158e9813db502cedce096101e38890 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 4 Jun 2011 13:34:27 +0100
Subject: [PATCH 35/40] Do not include log.h in avutil.h

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/avcodec.h               | 1 +
 libavfilter/avfilter.h             | 1 +
 libavformat/avformat.h             | 1 +
 libavutil/avutil.h                 | 1 -
 libavutil/eval.c                   | 1 +
 libavutil/file.c                   | 1 +
 libavutil/imgutils.c               | 1 +
 libavutil/opt.c                    | 1 +
 libavutil/parseutils.c             | 1 +
 libpostproc/postprocess_internal.h | 1 +
 libswscale/swscale.h               | 1 +
 libswscale/swscale_internal.h      | 1 +
 12 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index a23c3cbf05..b26bac7bef 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,6 +30,7 @@
 #include "libavutil/samplefmt.h"
 #include "libavutil/avutil.h"
 #include "libavutil/cpu.h"
+#include "libavutil/log.h"
 #include "libavutil/pixfmt.h"
 #include "libavutil/rational.h"
 
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index d3c977a7e2..f8295e77c5 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -23,6 +23,7 @@
 #define AVFILTER_AVFILTER_H
 
 #include "libavutil/avutil.h"
+#include "libavutil/log.h"
 #include "libavutil/samplefmt.h"
 #include "libavutil/pixfmt.h"
 #include "libavutil/rational.h"
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 8561a50c01..12490c1209 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -41,6 +41,7 @@ const char *avformat_license(void);
 #include <stdio.h>  /* FILE */
 #include "libavcodec/avcodec.h"
 #include "libavutil/dict.h"
+#include "libavutil/log.h"
 
 #include "avio.h"
 #include "libavformat/version.h"
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index 782de7d2aa..01e4e2fb7f 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -121,6 +121,5 @@ char av_get_picture_type_char(enum AVPictureType pict_type);
 
 #include "common.h"
 #include "error.h"
-#include "log.h"
 
 #endif /* AVUTIL_AVUTIL_H */
diff --git a/libavutil/eval.c b/libavutil/eval.c
index a3788210e3..4bba343bec 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -28,6 +28,7 @@
 
 #include "avutil.h"
 #include "eval.h"
+#include "log.h"
 
 typedef struct Parser {
     const AVClass *class;
diff --git a/libavutil/file.c b/libavutil/file.c
index f0e48b5b2f..649bb767a0 100644
--- a/libavutil/file.c
+++ b/libavutil/file.c
@@ -17,6 +17,7 @@
  */
 
 #include "file.h"
+#include "log.h"
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <unistd.h>
diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 46853cafcb..5cd71e21ce 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -23,6 +23,7 @@
 
 #include "imgutils.h"
 #include "internal.h"
+#include "log.h"
 #include "pixdesc.h"
 
 void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
diff --git a/libavutil/opt.c b/libavutil/opt.c
index acb745cf3d..65e02135d5 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -30,6 +30,7 @@
 #include "opt.h"
 #include "eval.h"
 #include "dict.h"
+#include "log.h"
 
 #if FF_API_FIND_OPT
 //FIXME order them and do a bin search
diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c
index c8124c532e..9bac7ab9ce 100644
--- a/libavutil/parseutils.c
+++ b/libavutil/parseutils.c
@@ -28,6 +28,7 @@
 #include "avstring.h"
 #include "avutil.h"
 #include "eval.h"
+#include "log.h"
 #include "random_seed.h"
 #include "parseutils.h"
 
diff --git a/libpostproc/postprocess_internal.h b/libpostproc/postprocess_internal.h
index 010e629390..331a96b2dd 100644
--- a/libpostproc/postprocess_internal.h
+++ b/libpostproc/postprocess_internal.h
@@ -28,6 +28,7 @@
 
 #include <string.h>
 #include "libavutil/avutil.h"
+#include "libavutil/log.h"
 #include "postprocess.h"
 
 #define V_DEBLOCK       0x01
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 451f07c72a..80e5eaab00 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -28,6 +28,7 @@
  */
 
 #include "libavutil/avutil.h"
+#include "libavutil/log.h"
 #include "libavutil/pixfmt.h"
 
 #define LIBSWSCALE_VERSION_MAJOR 2
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 8d03cb629d..340227dc00 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -28,6 +28,7 @@
 #endif
 
 #include "libavutil/avutil.h"
+#include "libavutil/log.h"
 #include "libavutil/pixfmt.h"
 
 #define STR(s)         AV_TOSTRING(s) //AV_STRINGIFY is too long

From 4320a309ce10a7eec93aef239a0776a33b1a5a34 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Mon, 27 Jun 2011 13:07:26 -0700
Subject: [PATCH 36/40] H.264: make filter_mb_fast support the case of
 unavailable top mb

Significantly faster deblocking in streams with lots of slices.
---
 libavcodec/h264_loopfilter.c | 42 ++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index bdfbce79f0..1575b1b3d9 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -215,19 +215,20 @@ static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t
 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
     MpegEncContext * const s = &h->s;
     int mb_xy;
-    int mb_type, left_type;
+    int mb_type, left_type, top_type;
     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
     int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
     int chroma444 = CHROMA444;
 
     mb_xy = h->mb_xy;
 
-    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
+    if(!h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
         return;
     }
     assert(!FRAME_MBAFF);
     left_type= h->left_type[0];
+    top_type= h->top_type;
 
     mb_type = s->current_picture.mb_type[mb_xy];
     qp = s->current_picture.qscale_table[mb_xy];
@@ -253,13 +254,17 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
             filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
         if( IS_8x8DCT(mb_type) ) {
             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
-            filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
+            if(top_type){
+                filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
+            }
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
         } else {
             filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h);
             filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
             filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h);
-            filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
+            if(top_type){
+                filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
+            }
             filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h);
             filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
             filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
@@ -273,8 +278,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
                 if( IS_8x8DCT(mb_type) ) {
                     filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h);
                     filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
-                    filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
-                    filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+                    if(top_type){
+                        filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
+                        filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+                    }
                     filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
                     filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h);
                 } else {
@@ -284,8 +291,10 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
                     filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
                     filter_mb_edgev( &img_cb[4*3], linesize, bS3, qpc, h);
                     filter_mb_edgev( &img_cr[4*3], linesize, bS3, qpc, h);
-                    filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
-                    filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+                    if(top_type){
+                        filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
+                        filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+                    }
                     filter_mb_edgeh( &img_cb[4*1*linesize], linesize, bS3, qpc, h);
                     filter_mb_edgeh( &img_cr[4*1*linesize], linesize, bS3, qpc, h);
                     filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
@@ -300,9 +309,11 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
                 }
                 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
                 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
-                filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+                if(top_type){
+                    filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+                    filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+                }
                 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
-                filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
                 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
             }
         }
@@ -326,7 +337,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
         }
         if( IS_INTRA(left_type) )
             AV_WN64A(bS[0][0], 0x0004000400040004ULL);
-        if( IS_INTRA(h->top_type) )
+        if( IS_INTRA(top_type) )
             AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL);
 
 #define FILTER(hv,dir,edge)\
@@ -345,16 +356,19 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
         if(left_type)
             FILTER(v,0,0);
         if( edges == 1 ) {
-            FILTER(h,1,0);
+            if(top_type)
+                FILTER(h,1,0);
         } else if( IS_8x8DCT(mb_type) ) {
             FILTER(v,0,2);
-            FILTER(h,1,0);
+            if(top_type)
+                FILTER(h,1,0);
             FILTER(h,1,2);
         } else {
             FILTER(v,0,1);
             FILTER(v,0,2);
             FILTER(v,0,3);
-            FILTER(h,1,0);
+            if(top_type)
+                FILTER(h,1,0);
             FILTER(h,1,1);
             FILTER(h,1,2);
             FILTER(h,1,3);

From cb5469462d427ea38625e255306f07b37d75280f Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Mon, 27 Jun 2011 17:41:28 -0700
Subject: [PATCH 37/40] H.264: faster fill_filter_caches

Reduce aliasing problems and unroll mv/ref loop.
---
 libavcodec/h264.c | 228 ++++++++++++++++++++++------------------------
 1 file changed, 110 insertions(+), 118 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 37dab73dc0..b5a9bc0dfc 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3054,6 +3054,82 @@ int ff_h264_get_slice_type(const H264Context *h)
     }
 }
 
+static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
+                                                      int left_xy[2], int top_type, int left_type[2], int mb_xy, int list)
+{ /* Fills h->mv_cache[list] and h->ref_cache[list] for one reference list: the current MB plus the row above and the column to its left. */
+    int b_stride = h->b_stride;
+    int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]]; /* cache origin of the current MB; neighbours live at -1*8 (above) and -1 (left) */
+    int8_t *ref_cache = &h->ref_cache[list][scan8[0]]; /* same origin, in the reference cache */
+    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ /* neighbour entries are only written for inter/direct MBs */
+        if(USES_LIST(top_type, list)){
+            const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride; /* row 3 of the top MB's motion vectors (presumably its bottom row) */
+            const int b8_xy= 4*top_xy + 2; /* ref_index entries 2 and 3 of the top MB */
+            int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); /* ref mapping of the slice recorded for the top MB */
+            AV_COPY128(mv_dst - 1*8, s->current_picture.motion_val[list][b_xy + 0]); /* 128 bits = four int16[2] vectors */
+            ref_cache[0 - 1*8]=
+            ref_cache[1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; /* one stored ref covers two cache columns */
+            ref_cache[2 - 1*8]=
+            ref_cache[3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
+        }else{ /* top MB does not use this list: zero vectors, all four refs = LIST_NOT_USED */
+            AV_ZERO128(mv_dst - 1*8);
+            AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); /* low byte replicated into all four bytes */
+        }
+
+        if(!IS_INTERLACED(mb_type^left_type[0])){ /* presumably: current and left MB agree on the interlaced flag -- TODO confirm macro semantics */
+            if(USES_LIST(left_type[0], list)){
+                const int b_xy= h->mb2b_xy[left_xy[0]] + 3; /* column 3 of the left MB's motion vectors */
+                const int b8_xy= 4*left_xy[0] + 1; /* ref_index entries 1 and 3 of the left MB (read below with +2*0 / +2*1) */
+                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); /* ref mapping of the slice recorded for the left MB */
+                AV_COPY32(mv_dst - 1 + 0, s->current_picture.motion_val[list][b_xy + b_stride*0]); /* one vector per cache row; cache rows are 8 entries apart */
+                AV_COPY32(mv_dst - 1 + 8, s->current_picture.motion_val[list][b_xy + b_stride*1]);
+                AV_COPY32(mv_dst - 1 +16, s->current_picture.motion_val[list][b_xy + b_stride*2]);
+                AV_COPY32(mv_dst - 1 +24, s->current_picture.motion_val[list][b_xy + b_stride*3]);
+                ref_cache[-1 +  0]=
+                ref_cache[-1 +  8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]]; /* one stored ref covers two cache rows */
+                ref_cache[-1 + 16]=
+                ref_cache[-1 + 24]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
+            }else{ /* left MB does not use this list: zero vectors, refs = LIST_NOT_USED */
+                AV_ZERO32(mv_dst - 1 + 0);
+                AV_ZERO32(mv_dst - 1 + 8);
+                AV_ZERO32(mv_dst - 1 +16);
+                AV_ZERO32(mv_dst - 1 +24);
+                ref_cache[-1 +  0]=
+                ref_cache[-1 +  8]=
+                ref_cache[-1 + 16]=
+                ref_cache[-1 + 24]= LIST_NOT_USED;
+            }
+        }
+    }
+
+    if(!USES_LIST(mb_type, list)){ /* current MB does not use this list: clear its own 4x4 area and stop */
+        fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0,0), 4); /* presumably a 4x4 area with row stride 8 and 4-byte elements -- verify against fill_rectangle */
+        AV_WN32A(&ref_cache[0*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+        AV_WN32A(&ref_cache[1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+        AV_WN32A(&ref_cache[2*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+        AV_WN32A(&ref_cache[3*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+        return;
+    }
+
+    { /* reference indices of the current MB, mapped through the current slice's ref2frm table */
+        int8_t *ref = &s->current_picture.ref_index[list][4*mb_xy];
+        int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; /* mask keeps one byte per ref; *0x0101 duplicates each into the adjacent byte */
+        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]],ref2frm[list][ref[3]])&0x00FF00FF)*0x0101;
+        AV_WN32A(&ref_cache[0*8], ref01); /* cache rows 0 and 1 share refs 0/1 */
+        AV_WN32A(&ref_cache[1*8], ref01);
+        AV_WN32A(&ref_cache[2*8], ref23); /* cache rows 2 and 3 share refs 2/3 */
+        AV_WN32A(&ref_cache[3*8], ref23);
+    }
+
+    { /* motion vectors of the current MB: four rows of 128 bits each, written out without a loop */
+        int16_t (*mv_src)[2] = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
+        AV_COPY128(mv_dst + 8*0, mv_src + 0*b_stride);
+        AV_COPY128(mv_dst + 8*1, mv_src + 1*b_stride);
+        AV_COPY128(mv_dst + 8*2, mv_src + 2*b_stride);
+        AV_COPY128(mv_dst + 8*3, mv_src + 3*b_stride);
+    }
+}
+
 /**
  *
  * @return non zero if the loop filter can be skiped
@@ -3063,11 +3139,11 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     const int mb_xy= h->mb_xy;
     int top_xy, left_xy[2];
     int top_type, left_type[2];
+    uint8_t *nnz;
+    uint8_t *nnz_cache;
 
     top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
 
-    //FIXME deblocking could skip the intra and nnz parts.
-
     /* Wow, what a mess, why didn't they simplify the interlacing & intra
      * stuff, I can't imagine that these complex rules are worth it. */
 
@@ -3125,144 +3201,60 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     if(IS_INTRA(mb_type))
         return 0;
 
-    AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);
-    AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);
-    AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);
-    AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);
+    fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 0);
+    if(h->list_count == 2)
+        fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 1);
 
+    nnz = h->non_zero_count[mb_xy];
+    nnz_cache = h->non_zero_count_cache;
+    AV_COPY32(&nnz_cache[4+8*1], &nnz[ 0]);
+    AV_COPY32(&nnz_cache[4+8*2], &nnz[ 4]);
+    AV_COPY32(&nnz_cache[4+8*3], &nnz[ 8]);
+    AV_COPY32(&nnz_cache[4+8*4], &nnz[12]);
     h->cbp= h->cbp_table[mb_xy];
 
-    {
-        int list;
-        for(list=0; list<h->list_count; list++){
-            int8_t *ref;
-            int y, b_stride;
-            int16_t (*mv_dst)[2];
-            int16_t (*mv_src)[2];
-
-            if(!USES_LIST(mb_type, list)){
-                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
-                AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
-                AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
-                AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
-                AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
-                continue;
-            }
-
-            ref = &s->current_picture.ref_index[list][4*mb_xy];
-            {
-                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
-                AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
-                ref += 2;
-                AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
-                AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
-            }
-
-            b_stride = h->b_stride;
-            mv_dst   = &h->mv_cache[list][scan8[0]];
-            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
-            for(y=0; y<4; y++){
-                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
-            }
-
-        }
-    }
-
-
-/*
-0 . T T. T T T T
-1 L . .L . . . .
-2 L . .L . . . .
-3 . T TL . . . .
-4 L . .L . . . .
-5 L . .. . . . .
-*/
-//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
-        AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);
+        nnz = h->non_zero_count[top_xy];
+        AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
     }
 
     if(left_type[0]){
-        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];
-        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];
-        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];
-        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];
+        nnz = h->non_zero_count[left_xy[0]];
+        nnz_cache[3+8*1]= nnz[3+0*4];
+        nnz_cache[3+8*2]= nnz[3+1*4];
+        nnz_cache[3+8*3]= nnz[3+2*4];
+        nnz_cache[3+8*4]= nnz[3+3*4];
     }
 
     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
     if(!CABAC && h->pps.transform_8x8_mode){
         if(IS_8x8DCT(top_type)){
-            h->non_zero_count_cache[4+8*0]=
-            h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
-            h->non_zero_count_cache[6+8*0]=
-            h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
+            nnz_cache[4+8*0]=
+            nnz_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
+            nnz_cache[6+8*0]=
+            nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
         }
         if(IS_8x8DCT(left_type[0])){
-            h->non_zero_count_cache[3+8*1]=
-            h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
+            nnz_cache[3+8*1]=
+            nnz_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
         }
         if(IS_8x8DCT(left_type[1])){
-            h->non_zero_count_cache[3+8*3]=
-            h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
+            nnz_cache[3+8*3]=
+            nnz_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
         }
 
         if(IS_8x8DCT(mb_type)){
-            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
-            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
+            nnz_cache[scan8[0   ]]= nnz_cache[scan8[1   ]]=
+            nnz_cache[scan8[2   ]]= nnz_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
 
-            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
-            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
+            nnz_cache[scan8[0+ 4]]= nnz_cache[scan8[1+ 4]]=
+            nnz_cache[scan8[2+ 4]]= nnz_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
 
-            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
-            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
+            nnz_cache[scan8[0+ 8]]= nnz_cache[scan8[1+ 8]]=
+            nnz_cache[scan8[2+ 8]]= nnz_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
 
-            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
-            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
-        }
-    }
-
-    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
-        int list;
-        for(list=0; list<h->list_count; list++){
-            if(USES_LIST(top_type, list)){
-                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
-                const int b8_xy= 4*top_xy + 2;
-                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
-                h->ref_cache[list][scan8[0] + 0 - 1*8]=
-                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
-                h->ref_cache[list][scan8[0] + 2 - 1*8]=
-                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
-            }else{
-                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
-                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
-            }
-
-            if(!IS_INTERLACED(mb_type^left_type[0])){
-                if(USES_LIST(left_type[0], list)){
-                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
-                    const int b8_xy= 4*left_xy[0] + 1;
-                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
-                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
-                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
-                    h->ref_cache[list][scan8[0] - 1 +16 ]=
-                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
-                }else{
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
-                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
-                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
-                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
-                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
-                }
-            }
+            nnz_cache[scan8[0+12]]= nnz_cache[scan8[1+12]]=
+            nnz_cache[scan8[2+12]]= nnz_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
         }
     }
 

From 3b7ebeb4d52a25c7e1038ae90c6c19b0d6f11877 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Wed, 29 Jun 2011 13:27:36 -0700
Subject: [PATCH 38/40] H.264: faster write_back_*

Avoid aliasing, unroll loops, and inline more functions.
---
 libavcodec/h264.c       |   9 ---
 libavcodec/h264.h       | 123 ++++++++++++++++++++++------------------
 libavcodec/h264_cabac.c |  21 +++----
 libavcodec/h264_cavlc.c |   2 +-
 libavcodec/svq3.c       |   2 +-
 5 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index b5a9bc0dfc..db3d93d5d7 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -60,15 +60,6 @@ static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
     PIX_FMT_NONE
 };
 
-void ff_h264_write_back_intra_pred_mode(H264Context *h){
-    int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
-
-    AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
-    mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
-    mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
-    mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
-}
-
 /**
  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
  */
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index e3cc815565..8e04db4f22 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -658,7 +658,6 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h);
  */
 int ff_h264_check_intra_pred_mode(H264Context *h, int mode);
 
-void ff_h264_write_back_intra_pred_mode(H264Context *h);
 void ff_h264_hl_decode_mb(H264Context *h);
 int ff_h264_frame_start(H264Context *h);
 int ff_h264_decode_extradata(H264Context *h);
@@ -1185,7 +1184,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 /**
  * gets the predicted intra4x4 prediction mode.
  */
-static inline int pred_intra_mode(H264Context *h, int n){
+static av_always_inline int pred_intra_mode(H264Context *h, int n){
     const int index8= scan8[n];
     const int left= h->intra4x4_pred_mode_cache[index8 - 1];
     const int top = h->intra4x4_pred_mode_cache[index8 - 8];
@@ -1197,69 +1196,83 @@ static inline int pred_intra_mode(H264Context *h, int n){
     else      return min;
 }
 
-static inline void write_back_non_zero_count(H264Context *h){
-    const int mb_xy= h->mb_xy;
+static av_always_inline void write_back_intra_pred_mode(H264Context *h){
+    int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
+    int8_t *i4x4_cache= h->intra4x4_pred_mode_cache;
 
-    AV_COPY32(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[4+8* 1]);
-    AV_COPY32(&h->non_zero_count[mb_xy][ 4], &h->non_zero_count_cache[4+8* 2]);
-    AV_COPY32(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[4+8* 3]);
-    AV_COPY32(&h->non_zero_count[mb_xy][12], &h->non_zero_count_cache[4+8* 4]);
-    AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[4+8* 6]);
-    AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8* 7]);
-    AV_COPY32(&h->non_zero_count[mb_xy][32], &h->non_zero_count_cache[4+8*11]);
-    AV_COPY32(&h->non_zero_count[mb_xy][36], &h->non_zero_count_cache[4+8*12]);
+    AV_COPY32(i4x4, i4x4_cache + 4 + 8*4);
+    i4x4[4]= i4x4_cache[7+8*3];
+    i4x4[5]= i4x4_cache[7+8*2];
+    i4x4[6]= i4x4_cache[7+8*1];
+}
+
+static av_always_inline void write_back_non_zero_count(H264Context *h){
+    const int mb_xy= h->mb_xy;
+    uint8_t *nnz = h->non_zero_count[mb_xy];
+    uint8_t *nnz_cache = h->non_zero_count_cache;
+
+    AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]);
+    AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]);
+    AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]);
+    AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]);
+    AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]);
+    AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]);
+    AV_COPY32(&nnz[32], &nnz_cache[4+8*11]);
+    AV_COPY32(&nnz[36], &nnz_cache[4+8*12]);
 
     if(CHROMA444){
-        AV_COPY32(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[4+8* 8]);
-        AV_COPY32(&h->non_zero_count[mb_xy][28], &h->non_zero_count_cache[4+8* 9]);
-        AV_COPY32(&h->non_zero_count[mb_xy][40], &h->non_zero_count_cache[4+8*13]);
-        AV_COPY32(&h->non_zero_count[mb_xy][44], &h->non_zero_count_cache[4+8*14]);
+        AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]);
+        AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]);
+        AV_COPY32(&nnz[40], &nnz_cache[4+8*13]);
+        AV_COPY32(&nnz[44], &nnz_cache[4+8*14]);
     }
 }
 
-static inline void write_back_motion(H264Context *h, int mb_type){
+static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride,
+                                                    int b_xy, int b8_xy, int mb_type, int list )
+{ /* Stores one list's cached motion vectors, (with CABAC) mvd values and reference indices of the current MB back into the per-picture / per-context tables. */
+    int16_t (*mv_dst)[2] = &s->current_picture.motion_val[list][b_xy]; /* destination: picture-wide mv array, rows b_stride apart */
+    int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]]; /* source: cache, rows 8 entries apart */
+    AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0); /* 128 bits = four int16[2] vectors per row */
+    AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1);
+    AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2);
+    AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3);
+    if( CABAC ) { /* mvd values are only written back when CABAC is set */
+        uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; /* table index differs depending on FMO */
+        uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
+        if(IS_SKIP(mb_type))
+            AV_ZERO128(mvd_dst); /* skipped MB: clear 128 bits (eight uint8[2] entries) */
+        else{
+            AV_COPY64(mvd_dst, mvd_src + 8*3); /* entries 0..3: cache row 3 */
+            AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); /* entries 6,5,4: cache column 3 of rows 0,1,2 */
+            AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
+            AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
+        }
+    }
+
+    { /* four reference indices per MB, read from cache positions scan8[0], [4], [8] and [12] */
+        int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
+        int8_t *ref_cache = h->ref_cache[list];
+        ref_index[0+0*2]= ref_cache[scan8[0]];
+        ref_index[1+0*2]= ref_cache[scan8[4]];
+        ref_index[0+1*2]= ref_cache[scan8[8]];
+        ref_index[1+1*2]= ref_cache[scan8[12]];
+    }
+}
+
+static av_always_inline void write_back_motion(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
+    const int b_stride = h->b_stride;
     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy
     const int b8_xy= 4*h->mb_xy;
-    int list;
 
-    if(!USES_LIST(mb_type, 0))
+    if(USES_LIST(mb_type, 0)){
+        write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0);
+    }else{
         fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
-
-    for(list=0; list<h->list_count; list++){
-        int y, b_stride;
-        int16_t (*mv_dst)[2];
-        int16_t (*mv_src)[2];
-
-        if(!USES_LIST(mb_type, list))
-            continue;
-
-        b_stride = h->b_stride;
-        mv_dst   = &s->current_picture.motion_val[list][b_xy];
-        mv_src   = &h->mv_cache[list][scan8[0]];
-        for(y=0; y<4; y++){
-            AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y);
-        }
-        if( CABAC ) {
-            uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
-            uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
-            if(IS_SKIP(mb_type))
-                AV_ZERO128(mvd_dst);
-            else{
-            AV_COPY64(mvd_dst, mvd_src + 8*3);
-                AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
-                AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
-                AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
-            }
-        }
-
-        {
-            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
-            ref_index[0+0*2]= h->ref_cache[list][scan8[0]];
-            ref_index[1+0*2]= h->ref_cache[list][scan8[4]];
-            ref_index[0+1*2]= h->ref_cache[list][scan8[8]];
-            ref_index[1+1*2]= h->ref_cache[list][scan8[12]];
-        }
+    }
+    if(USES_LIST(mb_type, 1)){
+        write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1);
     }
 
     if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){
@@ -1272,7 +1285,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
     }
 }
 
-static inline int get_dct8x8_allowed(H264Context *h){
+static av_always_inline int get_dct8x8_allowed(H264Context *h){
     if(h->sps.direct_8x8_inference_flag)
         return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
     else
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index f30f4e1c9c..a643297f8a 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1999,7 +1999,7 @@ decode_intra_mb:
                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
                 }
             }
-            ff_h264_write_back_intra_pred_mode(h);
+            write_back_intra_pred_mode(h);
             if( ff_h264_check_intra4x4_pred_mode(h) < 0 ) return -1;
         } else {
             h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode );
@@ -2248,21 +2248,22 @@ decode_intra_mb:
      * the transform mode of the current macroblock there. */
     if (CHROMA444 && IS_8x8DCT(mb_type)){
         int i;
+        uint8_t *nnz_cache = h->non_zero_count_cache;
         for (i = 0; i < 2; i++){
             if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){
-                h->non_zero_count_cache[3+8* 1 + 2*8*i]=
-                h->non_zero_count_cache[3+8* 2 + 2*8*i]=
-                h->non_zero_count_cache[3+8* 6 + 2*8*i]=
-                h->non_zero_count_cache[3+8* 7 + 2*8*i]=
-                h->non_zero_count_cache[3+8*11 + 2*8*i]=
-                h->non_zero_count_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
+                nnz_cache[3+8* 1 + 2*8*i]=
+                nnz_cache[3+8* 2 + 2*8*i]=
+                nnz_cache[3+8* 6 + 2*8*i]=
+                nnz_cache[3+8* 7 + 2*8*i]=
+                nnz_cache[3+8*11 + 2*8*i]=
+                nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0;
             }
         }
         if (h->top_type && !IS_8x8DCT(h->top_type)){
             uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
-            AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
-            AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
-            AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
+            AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+            AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+            AV_WN32A(&nnz_cache[4+8*10], top_empty);
         }
     }
     s->current_picture.mb_type[mb_xy]= mb_type;
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 497166b423..90c411002e 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -731,7 +731,7 @@ decode_intra_mb:
                 else
                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
             }
-            ff_h264_write_back_intra_pred_mode(h);
+            write_back_intra_pred_mode(h);
             if( ff_h264_check_intra4x4_pred_mode(h) < 0)
                 return -1;
         }else{
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 23ab209312..a88b069daf 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -589,7 +589,7 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
             }
         }
 
-        ff_h264_write_back_intra_pred_mode(h);
+        write_back_intra_pred_mode(h);
 
         if (mb_type == 8) {
             ff_h264_check_intra4x4_pred_mode(h);

From ca80f11ec30834566f7b16c46a8f4eeacc9c2ce4 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Wed, 29 Jun 2011 15:02:31 -0700
Subject: [PATCH 39/40] H.264: faster fill_decode_caches

Aliasing avoidance and general cleanup.
---
 libavcodec/h264.h | 241 +++++++++++++++++++++++-----------------------
 1 file changed, 121 insertions(+), 120 deletions(-)

diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 8e04db4f22..6afbced56e 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -307,11 +307,6 @@ typedef struct H264Context{
 #define LIST_NOT_USED -1 //FIXME rename?
 #define PART_NOT_AVAILABLE -2
 
-    /**
-     * is 1 if the specific list MV&references are set to 0,0,-2.
-     */
-    int mv_cache_clean[2];
-
     /**
      * number of neighbors (top and/or left) that used 8x8 dct
      */
@@ -857,6 +852,8 @@ static void fill_decode_caches(H264Context *h, int mb_type){
     int topleft_type, top_type, topright_type, left_type[2];
     const uint8_t * left_block= h->left_block;
     int i;
+    uint8_t *nnz;
+    uint8_t *nnz_cache;
 
     topleft_xy   = h->topleft_mb_xy ;
     top_xy       = h->top_mb_xy     ;
@@ -946,42 +943,45 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 5 L . .. . . . .
 */
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
+    nnz_cache = h->non_zero_count_cache;
     if(top_type){
-        AV_COPY32(&h->non_zero_count_cache[4+8* 0], &h->non_zero_count[top_xy][4*3]);
+        nnz = h->non_zero_count[top_xy];
+        AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]);
         if(CHROMA444){
-            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 7]);
-            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4*11]);
+            AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]);
+            AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]);
         }else{
-            AV_COPY32(&h->non_zero_count_cache[4+8* 5], &h->non_zero_count[top_xy][4* 5]);
-            AV_COPY32(&h->non_zero_count_cache[4+8*10], &h->non_zero_count[top_xy][4* 9]);
+            AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 5]);
+            AV_COPY32(&nnz_cache[4+8*10], &nnz[4* 9]);
         }
     }else{
         uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
-        AV_WN32A(&h->non_zero_count_cache[4+8* 0], top_empty);
-        AV_WN32A(&h->non_zero_count_cache[4+8* 5], top_empty);
-        AV_WN32A(&h->non_zero_count_cache[4+8*10], top_empty);
+        AV_WN32A(&nnz_cache[4+8* 0], top_empty);
+        AV_WN32A(&nnz_cache[4+8* 5], top_empty);
+        AV_WN32A(&nnz_cache[4+8*10], top_empty);
     }
 
     for (i=0; i<2; i++) {
         if(left_type[i]){
-            h->non_zero_count_cache[3+8* 1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
-            h->non_zero_count_cache[3+8* 2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
+            nnz = h->non_zero_count[left_xy[i]];
+            nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]];
+            nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]];
             if(CHROMA444){
-                h->non_zero_count_cache[3+8* 6 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+4*4];
-                h->non_zero_count_cache[3+8* 7 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+4*4];
-                h->non_zero_count_cache[3+8*11 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]+8*4];
-                h->non_zero_count_cache[3+8*12 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]+8*4];
+                nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]+4*4];
+                nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4];
+                nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4];
+                nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4];
             }else{
-                h->non_zero_count_cache[3+8* 6 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
-                h->non_zero_count_cache[3+8*11 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
+                nnz_cache[3+8* 6 +   8*i]= nnz[left_block[8+4+2*i]];
+                nnz_cache[3+8*11 +   8*i]= nnz[left_block[8+5+2*i]];
             }
         }else{
-            h->non_zero_count_cache[3+8* 1 + 2*8*i]=
-            h->non_zero_count_cache[3+8* 2 + 2*8*i]=
-            h->non_zero_count_cache[3+8* 6 + 2*8*i]=
-            h->non_zero_count_cache[3+8* 7 + 2*8*i]=
-            h->non_zero_count_cache[3+8*11 + 2*8*i]=
-            h->non_zero_count_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
+            nnz_cache[3+8* 1 + 2*8*i]=
+            nnz_cache[3+8* 2 + 2*8*i]=
+            nnz_cache[3+8* 6 + 2*8*i]=
+            nnz_cache[3+8* 7 + 2*8*i]=
+            nnz_cache[3+8*11 + 2*8*i]=
+            nnz_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
         }
     }
 
@@ -1005,144 +1005,145 @@ static void fill_decode_caches(H264Context *h, int mb_type){
 
     if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
         int list;
+        int b_stride = h->b_stride;
         for(list=0; list<h->list_count; list++){
+            int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
+            int8_t *ref = s->current_picture.ref_index[list];
+            int16_t (*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
+            int16_t (*mv)[2] = s->current_picture.motion_val[list];
             if(!USES_LIST(mb_type, list)){
-                /*if(!h->mv_cache_clean[list]){
-                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
-                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
-                    h->mv_cache_clean[list]= 1;
-                }*/
                 continue;
             }
             assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
 
-            h->mv_cache_clean[list]= 0;
-
             if(USES_LIST(top_type, list)){
-                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
-                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
-                    h->ref_cache[list][scan8[0] + 0 - 1*8]=
-                    h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 2];
-                    h->ref_cache[list][scan8[0] + 2 - 1*8]=
-                    h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][4*top_xy + 3];
+                const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride;
+                AV_COPY128(mv_cache[0 - 1*8], mv[b_xy + 0]);
+                ref_cache[0 - 1*8]=
+                ref_cache[1 - 1*8]= ref[4*top_xy + 2];
+                ref_cache[2 - 1*8]=
+                ref_cache[3 - 1*8]= ref[4*top_xy + 3];
             }else{
-                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
-                AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
+                AV_ZERO128(mv_cache[0 - 1*8]);
+                AV_WN32A(&ref_cache[0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
             }
 
             if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
             for(i=0; i<2; i++){
-                int cache_idx = scan8[0] - 1 + i*2*8;
+                int cache_idx = -1 + i*2*8;
                 if(USES_LIST(left_type[i], list)){
                     const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                     const int b8_xy= 4*left_xy[i] + 1;
-                    AV_COPY32(h->mv_cache[list][cache_idx  ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]);
-                    AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]);
-                        h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + (left_block[0+i*2]&~1)];
-                        h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + (left_block[1+i*2]&~1)];
+                    AV_COPY32(mv_cache[cache_idx  ], mv[b_xy + b_stride*left_block[0+i*2]]);
+                    AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]);
+                    ref_cache[cache_idx  ]= ref[b8_xy + (left_block[0+i*2]&~1)];
+                    ref_cache[cache_idx+8]= ref[b8_xy + (left_block[1+i*2]&~1)];
                 }else{
-                    AV_ZERO32(h->mv_cache [list][cache_idx  ]);
-                    AV_ZERO32(h->mv_cache [list][cache_idx+8]);
-                    h->ref_cache[list][cache_idx  ]=
-                    h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                    AV_ZERO32(mv_cache[cache_idx  ]);
+                    AV_ZERO32(mv_cache[cache_idx+8]);
+                    ref_cache[cache_idx  ]=
+                    ref_cache[cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                 }
             }
             }else{
                 if(USES_LIST(left_type[0], list)){
                     const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                     const int b8_xy= 4*left_xy[0] + 1;
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]);
-                    h->ref_cache[list][scan8[0] - 1]= s->current_picture.ref_index[list][b8_xy + (left_block[0]&~1)];
+                    AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]);
+                    ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)];
                 }else{
-                    AV_ZERO32(h->mv_cache [list][scan8[0] - 1]);
-                    h->ref_cache[list][scan8[0] - 1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                    AV_ZERO32(mv_cache[-1]);
+                    ref_cache[-1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                 }
             }
 
             if(USES_LIST(topright_type, list)){
-                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
-                AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]);
-                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][4*topright_xy + 2];
+                const int b_xy= h->mb2b_xy[topright_xy] + 3*b_stride;
+                AV_COPY32(mv_cache[4 - 1*8], mv[b_xy]);
+                ref_cache[4 - 1*8]= ref[4*topright_xy + 2];
             }else{
-                AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]);
-                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                AV_ZERO32(mv_cache[4 - 1*8]);
+                ref_cache[4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
             }
-            if(h->ref_cache[list][scan8[0] + 4 - 1*8] < 0){
+            if(ref_cache[4 - 1*8] < 0){
                 if(USES_LIST(topleft_type, list)){
-                    const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride);
+                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride);
                     const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2);
-                    AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]);
-                    h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+                    AV_COPY32(mv_cache[-1 - 1*8], mv[b_xy]);
+                    ref_cache[-1 - 1*8]= ref[b8_xy];
                 }else{
-                    AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]);
-                    h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                    AV_ZERO32(mv_cache[-1 - 1*8]);
+                    ref_cache[-1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                 }
             }
 
             if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
                 continue;
 
-            if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))) {
-            h->ref_cache[list][scan8[4 ]] =
-            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
-            AV_ZERO32(h->mv_cache [list][scan8[4 ]]);
-            AV_ZERO32(h->mv_cache [list][scan8[12]]);
+            if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))){
+                uint8_t (*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]];
+                uint8_t (*mvd)[2] = h->mvd_table[list];
+                ref_cache[2+8*0] =
+                ref_cache[2+8*2] = PART_NOT_AVAILABLE;
+                AV_ZERO32(mv_cache[2+8*0]);
+                AV_ZERO32(mv_cache[2+8*2]);
 
-            if( CABAC ) {
-                /* XXX beurk, Load mvd */
-                if(USES_LIST(top_type, list)){
-                    const int b_xy= h->mb2br_xy[top_xy];
-                    AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]);
-                }else{
-                    AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
-                }
-                if(USES_LIST(left_type[0], list)){
-                    const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
-                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy - left_block[0]]);
-                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy - left_block[1]]);
-                }else{
-                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
-                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
-                }
-                if(USES_LIST(left_type[1], list)){
-                    const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
-                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy - left_block[2]]);
-                    AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy - left_block[3]]);
-                }else{
-                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
-                    AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
-                }
-                AV_ZERO16(h->mvd_cache [list][scan8[4 ]]);
-                AV_ZERO16(h->mvd_cache [list][scan8[12]]);
-                if(h->slice_type_nos == AV_PICTURE_TYPE_B){
-                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);
-
-                    if(IS_DIRECT(top_type)){
-                        AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
-                    }else if(IS_8X8(top_type)){
-                        int b8_xy = 4*top_xy;
-                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2];
-                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3];
+                if( CABAC ) {
+                    if(USES_LIST(top_type, list)){
+                        const int b_xy= h->mb2br_xy[top_xy];
+                        AV_COPY64(mvd_cache[0 - 1*8], mvd[b_xy + 0]);
                     }else{
-                        AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1));
+                        AV_ZERO64(mvd_cache[0 - 1*8]);
                     }
+                    if(USES_LIST(left_type[0], list)){
+                        const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
+                        AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]);
+                        AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]);
+                    }else{
+                        AV_ZERO16(mvd_cache[-1 + 0*8]);
+                        AV_ZERO16(mvd_cache[-1 + 1*8]);
+                    }
+                    if(USES_LIST(left_type[1], list)){
+                        const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
+                        AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]);
+                        AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]);
+                    }else{
+                        AV_ZERO16(mvd_cache[-1 + 2*8]);
+                        AV_ZERO16(mvd_cache[-1 + 3*8]);
+                    }
+                    AV_ZERO16(mvd_cache[2+8*0]);
+                    AV_ZERO16(mvd_cache[2+8*2]);
+                    if(h->slice_type_nos == AV_PICTURE_TYPE_B){
+                        uint8_t *direct_cache = &h->direct_cache[scan8[0]];
+                        uint8_t *direct_table = h->direct_table;
+                        fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16>>1, 1);
 
-                    if(IS_DIRECT(left_type[0]))
-                        h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
-                    else if(IS_8X8(left_type[0]))
-                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)];
-                    else
-                        h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1;
+                        if(IS_DIRECT(top_type)){
+                            AV_WN32A(&direct_cache[-1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
+                        }else if(IS_8X8(top_type)){
+                            int b8_xy = 4*top_xy;
+                            direct_cache[0 - 1*8]= direct_table[b8_xy + 2];
+                            direct_cache[2 - 1*8]= direct_table[b8_xy + 3];
+                        }else{
+                            AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1));
+                        }
 
-                    if(IS_DIRECT(left_type[1]))
-                        h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1;
-                    else if(IS_8X8(left_type[1]))
-                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)];
-                    else
-                        h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1;
+                        if(IS_DIRECT(left_type[0]))
+                            direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1;
+                        else if(IS_8X8(left_type[0]))
+                            direct_cache[-1 + 0*8]= direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)];
+                        else
+                            direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1;
+
+                        if(IS_DIRECT(left_type[1]))
+                            direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1;
+                        else if(IS_8X8(left_type[1]))
+                            direct_cache[-1 + 2*8]= direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)];
+                        else
+                            direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1;
+                    }
                 }
             }
-            }
             if(FRAME_MBAFF){
 #define MAP_MVS\
                     MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\

From 556f8a066cb33241bf29e85d7e24c9acf7ea9043 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <jason@x264.com>
Date: Wed, 29 Jun 2011 15:38:39 -0700
Subject: [PATCH 40/40] H.264: template left MB handling

Faster H.264 decoding with ALLOW_INTERLACE off.
---
 libavcodec/h264.c            |  66 ++++++++--------
 libavcodec/h264.h            | 142 ++++++++++++++++++-----------------
 libavcodec/h264_cabac.c      |  12 +--
 libavcodec/h264_loopfilter.c |  14 ++--
 4 files changed, 121 insertions(+), 113 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index db3d93d5d7..dbac4fda95 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3046,7 +3046,7 @@ int ff_h264_get_slice_type(const H264Context *h)
 }
 
 static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
-                                                      int left_xy[2], int top_type, int left_type[2], int mb_xy, int list)
+                                                      int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
 {
     int b_stride = h->b_stride;
     int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
@@ -3066,11 +3066,11 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncCon
             AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
         }
 
-        if(!IS_INTERLACED(mb_type^left_type[0])){
-            if(USES_LIST(left_type[0], list)){
-                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
-                const int b8_xy= 4*left_xy[0] + 1;
-                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+        if(!IS_INTERLACED(mb_type^left_type[LTOP])){
+            if(USES_LIST(left_type[LTOP], list)){
+                const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
+                const int b8_xy= 4*left_xy[LTOP] + 1;
+                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                 AV_COPY32(mv_dst - 1 + 0, s->current_picture.motion_val[list][b_xy + b_stride*0]);
                 AV_COPY32(mv_dst - 1 + 8, s->current_picture.motion_val[list][b_xy + b_stride*1]);
                 AV_COPY32(mv_dst - 1 +16, s->current_picture.motion_val[list][b_xy + b_stride*2]);
@@ -3128,8 +3128,8 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncCon
 static int fill_filter_caches(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
-    int top_xy, left_xy[2];
-    int top_type, left_type[2];
+    int top_xy, left_xy[LEFT_MBS];
+    int top_type, left_type[LEFT_MBS];
     uint8_t *nnz;
     uint8_t *nnz_cache;
 
@@ -3138,56 +3138,56 @@ static int fill_filter_caches(H264Context *h, int mb_type){
     /* Wow, what a mess, why didn't they simplify the interlacing & intra
      * stuff, I can't imagine that these complex rules are worth it. */
 
-    left_xy[1] = left_xy[0] = mb_xy-1;
+    left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
     if(FRAME_MBAFF){
         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
         if(s->mb_y&1){
             if (left_mb_field_flag != curr_mb_field_flag) {
-                left_xy[0] -= s->mb_stride;
+                left_xy[LTOP] -= s->mb_stride;
             }
         }else{
             if(curr_mb_field_flag){
                 top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
             }
             if (left_mb_field_flag != curr_mb_field_flag) {
-                left_xy[1] += s->mb_stride;
+                left_xy[LBOT] += s->mb_stride;
             }
         }
     }
 
     h->top_mb_xy = top_xy;
-    h->left_mb_xy[0] = left_xy[0];
-    h->left_mb_xy[1] = left_xy[1];
+    h->left_mb_xy[LTOP] = left_xy[LTOP];
+    h->left_mb_xy[LBOT] = left_xy[LBOT];
     {
         //for sufficiently low qp, filtering wouldn't do anything
         //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
         int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
         int qp = s->current_picture.qscale_table[mb_xy];
         if(qp <= qp_thresh
-           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
-           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
+           && (left_xy[LTOP]<0 || ((qp + s->current_picture.qscale_table[left_xy[LTOP]] + 1)>>1) <= qp_thresh)
+           && (top_xy       <0 || ((qp + s->current_picture.qscale_table[top_xy       ] + 1)>>1) <= qp_thresh)){
             if(!FRAME_MBAFF)
                 return 1;
-            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
-               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
+            if(   (left_xy[LTOP]< 0            || ((qp + s->current_picture.qscale_table[left_xy[LBOT]             ] + 1)>>1) <= qp_thresh)
+               && (top_xy       < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy       -s->mb_stride] + 1)>>1) <= qp_thresh))
                 return 1;
         }
     }
 
-    top_type     = s->current_picture.mb_type[top_xy]    ;
-    left_type[0] = s->current_picture.mb_type[left_xy[0]];
-    left_type[1] = s->current_picture.mb_type[left_xy[1]];
+    top_type        = s->current_picture.mb_type[top_xy];
+    left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]];
+    left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]];
     if(h->deblocking_filter == 2){
-        if(h->slice_table[top_xy     ] != h->slice_num) top_type= 0;
-        if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
+        if(h->slice_table[top_xy       ] != h->slice_num) top_type= 0;
+        if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
     }else{
-        if(h->slice_table[top_xy     ] == 0xFFFF) top_type= 0;
-        if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
+        if(h->slice_table[top_xy       ] == 0xFFFF) top_type= 0;
+        if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
     }
-    h->top_type    = top_type    ;
-    h->left_type[0]= left_type[0];
-    h->left_type[1]= left_type[1];
+    h->top_type       = top_type;
+    h->left_type[LTOP]= left_type[LTOP];
+    h->left_type[LBOT]= left_type[LBOT];
 
     if(IS_INTRA(mb_type))
         return 0;
@@ -3209,8 +3209,8 @@ static int fill_filter_caches(H264Context *h, int mb_type){
         AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
     }
 
-    if(left_type[0]){
-        nnz = h->non_zero_count[left_xy[0]];
+    if(left_type[LTOP]){
+        nnz = h->non_zero_count[left_xy[LTOP]];
         nnz_cache[3+8*1]= nnz[3+0*4];
         nnz_cache[3+8*2]= nnz[3+1*4];
         nnz_cache[3+8*3]= nnz[3+2*4];
@@ -3225,13 +3225,13 @@ static int fill_filter_caches(H264Context *h, int mb_type){
             nnz_cache[6+8*0]=
             nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
         }
-        if(IS_8x8DCT(left_type[0])){
+        if(IS_8x8DCT(left_type[LTOP])){
             nnz_cache[3+8*1]=
-            nnz_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF
+            nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
         }
-        if(IS_8x8DCT(left_type[1])){
+        if(IS_8x8DCT(left_type[LBOT])){
             nnz_cache[3+8*3]=
-            nnz_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF
+            nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
         }
 
         if(IS_8x8DCT(mb_type)){
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 6afbced56e..dad06e2007 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -70,6 +70,10 @@
 #define MB_FIELD h->mb_field_decoding_flag
 #define FRAME_MBAFF h->mb_aff_frame
 #define FIELD_PICTURE (s->picture_structure != PICT_FRAME)
+#define LEFT_MBS 2
+#define LTOP 0
+#define LBOT 1
+#define LEFT(i) (i)
 #else
 #define MB_MBAFF 0
 #define MB_FIELD 0
@@ -77,6 +81,10 @@
 #define FIELD_PICTURE 0
 #undef  IS_INTERLACED
 #define IS_INTERLACED(mb_type) 0
+#define LEFT_MBS 1
+#define LTOP 0
+#define LBOT 0
+#define LEFT(i) 0
 #endif
 #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
 
@@ -272,12 +280,12 @@ typedef struct H264Context{
     int topleft_mb_xy;
     int top_mb_xy;
     int topright_mb_xy;
-    int left_mb_xy[2];
+    int left_mb_xy[LEFT_MBS];
 
     int topleft_type;
     int top_type;
     int topright_type;
-    int left_type[2];
+    int left_type[LEFT_MBS];
 
     const uint8_t * left_block;
     int topleft_partition;
@@ -767,7 +775,7 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
 static void fill_decode_neighbors(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
-    int topleft_xy, top_xy, topright_xy, left_xy[2];
+    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
     static const uint8_t left_block_options[4][32]={
         {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4},
         {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4},
@@ -784,16 +792,16 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
 
     topleft_xy = top_xy - 1;
     topright_xy= top_xy + 1;
-    left_xy[1] = left_xy[0] = mb_xy-1;
+    left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
     h->left_block = left_block_options[0];
     if(FRAME_MBAFF){
         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
         if(s->mb_y&1){
             if (left_mb_field_flag != curr_mb_field_flag) {
-                left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
+                left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1;
                 if (curr_mb_field_flag) {
-                    left_xy[1] += s->mb_stride;
+                    left_xy[LBOT] += s->mb_stride;
                     h->left_block = left_block_options[3];
                 } else {
                     topleft_xy += s->mb_stride;
@@ -810,7 +818,7 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
             }
             if (left_mb_field_flag != curr_mb_field_flag) {
                 if (curr_mb_field_flag) {
-                    left_xy[1] += s->mb_stride;
+                    left_xy[LBOT] += s->mb_stride;
                     h->left_block = left_block_options[3];
                 } else {
                     h->left_block = left_block_options[2];
@@ -822,25 +830,25 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
     h->topleft_mb_xy = topleft_xy;
     h->top_mb_xy     = top_xy;
     h->topright_mb_xy= topright_xy;
-    h->left_mb_xy[0] = left_xy[0];
-    h->left_mb_xy[1] = left_xy[1];
+    h->left_mb_xy[LTOP] = left_xy[LTOP];
+    h->left_mb_xy[LBOT] = left_xy[LBOT];
     //FIXME do we need all in the context?
 
     h->topleft_type = s->current_picture.mb_type[topleft_xy] ;
     h->top_type     = s->current_picture.mb_type[top_xy]     ;
     h->topright_type= s->current_picture.mb_type[topright_xy];
-    h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ;
-    h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ;
+    h->left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]] ;
+    h->left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]] ;
 
     if(FMO){
-    if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0;
-    if(h->slice_table[top_xy     ] != h->slice_num) h->top_type     = 0;
-    if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
+    if(h->slice_table[topleft_xy    ] != h->slice_num) h->topleft_type = 0;
+    if(h->slice_table[top_xy        ] != h->slice_num) h->top_type     = 0;
+    if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
     }else{
         if(h->slice_table[topleft_xy ] != h->slice_num){
             h->topleft_type = 0;
-            if(h->slice_table[top_xy     ] != h->slice_num) h->top_type     = 0;
-            if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0;
+            if(h->slice_table[top_xy        ] != h->slice_num) h->top_type     = 0;
+            if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
         }
     }
     if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0;
@@ -848,23 +856,23 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
 
 static void fill_decode_caches(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
-    int topleft_xy, top_xy, topright_xy, left_xy[2];
-    int topleft_type, top_type, topright_type, left_type[2];
+    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
+    int topleft_type, top_type, topright_type, left_type[LEFT_MBS];
     const uint8_t * left_block= h->left_block;
     int i;
     uint8_t *nnz;
     uint8_t *nnz_cache;
 
-    topleft_xy   = h->topleft_mb_xy ;
-    top_xy       = h->top_mb_xy     ;
-    topright_xy  = h->topright_mb_xy;
-    left_xy[0]   = h->left_mb_xy[0] ;
-    left_xy[1]   = h->left_mb_xy[1] ;
-    topleft_type = h->topleft_type  ;
-    top_type     = h->top_type      ;
-    topright_type= h->topright_type ;
-    left_type[0] = h->left_type[0]  ;
-    left_type[1] = h->left_type[1]  ;
+    topleft_xy     = h->topleft_mb_xy;
+    top_xy         = h->top_mb_xy;
+    topright_xy    = h->topright_mb_xy;
+    left_xy[LTOP]  = h->left_mb_xy[LTOP];
+    left_xy[LBOT]  = h->left_mb_xy[LBOT];
+    topleft_type   = h->topleft_type;
+    top_type       = h->top_type;
+    topright_type  = h->topright_type;
+    left_type[LTOP]= h->left_type[LTOP];
+    left_type[LBOT]= h->left_type[LBOT];
 
     if(!IS_SKIP(mb_type)){
         if(IS_INTRA(mb_type)){
@@ -879,27 +887,27 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                 h->top_samples_available= 0x33FF;
                 h->topright_samples_available= 0x26EA;
             }
-            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
+            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){
                 if(IS_INTERLACED(mb_type)){
-                    if(!(left_type[0] & type_mask)){
+                    if(!(left_type[LTOP] & type_mask)){
                         h->topleft_samples_available&= 0xDFFF;
                         h->left_samples_available&= 0x5FFF;
                     }
-                    if(!(left_type[1] & type_mask)){
+                    if(!(left_type[LBOT] & type_mask)){
                         h->topleft_samples_available&= 0xFF5F;
                         h->left_samples_available&= 0xFF5F;
                     }
                 }else{
-                    int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride];
+                    int left_typei = s->current_picture.mb_type[left_xy[LTOP] + s->mb_stride];
 
-                    assert(left_xy[0] == left_xy[1]);
-                    if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
+                    assert(left_xy[LTOP] == left_xy[LBOT]);
+                    if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){
                         h->topleft_samples_available&= 0xDF5F;
                         h->left_samples_available&= 0x5F5F;
                     }
                 }
             }else{
-                if(!(left_type[0] & type_mask)){
+                if(!(left_type[LTOP] & type_mask)){
                     h->topleft_samples_available&= 0xDF5F;
                     h->left_samples_available&= 0x5F5F;
                 }
@@ -921,13 +929,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                     h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask);
                 }
                 for(i=0; i<2; i++){
-                    if(IS_INTRA4x4(left_type[i])){
-                        int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]];
+                    if(IS_INTRA4x4(left_type[LEFT(i)])){
+                        int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
                         h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]];
                         h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]];
                     }else{
                         h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
-                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask);
+                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask);
                     }
                 }
             }
@@ -962,8 +970,8 @@ static void fill_decode_caches(H264Context *h, int mb_type){
     }
 
     for (i=0; i<2; i++) {
-        if(left_type[i]){
-            nnz = h->non_zero_count[left_xy[i]];
+        if(left_type[LEFT(i)]){
+            nnz = h->non_zero_count[left_xy[LEFT(i)]];
             nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]];
             nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]];
             if(CHROMA444){
@@ -993,10 +1001,10 @@ static void fill_decode_caches(H264Context *h, int mb_type){
             h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
         }
         // left_cbp
-        if (left_type[0]) {
-            h->left_cbp =   (h->cbp_table[left_xy[0]] & 0x7F0)
-                        |  ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2)
-                        | (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2);
+        if (left_type[LTOP]) {
+            h->left_cbp =   (h->cbp_table[left_xy[LTOP]] & 0x7F0)
+                        |  ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2)
+                        | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2);
         } else {
             h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
         }
@@ -1031,9 +1039,9 @@ static void fill_decode_caches(H264Context *h, int mb_type){
             if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
             for(i=0; i<2; i++){
                 int cache_idx = -1 + i*2*8;
-                if(USES_LIST(left_type[i], list)){
-                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
-                    const int b8_xy= 4*left_xy[i] + 1;
+                if(USES_LIST(left_type[LEFT(i)], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3;
+                    const int b8_xy= 4*left_xy[LEFT(i)] + 1;
                     AV_COPY32(mv_cache[cache_idx  ], mv[b_xy + b_stride*left_block[0+i*2]]);
                     AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]);
                     ref_cache[cache_idx  ]= ref[b8_xy + (left_block[0+i*2]&~1)];
@@ -1042,18 +1050,18 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                     AV_ZERO32(mv_cache[cache_idx  ]);
                     AV_ZERO32(mv_cache[cache_idx+8]);
                     ref_cache[cache_idx  ]=
-                    ref_cache[cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                    ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                 }
             }
             }else{
-                if(USES_LIST(left_type[0], list)){
-                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
-                    const int b8_xy= 4*left_xy[0] + 1;
+                if(USES_LIST(left_type[LTOP], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
+                    const int b8_xy= 4*left_xy[LTOP] + 1;
                     AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]);
                     ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)];
                 }else{
                     AV_ZERO32(mv_cache[-1]);
-                    ref_cache[-1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+                    ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                 }
             }
 
@@ -1095,16 +1103,16 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                     }else{
                         AV_ZERO64(mvd_cache[0 - 1*8]);
                     }
-                    if(USES_LIST(left_type[0], list)){
-                        const int b_xy= h->mb2br_xy[left_xy[0]] + 6;
+                    if(USES_LIST(left_type[LTOP], list)){
+                        const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6;
                         AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]);
                         AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]);
                     }else{
                         AV_ZERO16(mvd_cache[-1 + 0*8]);
                         AV_ZERO16(mvd_cache[-1 + 1*8]);
                     }
-                    if(USES_LIST(left_type[1], list)){
-                        const int b_xy= h->mb2br_xy[left_xy[1]] + 6;
+                    if(USES_LIST(left_type[LBOT], list)){
+                        const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6;
                         AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]);
                         AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]);
                     }else{
@@ -1128,17 +1136,17 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                             AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1));
                         }
 
-                        if(IS_DIRECT(left_type[0]))
+                        if(IS_DIRECT(left_type[LTOP]))
                             direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1;
-                        else if(IS_8X8(left_type[0]))
-                            direct_cache[-1 + 0*8]= direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)];
+                        else if(IS_8X8(left_type[LTOP]))
+                            direct_cache[-1 + 0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)];
                         else
                             direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1;
 
-                        if(IS_DIRECT(left_type[1]))
+                        if(IS_DIRECT(left_type[LBOT]))
                             direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1;
-                        else if(IS_8X8(left_type[1]))
-                            direct_cache[-1 + 2*8]= direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)];
+                        else if(IS_8X8(left_type[LBOT]))
+                            direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)];
                         else
                             direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1;
                     }
@@ -1152,10 +1160,10 @@ static void fill_decode_caches(H264Context *h, int mb_type){
                     MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                     MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                     MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
-                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
-                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
-                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
-                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
+                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\
+                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\
+                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\
+                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT])
                 if(MB_FIELD){
 #define MAP_F2F(idx, mb_type)\
                     if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
@@ -1179,7 +1187,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
         }
     }
 
-        h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
+        h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
 }
 
 /**
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index a643297f8a..6dacf7a336 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1296,9 +1296,9 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
 
     if(intra_slice){
         int ctx=0;
-        if( h->left_type[0] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
+        if( h->left_type[LTOP] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
             ctx++;
-        if( h->top_type     & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
+        if( h->top_type        & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
             ctx++;
         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
             return 0;   /* I4x4 */
@@ -1376,10 +1376,10 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
     int ctx = 0;
 
     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
-    if( h->left_type[0] && h->chroma_pred_mode_table[mba_xy] != 0 )
+    if( h->left_type[LTOP] && h->chroma_pred_mode_table[mba_xy] != 0 )
         ctx++;
 
-    if( h->top_type     && h->chroma_pred_mode_table[mbb_xy] != 0 )
+    if( h->top_type        && h->chroma_pred_mode_table[mbb_xy] != 0 )
         ctx++;
 
     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
@@ -1880,7 +1880,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
         int ctx = 0;
         assert(h->slice_type_nos == AV_PICTURE_TYPE_B);
 
-        if( !IS_DIRECT( h->left_type[0]-1 ) )
+        if( !IS_DIRECT( h->left_type[LTOP]-1 ) )
             ctx++;
         if( !IS_DIRECT( h->top_type-1 ) )
             ctx++;
@@ -2250,7 +2250,7 @@ decode_intra_mb:
         int i;
         uint8_t *nnz_cache = h->non_zero_count_cache;
         for (i = 0; i < 2; i++){
-            if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){
+            if (h->left_type[LEFT(i)] && !IS_8x8DCT(h->left_type[LEFT(i)])){
                 nnz_cache[3+8* 1 + 2*8*i]=
                 nnz_cache[3+8* 2 + 2*8*i]=
                 nnz_cache[3+8* 6 + 2*8*i]=
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 1575b1b3d9..e6b6141f13 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -227,7 +227,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
         return;
     }
     assert(!FRAME_MBAFF);
-    left_type= h->left_type[0];
+    left_type= h->left_type[LTOP];
     top_type= h->top_type;
 
     mb_type = s->current_picture.mb_type[mb_xy];
@@ -329,7 +329,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
             AV_WN64A(bS[1][2], 0x0002000200020002ULL);
         } else {
             int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
-            int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
+            int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
             int step =  1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
             edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
             h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
@@ -411,7 +411,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
     int edge;
     int chroma_qp_avg[2];
     const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
-    const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type;
+    const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
 
     // how often to recheck mv-based bS when iterating between edges
     static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1},
@@ -647,9 +647,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
 
     if (FRAME_MBAFF
             // and current and left pair do not have the same interlaced type
-            && IS_INTERLACED(mb_type^h->left_type[0])
+            && IS_INTERLACED(mb_type^h->left_type[LTOP])
             // and left mb is in available to us
-            && h->left_type[0]) {
+            && h->left_type[LTOP]) {
         /* First vertical edge is different in MBAFF frames
          * There are 8 different bS to compute and 2 different Qp
          */
@@ -677,8 +677,8 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
             const uint8_t *off= offset[MB_FIELD][mb_y&1];
             for( i = 0; i < 8; i++ ) {
                 int j= MB_FIELD ? i>>2 : i&1;
-                int mbn_xy = h->left_mb_xy[j];
-                int mbn_type= h->left_type[j];
+                int mbn_xy = h->left_mb_xy[LEFT(j)];
+                int mbn_type= h->left_type[LEFT(j)];
 
                 if( IS_INTRA( mbn_type ) )
                     bS[i] = 4;