From e727bca392995e4fec0104b7f75f89a66618c120 Mon Sep 17 00:00:00 2001 From: Stefano Sabatini Date: Tue, 1 Feb 2011 12:34:23 +0100 Subject: [PATCH 01/25] lavfi: cleanup avfilter_get_audio_buffer() and pals. Remove AVFilterBufferRefAudioProps.size, and use nb_samples in its place everywhere. This is required as the size in the audio buffer may be aligned, so it may not contain a well defined number of samples. Also remove the useless planar parameter, which can be deduced from the sample format. This is technically an API and ABI break, but since the audio part of lavfi is not usable now, this should not be a problem in practice. Signed-off-by: Anton Khirnov --- libavfilter/avfilter.c | 22 ++++++++++++---------- libavfilter/avfilter.h | 20 ++++++++++---------- libavfilter/defaults.c | 23 +++++++++++------------ 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index e535bdab64..9f81be7a8d 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -265,10 +265,9 @@ static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end) av_get_picture_type_char(ref->video->pict_type)); } if (ref->audio) { - av_dlog(ctx, " cl:%"PRId64"d sn:%d s:%d sr:%d p:%d", + av_dlog(ctx, " cl:%"PRId64"d n:%d r:%d p:%d", ref->audio->channel_layout, ref->audio->nb_samples, - ref->audio->size, ref->audio->sample_rate, ref->audio->planar); } @@ -368,16 +367,16 @@ fail: } AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar) + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout) { AVFilterBufferRef *ret = NULL; if (link->dstpad->get_audio_buffer) - ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, size, channel_layout, planar); + ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout); if (!ret) - ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, size, channel_layout, planar); + ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout); if (ret) ret->type = AVMEDIA_TYPE_AUDIO; @@ -585,6 +584,9 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) /* prepare to copy the samples if the buffer has insufficient permissions */ if ((dst->min_perms & samplesref->perms) != dst->min_perms || dst->rej_perms & samplesref->perms) { + int i, planar = av_sample_fmt_is_planar(samplesref->format); + int planes = !planar ? 
1: + av_get_channel_layout_nb_channels(samplesref->audio->channel_layout); av_log(link->dst, AV_LOG_DEBUG, "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", @@ -592,14 +594,14 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms, samplesref->format, - samplesref->audio->size, - samplesref->audio->channel_layout, - samplesref->audio->planar); + samplesref->audio->nb_samples, + samplesref->audio->channel_layout); link->cur_buf->pts = samplesref->pts; link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; /* Copy actual data into new samples buffer */ - memcpy(link->cur_buf->data[0], samplesref->data[0], samplesref->audio->size); + for (i = 0; i < planes; i++) + memcpy(link->cur_buf->extended_data[i], samplesref->extended_data[i], samplesref->linesize[0]); avfilter_unref_buffer(samplesref); } else diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 6555744f12..19ac057ed4 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -109,7 +109,6 @@ typedef struct AVFilterBuffer { typedef struct AVFilterBufferRefAudioProps { uint64_t channel_layout; ///< channel layout of audio buffer int nb_samples; ///< number of audio samples - int size; ///< audio buffer size uint32_t sample_rate; ///< audio buffer sample rate int planar; ///< audio buffer - planar or packed } AVFilterBufferRefAudioProps; @@ -388,8 +387,8 @@ struct AVFilterPad { * Input audio pads only. */ AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar); + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout); /** * Callback called after the slices of a frame are completely sent. If @@ -474,8 +473,9 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, /** default handler for get_audio_buffer() for audio inputs */ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar); + enum AVSampleFormat sample_fmt, + int nb_samples, + uint64_t channel_layout); /** * A helper for query_formats() which sets all links to the same list of @@ -505,8 +505,8 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, /** get_audio_buffer() handler for filters which simply pass audio along */ AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar); + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout); /** * Filter definition. This defines the pads a filter contains, and all the @@ -690,15 +690,15 @@ avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int * be requested * @param perms the required access permissions * @param sample_fmt the format of each sample in the buffer to allocate - * @param size the buffer size in bytes + * @param nb_samples the number of samples per channel * @param channel_layout the number and type of channels per sample in the buffer to allocate * @param planar audio data layout - planar or packed * @return A reference to the samples. This must be unreferenced with * avfilter_unref_buffer when you are finished with it. 
*/ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar); + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout); /** * Create an audio buffer reference wrapped around an already diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index 086fcc0b4c..fcb29e3b8d 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -58,12 +58,13 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per } AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int planar) + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout) { AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer)); AVFilterBufferRef *ref = NULL; int i, sample_size, chans_nb, bufsize, per_channel_size, step_size = 0; + int planar = av_sample_fmt_is_planar(sample_fmt); char *buf; if (!samples || !(ref = av_mallocz(sizeof(AVFilterBufferRef)))) @@ -77,7 +78,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per goto fail; ref->audio->channel_layout = channel_layout; - ref->audio->size = size; + ref->audio->nb_samples = nb_samples; ref->audio->planar = planar; /* make sure the buffer gets read permission or it's useless for output */ @@ -89,8 +90,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per sample_size = av_get_bytes_per_sample(sample_fmt); chans_nb = av_get_channel_layout_nb_channels(channel_layout); - per_channel_size = size/chans_nb; - ref->audio->nb_samples = per_channel_size/sample_size; + per_channel_size = nb_samples * sample_size; /* Set the number of bytes to traverse to reach next sample of a particular channel: * For planar, this is simply the sample size. 
@@ -101,7 +101,7 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per memset(&samples->linesize[chans_nb], 0, (8-chans_nb) * sizeof(samples->linesize[0])); /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */ - bufsize = (size + 15)&~15; + bufsize = (nb_samples * chans_nb * sample_size + 15)&~15; buf = av_malloc(bufsize); if (!buf) goto fail; @@ -189,9 +189,8 @@ void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *sa if (outlink) { outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, samplesref->format, - samplesref->audio->size, - samplesref->audio->channel_layout, - samplesref->audio->planar); + samplesref->audio->nb_samples, + samplesref->audio->channel_layout); outlink->out_buf->pts = samplesref->pts; outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; avfilter_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); @@ -293,9 +292,9 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, } AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int size, - uint64_t channel_layout, int packed) + enum AVSampleFormat sample_fmt, int nb_samples, + uint64_t channel_layout) { return avfilter_get_audio_buffer(link->dst->outputs[0], perms, sample_fmt, - size, channel_layout, packed); + nb_samples, channel_layout); } From 6735534f19369a914d795aa84cd3faa4c57729ce Mon Sep 17 00:00:00 2001 From: Stefano Sabatini Date: Sat, 15 Jan 2011 18:48:37 +0100 Subject: [PATCH 02/25] lavfi: use avfilter_get_audio_buffer_ref_from_arrays() in avfilter_default_get_audio_buffer --- libavfilter/defaults.c | 80 +++++++++--------------------------------- 1 file changed, 17 insertions(+), 63 deletions(-) diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index fcb29e3b8d..7c75ab9c4b 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -61,78 +61,32 @@ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int per enum AVSampleFormat sample_fmt, int nb_samples, uint64_t channel_layout) { - AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer)); - AVFilterBufferRef *ref = NULL; - int i, sample_size, chans_nb, bufsize, per_channel_size, step_size = 0; - int planar = av_sample_fmt_is_planar(sample_fmt); - char *buf; + AVFilterBufferRef *samplesref = NULL; + uint8_t **data; + int planar = av_sample_fmt_is_planar(sample_fmt); + int nb_channels = av_get_channel_layout_nb_channels(channel_layout); + int planes = planar ? 
nb_channels : 1; + int linesize; - if (!samples || !(ref = av_mallocz(sizeof(AVFilterBufferRef)))) + if (!(data = av_mallocz(sizeof(*data) * planes))) goto fail; - ref->buf = samples; - ref->format = sample_fmt; - - ref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps)); - if (!ref->audio) + if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, sample_fmt, 0) < 0) goto fail; - ref->audio->channel_layout = channel_layout; - ref->audio->nb_samples = nb_samples; - ref->audio->planar = planar; - - /* make sure the buffer gets read permission or it's useless for output */ - ref->perms = perms | AV_PERM_READ; - - samples->refcount = 1; - samples->free = ff_avfilter_default_free_buffer; - - sample_size = av_get_bytes_per_sample(sample_fmt); - chans_nb = av_get_channel_layout_nb_channels(channel_layout); - - per_channel_size = nb_samples * sample_size; - - /* Set the number of bytes to traverse to reach next sample of a particular channel: - * For planar, this is simply the sample size. - * For packed, this is the number of samples * sample_size. - */ - for (i = 0; i < chans_nb; i++) - samples->linesize[i] = planar > 0 ? per_channel_size : sample_size; - memset(&samples->linesize[chans_nb], 0, (8-chans_nb) * sizeof(samples->linesize[0])); - - /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */ - bufsize = (nb_samples * chans_nb * sample_size + 15)&~15; - buf = av_malloc(bufsize); - if (!buf) + samplesref = avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, + nb_samples, sample_fmt, + channel_layout); + if (!samplesref) goto fail; - /* For planar, set the start point of each channel's data within the buffer - * For packed, set the start point of the entire buffer only - */ - samples->data[0] = buf; - if (buf && planar) { - for (i = 1; i < chans_nb; i++) { - step_size += per_channel_size; - samples->data[i] = buf + step_size; - } - } else { - for (i = 1; i < chans_nb; i++) - samples->data[i] = buf; - } - - memset(&samples->data[chans_nb], 0, (8-chans_nb) * sizeof(samples->data[0])); - - memcpy(ref->data, samples->data, sizeof(ref->data)); - memcpy(ref->linesize, samples->linesize, sizeof(ref->linesize)); - - return ref; + av_freep(&data); fail: - if (ref) - av_free(ref->audio); - av_free(ref); - av_free(samples); - return NULL; + if (data) + av_freep(&data[0]); + av_freep(&data); + return samplesref; } void avfilter_default_start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref) From 7ef0adcc2e800cb1357d4d5d7ce878c0f9a36c01 Mon Sep 17 00:00:00 2001 From: Stefano Sabatini Date: Tue, 30 Aug 2011 23:22:29 +0200 Subject: [PATCH 03/25] lavfi: simplify signature for avfilter_get_audio_buffer() and friends The additional parameters are just complicating the function interface. Assume that a requested samples buffer will *always* have the format specified in the requested link. This breaks audio filtering API and ABI in theory, but since it's unusable right now this shouldn't be a problem. 
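For illustration, a caller of the simplified entry point now passes only the link, the permissions and a sample count; format and channel layout are taken from the link itself. A minimal hypothetical sketch (filter code not part of this patch; the 1024-sample request size is arbitrary):

    static int request_frame(AVFilterLink *outlink)
    {
        /* format/channel_layout come from outlink, so only nb_samples is passed */
        AVFilterBufferRef *buf = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, 1024);
        if (!buf)
            return AVERROR(ENOMEM);
        /* ... fill buf->extended_data[] with 1024 samples per channel ... */
        avfilter_filter_samples(outlink, buf);
        return 0;
    }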
Signed-off-by: Anton Khirnov --- libavfilter/avfilter.c | 11 ++++------- libavfilter/avfilter.h | 16 ++++------------ libavfilter/defaults.c | 24 ++++++++++-------------- 3 files changed, 18 insertions(+), 33 deletions(-) diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index 9f81be7a8d..e301ddb37b 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -367,16 +367,15 @@ fail: } AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout) + int nb_samples) { AVFilterBufferRef *ret = NULL; if (link->dstpad->get_audio_buffer) - ret = link->dstpad->get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout); + ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); if (!ret) - ret = avfilter_default_get_audio_buffer(link, perms, sample_fmt, nb_samples, channel_layout); + ret = avfilter_default_get_audio_buffer(link, perms, nb_samples); if (ret) ret->type = AVMEDIA_TYPE_AUDIO; @@ -593,9 +592,7 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms, - samplesref->format, - samplesref->audio->nb_samples, - samplesref->audio->channel_layout); + samplesref->audio->nb_samples); link->cur_buf->pts = samplesref->pts; link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 19ac057ed4..cf95b4bb16 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -387,8 +387,7 @@ struct AVFilterPad { * Input audio pads only. */ AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout); + int nb_samples); /** * Callback called after the slices of a frame are completely sent. If @@ -473,9 +472,7 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, /** default handler for get_audio_buffer() for audio inputs */ AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, - int nb_samples, - uint64_t channel_layout); + int nb_samples); /** * A helper for query_formats() which sets all links to the same list of @@ -505,8 +502,7 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, /** get_audio_buffer() handler for filters which simply pass audio along */ AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout); + int nb_samples); /** * Filter definition. This defines the pads a filter contains, and all the @@ -689,16 +685,12 @@ avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int * @param link the output link to the filter from which the buffer will * be requested * @param perms the required access permissions - * @param sample_fmt the format of each sample in the buffer to allocate * @param nb_samples the number of samples per channel - * @param channel_layout the number and type of channels per sample in the buffer to allocate - * @param planar audio data layout - planar or packed * @return A reference to the samples. This must be unreferenced with * avfilter_unref_buffer when you are finished with it. 
*/ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout); + int nb_samples); /** * Create an audio buffer reference wrapped around an already diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index 7c75ab9c4b..df05c06d63 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -58,25 +58,24 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per } AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout) + int nb_samples) { AVFilterBufferRef *samplesref = NULL; uint8_t **data; - int planar = av_sample_fmt_is_planar(sample_fmt); - int nb_channels = av_get_channel_layout_nb_channels(channel_layout); + int planar = av_sample_fmt_is_planar(link->format); + int nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); int planes = planar ? nb_channels : 1; int linesize; if (!(data = av_mallocz(sizeof(*data) * planes))) goto fail; - if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, sample_fmt, 0) < 0) + if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, link->format, 0) < 0) goto fail; samplesref = avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, - nb_samples, sample_fmt, - channel_layout); + nb_samples, link->format, + link->channel_layout); if (!samplesref) goto fail; @@ -142,9 +141,8 @@ void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *sa outlink = inlink->dst->outputs[0]; if (outlink) { - outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, samplesref->format, - samplesref->audio->nb_samples, - samplesref->audio->channel_layout); + outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, + samplesref->audio->nb_samples); outlink->out_buf->pts = samplesref->pts; outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; avfilter_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); @@ -246,9 +244,7 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, } AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - enum AVSampleFormat sample_fmt, int nb_samples, - uint64_t channel_layout) + int nb_samples) { - return avfilter_get_audio_buffer(link->dst->outputs[0], perms, sample_fmt, - nb_samples, channel_layout); + return avfilter_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); } From a6bdfc2a92a46aa7ee2d95a40f43b848ef94ec13 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sun, 6 May 2012 10:40:11 +0200 Subject: [PATCH 04/25] lavfi: change AVFilterBufferRefAudioProps.sample_rate from uint32_t to int There's no reason for it to be explicitly 32 bits. It's declared as a plain int in all other places in Libav. This breaks audio filtering API and ABI in theory, but since it's unusable right now this shouldn't be a problem. 
--- libavfilter/avfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index cf95b4bb16..357ce34555 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -109,7 +109,7 @@ typedef struct AVFilterBuffer { typedef struct AVFilterBufferRefAudioProps { uint64_t channel_layout; ///< channel layout of audio buffer int nb_samples; ///< number of audio samples - uint32_t sample_rate; ///< audio buffer sample rate + int sample_rate; ///< audio buffer sample rate int planar; ///< audio buffer - planar or packed } AVFilterBufferRefAudioProps; From f20ab492acd2ab49f859dcd6d310029fb8c09dc4 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 9 May 2012 08:43:07 +0200 Subject: [PATCH 05/25] lavfi: change AVFilterLink.sample_rate from int64_t to int on next bump There is no real reason for it to be 64bit, it's just a plain int in the rest of Libav. --- libavfilter/avfilter.h | 4 ++++ libavfilter/version.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 357ce34555..69ada1b8be 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -596,7 +596,11 @@ struct AVFilterLink { AVRational sample_aspect_ratio; ///< agreed upon sample aspect ratio /* These two parameters apply only to audio */ uint64_t channel_layout; ///< channel layout of current buffer (see libavutil/audioconvert.h) +#if FF_API_SAMPLERATE64 int64_t sample_rate; ///< samples per second +#else + int sample_rate; ///< samples per second +#endif int format; ///< agreed upon media format diff --git a/libavfilter/version.h b/libavfilter/version.h index 718ed7812e..71928f3f3b 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -47,5 +47,8 @@ #ifndef FF_API_GRAPH_AVCLASS #define FF_API_GRAPH_AVCLASS (LIBAVFILTER_VERSION_MAJOR > 2) #endif +#ifndef FF_API_SAMPLERATE64 +#define FF_API_SAMPLERATE64 (LIBAVFILTER_VERSION_MAJOR < 3) +#endif #endif // AVFILTER_VERSION_H From 472fb3bbfaf6fddb33d45688046184e7684c9f71 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 7 May 2012 10:51:23 +0200 Subject: [PATCH 06/25] lavfi: remove some audio-related function from public API. Those functions are only useful inside filters. It is better to not support user filters until the API is more stable. This breaks audio filtering API and ABI in theory, but since it's unusable right now this shouldn't be a problem. 
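As a concrete illustration of the internal API after this change (a sketch modeled on the af_anull update below; the filter itself is hypothetical), an in-tree audio filter now includes "audio.h" and wires the ff_-prefixed handlers into its pads:

    #include "audio.h"
    #include "avfilter.h"

    /* hypothetical pass-through audio filter using the internal handlers */
    AVFilter avfilter_af_example = {
        .name    = "example",
        .inputs  = (AVFilterPad[]) {{ .name             = "default",
                                      .type             = AVMEDIA_TYPE_AUDIO,
                                      .get_audio_buffer = ff_null_get_audio_buffer,
                                      .filter_samples   = ff_null_filter_samples },
                                    { .name = NULL }},
        .outputs = (AVFilterPad[]) {{ .name = "default",
                                      .type = AVMEDIA_TYPE_AUDIO },
                                    { .name = NULL }},
    };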
--- libavfilter/af_anull.c | 5 ++-- libavfilter/audio.h | 61 ++++++++++++++++++++++++++++++++++++++++++ libavfilter/avfilter.c | 16 ++++++----- libavfilter/avfilter.h | 37 ------------------------- libavfilter/defaults.c | 24 +++++++++-------- 5 files changed, 86 insertions(+), 57 deletions(-) create mode 100644 libavfilter/audio.h diff --git a/libavfilter/af_anull.c b/libavfilter/af_anull.c index e2bed36f0a..59b275c767 100644 --- a/libavfilter/af_anull.c +++ b/libavfilter/af_anull.c @@ -21,6 +21,7 @@ * null audio filter */ +#include "audio.h" #include "avfilter.h" AVFilter avfilter_af_anull = { @@ -31,8 +32,8 @@ AVFilter avfilter_af_anull = { .inputs = (AVFilterPad[]) {{ .name = "default", .type = AVMEDIA_TYPE_AUDIO, - .get_audio_buffer = avfilter_null_get_audio_buffer, - .filter_samples = avfilter_null_filter_samples }, + .get_audio_buffer = ff_null_get_audio_buffer, + .filter_samples = ff_null_filter_samples }, { .name = NULL}}, .outputs = (AVFilterPad[]) {{ .name = "default", diff --git a/libavfilter/audio.h b/libavfilter/audio.h new file mode 100644 index 0000000000..935bec5c43 --- /dev/null +++ b/libavfilter/audio.h @@ -0,0 +1,61 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_AUDIO_H +#define AVFILTER_AUDIO_H + +#include "avfilter.h" + +/** default handler for get_audio_buffer() for audio inputs */ +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** get_audio_buffer() handler for filters which simply pass audio along */ +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** + * Request an audio samples buffer with a specific set of permissions. + * + * @param link the output link to the filter from which the buffer will + * be requested + * @param perms the required access permissions + * @param nb_samples the number of samples per channel + * @return A reference to the samples. This must be unreferenced with + * avfilter_unref_buffer when you are finished with it. + */ +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** default handler for filter_samples() for audio inputs */ +void ff_default_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +/** filter_samples() handler for filters which simply pass audio along */ +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +/** + * Send a buffer of audio samples to the next filter. + * + * @param link the output link over which the audio samples are being sent + * @param samplesref a reference to the buffer of audio samples being sent. The + * receiving filter will free this reference when it no longer + * needs it or pass it on to the next filter. 
+ */ +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +#endif /* AVFILTER_AUDIO_H */ diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index e301ddb37b..6a530f8fd6 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -26,6 +26,8 @@ #include "libavutil/audioconvert.h" #include "libavutil/imgutils.h" #include "libavcodec/avcodec.h" + +#include "audio.h" #include "avfilter.h" #include "internal.h" @@ -366,8 +368,8 @@ fail: return NULL; } -AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) { AVFilterBufferRef *ret = NULL; @@ -375,7 +377,7 @@ AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); if (!ret) - ret = avfilter_default_get_audio_buffer(link, perms, nb_samples); + ret = ff_default_get_audio_buffer(link, perms, nb_samples); if (ret) ret->type = AVMEDIA_TYPE_AUDIO; @@ -570,7 +572,7 @@ void avfilter_draw_slice(AVFilterLink *link, int y, int h, int slice_dir) draw_slice(link, y, h, slice_dir); } -void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) { void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); AVFilterPad *dst = link->dstpad; @@ -578,7 +580,7 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); if (!(filter_samples = dst->filter_samples)) - filter_samples = avfilter_default_filter_samples; + filter_samples = ff_default_filter_samples; /* prepare to copy the samples if the buffer has insufficient permissions */ if ((dst->min_perms & samplesref->perms) != dst->min_perms || @@ -591,8 +593,8 @@ void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); - link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms, - samplesref->audio->nb_samples); + link->cur_buf = ff_default_get_audio_buffer(link, dst->min_perms, + samplesref->audio->nb_samples); link->cur_buf->pts = samplesref->pts; link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index 69ada1b8be..fd996db94b 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -460,9 +460,6 @@ void avfilter_default_draw_slice(AVFilterLink *link, int y, int h, int slice_dir /** default handler for end_frame() for video inputs */ void avfilter_default_end_frame(AVFilterLink *link); -/** default handler for filter_samples() for audio inputs */ -void avfilter_default_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - /** default handler for config_props() for audio/video outputs */ int avfilter_default_config_output_link(AVFilterLink *link); @@ -470,10 +467,6 @@ int avfilter_default_config_output_link(AVFilterLink *link); AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int perms, int w, int h); -/** default handler for get_audio_buffer() for audio inputs */ -AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples); - /** * A helper for query_formats() which sets all links to the same list of * formats. 
If there are no links hooked to this filter, the list of formats is @@ -493,17 +486,10 @@ void avfilter_null_draw_slice(AVFilterLink *link, int y, int h, int slice_dir); /** end_frame() handler for filters which simply pass video along */ void avfilter_null_end_frame(AVFilterLink *link); -/** filter_samples() handler for filters which simply pass audio along */ -void avfilter_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - /** get_video_buffer() handler for filters which simply pass video along */ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, int w, int h); -/** get_audio_buffer() handler for filters which simply pass audio along */ -AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples); - /** * Filter definition. This defines the pads a filter contains, and all the * callback functions used to interact with the filter. @@ -683,19 +669,6 @@ AVFilterBufferRef * avfilter_get_video_buffer_ref_from_arrays(uint8_t *data[4], int linesize[4], int perms, int w, int h, enum PixelFormat format); -/** - * Request an audio samples buffer with a specific set of permissions. - * - * @param link the output link to the filter from which the buffer will - * be requested - * @param perms the required access permissions - * @param nb_samples the number of samples per channel - * @return A reference to the samples. This must be unreferenced with - * avfilter_unref_buffer when you are finished with it. - */ -AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples); - /** * Create an audio buffer reference wrapped around an already * allocated samples buffer. @@ -766,16 +739,6 @@ void avfilter_end_frame(AVFilterLink *link); */ void avfilter_draw_slice(AVFilterLink *link, int y, int h, int slice_dir); -/** - * Send a buffer of audio samples to the next filter. - * - * @param link the output link over which the audio samples are being sent - * @param samplesref a reference to the buffer of audio samples being sent. The - * receiving filter will free this reference when it no longer - * needs it or pass it on to the next filter. - */ -void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - /** Initialize the filter system. Register all builtin filters. */ void avfilter_register_all(void); diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index df05c06d63..c25d37f8b3 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -22,6 +22,8 @@ #include "libavutil/audioconvert.h" #include "libavutil/imgutils.h" #include "libavutil/samplefmt.h" + +#include "audio.h" #include "avfilter.h" #include "internal.h" @@ -57,8 +59,8 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per return picref; } -AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) { AVFilterBufferRef *samplesref = NULL; uint8_t **data; @@ -133,7 +135,7 @@ void avfilter_default_end_frame(AVFilterLink *inlink) } /* FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter. 
*/ -void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) +void ff_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) { AVFilterLink *outlink = NULL; @@ -141,11 +143,11 @@ void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *sa outlink = inlink->dst->outputs[0]; if (outlink) { - outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, - samplesref->audio->nb_samples); + outlink->out_buf = ff_default_get_audio_buffer(inlink, AV_PERM_WRITE, + samplesref->audio->nb_samples); outlink->out_buf->pts = samplesref->pts; outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; - avfilter_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); + ff_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); avfilter_unref_buffer(outlink->out_buf); outlink->out_buf = NULL; } @@ -233,9 +235,9 @@ void avfilter_null_end_frame(AVFilterLink *link) avfilter_end_frame(link->dst->outputs[0]); } -void avfilter_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) { - avfilter_filter_samples(link->dst->outputs[0], samplesref); + ff_filter_samples(link->dst->outputs[0], samplesref); } AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, int w, int h) @@ -243,8 +245,8 @@ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, return avfilter_get_video_buffer(link->dst->outputs[0], perms, w, h); } -AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) { - return avfilter_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); + return ff_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); } From 0b45334a5880d6e2a4b3642adcd5feab8a27a150 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 7 May 2012 11:21:38 +0200 Subject: [PATCH 07/25] lavfi: move audio-related functions to a separate file. This is easier to follow than having them randomly scattered in avfilter.c and defaults.c. --- libavfilter/Makefile | 1 + libavfilter/audio.c | 209 +++++++++++++++++++++++++++++++++++++++++ libavfilter/avfilter.c | 127 +------------------------ libavfilter/defaults.c | 64 ------------- libavfilter/internal.h | 4 + 5 files changed, 215 insertions(+), 190 deletions(-) create mode 100644 libavfilter/audio.c diff --git a/libavfilter/Makefile b/libavfilter/Makefile index e786b6d2fe..49a47d3e1b 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -10,6 +10,7 @@ HEADERS = avfilter.h \ vsrc_buffer.h \ OBJS = allfilters.o \ + audio.o \ avfilter.o \ avfiltergraph.o \ buffersink.o \ diff --git a/libavfilter/audio.c b/libavfilter/audio.c new file mode 100644 index 0000000000..3e12c697ce --- /dev/null +++ b/libavfilter/audio.c @@ -0,0 +1,209 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/audioconvert.h" + +#include "audio.h" +#include "avfilter.h" +#include "internal.h" + +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + return ff_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); +} + +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + AVFilterBufferRef *samplesref = NULL; + uint8_t **data; + int planar = av_sample_fmt_is_planar(link->format); + int nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); + int planes = planar ? nb_channels : 1; + int linesize; + + if (!(data = av_mallocz(sizeof(*data) * planes))) + goto fail; + + if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, link->format, 0) < 0) + goto fail; + + samplesref = avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, + nb_samples, link->format, + link->channel_layout); + if (!samplesref) + goto fail; + + av_freep(&data); + +fail: + if (data) + av_freep(&data[0]); + av_freep(&data); + return samplesref; +} + +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + AVFilterBufferRef *ret = NULL; + + if (link->dstpad->get_audio_buffer) + ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); + + if (!ret) + ret = ff_default_get_audio_buffer(link, perms, nb_samples); + + if (ret) + ret->type = AVMEDIA_TYPE_AUDIO; + + return ret; +} + +AVFilterBufferRef* avfilter_get_audio_buffer_ref_from_arrays(uint8_t **data, + int linesize,int perms, + int nb_samples, + enum AVSampleFormat sample_fmt, + uint64_t channel_layout) +{ + int planes; + AVFilterBuffer *samples = av_mallocz(sizeof(*samples)); + AVFilterBufferRef *samplesref = av_mallocz(sizeof(*samplesref)); + + if (!samples || !samplesref) + goto fail; + + samplesref->buf = samples; + samplesref->buf->free = ff_avfilter_default_free_buffer; + if (!(samplesref->audio = av_mallocz(sizeof(*samplesref->audio)))) + goto fail; + + samplesref->audio->nb_samples = nb_samples; + samplesref->audio->channel_layout = channel_layout; + samplesref->audio->planar = av_sample_fmt_is_planar(sample_fmt); + + planes = samplesref->audio->planar ? 
av_get_channel_layout_nb_channels(channel_layout) : 1; + + /* make sure the buffer gets read permission or it's useless for output */ + samplesref->perms = perms | AV_PERM_READ; + + samples->refcount = 1; + samplesref->type = AVMEDIA_TYPE_AUDIO; + samplesref->format = sample_fmt; + + memcpy(samples->data, data, + FFMIN(FF_ARRAY_ELEMS(samples->data), planes)*sizeof(samples->data[0])); + memcpy(samplesref->data, samples->data, sizeof(samples->data)); + + samples->linesize[0] = samplesref->linesize[0] = linesize; + + if (planes > FF_ARRAY_ELEMS(samples->data)) { + samples-> extended_data = av_mallocz(sizeof(*samples->extended_data) * + planes); + samplesref->extended_data = av_mallocz(sizeof(*samplesref->extended_data) * + planes); + + if (!samples->extended_data || !samplesref->extended_data) + goto fail; + + memcpy(samples-> extended_data, data, sizeof(*data)*planes); + memcpy(samplesref->extended_data, data, sizeof(*data)*planes); + } else { + samples->extended_data = samples->data; + samplesref->extended_data = samplesref->data; + } + + return samplesref; + +fail: + if (samples && samples->extended_data != samples->data) + av_freep(&samples->extended_data); + if (samplesref) { + av_freep(&samplesref->audio); + if (samplesref->extended_data != samplesref->data) + av_freep(&samplesref->extended_data); + } + av_freep(&samplesref); + av_freep(&samples); + return NULL; +} + +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +{ + ff_filter_samples(link->dst->outputs[0], samplesref); +} + +/* FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter. */ +void ff_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) +{ + AVFilterLink *outlink = NULL; + + if (inlink->dst->output_count) + outlink = inlink->dst->outputs[0]; + + if (outlink) { + outlink->out_buf = ff_default_get_audio_buffer(inlink, AV_PERM_WRITE, + samplesref->audio->nb_samples); + outlink->out_buf->pts = samplesref->pts; + outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; + ff_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); + avfilter_unref_buffer(outlink->out_buf); + outlink->out_buf = NULL; + } + avfilter_unref_buffer(samplesref); + inlink->cur_buf = NULL; +} + +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +{ + void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); + AVFilterPad *dst = link->dstpad; + + FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); + + if (!(filter_samples = dst->filter_samples)) + filter_samples = ff_default_filter_samples; + + /* prepare to copy the samples if the buffer has insufficient permissions */ + if ((dst->min_perms & samplesref->perms) != dst->min_perms || + dst->rej_perms & samplesref->perms) { + int i, planar = av_sample_fmt_is_planar(samplesref->format); + int planes = !planar ? 
1: + av_get_channel_layout_nb_channels(samplesref->audio->channel_layout); + + av_log(link->dst, AV_LOG_DEBUG, + "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", + samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); + + link->cur_buf = ff_default_get_audio_buffer(link, dst->min_perms, + samplesref->audio->nb_samples); + link->cur_buf->pts = samplesref->pts; + link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; + + /* Copy actual data into new samples buffer */ + for (i = 0; i < planes; i++) + memcpy(link->cur_buf->extended_data[i], samplesref->extended_data[i], samplesref->linesize[0]); + + avfilter_unref_buffer(samplesref); + } else + link->cur_buf = samplesref; + + filter_samples(link, link->cur_buf); +} + diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index 6a530f8fd6..bd898e37ab 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -27,7 +27,6 @@ #include "libavutil/imgutils.h" #include "libavcodec/avcodec.h" -#include "audio.h" #include "avfilter.h" #include "internal.h" @@ -277,7 +276,7 @@ static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end) av_dlog(ctx, "]%s", end ? "\n" : ""); } -static void ff_dlog_link(void *ctx, AVFilterLink *link, int end) +void ff_dlog_link(void *ctx, AVFilterLink *link, int end) { if (link->type == AVMEDIA_TYPE_VIDEO) { av_dlog(ctx, @@ -301,8 +300,6 @@ static void ff_dlog_link(void *ctx, AVFilterLink *link, int end) } } -#define FF_DPRINTF_START(ctx, func) av_dlog(NULL, "%-16s: ", #func) - AVFilterBufferRef *avfilter_get_video_buffer(AVFilterLink *link, int perms, int w, int h) { AVFilterBufferRef *ret = NULL; @@ -368,91 +365,6 @@ fail: return NULL; } -AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) -{ - AVFilterBufferRef *ret = NULL; - - if (link->dstpad->get_audio_buffer) - ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); - - if (!ret) - ret = ff_default_get_audio_buffer(link, perms, nb_samples); - - if (ret) - ret->type = AVMEDIA_TYPE_AUDIO; - - return ret; -} - -AVFilterBufferRef *avfilter_get_audio_buffer_ref_from_arrays(uint8_t **data, - int linesize, int perms, - int nb_samples, - enum AVSampleFormat sample_fmt, - uint64_t channel_layout) -{ - int planes; - AVFilterBuffer *samples = av_mallocz(sizeof(*samples)); - AVFilterBufferRef *samplesref = av_mallocz(sizeof(*samplesref)); - - if (!samples || !samplesref) - goto fail; - - samplesref->buf = samples; - samplesref->buf->free = ff_avfilter_default_free_buffer; - if (!(samplesref->audio = av_mallocz(sizeof(*samplesref->audio)))) - goto fail; - - samplesref->audio->nb_samples = nb_samples; - samplesref->audio->channel_layout = channel_layout; - samplesref->audio->planar = av_sample_fmt_is_planar(sample_fmt); - - planes = samplesref->audio->planar ? 
av_get_channel_layout_nb_channels(channel_layout) : 1; - - /* make sure the buffer gets read permission or it's useless for output */ - samplesref->perms = perms | AV_PERM_READ; - - samples->refcount = 1; - samplesref->type = AVMEDIA_TYPE_AUDIO; - samplesref->format = sample_fmt; - - memcpy(samples->data, data, - FFMIN(FF_ARRAY_ELEMS(samples->data), planes)*sizeof(samples->data[0])); - memcpy(samplesref->data, samples->data, sizeof(samples->data)); - - samples->linesize[0] = samplesref->linesize[0] = linesize; - - if (planes > FF_ARRAY_ELEMS(samples->data)) { - samples-> extended_data = av_mallocz(sizeof(*samples->extended_data) * - planes); - samplesref->extended_data = av_mallocz(sizeof(*samplesref->extended_data) * - planes); - - if (!samples->extended_data || !samplesref->extended_data) - goto fail; - - memcpy(samples-> extended_data, data, sizeof(*data)*planes); - memcpy(samplesref->extended_data, data, sizeof(*data)*planes); - } else { - samples->extended_data = samples->data; - samplesref->extended_data = samplesref->data; - } - - return samplesref; - -fail: - if (samples && samples->extended_data != samples->data) - av_freep(&samples->extended_data); - if (samplesref) { - av_freep(&samplesref->audio); - if (samplesref->extended_data != samplesref->data) - av_freep(&samplesref->extended_data); - } - av_freep(&samplesref); - av_freep(&samples); - return NULL; -} - int avfilter_request_frame(AVFilterLink *link) { FF_DPRINTF_START(NULL, request_frame); ff_dlog_link(NULL, link, 1); @@ -572,43 +484,6 @@ void avfilter_draw_slice(AVFilterLink *link, int y, int h, int slice_dir) draw_slice(link, y, h, slice_dir); } -void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) -{ - void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); - AVFilterPad *dst = link->dstpad; - - FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); - - if (!(filter_samples = dst->filter_samples)) - filter_samples = ff_default_filter_samples; - - /* prepare to copy the samples if the buffer has insufficient permissions */ - if ((dst->min_perms & samplesref->perms) != dst->min_perms || - dst->rej_perms & samplesref->perms) { - int i, planar = av_sample_fmt_is_planar(samplesref->format); - int planes = !planar ? 
1: - av_get_channel_layout_nb_channels(samplesref->audio->channel_layout); - - av_log(link->dst, AV_LOG_DEBUG, - "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", - samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); - - link->cur_buf = ff_default_get_audio_buffer(link, dst->min_perms, - samplesref->audio->nb_samples); - link->cur_buf->pts = samplesref->pts; - link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; - - /* Copy actual data into new samples buffer */ - for (i = 0; i < planes; i++) - memcpy(link->cur_buf->extended_data[i], samplesref->extended_data[i], samplesref->linesize[0]); - - avfilter_unref_buffer(samplesref); - } else - link->cur_buf = samplesref; - - filter_samples(link, link->cur_buf); -} - #define MAX_REGISTERED_AVFILTERS_NB 64 static AVFilter *registered_avfilters[MAX_REGISTERED_AVFILTERS_NB + 1]; diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index c25d37f8b3..caf6442974 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -23,7 +23,6 @@ #include "libavutil/imgutils.h" #include "libavutil/samplefmt.h" -#include "audio.h" #include "avfilter.h" #include "internal.h" @@ -59,37 +58,6 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per return picref; } -AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) -{ - AVFilterBufferRef *samplesref = NULL; - uint8_t **data; - int planar = av_sample_fmt_is_planar(link->format); - int nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); - int planes = planar ? nb_channels : 1; - int linesize; - - if (!(data = av_mallocz(sizeof(*data) * planes))) - goto fail; - - if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, link->format, 0) < 0) - goto fail; - - samplesref = avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, - nb_samples, link->format, - link->channel_layout); - if (!samplesref) - goto fail; - - av_freep(&data); - -fail: - if (data) - av_freep(&data[0]); - av_freep(&data); - return samplesref; -} - void avfilter_default_start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref) { AVFilterLink *outlink = NULL; @@ -134,27 +102,6 @@ void avfilter_default_end_frame(AVFilterLink *inlink) } } -/* FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter. 
*/ -void ff_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) -{ - AVFilterLink *outlink = NULL; - - if (inlink->dst->output_count) - outlink = inlink->dst->outputs[0]; - - if (outlink) { - outlink->out_buf = ff_default_get_audio_buffer(inlink, AV_PERM_WRITE, - samplesref->audio->nb_samples); - outlink->out_buf->pts = samplesref->pts; - outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; - ff_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); - avfilter_unref_buffer(outlink->out_buf); - outlink->out_buf = NULL; - } - avfilter_unref_buffer(samplesref); - inlink->cur_buf = NULL; -} - /** * default config_link() implementation for output video links to simplify * the implementation of one input one output video filters */ @@ -235,18 +182,7 @@ void avfilter_null_end_frame(AVFilterLink *link) avfilter_end_frame(link->dst->outputs[0]); } -void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) -{ - ff_filter_samples(link->dst->outputs[0], samplesref); -} - AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, int w, int h) { return avfilter_get_video_buffer(link->dst->outputs[0], perms, w, h); } - -AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) -{ - return ff_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); -} diff --git a/libavfilter/internal.h b/libavfilter/internal.h index 0630e9b7d6..a5b3f788da 100644 --- a/libavfilter/internal.h +++ b/libavfilter/internal.h @@ -55,4 +55,8 @@ void ff_avfilter_default_free_buffer(AVFilterBuffer *buf); /** Tell is a format is contained in the provided list terminated by -1. */ int ff_fmt_is_in(int fmt, const int *fmts); +#define FF_DPRINTF_START(ctx, func) av_dlog(NULL, "%-16s: ", #func) + +void ff_dlog_link(void *ctx, AVFilterLink *link, int end); + #endif /* AVFILTER_INTERNAL_H */ From 5cc6d5244d4ec89b3ac855abff4a3d19caee22f1 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Thu, 3 May 2012 15:23:32 -0400 Subject: [PATCH 08/25] lavr: replace the SSE version of ff_conv_fltp_to_flt_6ch() with SSE4 and AVX The current SSE version is slower than the MMX version on Athlon64 and Sandy Bridge, but the SSE4 and AVX versions are faster on Sandy Bridge. 
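For orientation, a scalar C equivalent of the conversion these SIMD routines implement (a sketch, not part of the patch; the signature is modeled on the existing ff_conv_fltp_to_flt_6ch_* prototypes):

    /* interleave 6 planar float channels into one packed float stream */
    static void conv_fltp_to_flt_6ch_c(float *dst, float *const *src, int len)
    {
        int i, ch;
        for (i = 0; i < len; i++)
            for (ch = 0; ch < 6; ch++)
                *dst++ = src[ch][i];
    }

The SSE4 speedup comes mainly from blendps, which replaces the movaps+shufps pairs used in the old SSE path.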
--- libavresample/x86/audio_convert.asm | 30 ++++++++++++++------------ libavresample/x86/audio_convert_init.c | 13 +++++++---- libavutil/x86/x86util.asm | 7 +++--- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 809c5d1378..ba59f3314f 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -54,26 +54,24 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len mova m3, [srcq+src3q] mova m4, [srcq+src4q] mova m5, [srcq+src5q] -%if cpuflag(sse) +%if cpuflag(sse4) SBUTTERFLYPS 0, 1, 6 SBUTTERFLYPS 2, 3, 6 SBUTTERFLYPS 4, 5, 6 - movaps m6, m4 - shufps m4, m0, q3210 + blendps m6, m4, m0, 1100b movlhps m0, m2 - movhlps m6, m2 - movaps [dstq ], m0 - movaps [dstq+16], m4 - movaps [dstq+32], m6 - - movaps m6, m5 - shufps m5, m1, q3210 + movhlps m4, m2 + blendps m2, m5, m1, 1100b movlhps m1, m3 - movhlps m6, m3 + movhlps m5, m3 + + movaps [dstq ], m0 + movaps [dstq+16], m6 + movaps [dstq+32], m4 movaps [dstq+48], m1 - movaps [dstq+64], m5 - movaps [dstq+80], m6 + movaps [dstq+64], m2 + movaps [dstq+80], m5 %else ; mmx SBUTTERFLY dq, 0, 1, 6 SBUTTERFLY dq, 2, 3, 6 @@ -100,5 +98,9 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len INIT_MMX mmx CONV_FLTP_TO_FLT_6CH -INIT_XMM sse +INIT_XMM sse4 CONV_FLTP_TO_FLT_6CH +%if HAVE_AVX +INIT_XMM avx +CONV_FLTP_TO_FLT_6CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 6883f10a21..206aede751 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -22,8 +22,9 @@ #include "libavutil/cpu.h" #include "libavresample/audio_convert.h" -extern void ff_conv_fltp_to_flt_6ch_mmx(float *dst, float *const *src, int len); -extern void ff_conv_fltp_to_flt_6ch_sse(float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int len); av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { @@ -34,9 +35,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); } - if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { + if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, - 6, 16, 4, "SSE", ff_conv_fltp_to_flt_6ch_sse); + 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); + } + if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { + ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } #endif } diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 55f4a936e2..508f24e2b5 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -42,10 +42,9 @@ %endmacro %macro SBUTTERFLYPS 3 - movaps m%3, m%1 - unpcklps m%1, m%2 - unpckhps m%3, m%2 - SWAP %2, %3 + unpcklps m%3, m%1, m%2 + unpckhps m%1, m%1, m%2 + SWAP %1, %3, %2 %endmacro %macro TRANSPOSE4x4B 5 From 59cbc4eee2edcfd0a89086237cd7a54e47f7c73b Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 23 Feb 2012 11:34:28 +0100 Subject: [PATCH 09/25] mov: make one comment slightly more specific --- libavformat/mov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/libavformat/mov.c b/libavformat/mov.c index ad1340a79d..29f01c3f72 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -994,7 +994,7 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom) return AVERROR_INVALIDDATA; if (atom.size >= 10) { - // Broken files created by legacy versions of Libav and FFmpeg will + // Broken files created by legacy versions of libavformat will // wrap a whole fiel atom inside of a glbl atom. unsigned size = avio_rb32(pb); unsigned type = avio_rl32(pb); From db1e403cfbfdba00826c458fa80c4cd83d5499ec Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 8 Apr 2012 13:05:17 +0200 Subject: [PATCH 10/25] vcr1: cosmetics: K&R prettyprinting, typos, parentheses, dead code, comments --- libavcodec/vcr1.c | 142 +++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index 23b06479ae..681150d80e 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -21,92 +21,88 @@ /** * @file - * ati vcr1 codec. + * ATI VCR1 codec */ #include "avcodec.h" #include "dsputil.h" -//#undef NDEBUG -//#include - /* Disable the encoder. */ #undef CONFIG_VCR1_ENCODER #define CONFIG_VCR1_ENCODER 0 -typedef struct VCR1Context{ +typedef struct VCR1Context { AVCodecContext *avctx; AVFrame picture; int delta[16]; int offset[4]; } VCR1Context; -static int decode_frame(AVCodecContext *avctx, - void *data, int *data_size, - AVPacket *avpkt) +static int decode_frame(AVCodecContext *avctx, void *data, + int *data_size, AVPacket *avpkt) { - const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; - VCR1Context * const a = avctx->priv_data; - AVFrame *picture = data; - AVFrame * const p = &a->picture; - const uint8_t *bytestream= buf; + const uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + VCR1Context *const a = avctx->priv_data; + AVFrame *picture = data; + AVFrame *const p = &a->picture; + const uint8_t *bytestream = buf; int i, x, y; - if(p->data[0]) + if (p->data[0]) avctx->release_buffer(avctx, p); - p->reference= 0; - if(avctx->get_buffer(avctx, p) < 0){ + p->reference = 0; + if (avctx->get_buffer(avctx, p) < 0) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } - p->pict_type= AV_PICTURE_TYPE_I; - p->key_frame= 1; + p->pict_type = AV_PICTURE_TYPE_I; + p->key_frame = 1; - for(i=0; i<16; i++){ - a->delta[i]= *(bytestream++); + for (i = 0; i < 16; i++) { + a->delta[i] = *bytestream++; bytestream++; } - for(y=0; yheight; y++){ + for (y = 0; y < avctx->height; y++) { int offset; - uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ]; + uint8_t *luma = &a->picture.data[0][y * a->picture.linesize[0]]; - if((y&3) == 0){ - uint8_t *cb= &a->picture.data[1][ (y>>2)*a->picture.linesize[1] ]; - uint8_t *cr= &a->picture.data[2][ (y>>2)*a->picture.linesize[2] ]; + if ((y & 3) == 0) { + uint8_t *cb = &a->picture.data[1][(y >> 2) * a->picture.linesize[1]]; + uint8_t *cr = &a->picture.data[2][(y >> 2) * a->picture.linesize[2]]; - for(i=0; i<4; i++) - a->offset[i]= *(bytestream++); + for (i = 0; i < 4; i++) + a->offset[i] = *bytestream++; - offset= a->offset[0] - a->delta[ bytestream[2]&0xF ]; - for(x=0; xwidth; x+=4){ - luma[0]=( offset += a->delta[ bytestream[2]&0xF ]); - luma[1]=( offset += a->delta[ bytestream[2]>>4 ]); - luma[2]=( offset += a->delta[ bytestream[0]&0xF ]); - luma[3]=( offset += a->delta[ bytestream[0]>>4 ]); - luma += 4; + offset = a->offset[0] - a->delta[bytestream[2] & 0xF]; + for (x = 0; x < avctx->width; x += 4) { + 
luma[0] = offset += a->delta[bytestream[2] & 0xF]; + luma[1] = offset += a->delta[bytestream[2] >> 4]; + luma[2] = offset += a->delta[bytestream[0] & 0xF]; + luma[3] = offset += a->delta[bytestream[0] >> 4]; + luma += 4; - *(cb++) = bytestream[3]; - *(cr++) = bytestream[1]; + *cb++ = bytestream[3]; + *cr++ = bytestream[1]; - bytestream+= 4; + bytestream += 4; } - }else{ - offset= a->offset[y&3] - a->delta[ bytestream[2]&0xF ]; + } else { + offset = a->offset[y & 3] - a->delta[bytestream[2] & 0xF]; - for(x=0; xwidth; x+=8){ - luma[0]=( offset += a->delta[ bytestream[2]&0xF ]); - luma[1]=( offset += a->delta[ bytestream[2]>>4 ]); - luma[2]=( offset += a->delta[ bytestream[3]&0xF ]); - luma[3]=( offset += a->delta[ bytestream[3]>>4 ]); - luma[4]=( offset += a->delta[ bytestream[0]&0xF ]); - luma[5]=( offset += a->delta[ bytestream[0]>>4 ]); - luma[6]=( offset += a->delta[ bytestream[1]&0xF ]); - luma[7]=( offset += a->delta[ bytestream[1]>>4 ]); - luma += 8; - bytestream+= 4; + for (x = 0; x < avctx->width; x += 8) { + luma[0] = offset += a->delta[bytestream[2] & 0xF]; + luma[1] = offset += a->delta[bytestream[2] >> 4]; + luma[2] = offset += a->delta[bytestream[3] & 0xF]; + luma[3] = offset += a->delta[bytestream[3] >> 4]; + luma[4] = offset += a->delta[bytestream[0] & 0xF]; + luma[5] = offset += a->delta[bytestream[0] >> 4]; + luma[6] = offset += a->delta[bytestream[1] & 0xF]; + luma[7] = offset += a->delta[bytestream[1] >> 4]; + luma += 8; + bytestream += 4; } } } @@ -118,43 +114,47 @@ static int decode_frame(AVCodecContext *avctx, } #if CONFIG_VCR1_ENCODER -static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ - VCR1Context * const a = avctx->priv_data; - AVFrame *pict = data; - AVFrame * const p = &a->picture; +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, + int buf_size, void *data) +{ + VCR1Context *const a = avctx->priv_data; + AVFrame *pict = data; + AVFrame *const p = &a->picture; int size; - *p = *pict; - p->pict_type= AV_PICTURE_TYPE_I; - p->key_frame= 1; + *p = *pict; + p->pict_type = AV_PICTURE_TYPE_I; + p->key_frame = 1; avpriv_align_put_bits(&a->pb); - while(get_bit_count(&a->pb)&31) + while (get_bit_count(&a->pb) & 31) put_bits(&a->pb, 8, 0); - size= get_bit_count(&a->pb)/32; + size = get_bit_count(&a->pb) / 32; - return size*4; + return size * 4; } #endif -static av_cold void common_init(AVCodecContext *avctx){ - VCR1Context * const a = avctx->priv_data; +static av_cold void common_init(AVCodecContext *avctx) +{ + VCR1Context *const a = avctx->priv_data; avctx->coded_frame = &a->picture; - a->avctx= avctx; + a->avctx = avctx; } -static av_cold int decode_init(AVCodecContext *avctx){ - +static av_cold int decode_init(AVCodecContext *avctx) +{ common_init(avctx); - avctx->pix_fmt= PIX_FMT_YUV410P; + avctx->pix_fmt = PIX_FMT_YUV410P; return 0; } -static av_cold int decode_end(AVCodecContext *avctx){ +static av_cold int decode_end(AVCodecContext *avctx) +{ VCR1Context *s = avctx->priv_data; if (s->picture.data[0]) @@ -164,8 +164,8 @@ static av_cold int decode_end(AVCodecContext *avctx){ } #if CONFIG_VCR1_ENCODER -static av_cold int encode_init(AVCodecContext *avctx){ - +static av_cold int encode_init(AVCodecContext *avctx) +{ common_init(avctx); return 0; @@ -194,4 +194,4 @@ AVCodec ff_vcr1_encoder = { .encode = encode_frame, .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), }; -#endif +#endif /* CONFIG_VCR1_ENCODER */ From 51c4d870936976039807bbc881850cf6491fc89a Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 9 
Apr 2012 18:11:35 +0200 Subject: [PATCH 11/25] vcr1: group encoder code together to save #ifdefs --- libavcodec/vcr1.c | 90 ++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 47 deletions(-) diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index 681150d80e..a745e58300 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -27,10 +27,6 @@ #include "avcodec.h" #include "dsputil.h" -/* Disable the encoder. */ -#undef CONFIG_VCR1_ENCODER -#define CONFIG_VCR1_ENCODER 0 - typedef struct VCR1Context { AVCodecContext *avctx; AVFrame picture; @@ -38,6 +34,33 @@ typedef struct VCR1Context { int offset[4]; } VCR1Context; +static av_cold void common_init(AVCodecContext *avctx) +{ + VCR1Context *const a = avctx->priv_data; + + avctx->coded_frame = &a->picture; + a->avctx = avctx; +} + +static av_cold int decode_init(AVCodecContext *avctx) +{ + common_init(avctx); + + avctx->pix_fmt = PIX_FMT_YUV410P; + + return 0; +} + +static av_cold int decode_end(AVCodecContext *avctx) +{ + VCR1Context *s = avctx->priv_data; + + if (s->picture.data[0]) + avctx->release_buffer(avctx, &s->picture); + + return 0; +} + static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { @@ -113,6 +136,22 @@ static int decode_frame(AVCodecContext *avctx, void *data, return buf_size; } +AVCodec ff_vcr1_decoder = { + .name = "vcr1", + .type = AVMEDIA_TYPE_VIDEO, + .id = CODEC_ID_VCR1, + .priv_data_size = sizeof(VCR1Context), + .init = decode_init, + .close = decode_end, + .decode = decode_frame, + .capabilities = CODEC_CAP_DR1, + .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), +}; + +/* Disable the encoder. */ +#undef CONFIG_VCR1_ENCODER +#define CONFIG_VCR1_ENCODER 0 + #if CONFIG_VCR1_ENCODER static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data) @@ -134,57 +173,14 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, return size * 4; } -#endif -static av_cold void common_init(AVCodecContext *avctx) -{ - VCR1Context *const a = avctx->priv_data; - - avctx->coded_frame = &a->picture; - a->avctx = avctx; -} - -static av_cold int decode_init(AVCodecContext *avctx) -{ - common_init(avctx); - - avctx->pix_fmt = PIX_FMT_YUV410P; - - return 0; -} - -static av_cold int decode_end(AVCodecContext *avctx) -{ - VCR1Context *s = avctx->priv_data; - - if (s->picture.data[0]) - avctx->release_buffer(avctx, &s->picture); - - return 0; -} - -#if CONFIG_VCR1_ENCODER static av_cold int encode_init(AVCodecContext *avctx) { common_init(avctx); return 0; } -#endif -AVCodec ff_vcr1_decoder = { - .name = "vcr1", - .type = AVMEDIA_TYPE_VIDEO, - .id = CODEC_ID_VCR1, - .priv_data_size = sizeof(VCR1Context), - .init = decode_init, - .close = decode_end, - .decode = decode_frame, - .capabilities = CODEC_CAP_DR1, - .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), -}; - -#if CONFIG_VCR1_ENCODER AVCodec ff_vcr1_encoder = { .name = "vcr1", .type = AVMEDIA_TYPE_VIDEO, From eeeefd500124a1ec3a7ca75d9c071c5517c3d153 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 12 Apr 2012 18:52:16 +0200 Subject: [PATCH 12/25] vcr1: drop pointless write-only AVCodecContext member from VCR1Context --- libavcodec/vcr1.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index a745e58300..e3e45cc6a5 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -28,7 +28,6 @@ #include "dsputil.h" typedef struct VCR1Context { - AVCodecContext *avctx; AVFrame picture; int delta[16]; int offset[4]; @@ -39,7 +38,6 @@ static 
av_cold void common_init(AVCodecContext *avctx) VCR1Context *const a = avctx->priv_data; avctx->coded_frame = &a->picture; - a->avctx = avctx; } static av_cold int decode_init(AVCodecContext *avctx) From 8ae19143277e8c740e1cdeb280cfdf4c47a3eb23 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 12 Apr 2012 18:55:25 +0200 Subject: [PATCH 13/25] vcr1enc: drop pointless empty encode_init() wrapper function --- libavcodec/vcr1.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index e3e45cc6a5..7edd801b8f 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -172,19 +172,12 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, return size * 4; } -static av_cold int encode_init(AVCodecContext *avctx) -{ - common_init(avctx); - - return 0; -} - AVCodec ff_vcr1_encoder = { .name = "vcr1", .type = AVMEDIA_TYPE_VIDEO, .id = CODEC_ID_VCR1, .priv_data_size = sizeof(VCR1Context), - .init = encode_init, + .init = common_init, .encode = encode_frame, .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), }; From 779222dbfe19ebe731dcdff460e1b1807b9285a7 Mon Sep 17 00:00:00 2001 From: Sean McGovern Date: Wed, 9 May 2012 02:13:15 -0400 Subject: [PATCH 14/25] configure: Add _XOPEN_SOURCE=600 to Solaris preprocessor flags. This is needed to expose some networking APIs. Signed-off-by: Diego Biurrun --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 4b1e55169f..79b948e4f3 100755 --- a/configure +++ b/configure @@ -2452,7 +2452,7 @@ case $target_os in SHFLAGS='-shared -Wl,-h,$$(@F)' enabled x86 && SHFLAGS="-mimpure-text $SHFLAGS" network_extralibs="-lsocket -lnsl" - add_cppflags -D__EXTENSIONS__ + add_cppflags -D__EXTENSIONS__ -D_XOPEN_SOURCE=600 # When using suncc to build, the Solaris linker will mark # an executable with each instruction set encountered by # the Solaris assembler. As our libraries contain their own From ded69c5e21758e2e4a9a7e6ae0fec2d5ad312ba9 Mon Sep 17 00:00:00 2001 From: Sean McGovern Date: Wed, 9 May 2012 02:13:16 -0400 Subject: [PATCH 15/25] sctp: be consistent with socket option level Replace SOL_SCTP by the more portable IPPROTO_SCTP. Signed-off-by: Diego Biurrun --- libavformat/sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/sctp.c b/libavformat/sctp.c index 3823e03ebe..817b0049a9 100644 --- a/libavformat/sctp.c +++ b/libavformat/sctp.c @@ -227,7 +227,7 @@ static int sctp_open(URLContext *h, const char *uri, int flags) if (s->max_streams) { initparams.sinit_max_instreams = s->max_streams; initparams.sinit_num_ostreams = s->max_streams; - if (setsockopt(fd, SOL_SCTP, SCTP_INITMSG, &initparams, + if (setsockopt(fd, IPPROTO_SCTP, SCTP_INITMSG, &initparams, sizeof(initparams)) < 0) av_log(h, AV_LOG_ERROR, "SCTP ERROR: Unable to initialize socket max streams %d\n", From 7cf78b3476d77888caa059398078640fb821170e Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 11 Apr 2012 10:31:02 +0200 Subject: [PATCH 16/25] cmdutils: Add fallback case to switch in check_stream_specifier(). 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the warning: cmdutils.c:897: warning: ‘type’ may be used uninitialized in this function --- cmdutils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmdutils.c b/cmdutils.c index 3cd11ca241..bd07d2ad89 100644 --- a/cmdutils.c +++ b/cmdutils.c @@ -34,6 +34,7 @@ #include "libavdevice/avdevice.h" #include "libavresample/avresample.h" #include "libswscale/swscale.h" +#include "libavutil/avassert.h" #include "libavutil/avstring.h" #include "libavutil/mathematics.h" #include "libavutil/parseutils.h" @@ -905,6 +906,7 @@ int check_stream_specifier(AVFormatContext *s, AVStream *st, const char *spec) case 's': type = AVMEDIA_TYPE_SUBTITLE; break; case 'd': type = AVMEDIA_TYPE_DATA; break; case 't': type = AVMEDIA_TYPE_ATTACHMENT; break; + default: av_assert0(0); } if (type != st->codec->codec_type) return 0; From 55c9320e0638349dbea2f8a658ecd3f48d1a80f1 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 May 2012 02:12:14 +0200 Subject: [PATCH 17/25] rtmp: Support 'rtmp_tcurl', an option which overrides the URL of the target stream. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Martin Storsjö --- libavformat/rtmpproto.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 9c82b462ff..5d7f4d185d 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -44,6 +44,7 @@ #define APP_MAX_LENGTH 128 #define PLAYPATH_MAX_LENGTH 256 +#define TCURL_MAX_LENGTH 512 /** RTMP protocol handler state */ typedef enum { @@ -82,6 +83,7 @@ typedef struct RTMPContext { int flv_header_bytes; ///< number of initialized bytes in flv_header int nb_invokes; ///< keeps track of invoke messages int create_stream_invoke; ///< invoke id for the create stream command + char* tcurl; ///< url of the target stream } RTMPContext; #define PLAYER_KEY_OPEN_PART_LEN 30 ///< length of partial key used for first client digest signing @@ -110,17 +112,14 @@ static const uint8_t rtmp_server_key[] = { /** * Generate 'connect' call and send it to the server. 
*/ -static void gen_connect(URLContext *s, RTMPContext *rt, const char *proto, - const char *host, int port) +static void gen_connect(URLContext *s, RTMPContext *rt) { RTMPPacket pkt; uint8_t ver[64], *p; - char tcurl[512]; ff_rtmp_packet_create(&pkt, RTMP_SYSTEM_CHANNEL, RTMP_PT_INVOKE, 0, 4096); p = pkt.data; - ff_url_join(tcurl, sizeof(tcurl), proto, NULL, host, port, "/%s", rt->app); ff_amf_write_string(&p, "connect"); ff_amf_write_number(&p, ++rt->nb_invokes); ff_amf_write_object_start(&p); @@ -138,7 +137,7 @@ static void gen_connect(URLContext *s, RTMPContext *rt, const char *proto, ff_amf_write_field_name(&p, "flashVer"); ff_amf_write_string(&p, ver); ff_amf_write_field_name(&p, "tcUrl"); - ff_amf_write_string(&p, tcurl); + ff_amf_write_string(&p, rt->tcurl); if (rt->is_input) { ff_amf_write_field_name(&p, "fpad"); ff_amf_write_bool(&p, 0); @@ -910,13 +909,19 @@ static int rtmp_open(URLContext *s, const char *uri, int flags) strncat(rt->playpath, fname, PLAYPATH_MAX_LENGTH - 5); } + if (!rt->tcurl) { + rt->tcurl = av_malloc(TCURL_MAX_LENGTH); + ff_url_join(rt->tcurl, TCURL_MAX_LENGTH, proto, NULL, hostname, + port, "/%s", rt->app); + } + rt->client_report_size = 1048576; rt->bytes_read = 0; rt->last_bytes_read = 0; av_log(s, AV_LOG_DEBUG, "Proto = %s, path = %s, app = %s, fname = %s\n", proto, path, rt->app, rt->playpath); - gen_connect(s, rt, proto, hostname, port); + gen_connect(s, rt); do { ret = get_packet(s, 1); @@ -1057,6 +1062,7 @@ static const AVOption rtmp_options[] = { {"live", "live stream", 0, AV_OPT_TYPE_CONST, {-1}, 0, 0, DEC, "rtmp_live"}, {"recorded", "recorded stream", 0, AV_OPT_TYPE_CONST, {0}, 0, 0, DEC, "rtmp_live"}, {"rtmp_playpath", "Stream identifier to play or to publish", OFFSET(playpath), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_tcurl", "URL of the target stream. Defaults to rtmp://host[:port]/app.", OFFSET(tcurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, { NULL }, }; From e64673e4f4f7acefe5f60f35fb3a196ccf5e9490 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 May 2012 02:12:15 +0200 Subject: [PATCH 18/25] rtmp: Support 'rtmp_flashver', an option which overrides the version of the Flash plugin. 
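A minimal usage sketch, not part of this patch: rtmp_tcurl (added in the previous
commit) and rtmp_flashver (added here) are private options of the RTMP protocol
handler, so one plausible way to set them is through the options dictionary
accepted by avio_open2(). The URLs and option values below are placeholders, and
the option-forwarding path through avio_open2() is an assumption about the
surrounding library, not something introduced by this patch.

    #include "libavformat/avformat.h"   /* avio_open2(), av_register_all() */
    #include "libavutil/dict.h"         /* av_dict_set(), av_dict_free() */

    static int open_rtmp_with_overrides(AVIOContext **pb)
    {
        AVDictionary *opts = NULL;
        int ret;

        av_register_all();  /* registers demuxers and protocols in this era of the library */

        /* Override the tcUrl and flashVer fields sent in the 'connect' invoke;
         * both values are placeholders chosen for illustration only. */
        av_dict_set(&opts, "rtmp_tcurl", "rtmp://example.com:1935/myapp", 0);
        av_dict_set(&opts, "rtmp_flashver", "FMLE/3.0 (compatible; example)", 0);

        ret = avio_open2(pb, "rtmp://example.com:1935/myapp/mystream",
                         AVIO_FLAG_READ, NULL, &opts);
        av_dict_free(&opts);  /* drop any entries the protocol did not consume */
        return ret;
    }

When neither option is set, the defaults built in rtmp_open() are used, so
existing callers keep their current behaviour.
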
--- libavformat/rtmpproto.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 5d7f4d185d..11caad6270 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -45,6 +45,7 @@ #define APP_MAX_LENGTH 128 #define PLAYPATH_MAX_LENGTH 256 #define TCURL_MAX_LENGTH 512 +#define FLASHVER_MAX_LENGTH 64 /** RTMP protocol handler state */ typedef enum { @@ -84,6 +85,7 @@ typedef struct RTMPContext { int nb_invokes; ///< keeps track of invoke messages int create_stream_invoke; ///< invoke id for the create stream command char* tcurl; ///< url of the target stream + char* flashver; ///< version of the flash plugin } RTMPContext; #define PLAYER_KEY_OPEN_PART_LEN 30 ///< length of partial key used for first client digest signing @@ -115,7 +117,7 @@ static const uint8_t rtmp_server_key[] = { static void gen_connect(URLContext *s, RTMPContext *rt) { RTMPPacket pkt; - uint8_t ver[64], *p; + uint8_t *p; ff_rtmp_packet_create(&pkt, RTMP_SYSTEM_CHANNEL, RTMP_PT_INVOKE, 0, 4096); p = pkt.data; @@ -126,16 +128,12 @@ static void gen_connect(URLContext *s, RTMPContext *rt) ff_amf_write_field_name(&p, "app"); ff_amf_write_string(&p, rt->app); - if (rt->is_input) { - snprintf(ver, sizeof(ver), "%s %d,%d,%d,%d", RTMP_CLIENT_PLATFORM, RTMP_CLIENT_VER1, - RTMP_CLIENT_VER2, RTMP_CLIENT_VER3, RTMP_CLIENT_VER4); - } else { - snprintf(ver, sizeof(ver), "FMLE/3.0 (compatible; %s)", LIBAVFORMAT_IDENT); + if (!rt->is_input) { ff_amf_write_field_name(&p, "type"); ff_amf_write_string(&p, "nonprivate"); } ff_amf_write_field_name(&p, "flashVer"); - ff_amf_write_string(&p, ver); + ff_amf_write_string(&p, rt->flashver); ff_amf_write_field_name(&p, "tcUrl"); ff_amf_write_string(&p, rt->tcurl); if (rt->is_input) { @@ -915,6 +913,18 @@ static int rtmp_open(URLContext *s, const char *uri, int flags) port, "/%s", rt->app); } + if (!rt->flashver) { + rt->flashver = av_malloc(FLASHVER_MAX_LENGTH); + if (rt->is_input) { + snprintf(rt->flashver, FLASHVER_MAX_LENGTH, "%s %d,%d,%d,%d", + RTMP_CLIENT_PLATFORM, RTMP_CLIENT_VER1, RTMP_CLIENT_VER2, + RTMP_CLIENT_VER3, RTMP_CLIENT_VER4); + } else { + snprintf(rt->flashver, FLASHVER_MAX_LENGTH, + "FMLE/3.0 (compatible; %s)", LIBAVFORMAT_IDENT); + } + } + rt->client_report_size = 1048576; rt->bytes_read = 0; rt->last_bytes_read = 0; @@ -1057,6 +1067,7 @@ static int rtmp_write(URLContext *s, const uint8_t *buf, int size) static const AVOption rtmp_options[] = { {"rtmp_app", "Name of application to connect to on the RTMP server", OFFSET(app), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_flashver", "Version of the Flash plugin used to run the SWF player.", OFFSET(flashver), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, {"rtmp_live", "Specify that the media is a live stream.", OFFSET(live), AV_OPT_TYPE_INT, {-2}, INT_MIN, INT_MAX, DEC, "rtmp_live"}, {"any", "both", 0, AV_OPT_TYPE_CONST, {-2}, 0, 0, DEC, "rtmp_live"}, {"live", "live stream", 0, AV_OPT_TYPE_CONST, {-1}, 0, 0, DEC, "rtmp_live"}, From 05945db9ce3c6708e62d05bfb040db10d73eade0 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 May 2012 02:12:16 +0200 Subject: [PATCH 19/25] rtmp: Support 'rtmp_swfurl', an option which specifies the URL of the SWF player. 
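Usage would follow the same dictionary pattern sketched for rtmp_tcurl and
rtmp_flashver in the previous commits; a hypothetical entry (the player URL is
a placeholder):

    av_dict_set(&opts, "rtmp_swfurl", "http://example.com/player.swf", 0);

Unlike flashVer, the swfUrl field is only written into the 'connect' invoke
when the option is actually set, so connections that never supplied it are
unaffected.
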
--- libavformat/rtmpproto.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 11caad6270..13ef719ab2 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -86,6 +86,7 @@ typedef struct RTMPContext { int create_stream_invoke; ///< invoke id for the create stream command char* tcurl; ///< url of the target stream char* flashver; ///< version of the flash plugin + char* swfurl; ///< url of the swf player } RTMPContext; #define PLAYER_KEY_OPEN_PART_LEN 30 ///< length of partial key used for first client digest signing @@ -134,6 +135,12 @@ static void gen_connect(URLContext *s, RTMPContext *rt) } ff_amf_write_field_name(&p, "flashVer"); ff_amf_write_string(&p, rt->flashver); + + if (rt->swfurl) { + ff_amf_write_field_name(&p, "swfUrl"); + ff_amf_write_string(&p, rt->swfurl); + } + ff_amf_write_field_name(&p, "tcUrl"); ff_amf_write_string(&p, rt->tcurl); if (rt->is_input) { @@ -1073,6 +1080,7 @@ static const AVOption rtmp_options[] = { {"live", "live stream", 0, AV_OPT_TYPE_CONST, {-1}, 0, 0, DEC, "rtmp_live"}, {"recorded", "recorded stream", 0, AV_OPT_TYPE_CONST, {0}, 0, 0, DEC, "rtmp_live"}, {"rtmp_playpath", "Stream identifier to play or to publish", OFFSET(playpath), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_swfurl", "URL of the SWF player. By default no value will be sent", OFFSET(swfurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, {"rtmp_tcurl", "URL of the target stream. Defaults to rtmp://host[:port]/app.", OFFSET(tcurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, { NULL }, }; From d55961fa82d34c1783f525b05608694d2b2dea1c Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 9 May 2012 00:58:09 +0200 Subject: [PATCH 20/25] rtmp: Implement check bandwidth notification. According to the behaviour of librtmp, it is recommended to send this message to the server after receiving the 'onBWDone' callback in order to do bandwidth checking and improve compatibility with some servers. --- libavformat/rtmpproto.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 13ef719ab2..427655c27e 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -372,6 +372,25 @@ static void gen_server_bw(URLContext *s, RTMPContext *rt) ff_rtmp_packet_destroy(&pkt); } +/** + * Generate check bandwidth message and send it to the server. + */ +static void gen_check_bw(URLContext *s, RTMPContext *rt) +{ + RTMPPacket pkt; + uint8_t *p; + + ff_rtmp_packet_create(&pkt, RTMP_SYSTEM_CHANNEL, RTMP_PT_INVOKE, 0, 21); + + p = pkt.data; + ff_amf_write_string(&p, "_checkbw"); + ff_amf_write_number(&p, ++rt->nb_invokes); + ff_amf_write_null(&p); + + ff_rtmp_packet_write(rt->stream, &pkt, rt->chunk_size, rt->prev_pkt[1]); + ff_rtmp_packet_destroy(&pkt); +} + /** * Generate report on bytes read so far and send it to the server. 
*/ @@ -691,6 +710,8 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt) if (!t && !strcmp(tmpstr, "NetStream.Play.Stop")) rt->state = STATE_STOPPED; if (!t && !strcmp(tmpstr, "NetStream.Play.UnpublishNotify")) rt->state = STATE_STOPPED; if (!t && !strcmp(tmpstr, "NetStream.Publish.Start")) rt->state = STATE_PUBLISHING; + } else if (!memcmp(pkt->data, "\002\000\010onBWDone", 11)) { + gen_check_bw(s, rt); } break; } From be545b8a34cb7934bddc6c76aa783bee0b90c361 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 7 May 2012 14:13:23 +0200 Subject: [PATCH 21/25] h264: K&R formatting cosmetics for header files (part I/II) --- libavcodec/h264.h | 497 ++++++++++++++++++++++-------------------- libavcodec/h264data.h | 371 +++++++++++++++---------------- libavcodec/h264pred.h | 81 ++++--- 3 files changed, 491 insertions(+), 458 deletions(-) diff --git a/libavcodec/h264.h b/libavcodec/h264.h index ce06f613cd..570ce2ffae 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -37,14 +37,14 @@ #include "rectangle.h" #define interlaced_dct interlaced_dct_is_a_bad_name -#define mb_intra mb_intra_is_not_initialized_see_mb_type +#define mb_intra mb_intra_is_not_initialized_see_mb_type -#define MAX_SPS_COUNT 32 -#define MAX_PPS_COUNT 256 +#define MAX_SPS_COUNT 32 +#define MAX_PPS_COUNT 256 -#define MAX_MMCO_COUNT 66 +#define MAX_MMCO_COUNT 66 -#define MAX_DELAYED_PIC_COUNT 16 +#define MAX_DELAYED_PIC_COUNT 16 /* Compiling in interlaced support reduces the speed * of progressive decoding by about 2%. */ @@ -59,25 +59,25 @@ #define MAX_SLICES 16 #ifdef ALLOW_INTERLACE -#define MB_MBAFF h->mb_mbaff -#define MB_FIELD h->mb_field_decoding_flag +#define MB_MBAFF h->mb_mbaff +#define MB_FIELD h->mb_field_decoding_flag #define FRAME_MBAFF h->mb_aff_frame #define FIELD_PICTURE (s->picture_structure != PICT_FRAME) #define LEFT_MBS 2 -#define LTOP 0 -#define LBOT 1 -#define LEFT(i) (i) +#define LTOP 0 +#define LBOT 1 +#define LEFT(i) (i) #else -#define MB_MBAFF 0 -#define MB_FIELD 0 -#define FRAME_MBAFF 0 +#define MB_MBAFF 0 +#define MB_FIELD 0 +#define FRAME_MBAFF 0 #define FIELD_PICTURE 0 #undef IS_INTERLACED #define IS_INTERLACED(mb_type) 0 #define LEFT_MBS 1 -#define LTOP 0 -#define LBOT 0 -#define LEFT(i) 0 +#define LTOP 0 +#define LBOT 0 +#define LEFT(i) 0 #endif #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) @@ -88,9 +88,9 @@ #define CHROMA422 (h->sps.chroma_format_idc == 2) #define CHROMA444 (h->sps.chroma_format_idc == 3) -#define EXTENDED_SAR 255 +#define EXTENDED_SAR 255 -#define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16 bit +#define MB_TYPE_REF0 MB_TYPE_ACPRED // dirty but it fits in 16 bit #define MB_TYPE_8x8DCT 0x01000000 #define IS_REF0(a) ((a) & MB_TYPE_REF0) #define IS_8x8DCT(a) ((a) & MB_TYPE_8x8DCT) @@ -101,11 +101,11 @@ */ #define DELAYED_PIC_REF 4 -#define QP_MAX_NUM (51 + 2*6) // The maximum supported qp +#define QP_MAX_NUM (51 + 2 * 6) // The maximum supported qp /* NAL unit types */ enum { - NAL_SLICE=1, + NAL_SLICE = 1, NAL_DPA, NAL_DPB, NAL_DPC, @@ -118,17 +118,17 @@ enum { NAL_END_STREAM, NAL_FILLER_DATA, NAL_SPS_EXT, - NAL_AUXILIARY_SLICE=19 + NAL_AUXILIARY_SLICE = 19 }; /** * SEI message types */ typedef enum { - SEI_BUFFERING_PERIOD = 0, ///< buffering period (H.264, D.1.1) - SEI_TYPE_PIC_TIMING = 1, ///< picture timing - SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data - SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) + SEI_BUFFERING_PERIOD = 0, ///< buffering period (H.264, D.1.1) + 
SEI_TYPE_PIC_TIMING = 1, ///< picture timing + SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data + SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) } SEI_Type; /** @@ -149,8 +149,7 @@ typedef enum { /** * Sequence parameter set */ -typedef struct SPS{ - +typedef struct SPS { int profile_idc; int level_idc; int chroma_format_idc; @@ -167,9 +166,9 @@ typedef struct SPS{ int mb_width; ///< pic_width_in_mbs_minus1 + 1 int mb_height; ///< pic_height_in_map_units_minus1 + 1 int frame_mbs_only_flag; - int mb_aff; ///b4_stride + int b_stride; // FIXME use s->b4_stride - int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff + int mb_linesize; ///< may be equal to s->linesize or s->linesize * 2, for mbaff int mb_uvlinesize; int emu_edge_width; @@ -335,32 +334,32 @@ typedef struct H264Context{ /** * current pps */ - PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? + PPS pps; // FIXME move to Picture perhaps? (->no) do we need that? - uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down? - uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64]; - uint32_t (*dequant4_coeff[6])[16]; - uint32_t (*dequant8_coeff[6])[64]; + uint32_t dequant4_buffer[6][QP_MAX_NUM + 1][16]; // FIXME should these be moved down? + uint32_t dequant8_buffer[6][QP_MAX_NUM + 1][64]; + uint32_t(*dequant4_coeff[6])[16]; + uint32_t(*dequant8_coeff[6])[64]; int slice_num; - uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 + uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 int slice_type; - int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) + int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) int slice_type_fixed; - //interlacing specific flags + // interlacing specific flags int mb_aff_frame; int mb_field_decoding_flag; - int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag + int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4]; - //Weighted pred stuff + // Weighted pred stuff int use_weight; int use_weight_chroma; int luma_log2_weight_denom; int chroma_log2_weight_denom; - //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss + // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss int luma_weight[48][2][2]; int chroma_weight[48][2][2][2]; int implicit_weight[48][48][2]; @@ -370,48 +369,48 @@ typedef struct H264Context{ int col_fieldoff; int dist_scale_factor[16]; int dist_scale_factor_field[2][32]; - int map_col_to_list0[2][16+32]; - int map_col_to_list0_field[2][2][16+32]; + int map_col_to_list0[2][16 + 32]; + int map_col_to_list0_field[2][2][16 + 32]; /** * num_ref_idx_l0/1_active_minus1 + 1 */ - unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode + unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode unsigned int list_count; - uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type - Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. - Reordered version of default_ref_list - according to picture reordering in slice header */ - int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 + uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type + Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. 
+ * Reordered version of default_ref_list + * according to picture reordering in slice header */ + int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 - //data partitioning + // data partitioning GetBitContext intra_gb; GetBitContext inter_gb; GetBitContext *intra_gb_ptr; GetBitContext *inter_gb_ptr; - DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. - DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2]; - DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb + DECLARE_ALIGNED(16, DCTELEM, mb)[16 * 48 * 2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. + DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16 * 2]; + DCTELEM mb_padding[256 * 2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb /** * Cabac */ CABACContext cabac; - uint8_t cabac_state[1024]; + uint8_t cabac_state[1024]; - /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ - uint16_t *cbp_table; + /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0, 1, 2), 0x0? luma_cbp */ + uint16_t *cbp_table; int cbp; int top_cbp; int left_cbp; /* chroma_pred_mode for i4x4 or i16x16, else 0 */ - uint8_t *chroma_pred_mode_table; - int last_qscale_diff; - uint8_t (*mvd_table[2])[2]; - DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2]; - uint8_t *direct_table; - uint8_t direct_cache[5*8]; + uint8_t *chroma_pred_mode_table; + int last_qscale_diff; + uint8_t (*mvd_table[2])[2]; + DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5 * 8][2]; + uint8_t *direct_table; + uint8_t direct_cache[5 * 8]; uint8_t zigzag_scan[16]; uint8_t zigzag_scan8x8[64]; @@ -432,13 +431,13 @@ typedef struct H264Context{ int is_complex; - //deblock - int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 + // deblock + int deblocking_filter; ///< disable_deblocking_filter_idc with 1 <-> 0 int slice_alpha_c0_offset; int slice_beta_offset; -//============================================================= - //Things below are not used in the MB or more inner code + // ============================================================= + // Things below are not used in the MB or more inner code int nal_ref_idc; int nal_unit_type; @@ -448,37 +447,36 @@ typedef struct H264Context{ /** * Used to parse AVC variant of h264 */ - int is_avc; ///< this flag is != 0 if codec is avc1 - int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) - int got_first; ///< this flag is != 0 if we've parsed a frame + int is_avc; ///< this flag is != 0 if codec is avc1 + int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) + int got_first; ///< this flag is != 0 if we've parsed a frame SPS *sps_buffers[MAX_SPS_COUNT]; PPS *pps_buffers[MAX_PPS_COUNT]; - int dequant_coeff_pps; ///< reinit tables when pps changes + int dequant_coeff_pps; ///< reinit tables when pps changes uint16_t *slice_table_base; - - //POC stuff + // POC stuff int poc_lsb; int poc_msb; int delta_poc_bottom; int delta_poc[2]; int frame_num; - int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 - int prev_poc_lsb; ///< poc_lsb of the last reference pic 
for POC type 0 - int frame_num_offset; ///< for POC type 2 - int prev_frame_num_offset; ///< for POC type 2 - int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 + int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 + int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 + int frame_num_offset; ///< for POC type 2 + int prev_frame_num_offset; ///< for POC type 2 + int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 /** - * frame_num for frames or 2*frame_num+1 for field pics. + * frame_num for frames or 2 * frame_num + 1 for field pics. */ int curr_pic_num; /** - * max_frame_num or 2*max_frame_num for field pics. + * max_frame_num or 2 * max_frame_num for field pics. */ int max_pic_num; @@ -487,7 +485,7 @@ typedef struct H264Context{ Picture *short_ref[32]; Picture *long_ref[32]; Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture - Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? + Picture *delayed_pic[MAX_DELAYED_PIC_COUNT + 2]; // FIXME size? int last_pocs[MAX_DELAYED_PIC_COUNT]; Picture *next_output_pic; int outputed_poc; @@ -500,10 +498,10 @@ typedef struct H264Context{ int mmco_index; int mmco_reset; - int long_ref_count; ///< number of actual long term references - int short_ref_count; ///< number of actual short term references + int long_ref_count; ///< number of actual long term references + int short_ref_count; ///< number of actual short term references - int cabac_init_idc; + int cabac_init_idc; /** * @name Members for slice based multithreading @@ -572,18 +570,17 @@ typedef struct H264Context{ */ int sei_recovery_frame_cnt; - int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag - int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag + int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag + int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag // Timestamp stuff - int sei_buffering_period_present; ///< Buffering period SEI flag - int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs + int sei_buffering_period_present; ///< Buffering period SEI flag + int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs int cur_chroma_format_idc; -}H264Context; +} H264Context; - -extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM+1]; ///< One chroma qp table for each supported bit depth (8, 9, 10). +extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM + 1]; ///< One chroma qp table for each supported bit depth (8, 9, 10). extern const uint16_t ff_h264_mb_sizes[4]; /** @@ -610,13 +607,16 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length); * Decode a network abstraction layer unit. * @param consumed is the number of bytes used as input * @param length is the length of the array - * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing? + * @param dst_length is the number of decoded bytes FIXME here + * or a decode rbsp tailing? * @return decoded bytes, might be src+1 if no escapes */ -const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length); +const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, + int *dst_length, int *consumed, int length); /** - * Free any data that may have been allocated in the H264 context like SPS, PPS etc. + * Free any data that may have been allocated in the H264 context + * like SPS, PPS etc. 
*/ av_cold void ff_h264_free_context(H264Context *h); @@ -649,14 +649,15 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb); void ff_generate_sliding_window_mmcos(H264Context *h); - /** - * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks. + * Check if the top & left blocks are available if needed & change the + * dc mode so it only uses the available blocks. */ int ff_h264_check_intra4x4_pred_mode(H264Context *h); /** - * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks. + * Check if the top & left blocks are available if needed & change the + * dc mode so it only uses the available blocks. */ int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma); @@ -668,24 +669,28 @@ av_cold void ff_h264_decode_init_vlc(void); /** * Decode a macroblock - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error */ int ff_h264_decode_mb_cavlc(H264Context *h); /** * Decode a CABAC coded macroblock - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error */ int ff_h264_decode_mb_cabac(H264Context *h); void ff_h264_init_cabac_states(H264Context *h); -void ff_h264_direct_dist_scale_factor(H264Context * const h); -void ff_h264_direct_ref_list_init(H264Context * const h); -void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type); +void ff_h264_direct_dist_scale_factor(H264Context *const h); +void ff_h264_direct_ref_list_init(H264Context *const h); +void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type); -void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); -void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); +void ff_h264_filter_mb_fast(H264Context *h, int mb_x, int mb_y, + uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, + unsigned int linesize, unsigned int uvlinesize); +void ff_h264_filter_mb(H264Context *h, int mb_x, int mb_y, + uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, + unsigned int linesize, unsigned int uvlinesize); /** * Reset SEI values at the beginning of the frame. 
@@ -694,16 +699,15 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint */ void ff_h264_reset_sei(H264Context *h); - /* -o-o o-o - / / / -o-o o-o - ,---' -o-o o-o - / / / -o-o o-o -*/ + * o-o o-o + * / / / + * o-o o-o + * ,---' + * o-o o-o + * / / / + * o-o o-o + */ /* Scan8 organization: * 0 1 2 3 4 5 6 7 @@ -728,156 +732,173 @@ o-o o-o #define LUMA_DC_BLOCK_INDEX 48 #define CHROMA_DC_BLOCK_INDEX 49 -//This table must be here because scan8[constant] must be known at compiletime -static const uint8_t scan8[16*3 + 3]={ - 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, - 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8, - 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8, - 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8, - 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, - 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8, - 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8, - 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8, - 4+11*8, 5+11*8, 4+12*8, 5+12*8, - 6+11*8, 7+11*8, 6+12*8, 7+12*8, - 4+13*8, 5+13*8, 4+14*8, 5+14*8, - 6+13*8, 7+13*8, 6+14*8, 7+14*8, - 0+ 0*8, 0+ 5*8, 0+10*8 +// This table must be here because scan8[constant] must be known at compiletime +static const uint8_t scan8[16 * 3 + 3] = { + 4 + 1 * 8, 5 + 1 * 8, 4 + 2 * 8, 5 + 2 * 8, + 6 + 1 * 8, 7 + 1 * 8, 6 + 2 * 8, 7 + 2 * 8, + 4 + 3 * 8, 5 + 3 * 8, 4 + 4 * 8, 5 + 4 * 8, + 6 + 3 * 8, 7 + 3 * 8, 6 + 4 * 8, 7 + 4 * 8, + 4 + 6 * 8, 5 + 6 * 8, 4 + 7 * 8, 5 + 7 * 8, + 6 + 6 * 8, 7 + 6 * 8, 6 + 7 * 8, 7 + 7 * 8, + 4 + 8 * 8, 5 + 8 * 8, 4 + 9 * 8, 5 + 9 * 8, + 6 + 8 * 8, 7 + 8 * 8, 6 + 9 * 8, 7 + 9 * 8, + 4 + 11 * 8, 5 + 11 * 8, 4 + 12 * 8, 5 + 12 * 8, + 6 + 11 * 8, 7 + 11 * 8, 6 + 12 * 8, 7 + 12 * 8, + 4 + 13 * 8, 5 + 13 * 8, 4 + 14 * 8, 5 + 14 * 8, + 6 + 13 * 8, 7 + 13 * 8, 6 + 14 * 8, 7 + 14 * 8, + 0 + 0 * 8, 0 + 5 * 8, 0 + 10 * 8 }; -static av_always_inline uint32_t pack16to32(int a, int b){ +static av_always_inline uint32_t pack16to32(int a, int b) +{ #if HAVE_BIGENDIAN - return (b&0xFFFF) + (a<<16); + return (b & 0xFFFF) + (a << 16); #else - return (a&0xFFFF) + (b<<16); + return (a & 0xFFFF) + (b << 16); #endif } -static av_always_inline uint16_t pack8to16(int a, int b){ +static av_always_inline uint16_t pack8to16(int a, int b) +{ #if HAVE_BIGENDIAN - return (b&0xFF) + (a<<8); + return (b & 0xFF) + (a << 8); #else - return (a&0xFF) + (b<<8); + return (a & 0xFF) + (b << 8); #endif } /** * Get the chroma qp. */ -static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){ +static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale) +{ return h->pps.chroma_qp_table[t][qscale]; } /** * Get the predicted intra4x4 prediction mode. 
*/ -static av_always_inline int pred_intra_mode(H264Context *h, int n){ - const int index8= scan8[n]; - const int left= h->intra4x4_pred_mode_cache[index8 - 1]; - const int top = h->intra4x4_pred_mode_cache[index8 - 8]; - const int min= FFMIN(left, top); +static av_always_inline int pred_intra_mode(H264Context *h, int n) +{ + const int index8 = scan8[n]; + const int left = h->intra4x4_pred_mode_cache[index8 - 1]; + const int top = h->intra4x4_pred_mode_cache[index8 - 8]; + const int min = FFMIN(left, top); - tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); + tprintf(h->s.avctx, "mode:%d %d min:%d\n", left, top, min); - if(min<0) return DC_PRED; - else return min; + if (min < 0) + return DC_PRED; + else + return min; } -static av_always_inline void write_back_intra_pred_mode(H264Context *h){ - int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; - int8_t *i4x4_cache= h->intra4x4_pred_mode_cache; +static av_always_inline void write_back_intra_pred_mode(H264Context *h) +{ + int8_t *i4x4 = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; + int8_t *i4x4_cache = h->intra4x4_pred_mode_cache; - AV_COPY32(i4x4, i4x4_cache + 4 + 8*4); - i4x4[4]= i4x4_cache[7+8*3]; - i4x4[5]= i4x4_cache[7+8*2]; - i4x4[6]= i4x4_cache[7+8*1]; + AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4); + i4x4[4] = i4x4_cache[7 + 8 * 3]; + i4x4[5] = i4x4_cache[7 + 8 * 2]; + i4x4[6] = i4x4_cache[7 + 8 * 1]; } -static av_always_inline void write_back_non_zero_count(H264Context *h){ - const int mb_xy= h->mb_xy; - uint8_t *nnz = h->non_zero_count[mb_xy]; +static av_always_inline void write_back_non_zero_count(H264Context *h) +{ + const int mb_xy = h->mb_xy; + uint8_t *nnz = h->non_zero_count[mb_xy]; uint8_t *nnz_cache = h->non_zero_count_cache; - AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]); - AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]); - AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]); - AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]); - AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]); - AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]); - AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); - AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); + AV_COPY32(&nnz[ 0], &nnz_cache[4 + 8 * 1]); + AV_COPY32(&nnz[ 4], &nnz_cache[4 + 8 * 2]); + AV_COPY32(&nnz[ 8], &nnz_cache[4 + 8 * 3]); + AV_COPY32(&nnz[12], &nnz_cache[4 + 8 * 4]); + AV_COPY32(&nnz[16], &nnz_cache[4 + 8 * 6]); + AV_COPY32(&nnz[20], &nnz_cache[4 + 8 * 7]); + AV_COPY32(&nnz[32], &nnz_cache[4 + 8 * 11]); + AV_COPY32(&nnz[36], &nnz_cache[4 + 8 * 12]); - if(!h->s.chroma_y_shift){ - AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); - AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); - AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); - AV_COPY32(&nnz[44], &nnz_cache[4+8*14]); + if (!h->s.chroma_y_shift) { + AV_COPY32(&nnz[24], &nnz_cache[4 + 8 * 8]); + AV_COPY32(&nnz[28], &nnz_cache[4 + 8 * 9]); + AV_COPY32(&nnz[40], &nnz_cache[4 + 8 * 13]); + AV_COPY32(&nnz[44], &nnz_cache[4 + 8 * 14]); } } -static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride, - int b_xy, int b8_xy, int mb_type, int list ) +static av_always_inline void write_back_motion_list(H264Context *h, + MpegEncContext *const s, + int b_stride, + int b_xy, int b8_xy, + int mb_type, int list) { - int16_t (*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; - int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]]; - AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0); - AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1); - AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2); - AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3); - if( CABAC ) { - uint8_t (*mvd_dst)[2] = 
&h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; - uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; - if(IS_SKIP(mb_type)) + int16_t(*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; + int16_t(*mv_src)[2] = &h->mv_cache[list][scan8[0]]; + AV_COPY128(mv_dst + 0 * b_stride, mv_src + 8 * 0); + AV_COPY128(mv_dst + 1 * b_stride, mv_src + 8 * 1); + AV_COPY128(mv_dst + 2 * b_stride, mv_src + 8 * 2); + AV_COPY128(mv_dst + 3 * b_stride, mv_src + 8 * 3); + if (CABAC) { + uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8 * h->mb_xy + : h->mb2br_xy[h->mb_xy]]; + uint8_t(*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; + if (IS_SKIP(mb_type)) { AV_ZERO128(mvd_dst); - else{ - AV_COPY64(mvd_dst, mvd_src + 8*3); - AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); - AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); - AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); + } else { + AV_COPY64(mvd_dst, mvd_src + 8 * 3); + AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8 * 0); + AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8 * 1); + AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8 * 2); } } { int8_t *ref_index = &s->current_picture.f.ref_index[list][b8_xy]; int8_t *ref_cache = h->ref_cache[list]; - ref_index[0+0*2]= ref_cache[scan8[0]]; - ref_index[1+0*2]= ref_cache[scan8[4]]; - ref_index[0+1*2]= ref_cache[scan8[8]]; - ref_index[1+1*2]= ref_cache[scan8[12]]; + ref_index[0 + 0 * 2] = ref_cache[scan8[0]]; + ref_index[1 + 0 * 2] = ref_cache[scan8[4]]; + ref_index[0 + 1 * 2] = ref_cache[scan8[8]]; + ref_index[1 + 1 * 2] = ref_cache[scan8[12]]; } } -static av_always_inline void write_back_motion(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int b_stride = h->b_stride; - const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy - const int b8_xy= 4*h->mb_xy; +static av_always_inline void write_back_motion(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; + const int b_stride = h->b_stride; + const int b_xy = 4 * s->mb_x + 4 * s->mb_y * h->b_stride; // try mb2b(8)_xy + const int b8_xy = 4 * h->mb_xy; - if(USES_LIST(mb_type, 0)){ + if (USES_LIST(mb_type, 0)) { write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0); - }else{ + } else { fill_rectangle(&s->current_picture.f.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1); } - if(USES_LIST(mb_type, 1)){ + if (USES_LIST(mb_type, 1)) write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1); - } - if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){ - if(IS_8X8(mb_type)){ - uint8_t *direct_table = &h->direct_table[4*h->mb_xy]; - direct_table[1] = h->sub_mb_type[1]>>1; - direct_table[2] = h->sub_mb_type[2]>>1; - direct_table[3] = h->sub_mb_type[3]>>1; + if (h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC) { + if (IS_8X8(mb_type)) { + uint8_t *direct_table = &h->direct_table[4 * h->mb_xy]; + direct_table[1] = h->sub_mb_type[1] >> 1; + direct_table[2] = h->sub_mb_type[2] >> 1; + direct_table[3] = h->sub_mb_type[3] >> 1; } } } -static av_always_inline int get_dct8x8_allowed(H264Context *h){ - if(h->sps.direct_8x8_inference_flag) - return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); +static av_always_inline int get_dct8x8_allowed(H264Context *h) +{ + if (h->sps.direct_8x8_inference_flag) + return !(AV_RN64A(h->sub_mb_type) & + ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8) * + 0x0001000100010001ULL)); else - return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); + return 
!(AV_RN64A(h->sub_mb_type) & + ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_DIRECT2) * + 0x0001000100010001ULL)); } #endif /* AVCODEC_H264_H */ diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h index 2cfa548624..5311c21be9 100644 --- a/libavcodec/h264data.h +++ b/libavcodec/h264data.h @@ -30,240 +30,243 @@ #define AVCODEC_H264DATA_H #include + #include "libavutil/rational.h" #include "mpegvideo.h" #include "h264.h" - -static const uint8_t golomb_to_pict_type[5]= -{AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI}; - -static const uint8_t golomb_to_intra4x4_cbp[48]={ - 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, - 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, - 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 +static const uint8_t golomb_to_pict_type[5] = { + AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, + AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI }; -static const uint8_t golomb_to_inter_cbp[48]={ - 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, - 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, - 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 +static const uint8_t golomb_to_intra4x4_cbp[48] = { + 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, + 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, + 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 }; -static const uint8_t zigzag_scan[16]={ - 0+0*4, 1+0*4, 0+1*4, 0+2*4, - 1+1*4, 2+0*4, 3+0*4, 2+1*4, - 1+2*4, 0+3*4, 1+3*4, 2+2*4, - 3+1*4, 3+2*4, 2+3*4, 3+3*4, +static const uint8_t golomb_to_inter_cbp[48] = { + 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, + 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, + 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 }; -static const uint8_t field_scan[16]={ - 0+0*4, 0+1*4, 1+0*4, 0+2*4, - 0+3*4, 1+1*4, 1+2*4, 1+3*4, - 2+0*4, 2+1*4, 2+2*4, 2+3*4, - 3+0*4, 3+1*4, 3+2*4, 3+3*4, +static const uint8_t zigzag_scan[16] = { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, }; -static const uint8_t luma_dc_zigzag_scan[16]={ - 0*16 + 0*64, 1*16 + 0*64, 2*16 + 0*64, 0*16 + 2*64, - 3*16 + 0*64, 0*16 + 1*64, 1*16 + 1*64, 2*16 + 1*64, - 1*16 + 2*64, 2*16 + 2*64, 3*16 + 2*64, 0*16 + 3*64, - 3*16 + 1*64, 1*16 + 3*64, 2*16 + 3*64, 3*16 + 3*64, +static const uint8_t field_scan[16] = { + 0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4, + 0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4, + 2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4, + 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, }; -static const uint8_t luma_dc_field_scan[16]={ - 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64, - 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64, - 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64, - 1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64, +static const uint8_t luma_dc_zigzag_scan[16] = { + 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64, + 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64, + 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64, + 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64, }; -static const uint8_t chroma_dc_scan[4]={ - (0+0*2)*16, (1+0*2)*16, - (0+1*2)*16, (1+1*2)*16, +static const uint8_t luma_dc_field_scan[16] = { + 0 * 16 + 0 * 64, 2 * 16 + 0 * 64, 1 * 16 + 0 * 64, 0 * 16 + 2 * 64, + 2 * 16 + 
2 * 64, 3 * 16 + 0 * 64, 1 * 16 + 2 * 64, 3 * 16 + 2 * 64, + 0 * 16 + 1 * 64, 2 * 16 + 1 * 64, 0 * 16 + 3 * 64, 2 * 16 + 3 * 64, + 1 * 16 + 1 * 64, 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 3 * 16 + 3 * 64, }; -static const uint8_t chroma422_dc_scan[8]={ - (0+0*2)*16, (0+1*2)*16, - (1+0*2)*16, (0+2*2)*16, - (0+3*2)*16, (1+1*2)*16, - (1+2*2)*16, (1+3*2)*16, +static const uint8_t chroma_dc_scan[4] = { + (0 + 0 * 2) * 16, (1 + 0 * 2) * 16, + (0 + 1 * 2) * 16, (1 + 1 * 2) * 16, +}; + +static const uint8_t chroma422_dc_scan[8] = { + (0 + 0 * 2) * 16, (0 + 1 * 2) * 16, + (1 + 0 * 2) * 16, (0 + 2 * 2) * 16, + (0 + 3 * 2) * 16, (1 + 1 * 2) * 16, + (1 + 2 * 2) * 16, (1 + 3 * 2) * 16, }; // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] -static const uint8_t zigzag_scan8x8_cavlc[64]={ - 0+0*8, 1+1*8, 1+2*8, 2+2*8, - 4+1*8, 0+5*8, 3+3*8, 7+0*8, - 3+4*8, 1+7*8, 5+3*8, 6+3*8, - 2+7*8, 6+4*8, 5+6*8, 7+5*8, - 1+0*8, 2+0*8, 0+3*8, 3+1*8, - 3+2*8, 0+6*8, 4+2*8, 6+1*8, - 2+5*8, 2+6*8, 6+2*8, 5+4*8, - 3+7*8, 7+3*8, 4+7*8, 7+6*8, - 0+1*8, 3+0*8, 0+4*8, 4+0*8, - 2+3*8, 1+5*8, 5+1*8, 5+2*8, - 1+6*8, 3+5*8, 7+1*8, 4+5*8, - 4+6*8, 7+4*8, 5+7*8, 6+7*8, - 0+2*8, 2+1*8, 1+3*8, 5+0*8, - 1+4*8, 2+4*8, 6+0*8, 4+3*8, - 0+7*8, 4+4*8, 7+2*8, 3+6*8, - 5+5*8, 6+5*8, 6+6*8, 7+7*8, +static const uint8_t zigzag_scan8x8_cavlc[64] = { + 0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8, + 4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8, + 3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8, + 2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8, + 1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8, + 3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8, + 2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8, + 3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8, + 0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8, + 2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8, + 1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8, + 4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8, + 0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8, + 1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8, + 0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8, + 5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8, }; -static const uint8_t field_scan8x8[64]={ - 0+0*8, 0+1*8, 0+2*8, 1+0*8, - 1+1*8, 0+3*8, 0+4*8, 1+2*8, - 2+0*8, 1+3*8, 0+5*8, 0+6*8, - 0+7*8, 1+4*8, 2+1*8, 3+0*8, - 2+2*8, 1+5*8, 1+6*8, 1+7*8, - 2+3*8, 3+1*8, 4+0*8, 3+2*8, - 2+4*8, 2+5*8, 2+6*8, 2+7*8, - 3+3*8, 4+1*8, 5+0*8, 4+2*8, - 3+4*8, 3+5*8, 3+6*8, 3+7*8, - 4+3*8, 5+1*8, 6+0*8, 5+2*8, - 4+4*8, 4+5*8, 4+6*8, 4+7*8, - 5+3*8, 6+1*8, 6+2*8, 5+4*8, - 5+5*8, 5+6*8, 5+7*8, 6+3*8, - 7+0*8, 7+1*8, 6+4*8, 6+5*8, - 6+6*8, 6+7*8, 7+2*8, 7+3*8, - 7+4*8, 7+5*8, 7+6*8, 7+7*8, +static const uint8_t field_scan8x8[64] = { + 0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8, + 1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8, + 2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8, + 0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8, + 2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8, + 2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8, + 2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8, + 3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8, + 3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8, + 4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8, + 4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8, + 5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8, + 5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8, + 7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8, + 6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8, + 7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8, }; -static const uint8_t field_scan8x8_cavlc[64]={ - 0+0*8, 1+1*8, 2+0*8, 0+7*8, - 2+2*8, 2+3*8, 2+4*8, 3+3*8, - 3+4*8, 4+3*8, 4+4*8, 5+3*8, - 5+5*8, 7+0*8, 6+6*8, 7+4*8, - 0+1*8, 0+3*8, 
1+3*8, 1+4*8, - 1+5*8, 3+1*8, 2+5*8, 4+1*8, - 3+5*8, 5+1*8, 4+5*8, 6+1*8, - 5+6*8, 7+1*8, 6+7*8, 7+5*8, - 0+2*8, 0+4*8, 0+5*8, 2+1*8, - 1+6*8, 4+0*8, 2+6*8, 5+0*8, - 3+6*8, 6+0*8, 4+6*8, 6+2*8, - 5+7*8, 6+4*8, 7+2*8, 7+6*8, - 1+0*8, 1+2*8, 0+6*8, 3+0*8, - 1+7*8, 3+2*8, 2+7*8, 4+2*8, - 3+7*8, 5+2*8, 4+7*8, 5+4*8, - 6+3*8, 6+5*8, 7+3*8, 7+7*8, +static const uint8_t field_scan8x8_cavlc[64] = { + 0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8, + 2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8, + 3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8, + 5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8, + 0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8, + 1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8, + 3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8, + 5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8, + 0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8, + 1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8, + 3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8, + 5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8, + 1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8, + 1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8, + 3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8, + 6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8, }; -typedef struct IMbInfo{ +typedef struct IMbInfo { uint16_t type; uint8_t pred_mode; uint8_t cbp; } IMbInfo; -static const IMbInfo i_mb_type_info[26]={ -{MB_TYPE_INTRA4x4 , -1, -1}, -{MB_TYPE_INTRA16x16, 2, 0}, -{MB_TYPE_INTRA16x16, 1, 0}, -{MB_TYPE_INTRA16x16, 0, 0}, -{MB_TYPE_INTRA16x16, 3, 0}, -{MB_TYPE_INTRA16x16, 2, 16}, -{MB_TYPE_INTRA16x16, 1, 16}, -{MB_TYPE_INTRA16x16, 0, 16}, -{MB_TYPE_INTRA16x16, 3, 16}, -{MB_TYPE_INTRA16x16, 2, 32}, -{MB_TYPE_INTRA16x16, 1, 32}, -{MB_TYPE_INTRA16x16, 0, 32}, -{MB_TYPE_INTRA16x16, 3, 32}, -{MB_TYPE_INTRA16x16, 2, 15+0}, -{MB_TYPE_INTRA16x16, 1, 15+0}, -{MB_TYPE_INTRA16x16, 0, 15+0}, -{MB_TYPE_INTRA16x16, 3, 15+0}, -{MB_TYPE_INTRA16x16, 2, 15+16}, -{MB_TYPE_INTRA16x16, 1, 15+16}, -{MB_TYPE_INTRA16x16, 0, 15+16}, -{MB_TYPE_INTRA16x16, 3, 15+16}, -{MB_TYPE_INTRA16x16, 2, 15+32}, -{MB_TYPE_INTRA16x16, 1, 15+32}, -{MB_TYPE_INTRA16x16, 0, 15+32}, -{MB_TYPE_INTRA16x16, 3, 15+32}, -{MB_TYPE_INTRA_PCM , -1, -1}, +static const IMbInfo i_mb_type_info[26] = { + { MB_TYPE_INTRA4x4, -1, -1 }, + { MB_TYPE_INTRA16x16, 2, 0 }, + { MB_TYPE_INTRA16x16, 1, 0 }, + { MB_TYPE_INTRA16x16, 0, 0 }, + { MB_TYPE_INTRA16x16, 3, 0 }, + { MB_TYPE_INTRA16x16, 2, 16 }, + { MB_TYPE_INTRA16x16, 1, 16 }, + { MB_TYPE_INTRA16x16, 0, 16 }, + { MB_TYPE_INTRA16x16, 3, 16 }, + { MB_TYPE_INTRA16x16, 2, 32 }, + { MB_TYPE_INTRA16x16, 1, 32 }, + { MB_TYPE_INTRA16x16, 0, 32 }, + { MB_TYPE_INTRA16x16, 3, 32 }, + { MB_TYPE_INTRA16x16, 2, 15 + 0 }, + { MB_TYPE_INTRA16x16, 1, 15 + 0 }, + { MB_TYPE_INTRA16x16, 0, 15 + 0 }, + { MB_TYPE_INTRA16x16, 3, 15 + 0 }, + { MB_TYPE_INTRA16x16, 2, 15 + 16 }, + { MB_TYPE_INTRA16x16, 1, 15 + 16 }, + { MB_TYPE_INTRA16x16, 0, 15 + 16 }, + { MB_TYPE_INTRA16x16, 3, 15 + 16 }, + { MB_TYPE_INTRA16x16, 2, 15 + 32 }, + { MB_TYPE_INTRA16x16, 1, 15 + 32 }, + { MB_TYPE_INTRA16x16, 0, 15 + 32 }, + { MB_TYPE_INTRA16x16, 3, 15 + 32 }, + { MB_TYPE_INTRA_PCM, -1, -1 }, }; -typedef struct PMbInfo{ +typedef struct PMbInfo { uint16_t type; uint8_t partition_count; } PMbInfo; -static const PMbInfo p_mb_type_info[5]={ -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 4}, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4}, +static const PMbInfo p_mb_type_info[5] = { + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | 
MB_TYPE_P1L0, 2 }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 4 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_REF0, 4 }, }; -static const PMbInfo p_sub_mb_type_info[4]={ -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, -{MB_TYPE_16x8 |MB_TYPE_P0L0 , 2}, -{MB_TYPE_8x16 |MB_TYPE_P0L0 , 2}, -{MB_TYPE_8x8 |MB_TYPE_P0L0 , 4}, +static const PMbInfo p_sub_mb_type_info[4] = { + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 }, + { MB_TYPE_16x8 | MB_TYPE_P0L0, 2 }, + { MB_TYPE_8x16 | MB_TYPE_P0L0, 2 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0, 4 }, }; -static const PMbInfo b_mb_type_info[23]={ -{MB_TYPE_DIRECT2|MB_TYPE_L0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, -{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +static const PMbInfo b_mb_type_info[23] = { + { MB_TYPE_DIRECT2 | MB_TYPE_L0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, }, }; -static const PMbInfo b_sub_mb_type_info[13]={ -{MB_TYPE_DIRECT2 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, -{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, 
-{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 4, }, -{MB_TYPE_8x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 4, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +static const PMbInfo b_sub_mb_type_info[13] = { + { MB_TYPE_DIRECT2, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 4, }, + { MB_TYPE_8x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 4, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, }, }; -static const uint8_t dequant4_coeff_init[6][3]={ - {10,13,16}, - {11,14,18}, - {13,16,20}, - {14,18,23}, - {16,20,25}, - {18,23,29}, +static const uint8_t dequant4_coeff_init[6][3] = { + { 10, 13, 16 }, + { 11, 14, 18 }, + { 13, 16, 20 }, + { 14, 18, 23 }, + { 16, 20, 25 }, + { 18, 23, 29 }, }; static const uint8_t dequant8_coeff_init_scan[16] = { - 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1 + 0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1 }; -static const uint8_t dequant8_coeff_init[6][6]={ - {20,18,32,19,25,24}, - {22,19,35,21,28,26}, - {26,23,42,24,33,31}, - {28,25,45,26,35,33}, - {32,28,51,30,40,38}, - {36,32,58,34,46,43}, + +static const uint8_t dequant8_coeff_init[6][6] = { + { 20, 18, 32, 19, 25, 24 }, + { 22, 19, 35, 21, 28, 26 }, + { 26, 23, 42, 24, 33, 31 }, + { 28, 25, 45, 26, 35, 33 }, + { 32, 28, 51, 30, 40, 38 }, + { 36, 32, 58, 34, 46, 43 }, }; #endif /* AVCODEC_H264DATA_H */ diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index b880446121..a964ae394b 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -35,18 +35,18 @@ * Prediction types */ //@{ -#define VERT_PRED 0 -#define HOR_PRED 1 -#define DC_PRED 2 -#define DIAG_DOWN_LEFT_PRED 3 -#define DIAG_DOWN_RIGHT_PRED 4 -#define VERT_RIGHT_PRED 5 -#define HOR_DOWN_PRED 6 -#define VERT_LEFT_PRED 7 -#define HOR_UP_PRED 8 +#define VERT_PRED 0 +#define HOR_PRED 1 +#define DC_PRED 2 +#define DIAG_DOWN_LEFT_PRED 3 +#define DIAG_DOWN_RIGHT_PRED 4 +#define VERT_RIGHT_PRED 5 +#define HOR_DOWN_PRED 6 +#define VERT_LEFT_PRED 7 +#define HOR_UP_PRED 8 // DC edge (not for VP8) -#define LEFT_DC_PRED 9 +#define LEFT_DC_PRED 9 #define TOP_DC_PRED 10 #define DC_128_PRED 11 @@ -56,7 +56,7 @@ #define VERT_LEFT_PRED_RV40_NODOWN 14 // VP8 specific -#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane +#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane #define VERT_VP8_PRED 10 ///< for VP8, #VERT_PRED is the average of ///< (left col+cur col x2+right col) / 4; ///< this is the "unaveraged" one @@ -65,44 +65,53 @@ #define DC_127_PRED 12 #define DC_129_PRED 13 -#define DC_PRED8x8 0 -#define HOR_PRED8x8 1 -#define VERT_PRED8x8 2 -#define PLANE_PRED8x8 3 +#define DC_PRED8x8 0 
+#define HOR_PRED8x8 1 +#define VERT_PRED8x8 2 +#define PLANE_PRED8x8 3 // DC edge -#define LEFT_DC_PRED8x8 4 -#define TOP_DC_PRED8x8 5 -#define DC_128_PRED8x8 6 +#define LEFT_DC_PRED8x8 4 +#define TOP_DC_PRED8x8 5 +#define DC_128_PRED8x8 6 // H264/SVQ3 (8x8) specific -#define ALZHEIMER_DC_L0T_PRED8x8 7 -#define ALZHEIMER_DC_0LT_PRED8x8 8 -#define ALZHEIMER_DC_L00_PRED8x8 9 +#define ALZHEIMER_DC_L0T_PRED8x8 7 +#define ALZHEIMER_DC_0LT_PRED8x8 8 +#define ALZHEIMER_DC_L00_PRED8x8 9 #define ALZHEIMER_DC_0L0_PRED8x8 10 // VP8 specific -#define DC_127_PRED8x8 7 -#define DC_129_PRED8x8 8 +#define DC_127_PRED8x8 7 +#define DC_129_PRED8x8 8 //@} /** * Context for storing H.264 prediction functions */ -typedef struct H264PredContext{ - void (*pred4x4 [9+3+3])(uint8_t *src, const uint8_t *topright, int stride);//FIXME move to dsp? - void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride); - void (*pred8x8 [4+3+4])(uint8_t *src, int stride); - void (*pred16x16[4+3+2])(uint8_t *src, int stride); +typedef struct H264PredContext { + void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, int stride); //FIXME move to dsp? + void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, int stride); + void(*pred8x8[4 + 3 + 4])(uint8_t *src, int stride); + void(*pred16x16[4 + 3 + 2])(uint8_t *src, int stride); - void (*pred4x4_add [2])(uint8_t *pix/*align 4*/, const DCTELEM *block/*align 16*/, int stride); - void (*pred8x8l_add [2])(uint8_t *pix/*align 8*/, const DCTELEM *block/*align 16*/, int stride); - void (*pred8x8_add [3])(uint8_t *pix/*align 8*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); - void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); -}H264PredContext; + void(*pred4x4_add[2])(uint8_t *pix /*align 4*/, + const DCTELEM *block /*align 16*/, int stride); + void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, + const DCTELEM *block /*align 16*/, int stride); + void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, + const int *block_offset, + const DCTELEM *block /*align 16*/, int stride); + void(*pred16x16_add[3])(uint8_t *pix /*align 16*/, + const int *block_offset, + const DCTELEM *block /*align 16*/, int stride); +} H264PredContext; -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); #endif /* AVCODEC_H264PRED_H */ From 1de53d006b754c8ecab2f31a223acfaea15924f4 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 7 May 2012 14:13:57 +0200 Subject: [PATCH 22/25] h264: K&R formatting cosmetics for header files (part II/II) --- libavcodec/h264_mvpred.h | 995 ++++++++++++++++++++------------------- libavcodec/h264dsp.h | 96 ++-- 2 files changed, 584 insertions(+), 507 deletions(-) diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h index 2bd4458f0b..5244c290f2 100644 --- a/libavcodec/h264_mvpred.h +++ b/libavcodec/h264_mvpred.h @@ -35,53 +35,53 @@ 
//#undef NDEBUG #include -static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ - const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; - MpegEncContext *s = &h->s; +static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, + int i, int list, int part_width) +{ + const int topright_ref = h->ref_cache[list][i - 8 + part_width]; + MpegEncContext *s = &h->s; /* there is no consistent mapping of mvs to neighboring locations that will * make mbaff happy, so we can't move all this logic to fill_caches */ - if(FRAME_MBAFF){ + if (FRAME_MBAFF) { +#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4) \ + const int xy = XY, y4 = Y4; \ + const int mb_type = mb_types[xy + (y4 >> 2) * s->mb_stride]; \ + if (!USES_LIST(mb_type, list)) \ + return LIST_NOT_USED; \ + mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \ + h->mv_cache[list][scan8[0] - 2][0] = mv[0]; \ + h->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP; \ + return s->current_picture_ptr->f.ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP; -#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)\ - const int xy = XY, y4 = Y4;\ - const int mb_type = mb_types[xy+(y4>>2)*s->mb_stride];\ - if(!USES_LIST(mb_type,list))\ - return LIST_NOT_USED;\ - mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4*h->b_stride];\ - h->mv_cache[list][scan8[0]-2][0] = mv[0];\ - h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\ - return s->current_picture_ptr->f.ref_index[list][4*xy + 1 + (y4 & ~1)] REF_OP; - - if(topright_ref == PART_NOT_AVAILABLE - && i >= scan8[0]+8 && (i&7)==4 - && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ + if (topright_ref == PART_NOT_AVAILABLE + && i >= scan8[0] + 8 && (i & 7) == 4 + && h->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) { const uint32_t *mb_types = s->current_picture_ptr->f.mb_type; const int16_t *mv; - AV_ZERO32(h->mv_cache[list][scan8[0]-2]); - *C = h->mv_cache[list][scan8[0]-2]; + AV_ZERO32(h->mv_cache[list][scan8[0] - 2]); + *C = h->mv_cache[list][scan8[0] - 2]; - if(!MB_FIELD - && IS_INTERLACED(h->left_type[0])){ - SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5)); + if (!MB_FIELD && IS_INTERLACED(h->left_type[0])) { + SET_DIAG_MV(* 2, >> 1, h->left_mb_xy[0] + s->mb_stride, + (s->mb_y & 1) * 2 + (i >> 5)); } - if(MB_FIELD - && !IS_INTERLACED(h->left_type[0])){ + if (MB_FIELD && !IS_INTERLACED(h->left_type[0])) { // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK. 
- SET_DIAG_MV(/2, <<1, h->left_mb_xy[i>=36], ((i>>2))&3); + SET_DIAG_MV(/ 2, << 1, h->left_mb_xy[i >= 36], ((i >> 2)) & 3); } } #undef SET_DIAG_MV } - if(topright_ref != PART_NOT_AVAILABLE){ - *C= h->mv_cache[list][ i - 8 + part_width ]; + if (topright_ref != PART_NOT_AVAILABLE) { + *C = h->mv_cache[list][i - 8 + part_width]; return topright_ref; - }else{ + } else { tprintf(s->avctx, "topright MV not available\n"); - *C= h->mv_cache[list][ i - 8 - 1 ]; - return h->ref_cache[list][ i - 8 - 1 ]; + *C = h->mv_cache[list][i - 8 - 1]; + return h->ref_cache[list][i - 8 - 1]; } } @@ -92,53 +92,61 @@ static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ - const int index8= scan8[n]; - const int top_ref= h->ref_cache[list][ index8 - 8 ]; - const int left_ref= h->ref_cache[list][ index8 - 1 ]; - const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; - const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; - const int16_t * C; +static av_always_inline void pred_motion(H264Context *const h, int n, + int part_width, int list, int ref, + int *const mx, int *const my) +{ + const int index8 = scan8[n]; + const int top_ref = h->ref_cache[list][index8 - 8]; + const int left_ref = h->ref_cache[list][index8 - 1]; + const int16_t *const A = h->mv_cache[list][index8 - 1]; + const int16_t *const B = h->mv_cache[list][index8 - 8]; + const int16_t *C; int diagonal_ref, match_count; - assert(part_width==1 || part_width==2 || part_width==4); + assert(part_width == 1 || part_width == 2 || part_width == 4); /* mv_cache - B . . A T T T T - U . . L . . , . - U . . L . . . . - U . . L . . , . - . . . L . . . . -*/ + * B . . A T T T T + * U . . L . . , . + * U . . L . . . . + * U . . L . . , . + * . . . L . . . . 
+ */ - diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); - match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); + diagonal_ref = fetch_diagonal_mv(h, &C, index8, list, part_width); + match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref); tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count); - if(match_count > 1){ //most common - *mx= mid_pred(A[0], B[0], C[0]); - *my= mid_pred(A[1], B[1], C[1]); - }else if(match_count==1){ - if(left_ref==ref){ - *mx= A[0]; - *my= A[1]; - }else if(top_ref==ref){ - *mx= B[0]; - *my= B[1]; - }else{ - *mx= C[0]; - *my= C[1]; + if (match_count > 1) { //most common + *mx = mid_pred(A[0], B[0], C[0]); + *my = mid_pred(A[1], B[1], C[1]); + } else if (match_count == 1) { + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; + } else if (top_ref == ref) { + *mx = B[0]; + *my = B[1]; + } else { + *mx = C[0]; + *my = C[1]; } - }else{ - if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ - *mx= A[0]; - *my= A[1]; - }else{ - *mx= mid_pred(A[0], B[0], C[0]); - *my= mid_pred(A[1], B[1], C[1]); + } else { + if (top_ref == PART_NOT_AVAILABLE && + diagonal_ref == PART_NOT_AVAILABLE && + left_ref != PART_NOT_AVAILABLE) { + *mx = A[0]; + *my = A[1]; + } else { + *mx = mid_pred(A[0], B[0], C[0]); + *my = mid_pred(A[1], B[1], C[1]); } } - tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, + "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", + top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, + A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); } /** @@ -147,27 +155,32 @@ static av_always_inline void pred_motion(H264Context * const h, int n, int part_ * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ - if(n==0){ - const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; - const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; +static av_always_inline void pred_16x8_motion(H264Context *const h, + int n, int list, int ref, + int *const mx, int *const my) +{ + if (n == 0) { + const int top_ref = h->ref_cache[list][scan8[0] - 8]; + const int16_t *const B = h->mv_cache[list][scan8[0] - 8]; - tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", + top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); - if(top_ref == ref){ - *mx= B[0]; - *my= B[1]; + if (top_ref == ref) { + *mx = B[0]; + *my = B[1]; return; } - }else{ - const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; - const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; + } else { + const int left_ref = h->ref_cache[list][scan8[8] - 1]; + const int16_t *const A = h->mv_cache[list][scan8[8] - 1]; - tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", + left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); - if(left_ref == ref){ - *mx= A[0]; - *my= A[1]; + if 
(left_ref == ref) { + *mx = A[0]; + *my = A[1]; return; } } @@ -182,29 +195,34 @@ static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ - if(n==0){ - const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; - const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; +static av_always_inline void pred_8x16_motion(H264Context *const h, + int n, int list, int ref, + int *const mx, int *const my) +{ + if (n == 0) { + const int left_ref = h->ref_cache[list][scan8[0] - 1]; + const int16_t *const A = h->mv_cache[list][scan8[0] - 1]; - tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", + left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); - if(left_ref == ref){ - *mx= A[0]; - *my= A[1]; + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; return; } - }else{ - const int16_t * C; + } else { + const int16_t *C; int diagonal_ref; - diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); + diagonal_ref = fetch_diagonal_mv(h, &C, scan8[4], list, 2); - tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", + diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); - if(diagonal_ref == ref){ - *mx= C[0]; - *my= C[1]; + if (diagonal_ref == ref) { + *mx = C[0]; + *my = C[1]; return; } } @@ -213,168 +231,174 @@ static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int pred_motion(h, n, 2, list, ref, mx, my); } -#define FIX_MV_MBAFF(type, refn, mvn, idx)\ - if(FRAME_MBAFF){\ - if(MB_FIELD){\ - if(!IS_INTERLACED(type)){\ - refn <<= 1;\ - AV_COPY32(mvbuf[idx], mvn);\ - mvbuf[idx][1] /= 2;\ - mvn = mvbuf[idx];\ - }\ - }else{\ - if(IS_INTERLACED(type)){\ - refn >>= 1;\ - AV_COPY32(mvbuf[idx], mvn);\ - mvbuf[idx][1] <<= 1;\ - mvn = mvbuf[idx];\ - }\ - }\ +#define FIX_MV_MBAFF(type, refn, mvn, idx) \ + if (FRAME_MBAFF) { \ + if (MB_FIELD) { \ + if (!IS_INTERLACED(type)) { \ + refn <<= 1; \ + AV_COPY32(mvbuf[idx], mvn); \ + mvbuf[idx][1] /= 2; \ + mvn = mvbuf[idx]; \ + } \ + } else { \ + if (IS_INTERLACED(type)) { \ + refn >>= 1; \ + AV_COPY32(mvbuf[idx], mvn); \ + mvbuf[idx][1] <<= 1; \ + mvn = mvbuf[idx]; \ + } \ + } \ } -static av_always_inline void pred_pskip_motion(H264Context * const h){ - DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0}; +static av_always_inline void pred_pskip_motion(H264Context *const h) +{ + DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = { 0 }; DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2]; - MpegEncContext * const s = &h->s; - int8_t *ref = s->current_picture.f.ref_index[0]; - int16_t (*mv)[2] = s->current_picture.f.motion_val[0]; + MpegEncContext *const s = &h->s; + int8_t *ref = s->current_picture.f.ref_index[0]; + int16_t(*mv)[2] = s->current_picture.f.motion_val[0]; int top_ref, left_ref, diagonal_ref, match_count, mx, my; const int16_t *A, *B, *C; int b_stride = h->b_stride; fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); - /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here. 
- * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's - * faster this way. Is there a way to avoid this duplication? + /* To avoid doing an entire fill_decode_caches, we inline the relevant + * parts here. + * FIXME: this is a partial duplicate of the logic in fill_decode_caches, + * but it's faster this way. Is there a way to avoid this duplication? */ - if(USES_LIST(h->left_type[LTOP], 0)){ - left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)]; - A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]]; + if (USES_LIST(h->left_type[LTOP], 0)) { + left_ref = ref[4 * h->left_mb_xy[LTOP] + 1 + (h->left_block[0] & ~1)]; + A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride * h->left_block[0]]; FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0); - if(!(left_ref | AV_RN32A(A))){ + if (!(left_ref | AV_RN32A(A))) goto zeromv; - } - }else if(h->left_type[LTOP]){ + } else if (h->left_type[LTOP]) { left_ref = LIST_NOT_USED; - A = zeromv; - }else{ + A = zeromv; + } else { goto zeromv; } - if(USES_LIST(h->top_type, 0)){ - top_ref = ref[4*h->top_mb_xy + 2]; - B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride]; + if (USES_LIST(h->top_type, 0)) { + top_ref = ref[4 * h->top_mb_xy + 2]; + B = mv[h->mb2b_xy[h->top_mb_xy] + 3 * b_stride]; FIX_MV_MBAFF(h->top_type, top_ref, B, 1); - if(!(top_ref | AV_RN32A(B))){ + if (!(top_ref | AV_RN32A(B))) goto zeromv; - } - }else if(h->top_type){ + } else if (h->top_type) { top_ref = LIST_NOT_USED; - B = zeromv; - }else{ + B = zeromv; + } else { goto zeromv; } - tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); + tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", + top_ref, left_ref, h->s.mb_x, h->s.mb_y); - if(USES_LIST(h->topright_type, 0)){ - diagonal_ref = ref[4*h->topright_mb_xy + 2]; - C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride]; + if (USES_LIST(h->topright_type, 0)) { + diagonal_ref = ref[4 * h->topright_mb_xy + 2]; + C = mv[h->mb2b_xy[h->topright_mb_xy] + 3 * b_stride]; FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2); - }else if(h->topright_type){ + } else if (h->topright_type) { diagonal_ref = LIST_NOT_USED; C = zeromv; - }else{ - if(USES_LIST(h->topleft_type, 0)){ - diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)]; - C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)]; + } else { + if (USES_LIST(h->topleft_type, 0)) { + diagonal_ref = ref[4 * h->topleft_mb_xy + 1 + + (h->topleft_partition & 2)]; + C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + + (h->topleft_partition & 2 * b_stride)]; FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2); - }else if(h->topleft_type){ + } else if (h->topleft_type) { diagonal_ref = LIST_NOT_USED; - C = zeromv; - }else{ + C = zeromv; + } else { diagonal_ref = PART_NOT_AVAILABLE; - C = zeromv; + C = zeromv; } } - match_count= !diagonal_ref + !top_ref + !left_ref; + match_count = !diagonal_ref + !top_ref + !left_ref; tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count); - if(match_count > 1){ + if (match_count > 1) { mx = mid_pred(A[0], B[0], C[0]); my = mid_pred(A[1], B[1], C[1]); - }else if(match_count==1){ - if(!left_ref){ + } else if (match_count == 1) { + if (!left_ref) { mx = A[0]; my = A[1]; - }else if(!top_ref){ + } else if (!top_ref) { mx = B[0]; my = B[1]; - }else{ + } else { mx = C[0]; my = C[1]; } - }else{ + } else { mx = mid_pred(A[0], B[0], C[0]); my = mid_pred(A[1], B[1], C[1]); } - fill_rectangle( 
h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); + fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4); return; + zeromv: - fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); return; } -static void fill_decode_neighbors(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; +static void fill_decode_neighbors(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; + const int mb_xy = h->mb_xy; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS]; - static const uint8_t left_block_options[4][32]={ - {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4}, - {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4}, - {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}, - {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4} + static const uint8_t left_block_options[4][32] = { + { 0, 1, 2, 3, 7, 10, 8, 11, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 5 * 4, 1 + 9 * 4 }, + { 2, 2, 3, 3, 8, 11, 8, 11, 3 + 2 * 4, 3 + 2 * 4, 3 + 3 * 4, 3 + 3 * 4, 1 + 5 * 4, 1 + 9 * 4, 1 + 5 * 4, 1 + 9 * 4 }, + { 0, 0, 1, 1, 7, 10, 7, 10, 3 + 0 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 1 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 }, + { 0, 2, 0, 2, 7, 10, 7, 10, 3 + 0 * 4, 3 + 2 * 4, 3 + 0 * 4, 3 + 2 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 } }; - h->topleft_partition= -1; + h->topleft_partition = -1; - top_xy = mb_xy - (s->mb_stride << MB_FIELD); + top_xy = mb_xy - (s->mb_stride << MB_FIELD); /* Wow, what a mess, why didn't they simplify the interlacing & intra * stuff, I can't imagine that these complex rules are worth it. 
*/ - topleft_xy = top_xy - 1; - topright_xy= top_xy + 1; - left_xy[LBOT] = left_xy[LTOP] = mb_xy-1; + topleft_xy = top_xy - 1; + topright_xy = top_xy + 1; + left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1; h->left_block = left_block_options[0]; - if(FRAME_MBAFF){ + if (FRAME_MBAFF) { const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]); const int curr_mb_field_flag = IS_INTERLACED(mb_type); - if(s->mb_y&1){ + if (s->mb_y & 1) { if (left_mb_field_flag != curr_mb_field_flag) { left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1; if (curr_mb_field_flag) { left_xy[LBOT] += s->mb_stride; - h->left_block = left_block_options[3]; + h->left_block = left_block_options[3]; } else { topleft_xy += s->mb_stride; - // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition + /* take top left mv from the middle of the mb, as opposed + * to all other modes which use the bottom right partition */ h->topleft_partition = 0; - h->left_block = left_block_options[1]; + h->left_block = left_block_options[1]; } } - }else{ - if(curr_mb_field_flag){ + } else { + if (curr_mb_field_flag) { topleft_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy - 1] >> 7) & 1) - 1); topright_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy + 1] >> 7) & 1) - 1); - top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy ] >> 7) & 1) - 1); + top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1); } if (left_mb_field_flag != curr_mb_field_flag) { if (curr_mb_field_flag) { left_xy[LBOT] += s->mb_stride; - h->left_block = left_block_options[3]; + h->left_block = left_block_options[3]; } else { h->left_block = left_block_options[2]; } @@ -382,9 +406,9 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){ } } - h->topleft_mb_xy = topleft_xy; - h->top_mb_xy = top_xy; - h->topright_mb_xy= topright_xy; + h->topleft_mb_xy = topleft_xy; + h->top_mb_xy = top_xy; + h->topright_mb_xy = topright_xy; h->left_mb_xy[LTOP] = left_xy[LTOP]; h->left_mb_xy[LBOT] = left_xy[LBOT]; //FIXME do we need all in the context? 
@@ -395,351 +419,372 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){ h->left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]]; h->left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]]; - if(FMO){ - if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0; - }else{ - if(h->slice_table[topleft_xy ] != h->slice_num){ + if (FMO) { + if (h->slice_table[topleft_xy] != h->slice_num) h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0; + if (h->slice_table[top_xy] != h->slice_num) + h->top_type = 0; + if (h->slice_table[left_xy[LTOP]] != h->slice_num) + h->left_type[LTOP] = h->left_type[LBOT] = 0; + } else { + if (h->slice_table[topleft_xy] != h->slice_num) { + h->topleft_type = 0; + if (h->slice_table[top_xy] != h->slice_num) + h->top_type = 0; + if (h->slice_table[left_xy[LTOP]] != h->slice_num) + h->left_type[LTOP] = h->left_type[LBOT] = 0; } } - if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0; + if (h->slice_table[topright_xy] != h->slice_num) + h->topright_type = 0; } -static void fill_decode_caches(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; +static void fill_decode_caches(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS]; int topleft_type, top_type, topright_type, left_type[LEFT_MBS]; - const uint8_t * left_block= h->left_block; + const uint8_t *left_block = h->left_block; int i; uint8_t *nnz; uint8_t *nnz_cache; - topleft_xy = h->topleft_mb_xy; - top_xy = h->top_mb_xy; - topright_xy = h->topright_mb_xy; - left_xy[LTOP] = h->left_mb_xy[LTOP]; - left_xy[LBOT] = h->left_mb_xy[LBOT]; - topleft_type = h->topleft_type; - top_type = h->top_type; - topright_type = h->topright_type; - left_type[LTOP]= h->left_type[LTOP]; - left_type[LBOT]= h->left_type[LBOT]; + topleft_xy = h->topleft_mb_xy; + top_xy = h->top_mb_xy; + topright_xy = h->topright_mb_xy; + left_xy[LTOP] = h->left_mb_xy[LTOP]; + left_xy[LBOT] = h->left_mb_xy[LBOT]; + topleft_type = h->topleft_type; + top_type = h->top_type; + topright_type = h->topright_type; + left_type[LTOP] = h->left_type[LTOP]; + left_type[LBOT] = h->left_type[LBOT]; - if(!IS_SKIP(mb_type)){ - if(IS_INTRA(mb_type)){ - int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; - h->topleft_samples_available= - h->top_samples_available= - h->left_samples_available= 0xFFFF; - h->topright_samples_available= 0xEEEA; + if (!IS_SKIP(mb_type)) { + if (IS_INTRA(mb_type)) { + int type_mask = h->pps.constrained_intra_pred ? 
IS_INTRA(-1) : -1; + h->topleft_samples_available = + h->top_samples_available = + h->left_samples_available = 0xFFFF; + h->topright_samples_available = 0xEEEA; - if(!(top_type & type_mask)){ - h->topleft_samples_available= 0xB3FF; - h->top_samples_available= 0x33FF; - h->topright_samples_available= 0x26EA; + if (!(top_type & type_mask)) { + h->topleft_samples_available = 0xB3FF; + h->top_samples_available = 0x33FF; + h->topright_samples_available = 0x26EA; } - if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){ - if(IS_INTERLACED(mb_type)){ - if(!(left_type[LTOP] & type_mask)){ - h->topleft_samples_available&= 0xDFFF; - h->left_samples_available&= 0x5FFF; + if (IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])) { + if (IS_INTERLACED(mb_type)) { + if (!(left_type[LTOP] & type_mask)) { + h->topleft_samples_available &= 0xDFFF; + h->left_samples_available &= 0x5FFF; } - if(!(left_type[LBOT] & type_mask)){ - h->topleft_samples_available&= 0xFF5F; - h->left_samples_available&= 0xFF5F; + if (!(left_type[LBOT] & type_mask)) { + h->topleft_samples_available &= 0xFF5F; + h->left_samples_available &= 0xFF5F; } - }else{ + } else { int left_typei = s->current_picture.f.mb_type[left_xy[LTOP] + s->mb_stride]; assert(left_xy[LTOP] == left_xy[LBOT]); - if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; + if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) { + h->topleft_samples_available &= 0xDF5F; + h->left_samples_available &= 0x5F5F; } } - }else{ - if(!(left_type[LTOP] & type_mask)){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; + } else { + if (!(left_type[LTOP] & type_mask)) { + h->topleft_samples_available &= 0xDF5F; + h->left_samples_available &= 0x5F5F; } } - if(!(topleft_type & type_mask)) - h->topleft_samples_available&= 0x7FFF; + if (!(topleft_type & type_mask)) + h->topleft_samples_available &= 0x7FFF; - if(!(topright_type & type_mask)) - h->topright_samples_available&= 0xFBFF; + if (!(topright_type & type_mask)) + h->topright_samples_available &= 0xFBFF; - if(IS_INTRA4x4(mb_type)){ - if(IS_INTRA4x4(top_type)){ - AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); - }else{ - h->intra4x4_pred_mode_cache[4+8*0]= - h->intra4x4_pred_mode_cache[5+8*0]= - h->intra4x4_pred_mode_cache[6+8*0]= - h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); + if (IS_INTRA4x4(mb_type)) { + if (IS_INTRA4x4(top_type)) { + AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); + } else { + h->intra4x4_pred_mode_cache[4 + 8 * 0] = + h->intra4x4_pred_mode_cache[5 + 8 * 0] = + h->intra4x4_pred_mode_cache[6 + 8 * 0] = + h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask); } - for(i=0; i<2; i++){ - if(IS_INTRA4x4(left_type[LEFT(i)])){ - int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]]; - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]]; - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]]; - }else{ - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask); + for (i = 0; i < 2; i++) { + if (IS_INTRA4x4(left_type[LEFT(i)])) { + int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]]; + h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]]; + h->intra4x4_pred_mode_cache[3 + 8 * 2 
+ 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]]; + } else { + h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = + h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask); } } } } - -/* -0 . T T. T T T T -1 L . .L . . . . -2 L . .L . . . . -3 . T TL . . . . -4 L . .L . . . . -5 L . .. . . . . -*/ -//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) - nnz_cache = h->non_zero_count_cache; - if(top_type){ - nnz = h->non_zero_count[top_xy]; - AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); - if(!s->chroma_y_shift){ - AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); - AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); - }else{ - AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 5]); - AV_COPY32(&nnz_cache[4+8*10], &nnz[4* 9]); - } - }else{ - uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; - AV_WN32A(&nnz_cache[4+8* 0], top_empty); - AV_WN32A(&nnz_cache[4+8* 5], top_empty); - AV_WN32A(&nnz_cache[4+8*10], top_empty); - } - - for (i=0; i<2; i++) { - if(left_type[LEFT(i)]){ - nnz = h->non_zero_count[left_xy[LEFT(i)]]; - nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]]; - nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]]; - if(CHROMA444){ - nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]+4*4]; - nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; - nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; - nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; - }else if(CHROMA422) { - nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4]; - nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4]; - nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4]; - nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4]; - }else{ - nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; - nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; + /* + * 0 . T T. T T T T + * 1 L . .L . . . . + * 2 L . .L . . . . + * 3 . T TL . . . . + * 4 L . .L . . . . + * 5 L . .. . . . . + */ + /* FIXME: constraint_intra_pred & partitioning & nnz + * (let us hope this is just a typo in the spec) */ + nnz_cache = h->non_zero_count_cache; + if (top_type) { + nnz = h->non_zero_count[top_xy]; + AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[4 * 3]); + if (!s->chroma_y_shift) { + AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 7]); + AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 11]); + } else { + AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 5]); + AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 9]); + } + } else { + uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 
0 : 0x40404040; + AV_WN32A(&nnz_cache[4 + 8 * 0], top_empty); + AV_WN32A(&nnz_cache[4 + 8 * 5], top_empty); + AV_WN32A(&nnz_cache[4 + 8 * 10], top_empty); + } + + for (i = 0; i < 2; i++) { + if (left_type[LEFT(i)]) { + nnz = h->non_zero_count[left_xy[LEFT(i)]]; + nnz_cache[3 + 8 * 1 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i]]; + nnz_cache[3 + 8 * 2 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i]]; + if (CHROMA444) { + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 4 * 4]; + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 4 * 4]; + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 8 * 4]; + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 8 * 4]; + } else if (CHROMA422) { + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 4 * 4]; + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 4 * 4]; + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 8 * 4]; + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 8 * 4]; + } else { + nnz_cache[3 + 8 * 6 + 8 * i] = nnz[left_block[8 + 4 + 2 * i]]; + nnz_cache[3 + 8 * 11 + 8 * i] = nnz[left_block[8 + 5 + 2 * i]]; + } + } else { + nnz_cache[3 + 8 * 1 + 2 * 8 * i] = + nnz_cache[3 + 8 * 2 + 2 * 8 * i] = + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = CABAC && !IS_INTRA(mb_type) ? 0 : 64; + } + } + + if (CABAC) { + // top_cbp + if (top_type) + h->top_cbp = h->cbp_table[top_xy]; + else + h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; + // left_cbp + if (left_type[LTOP]) { + h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) | + ((h->cbp_table[left_xy[LTOP]] >> (left_block[0] & (~1))) & 2) | + (((h->cbp_table[left_xy[LBOT]] >> (left_block[2] & (~1))) & 2) << 2); + } else { + h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; } - }else{ - nnz_cache[3+8* 1 + 2*8*i]= - nnz_cache[3+8* 2 + 2*8*i]= - nnz_cache[3+8* 6 + 2*8*i]= - nnz_cache[3+8* 7 + 2*8*i]= - nnz_cache[3+8*11 + 2*8*i]= - nnz_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; } } - if( CABAC ) { - // top_cbp - if(top_type) { - h->top_cbp = h->cbp_table[top_xy]; - } else { - h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; - } - // left_cbp - if (left_type[LTOP]) { - h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) - | ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2) - | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2); - } else { - h->left_cbp = IS_INTRA(mb_type) ? 
0x7CF : 0x00F; - } - } - } - - if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ + if (IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)) { int list; int b_stride = h->b_stride; - for(list=0; listlist_count; list++){ + for (list = 0; list < h->list_count; list++) { int8_t *ref_cache = &h->ref_cache[list][scan8[0]]; int8_t *ref = s->current_picture.f.ref_index[list]; - int16_t (*mv_cache)[2] = &h->mv_cache[list][scan8[0]]; - int16_t (*mv)[2] = s->current_picture.f.motion_val[list]; - if(!USES_LIST(mb_type, list)){ + int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]]; + int16_t(*mv)[2] = s->current_picture.f.motion_val[list]; + if (!USES_LIST(mb_type, list)) continue; - } assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)); - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride; - AV_COPY128(mv_cache[0 - 1*8], mv[b_xy + 0]); - ref_cache[0 - 1*8]= - ref_cache[1 - 1*8]= ref[4*top_xy + 2]; - ref_cache[2 - 1*8]= - ref_cache[3 - 1*8]= ref[4*top_xy + 3]; - }else{ - AV_ZERO128(mv_cache[0 - 1*8]); - AV_WN32A(&ref_cache[0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101u); + if (USES_LIST(top_type, list)) { + const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride; + AV_COPY128(mv_cache[0 - 1 * 8], mv[b_xy + 0]); + ref_cache[0 - 1 * 8] = + ref_cache[1 - 1 * 8] = ref[4 * top_xy + 2]; + ref_cache[2 - 1 * 8] = + ref_cache[3 - 1 * 8] = ref[4 * top_xy + 3]; + } else { + AV_ZERO128(mv_cache[0 - 1 * 8]); + AV_WN32A(&ref_cache[0 - 1 * 8], + ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE) & 0xFF) * 0x01010101u); } - if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ - for(i=0; i<2; i++){ - int cache_idx = -1 + i*2*8; - if(USES_LIST(left_type[LEFT(i)], list)){ - const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3; - const int b8_xy= 4*left_xy[LEFT(i)] + 1; - AV_COPY32(mv_cache[cache_idx ], mv[b_xy + b_stride*left_block[0+i*2]]); - AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]); - ref_cache[cache_idx ]= ref[b8_xy + (left_block[0+i*2]&~1)]; - ref_cache[cache_idx+8]= ref[b8_xy + (left_block[1+i*2]&~1)]; - }else{ - AV_ZERO32(mv_cache[cache_idx ]); - AV_ZERO32(mv_cache[cache_idx+8]); - ref_cache[cache_idx ]= - ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (mb_type & (MB_TYPE_16x8 | MB_TYPE_8x8)) { + for (i = 0; i < 2; i++) { + int cache_idx = -1 + i * 2 * 8; + if (USES_LIST(left_type[LEFT(i)], list)) { + const int b_xy = h->mb2b_xy[left_xy[LEFT(i)]] + 3; + const int b8_xy = 4 * left_xy[LEFT(i)] + 1; + AV_COPY32(mv_cache[cache_idx], + mv[b_xy + b_stride * left_block[0 + i * 2]]); + AV_COPY32(mv_cache[cache_idx + 8], + mv[b_xy + b_stride * left_block[1 + i * 2]]); + ref_cache[cache_idx] = ref[b8_xy + (left_block[0 + i * 2] & ~1)]; + ref_cache[cache_idx + 8] = ref[b8_xy + (left_block[1 + i * 2] & ~1)]; + } else { + AV_ZERO32(mv_cache[cache_idx]); + AV_ZERO32(mv_cache[cache_idx + 8]); + ref_cache[cache_idx] = + ref_cache[cache_idx + 8] = (left_type[LEFT(i)]) ? 
LIST_NOT_USED + : PART_NOT_AVAILABLE; + } } - } - }else{ - if(USES_LIST(left_type[LTOP], list)){ - const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3; - const int b8_xy= 4*left_xy[LTOP] + 1; - AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]); - ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)]; - }else{ + } else { + if (USES_LIST(left_type[LTOP], list)) { + const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3; + const int b8_xy = 4 * left_xy[LTOP] + 1; + AV_COPY32(mv_cache[-1], mv[b_xy + b_stride * left_block[0]]); + ref_cache[-1] = ref[b8_xy + (left_block[0] & ~1)]; + } else { AV_ZERO32(mv_cache[-1]); - ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE; + ref_cache[-1] = left_type[LTOP] ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } } - if(USES_LIST(topright_type, list)){ - const int b_xy= h->mb2b_xy[topright_xy] + 3*b_stride; - AV_COPY32(mv_cache[4 - 1*8], mv[b_xy]); - ref_cache[4 - 1*8]= ref[4*topright_xy + 2]; - }else{ - AV_ZERO32(mv_cache[4 - 1*8]); - ref_cache[4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (USES_LIST(topright_type, list)) { + const int b_xy = h->mb2b_xy[topright_xy] + 3 * b_stride; + AV_COPY32(mv_cache[4 - 1 * 8], mv[b_xy]); + ref_cache[4 - 1 * 8] = ref[4 * topright_xy + 2]; + } else { + AV_ZERO32(mv_cache[4 - 1 * 8]); + ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } - if(ref_cache[4 - 1*8] < 0){ - if(USES_LIST(topleft_type, list)){ - const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride); - const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2); - AV_COPY32(mv_cache[-1 - 1*8], mv[b_xy]); - ref_cache[-1 - 1*8]= ref[b8_xy]; - }else{ - AV_ZERO32(mv_cache[-1 - 1*8]); - ref_cache[-1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (ref_cache[4 - 1 * 8] < 0) { + if (USES_LIST(topleft_type, list)) { + const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + + (h->topleft_partition & 2 * b_stride); + const int b8_xy = 4 * topleft_xy + 1 + (h->topleft_partition & 2); + AV_COPY32(mv_cache[-1 - 1 * 8], mv[b_xy]); + ref_cache[-1 - 1 * 8] = ref[b8_xy]; + } else { + AV_ZERO32(mv_cache[-1 - 1 * 8]); + ref_cache[-1 - 1 * 8] = topleft_type ? 
LIST_NOT_USED + : PART_NOT_AVAILABLE; } } - if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) + if ((mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2)) && !FRAME_MBAFF) continue; - if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))){ - uint8_t (*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]]; - uint8_t (*mvd)[2] = h->mvd_table[list]; - ref_cache[2+8*0] = - ref_cache[2+8*2] = PART_NOT_AVAILABLE; - AV_ZERO32(mv_cache[2+8*0]); - AV_ZERO32(mv_cache[2+8*2]); + if (!(mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2))) { + uint8_t(*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]]; + uint8_t(*mvd)[2] = h->mvd_table[list]; + ref_cache[2 + 8 * 0] = + ref_cache[2 + 8 * 2] = PART_NOT_AVAILABLE; + AV_ZERO32(mv_cache[2 + 8 * 0]); + AV_ZERO32(mv_cache[2 + 8 * 2]); - if( CABAC ) { - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2br_xy[top_xy]; - AV_COPY64(mvd_cache[0 - 1*8], mvd[b_xy + 0]); - }else{ - AV_ZERO64(mvd_cache[0 - 1*8]); + if (CABAC) { + if (USES_LIST(top_type, list)) { + const int b_xy = h->mb2br_xy[top_xy]; + AV_COPY64(mvd_cache[0 - 1 * 8], mvd[b_xy + 0]); + } else { + AV_ZERO64(mvd_cache[0 - 1 * 8]); } - if(USES_LIST(left_type[LTOP], list)){ - const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6; - AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]); - AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]); - }else{ - AV_ZERO16(mvd_cache[-1 + 0*8]); - AV_ZERO16(mvd_cache[-1 + 1*8]); + if (USES_LIST(left_type[LTOP], list)) { + const int b_xy = h->mb2br_xy[left_xy[LTOP]] + 6; + AV_COPY16(mvd_cache[-1 + 0 * 8], mvd[b_xy - left_block[0]]); + AV_COPY16(mvd_cache[-1 + 1 * 8], mvd[b_xy - left_block[1]]); + } else { + AV_ZERO16(mvd_cache[-1 + 0 * 8]); + AV_ZERO16(mvd_cache[-1 + 1 * 8]); } - if(USES_LIST(left_type[LBOT], list)){ - const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6; - AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]); - AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]); - }else{ - AV_ZERO16(mvd_cache[-1 + 2*8]); - AV_ZERO16(mvd_cache[-1 + 3*8]); + if (USES_LIST(left_type[LBOT], list)) { + const int b_xy = h->mb2br_xy[left_xy[LBOT]] + 6; + AV_COPY16(mvd_cache[-1 + 2 * 8], mvd[b_xy - left_block[2]]); + AV_COPY16(mvd_cache[-1 + 3 * 8], mvd[b_xy - left_block[3]]); + } else { + AV_ZERO16(mvd_cache[-1 + 2 * 8]); + AV_ZERO16(mvd_cache[-1 + 3 * 8]); } - AV_ZERO16(mvd_cache[2+8*0]); - AV_ZERO16(mvd_cache[2+8*2]); - if(h->slice_type_nos == AV_PICTURE_TYPE_B){ + AV_ZERO16(mvd_cache[2 + 8 * 0]); + AV_ZERO16(mvd_cache[2 + 8 * 2]); + if (h->slice_type_nos == AV_PICTURE_TYPE_B) { uint8_t *direct_cache = &h->direct_cache[scan8[0]]; uint8_t *direct_table = h->direct_table; - fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16>>1, 1); + fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16 >> 1, 1); - if(IS_DIRECT(top_type)){ - AV_WN32A(&direct_cache[-1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1)); - }else if(IS_8X8(top_type)){ - int b8_xy = 4*top_xy; - direct_cache[0 - 1*8]= direct_table[b8_xy + 2]; - direct_cache[2 - 1*8]= direct_table[b8_xy + 3]; - }else{ - AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1)); + if (IS_DIRECT(top_type)) { + AV_WN32A(&direct_cache[-1 * 8], + 0x01010101u * (MB_TYPE_DIRECT2 >> 1)); + } else if (IS_8X8(top_type)) { + int b8_xy = 4 * top_xy; + direct_cache[0 - 1 * 8] = direct_table[b8_xy + 2]; + direct_cache[2 - 1 * 8] = direct_table[b8_xy + 3]; + } else { + AV_WN32A(&direct_cache[-1 * 8], + 0x01010101 * (MB_TYPE_16x16 >> 1)); } - if(IS_DIRECT(left_type[LTOP])) - direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[LTOP])) - direct_cache[-1 + 
0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)]; + if (IS_DIRECT(left_type[LTOP])) + direct_cache[-1 + 0 * 8] = MB_TYPE_DIRECT2 >> 1; + else if (IS_8X8(left_type[LTOP])) + direct_cache[-1 + 0 * 8] = direct_table[4 * left_xy[LTOP] + 1 + (left_block[0] & ~1)]; else - direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1; + direct_cache[-1 + 0 * 8] = MB_TYPE_16x16 >> 1; - if(IS_DIRECT(left_type[LBOT])) - direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[LBOT])) - direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)]; + if (IS_DIRECT(left_type[LBOT])) + direct_cache[-1 + 2 * 8] = MB_TYPE_DIRECT2 >> 1; + else if (IS_8X8(left_type[LBOT])) + direct_cache[-1 + 2 * 8] = direct_table[4 * left_xy[LBOT] + 1 + (left_block[2] & ~1)]; else - direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1; + direct_cache[-1 + 2 * 8] = MB_TYPE_16x16 >> 1; } } } - if(FRAME_MBAFF){ -#define MAP_MVS\ - MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ - MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ - MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\ - MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\ - MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\ - MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT]) - if(MB_FIELD){ -#define MAP_F2F(idx, mb_type)\ - if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] <<= 1;\ - h->mv_cache[list][idx][1] /= 2;\ - h->mvd_cache[list][idx][1] >>=1;\ - } + +#define MAP_MVS \ + MAP_F2F(scan8[0] - 1 - 1 * 8, topleft_type) \ + MAP_F2F(scan8[0] + 0 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 1 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 2 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 3 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 4 - 1 * 8, topright_type) \ + MAP_F2F(scan8[0] - 1 + 0 * 8, left_type[LTOP]) \ + MAP_F2F(scan8[0] - 1 + 1 * 8, left_type[LTOP]) \ + MAP_F2F(scan8[0] - 1 + 2 * 8, left_type[LBOT]) \ + MAP_F2F(scan8[0] - 1 + 3 * 8, left_type[LBOT]) + + if (FRAME_MBAFF) { + if (MB_FIELD) { + +#define MAP_F2F(idx, mb_type) \ + if (!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) { \ + h->ref_cache[list][idx] <<= 1; \ + h->mv_cache[list][idx][1] /= 2; \ + h->mvd_cache[list][idx][1] >>= 1; \ + } + MAP_MVS + } else { + #undef MAP_F2F - }else{ -#define MAP_F2F(idx, mb_type)\ - if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] >>= 1;\ - h->mv_cache[list][idx][1] <<= 1;\ - h->mvd_cache[list][idx][1] <<= 1;\ - } +#define MAP_F2F(idx, mb_type) \ + if (IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) { \ + h->ref_cache[list][idx] >>= 1; \ + h->mv_cache[list][idx][1] <<= 1; \ + h->mvd_cache[list][idx][1] <<= 1; \ + } + MAP_MVS #undef MAP_F2F } @@ -747,36 +792,34 @@ static void fill_decode_caches(H264Context *h, int mb_type){ } } - h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]); + h->neighbor_transform_size = !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]); } /** * decodes a P_SKIP or B_SKIP macroblock */ -static void av_unused decode_mb_skip(H264Context *h){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - int mb_type=0; +static void av_unused decode_mb_skip(H264Context *h) +{ + MpegEncContext *const s = &h->s; + const int mb_xy = h->mb_xy; + int mb_type = 0; memset(h->non_zero_count[mb_xy], 0, 48); - if(MB_FIELD) - mb_type|= MB_TYPE_INTERLACED; + if (MB_FIELD) + mb_type |= MB_TYPE_INTERLACED; - if( 
h->slice_type_nos == AV_PICTURE_TYPE_B ) - { + if (h->slice_type_nos == AV_PICTURE_TYPE_B) { // just for fill_caches. pred_direct_motion will set the real mb_type - mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; - if(h->direct_spatial_mv_pred){ + mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | MB_TYPE_SKIP; + if (h->direct_spatial_mv_pred) { fill_decode_neighbors(h, mb_type); - fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... + fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... } ff_h264_pred_direct_motion(h, &mb_type); - mb_type|= MB_TYPE_SKIP; - } - else - { - mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + mb_type |= MB_TYPE_SKIP; + } else { + mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP; fill_decode_neighbors(h, mb_type); pred_pskip_motion(h); @@ -785,8 +828,8 @@ static void av_unused decode_mb_skip(H264Context *h){ write_back_motion(h, mb_type); s->current_picture.f.mb_type[mb_xy] = mb_type; s->current_picture.f.qscale_table[mb_xy] = s->qscale; - h->slice_table[ mb_xy ]= h->slice_num; - h->prev_mb_skipped= 1; + h->slice_table[mb_xy] = h->slice_num; + h->prev_mb_skipped = 1; } #endif /* AVCODEC_H264_MVPRED_H */ diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index 08a25a54c9..248c7d0e08 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -28,56 +28,90 @@ #define AVCODEC_H264DSP_H #include + #include "dsputil.h" typedef void (*h264_weight_func)(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset); -typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height, - int log2_denom, int weightd, int weights, int offset); +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, + int stride, int height, int log2_denom, + int weightd, int weights, int offset); /** * Context for storing H.264 DSP functions */ -typedef struct H264DSPContext{ +typedef struct H264DSPContext { /* weighted MC */ h264_weight_func weight_h264_pixels_tab[4]; h264_biweight_func biweight_h264_pixels_tab[4]; /* loop filter */ - void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); /* v/h_loop_filter_luma_intra: align 16 */ - void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta); - void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void 
(*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); + void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/, + int stride, int alpha, int beta); + void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta, + int8_t *tc0); + void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); // h264_loop_filter_strength: simd only. the C version is inlined in h264.c - void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], - int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); + void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], + int8_t ref[2][40], int16_t mv[2][40][2], + int bidir, int edges, int step, + int mask_mv0, int mask_mv1, int field); /* IDCT */ - void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); + void (*h264_idct_add)(uint8_t *dst /*align 4*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct8_add)(uint8_t *dst /*align 8*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/, + DCTELEM *block /*align 16*/, int stride); - void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul); + void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, 
const int *blockoffset, + DCTELEM *block /*align 16*/, + int stride, const uint8_t nnzc[15 * 8]); + void (*h264_luma_dc_dequant_idct)(DCTELEM *output, + DCTELEM *input /*align 16*/, int qmul); void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); -}H264DSPContext; +} H264DSPContext; -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); #endif /* AVCODEC_H264DSP_H */ From cbc7d60afa0c56f8e50131830278fd32a89aed9d Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 10 May 2012 00:55:18 +0100 Subject: [PATCH 23/25] arm: dsputil: fix overreads in put/avg_pixels functions The vertically interpolating variants of these functions read ahead one line to optimise the loop. On the last line processed, this might be outside the buffer. Fix these invalid reads by processing the last line outside the loop. Signed-off-by: Mans Rullgard --- libavcodec/arm/dsputil_neon.S | 92 +++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index d49aedd6c4..4bdcd95061 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -95,6 +95,7 @@ endfunc .endm .macro pixels16_y2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {q0}, [r1], r2 vld1.64 {q1}, [r1], r2 1: subs r3, r3, #2 @@ -114,10 +115,25 @@ endfunc vst1.64 {q2}, [r0,:128], r2 vst1.64 {q3}, [r0,:128], r2 bne 1b + + avg q2, q0, q1 + vld1.64 {q0}, [r1], r2 + avg q3, q0, q1 + .if \avg + vld1.8 {q8}, [r0,:128], r2 + vld1.8 {q9}, [r0,:128] + vrhadd.u8 q2, q2, q8 + vrhadd.u8 q3, q3, q9 + sub r0, r0, r2 + .endif + vst1.64 {q2}, [r0,:128], r2 + vst1.64 {q3}, [r0,:128], r2 + bx lr .endm .macro pixels16_xy2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {d0-d2}, [r1], r2 vld1.64 {d4-d6}, [r1], r2 .ifeq \rnd @@ -173,6 +189,42 @@ endfunc vaddl.u8 q11, d3, d5 vst1.64 {q15}, [r0,:128], r2 bgt 1b + + vld1.64 {d0-d2}, [r1], r2 + vadd.u16 q12, q8, q9 + .ifeq \rnd + vadd.u16 q12, q12, q13 + .endif + vext.8 q15, q0, q1, #1 + vadd.u16 q1 , q10, q11 + shrn d28, q12, #2 + .ifeq \rnd + vadd.u16 q1, q1, q13 + .endif + shrn d29, q1, #2 + .if \avg + vld1.8 {q8}, [r0,:128] + vrhadd.u8 q14, q14, q8 + .endif + vaddl.u8 q8, d0, d30 + vaddl.u8 q10, d1, d31 + vst1.64 {q14}, [r0,:128], r2 + vadd.u16 q12, q8, q9 + .ifeq \rnd + vadd.u16 q12, q12, q13 + .endif + vadd.u16 q0, q10, q11 + shrn d30, q12, #2 + .ifeq \rnd + vadd.u16 q0, q0, q13 + .endif + shrn d31, q0, #2 + .if \avg + vld1.8 {q9}, [r0,:128] + vrhadd.u8 q15, q15, q9 + .endif + vst1.64 {q15}, [r0,:128], r2 + bx lr .endm @@ -228,6 +280,7 @@ endfunc .endm .macro pixels8_y2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {d0}, [r1], r2 vld1.64 {d1}, [r1], r2 1: subs r3, r3, #2 @@ -246,10 +299,24 @@ endfunc vst1.64 {d4}, [r0,:64], r2 vst1.64 {d5}, [r0,:64], r2 bne 1b + + avg d4, d0, d1 + vld1.64 {d0}, [r1], r2 + avg d5, d0, d1 + .if \avg + vld1.8 
{d2}, [r0,:64], r2 + vld1.8 {d3}, [r0,:64] + vrhadd.u8 q2, q2, q1 + sub r0, r0, r2 + .endif + vst1.64 {d4}, [r0,:64], r2 + vst1.64 {d5}, [r0,:64], r2 + bx lr .endm .macro pixels8_xy2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {q0}, [r1], r2 vld1.64 {q1}, [r1], r2 .ifeq \rnd @@ -291,6 +358,31 @@ endfunc vaddl.u8 q9, d2, d6 vst1.64 {d7}, [r0,:64], r2 bgt 1b + + vld1.64 {q0}, [r1], r2 + vadd.u16 q10, q8, q9 + vext.8 d4, d0, d1, #1 + .ifeq \rnd + vadd.u16 q10, q10, q11 + .endif + vaddl.u8 q8, d0, d4 + shrn d5, q10, #2 + vadd.u16 q10, q8, q9 + .if \avg + vld1.8 {d7}, [r0,:64] + vrhadd.u8 d5, d5, d7 + .endif + .ifeq \rnd + vadd.u16 q10, q10, q11 + .endif + vst1.64 {d5}, [r0,:64], r2 + shrn d7, q10, #2 + .if \avg + vld1.8 {d5}, [r0,:64] + vrhadd.u8 d7, d7, d5 + .endif + vst1.64 {d7}, [r0,:64], r2 + bx lr .endm From 706b998cdcea97c50fad2228f67488de0e06b2a2 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Tue, 28 Feb 2012 17:42:12 +0100 Subject: [PATCH 24/25] ape: Use unsigned integer maths This involves a division that should be a shift. Signed-off-by: Diego Biurrun --- libavcodec/apedec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c index e41f555e31..b07f3a090b 100644 --- a/libavcodec/apedec.c +++ b/libavcodec/apedec.c @@ -393,7 +393,7 @@ static inline int range_get_symbol(APEContext *ctx, } /** @} */ // group rangecoder -static inline void update_rice(APERice *rice, int x) +static inline void update_rice(APERice *rice, unsigned int x) { int lim = rice->k ? (1 << (rice->k + 4)) : 0; rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5); @@ -406,7 +406,7 @@ static inline void update_rice(APERice *rice, int x) static inline int ape_decode_value(APEContext *ctx, APERice *rice) { - int x, overflow; + unsigned int x, overflow; if (ctx->fileversion < 3990) { int tmpk; From 110d0cdc9d1ec414a658f841a3fbefbf6f796d61 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Thu, 19 Apr 2012 22:36:17 +0200 Subject: [PATCH 25/25] rv40dsp x86: MMX/MMX2/3DNow/SSE2/SSSE3 implementations of MC Code mostly inspired by vp8's MC, however: - its MMX2 horizontal filter is worse because it can't take advantage of the coefficient redundancy - that same coefficient redundancy allows better code for non-SSSE3 versions Benchmark (rounded to tens of unit): V8x8 H8x8 2D8x8 V16x16 H16x16 2D16x16 C 445 358 985 1785 1559 3280 MMX* 219 271 478 714 929 1443 SSE2 131 158 294 425 515 892 SSSE3 120 122 248 387 390 763 End result is overall around a 15% speedup for SSSE3 version (on 6 sequences); all loop filter functions now take around 55% of decoding time, while luma MC dsp functions are around 6%, chroma ones are 1.3% and biweight around 2.3%. 
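A rough scalar equivalent of the vertical six-tap filter that the SIMD code
below implements may help when reading the asm. The coefficients are those of
the first entry of the sixtap_filter tables (one of the three qpel phases);
the function name, loop bounds and clipping here are purely illustrative:

#include <stdint.h>

static void rv40_qpel_v_ref(uint8_t *dst, int dststride,
                            const uint8_t *src, int srcstride,
                            int width, int height)
{
    /* Taps 0/5 and 1/4 share a coefficient; only the two inner taps differ,
     * which is the coefficient redundancy mentioned above. */
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            const uint8_t *s = src + x;
            int sum =  1 * (s[-2 * srcstride] + s[3 * srcstride])
                     - 5 * (s[-1 * srcstride] + s[2 * srcstride])
                    + 52 * s[0]
                    + 20 * s[srcstride]
                    + 32;                /* rounding; the taps sum to 64 */
            sum >>= 6;
            dst[x] = sum < 0 ? 0 : sum > 255 ? 255 : sum;
        }
        src += srcstride;
        dst += dststride;
    }
}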
Signed-off-by: Diego Biurrun --- libavcodec/x86/dsputil_mmx.c | 16 ++ libavcodec/x86/dsputil_mmx.h | 5 + libavcodec/x86/rv40dsp.asm | 316 +++++++++++++++++++++++++++++++++- libavcodec/x86/rv40dsp_init.c | 146 ++++++++++++++++ 4 files changed, 480 insertions(+), 3 deletions(-) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 3ef19c5d13..6377a73555 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1791,6 +1791,22 @@ QPEL_2TAP(avg_, 16, 3dnow) QPEL_2TAP(put_, 8, 3dnow) QPEL_2TAP(avg_, 8, 3dnow) +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + put_pixels8_xy2_mmx(dst, src, stride, 8); +} +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + put_pixels16_xy2_mmx(dst, src, stride, 16); +} +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + avg_pixels8_xy2_mmx(dst, src, stride, 8); +} +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + avg_pixels16_xy2_mmx(dst, src, stride, 16); +} #if HAVE_YASM typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src, diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index 097739cf98..37f4581b9c 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -199,6 +199,11 @@ void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); + void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index 721d3df094..e0213f40b9 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -1,5 +1,7 @@ ;****************************************************************************** ;* MMX/SSE2-optimized functions for the RV40 decoder +;* Copyright (c) 2010 Ronald S. Bultje +;* Copyright (c) 2010 Jason Garrett-Glaser ;* Copyright (C) 2012 Christophe Gisquet ;* ;* This file is part of Libav. 
@@ -25,11 +27,319 @@ SECTION_RODATA align 16 -shift_round: times 8 dw 1 << (16 - 6) -cextern pw_16 +pw_1024: times 8 dw 1 << (16 - 6) ; pw_1024 + +sixtap_filter_hb_m: times 8 db 1, -5 + times 8 db 52, 20 + ; multiplied by 2 to have the same shift + times 8 db 2, -10 + times 8 db 40, 40 + ; back to normal + times 8 db 1, -5 + times 8 db 20, 52 + +sixtap_filter_v_m: times 8 dw 1 + times 8 dw -5 + times 8 dw 52 + times 8 dw 20 + ; multiplied by 2 to have the same shift + times 8 dw 2 + times 8 dw -10 + times 8 dw 40 + times 8 dw 40 + ; back to normal + times 8 dw 1 + times 8 dw -5 + times 8 dw 20 + times 8 dw 52 + +%ifdef PIC +%define sixtap_filter_hw picregq +%define sixtap_filter_hb picregq +%define sixtap_filter_v picregq +%define npicregs 1 +%else +%define sixtap_filter_hw sixtap_filter_hw_m +%define sixtap_filter_hb sixtap_filter_hb_m +%define sixtap_filter_v sixtap_filter_v_m +%define npicregs 0 +%endif + +filter_h6_shuf1: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +filter_h6_shuf2: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 +filter_h6_shuf3: db 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11 + +cextern pw_32 +cextern pw_16 +cextern pw_512 SECTION .text +;----------------------------------------------------------------------------- +; subpel MC functions: +; +; void [put|rv40]_rv40_qpel_[h|v]_(uint8_t *dst, int deststride, +; uint8_t *src, int srcstride, +; int len, int m); +;---------------------------------------------------------------------- +%macro LOAD 2 +%if WIN64 + movsxd %1q, %1d +%endif +%ifdef PIC + add %1q, picregq +%else + add %1q, %2 +%endif +%endmacro + +%macro STORE 3 +%ifidn %3, avg + movh %2, [dstq] +%endif + packuswb %1, %1 +%ifidn %3, avg +%if cpuflag(3dnow) + pavgusb %1, %2 +%else + pavgb %1, %2 +%endif +%endif + movh [dstq], %1 +%endmacro + +%macro FILTER_V 1 +cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, my, picreg +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + pxor m7, m7 + LOAD my, sixtap_filter_v + + ; read 5 lines + sub srcq, srcstrideq + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + punpcklbw m4, m7 + +%ifdef m8 + mova m8, [myq+ 0] + mova m9, [myq+16] + mova m10, [myq+32] + mova m11, [myq+48] +%define COEFF05 m8 +%define COEFF14 m9 +%define COEFF2 m10 +%define COEFF3 m11 +%else +%define COEFF05 [myq+ 0] +%define COEFF14 [myq+16] +%define COEFF2 [myq+32] +%define COEFF3 [myq+48] +%endif +.nextrow: + mova m6, m1 + movh m5, [srcq+2*srcstrideq] ; read new row + paddw m6, m4 + punpcklbw m5, m7 + pmullw m6, COEFF14 + paddw m0, m5 + pmullw m0, COEFF05 + paddw m6, m0 + mova m0, m1 + paddw m6, [pw_32] + mova m1, m2 + pmullw m2, COEFF2 + paddw m6, m2 + mova m2, m3 + pmullw m3, COEFF3 + paddw m6, m3 + + ; round/clip/store + mova m3, m4 + psraw m6, 6 + mova m4, m5 + STORE m6, m5, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +%macro FILTER_H 1 +cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, height, mx, picreg +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + pxor m7, m7 + LOAD mx, sixtap_filter_v + mova m6, [pw_32] +%ifdef m8 + mova m8, [mxq+ 0] + mova m9, [mxq+16] + mova m10, [mxq+32] + mova m11, [mxq+48] +%define COEFF05 m8 +%define COEFF14 m9 +%define 
COEFF2 m10 +%define COEFF3 m11 +%else +%define COEFF05 [mxq+ 0] +%define COEFF14 [mxq+16] +%define COEFF2 [mxq+32] +%define COEFF3 [mxq+48] +%endif +.nextrow: + movq m0, [srcq-2] + movq m5, [srcq+3] + movq m1, [srcq-1] + movq m4, [srcq+2] + punpcklbw m0, m7 + punpcklbw m5, m7 + punpcklbw m1, m7 + punpcklbw m4, m7 + movq m2, [srcq-0] + movq m3, [srcq+1] + paddw m0, m5 + paddw m1, m4 + punpcklbw m2, m7 + punpcklbw m3, m7 + pmullw m0, COEFF05 + pmullw m1, COEFF14 + pmullw m2, COEFF2 + pmullw m3, COEFF3 + paddw m0, m6 + paddw m1, m2 + paddw m0, m3 + paddw m0, m1 + psraw m0, 6 + STORE m0, m1, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +FILTER_V put +FILTER_H put + +INIT_MMX mmx2 +FILTER_V avg +FILTER_H avg + +INIT_MMX 3dnow +FILTER_V avg +FILTER_H avg +%endif + +INIT_XMM sse2 +FILTER_H put +FILTER_H avg +FILTER_V put +FILTER_V avg + +%macro FILTER_SSSE3 1 +cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, my, picreg +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + + ; read 5 lines + sub srcq, srcstrideq + LOAD my, sixtap_filter_hb + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + mova m5, [myq] + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + lea srcq, [srcq+2*srcstrideq] + +.nextrow: + mova m6, m2 + punpcklbw m0, m1 + punpcklbw m6, m3 + pmaddubsw m0, m5 + pmaddubsw m6, [myq+16] + movh m7, [srcq] ; read new row + paddw m6, m0 + mova m0, m1 + mova m1, m2 + mova m2, m3 + mova m3, m4 + mova m4, m7 + punpcklbw m7, m3 + pmaddubsw m7, m5 + paddw m6, m7 + pmulhrsw m6, [pw_512] + STORE m6, m7, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + mova m3, [filter_h6_shuf2] + mova m4, [filter_h6_shuf3] + LOAD mx, sixtap_filter_hb + mova m5, [mxq] ; set up 6tap filter in bytes + mova m6, [mxq+16] + mova m7, [filter_h6_shuf1] + +.nextrow: + movu m0, [srcq-2] + mova m1, m0 + mova m2, m0 + pshufb m0, m7 + pshufb m1, m3 + pshufb m2, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m6 + pmaddubsw m2, m5 + paddw m0, m1 + paddw m0, m2 + pmulhrsw m0, [pw_512] + STORE m0, m1, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +INIT_XMM ssse3 +FILTER_SSSE3 put +FILTER_SSSE3 avg + ; %1=5bits weights?, %2=dst %3=src1 %4=src3 %5=stride if sse2 %macro RV40_WCORE 4-5 movh m4, [%3 + r6 + 0] @@ -143,7 +453,7 @@ SECTION .text %macro RV40_WEIGHT 3 cglobal rv40_weight_func_%1_%2, 6, 7, 8 %if cpuflag(ssse3) - mova m1, [shift_round] + mova m1, [pw_1024] %else mova m1, [pw_16] %endif diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index df468aa9e5..3f42363e4e 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -22,8 +22,11 @@ /** * @file * RV40 decoder motion compensation functions x86-optimised + * 2,0 and 0,2 have h264 equivalents. 
+ * 3,3 is bugged in the rv40 format and maps to _xy2 version */ +#include "libavcodec/x86/dsputil_mmx.h" #include "libavcodec/rv34dsp.h" void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, @@ -53,6 +56,132 @@ DECLARE_WEIGHT(mmx) DECLARE_WEIGHT(sse2) DECLARE_WEIGHT(ssse3) +/** @{ */ +/** + * Define one qpel function. + * LOOPSIZE must be already set to the number of pixels processed per + * iteration in the inner loop of the called functions. + * COFF(x) must be already defined so as to provide the offset into any + * array of coeffs used by the called function for the qpel position x. + */ +#define QPEL_FUNC_DECL(OP, SIZE, PH, PV, OPT) \ +static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst, \ + uint8_t *src, \ + int stride) \ +{ \ + int i; \ + if (PH && PV) { \ + DECLARE_ALIGNED(16, uint8_t, tmp)[SIZE * (SIZE + 5)]; \ + uint8_t *tmpptr = tmp + SIZE * 2; \ + src -= stride * 2; \ + \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_put_rv40_qpel_h ##OPT(tmp + i, SIZE, src + i, stride, \ + SIZE + 5, HCOFF(PH)); \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_v ##OPT(dst + i, stride, tmpptr + i, \ + SIZE, SIZE, VCOFF(PV)); \ + } else if (PV) { \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_v ## OPT(dst + i, stride, src + i, \ + stride, SIZE, VCOFF(PV)); \ + } else { \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_h ## OPT(dst + i, stride, src + i, \ + stride, SIZE, HCOFF(PH)); \ + } \ +}; + +/** Declare functions for sizes 8 and 16 and given operations + * and qpel position. */ +#define QPEL_FUNCS_DECL(OP, PH, PV, OPT) \ + QPEL_FUNC_DECL(OP, 8, PH, PV, OPT) \ + QPEL_FUNC_DECL(OP, 16, PH, PV, OPT) + +/** Declare all functions for all sizes and qpel positions */ +#define QPEL_MC_DECL(OP, OPT) \ +void ff_ ##OP ##rv40_qpel_h ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ + const uint8_t *src, \ + ptrdiff_t srcStride, \ + int len, int m); \ +void ff_ ##OP ##rv40_qpel_v ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ + const uint8_t *src, \ + ptrdiff_t srcStride, \ + int len, int m); \ +QPEL_FUNCS_DECL(OP, 0, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 0, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 0, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 2, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 2, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 0, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 2, OPT) +/** @} */ + +#define LOOPSIZE 8 +#define HCOFF(x) (32 * (x - 1)) +#define VCOFF(x) (32 * (x - 1)) +QPEL_MC_DECL(put_, _ssse3) +QPEL_MC_DECL(avg_, _ssse3) + +#undef LOOPSIZE +#undef HCOFF +#undef VCOFF +#define LOOPSIZE 8 +#define HCOFF(x) (64 * (x - 1)) +#define VCOFF(x) (64 * (x - 1)) +QPEL_MC_DECL(put_, _sse2) +QPEL_MC_DECL(avg_, _sse2) + +#if ARCH_X86_32 +#undef LOOPSIZE +#undef HCOFF +#undef VCOFF +#define LOOPSIZE 4 +#define HCOFF(x) (64 * (x - 1)) +#define VCOFF(x) (64 * (x - 1)) + +QPEL_MC_DECL(put_, _mmx) + +#define ff_put_rv40_qpel_h_mmx2 ff_put_rv40_qpel_h_mmx +#define ff_put_rv40_qpel_v_mmx2 ff_put_rv40_qpel_v_mmx +QPEL_MC_DECL(avg_, _mmx2) + +#define ff_put_rv40_qpel_h_3dnow ff_put_rv40_qpel_h_mmx +#define ff_put_rv40_qpel_v_3dnow ff_put_rv40_qpel_v_mmx +QPEL_MC_DECL(avg_, _3dnow) +#endif + +/** @{ */ +/** Set one function */ +#define QPEL_FUNC_SET(OP, SIZE, PH, PV, OPT) \ + c-> OP ## pixels_tab[2 - SIZE / 8][4 * PV + PH] = OP ## rv40_qpel ##SIZE ## _mc ##PH ##PV ##OPT; + +/** Set functions put and avg for sizes 8 and 16 and a given qpel 
position */ +#define QPEL_FUNCS_SET(OP, PH, PV, OPT) \ + QPEL_FUNC_SET(OP, 8, PH, PV, OPT) \ + QPEL_FUNC_SET(OP, 16, PH, PV, OPT) + +/** Set all functions for all sizes and qpel positions */ +#define QPEL_MC_SET(OP, OPT) \ +QPEL_FUNCS_SET (OP, 0, 1, OPT) \ +QPEL_FUNCS_SET (OP, 0, 3, OPT) \ +QPEL_FUNCS_SET (OP, 1, 0, OPT) \ +QPEL_FUNCS_SET (OP, 1, 1, OPT) \ +QPEL_FUNCS_SET (OP, 1, 2, OPT) \ +QPEL_FUNCS_SET (OP, 1, 3, OPT) \ +QPEL_FUNCS_SET (OP, 2, 1, OPT) \ +QPEL_FUNCS_SET (OP, 2, 2, OPT) \ +QPEL_FUNCS_SET (OP, 2, 3, OPT) \ +QPEL_FUNCS_SET (OP, 3, 0, OPT) \ +QPEL_FUNCS_SET (OP, 3, 1, OPT) \ +QPEL_FUNCS_SET (OP, 3, 2, OPT) +/** @} */ + void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) { #if HAVE_YASM @@ -65,25 +194,42 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx; + c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx; + c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx; + c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx; + c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx; +#if ARCH_X86_32 + QPEL_MC_SET(put_, _mmx) +#endif } if (mm_flags & AV_CPU_FLAG_MMX2) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; +#if ARCH_X86_32 + QPEL_MC_SET(avg_, _mmx2) +#endif } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; +#if ARCH_X86_32 + QPEL_MC_SET(avg_, _3dnow) +#endif } if (mm_flags & AV_CPU_FLAG_SSE2) { c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_sse2; + QPEL_MC_SET(put_, _sse2) + QPEL_MC_SET(avg_, _sse2) } if (mm_flags & AV_CPU_FLAG_SSSE3) { c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_ssse3; + QPEL_MC_SET(put_, _ssse3) + QPEL_MC_SET(avg_, _ssse3) } #endif }
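
As a usage sketch (the helper below is hypothetical and not part of the patch;
its indexing simply mirrors the QPEL_FUNC_SET macro in rv40dsp_init.c above),
selecting one of the MC functions installed by ff_rv40dsp_init_x86() could
look like this:

#include <stdint.h>
#include "libavcodec/rv34dsp.h"

/* size is 16 or 8; mx/my are the quarter-pel phases in 0..3, matching the
 * [2 - SIZE / 8][4 * PV + PH] layout used by QPEL_FUNC_SET. */
static void mc_rv40_luma(RV34DSPContext *dsp, uint8_t *dst, uint8_t *src,
                         int stride, int size, int mx, int my, int avg)
{
    int sidx = 2 - size / 8;      /* 16x16 -> 0, 8x8 -> 1 */
    int midx = 4 * my + mx;       /* vertical phase selects the row of four */

    if (avg)
        dsp->avg_pixels_tab[sidx][midx](dst, src, stride);
    else
        dsp->put_pixels_tab[sidx][midx](dst, src, stride);
}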