diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index efca5ab3a1..3f2244b558 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -375,6 +375,7 @@ typedef struct H264SliceContext {
     int mb_skip_run;
     int is_complex;
 
+    int picture_structure;
     int mb_field_decoding_flag;
     int mb_mbaff;               ///< mb_aff_frame && mb_field_decoding_flag
 
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 10baf10d10..359c6a9d4d 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -1004,6 +1004,203 @@ static enum AVPixelFormat non_j_pixfmt(enum AVPixelFormat a)
     }
 }
 
+/* This function is called right after decoding the slice header for a first
+ * slice in a field (or a frame). It decides whether we are decoding a new frame
+ * or a second field in a pair and does the necessary setup.
+ */
+static int h264_field_start(H264Context *h, const H264SliceContext *sl)
+{
+    int i;
+    const SPS *sps = h->ps.sps;
+
+    int last_pic_structure, last_pic_droppable, ret;
+
+    last_pic_droppable   = h->droppable;
+    last_pic_structure   = h->picture_structure;
+    h->droppable         = (h->nal_ref_idc == 0);
+    h->picture_structure = sl->picture_structure;
+
+    /* Shorten frame num gaps so we don't have to allocate reference
+     * frames just to throw them away */
+    if (h->poc.frame_num != h->poc.prev_frame_num) {
+        int unwrap_prev_frame_num = h->poc.prev_frame_num;
+        int max_frame_num         = 1 << sps->log2_max_frame_num;
+
+        if (unwrap_prev_frame_num > h->poc.frame_num)
+            unwrap_prev_frame_num -= max_frame_num;
+
+        if ((h->poc.frame_num - unwrap_prev_frame_num) > sps->ref_frame_count) {
+            unwrap_prev_frame_num = (h->poc.frame_num - sps->ref_frame_count) - 1;
+            if (unwrap_prev_frame_num < 0)
+                unwrap_prev_frame_num += max_frame_num;
+
+            h->poc.prev_frame_num = unwrap_prev_frame_num;
+        }
+    }
+
+    /* See if we have a decoded first field looking for a pair...
+     * Here, we're using that to see if we should mark previously
+     * decode frames as "finished".
+     * We have to do that before the "dummy" in-between frame allocation,
+     * since that can modify h->cur_pic_ptr. */
+    if (h->first_field) {
+        av_assert0(h->cur_pic_ptr);
+        av_assert0(h->cur_pic_ptr->f->buf[0]);
+        assert(h->cur_pic_ptr->reference != DELAYED_PIC_REF);
+
+        /* Mark old field/frame as completed */
+        if (h->cur_pic_ptr->tf.owner == h->avctx) {
+            ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+                                      last_pic_structure == PICT_BOTTOM_FIELD);
+        }
+
+        /* figure out if we have a complementary field pair */
+        if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
+            /* Previous field is unmatched. Don't display it, but let it
+             * remain for reference if marked as such. */
+            if (last_pic_structure != PICT_FRAME) {
+                ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+                                          last_pic_structure == PICT_TOP_FIELD);
+            }
+        } else {
+            if (h->cur_pic_ptr->frame_num != h->poc.frame_num) {
+                /* This and previous field were reference, but had
+                 * different frame_nums. Consider this field first in
+                 * pair. Throw away previous field except for reference
+                 * purposes. */
+                if (last_pic_structure != PICT_FRAME) {
+                    ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+                                              last_pic_structure == PICT_TOP_FIELD);
+                }
+            } else {
+                /* Second field in complementary pair */
+                if (!((last_pic_structure   == PICT_TOP_FIELD &&
+                       h->picture_structure == PICT_BOTTOM_FIELD) ||
+                      (last_pic_structure   == PICT_BOTTOM_FIELD &&
+                       h->picture_structure == PICT_TOP_FIELD))) {
+                    av_log(h->avctx, AV_LOG_ERROR,
+                           "Invalid field mode combination %d/%d\n",
+                           last_pic_structure, h->picture_structure);
+                    h->picture_structure = last_pic_structure;
+                    h->droppable         = last_pic_droppable;
+                    return AVERROR_INVALIDDATA;
+                } else if (last_pic_droppable != h->droppable) {
+                    avpriv_request_sample(h->avctx,
+                                          "Found reference and non-reference fields in the same frame, which");
+                    h->picture_structure = last_pic_structure;
+                    h->droppable         = last_pic_droppable;
+                    return AVERROR_PATCHWELCOME;
+                }
+            }
+        }
+    }
+
+    while (h->poc.frame_num != h->poc.prev_frame_num && !h->first_field &&
+           h->poc.frame_num != (h->poc.prev_frame_num + 1) % (1 << sps->log2_max_frame_num)) {
+        H264Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
+        av_log(h->avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n",
+               h->poc.frame_num, h->poc.prev_frame_num);
+        if (!sps->gaps_in_frame_num_allowed_flag)
+            for(i=0; i<FF_ARRAY_ELEMS(h->last_pocs); i++)
+                h->last_pocs[i] = INT_MIN;
+        ret = h264_frame_start(h);
+        if (ret < 0) {
+            h->first_field = 0;
+            return ret;
+        }
+
+        h->poc.prev_frame_num++;
+        h->poc.prev_frame_num        %= 1 << sps->log2_max_frame_num;
+        h->cur_pic_ptr->frame_num = h->poc.prev_frame_num;
+        h->cur_pic_ptr->invalid_gap = !sps->gaps_in_frame_num_allowed_flag;
+        ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 0);
+        ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 1);
+
+        h->explicit_ref_marking = 0;
+        ret = ff_h264_execute_ref_pic_marking(h);
+        if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
+            return ret;
+        /* Error concealment: If a ref is missing, copy the previous ref
+         * in its place.
+         * FIXME: Avoiding a memcpy would be nice, but ref handling makes
+         * many assumptions about there being no actual duplicates.
+         * FIXME: This does not copy padding for out-of-frame motion
+         * vectors.  Given we are concealing a lost frame, this probably
+         * is not noticeable by comparison, but it should be fixed. */
+        if (h->short_ref_count) {
+            if (prev &&
+                h->short_ref[0]->f->width == prev->f->width &&
+                h->short_ref[0]->f->height == prev->f->height &&
+                h->short_ref[0]->f->format == prev->f->format) {
+                av_image_copy(h->short_ref[0]->f->data,
+                              h->short_ref[0]->f->linesize,
+                              (const uint8_t **)prev->f->data,
+                              prev->f->linesize,
+                              prev->f->format,
+                              prev->f->width,
+                              prev->f->height);
+                h->short_ref[0]->poc = prev->poc + 2;
+            }
+            h->short_ref[0]->frame_num = h->poc.prev_frame_num;
+        }
+    }
+
+    /* See if we have a decoded first field looking for a pair...
+     * We're using that to see whether to continue decoding in that
+     * frame, or to allocate a new one. */
+    if (h->first_field) {
+        av_assert0(h->cur_pic_ptr);
+        av_assert0(h->cur_pic_ptr->f->buf[0]);
+        assert(h->cur_pic_ptr->reference != DELAYED_PIC_REF);
+
+        /* figure out if we have a complementary field pair */
+        if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
+            /* Previous field is unmatched. Don't display it, but let it
+             * remain for reference if marked as such. */
+            h->missing_fields ++;
+            h->cur_pic_ptr = NULL;
+            h->first_field = FIELD_PICTURE(h);
+        } else {
+            h->missing_fields = 0;
+            if (h->cur_pic_ptr->frame_num != h->poc.frame_num) {
+                ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+                                          h->picture_structure==PICT_BOTTOM_FIELD);
+                /* This and the previous field had different frame_nums.
+                 * Consider this field first in pair. Throw away previous
+                 * one except for reference purposes. */
+                h->first_field = 1;
+                h->cur_pic_ptr = NULL;
+            } else {
+                /* Second field in complementary pair */
+                h->first_field = 0;
+            }
+        }
+    } else {
+        /* Frame or first field in a potentially complementary pair */
+        h->first_field = FIELD_PICTURE(h);
+    }
+
+    if (!FIELD_PICTURE(h) || h->first_field) {
+        if (h264_frame_start(h) < 0) {
+            h->first_field = 0;
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        release_unused_pictures(h, 0);
+    }
+    /* Some macroblocks can be accessed before they're available in case
+    * of lost slices, MBAFF or threading. */
+    if (FIELD_PICTURE(h)) {
+        for(i = (h->picture_structure == PICT_BOTTOM_FIELD); i<h->mb_height; i++)
+            memset(h->slice_table + i*h->mb_stride, -1, (h->mb_stride - (i+1==h->mb_height)) * sizeof(*h->slice_table));
+    } else {
+        memset(h->slice_table, -1,
+            (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
+    }
+
+    return 0;
+}
+
 static int h264_slice_header_parse(H264Context *h, H264SliceContext *sl)
 {
     const SPS *sps;
@@ -1011,7 +1208,6 @@ static int h264_slice_header_parse(H264Context *h, H264SliceContext *sl)
     unsigned int pps_id;
     int ret;
     unsigned int slice_type, tmp, i;
-    int last_pic_structure, last_pic_droppable;
     int must_reinit;
     int needs_reinit = 0;
     int field_pic_flag, bottom_field_flag;
@@ -1281,8 +1477,6 @@ static int h264_slice_header_parse(H264Context *h, H264SliceContext *sl)
     sl->mb_mbaff       = 0;
     mb_aff_frame       = 0;
     last_mb_aff_frame  = h->mb_aff_frame;
-    last_pic_structure = h->picture_structure;
-    last_pic_droppable = h->droppable;
 
     droppable = h->nal_ref_idc == 0;
     if (sps->frame_mbs_only_flag) {
@@ -1304,12 +1498,12 @@ static int h264_slice_header_parse(H264Context *h, H264SliceContext *sl)
     }
 
     if (h->current_slice) {
-        if (last_pic_structure != picture_structure ||
-            last_pic_droppable != droppable ||
+        if (h->picture_structure != picture_structure ||
+            h->droppable         != droppable ||
             last_mb_aff_frame  != mb_aff_frame) {
             av_log(h->avctx, AV_LOG_ERROR,
                    "Changing field mode (%d -> %d) between slices is not allowed\n",
-                   last_pic_structure, h->picture_structure);
+                   h->picture_structure, picture_structure);
             return AVERROR_INVALIDDATA;
         } else if (!h->cur_pic_ptr) {
             av_log(h->avctx, AV_LOG_ERROR,
@@ -1320,193 +1514,12 @@ static int h264_slice_header_parse(H264Context *h, H264SliceContext *sl)
     }
 
     if (!h->setup_finished) {
-        h->droppable         = droppable;
-        h->picture_structure = picture_structure;
         h->mb_aff_frame      = mb_aff_frame;
     }
+    sl->picture_structure      = picture_structure;
     sl->mb_field_decoding_flag = picture_structure != PICT_FRAME;
 
-    if (h->current_slice == 0) {
-        /* Shorten frame num gaps so we don't have to allocate reference
-         * frames just to throw them away */
-        if (h->poc.frame_num != h->poc.prev_frame_num) {
-            int unwrap_prev_frame_num = h->poc.prev_frame_num;
-            int max_frame_num         = 1 << sps->log2_max_frame_num;
-
-            if (unwrap_prev_frame_num > h->poc.frame_num)
-                unwrap_prev_frame_num -= max_frame_num;
-
-            if ((h->poc.frame_num - unwrap_prev_frame_num) > sps->ref_frame_count) {
-                unwrap_prev_frame_num = (h->poc.frame_num - sps->ref_frame_count) - 1;
-                if (unwrap_prev_frame_num < 0)
-                    unwrap_prev_frame_num += max_frame_num;
-
-                h->poc.prev_frame_num = unwrap_prev_frame_num;
-            }
-        }
-
-        /* See if we have a decoded first field looking for a pair...
-         * Here, we're using that to see if we should mark previously
-         * decode frames as "finished".
-         * We have to do that before the "dummy" in-between frame allocation,
-         * since that can modify h->cur_pic_ptr. */
-        if (h->first_field) {
-            av_assert0(h->cur_pic_ptr);
-            av_assert0(h->cur_pic_ptr->f->buf[0]);
-            assert(h->cur_pic_ptr->reference != DELAYED_PIC_REF);
-
-            /* Mark old field/frame as completed */
-            if (h->cur_pic_ptr->tf.owner == h->avctx) {
-                ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
-                                          last_pic_structure == PICT_BOTTOM_FIELD);
-            }
-
-            /* figure out if we have a complementary field pair */
-            if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
-                /* Previous field is unmatched. Don't display it, but let it
-                 * remain for reference if marked as such. */
-                if (last_pic_structure != PICT_FRAME) {
-                    ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
-                                              last_pic_structure == PICT_TOP_FIELD);
-                }
-            } else {
-                if (h->cur_pic_ptr->frame_num != h->poc.frame_num) {
-                    /* This and previous field were reference, but had
-                     * different frame_nums. Consider this field first in
-                     * pair. Throw away previous field except for reference
-                     * purposes. */
-                    if (last_pic_structure != PICT_FRAME) {
-                        ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
-                                                  last_pic_structure == PICT_TOP_FIELD);
-                    }
-                } else {
-                    /* Second field in complementary pair */
-                    if (!((last_pic_structure   == PICT_TOP_FIELD &&
-                           h->picture_structure == PICT_BOTTOM_FIELD) ||
-                          (last_pic_structure   == PICT_BOTTOM_FIELD &&
-                           h->picture_structure == PICT_TOP_FIELD))) {
-                        av_log(h->avctx, AV_LOG_ERROR,
-                               "Invalid field mode combination %d/%d\n",
-                               last_pic_structure, h->picture_structure);
-                        h->picture_structure = last_pic_structure;
-                        h->droppable         = last_pic_droppable;
-                        return AVERROR_INVALIDDATA;
-                    } else if (last_pic_droppable != h->droppable) {
-                        avpriv_request_sample(h->avctx,
-                                              "Found reference and non-reference fields in the same frame, which");
-                        h->picture_structure = last_pic_structure;
-                        h->droppable         = last_pic_droppable;
-                        return AVERROR_PATCHWELCOME;
-                    }
-                }
-            }
-        }
-
-        while (h->poc.frame_num != h->poc.prev_frame_num && !h->first_field &&
-               h->poc.frame_num != (h->poc.prev_frame_num + 1) % (1 << sps->log2_max_frame_num)) {
-            H264Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
-            av_log(h->avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n",
-                   h->poc.frame_num, h->poc.prev_frame_num);
-            if (!sps->gaps_in_frame_num_allowed_flag)
-                for(i=0; i<FF_ARRAY_ELEMS(h->last_pocs); i++)
-                    h->last_pocs[i] = INT_MIN;
-            ret = h264_frame_start(h);
-            if (ret < 0) {
-                h->first_field = 0;
-                return ret;
-            }
-
-            h->poc.prev_frame_num++;
-            h->poc.prev_frame_num        %= 1 << sps->log2_max_frame_num;
-            h->cur_pic_ptr->frame_num = h->poc.prev_frame_num;
-            h->cur_pic_ptr->invalid_gap = !sps->gaps_in_frame_num_allowed_flag;
-            ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 0);
-            ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 1);
-
-            h->explicit_ref_marking = 0;
-            ret = ff_h264_execute_ref_pic_marking(h);
-            if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
-                return ret;
-            /* Error concealment: If a ref is missing, copy the previous ref
-             * in its place.
-             * FIXME: Avoiding a memcpy would be nice, but ref handling makes
-             * many assumptions about there being no actual duplicates.
-             * FIXME: This does not copy padding for out-of-frame motion
-             * vectors.  Given we are concealing a lost frame, this probably
-             * is not noticeable by comparison, but it should be fixed. */
-            if (h->short_ref_count) {
-                if (prev &&
-                    h->short_ref[0]->f->width == prev->f->width &&
-                    h->short_ref[0]->f->height == prev->f->height &&
-                    h->short_ref[0]->f->format == prev->f->format) {
-                    av_image_copy(h->short_ref[0]->f->data,
-                                  h->short_ref[0]->f->linesize,
-                                  (const uint8_t **)prev->f->data,
-                                  prev->f->linesize,
-                                  prev->f->format,
-                                  prev->f->width,
-                                  prev->f->height);
-                    h->short_ref[0]->poc = prev->poc + 2;
-                }
-                h->short_ref[0]->frame_num = h->poc.prev_frame_num;
-            }
-        }
-
-        /* See if we have a decoded first field looking for a pair...
-         * We're using that to see whether to continue decoding in that
-         * frame, or to allocate a new one. */
-        if (h->first_field) {
-            av_assert0(h->cur_pic_ptr);
-            av_assert0(h->cur_pic_ptr->f->buf[0]);
-            assert(h->cur_pic_ptr->reference != DELAYED_PIC_REF);
-
-            /* figure out if we have a complementary field pair */
-            if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
-                /* Previous field is unmatched. Don't display it, but let it
-                 * remain for reference if marked as such. */
-                h->missing_fields ++;
-                h->cur_pic_ptr = NULL;
-                h->first_field = FIELD_PICTURE(h);
-            } else {
-                h->missing_fields = 0;
-                if (h->cur_pic_ptr->frame_num != h->poc.frame_num) {
-                    ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
-                                              h->picture_structure==PICT_BOTTOM_FIELD);
-                    /* This and the previous field had different frame_nums.
-                     * Consider this field first in pair. Throw away previous
-                     * one except for reference purposes. */
-                    h->first_field = 1;
-                    h->cur_pic_ptr = NULL;
-                } else {
-                    /* Second field in complementary pair */
-                    h->first_field = 0;
-                }
-            }
-        } else {
-            /* Frame or first field in a potentially complementary pair */
-            h->first_field = FIELD_PICTURE(h);
-        }
-
-        if (!FIELD_PICTURE(h) || h->first_field) {
-            if (h264_frame_start(h) < 0) {
-                h->first_field = 0;
-                return AVERROR_INVALIDDATA;
-            }
-        } else {
-            release_unused_pictures(h, 0);
-        }
-        /* Some macroblocks can be accessed before they're available in case
-        * of lost slices, MBAFF or threading. */
-        if (FIELD_PICTURE(h)) {
-            for(i = (h->picture_structure == PICT_BOTTOM_FIELD); i<h->mb_height; i++)
-                memset(h->slice_table + i*h->mb_stride, -1, (h->mb_stride - (i+1==h->mb_height)) * sizeof(*h->slice_table));
-        } else {
-            memset(h->slice_table, -1,
-                (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
-        }
-    }
-
-    if (h->picture_structure == PICT_FRAME) {
+    if (picture_structure == PICT_FRAME) {
         h->curr_pic_num = h->poc.frame_num;
         h->max_pic_num  = 1 << sps->log2_max_frame_num;
     } else {
@@ -1523,7 +1536,7 @@ static int h264_slice_header_parse(H264Context *h, H264SliceContext *sl)
         if (!h->setup_finished)
             h->poc.poc_lsb = poc_lsb;
 
-        if (pps->pic_order_present == 1 && h->picture_structure == PICT_FRAME) {
+        if (pps->pic_order_present == 1 && picture_structure == PICT_FRAME) {
             int delta_poc_bottom = get_se_golomb(&sl->gb);
             if (!h->setup_finished)
                 h->poc.delta_poc_bottom = delta_poc_bottom;
@@ -1536,7 +1549,7 @@ static int h264_slice_header_parse(H264Context *h, H264SliceContext *sl)
         if (!h->setup_finished)
             h->poc.delta_poc[0] = delta_poc;
 
-        if (pps->pic_order_present == 1 && h->picture_structure == PICT_FRAME) {
+        if (pps->pic_order_present == 1 && picture_structure == PICT_FRAME) {
             delta_poc = get_se_golomb(&sl->gb);
 
             if (!h->setup_finished)
@@ -1552,7 +1565,7 @@ static int h264_slice_header_parse(H264Context *h, H264SliceContext *sl)
 
     ret = ff_h264_parse_ref_count(&sl->list_count, sl->ref_count,
                                   &sl->gb, pps, sl->slice_type_nos,
-                                  h->picture_structure, h->avctx);
+                                  picture_structure, h->avctx);
     if (ret < 0)
         return ret;
 
@@ -1655,6 +1668,12 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
     if (ret) // can not be ret<0 because of SLICE_SKIPED, SLICE_SINGLETHREAD, ...
         return ret;
 
+    if (h->current_slice == 0) {
+        ret = h264_field_start(h, sl);
+        if (ret < 0)
+            return ret;
+    }
+
     av_assert1(h->mb_num == h->mb_width * h->mb_height);
     if (sl->first_mb_addr << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num ||
         sl->first_mb_addr >= h->mb_num) {