diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index 204a57891e..ca51c97389 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -2262,7 +2262,31 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
         return 1;
     } else if (buf_size >= 11 &&
                p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) {
-        /* extract DVD CC data */
+        /* extract DVD CC data
+         *
+         * uint32_t   user_data_start_code        0x000001B2    (big endian)
+         * uint16_t   user_identifier             0x4343 "CC"
+         * uint8_t    user_data_type_code         0x01
+         * uint8_t    caption_block_size          0xF8
+         * uint8_t
+         *   bit 7    caption_odd_field_first     1=odd field (CC1/CC2) first  0=even field (CC3/CC4) first
+         *   bit 6    caption_filler              0
+         *   bit 5:1  caption_block_count         number of caption blocks (pairs of caption words = frames). Most DVDs use 15 per start of GOP.
+         *   bit 0    caption_extra_field_added   1=one additional caption word
+         *
+         * struct caption_field_block {
+         *   uint8_t
+         *     bit 7:1 caption_filler             0x7F (all 1s)
+         *     bit 0   caption_field_odd          1=odd field (this is CC1/CC2)  0=even field (this is CC3/CC4)
+         *   uint8_t   caption_first_byte
+         *   uint8_t   caption_second_byte
+         * } caption_block[(caption_block_count * 2) + caption_extra_field_added];
+         *
+         * Some DVDs encode caption data for both fields with caption_field_odd=1. The only way to decode the fields
+         * correctly is to start on the field indicated by caption_odd_field_first and count between odd/even fields.
+         * Don't assume that the first caption word is the odd field. There do exist MPEG files in the wild that start
+         * on the even field. There also exist DVDs in the wild that encode an odd field count and the
+         * caption_extra_field_added/caption_odd_field_first bits change per packet to allow that. */
         int cc_count = 0;
         int i;
         // There is a caption count field in the data, but it is often