diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c index 204a57891e..ca51c97389 100644 --- a/libavcodec/mpeg12dec.c +++ b/libavcodec/mpeg12dec.c @@ -2262,7 +2262,31 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx, return 1; } else if (buf_size >= 11 && p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) { - /* extract DVD CC data */ + /* extract DVD CC data + * + * uint32_t user_data_start_code 0x000001B2 (big endian) + * uint16_t user_identifier 0x4343 "CC" + * uint8_t user_data_type_code 0x01 + * uint8_t caption_block_size 0xF8 + * uint8_t + * bit 7 caption_odd_field_first 1=odd field (CC1/CC2) first 0=even field (CC3/CC4) first + * bit 6 caption_filler 0 + * bit 5:1 caption_block_count number of caption blocks (pairs of caption words = frames). Most DVDs use 15 per start of GOP. + * bit 0 caption_extra_field_added 1=one additional caption word + * + * struct caption_field_block { + * uint8_t + * bit 7:1 caption_filler 0x7F (all 1s) + * bit 0 caption_field_odd 1=odd field (this is CC1/CC2) 0=even field (this is CC3/CC4) + * uint8_t caption_first_byte + * uint8_t caption_second_byte + * } caption_block[(caption_block_count * 2) + caption_extra_field_added]; + * + * Some DVDs encode caption data for both fields with caption_field_odd=1. The only way to decode the fields + * correctly is to start on the field indicated by caption_odd_field_first and alternate between odd and even fields for each subsequent caption word. + * Don't assume that the first caption word is the odd field. MPEG files that start on the even field + * exist in the wild. So do DVDs that encode an odd number of fields, with the + * caption_extra_field_added/caption_odd_field_first bits changing per packet to allow that. */ int cc_count = 0; int i; // There is a caption count field in the data, but it is often