FFmpeg/libavformat/subtitles.h

/*
 * Copyright (c) 2012 Clément Bœsch
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVFORMAT_SUBTITLES_H
#define AVFORMAT_SUBTITLES_H

#include <stdint.h>
#include "avformat.h"
#include "libavutil/bprint.h"

/**
 * Ordering criteria available for sorting the subtitles packets of a queue.
 */
enum sub_sort {
    SUB_SORT_TS_POS = 0,    ///< order by timestamp first, position second
    SUB_SORT_POS_TS = 1,    ///< order by position first, timestamp second
};

/**
 * Queue of subtitles packets operated on by the ff_subtitles_queue_*()
 * helpers declared in this header.
 *
 * NOTE(review): the unit of allocated_size (bytes vs. number of entries) is
 * not stated in this header -- confirm against the implementation.
 */
typedef struct {
    AVPacket *subs;         ///< array of subtitles packets
    int nb_subs;            ///< number of subtitles packets in subs
    int allocated_size;     ///< allocated size for subs
    int current_sub_idx;    ///< current position for the read packet callback
    enum sub_sort sort;     ///< sort method to use when finalizing subtitles
} FFDemuxSubtitlesQueue;

/**
 * Insert a new subtitle event.
 *
 * @param q     the subtitles queue receiving the event
 * @param event the subtitle line, may not be zero terminated
 * @param len   the length of the event (in strlen() sense, so without '\0')
 * @param merge set to 1 if the current event should be concatenated with the
 *              previous one instead of adding a new entry, 0 otherwise
 * @return a packet pointer; presumably the queue entry holding the event, or
 *         NULL on failure -- TODO confirm against the implementation
 */
AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
                                    const uint8_t *event, int len, int merge);

/**
 * Set missing durations and sort subtitles according to the sort method
 * stored in the queue: by PTS and then byte position (SUB_SORT_TS_POS), or by
 * byte position and then PTS (SUB_SORT_POS_TS).
 *
 * @param q the subtitles queue to finalize
 */
void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q);

/**
 * Generic read_packet() callback for subtitles demuxers using this queue
 * system.
 *
 * @param q   the subtitles queue to read from; its current_sub_idx field
 *            holds the current position for this callback
 * @param pkt destination packet
 * @return NOTE(review): the return convention is not visible from this
 *         header; presumably 0 on success and a negative error code
 *         otherwise -- confirm against the implementation
 */
int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt);

/**
 * Update current_sub_idx to emulate a seek. Except for the first parameter,
 * the prototype matches the AVInputFormat->read_seek2 callback.
 *
 * @param q the subtitles queue whose current_sub_idx is updated
 */
int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,
                            int64_t min_ts, int64_t ts, int64_t max_ts, int flags);

/**
 * Remove and destroy all the subtitles packets.
 *
 * @param q the subtitles queue to empty
 */
void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q);

/**
 * SMIL helper to load the next chunk ("<...>" or untagged content) into buf.
 *
 * @param pb  I/O context to read from
 * @param buf buffer receiving the chunk
 * @param c   cached character, to avoid a backward seek
 * @return NOTE(review): the meaning of the return value is not documented in
 *         this header -- confirm against the implementation
 */
int ff_smil_extract_next_chunk(AVIOContext *pb, AVBPrint *buf, char *c);

/**
 * SMIL helper to point to the value of an attribute in the given tag.
 *
 * @param s    SMIL tag ("<...>")
 * @param attr the attribute to look for
 * @return pointer to the attribute value; presumably NULL when the attribute
 *         is not present -- TODO confirm against the implementation
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr);

/**
 * @brief Read a subtitles chunk.
 *
 * A chunk is defined by a multiline "event", ending with a second line break.
 * The trailing line breaks are trimmed. CRLF line breaks are supported.
 * Example: "foo\r\nbar\r\n\r\nnext" will write "foo\r\nbar" into buf, and pb
 * will be positioned on the 'n' of the "next" string.
 *
 * @param pb  I/O context
 * @param buf an initialized buf where the chunk is written
 *
 * @note buf is cleared before writing into it.
 */
void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf);

/**
 * Compute how many characters must be skipped from ptr to reach the start of
 * the next line, or the terminating '\0' when no further line exists.
 *
 * Recognized line terminators are LF, CRLF (MS), and a standalone CR
 * (old MacOS). At most one terminator is consumed.
 *
 * @param ptr zero-terminated string positioned anywhere within a line
 * @return the number of characters to advance
 */
static av_always_inline int ff_subtitles_next_line(const char *ptr)
{
    /* Length of the line content, up to the first CR/LF or the final NUL. */
    int len = strcspn(ptr, "\r\n");
    const char *eol = ptr + len;
    /* 1 when the content is followed by a CR, 0 otherwise. */
    int has_cr = eol[0] == '\r';
    /* An LF only counts right after the content, or right after that CR;
     * eol[1] is readable whenever has_cr is set since eol[0] is not NUL. */
    int has_lf = eol[has_cr] == '\n';

    return len + has_cr + has_lf;
}

#endif /* AVFORMAT_SUBTITLES_H */
lavf: add internal demuxer helpers for subtitles. 2012-06-22 22:56:30 +03:00			`/*`
			`* Copyright (c) 2012 Clément Bœsch`
			`*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with FFmpeg; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`#ifndef AVFORMAT_SUBTITLES_H`
			`#define AVFORMAT_SUBTITLES_H`

			`#include <stdint.h>`
			`#include "avformat.h"`
lavf/subtitles: add some SMIL helpers. This is needed for SAMI and RealText demuxers. 2012-06-17 12:43:09 +03:00			`#include "libavutil/bprint.h"`
lavf: add internal demuxer helpers for subtitles. 2012-06-22 22:56:30 +03:00
avformat/vobsub: fix several issues. Here is an extract of fate-samples/sub/vobsub.idx, with an additional text at the end of each line to better identify each bitmap: timestamp: 00:04:55:445, filepos: 00001b000 Ace! timestamp: 00:05:00:049, filepos: 00001b800 Wake up, honey! timestamp: 00:05:02:018, filepos: 00001c800 I gotta go to work. timestamp: 00:05:02:035, filepos: 00001d000 <???> timestamp: 00:05:04:203, filepos: 00001d800 Look after Clayton, okay? timestamp: 00:05:05:947, filepos: 00001e800 I'll be back tonight. timestamp: 00:05:07:957, filepos: 00001f800 Bye! Love you. timestamp: 00:05:21:295, filepos: 000020800 Hey, Ace! What's up? timestamp: 00:05:23:356, filepos: 000021800 Hey, how's it going? timestamp: 00:05:24:640, filepos: 000022800 Remember what today is? The 3rd! timestamp: 00:05:27:193, filepos: 000023800 Look over there! timestamp: 00:05:28:369, filepos: 000024800 Where are they going? timestamp: 00:05:28:361, filepos: 000025000 <???> timestamp: 00:05:29:946, filepos: 000025800 Let's go see. timestamp: 00:05:31:230, filepos: 000026000 I can't, man. I got Clayton. Note the two "<???>": they are basically split subtitles (with the previous one), which the dvdsub decoder is now supposed to reconstruct with a previous commit. But also note that while the first chunk has increasing timestamps, timestamp: 00:05:02:018, filepos: 00001c800 timestamp: 00:05:02:035, filepos: 00001d000 ...it's not the case of the second one (and this is not an exception in the original file): timestamp: 00:05:28:369, filepos: 000024800 timestamp: 00:05:28:361, filepos: 000025000 For the dvdsub decoder, they need to be "filepos'ed" ordered, but the FFDemuxSubtitlesQueue is timestamps ordered, which is the reason of the introduction of a sub sort method in the context, to allow giving priority to the position, and then the timestamps. With that change, the dvdsub decoder get fed with ordered packets. 
Now the packet size estimation was also broken: the filepos differences in the vobsub index defines the full data read between two subtitles chunks, and it is necessary to take into account what is read by the mpegps_read_pes_header() function since the length returned by that function doesn't count the size of the data it reads. This is fixed with the introduction of total_read, and {old,new}_pos. By doing this change, we can drop the unreliable len16 heuristic and simplify the whole loop. Note that mpegps_read_pes_header() often read more than one PES packet (typically in one call it can read 0x1ba and 0x1be chunk along with the relevant 0x1bd packet), which triggers the "total_read + pkt_size > psize" check. This is an expected behaviour, which could be avoided by having a more chunked version of mpegps_read_pes_header(). The latest change is the extraction of each stream into its own subtitles queue. If we don't do this, the maximum size for a subtitle chunk is broken, and the previous changes can not work. Having each stream in a different queue requires some little adjustments in the seek code of the demuxer. This commit is only meaningful as a whole change and can not be easily split. The FATE test changes because it uses the vobsub demuxer. 2013-09-29 23:05:14 +03:00			`enum sub_sort {`
			`SUB_SORT_TS_POS = 0, ///< sort by timestamps, then position`
			`SUB_SORT_POS_TS, ///< sort by position, then timestamps`
			`};`

lavf: add internal demuxer helpers for subtitles. 2012-06-22 22:56:30 +03:00			`typedef struct {`
			`AVPacket *subs; ///< array of subtitles packets`
			`int nb_subs; ///< number of subtitles packets`
			`int allocated_size; ///< allocated size for subs`
			`int current_sub_idx; ///< current position for the read packet callback`
avformat/vobsub: fix several issues. Here is an extract of fate-samples/sub/vobsub.idx, with an additional text at the end of each line to better identify each bitmap: timestamp: 00:04:55:445, filepos: 00001b000 Ace! timestamp: 00:05:00:049, filepos: 00001b800 Wake up, honey! timestamp: 00:05:02:018, filepos: 00001c800 I gotta go to work. timestamp: 00:05:02:035, filepos: 00001d000 <???> timestamp: 00:05:04:203, filepos: 00001d800 Look after Clayton, okay? timestamp: 00:05:05:947, filepos: 00001e800 I'll be back tonight. timestamp: 00:05:07:957, filepos: 00001f800 Bye! Love you. timestamp: 00:05:21:295, filepos: 000020800 Hey, Ace! What's up? timestamp: 00:05:23:356, filepos: 000021800 Hey, how's it going? timestamp: 00:05:24:640, filepos: 000022800 Remember what today is? The 3rd! timestamp: 00:05:27:193, filepos: 000023800 Look over there! timestamp: 00:05:28:369, filepos: 000024800 Where are they going? timestamp: 00:05:28:361, filepos: 000025000 <???> timestamp: 00:05:29:946, filepos: 000025800 Let's go see. timestamp: 00:05:31:230, filepos: 000026000 I can't, man. I got Clayton. Note the two "<???>": they are basically split subtitles (with the previous one), which the dvdsub decoder is now supposed to reconstruct with a previous commit. But also note that while the first chunk has increasing timestamps, timestamp: 00:05:02:018, filepos: 00001c800 timestamp: 00:05:02:035, filepos: 00001d000 ...it's not the case of the second one (and this is not an exception in the original file): timestamp: 00:05:28:369, filepos: 000024800 timestamp: 00:05:28:361, filepos: 000025000 For the dvdsub decoder, they need to be "filepos'ed" ordered, but the FFDemuxSubtitlesQueue is timestamps ordered, which is the reason of the introduction of a sub sort method in the context, to allow giving priority to the position, and then the timestamps. With that change, the dvdsub decoder get fed with ordered packets. 
Now the packet size estimation was also broken: the filepos differences in the vobsub index defines the full data read between two subtitles chunks, and it is necessary to take into account what is read by the mpegps_read_pes_header() function since the length returned by that function doesn't count the size of the data it reads. This is fixed with the introduction of total_read, and {old,new}_pos. By doing this change, we can drop the unreliable len16 heuristic and simplify the whole loop. Note that mpegps_read_pes_header() often read more than one PES packet (typically in one call it can read 0x1ba and 0x1be chunk along with the relevant 0x1bd packet), which triggers the "total_read + pkt_size > psize" check. This is an expected behaviour, which could be avoided by having a more chunked version of mpegps_read_pes_header(). The latest change is the extraction of each stream into its own subtitles queue. If we don't do this, the maximum size for a subtitle chunk is broken, and the previous changes can not work. Having each stream in a different queue requires some little adjustments in the seek code of the demuxer. This commit is only meaningful as a whole change and can not be easily split. The FATE test changes because it uses the vobsub demuxer. 2013-09-29 23:05:14 +03:00			`enum sub_sort sort; ///< sort method to use when finalizing subtitles`
lavf: add internal demuxer helpers for subtitles. 2012-06-22 22:56:30 +03:00			`} FFDemuxSubtitlesQueue;`

			`/**`
			`* Insert a new subtitle event.`
			`*`
			`* @param event the subtitle line, may not be zero terminated`
			`* @param len the length of the event (in strlen() sense, so without '\0')`
			`* @param merge set to 1 if the current event should be concatenated with the`
			`* previous one instead of adding a new entry, 0 otherwise`
			`*/`
			`AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,`
			`const uint8_t *event, int len, int merge);`

			`/**`
			`* Set missing durations and sort subtitles by PTS, and then byte position.`
			`*/`
			`void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q);`

			`/**`
			`* Generic read_packet() callback for subtitles demuxers using this queue`
			`* system.`
			`*/`
			`int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt);`

lavf/subtitles: add ff_subtitles_queue_seek(). This function is almost identical to lavf/assdec:read_seek2(). It performs a generic seek for text subtitles demuxers for the new seeking API. The only difference with assdec:read_seek2 is the ts_diff being unsigned to avoid overflows. The seek callback in the ASS demuxer will be removed when it is redesigned to use FFDemuxSubtitlesQueue. 2012-11-23 23:40:46 +03:00			`/**`
			`* Update current_sub_idx to emulate a seek. Except the first parameter, it`
			`* matches AVInputFormat->read_seek2 prototypes.`
			`*/`
			`int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,`
			`int64_t min_ts, int64_t ts, int64_t max_ts, int flags);`

lavf: add internal demuxer helpers for subtitles. 2012-06-22 22:56:30 +03:00			`/**`
			`* Remove and destroy all the subtitles packets.`
			`*/`
			`void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q);`

lavf/subtitles: add some SMIL helpers. This is needed for SAMI and RealText demuxers. 2012-06-17 12:43:09 +03:00			`/**`
			`* SMIL helper to load next chunk ("<...>" or untagged content) in buf.`
			`*`
			`* @param c cached character, to avoid a backward seek`
			`*/`
			`int ff_smil_extract_next_chunk(AVIOContext *pb, AVBPrint *buf, char *c);`

			`/**`
			`* SMIL helper to point on the value of an attribute in the given tag.`
			`*`
			`* @param s SMIL tag ("<...>")`
			`* @param attr the attribute to look for`
			`*/`
			`const char *ff_smil_get_attr_ptr(const char *s, const char *attr);`

lavf: move srtdec:read_chunk() to subtitles utils. This function can be useful for various other subtitles formats. 2012-12-28 03:15:01 +03:00			`/**`
			`* @brief Read a subtitles chunk.`
			`*`
			`* A chunk is defined by a multiline "event", ending with a second line break.`
lavf/subtitles: fix CLRF/CRLF typo. 2012-12-31 01:14:34 +03:00			`* The trailing line breaks are trimmed. CRLF are supported.`
lavf: move srtdec:read_chunk() to subtitles utils. This function can be useful for various other subtitles formats. 2012-12-28 03:15:01 +03:00			`* Example: "foo\r\nbar\r\n\r\nnext" will print "foo\r\nbar" into buf, and pb`
			`* will focus on the 'n' of the "next" string.`
			`*`
			`* @param pb I/O context`
			`* @param buf an initialized buf where the chunk is written`
			`*`
			`* @note buf is cleared before writing into it.`
			`*/`
			`void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf);`

avformat/subtitles: add a next line jumper and use it. This fixes a bunch of possible overread in avformat with the idiom p += strcspn(p, "\n") + 1 (strcspn() can focus on the trailing '\0' if no '\n' is found, so the +1 leads to an overread). Note on lavf/matroskaenc: no extra subtitles.o Makefile dependency is added because only the header is required for ff_subtitles_next_line(). Note on lavf/mpsubdec: code gets slightly complex to avoid an infinite loop in the probing since there is no more forced increment. 2013-09-08 19:02:45 +03:00			`/**`
			`* Get the number of characters to increment to jump to the next line, or to`
			`* the end of the string.`
lavf/subtitles: Make comment less arrogant Signed-off-by: Alexander Strasser <eclipse7@gmx.net> 2013-09-10 02:07:48 +03:00			`* The function handles the following line breaks schemes:`
			`* LF, CRLF (MS), or standalone CR (old MacOS).`
avformat/subtitles: add a next line jumper and use it. This fixes a bunch of possible overread in avformat with the idiom p += strcspn(p, "\n") + 1 (strcspn() can focus on the trailing '\0' if no '\n' is found, so the +1 leads to an overread). Note on lavf/matroskaenc: no extra subtitles.o Makefile dependency is added because only the header is required for ff_subtitles_next_line(). Note on lavf/mpsubdec: code gets slightly complex to avoid an infinite loop in the probing since there is no more forced increment. 2013-09-08 19:02:45 +03:00			`*/`
			`static av_always_inline int ff_subtitles_next_line(const char *ptr)`
			`{`
avformat/subtitles: support standalone CR (MacOS). Recent .srt files with CR only were found in the wild. 2013-09-08 19:05:11 +03:00			`int n = strcspn(ptr, "\r\n");`
			`ptr += n;`
			`if (*ptr == '\r') {`
			`ptr++;`
			`n++;`
			`}`
			`if (*ptr == '\n')`
			`n++;`
			`return n;`
avformat/subtitles: add a next line jumper and use it. This fixes a bunch of possible overread in avformat with the idiom p += strcspn(p, "\n") + 1 (strcspn() can focus on the trailing '\0' if no '\n' is found, so the +1 leads to an overread). Note on lavf/matroskaenc: no extra subtitles.o Makefile dependency is added because only the header is required for ff_subtitles_next_line(). Note on lavf/mpsubdec: code gets slightly complex to avoid an infinite loop in the probing since there is no more forced increment. 2013-09-08 19:02:45 +03:00			`}`

lavf: add internal demuxer helpers for subtitles. 2012-06-22 22:56:30 +03:00			`#endif /* AVFORMAT_SUBTITLES_H */`