ffmpeg: add video heartbeat capability to fix_sub_duration

Splits the currently handled subtitle at random access point packets that can be configured to follow a specific output stream. Currently only subtitle streams which are directly mapped into the same output in which the heartbeat stream resides are affected. This way the subtitle, which is known to be shown at this time, can be split and passed to the muxer before its full duration is known. This is also a drawback, as this essentially outputs multiple subtitles from a single input subtitle that continues over multiple random access points. Thus this feature should not be utilized in cases where subtitle output latency does not matter. Co-authored-by: Andrzej Nadachowski <andrzej.nadachowski@24i.com> Co-authored-by: Bernard Boulay <bernard.boulay@24i.com> Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
2024-12-23 12:43:46 +02:00 · 2022-07-22 13:57:54 +03:00 · 2022-07-22 13:57:54 +03:00 · 9a820ec8b1
commit 9a820ec8b1
parent 746d27455b
8 changed files with 245 additions and 0 deletions
--- a/1
+++ b/1
@ -36,6 +36,7 @@ version <next>:
 - hstack_vaapi, vstack_vaapi and xstack_vaapi filters
 - XMD ADPCM decoder and demuxer
 - media100 to mjpegb bsf
 - ffmpeg CLI new option: -fix_sub_duration_heartbeat
 version 5.1:
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@ -1342,6 +1342,22 @@ List all hardware acceleration components enabled in this build of ffmpeg.
 Actual runtime availability depends on the hardware and its suitable driver
 being installed.
@item -fix_sub_duration_heartbeat[:@var{stream_specifier}]
 Set a specific output video stream as the heartbeat stream according to which
 to split and push through the currently in-progress subtitle upon receipt of a
 random access packet.
 This lowers the latency of subtitles for which the end packet or the following
 subtitle has not yet been received. As a drawback, this will most likely lead
 to duplication of subtitle events in order to cover the full duration, so
 when dealing with use cases where latency of when the subtitle event is passed
 on to output is not relevant this option should not be utilized.
 Requires @option{-fix_sub_duration} to be set for the relevant input subtitle
 stream for this to have any effect. Additionally, the input subtitle stream
 must be directly mapped to the same output in which the heartbeat stream
 resides.
@end table
@section Audio Options
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@ -126,6 +126,7 @@ typedef struct BenchmarkTimeStamps {
    int64_t sys_usec;
 } BenchmarkTimeStamps;
 static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt);
 static BenchmarkTimeStamps get_benchmark_time_stamps(void);
 static int64_t getmaxrss(void);
 static int ifilter_has_all_input_formats(FilterGraph *fg);
@ -953,6 +954,13 @@ static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame)
                   av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, &enc->time_base));
        }
        if ((ret = trigger_fix_sub_duration_heartbeat(ost, pkt)) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Subtitle heartbeat logic failed in %s! (%s)\n",
                   __func__, av_err2str(ret));
            exit_program(1);
        }
        ost->data_size_enc += pkt->size;
        ost->packets_encoded++;
@ -1912,6 +1920,16 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
    opkt->duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->mux_timebase);
    {
        int ret = trigger_fix_sub_duration_heartbeat(ost, pkt);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Subtitle heartbeat logic failed in %s! (%s)\n",
                   __func__, av_err2str(ret));
            exit_program(1);
        }
    }
    of_output_packet(of, opkt, ost, 0);
    ost->streamcopy_started = 1;
@ -2355,6 +2373,136 @@ out:
    return ret;
 }
 /**
  * Deep-copy an AVSubtitle, including every rectangle and the text, ASS and
  * data buffers hanging off it.
  *
  * The copy is assembled in a local temporary and only stored into *dst once
  * it is complete, so *dst is left untouched when an error is returned.
  *
  * @param dst destination; overwritten (its previous contents are not freed)
  *            on success. The result can be released with avsubtitle_free(),
  *            which is what the failure path below uses on the partial copy.
  * @param src subtitle to duplicate; it is only read.
  * @return 0 on success, AVERROR(ENOMEM) if any allocation fails.
  */
 static int copy_av_subtitle(AVSubtitle *dst, const AVSubtitle *src)
 {
    int ret = AVERROR_BUG;
    AVSubtitle tmp = {
        .format = src->format,
        .start_display_time = src->start_display_time,
        .end_display_time = src->end_display_time,
        .num_rects = 0,
        .rects = NULL,
        .pts = src->pts
    };
    if (!src->num_rects)
        goto success;
    if (!(tmp.rects = av_calloc(src->num_rects, sizeof(*tmp.rects))))
        return AVERROR(ENOMEM);
    for (int i = 0; i < src->num_rects; i++) {
        const AVSubtitleRect *src_rect = src->rects[i];
        AVSubtitleRect *dst_rect;
        if (!(dst_rect = tmp.rects[i] = av_mallocz(sizeof(*tmp.rects[0])))) {
            ret = AVERROR(ENOMEM);
            goto cleanup;
        }
        // Count the rect as soon as it exists so that the cleanup path
        // also releases a partially filled one.
        tmp.num_rects++;
        dst_rect->type      = src_rect->type;
        dst_rect->flags     = src_rect->flags;
        dst_rect->x         = src_rect->x;
        dst_rect->y         = src_rect->y;
        dst_rect->w         = src_rect->w;
        dst_rect->h         = src_rect->h;
        dst_rect->nb_colors = src_rect->nb_colors;
        if (src_rect->text)
            if (!(dst_rect->text = av_strdup(src_rect->text))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }
        if (src_rect->ass)
            if (!(dst_rect->ass = av_strdup(src_rect->ass))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }
        for (int j = 0; j < 4; j++) {
            size_t buf_size;
            // Unused planes stay NULL in the zero-initialized copy.
            if (!src_rect->data[j])
                continue;
            // SUBTITLE_BITMAP images are special in the sense that they
            // are like PAL8 images. first pointer to data, second to
            // palette. This makes the size calculation match this.
            if (src_rect->type == SUBTITLE_BITMAP && j == 1)
                buf_size = AVPALETTE_SIZE;
            else
                // Multiply in size_t: the int * int product could overflow
                // (undefined behavior) before being widened.
                buf_size = (size_t)src_rect->h * src_rect->linesize[j];
            if (!(dst_rect->data[j] = av_memdup(src_rect->data[j], buf_size))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }
            dst_rect->linesize[j] = src_rect->linesize[j];
        }
    }
 success:
    *dst = tmp;
    return 0;
 cleanup:
    avsubtitle_free(&tmp);
    return ret;
 }
 /*
  * Feed a copy of the subtitle currently stored in ist->prev_sub back into
  * process_subtitle(), restamped to signal_pts.
  *
  * Does nothing and returns 0 when fix_sub_duration is not enabled on this
  * input stream, when the stored subtitle has no rects, or when signal_pts is
  * not later than the stored subtitle's pts. signal_pts is compared directly
  * against that pts, so it has to be expressed in the same units.
  *
  * Returns a negative error code if the copy fails, otherwise whatever
  * process_subtitle() returns.
  */
 static int fix_sub_duration_heartbeat(InputStream *ist, int64_t signal_pts)
 {
    AVSubtitle *held = &ist->prev_sub.subtitle;
    AVSubtitle split;
    int got_output = 1;
    int err;

    if (!ist->fix_sub_duration)
        return 0;
    if (!held->num_rects)
        return 0;
    if (signal_pts <= held->pts)
        return 0;

    err = copy_av_subtitle(&split, held);
    if (err < 0)
        return err;

    // The copy keeps the stored subtitle's contents but starts at the
    // heartbeat time.
    split.pts = signal_pts;

    // NOTE(review): process_subtitle() is defined outside this view;
    // presumably it takes ownership of the copy - confirm.
    return process_subtitle(ist, &split, &got_output);
 }
 /*
  * Called with a packet belonging to output stream ost. If ost is configured
  * as a heartbeat stream and the packet is flagged as a key packet, run the
  * fix_sub_duration heartbeat on every other stream of the same output file
  * that is fed by a decoded subtitle input stream.
  *
  * pkt->pts is rescaled from ost->mux_timebase to AV_TIME_BASE_Q before it is
  * handed on.
  *
  * Returns 0 on success (including when there was nothing to do), or the first
  * negative error code reported by fix_sub_duration_heartbeat().
  */
 static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt)
 {
    OutputFile *of;
    int64_t signal_pts;

    // we are only interested in heartbeats on streams configured, and
    // only on random access points.
    if (!ost->fix_sub_duration_heartbeat)
        return 0;
    if (!(pkt->flags & AV_PKT_FLAG_KEY))
        return 0;

    of         = output_files[ost->file_index];
    signal_pts = av_rescale_q(pkt->pts, ost->mux_timebase, AV_TIME_BASE_Q);

    for (int idx = 0; idx < of->nb_streams; idx++) {
        OutputStream *candidate = of->streams[idx];
        InputStream  *ist;
        int err;

        // Skip the stream that causes the heartbeat.
        if (candidate == ost)
            continue;

        // Skip output streams without an input stream and streams that are
        // not decoded (fix_sub_duration is only done for decoded subtitles).
        ist = candidate->ist;
        if (!ist || !ist->decoding_needed)
            continue;

        // Skip non-subtitle streams.
        if (ist->dec_ctx->codec_type != AVMEDIA_TYPE_SUBTITLE)
            continue;

        err = fix_sub_duration_heartbeat(ist, signal_pts);
        if (err < 0)
            return err;
    }

    return 0;
 }
 static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
                               int *decode_failed)
 {
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@ -224,6 +224,8 @@ typedef struct OptionsContext {
    int        nb_reinit_filters;
    SpecifierOpt *fix_sub_duration;
    int        nb_fix_sub_duration;
    SpecifierOpt *fix_sub_duration_heartbeat;
    int        nb_fix_sub_duration_heartbeat;
    SpecifierOpt *canvas_sizes;
    int        nb_canvas_sizes;
    SpecifierOpt *pass;
@ -675,6 +677,12 @@ typedef struct OutputStream {
    EncStats enc_stats_pre;
    EncStats enc_stats_post;
    /*
     * bool on whether this stream should be utilized for splitting
     * subtitles utilizing fix_sub_duration at random access points.
     */
    unsigned int fix_sub_duration_heartbeat;
 } OutputStream;
 typedef struct OutputFile {
--- a/fftools/ffmpeg_mux_init.c
+++ b/fftools/ffmpeg_mux_init.c
@ -61,6 +61,7 @@ static const char *const opt_name_enc_stats_pre_fmt[]         = {"enc_stats_pre_
 static const char *const opt_name_enc_stats_post_fmt[]        = {"enc_stats_post_fmt", NULL};
 static const char *const opt_name_filters[]                   = {"filter", "af", "vf", NULL};
 static const char *const opt_name_filter_scripts[]            = {"filter_script", NULL};
 static const char *const opt_name_fix_sub_duration_heartbeat[] = {"fix_sub_duration_heartbeat", NULL};
 static const char *const opt_name_fps_mode[]                  = {"fps_mode", NULL};
 static const char *const opt_name_force_fps[]                 = {"force_fps", NULL};
 static const char *const opt_name_forced_key_frames[]         = {"forced_key_frames", NULL};
@ -614,6 +615,9 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
    MATCH_PER_STREAM_OPT(bits_per_raw_sample, i, ost->bits_per_raw_sample,
                         oc, st);
    MATCH_PER_STREAM_OPT(fix_sub_duration_heartbeat, i, ost->fix_sub_duration_heartbeat,
                         oc, st);
    if (oc->oformat->flags & AVFMT_GLOBALHEADER && ost->enc_ctx)
        ost->enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@ -1658,6 +1658,11 @@ const OptionDef options[] = {
    { "autoscale",        HAS_ARG | OPT_BOOL | OPT_SPEC |
                          OPT_EXPERT | OPT_OUTPUT,                               { .off = OFFSET(autoscale) },
        "automatically insert a scale filter at the end of the filter graph" },
    { "fix_sub_duration_heartbeat", OPT_VIDEO | OPT_BOOL | OPT_EXPERT |
                                    OPT_SPEC | OPT_OUTPUT,                       { .off = OFFSET(fix_sub_duration_heartbeat) },
        "set this video output stream to be a heartbeat stream for "
        "fix_sub_duration, according to which subtitles should be split at "
        "random access points" },
    /* audio options */
    { "aframes",        OPT_AUDIO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,           { .func_arg = opt_audio_frames },
--- a/tests/fate/ffmpeg.mak
+++ b/tests/fate/ffmpeg.mak
@ -117,6 +117,21 @@ fate-ffmpeg-fix_sub_duration: CMD = fmtstdout srt -fix_sub_duration \
  -real_time 1 -f lavfi \
  -i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]"
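 # Basic test for fix_sub_duration_heartbeat, which causes a buffered subtitle
 # to be pushed out when a video keyframe is received from an encoder.
 # The fixed GOP size (-g 30) combined with a very large -sc_threshold is
 # presumably there to keep keyframes on a regular 30-frame cadence, so that
 # the split points in the reference output stay deterministic.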
 FATE_SAMPLES_FFMPEG-$(call FILTERDEMDECENCMUX, MOVIE, MPEGVIDEO, \
                           MPEG2VIDEO, SUBRIP, SRT, LAVFI_INDEV  \
                           MPEGVIDEO_PARSER CCAPTION_DECODER \
                           MPEG2VIDEO_ENCODER NULL_MUXER PIPE_PROTOCOL) \
                           += fate-ffmpeg-fix_sub_duration_heartbeat
 fate-ffmpeg-fix_sub_duration_heartbeat: CMD = fmtstdout srt -fix_sub_duration \
  -real_time 1 -f lavfi \
  -i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]" \
  -map 0:v  -map 0:s -fix_sub_duration_heartbeat:v:0 \
  -c:v mpeg2video -b:v 2M -g 30 -sc_threshold 1000000000 \
  -c:s srt \
  -f null -
 FATE_STREAMCOPY-$(call REMUX, MP4 MOV, EAC3_DEMUXER) += fate-copy-trac3074
 fate-copy-trac3074: CMD = transcode eac3 $(TARGET_SAMPLES)/eac3/csi_miami_stereo_128_spx.eac3\
                     mp4 "-codec copy -map 0" "-codec copy"
--- a/tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
+++ b/tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
@ -0,0 +1,48 @@
 1
 00:00:00,968 --> 00:00:01,001
 <font face="Monospace">{\an7}(</font>
 2
 00:00:01,001 --> 00:00:01,168
 <font face="Monospace">{\an7}(</font>
 3
 00:00:01,168 --> 00:00:01,368
 <font face="Monospace">{\an7}(<i> inaudibl</i></font>
 4
 00:00:01,368 --> 00:00:01,568
 <font face="Monospace">{\an7}(<i> inaudible radio chat</i></font>
 5
 00:00:01,568 --> 00:00:02,002
 <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
 6
 00:00:02,002 --> 00:00:03,003
 <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
 7
 00:00:03,003 --> 00:00:03,103
 <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
 8
 00:00:03,103 --> 00:00:03,303
 <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
 >></font>
 9
 00:00:03,303 --> 00:00:03,503
 <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
 >> Safety rema</font>
 10
 00:00:03,504 --> 00:00:03,704
 <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
 >> Safety remains our numb</font>
 11
 00:00:03,704 --> 00:00:04,004
 <font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
 >> Safety remains our number one</font>