mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
ffmpeg: add video heartbeat capability to fix_sub_duration
Splits the currently handled subtitle at random access point packets that can be configured to follow a specific output stream. Currently only subtitle streams which are directly mapped into the same output in which the heartbeat stream resides are affected. This way the subtitle - which is known to be shown at this time - can be split and passed to the muxer before its full duration is yet known. This is also a drawback, as this essentially outputs multiple subtitles from a single input subtitle that continues over multiple random access points. Thus this feature should not be utilized in cases where subtitle output latency does not matter. Co-authored-by: Andrzej Nadachowski <andrzej.nadachowski@24i.com> Co-authored-by: Bernard Boulay <bernard.boulay@24i.com> Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
This commit is contained in:
parent
746d27455b
commit
9a820ec8b1
@ -36,6 +36,7 @@ version <next>:
|
|||||||
- hstack_vaapi, vstack_vaapi and xstack_vaapi filters
|
- hstack_vaapi, vstack_vaapi and xstack_vaapi filters
|
||||||
- XMD ADPCM decoder and demuxer
|
- XMD ADPCM decoder and demuxer
|
||||||
- media100 to mjpegb bsf
|
- media100 to mjpegb bsf
|
||||||
|
- ffmpeg CLI new option: -fix_sub_duration_heartbeat
|
||||||
|
|
||||||
|
|
||||||
version 5.1:
|
version 5.1:
|
||||||
|
@ -1342,6 +1342,22 @@ List all hardware acceleration components enabled in this build of ffmpeg.
|
|||||||
Actual runtime availability depends on the hardware and its suitable driver
|
Actual runtime availability depends on the hardware and its suitable driver
|
||||||
being installed.
|
being installed.
|
||||||
|
|
||||||
|
@item -fix_sub_duration_heartbeat[:@var{stream_specifier}]
|
||||||
|
Set a specific output video stream as the heartbeat stream according to which
|
||||||
|
to split and push through the currently in-progress subtitle upon receipt of a
|
||||||
|
random access packet.
|
||||||
|
|
||||||
|
This lowers the latency of subtitles for which the end packet or the following
|
||||||
|
subtitle has not yet been received. As a drawback, this will most likely lead
|
||||||
|
to duplication of subtitle events in order to cover the full duration, so
|
||||||
|
when dealing with use cases where latency of when the subtitle event is passed
|
||||||
|
on to output is not relevant this option should not be utilized.
|
||||||
|
|
||||||
|
Requires @option{-fix_sub_duration} to be set for the relevant input subtitle
|
||||||
|
stream for this to have any effect, and requires the input subtitle stream
|
||||||
|
to be directly mapped to the same output in which the heartbeat stream
|
||||||
|
resides.
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
@section Audio Options
|
@section Audio Options
|
||||||
|
148
fftools/ffmpeg.c
148
fftools/ffmpeg.c
@ -126,6 +126,7 @@ typedef struct BenchmarkTimeStamps {
|
|||||||
int64_t sys_usec;
|
int64_t sys_usec;
|
||||||
} BenchmarkTimeStamps;
|
} BenchmarkTimeStamps;
|
||||||
|
|
||||||
|
static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt);
|
||||||
static BenchmarkTimeStamps get_benchmark_time_stamps(void);
|
static BenchmarkTimeStamps get_benchmark_time_stamps(void);
|
||||||
static int64_t getmaxrss(void);
|
static int64_t getmaxrss(void);
|
||||||
static int ifilter_has_all_input_formats(FilterGraph *fg);
|
static int ifilter_has_all_input_formats(FilterGraph *fg);
|
||||||
@ -953,6 +954,13 @@ static int encode_frame(OutputFile *of, OutputStream *ost, AVFrame *frame)
|
|||||||
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, &enc->time_base));
|
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, &enc->time_base));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((ret = trigger_fix_sub_duration_heartbeat(ost, pkt)) < 0) {
|
||||||
|
av_log(NULL, AV_LOG_ERROR,
|
||||||
|
"Subtitle heartbeat logic failed in %s! (%s)\n",
|
||||||
|
__func__, av_err2str(ret));
|
||||||
|
exit_program(1);
|
||||||
|
}
|
||||||
|
|
||||||
ost->data_size_enc += pkt->size;
|
ost->data_size_enc += pkt->size;
|
||||||
|
|
||||||
ost->packets_encoded++;
|
ost->packets_encoded++;
|
||||||
@ -1912,6 +1920,16 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
|
|||||||
|
|
||||||
opkt->duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->mux_timebase);
|
opkt->duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->mux_timebase);
|
||||||
|
|
||||||
|
{
|
||||||
|
int ret = trigger_fix_sub_duration_heartbeat(ost, pkt);
|
||||||
|
if (ret < 0) {
|
||||||
|
av_log(NULL, AV_LOG_ERROR,
|
||||||
|
"Subtitle heartbeat logic failed in %s! (%s)\n",
|
||||||
|
__func__, av_err2str(ret));
|
||||||
|
exit_program(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
of_output_packet(of, opkt, ost, 0);
|
of_output_packet(of, opkt, ost, 0);
|
||||||
|
|
||||||
ost->streamcopy_started = 1;
|
ost->streamcopy_started = 1;
|
||||||
@ -2355,6 +2373,136 @@ out:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Create a deep copy of a subtitle.
 *
 * The scalar fields are copied and every rect is duplicated into newly
 * allocated memory, including its text, its ASS string and each non-NULL
 * data plane. The copy is assembled in a temporary, so *dst is only written
 * once everything has succeeded; on failure the partial copy is handed to
 * avsubtitle_free() and *dst is left untouched. Any previous contents of
 * *dst are overwritten without being freed.
 *
 * @param dst destination subtitle, written only on success
 * @param src subtitle to duplicate, never modified
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static int copy_av_subtitle(AVSubtitle *dst, const AVSubtitle *src)
{
    int ret = AVERROR_BUG;
    AVSubtitle tmp = {
        .format             = src->format,
        .start_display_time = src->start_display_time,
        .end_display_time   = src->end_display_time,
        .num_rects          = 0,
        .rects              = NULL,
        .pts                = src->pts
    };

    if (!src->num_rects)
        goto success;

    if (!(tmp.rects = av_calloc(src->num_rects, sizeof(*tmp.rects))))
        return AVERROR(ENOMEM);

    for (int i = 0; i < src->num_rects; i++) {
        const AVSubtitleRect *src_rect = src->rects[i];
        AVSubtitleRect *dst_rect;

        if (!(dst_rect = tmp.rects[i] = av_mallocz(sizeof(*tmp.rects[0])))) {
            ret = AVERROR(ENOMEM);
            goto cleanup;
        }

        // Count the rect as soon as it exists, so that the cleanup path
        // also covers a rect that was only partially filled in.
        tmp.num_rects++;

        dst_rect->type      = src_rect->type;
        dst_rect->flags     = src_rect->flags;

        dst_rect->x         = src_rect->x;
        dst_rect->y         = src_rect->y;
        dst_rect->w         = src_rect->w;
        dst_rect->h         = src_rect->h;
        dst_rect->nb_colors = src_rect->nb_colors;

        if (src_rect->text)
            if (!(dst_rect->text = av_strdup(src_rect->text))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }

        if (src_rect->ass)
            if (!(dst_rect->ass = av_strdup(src_rect->ass))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }

        for (int j = 0; j < 4; j++) {
            size_t buf_size;

            // Absent planes need neither a size nor a copy.
            if (!src_rect->data[j])
                continue;

            // SUBTITLE_BITMAP images are special in the sense that they
            // are like PAL8 images. first pointer to data, second to
            // palette. This makes the size calculation match this.
            // The product is formed in size_t so that it cannot overflow
            // int before being widened.
            buf_size = src_rect->type == SUBTITLE_BITMAP && j == 1 ?
                       AVPALETTE_SIZE :
                       (size_t)src_rect->h * src_rect->linesize[j];

            if (!(dst_rect->data[j] = av_memdup(src_rect->data[j], buf_size))) {
                ret = AVERROR(ENOMEM);
                goto cleanup;
            }
            dst_rect->linesize[j] = src_rect->linesize[j];
        }
    }

success:
    *dst = tmp;

    return 0;

cleanup:
    avsubtitle_free(&tmp);

    return ret;
}
|
||||||
|
|
||||||
|
/**
 * Feed a re-timed copy of the previously stored subtitle of an input stream
 * back into process_subtitle().
 *
 * Nothing is done, and 0 is returned, unless fix_sub_duration is set on the
 * input stream, the stored subtitle has at least one rect, and signal_pts is
 * strictly greater than the stored subtitle's pts. Otherwise the stored
 * subtitle is duplicated, the copy's pts is replaced by signal_pts and the
 * copy is passed on.
 *
 * @param ist        input stream whose stored subtitle is examined
 * @param signal_pts timestamp assigned to the copy
 * @return 0 when nothing had to be done, a negative error code if the copy
 *         could not be made, otherwise the result of process_subtitle()
 */
static int fix_sub_duration_heartbeat(InputStream *ist, int64_t signal_pts)
{
    AVSubtitle *stored = &ist->prev_sub.subtitle;
    AVSubtitle  resend;
    int got_output = 1;
    int err;

    if (!ist->fix_sub_duration)
        return 0;

    if (!stored->num_rects)
        return 0;

    if (signal_pts <= stored->pts)
        return 0;

    err = copy_av_subtitle(&resend, stored);
    if (err < 0)
        return err;

    resend.pts = signal_pts;

    return process_subtitle(ist, &resend, &got_output);
}
|
||||||
|
|
||||||
|
/**
 * Run the subtitle heartbeat for one packet of an output stream.
 *
 * When the output stream is configured as a heartbeat stream and the packet
 * is flagged as a key packet, the packet's pts is rescaled from
 * ost->mux_timebase to AV_TIME_BASE_Q and fix_sub_duration_heartbeat() is
 * called for every other stream of the same output file that has a decoded
 * subtitle input stream.
 *
 * NOTE(review): pkt->pts is interpreted in ost->mux_timebase. The stream copy
 * call site appears to pass the input packet rather than the rescaled output
 * packet - confirm that its timestamps are in that time base.
 *
 * @param ost output stream the packet belongs to
 * @param pkt packet that may act as a heartbeat, not modified
 * @return 0 on success or when there was nothing to do, otherwise the first
 *         negative error code returned by fix_sub_duration_heartbeat()
 */
static int trigger_fix_sub_duration_heartbeat(OutputStream *ost, const AVPacket *pkt)
{
    OutputFile *of;
    int64_t signal_pts;

    if (!ost->fix_sub_duration_heartbeat || !(pkt->flags & AV_PKT_FLAG_KEY))
        // we are only interested in heartbeats on streams configured, and
        // only on random access points.
        return 0;

    if (pkt->pts == AV_NOPTS_VALUE)
        // without a timestamp there is no point in time to split at, and
        // rescaling the "no value" marker would yield a meaningless signal.
        return 0;

    of         = output_files[ost->file_index];
    signal_pts = av_rescale_q(pkt->pts, ost->mux_timebase, AV_TIME_BASE_Q);

    for (int i = 0; i < of->nb_streams; i++) {
        OutputStream *iter_ost = of->streams[i];
        InputStream  *ist      = iter_ost->ist;
        int ret = AVERROR_BUG;

        if (iter_ost == ost || !ist || !ist->decoding_needed ||
            ist->dec_ctx->codec_type != AVMEDIA_TYPE_SUBTITLE)
            // We wish to skip the stream that causes the heartbeat,
            // output streams without an input stream, streams not decoded
            // (as fix_sub_duration is only done for decoded subtitles) as
            // well as non-subtitle streams.
            continue;

        if ((ret = fix_sub_duration_heartbeat(ist, signal_pts)) < 0)
            return ret;
    }

    return 0;
}
|
||||||
|
|
||||||
static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
|
static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output,
|
||||||
int *decode_failed)
|
int *decode_failed)
|
||||||
{
|
{
|
||||||
|
@ -224,6 +224,8 @@ typedef struct OptionsContext {
|
|||||||
int nb_reinit_filters;
|
int nb_reinit_filters;
|
||||||
SpecifierOpt *fix_sub_duration;
|
SpecifierOpt *fix_sub_duration;
|
||||||
int nb_fix_sub_duration;
|
int nb_fix_sub_duration;
|
||||||
|
SpecifierOpt *fix_sub_duration_heartbeat;
|
||||||
|
int nb_fix_sub_duration_heartbeat;
|
||||||
SpecifierOpt *canvas_sizes;
|
SpecifierOpt *canvas_sizes;
|
||||||
int nb_canvas_sizes;
|
int nb_canvas_sizes;
|
||||||
SpecifierOpt *pass;
|
SpecifierOpt *pass;
|
||||||
@ -675,6 +677,12 @@ typedef struct OutputStream {
|
|||||||
|
|
||||||
EncStats enc_stats_pre;
|
EncStats enc_stats_pre;
|
||||||
EncStats enc_stats_post;
|
EncStats enc_stats_post;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* bool on whether this stream should be utilized for splitting
|
||||||
|
* subtitles utilizing fix_sub_duration at random access points.
|
||||||
|
*/
|
||||||
|
unsigned int fix_sub_duration_heartbeat;
|
||||||
} OutputStream;
|
} OutputStream;
|
||||||
|
|
||||||
typedef struct OutputFile {
|
typedef struct OutputFile {
|
||||||
|
@ -61,6 +61,7 @@ static const char *const opt_name_enc_stats_pre_fmt[] = {"enc_stats_pre_
|
|||||||
static const char *const opt_name_enc_stats_post_fmt[] = {"enc_stats_post_fmt", NULL};
|
static const char *const opt_name_enc_stats_post_fmt[] = {"enc_stats_post_fmt", NULL};
|
||||||
static const char *const opt_name_filters[] = {"filter", "af", "vf", NULL};
|
static const char *const opt_name_filters[] = {"filter", "af", "vf", NULL};
|
||||||
static const char *const opt_name_filter_scripts[] = {"filter_script", NULL};
|
static const char *const opt_name_filter_scripts[] = {"filter_script", NULL};
|
||||||
|
static const char *const opt_name_fix_sub_duration_heartbeat[] = {"fix_sub_duration_heartbeat", NULL};
|
||||||
static const char *const opt_name_fps_mode[] = {"fps_mode", NULL};
|
static const char *const opt_name_fps_mode[] = {"fps_mode", NULL};
|
||||||
static const char *const opt_name_force_fps[] = {"force_fps", NULL};
|
static const char *const opt_name_force_fps[] = {"force_fps", NULL};
|
||||||
static const char *const opt_name_forced_key_frames[] = {"forced_key_frames", NULL};
|
static const char *const opt_name_forced_key_frames[] = {"forced_key_frames", NULL};
|
||||||
@ -614,6 +615,9 @@ static OutputStream *new_output_stream(Muxer *mux, const OptionsContext *o,
|
|||||||
MATCH_PER_STREAM_OPT(bits_per_raw_sample, i, ost->bits_per_raw_sample,
|
MATCH_PER_STREAM_OPT(bits_per_raw_sample, i, ost->bits_per_raw_sample,
|
||||||
oc, st);
|
oc, st);
|
||||||
|
|
||||||
|
MATCH_PER_STREAM_OPT(fix_sub_duration_heartbeat, i, ost->fix_sub_duration_heartbeat,
|
||||||
|
oc, st);
|
||||||
|
|
||||||
if (oc->oformat->flags & AVFMT_GLOBALHEADER && ost->enc_ctx)
|
if (oc->oformat->flags & AVFMT_GLOBALHEADER && ost->enc_ctx)
|
||||||
ost->enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
|
ost->enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
|
||||||
|
|
||||||
|
@ -1658,6 +1658,11 @@ const OptionDef options[] = {
|
|||||||
{ "autoscale", HAS_ARG | OPT_BOOL | OPT_SPEC |
|
{ "autoscale", HAS_ARG | OPT_BOOL | OPT_SPEC |
|
||||||
OPT_EXPERT | OPT_OUTPUT, { .off = OFFSET(autoscale) },
|
OPT_EXPERT | OPT_OUTPUT, { .off = OFFSET(autoscale) },
|
||||||
"automatically insert a scale filter at the end of the filter graph" },
|
"automatically insert a scale filter at the end of the filter graph" },
|
||||||
|
{ "fix_sub_duration_heartbeat", OPT_VIDEO | OPT_BOOL | OPT_EXPERT |
|
||||||
|
OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(fix_sub_duration_heartbeat) },
|
||||||
|
"set this video output stream to be a heartbeat stream for "
|
||||||
|
"fix_sub_duration, according to which subtitles should be split at "
|
||||||
|
"random access points" },
|
||||||
|
|
||||||
/* audio options */
|
/* audio options */
|
||||||
{ "aframes", OPT_AUDIO | HAS_ARG | OPT_PERFILE | OPT_OUTPUT, { .func_arg = opt_audio_frames },
|
{ "aframes", OPT_AUDIO | HAS_ARG | OPT_PERFILE | OPT_OUTPUT, { .func_arg = opt_audio_frames },
|
||||||
|
@ -117,6 +117,21 @@ fate-ffmpeg-fix_sub_duration: CMD = fmtstdout srt -fix_sub_duration \
|
|||||||
-real_time 1 -f lavfi \
|
-real_time 1 -f lavfi \
|
||||||
-i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]"
|
-i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]"
|
||||||
|
|
||||||
|
# Basic test for fix_sub_duration_heartbeat, which causes a buffered subtitle
|
||||||
|
# to be pushed out when a video keyframe is received from an encoder.
|
||||||
|
FATE_SAMPLES_FFMPEG-$(call FILTERDEMDECENCMUX, MOVIE, MPEGVIDEO, \
|
||||||
|
MPEG2VIDEO, SUBRIP, SRT, LAVFI_INDEV \
|
||||||
|
MPEGVIDEO_PARSER CCAPTION_DECODER \
|
||||||
|
MPEG2VIDEO_ENCODER NULL_MUXER PIPE_PROTOCOL) \
|
||||||
|
+= fate-ffmpeg-fix_sub_duration_heartbeat
|
||||||
|
fate-ffmpeg-fix_sub_duration_heartbeat: CMD = fmtstdout srt -fix_sub_duration \
|
||||||
|
-real_time 1 -f lavfi \
|
||||||
|
-i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]" \
|
||||||
|
-map 0:v -map 0:s -fix_sub_duration_heartbeat:v:0 \
|
||||||
|
-c:v mpeg2video -b:v 2M -g 30 -sc_threshold 1000000000 \
|
||||||
|
-c:s srt \
|
||||||
|
-f null -
|
||||||
|
|
||||||
FATE_STREAMCOPY-$(call REMUX, MP4 MOV, EAC3_DEMUXER) += fate-copy-trac3074
|
FATE_STREAMCOPY-$(call REMUX, MP4 MOV, EAC3_DEMUXER) += fate-copy-trac3074
|
||||||
fate-copy-trac3074: CMD = transcode eac3 $(TARGET_SAMPLES)/eac3/csi_miami_stereo_128_spx.eac3\
|
fate-copy-trac3074: CMD = transcode eac3 $(TARGET_SAMPLES)/eac3/csi_miami_stereo_128_spx.eac3\
|
||||||
mp4 "-codec copy -map 0" "-codec copy"
|
mp4 "-codec copy -map 0" "-codec copy"
|
||||||
|
48
tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
Normal file
48
tests/ref/fate/ffmpeg-fix_sub_duration_heartbeat
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
1
|
||||||
|
00:00:00,968 --> 00:00:01,001
|
||||||
|
<font face="Monospace">{\an7}(</font>
|
||||||
|
|
||||||
|
2
|
||||||
|
00:00:01,001 --> 00:00:01,168
|
||||||
|
<font face="Monospace">{\an7}(</font>
|
||||||
|
|
||||||
|
3
|
||||||
|
00:00:01,168 --> 00:00:01,368
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudibl</i></font>
|
||||||
|
|
||||||
|
4
|
||||||
|
00:00:01,368 --> 00:00:01,568
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudible radio chat</i></font>
|
||||||
|
|
||||||
|
5
|
||||||
|
00:00:01,568 --> 00:00:02,002
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
|
||||||
|
|
||||||
|
6
|
||||||
|
00:00:02,002 --> 00:00:03,003
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
|
||||||
|
|
||||||
|
7
|
||||||
|
00:00:03,003 --> 00:00:03,103
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )</font>
|
||||||
|
|
||||||
|
8
|
||||||
|
00:00:03,103 --> 00:00:03,303
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
|
||||||
|
>></font>
|
||||||
|
|
||||||
|
9
|
||||||
|
00:00:03,303 --> 00:00:03,503
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
|
||||||
|
>> Safety rema</font>
|
||||||
|
|
||||||
|
10
|
||||||
|
00:00:03,504 --> 00:00:03,704
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
|
||||||
|
>> Safety remains our numb</font>
|
||||||
|
|
||||||
|
11
|
||||||
|
00:00:03,704 --> 00:00:04,004
|
||||||
|
<font face="Monospace">{\an7}(<i> inaudible radio chatter</i> )
|
||||||
|
>> Safety remains our number one</font>
|
||||||
|
|
Loading…
Reference in New Issue
Block a user