From 5d407cb2d71176240487171bb0e803c3d2e36f15 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 9 Apr 2023 09:55:55 +0200
Subject: [PATCH] fftools/ffmpeg: change video decoding timestamp generation

When no timestamps are available from the container, the video decoding
code will currently use fake dts values - generated in
process_input_packet() based on a combination of information from the
decoder and the parser (obtained via the demuxer) - to generate
timestamps during decoder flushing. This is fragile, hard to follow, and
unnecessarily convoluted, since more reliable information can be
obtained directly from post-decoding values.

The new code keeps track of the last decoded frame pts and estimates its
duration based on a number of heuristics. Timestamps generated when both
pts and pkt_dts are missing are then simply pts+duration of the last frame.
The heuristics are somewhat complicated by the fact that lavf insists on
making up packet timestamps based on its highly incomplete information.
That should be removed in the future, allowing this code to be
simplified further.

The results of the following tests change:
* h264-3386 now requires -fps_mode passthrough to avoid dropping frames
  at the end; this is a pathology of the interaction of the new and old
  code, and the fact that the sample switches from field to frame coding
  in the last packet, and will be fixed in following commits
* hevc-conformance-DELTAQP_A_BRCM_4 stops inventing an arbitrary
  timestamp gap at the end
* hevc-small422chroma - the single frame output by this test now has a
  timestamp of 0, rather than an arbitrary 7
---
 fftools/ffmpeg.c                              | 79 ++++++++++++++-----
 fftools/ffmpeg.h                              |  9 ++-
 fftools/ffmpeg_demux.c                        |  3 +-
 tests/fate/h264.mak                           |  2 +-
 tests/ref/fate/h264-3386                      |  6 +-
 .../fate/hevc-conformance-DELTAQP_A_BRCM_4    |  2 +-
 tests/ref/fate/hevc-small422chroma            |  2 +-
 7 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 741448c29a..757d837d5f 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -1275,11 +1275,63 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output,
     return err < 0 ? err : ret;
 }
 
+static int64_t video_duration_estimate(const InputStream *ist, const AVFrame *frame)
+{
+    const InputFile   *ifile = input_files[ist->file_index];
+    const int container_nots = !!(ifile->ctx->iformat->flags & AVFMT_NOTIMESTAMPS);
+    int64_t codec_duration = 0;
+
+    // XXX lavf currently makes up frame durations when they are not provided by
+    // the container. As there is no way to reliably distinguish real container
+    // durations from the fake made-up ones, we use heuristics based on whether
+    // the container has timestamps. Eventually lavf should stop making up
+    // durations, then this should be simplified.
+
+    // prefer frame duration for containers with timestamps
+    if (frame->duration > 0 && !container_nots)
+        return frame->duration;
+
+    if (ist->dec_ctx->framerate.den && ist->dec_ctx->framerate.num) {
+        int ticks = frame->repeat_pict >= 0 ?
+                    frame->repeat_pict + 1  :
+                    ist->dec_ctx->ticks_per_frame;
+        codec_duration = av_rescale_q(ticks, av_inv_q(ist->dec_ctx->framerate),
+                                      ist->st->time_base);
+    }
+
+    // prefer codec-layer duration for containers without timestamps
+    if (codec_duration > 0 && container_nots)
+        return codec_duration;
+
+    // when timestamps are available, repeat last frame's actual duration
+    // (i.e. pts difference between this and last frame)
+    if (frame->pts != AV_NOPTS_VALUE && ist->last_frame_pts != AV_NOPTS_VALUE &&
+        frame->pts > ist->last_frame_pts)
+        return frame->pts - ist->last_frame_pts;
+
+    // try frame/codec duration
+    if (frame->duration > 0)
+        return frame->duration;
+    if (codec_duration > 0)
+        return codec_duration;
+
+    // try average framerate
+    if (ist->st->avg_frame_rate.num && ist->st->avg_frame_rate.den) {
+        int64_t d = av_rescale_q(1, av_inv_q(ist->st->avg_frame_rate),
+                                 ist->st->time_base);
+        if (d > 0)
+            return d;
+    }
+
+    // last resort is last frame's estimated duration, and 1
+    return FFMAX(ist->last_frame_duration_est, 1);
+}
+
 static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_t *duration_pts, int eof,
                         int *decode_failed)
 {
     AVFrame *decoded_frame = ist->decoded_frame;
-    int i, ret = 0, err = 0;
+    int ret = 0, err = 0;
     int64_t best_effort_timestamp;
     int64_t dts = AV_NOPTS_VALUE;
 
@@ -1295,16 +1347,6 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_
         pkt->dts = dts; // ffmpeg.c probably shouldn't do this
     }
 
-    // The old code used to set dts on the drain packet, which does not work
-    // with the new API anymore.
-    if (eof) {
-        void *new = av_realloc_array(ist->dts_buffer, ist->nb_dts_buffer + 1, sizeof(ist->dts_buffer[0]));
-        if (!new)
-            return AVERROR(ENOMEM);
-        ist->dts_buffer = new;
-        ist->dts_buffer[ist->nb_dts_buffer++] = dts;
-    }
-
     update_benchmark(NULL);
     ret = decode(ist, ist->dec_ctx, decoded_frame, got_output, pkt);
     update_benchmark("decode_video %d.%d", ist->file_index, ist->st->index);
@@ -1363,13 +1405,10 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_
     if (ist->framerate.num)
         best_effort_timestamp = ist->cfr_next_pts++;
 
-    if (eof && best_effort_timestamp == AV_NOPTS_VALUE && ist->nb_dts_buffer > 0) {
-        best_effort_timestamp = ist->dts_buffer[0];
-
-        for (i = 0; i < ist->nb_dts_buffer - 1; i++)
-            ist->dts_buffer[i] = ist->dts_buffer[i + 1];
-        ist->nb_dts_buffer--;
-    }
+    // no timestamp available - extrapolate from previous frame duration
+    if (best_effort_timestamp == AV_NOPTS_VALUE &&
+        ist->last_frame_pts != AV_NOPTS_VALUE)
+        best_effort_timestamp = ist->last_frame_pts + ist->last_frame_duration_est;
 
     if (best_effort_timestamp == AV_NOPTS_VALUE)
         best_effort_timestamp = av_rescale_q(ist->pts, AV_TIME_BASE_Q, ist->st->time_base);
@@ -1381,6 +1420,10 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_
             ist->next_pts = ist->pts = ts;
     }
 
+    // update timestamp history
+    ist->last_frame_duration_est = video_duration_estimate(ist, decoded_frame);
+    ist->last_frame_pts          = decoded_frame->pts;
+
     if (debug_ts) {
         av_log(ist, AV_LOG_INFO,
                "decoder -> pts:%s pts_time:%s "
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 50e5858385..17076f018d 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -373,6 +373,12 @@ typedef struct InputStream {
     /* predicted pts of the next decoded frame, in AV_TIME_BASE */
     int64_t       next_pts;
     int64_t       pts;       ///< current pts of the decoded frame  (in AV_TIME_BASE units)
+
+    // pts/estimated duration of the last decoded video frame
+    // in decoder timebase
+    int64_t last_frame_pts;
+    int64_t last_frame_duration_est;
+
     int           wrap_correction_done;
 
     // the value of AVCodecParserContext.repeat_pict from the AVStream parser
@@ -445,9 +451,6 @@ typedef struct InputStream {
     uint64_t frames_decoded;
     uint64_t samples_decoded;
 
-    int64_t *dts_buffer;
-    int nb_dts_buffer;
-
     int got_output;
 } InputStream;
 
diff --git a/fftools/ffmpeg_demux.c b/fftools/ffmpeg_demux.c
index 7ff57273c9..b9849d1669 100644
--- a/fftools/ffmpeg_demux.c
+++ b/fftools/ffmpeg_demux.c
@@ -532,7 +532,6 @@ static void ist_free(InputStream **pist)
     av_freep(&ist->filters);
     av_freep(&ist->outputs);
     av_freep(&ist->hwaccel_device);
-    av_freep(&ist->dts_buffer);
 
     avcodec_free_context(&ist->dec_ctx);
     avcodec_parameters_free(&ist->par);
@@ -879,6 +878,8 @@ static void add_input_streams(const OptionsContext *o, Demuxer *d)
 
             ist->framerate_guessed = av_guess_frame_rate(ic, st, NULL);
 
+            ist->last_frame_pts = AV_NOPTS_VALUE;
+
             break;
         case AVMEDIA_TYPE_AUDIO: {
             int guess_layout_max = INT_MAX;
diff --git a/tests/fate/h264.mak b/tests/fate/h264.mak
index c8ed23955a..7998879ed3 100644
--- a/tests/fate/h264.mak
+++ b/tests/fate/h264.mak
@@ -444,7 +444,7 @@ fate-h264-mixed-nal-coding:                       CMD = framecrc -i $(TARGET_SAM
 fate-h264-ref-pic-mod-overflow:                   CMD = framecrc -i $(TARGET_SAMPLES)/h264/ref-pic-mod-overflow.h264
 fate-h264-twofields-packet:                       CMD = framecrc -i $(TARGET_SAMPLES)/h264/twofields_packet.mp4 -an -frames 30
 fate-h264-unescaped-extradata:                    CMD = framecrc -i $(TARGET_SAMPLES)/h264/unescaped_extradata.mp4 -an -frames 10
-fate-h264-3386:                                   CMD = framecrc -i $(TARGET_SAMPLES)/h264/bbc2.sample.h264
+fate-h264-3386:                                   CMD = framecrc -i $(TARGET_SAMPLES)/h264/bbc2.sample.h264 -fps_mode passthrough
 fate-h264-missing-frame:                          CMD = framecrc -i $(TARGET_SAMPLES)/h264/nondeterministic_cut.h264
 fate-h264-timecode:                               CMD = framecrc -i $(TARGET_SAMPLES)/h264/crew_cif_timecode-2.h264
 
diff --git a/tests/ref/fate/h264-3386 b/tests/ref/fate/h264-3386
index 421581f8c6..f1a25a2dc0 100644
--- a/tests/ref/fate/h264-3386
+++ b/tests/ref/fate/h264-3386
@@ -47,6 +47,6 @@
 0,         45,         45,        1,  2332800, 0x80b968d3
 0,         46,         46,        1,  2332800, 0xa45f4e6f
 0,         47,         47,        1,  2332800, 0xe0fcbade
-0,         48,         48,        1,  2332800, 0x19568f4d
-0,         49,         49,        1,  2332800, 0x33c53f59
-0,         50,         50,        1,  2332800, 0xbdbe8fbf
+0,         47,         47,        1,  2332800, 0x19568f4d
+0,         48,         48,        1,  2332800, 0x33c53f59
+0,         49,         49,        1,  2332800, 0xbdbe8fbf
diff --git a/tests/ref/fate/hevc-conformance-DELTAQP_A_BRCM_4 b/tests/ref/fate/hevc-conformance-DELTAQP_A_BRCM_4
index 5ca3faf58f..2abd58b64b 100644
--- a/tests/ref/fate/hevc-conformance-DELTAQP_A_BRCM_4
+++ b/tests/ref/fate/hevc-conformance-DELTAQP_A_BRCM_4
@@ -98,4 +98,4 @@
 0,         92,         92,        1,  3133440, 0x761571be
 0,         93,         93,        1,  3133440, 0x34dc14a1
 0,         94,         94,        1,  3133440, 0xbb94c2d4
-0,         96,         96,        1,  3133440, 0x5300e459
+0,         95,         95,        1,  3133440, 0x5300e459
diff --git a/tests/ref/fate/hevc-small422chroma b/tests/ref/fate/hevc-small422chroma
index de0ea46c31..dca2af0a1a 100644
--- a/tests/ref/fate/hevc-small422chroma
+++ b/tests/ref/fate/hevc-small422chroma
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 3840x2160
 #sar 0: 1/1
-0,          7,          7,        1, 33177600, 0x53015e18
+0,          0,          0,        1, 33177600, 0x53015e18