mxf: Set audio packets pts

Extrapolate audio timestamps based on the number of samples demuxed. Deal with some MXF nastiness involving fractional number of samples per EditUnit when seeking (the specs handwave this away). Further fixes from Tomas Härdin. Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
2025-09-16 08:36:51 +02:00 · 2012-09-13 21:15:48 +02:00
parent e118bb1a33
commit 5b930092c3
3 changed files with 108 additions and 7 deletions
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -122,6 +122,8 @@ typedef struct {
    uint8_t track_number[4];
    AVRational edit_rate;
    int intra_only;
+    uint64_t sample_count;
+    int64_t original_duration; /* st->duration in SampleRate/EditRate units */
 } MXFTrack;

 typedef struct {
@@ -1424,7 +1426,7 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
        }
        st->id = source_track->track_id;
        st->priv_data = source_track;
-        st->duration = component->duration;
+        source_track->original_duration = st->duration = component->duration;
        if (st->duration == -1)
            st->duration = AV_NOPTS_VALUE;
        st->start_time = component->start_position;
@@ -1439,6 +1441,10 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
        }
        avpriv_set_pts_info(st, 64, material_track->edit_rate.den, material_track->edit_rate.num);

+        /* ensure SourceTrack EditRate == MaterialTrack EditRate since only
+         * the former is accessible via st->priv_data */
+        source_track->edit_rate = material_track->edit_rate;
+
        PRINT_KEY(mxf->fc, "data definition   ul", source_track->sequence->data_definition_ul);
        codec_ul = mxf_get_codec_ul(ff_mxf_data_definition_uls, &source_track->sequence->data_definition_ul);
        st->codec->codec_type = codec_ul->id;
@@ -1569,6 +1575,12 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
                avpriv_set_pts_info(st, 64, 1, 48000);
            }

+            /* if duration is set, rescale it from EditRate to SampleRate */
+            if (st->duration != AV_NOPTS_VALUE)
+                st->duration = av_rescale_q(st->duration,
+                                            av_inv_q(material_track->edit_rate),
+                                            st->time_base);
+
            /* TODO: implement AV_CODEC_ID_RAWAUDIO */
            if (st->codec->codec_id == AV_CODEC_ID_PCM_S16LE) {
                if (descriptor->bits_per_sample > 16 && descriptor->bits_per_sample <= 24)
@@ -2037,6 +2049,63 @@ static int64_t mxf_set_current_edit_unit(MXFContext *mxf, int64_t current_offset
    return next_ofs;
 }

+static int mxf_compute_sample_count(MXFContext *mxf, int stream_index,
+                                    uint64_t *sample_count)
+{
+    int i, total = 0, size = 0;
+    AVStream *st = mxf->fc->streams[stream_index];
+    MXFTrack *track = st->priv_data;
+    AVRational time_base = av_inv_q(track->edit_rate);
+    AVRational sample_rate = av_inv_q(st->time_base);
+    const MXFSamplesPerFrame *spf = NULL;
+
+    if ((sample_rate.num / sample_rate.den) == 48000)
+        spf = ff_mxf_get_samples_per_frame(mxf->fc, time_base);
+    if (!spf) {
+        int remainder = (sample_rate.num * time_base.num) %
+                        (time_base.den * sample_rate.den);
+        *sample_count = av_q2d(av_mul_q((AVRational){mxf->current_edit_unit, 1},
+                                        av_mul_q(sample_rate, time_base)));
+        if (remainder)
+            av_log(mxf->fc, AV_LOG_WARNING,
+                   "seeking detected on stream #%d with time base (%d/%d) and "
+                   "sample rate (%d/%d), audio pts won't be accurate.\n",
+                   stream_index, time_base.num, time_base.den,
+                   sample_rate.num, sample_rate.den);
+        return 0;
+    }
+
+    while (spf->samples_per_frame[size]) {
+        total += spf->samples_per_frame[size];
+        size++;
+    }
+
+    if (!size)
+        return 0;
+
+    *sample_count = (mxf->current_edit_unit / size) * (uint64_t)total;
+    for (i = 0; i < mxf->current_edit_unit % size; i++) {
+        *sample_count += spf->samples_per_frame[i];
+    }
+
+    return 0;
+}
+
+static int mxf_set_audio_pts(MXFContext *mxf, AVCodecContext *codec,
+                             AVPacket *pkt)
+{
+    MXFTrack *track = mxf->fc->streams[pkt->stream_index]->priv_data;
+    int64_t bits_per_sample = av_get_bits_per_sample(codec->codec_id);
+
+    pkt->pts = track->sample_count;
+
+    if (codec->channels <= 0 || codec->channels * bits_per_sample < 8)
+        return AVERROR_INVALIDDATA;
+
+    track->sample_count += pkt->size / (codec->channels * bits_per_sample / 8);
+    return 0;
+}
+
 static int mxf_read_packet_old(AVFormatContext *s, AVPacket *pkt)
 {
    KLVPacket klv;
@@ -2061,6 +2130,7 @@ static int mxf_read_packet_old(AVFormatContext *s, AVPacket *pkt)
            int64_t next_ofs, next_klv;
            AVStream *st;
            MXFTrack *track;
+            AVCodecContext *codec;

            if (index < 0) {
                av_log(s, AV_LOG_ERROR, "error getting stream index %d\n", AV_RB32(klv.key+12));
@@ -2102,7 +2172,9 @@ static int mxf_read_packet_old(AVFormatContext *s, AVPacket *pkt)
            pkt->stream_index = index;
            pkt->pos = klv.offset;

-            if (s->streams[index]->codec->codec_type == AVMEDIA_TYPE_VIDEO && next_ofs >= 0) {
+            codec = s->streams[index]->codec;
+
+            if (codec->codec_type == AVMEDIA_TYPE_VIDEO && next_ofs >= 0) {
                /* mxf->current_edit_unit good - see if we have an
                 * index table to derive timestamps from */
                MXFIndexTable *t = &mxf->index_tables[0];
@@ -2117,6 +2189,10 @@ static int mxf_read_packet_old(AVFormatContext *s, AVPacket *pkt)
                     * < PTS if low_delay = 0 (Sony IMX30) */
                    pkt->pts = mxf->current_edit_unit;
                }
+            } else if (codec->codec_type == AVMEDIA_TYPE_AUDIO) {
+                int ret = mxf_set_audio_pts(mxf, codec, pkt);
+                if (ret < 0)
+                    return ret;
            }

            /* seek for truncated packets */
@@ -2174,13 +2250,18 @@ static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
        if ((ret = av_get_packet(s->pb, pkt, size)) != size)
            return ret < 0 ? ret : AVERROR_EOF;

+    pkt->stream_index = 0;
+
    if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO && t->ptses &&
        mxf->current_edit_unit >= 0 && mxf->current_edit_unit < t->nb_ptses) {
        pkt->dts = mxf->current_edit_unit + t->first_dts;
        pkt->pts = t->ptses[mxf->current_edit_unit];
+    } else if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
+        int ret = mxf_set_audio_pts(mxf, st->codec, pkt);
+        if (ret < 0)
+            return ret;
    }

-    pkt->stream_index = 0;
    mxf->current_edit_unit += edit_units;

    return 0;
@@ -2263,8 +2344,14 @@ static int mxf_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti
    int64_t seconds;
    MXFContext* mxf = s->priv_data;
    int64_t seekpos;
-    int ret;
+    int i, ret;
    MXFIndexTable *t;
+    MXFTrack *source_track = st->priv_data;
+
+    /* if audio then truncate sample_time to EditRate */
+    if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO)
+        sample_time = av_rescale_q(sample_time, st->time_base,
+                                   av_inv_q(source_track->edit_rate));

    if (mxf->nb_index_tables <= 0) {
    if (!s->bit_rate)
@@ -2293,7 +2380,7 @@ static int mxf_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti
        } else {
            /* no IndexEntryArray (one or more CBR segments)
             * make sure we don't seek past the end */
-            sample_time = FFMIN(sample_time, st->duration - 1);
+            sample_time = FFMIN(sample_time, source_track->original_duration - 1);
        }

        if ((ret = mxf_edit_unit_absolute_offset(mxf, t, sample_time, &sample_time, &seekpos, 1)) << 0)
@@ -2303,6 +2390,20 @@ static int mxf_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti
        mxf->current_edit_unit = sample_time;
        avio_seek(s->pb, seekpos, SEEK_SET);
    }
+
+    // Update all tracks sample count
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *cur_st = s->streams[i];
+        MXFTrack *cur_track = cur_st->priv_data;
+        uint64_t current_sample_count = 0;
+        if (cur_st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
+            ret = mxf_compute_sample_count(mxf, i, &current_sample_count);
+            if (ret < 0)
+                return ret;
+
+            cur_track->sample_count = current_sample_count;
+        }
+    }
    return 0;
 }

--- a/tests/ref/seek/lavf-mxf
+++ b/tests/ref/seek/lavf-mxf
@@ -30,7 +30,7 @@ ret: 0         st: 0 flags:1  ts: 2.400000
 ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24712
 ret:-1         st: 1 flags:0  ts: 1.306667
 ret: 0         st: 1 flags:1  ts: 0.200833
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24712
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret: 0         st:-1 flags:0  ts:-0.904994
 ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret: 0         st:-1 flags:1  ts: 1.989173
--- a/tests/ref/seek/lavf-mxf_d10
+++ b/tests/ref/seek/lavf-mxf_d10
@@ -34,7 +34,7 @@ ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos:5117952 size:150000
 ret: 0         st: 1 flags:0  ts: 1.306667
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:5117952 size:150000
 ret: 0         st: 1 flags:1  ts: 0.200833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:5117952 size:150000
+ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos:1071104 size:150000
 ret: 0         st:-1 flags:0  ts:-0.904994
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   6144 size:150000
 ret: 0         st:-1 flags:1  ts: 1.989173