diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index b915148ad7..2370cb08da 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1030,6 +1030,14 @@ typedef struct AVStream {
      */
     int skip_samples;
 
+    /**
+     * If not 0, the first audio sample that should be discarded from the stream.
+     * This is broken by design (needs global sample count), but can't be
+     * avoided for broken by design formats such as mp3 with ad-hoc gapless
+     * audio support.
+     */
+    int64_t end_discard_sample;
+
     /**
      * Number of internally decoded frames, used internally in libavformat, do not access
      * its lifetime differs from info which is why it is not in that structure.
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index 4872afc43c..639c78d405 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -219,6 +219,8 @@ static void mp3_parse_info_tag(AVFormatContext *s, AVStream *st,
         mp3->start_pad = v>>12;
         mp3->  end_pad = v&4095;
         st->skip_samples = mp3->start_pad + 528 + 1;
+        if (mp3->frames)
+            st->end_discard_sample = -mp3->end_pad + 528 + 1 + mp3->frames * (int64_t)spf;
         if (!st->start_time)
             st->start_time = av_rescale_q(st->skip_samples,
                                           (AVRational){1, c->sample_rate},
diff --git a/libavformat/utils.c b/libavformat/utils.c
index e899e4d071..58533f88e9 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1255,6 +1255,20 @@ static int read_from_packet_buffer(AVPacketList **pkt_buffer,
     return 0;
 }
 
+/**
+ * Convert a timestamp or duration expressed in st->time_base units into a
+ * count of audio samples at st->codec->sample_rate.
+ *
+ * @param st stream providing the time base and the sample rate
+ * @param ts value in st->time_base units
+ * @return ts rescaled to samples by av_rescale()
+ */
+static int64_t ts_to_samples(AVStream *st, int64_t ts)
+{
+    /* Widen before multiplying so num * sample_rate cannot overflow int. */
+    return av_rescale(ts, (int64_t)st->time_base.num * st->codec->sample_rate, st->time_base.den);
+}
+
 static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
 {
     int ret = 0, i, got_packet = 0;
@@ -1352,10 +1366,23 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
 
     if (ret >= 0) {
         AVStream *st = s->streams[pkt->stream_index];
-        if (st->skip_samples) {
+        int discard_padding = 0;
+        /* Work out how many samples at the tail of this packet lie at or
+         * beyond end_discard_sample; a valid pts is needed to place the packet. */
+        if (st->end_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
+            int64_t pts = pkt->pts - (is_relative(pkt->pts) ? RELATIVE_TS_BASE : 0);
+            int64_t sample = ts_to_samples(st, pts);
+            int duration = ts_to_samples(st, pkt->duration);
+            int64_t end_sample = sample + duration;
+            /* Never discard more samples than the packet itself spans. */
+            if (duration > 0 && end_sample >= st->end_discard_sample)
+                discard_padding = FFMIN(end_sample - st->end_discard_sample, duration);
+        }
+        if (st->skip_samples || discard_padding) {
             uint8_t *p = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
             if (p) {
                 AV_WL32(p, st->skip_samples);
-                av_log(s, AV_LOG_DEBUG, "demuxer injecting skip %d\n", st->skip_samples);
+                AV_WL32(p + 4, discard_padding);
+                av_log(s, AV_LOG_DEBUG, "demuxer injecting skip %d / discard %d\n", st->skip_samples, discard_padding);
             }
             st->skip_samples = 0;