diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index b915148ad7..2370cb08da 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1030,6 +1030,14 @@ typedef struct AVStream {
      */
     int skip_samples;
 
+    /**
+     * If not 0, the first audio sample that should be discarded from the stream.
+     * This is broken by design (needs global sample count), but can't be
+     * avoided for broken by design formats such as mp3 with ad-hoc gapless
+     * audio support.
+     */
+    int64_t end_discard_sample;
+
     /**
      * Number of internally decoded frames, used internally in libavformat, do not access
      * its lifetime differs from info which is why it is not in that structure.
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index 4872afc43c..639c78d405 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -219,6 +219,8 @@ static void mp3_parse_info_tag(AVFormatContext *s, AVStream *st,
         mp3->start_pad = v>>12;
         mp3->  end_pad = v&4095;
         st->skip_samples = mp3->start_pad + 528 + 1;
+        if (mp3->frames)
+            st->end_discard_sample = -mp3->end_pad + 528 + 1 + mp3->frames * (int64_t)spf;
         if (!st->start_time)
             st->start_time = av_rescale_q(st->skip_samples,
                                             (AVRational){1, c->sample_rate},
diff --git a/libavformat/utils.c b/libavformat/utils.c
index e899e4d071..58533f88e9 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1255,6 +1255,11 @@ static int read_from_packet_buffer(AVPacketList **pkt_buffer,
     return 0;
 }
 
+static int64_t ts_to_samples(AVStream *st, int64_t ts)
+{
+    return av_rescale(ts, st->time_base.num * st->codec->sample_rate, st->time_base.den);
+}
+
 static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
 {
     int ret = 0, i, got_packet = 0;
@@ -1352,10 +1357,20 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
 
     if (ret >= 0) {
         AVStream *st = s->streams[pkt->stream_index];
-        if (st->skip_samples) {
+        int discard_padding = 0;
+        if (st->end_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
+            int64_t pts = pkt->pts - (is_relative(pkt->pts) ? RELATIVE_TS_BASE : 0);
+            int64_t sample = ts_to_samples(st, pts);
+            int duration = ts_to_samples(st, pkt->duration);
+            int64_t end_sample = sample + duration;
+            if (duration > 0 && end_sample >= st->end_discard_sample)
+                discard_padding = FFMIN(end_sample - st->end_discard_sample, duration);
+        }
+        if (st->skip_samples || discard_padding) {
             uint8_t *p = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
             if (p) {
                 AV_WL32(p, st->skip_samples);
+                AV_WL32(p + 4, discard_padding);
                 av_log(s, AV_LOG_DEBUG, "demuxer injecting skip %d\n", st->skip_samples);
             }
             st->skip_samples = 0;