avcodec/pcm_rechunk_bsf: add bitstream filter to rechunk pcm audio

Signed-off-by: Marton Balint <cus@passwd.hu>
2025-08-10 06:10:52 +02:00 · 2020-03-24 23:24:22 +01:00
parent d7a0071a44
commit 2035620b7c
6 changed files with 254 additions and 1 deletions
--- a/1
+++ b/1
@@ -65,6 +65,7 @@ version <next>:
 - Cunning Developments ADPCM decoder
 - asubboost filter
 - Pro Pinball Series Soundbank demuxer
+- pcm_rechunk bitstream filter


 version 4.2:
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi
@@ -548,6 +548,36 @@ ffmpeg -i INPUT -c copy -bsf noise[=1] output.mkv
@section null
 This bitstream filter passes the packets through unchanged.

+@section pcm_rechunk
+
+Repacketize PCM audio to a fixed number of samples per packet or a fixed packet
+rate per second. This is similar to the @ref{asetnsamples,,asetnsamples audio
+filter,ffmpeg-filters} but works on audio packets instead of audio frames.
+
+@table @option
+@item nb_out_samples, n
+Set the number of samples in each output audio packet. The value is the number
+of samples @emph{per channel}. Default value is 1024.
+
+@item pad, p
+If set to 1, the filter will pad the last audio packet with silence, so that it
+will contain the same number of samples (or roughly the same number of samples,
+see @option{frame_rate}) as the previous ones. Default value is 1.
+
+@item frame_rate, r
+This option makes the filter output a fixed number of packets per second instead
+of a fixed number of samples per packet. If the audio sample rate is not
+divisible by the frame rate then the number of samples will not be constant but
+will vary slightly so that each packet will start as close to the frame
+boundary as possible. This option takes precedence over @option{nb_out_samples}.
+@end table
+
+You can generate the well-known 1602-1601-1602-1601-1602 pattern of 48 kHz audio
+for the NTSC frame rate using the @option{frame_rate} option.
+@example
+ffmpeg -f lavfi -i sine=r=48000:d=1 -c pcm_s16le -bsf pcm_rechunk=r=30000/1001 -f framecrc -
+@end example
+
@section prores_metadata

 Modify color property metadata embedded in prores stream.
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1117,6 +1117,7 @@ OBJS-$(CONFIG_MPEG2_METADATA_BSF)         += mpeg2_metadata_bsf.o
 OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
 OBJS-$(CONFIG_NULL_BSF)                   += null_bsf.o
 OBJS-$(CONFIG_OPUS_METADATA_BSF)          += opus_metadata_bsf.o
+OBJS-$(CONFIG_PCM_RECHUNK_BSF)            += pcm_rechunk_bsf.o
 OBJS-$(CONFIG_PRORES_METADATA_BSF)        += prores_metadata_bsf.o
 OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       += remove_extradata_bsf.o
 OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -50,6 +50,7 @@ extern const AVBitStreamFilter ff_mov2textsub_bsf;
 extern const AVBitStreamFilter ff_noise_bsf;
 extern const AVBitStreamFilter ff_null_bsf;
 extern const AVBitStreamFilter ff_opus_metadata_bsf;
+extern const AVBitStreamFilter ff_pcm_rechunk_bsf;
 extern const AVBitStreamFilter ff_prores_metadata_bsf;
 extern const AVBitStreamFilter ff_remove_extradata_bsf;
 extern const AVBitStreamFilter ff_text2movsub_bsf;
--- a/libavcodec/pcm_rechunk_bsf.c
+++ b/libavcodec/pcm_rechunk_bsf.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2020 Marton Balint
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "bsf.h"
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+
+typedef struct PCMContext { /* private state of the pcm_rechunk bitstream filter */
+    const AVClass *class;
+
+    int nb_out_samples;     /* option "nb_out_samples"/"n": samples per output packet */
+    int pad;                /* option "pad"/"p": zero-fill the last packet at EOF */
+    AVRational frame_rate;  /* option "frame_rate"/"r": packets per second; num == 0 selects nb_out_samples mode */
+
+    AVPacket *in_pkt;       /* input packet whose data is still being consumed */
+    AVPacket *out_pkt;      /* output packet being assembled by copying from inputs */
+    int sample_size;        /* bytes per sample across all channels, set in init() */
+    int64_t n;              /* packets emitted so far; reset to 0 by flush() */
+} PCMContext;
+
+static int init(AVBSFContext *ctx) /* validate parameters and allocate work packets; returns 0 or an AVERROR code */
+{
+    PCMContext *s = ctx->priv_data;
+    AVRational sr = av_make_q(ctx->par_in->sample_rate, 1);
+    int64_t min_samples;
+
+    if (ctx->par_in->channels <= 0 || ctx->par_in->sample_rate <= 0)
+        return AVERROR(EINVAL);
+
+    ctx->time_base_out = av_inv_q(sr); /* output time base is 1/sample_rate, i.e. one tick per sample */
+    s->sample_size = ctx->par_in->channels * av_get_bits_per_sample(ctx->par_in->codec_id) / 8; /* bytes per sample, all channels */
+
+    if (s->frame_rate.num) {
+        min_samples = av_rescale_q_rnd(1, sr, s->frame_rate, AV_ROUND_DOWN); /* sample_rate / frame_rate, rounded down */
+    } else {
+        min_samples = s->nb_out_samples;
+    }
+    if (min_samples <= 0 || min_samples > INT_MAX / s->sample_size - 1) /* ensures (min_samples + 1) * sample_size fits in int */
+        return AVERROR(EINVAL);
+
+    s->in_pkt  = av_packet_alloc();
+    s->out_pkt = av_packet_alloc();
+    if (!s->in_pkt || !s->out_pkt) /* NOTE(review): a lone successful allocation is presumably freed by uninit() — confirm the framework calls close after a failed init */
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void uninit(AVBSFContext *ctx) /* free both work packets; registered as the filter's close callback */
+{
+    PCMContext *s = ctx->priv_data;
+    av_packet_free(&s->in_pkt);
+    av_packet_free(&s->out_pkt);
+}
+
+static void flush(AVBSFContext *ctx) /* discard buffered input/output data and restart the packet counter */
+{
+    PCMContext *s = ctx->priv_data;
+    av_packet_unref(s->in_pkt);
+    av_packet_unref(s->out_pkt);
+    s->n = 0; /* get_next_nb_samples() derives the frame_rate pattern from this counter */
+}
+
+static int send_packet(PCMContext *s, int nb_samples, AVPacket *pkt) /* finalize an output packet; always returns 0 */
+{
+    pkt->duration = nb_samples; /* duration in samples, matching the 1/sample_rate output time base set in init() */
+    s->n++;                     /* one more packet emitted */
+    return 0;
+}
+
+static void drain_packet(AVPacket *pkt, int drain_data, int drain_samples) /* consume drain_data bytes from the front of pkt */
+{
+    pkt->size -= drain_data;
+    pkt->data += drain_data;
+    if (pkt->dts != AV_NOPTS_VALUE) /* unset timestamps are left untouched */
+        pkt->dts += drain_samples;  /* advance by the number of samples consumed */
+    if (pkt->pts != AV_NOPTS_VALUE)
+        pkt->pts += drain_samples;
+}
+
+static int get_next_nb_samples(AVBSFContext *ctx) /* number of samples the next output packet (index s->n) should hold */
+{
+    PCMContext *s = ctx->priv_data;
+    if (s->frame_rate.num) {
+        AVRational sr = av_make_q(ctx->par_in->sample_rate, 1);
+        return av_rescale_q(s->n + 1, sr, s->frame_rate) - av_rescale_q(s->n, sr, s->frame_rate); /* difference between the rescaled sample positions of packets n+1 and n */
+    } else {
+        return s->nb_out_samples; /* fixed-size mode */
+    }
+}
+
+static int rechunk_filter(AVBSFContext *ctx, AVPacket *pkt) /* produce one rechunked packet in pkt; returns 0 or a negative error from the calls below */
+{
+    PCMContext *s = ctx->priv_data;
+    int nb_samples = get_next_nb_samples(ctx);
+    int data_size = nb_samples * s->sample_size; /* byte size of the packet to emit */
+    int ret;
+
+    do {
+        if (s->in_pkt->size) {
+            if (s->out_pkt->size || s->in_pkt->size < data_size) { /* partial output pending or input too small: assemble by copying */
+                int drain = FFMIN(s->in_pkt->size, data_size - s->out_pkt->size); /* bytes to copy from the input now */
+                if (!s->out_pkt->size) { /* start a new output packet */
+                    ret = av_new_packet(s->out_pkt, data_size);
+                    if (ret < 0)
+                        return ret;
+                    ret = av_packet_copy_props(s->out_pkt, s->in_pkt); /* properties come from the first contributing input */
+                    if (ret < 0) {
+                        av_packet_unref(s->out_pkt);
+                        return ret;
+                    }
+                    s->out_pkt->size = 0; /* size now tracks bytes filled; the buffer was allocated for data_size */
+                }
+                memcpy(s->out_pkt->data + s->out_pkt->size, s->in_pkt->data, drain);
+                s->out_pkt->size += drain;
+                drain_packet(s->in_pkt, drain, drain / s->sample_size);
+                if (!s->in_pkt->size)
+                    av_packet_unref(s->in_pkt); /* input fully consumed */
+                if (s->out_pkt->size == data_size) { /* output packet complete */
+                    av_packet_move_ref(pkt, s->out_pkt);
+                    return send_packet(s, nb_samples, pkt);
+                }
+            } else if (s->in_pkt->size > data_size) { /* input holds more than one packet: output its first data_size bytes via a new reference */
+                ret = av_packet_ref(pkt, s->in_pkt);
+                if (ret < 0)
+                    return ret;
+                pkt->size = data_size;
+                drain_packet(s->in_pkt, data_size, nb_samples); /* keep the remainder for later calls */
+                return send_packet(s, nb_samples, pkt);
+            } else { /* input is exactly one output packet: hand it over as is */
+                av_assert0(s->in_pkt->size == data_size);
+                av_packet_move_ref(pkt, s->in_pkt);
+                return send_packet(s, nb_samples, pkt);
+            }
+        }
+
+        ret = ff_bsf_get_packet_ref(ctx, s->in_pkt); /* in_pkt is empty here: request the next input packet */
+        if (ret == AVERROR_EOF && s->out_pkt->size) { /* end of stream with a partially filled output packet */
+            if (s->pad) {
+                memset(s->out_pkt->data + s->out_pkt->size, 0, data_size - s->out_pkt->size); /* zero-fill up to the full packet size */
+                s->out_pkt->size = data_size;
+            } else {
+                nb_samples = s->out_pkt->size / s->sample_size; /* emit a shorter last packet */
+            }
+            av_packet_move_ref(pkt, s->out_pkt);
+            return send_packet(s, nb_samples, pkt);
+        }
+        if (ret >= 0)
+            av_packet_rescale_ts(s->in_pkt, ctx->time_base_in, ctx->time_base_out); /* convert input timestamps to the output time base */
+    } while (ret >= 0);
+
+    return ret; /* negative code from ff_bsf_get_packet_ref() */
+}
+
+#define OFFSET(x) offsetof(PCMContext, x) /* offset of an option's backing field in PCMContext */
+#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_BSF_PARAM)
+static const AVOption options[] = { /* each option is listed twice: long name, then short alias on the same field */
+    { "nb_out_samples", "set the number of per-packet output samples", OFFSET(nb_out_samples),   AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
+    { "n",              "set the number of per-packet output samples", OFFSET(nb_out_samples),   AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS }, /* alias of nb_out_samples */
+    { "pad",            "pad last packet with zeros",                  OFFSET(pad),             AV_OPT_TYPE_BOOL, {.i64=1} ,   0,       1, FLAGS },
+    { "p",              "pad last packet with zeros",                  OFFSET(pad),             AV_OPT_TYPE_BOOL, {.i64=1} ,   0,       1, FLAGS }, /* alias of pad */
+    { "frame_rate",     "set number of packets per second",            OFFSET(frame_rate),  AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS }, /* default 0: init() then uses nb_out_samples */
+    { "r",              "set number of packets per second",            OFFSET(frame_rate),  AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS }, /* alias of frame_rate */
+    { NULL },
+};
+
+static const AVClass pcm_rechunk_class = { /* AVClass that exposes the option table for this filter */
+    .class_name = "pcm_rechunk_bsf",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const enum AVCodecID codec_ids[] = { /* PCM codec IDs listed for this filter (see .codec_ids below) */
+    AV_CODEC_ID_PCM_S16LE,
+    AV_CODEC_ID_PCM_S16BE,
+    AV_CODEC_ID_PCM_S8,
+    AV_CODEC_ID_PCM_S32LE,
+    AV_CODEC_ID_PCM_S32BE,
+    AV_CODEC_ID_PCM_S24LE,
+    AV_CODEC_ID_PCM_S24BE,
+    AV_CODEC_ID_PCM_F32BE,
+    AV_CODEC_ID_PCM_F32LE,
+    AV_CODEC_ID_PCM_F64BE,
+    AV_CODEC_ID_PCM_F64LE,
+    AV_CODEC_ID_PCM_S64LE,
+    AV_CODEC_ID_PCM_S64BE,
+    AV_CODEC_ID_PCM_F16LE,
+    AV_CODEC_ID_PCM_F24LE,
+    AV_CODEC_ID_NONE, /* list terminator */
+};
+
+const AVBitStreamFilter ff_pcm_rechunk_bsf = { /* filter definition; declared extern in bitstream_filters.c */
+    .name           = "pcm_rechunk",
+    .priv_data_size = sizeof(PCMContext),
+    .priv_class     = &pcm_rechunk_class,
+    .filter         = rechunk_filter,
+    .init           = init,
+    .flush          = flush,
+    .close          = uninit,
+    .codec_ids      = codec_ids,
+};
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@
 #include "libavutil/version.h"

 #define LIBAVCODEC_VERSION_MAJOR  58
-#define LIBAVCODEC_VERSION_MINOR  82
+#define LIBAVCODEC_VERSION_MINOR  83
 #define LIBAVCODEC_VERSION_MICRO 100

 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \