1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

avfilter/af_arnndn: add support for commands

This commit is contained in:
Paul B Mahol 2021-02-04 22:47:26 +01:00
parent 996ef512fe
commit fb9c56bbd5
2 changed files with 93 additions and 32 deletions

View File

@ -2353,6 +2353,10 @@ in the final filter output. Set this option to -1 to hear actual
noise removed from input signal.
@end table
@subsection Commands
This filter supports all the above options as @ref{commands}.
@section asetnsamples
Set the number of samples per output audio frame.

View File

@ -129,7 +129,7 @@ typedef struct DenoiseState {
float mem_hp_x[2];
float lastg[NB_BANDS];
float history[FRAME_SIZE];
RNNState rnn;
RNNState rnn[2];
AVTXContext *tx, *txi;
av_tx_fn tx_fn, txi_fn;
} DenoiseState;
@ -146,7 +146,7 @@ typedef struct AudioRNNContext {
DECLARE_ALIGNED(32, float, window)[WINDOW_SIZE];
DECLARE_ALIGNED(32, float, dct_table)[FFALIGN(NB_BANDS, 4)][FFALIGN(NB_BANDS, 4)];
RNNModel *model;
RNNModel *model[2];
AVFloatDSPContext *fdsp;
} AudioRNNContext;
@ -350,27 +350,34 @@ static int config_input(AVFilterLink *inlink)
s->channels = inlink->channels;
s->st = av_calloc(s->channels, sizeof(DenoiseState));
if (!s->st)
s->st = av_calloc(s->channels, sizeof(DenoiseState));
if (!s->st)
return AVERROR(ENOMEM);
for (int i = 0; i < s->channels; i++) {
DenoiseState *st = &s->st[i];
st->rnn.model = s->model;
st->rnn.vad_gru_state = av_calloc(sizeof(float), FFALIGN(s->model->vad_gru_size, 16));
st->rnn.noise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model->noise_gru_size, 16));
st->rnn.denoise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model->denoise_gru_size, 16));
if (!st->rnn.vad_gru_state ||
!st->rnn.noise_gru_state ||
!st->rnn.denoise_gru_state)
st->rnn[0].model = s->model[0];
st->rnn[0].vad_gru_state = av_calloc(sizeof(float), FFALIGN(s->model[0]->vad_gru_size, 16));
st->rnn[0].noise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model[0]->noise_gru_size, 16));
st->rnn[0].denoise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model[0]->denoise_gru_size, 16));
if (!st->rnn[0].vad_gru_state ||
!st->rnn[0].noise_gru_state ||
!st->rnn[0].denoise_gru_state)
return AVERROR(ENOMEM);
}
ret = av_tx_init(&st->tx, &st->tx_fn, AV_TX_FLOAT_FFT, 0, WINDOW_SIZE, NULL, 0);
for (int i = 0; i < s->channels; i++) {
DenoiseState *st = &s->st[i];
if (!st->tx)
ret = av_tx_init(&st->tx, &st->tx_fn, AV_TX_FLOAT_FFT, 0, WINDOW_SIZE, NULL, 0);
if (ret < 0)
return ret;
ret = av_tx_init(&st->txi, &st->txi_fn, AV_TX_FLOAT_FFT, 1, WINDOW_SIZE, NULL, 0);
if (!st->txi)
ret = av_tx_init(&st->txi, &st->txi_fn, AV_TX_FLOAT_FFT, 1, WINDOW_SIZE, NULL, 0);
if (ret < 0)
return ret;
}
@ -1368,7 +1375,7 @@ static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, c
silence = compute_frame_features(s, st, X, P, Ex, Ep, Exp, features, x);
if (!silence && !disabled) {
compute_rnn(s, &st->rnn, g, &vad_prob, features);
compute_rnn(s, &st->rnn[0], g, &vad_prob, features);
pitch_filter(X, P, Ex, Ep, Exp, g);
for (int i = 0; i < NB_BANDS; i++) {
float alpha = .6f;
@ -1458,26 +1465,38 @@ static int activate(AVFilterContext *ctx)
return FFERROR_NOT_READY;
}
static av_cold int init(AVFilterContext *ctx)
static int open_model(AVFilterContext *ctx, RNNModel **model)
{
AudioRNNContext *s = ctx->priv;
FILE *f;
s->fdsp = avpriv_float_dsp_alloc(0);
if (!s->fdsp)
return AVERROR(ENOMEM);
if (!s->model_name)
return AVERROR(EINVAL);
f = av_fopen_utf8(s->model_name, "r");
if (!f)
return AVERROR(EINVAL);
s->model = rnnoise_model_from_file(f);
*model = rnnoise_model_from_file(f);
fclose(f);
if (!s->model)
if (!*model)
return AVERROR(EINVAL);
return 0;
}
static av_cold int init(AVFilterContext *ctx)
{
AudioRNNContext *s = ctx->priv;
int ret;
s->fdsp = avpriv_float_dsp_alloc(0);
if (!s->fdsp)
return AVERROR(ENOMEM);
ret = open_model(ctx, &s->model[0]);
if (ret < 0)
return ret;
for (int i = 0; i < FRAME_SIZE; i++) {
s->window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
s->window[WINDOW_SIZE - 1 - i] = s->window[i];
@ -1494,22 +1513,59 @@ static av_cold int init(AVFilterContext *ctx)
return 0;
}
/**
 * Release model slot @p n together with the per-channel GRU state buffers
 * that belong to the same slot.
 *
 * @param ctx filter context whose private data is an AudioRNNContext
 * @param n   index of the model / RNN state slot to release
 */
static void free_model(AVFilterContext *ctx, int n)
{
    AudioRNNContext *s = ctx->priv;

    rnnoise_model_free(s->model[n]);
    s->model[n] = NULL;

    /* Without a per-channel state array there are no GRU buffers to drop. */
    if (!s->st)
        return;

    for (int ch = 0; ch < s->channels; ch++) {
        RNNState *rnn = &s->st[ch].rnn[n];

        av_freep(&rnn->vad_gru_state);
        av_freep(&rnn->noise_gru_state);
        av_freep(&rnn->denoise_gru_state);
    }
}
/**
 * Runtime command handler.
 *
 * The command is first forwarded to ff_filter_process_command(). On success a
 * model is loaded through open_model() into the spare slot 1, the spare and
 * active slots (model pointer and every channel's RNN state) are exchanged,
 * and config_input() is run on the first input so that state buffers are
 * allocated for the newly active model. The slot that ends up as spare is
 * always released before returning.
 *
 * @return 0 on success, a negative error code otherwise. On failure the
 *         previously active model and states are back in slot 0.
 */
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                           char *res, int res_len, int flags)
{
    AudioRNNContext *s = ctx->priv;
    int ret;

    ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
    if (ret < 0)
        return ret;

    /* Load into the spare slot so the active model is untouched if this fails. */
    ret = open_model(ctx, &s->model[1]);
    if (ret < 0)
        return ret;

    /* Make the new model (and the empty spare states) the active ones. */
    FFSWAP(RNNModel *, s->model[0], s->model[1]);
    for (int ch = 0; ch < s->channels; ch++)
        FFSWAP(RNNState, s->st[ch].rnn[0], s->st[ch].rnn[1]);

    ret = config_input(ctx->inputs[0]);
    if (ret < 0) {
        /* Roll back: restore the previous model and states into slot 0. */
        for (int ch = 0; ch < s->channels; ch++)
            FFSWAP(RNNState, s->st[ch].rnn[0], s->st[ch].rnn[1]);
        FFSWAP(RNNModel *, s->model[0], s->model[1]);

        /*
         * Slot 1 now holds the model that failed to configure, plus whatever
         * GRU buffers config_input() managed to allocate for it. Release them
         * here; otherwise they would be overwritten by the next command and
         * never freed.
         */
        free_model(ctx, 1);
        return ret;
    }

    /* Slot 1 now holds the previous model and its states; drop them. */
    free_model(ctx, 1);
    return 0;
}
static av_cold void uninit(AVFilterContext *ctx)
{
AudioRNNContext *s = ctx->priv;
av_freep(&s->fdsp);
rnnoise_model_free(s->model);
s->model = NULL;
if (s->st) {
for (int ch = 0; ch < s->channels; ch++) {
av_freep(&s->st[ch].rnn.vad_gru_state);
av_freep(&s->st[ch].rnn.noise_gru_state);
av_freep(&s->st[ch].rnn.denoise_gru_state);
av_tx_uninit(&s->st[ch].tx);
av_tx_uninit(&s->st[ch].txi);
}
free_model(ctx, 0);
for (int ch = 0; ch < s->channels && s->st; ch++) {
av_tx_uninit(&s->st[ch].tx);
av_tx_uninit(&s->st[ch].txi);
}
av_freep(&s->st);
}
@ -1532,7 +1588,7 @@ static const AVFilterPad outputs[] = {
};
#define OFFSET(x) offsetof(AudioRNNContext, x)
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
static const AVOption arnndn_options[] = {
{ "model", "set model name", OFFSET(model_name), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, AF },
@ -1556,4 +1612,5 @@ AVFilter ff_af_arnndn = {
.outputs = outputs,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
AVFILTER_FLAG_SLICE_THREADS,
.process_command = process_command,
};