mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
avfilter/af_arnndn: add support for commands
This commit is contained in:
parent
996ef512fe
commit
fb9c56bbd5
@ -2353,6 +2353,10 @@ in the final filter output. Set this option to -1 to hear actual
|
||||
noise removed from input signal.
|
||||
@end table
|
||||
|
||||
@subsection Commands
|
||||
|
||||
This filter supports all the above options as @ref{commands}.
|
||||
|
||||
@section asetnsamples
|
||||
|
||||
Set the number of samples for each output audio frame.
|
||||
|
@ -129,7 +129,7 @@ typedef struct DenoiseState {
|
||||
float mem_hp_x[2];
|
||||
float lastg[NB_BANDS];
|
||||
float history[FRAME_SIZE];
|
||||
RNNState rnn;
|
||||
RNNState rnn[2];
|
||||
AVTXContext *tx, *txi;
|
||||
av_tx_fn tx_fn, txi_fn;
|
||||
} DenoiseState;
|
||||
@ -146,7 +146,7 @@ typedef struct AudioRNNContext {
|
||||
DECLARE_ALIGNED(32, float, window)[WINDOW_SIZE];
|
||||
DECLARE_ALIGNED(32, float, dct_table)[FFALIGN(NB_BANDS, 4)][FFALIGN(NB_BANDS, 4)];
|
||||
|
||||
RNNModel *model;
|
||||
RNNModel *model[2];
|
||||
|
||||
AVFloatDSPContext *fdsp;
|
||||
} AudioRNNContext;
|
||||
@ -350,27 +350,34 @@ static int config_input(AVFilterLink *inlink)
|
||||
|
||||
s->channels = inlink->channels;
|
||||
|
||||
s->st = av_calloc(s->channels, sizeof(DenoiseState));
|
||||
if (!s->st)
|
||||
s->st = av_calloc(s->channels, sizeof(DenoiseState));
|
||||
if (!s->st)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
for (int i = 0; i < s->channels; i++) {
|
||||
DenoiseState *st = &s->st[i];
|
||||
|
||||
st->rnn.model = s->model;
|
||||
st->rnn.vad_gru_state = av_calloc(sizeof(float), FFALIGN(s->model->vad_gru_size, 16));
|
||||
st->rnn.noise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model->noise_gru_size, 16));
|
||||
st->rnn.denoise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model->denoise_gru_size, 16));
|
||||
if (!st->rnn.vad_gru_state ||
|
||||
!st->rnn.noise_gru_state ||
|
||||
!st->rnn.denoise_gru_state)
|
||||
st->rnn[0].model = s->model[0];
|
||||
st->rnn[0].vad_gru_state = av_calloc(sizeof(float), FFALIGN(s->model[0]->vad_gru_size, 16));
|
||||
st->rnn[0].noise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model[0]->noise_gru_size, 16));
|
||||
st->rnn[0].denoise_gru_state = av_calloc(sizeof(float), FFALIGN(s->model[0]->denoise_gru_size, 16));
|
||||
if (!st->rnn[0].vad_gru_state ||
|
||||
!st->rnn[0].noise_gru_state ||
|
||||
!st->rnn[0].denoise_gru_state)
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
ret = av_tx_init(&st->tx, &st->tx_fn, AV_TX_FLOAT_FFT, 0, WINDOW_SIZE, NULL, 0);
|
||||
for (int i = 0; i < s->channels; i++) {
|
||||
DenoiseState *st = &s->st[i];
|
||||
|
||||
if (!st->tx)
|
||||
ret = av_tx_init(&st->tx, &st->tx_fn, AV_TX_FLOAT_FFT, 0, WINDOW_SIZE, NULL, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = av_tx_init(&st->txi, &st->txi_fn, AV_TX_FLOAT_FFT, 1, WINDOW_SIZE, NULL, 0);
|
||||
if (!st->txi)
|
||||
ret = av_tx_init(&st->txi, &st->txi_fn, AV_TX_FLOAT_FFT, 1, WINDOW_SIZE, NULL, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
@ -1368,7 +1375,7 @@ static float rnnoise_channel(AudioRNNContext *s, DenoiseState *st, float *out, c
|
||||
silence = compute_frame_features(s, st, X, P, Ex, Ep, Exp, features, x);
|
||||
|
||||
if (!silence && !disabled) {
|
||||
compute_rnn(s, &st->rnn, g, &vad_prob, features);
|
||||
compute_rnn(s, &st->rnn[0], g, &vad_prob, features);
|
||||
pitch_filter(X, P, Ex, Ep, Exp, g);
|
||||
for (int i = 0; i < NB_BANDS; i++) {
|
||||
float alpha = .6f;
|
||||
@ -1458,26 +1465,38 @@ static int activate(AVFilterContext *ctx)
|
||||
return FFERROR_NOT_READY;
|
||||
}
|
||||
|
||||
static av_cold int init(AVFilterContext *ctx)
|
||||
/*
 * Open the file named by s->model_name and parse it with
 * rnnoise_model_from_file(), storing the result in *model.
 *
 * Returns 0 on success. Returns AVERROR(EINVAL) when no model name is set,
 * when the file cannot be opened, or when parsing yields no model.
 *
 * NOTE(review): this listing is a diff rendering without +/- markers. The
 * fdsp allocation and the two `s->model` lines below look like the
 * pre-change side of the hunk (the same fdsp allocation reappears in
 * init()); confirm against the actual file before relying on them.
 */
static int open_model(AVFilterContext *ctx, RNNModel **model)
|
||||
{
|
||||
AudioRNNContext *s = ctx->priv;
|
||||
FILE *f;
|
||||
|
||||
s->fdsp = avpriv_float_dsp_alloc(0);
|
||||
if (!s->fdsp)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
/* A model file name is mandatory. */
if (!s->model_name)
|
||||
return AVERROR(EINVAL);
|
||||
f = av_fopen_utf8(s->model_name, "r");
|
||||
if (!f)
|
||||
return AVERROR(EINVAL);
|
||||
|
||||
s->model = rnnoise_model_from_file(f);
|
||||
*model = rnnoise_model_from_file(f);
|
||||
/* The file is closed before the parse result is checked. */
fclose(f);
|
||||
if (!s->model)
|
||||
if (!*model)
|
||||
return AVERROR(EINVAL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static av_cold int init(AVFilterContext *ctx)
|
||||
{
|
||||
AudioRNNContext *s = ctx->priv;
|
||||
int ret;
|
||||
|
||||
s->fdsp = avpriv_float_dsp_alloc(0);
|
||||
if (!s->fdsp)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
ret = open_model(ctx, &s->model[0]);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
for (int i = 0; i < FRAME_SIZE; i++) {
|
||||
s->window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
|
||||
s->window[WINDOW_SIZE - 1 - i] = s->window[i];
|
||||
@ -1494,22 +1513,59 @@ static av_cold int init(AVFilterContext *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Release model slot n together with the per-channel GRU state buffers
 * that belong to that slot.
 *
 * @param ctx filter context whose private data is an AudioRNNContext
 * @param n   index into s->model[] and into each channel's rnn[] array
 */
static void free_model(AVFilterContext *ctx, int n)
{
    AudioRNNContext *s = ctx->priv;

    rnnoise_model_free(s->model[n]);
    s->model[n] = NULL;

    /* No per-channel state has been allocated: nothing more to release. */
    if (!s->st)
        return;

    for (int ch = 0; ch < s->channels; ch++) {
        av_freep(&s->st[ch].rnn[n].vad_gru_state);
        av_freep(&s->st[ch].rnn[n].noise_gru_state);
        av_freep(&s->st[ch].rnn[n].denoise_gru_state);
    }
}
|
||||
|
||||
/**
 * Handle a runtime command: apply the option change, then reload the model.
 *
 * The model is loaded into slot 1 and swapped into slot 0 together with each
 * channel's RNN state, after which config_input() is re-run on the first
 * input so that GRU state sized for the new model is allocated in slot 0.
 * If that fails, the previous model and state are swapped back into slot 0.
 *
 * In both outcomes slot 1 is released before returning: on success it holds
 * the old model, on failure it holds the rejected new model plus whatever
 * state config_input() managed to allocate for it.
 *
 * @return 0 on success, otherwise the negative error code returned by
 *         ff_filter_process_command(), open_model() or config_input()
 */
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                           char *res, int res_len, int flags)
{
    AudioRNNContext *s = ctx->priv;
    int ret;

    ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
    if (ret < 0)
        return ret;

    ret = open_model(ctx, &s->model[1]);
    if (ret < 0)
        return ret;

    /* Make the freshly loaded model the active one (slot 0). */
    FFSWAP(RNNModel *, s->model[0], s->model[1]);
    for (int ch = 0; ch < s->channels; ch++)
        FFSWAP(RNNState, s->st[ch].rnn[0], s->st[ch].rnn[1]);

    ret = config_input(ctx->inputs[0]);
    if (ret < 0) {
        /* Roll back: restore the previous model and its state in slot 0. */
        for (int ch = 0; ch < s->channels; ch++)
            FFSWAP(RNNState, s->st[ch].rnn[0], s->st[ch].rnn[1]);
        FFSWAP(RNNModel *, s->model[0], s->model[1]);

        /*
         * Slot 1 now holds the model that could not be activated; free it
         * here, otherwise it would be leaked (slot 1 is overwritten by the
         * next command).
         */
        free_model(ctx, 1);
        return ret;
    }

    /* Slot 1 holds the previous model, which is no longer needed. */
    free_model(ctx, 1);
    return 0;
}
|
||||
|
||||
/*
 * Filter teardown: frees the float DSP context, the model and per-channel
 * GRU state, both per-channel transform contexts, and finally the
 * per-channel state array itself.
 *
 * NOTE(review): this listing is a diff rendering without +/- markers. The
 * `s->model` / `.rnn.` lines and the `if (s->st) {` loop appear to be the
 * pre-change side, while `free_model(ctx, 0)` and the loop that follows it
 * appear to be the post-change side; confirm against the actual file.
 */
static av_cold void uninit(AVFilterContext *ctx)
|
||||
{
|
||||
AudioRNNContext *s = ctx->priv;
|
||||
|
||||
av_freep(&s->fdsp);
|
||||
rnnoise_model_free(s->model);
|
||||
s->model = NULL;
|
||||
|
||||
if (s->st) {
|
||||
for (int ch = 0; ch < s->channels; ch++) {
|
||||
av_freep(&s->st[ch].rnn.vad_gru_state);
|
||||
av_freep(&s->st[ch].rnn.noise_gru_state);
|
||||
av_freep(&s->st[ch].rnn.denoise_gru_state);
|
||||
av_tx_uninit(&s->st[ch].tx);
|
||||
av_tx_uninit(&s->st[ch].txi);
|
||||
}
|
||||
/* Releases model slot 0 and the GRU state of every channel for that slot. */
free_model(ctx, 0);
|
||||
/* The `&& s->st` condition skips the loop when no per-channel state exists. */
for (int ch = 0; ch < s->channels && s->st; ch++) {
|
||||
av_tx_uninit(&s->st[ch].tx);
|
||||
av_tx_uninit(&s->st[ch].txi);
|
||||
}
|
||||
av_freep(&s->st);
|
||||
}
|
||||
@ -1532,7 +1588,7 @@ static const AVFilterPad outputs[] = {
|
||||
};
|
||||
|
||||
#define OFFSET(x) offsetof(AudioRNNContext, x)
|
||||
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
|
||||
#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
|
||||
|
||||
static const AVOption arnndn_options[] = {
|
||||
{ "model", "set model name", OFFSET(model_name), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, AF },
|
||||
@ -1556,4 +1612,5 @@ AVFilter ff_af_arnndn = {
|
||||
.outputs = outputs,
|
||||
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
|
||||
AVFILTER_FLAG_SLICE_THREADS,
|
||||
.process_command = process_command,
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user