mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
avfilter/af_dialoguenhance: add double-floating point sample format support
This commit is contained in:
parent
704ef556fe
commit
9e74c7ae87
@ -26,7 +26,6 @@
|
|||||||
#include "filters.h"
|
#include "filters.h"
|
||||||
#include "formats.h"
|
#include "formats.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "window_func.h"
|
|
||||||
|
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
|
|
||||||
@ -38,8 +37,11 @@ typedef struct AudioDialogueEnhancementContext {
|
|||||||
int fft_size;
|
int fft_size;
|
||||||
int overlap;
|
int overlap;
|
||||||
|
|
||||||
float *window;
|
void *window;
|
||||||
float prev_vad;
|
float *window_float;
|
||||||
|
double *window_double;
|
||||||
|
float prev_vad_float;
|
||||||
|
double prev_vad_double;
|
||||||
|
|
||||||
AVFrame *in;
|
AVFrame *in;
|
||||||
AVFrame *in_frame;
|
AVFrame *in_frame;
|
||||||
@ -49,6 +51,8 @@ typedef struct AudioDialogueEnhancementContext {
|
|||||||
AVFrame *windowed_prev;
|
AVFrame *windowed_prev;
|
||||||
AVFrame *center_frame;
|
AVFrame *center_frame;
|
||||||
|
|
||||||
|
int (*de_stereo)(AVFilterContext *ctx, AVFrame *out);
|
||||||
|
|
||||||
AVTXContext *tx_ctx[2], *itx_ctx;
|
AVTXContext *tx_ctx[2], *itx_ctx;
|
||||||
av_tx_fn tx_fn, itx_fn;
|
av_tx_fn tx_fn, itx_fn;
|
||||||
} AudioDialogueEnhanceContext;
|
} AudioDialogueEnhanceContext;
|
||||||
@ -72,6 +76,7 @@ static int query_formats(AVFilterContext *ctx)
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if ((ret = ff_add_format (&formats, AV_SAMPLE_FMT_FLTP )) < 0 ||
|
if ((ret = ff_add_format (&formats, AV_SAMPLE_FMT_FLTP )) < 0 ||
|
||||||
|
(ret = ff_add_format (&formats, AV_SAMPLE_FMT_DBLP )) < 0 ||
|
||||||
(ret = ff_set_common_formats (ctx , formats )) < 0 ||
|
(ret = ff_set_common_formats (ctx , formats )) < 0 ||
|
||||||
(ret = ff_add_channel_layout (&in_layout , &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO)) < 0 ||
|
(ret = ff_add_channel_layout (&in_layout , &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO)) < 0 ||
|
||||||
(ret = ff_channel_layouts_ref(in_layout, &ctx->inputs[0]->outcfg.channel_layouts)) < 0 ||
|
(ret = ff_channel_layouts_ref(in_layout, &ctx->inputs[0]->outcfg.channel_layouts)) < 0 ||
|
||||||
@ -82,20 +87,22 @@ static int query_formats(AVFilterContext *ctx)
|
|||||||
return ff_set_common_all_samplerates(ctx);
|
return ff_set_common_all_samplerates(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define DEPTH 32
|
||||||
|
#include "dialoguenhance_template.c"
|
||||||
|
|
||||||
|
#undef DEPTH
|
||||||
|
#define DEPTH 64
|
||||||
|
#include "dialoguenhance_template.c"
|
||||||
|
|
||||||
static int config_input(AVFilterLink *inlink)
|
static int config_input(AVFilterLink *inlink)
|
||||||
{
|
{
|
||||||
AVFilterContext *ctx = inlink->dst;
|
AVFilterContext *ctx = inlink->dst;
|
||||||
AudioDialogueEnhanceContext *s = ctx->priv;
|
AudioDialogueEnhanceContext *s = ctx->priv;
|
||||||
float scale = 1.f, iscale, overlap;
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
s->fft_size = inlink->sample_rate > 100000 ? 8192 : inlink->sample_rate > 50000 ? 4096 : 2048;
|
s->fft_size = inlink->sample_rate > 100000 ? 8192 : inlink->sample_rate > 50000 ? 4096 : 2048;
|
||||||
s->overlap = s->fft_size / 4;
|
s->overlap = s->fft_size / 4;
|
||||||
|
|
||||||
s->window = av_calloc(s->fft_size, sizeof(*s->window));
|
|
||||||
if (!s->window)
|
|
||||||
return AVERROR(ENOMEM);
|
|
||||||
|
|
||||||
s->in_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
|
s->in_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
|
||||||
s->center_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
|
s->center_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
|
||||||
s->out_dist_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
|
s->out_dist_frame = ff_get_audio_buffer(inlink, (s->fft_size + 2) * 2);
|
||||||
@ -106,202 +113,18 @@ static int config_input(AVFilterLink *inlink)
|
|||||||
!s->out_dist_frame || !s->windowed_frame || !s->center_frame)
|
!s->out_dist_frame || !s->windowed_frame || !s->center_frame)
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
generate_window_func(s->window, s->fft_size, WFUNC_SINE, &overlap);
|
switch (inlink->format) {
|
||||||
|
case AV_SAMPLE_FMT_FLTP:
|
||||||
|
s->de_stereo = de_stereo_float;
|
||||||
|
ret = de_tx_init_float(ctx);
|
||||||
|
break;
|
||||||
|
case AV_SAMPLE_FMT_DBLP:
|
||||||
|
s->de_stereo = de_stereo_double;
|
||||||
|
ret = de_tx_init_double(ctx);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
iscale = 1.f / (s->fft_size * 1.5f);
|
|
||||||
|
|
||||||
ret = av_tx_init(&s->tx_ctx[0], &s->tx_fn, AV_TX_FLOAT_RDFT, 0, s->fft_size, &scale, 0);
|
|
||||||
if (ret < 0)
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
ret = av_tx_init(&s->tx_ctx[1], &s->tx_fn, AV_TX_FLOAT_RDFT, 0, s->fft_size, &scale, 0);
|
|
||||||
if (ret < 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = av_tx_init(&s->itx_ctx, &s->itx_fn, AV_TX_FLOAT_RDFT, 1, s->fft_size, &iscale, 0);
|
|
||||||
if (ret < 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void apply_window(AudioDialogueEnhanceContext *s,
|
|
||||||
const float *in_frame, float *out_frame, const int add_to_out_frame)
|
|
||||||
{
|
|
||||||
const float *window = s->window;
|
|
||||||
|
|
||||||
if (add_to_out_frame) {
|
|
||||||
for (int i = 0; i < s->fft_size; i++)
|
|
||||||
out_frame[i] += in_frame[i] * window[i];
|
|
||||||
} else {
|
|
||||||
for (int i = 0; i < s->fft_size; i++)
|
|
||||||
out_frame[i] = in_frame[i] * window[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static float sqrf(float x)
|
|
||||||
{
|
|
||||||
return x * x;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void get_centere(AVComplexFloat *left, AVComplexFloat *right,
|
|
||||||
AVComplexFloat *center, int N)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < N; i++) {
|
|
||||||
const float l_re = left[i].re;
|
|
||||||
const float l_im = left[i].im;
|
|
||||||
const float r_re = right[i].re;
|
|
||||||
const float r_im = right[i].im;
|
|
||||||
const float a = 0.5f * (1.f - sqrtf((sqrf(l_re - r_re) + sqrf(l_im - r_im))/
|
|
||||||
(sqrf(l_re + r_re) + sqrf(l_im + r_im) + FLT_EPSILON)));
|
|
||||||
|
|
||||||
center[i].re = a * (l_re + r_re);
|
|
||||||
center[i].im = a * (l_im + r_im);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static float flux(float *curf, float *prevf, int N)
|
|
||||||
{
|
|
||||||
AVComplexFloat *cur = (AVComplexFloat *)curf;
|
|
||||||
AVComplexFloat *prev = (AVComplexFloat *)prevf;
|
|
||||||
float sum = 0.f;
|
|
||||||
|
|
||||||
for (int i = 0; i < N; i++) {
|
|
||||||
float c_re = cur[i].re;
|
|
||||||
float c_im = cur[i].im;
|
|
||||||
float p_re = prev[i].re;
|
|
||||||
float p_im = prev[i].im;
|
|
||||||
|
|
||||||
sum += sqrf(hypotf(c_re, c_im) - hypotf(p_re, p_im));
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
static float fluxlr(float *lf, float *lpf,
|
|
||||||
float *rf, float *rpf,
|
|
||||||
int N)
|
|
||||||
{
|
|
||||||
AVComplexFloat *l = (AVComplexFloat *)lf;
|
|
||||||
AVComplexFloat *lp = (AVComplexFloat *)lpf;
|
|
||||||
AVComplexFloat *r = (AVComplexFloat *)rf;
|
|
||||||
AVComplexFloat *rp = (AVComplexFloat *)rpf;
|
|
||||||
float sum = 0.f;
|
|
||||||
|
|
||||||
for (int i = 0; i < N; i++) {
|
|
||||||
float c_re = l[i].re - r[i].re;
|
|
||||||
float c_im = l[i].im - r[i].im;
|
|
||||||
float p_re = lp[i].re - rp[i].re;
|
|
||||||
float p_im = lp[i].im - rp[i].im;
|
|
||||||
|
|
||||||
sum += sqrf(hypotf(c_re, c_im) - hypotf(p_re, p_im));
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
static float calc_vad(float fc, float flr, float a)
|
|
||||||
{
|
|
||||||
const float vad = a * (fc / (fc + flr) - 0.5f);
|
|
||||||
|
|
||||||
return av_clipf(vad, 0.f, 1.f);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void get_final(float *c, float *l,
|
|
||||||
float *r, float vad, int N,
|
|
||||||
float original, float enhance)
|
|
||||||
{
|
|
||||||
AVComplexFloat *center = (AVComplexFloat *)c;
|
|
||||||
AVComplexFloat *left = (AVComplexFloat *)l;
|
|
||||||
AVComplexFloat *right = (AVComplexFloat *)r;
|
|
||||||
|
|
||||||
for (int i = 0; i < N; i++) {
|
|
||||||
float cP = sqrf(center[i].re) + sqrf(center[i].im);
|
|
||||||
float lrP = sqrf(left[i].re - right[i].re) + sqrf(left[i].im - right[i].im);
|
|
||||||
float G = cP / (cP + lrP + FLT_EPSILON);
|
|
||||||
float re, im;
|
|
||||||
|
|
||||||
re = center[i].re * (original + vad * G * enhance);
|
|
||||||
im = center[i].im * (original + vad * G * enhance);
|
|
||||||
|
|
||||||
center[i].re = re;
|
|
||||||
center[i].im = im;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int de_stereo(AVFilterContext *ctx, AVFrame *out)
|
|
||||||
{
|
|
||||||
AudioDialogueEnhanceContext *s = ctx->priv;
|
|
||||||
float *center = (float *)s->center_frame->extended_data[0];
|
|
||||||
float *center_prev = (float *)s->center_frame->extended_data[1];
|
|
||||||
float *left_in = (float *)s->in_frame->extended_data[0];
|
|
||||||
float *right_in = (float *)s->in_frame->extended_data[1];
|
|
||||||
float *left_out = (float *)s->out_dist_frame->extended_data[0];
|
|
||||||
float *right_out = (float *)s->out_dist_frame->extended_data[1];
|
|
||||||
float *left_samples = (float *)s->in->extended_data[0];
|
|
||||||
float *right_samples = (float *)s->in->extended_data[1];
|
|
||||||
float *windowed_left = (float *)s->windowed_frame->extended_data[0];
|
|
||||||
float *windowed_right = (float *)s->windowed_frame->extended_data[1];
|
|
||||||
float *windowed_oleft = (float *)s->windowed_out->extended_data[0];
|
|
||||||
float *windowed_oright = (float *)s->windowed_out->extended_data[1];
|
|
||||||
float *windowed_pleft = (float *)s->windowed_prev->extended_data[0];
|
|
||||||
float *windowed_pright = (float *)s->windowed_prev->extended_data[1];
|
|
||||||
float *left_osamples = (float *)out->extended_data[0];
|
|
||||||
float *right_osamples = (float *)out->extended_data[1];
|
|
||||||
float *center_osamples = (float *)out->extended_data[2];
|
|
||||||
const int overlap = s->overlap;
|
|
||||||
const int offset = s->fft_size - overlap;
|
|
||||||
const int nb_samples = FFMIN(overlap, s->in->nb_samples);
|
|
||||||
float vad;
|
|
||||||
|
|
||||||
// shift in/out buffers
|
|
||||||
memmove(left_in, &left_in[overlap], offset * sizeof(float));
|
|
||||||
memmove(right_in, &right_in[overlap], offset * sizeof(float));
|
|
||||||
memmove(left_out, &left_out[overlap], offset * sizeof(float));
|
|
||||||
memmove(right_out, &right_out[overlap], offset * sizeof(float));
|
|
||||||
|
|
||||||
memcpy(&left_in[offset], left_samples, nb_samples * sizeof(float));
|
|
||||||
memcpy(&right_in[offset], right_samples, nb_samples * sizeof(float));
|
|
||||||
memset(&left_out[offset], 0, overlap * sizeof(float));
|
|
||||||
memset(&right_out[offset], 0, overlap * sizeof(float));
|
|
||||||
|
|
||||||
apply_window(s, left_in, windowed_left, 0);
|
|
||||||
apply_window(s, right_in, windowed_right, 0);
|
|
||||||
|
|
||||||
s->tx_fn(s->tx_ctx[0], windowed_oleft, windowed_left, sizeof(float));
|
|
||||||
s->tx_fn(s->tx_ctx[1], windowed_oright, windowed_right, sizeof(float));
|
|
||||||
|
|
||||||
get_centere((AVComplexFloat *)windowed_oleft,
|
|
||||||
(AVComplexFloat *)windowed_oright,
|
|
||||||
(AVComplexFloat *)center,
|
|
||||||
s->fft_size / 2 + 1);
|
|
||||||
|
|
||||||
vad = calc_vad(flux(center, center_prev, s->fft_size / 2 + 1),
|
|
||||||
fluxlr(windowed_oleft, windowed_pleft,
|
|
||||||
windowed_oright, windowed_pright, s->fft_size / 2 + 1), s->voice);
|
|
||||||
vad = vad * 0.1 + 0.9 * s->prev_vad;
|
|
||||||
s->prev_vad = vad;
|
|
||||||
|
|
||||||
memcpy(center_prev, center, s->fft_size * sizeof(float));
|
|
||||||
memcpy(windowed_pleft, windowed_oleft, s->fft_size * sizeof(float));
|
|
||||||
memcpy(windowed_pright, windowed_oright, s->fft_size * sizeof(float));
|
|
||||||
|
|
||||||
get_final(center, windowed_oleft, windowed_oright, vad, s->fft_size / 2 + 1,
|
|
||||||
s->original, s->enhance);
|
|
||||||
|
|
||||||
s->itx_fn(s->itx_ctx, windowed_oleft, center, sizeof(AVComplexFloat));
|
|
||||||
|
|
||||||
apply_window(s, windowed_oleft, left_out, 1);
|
|
||||||
|
|
||||||
memcpy(left_osamples, left_in, overlap * sizeof(float));
|
|
||||||
memcpy(right_osamples, right_in, overlap * sizeof(float));
|
|
||||||
|
|
||||||
if (ctx->is_disabled)
|
|
||||||
memset(center_osamples, 0, overlap * sizeof(float));
|
|
||||||
else
|
|
||||||
memcpy(center_osamples, left_out, overlap * sizeof(float));
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
||||||
@ -319,7 +142,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
|||||||
}
|
}
|
||||||
|
|
||||||
s->in = in;
|
s->in = in;
|
||||||
de_stereo(ctx, out);
|
s->de_stereo(ctx, out);
|
||||||
|
|
||||||
av_frame_copy_props(out, in);
|
av_frame_copy_props(out, in);
|
||||||
out->nb_samples = in->nb_samples;
|
out->nb_samples = in->nb_samples;
|
||||||
|
274
libavfilter/dialoguenhance_template.c
Normal file
274
libavfilter/dialoguenhance_template.c
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/tx.h"
|
||||||
|
#include "avfilter.h"
|
||||||
|
#include "internal.h"
|
||||||
|
#include "audio.h"
|
||||||
|
|
||||||
|
#undef ctype
|
||||||
|
#undef ftype
|
||||||
|
#undef SQRT
|
||||||
|
#undef HYPOT
|
||||||
|
#undef SAMPLE_FORMAT
|
||||||
|
#undef TX_TYPE
|
||||||
|
#undef ONE
|
||||||
|
#undef ZERO
|
||||||
|
#undef HALF
|
||||||
|
#undef SIN
|
||||||
|
#undef CLIP
|
||||||
|
#undef EPSILON
|
||||||
|
#if DEPTH == 32
|
||||||
|
#define SAMPLE_FORMAT float
|
||||||
|
#define SQRT sqrtf
|
||||||
|
#define HYPOT hypotf
|
||||||
|
#define ctype AVComplexFloat
|
||||||
|
#define ftype float
|
||||||
|
#define TX_TYPE AV_TX_FLOAT_RDFT
|
||||||
|
#define ONE 1.f
|
||||||
|
#define ZERO 0.f
|
||||||
|
#define HALF 0.5f
|
||||||
|
#define SIN sinf
|
||||||
|
#define CLIP av_clipf
|
||||||
|
#define EPSILON FLT_EPSILON
|
||||||
|
#else
|
||||||
|
#define SAMPLE_FORMAT double
|
||||||
|
#define SQRT sqrt
|
||||||
|
#define HYPOT hypot
|
||||||
|
#define ctype AVComplexDouble
|
||||||
|
#define ftype double
|
||||||
|
#define TX_TYPE AV_TX_DOUBLE_RDFT
|
||||||
|
#define ONE 1.0
|
||||||
|
#define ZERO 0.0
|
||||||
|
#define HALF 0.5
|
||||||
|
#define SIN sin
|
||||||
|
#define CLIP av_clipd
|
||||||
|
#define EPSILON DBL_EPSILON
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define fn3(a,b) a##_##b
|
||||||
|
#define fn2(a,b) fn3(a,b)
|
||||||
|
#define fn(a) fn2(a, SAMPLE_FORMAT)
|
||||||
|
|
||||||
|
static int fn(de_tx_init)(AVFilterContext *ctx)
|
||||||
|
{
|
||||||
|
AudioDialogueEnhanceContext *s = ctx->priv;
|
||||||
|
ftype scale = ONE, iscale = ONE / (s->fft_size * 1.5f);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
s->window = av_calloc(s->fft_size, sizeof(ftype));
|
||||||
|
if (!s->window)
|
||||||
|
return AVERROR(ENOMEM);
|
||||||
|
fn(s->window) = s->window;
|
||||||
|
for (int n = 0; n < s->fft_size; n++)
|
||||||
|
fn(s->window)[n] = SIN(M_PI*n/(s->fft_size-1));
|
||||||
|
|
||||||
|
ret = av_tx_init(&s->tx_ctx[0], &s->tx_fn, TX_TYPE, 0, s->fft_size, &scale, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = av_tx_init(&s->tx_ctx[1], &s->tx_fn, TX_TYPE, 0, s->fft_size, &scale, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = av_tx_init(&s->itx_ctx, &s->itx_fn, TX_TYPE, 1, s->fft_size, &iscale, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fn(apply_window)(AudioDialogueEnhanceContext *s,
|
||||||
|
const ftype *in_frame, ftype *out_frame, const int add_to_out_frame)
|
||||||
|
{
|
||||||
|
const ftype *window = fn(s->window);
|
||||||
|
const int fft_size = s->fft_size;
|
||||||
|
|
||||||
|
if (add_to_out_frame) {
|
||||||
|
for (int i = 0; i < fft_size; i++)
|
||||||
|
out_frame[i] += in_frame[i] * window[i];
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < fft_size; i++)
|
||||||
|
out_frame[i] = in_frame[i] * window[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static ftype fn(sqr)(ftype x)
|
||||||
|
{
|
||||||
|
return x * x;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fn(get_centere)(ctype *left, ctype *right,
|
||||||
|
ctype *center, int N)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
const ftype l_re = left[i].re;
|
||||||
|
const ftype l_im = left[i].im;
|
||||||
|
const ftype r_re = right[i].re;
|
||||||
|
const ftype r_im = right[i].im;
|
||||||
|
const ftype a = HALF * (ONE - SQRT((fn(sqr)(l_re - r_re) + fn(sqr)(l_im - r_im))/
|
||||||
|
(fn(sqr)(l_re + r_re) + fn(sqr)(l_im + r_im) + EPSILON)));
|
||||||
|
|
||||||
|
center[i].re = a * (l_re + r_re);
|
||||||
|
center[i].im = a * (l_im + r_im);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static ftype fn(flux)(ftype *curf, ftype *prevf, int N)
|
||||||
|
{
|
||||||
|
ctype *cur = (ctype *)curf;
|
||||||
|
ctype *prev = (ctype *)prevf;
|
||||||
|
ftype sum = ZERO;
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
ftype c_re = cur[i].re;
|
||||||
|
ftype c_im = cur[i].im;
|
||||||
|
ftype p_re = prev[i].re;
|
||||||
|
ftype p_im = prev[i].im;
|
||||||
|
|
||||||
|
sum += fn(sqr)(HYPOT(c_re, c_im) - HYPOT(p_re, p_im));
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ftype fn(fluxlr)(ftype *lf, ftype *lpf,
|
||||||
|
ftype *rf, ftype *rpf,
|
||||||
|
int N)
|
||||||
|
{
|
||||||
|
ctype *l = (ctype *)lf;
|
||||||
|
ctype *lp = (ctype *)lpf;
|
||||||
|
ctype *r = (ctype *)rf;
|
||||||
|
ctype *rp = (ctype *)rpf;
|
||||||
|
ftype sum = ZERO;
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
ftype c_re = l[i].re - r[i].re;
|
||||||
|
ftype c_im = l[i].im - r[i].im;
|
||||||
|
ftype p_re = lp[i].re - rp[i].re;
|
||||||
|
ftype p_im = lp[i].im - rp[i].im;
|
||||||
|
|
||||||
|
sum += fn(sqr)(HYPOT(c_re, c_im) - HYPOT(p_re, p_im));
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ftype fn(calc_vad)(ftype fc, ftype flr, ftype a)
|
||||||
|
{
|
||||||
|
const ftype vad = a * (fc / (fc + flr) - HALF);
|
||||||
|
|
||||||
|
return CLIP(vad, ZERO, ONE);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fn(get_final)(ftype *c, ftype *l,
|
||||||
|
ftype *r, ftype vad, int N,
|
||||||
|
ftype original, ftype enhance)
|
||||||
|
{
|
||||||
|
ctype *center = (ctype *)c;
|
||||||
|
ctype *left = (ctype *)l;
|
||||||
|
ctype *right = (ctype *)r;
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++) {
|
||||||
|
ftype cP = fn(sqr)(center[i].re) + fn(sqr)(center[i].im);
|
||||||
|
ftype lrP = fn(sqr)(left[i].re - right[i].re) + fn(sqr)(left[i].im - right[i].im);
|
||||||
|
ftype G = cP / (cP + lrP + EPSILON);
|
||||||
|
ftype re, im;
|
||||||
|
|
||||||
|
re = center[i].re * (original + vad * G * enhance);
|
||||||
|
im = center[i].im * (original + vad * G * enhance);
|
||||||
|
|
||||||
|
center[i].re = re;
|
||||||
|
center[i].im = im;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int fn(de_stereo)(AVFilterContext *ctx, AVFrame *out)
|
||||||
|
{
|
||||||
|
AudioDialogueEnhanceContext *s = ctx->priv;
|
||||||
|
ftype *center = (ftype *)s->center_frame->extended_data[0];
|
||||||
|
ftype *center_prev = (ftype *)s->center_frame->extended_data[1];
|
||||||
|
ftype *left_in = (ftype *)s->in_frame->extended_data[0];
|
||||||
|
ftype *right_in = (ftype *)s->in_frame->extended_data[1];
|
||||||
|
ftype *left_out = (ftype *)s->out_dist_frame->extended_data[0];
|
||||||
|
ftype *right_out = (ftype *)s->out_dist_frame->extended_data[1];
|
||||||
|
ftype *left_samples = (ftype *)s->in->extended_data[0];
|
||||||
|
ftype *right_samples = (ftype *)s->in->extended_data[1];
|
||||||
|
ftype *windowed_left = (ftype *)s->windowed_frame->extended_data[0];
|
||||||
|
ftype *windowed_right = (ftype *)s->windowed_frame->extended_data[1];
|
||||||
|
ftype *windowed_oleft = (ftype *)s->windowed_out->extended_data[0];
|
||||||
|
ftype *windowed_oright = (ftype *)s->windowed_out->extended_data[1];
|
||||||
|
ftype *windowed_pleft = (ftype *)s->windowed_prev->extended_data[0];
|
||||||
|
ftype *windowed_pright = (ftype *)s->windowed_prev->extended_data[1];
|
||||||
|
ftype *left_osamples = (ftype *)out->extended_data[0];
|
||||||
|
ftype *right_osamples = (ftype *)out->extended_data[1];
|
||||||
|
ftype *center_osamples = (ftype *)out->extended_data[2];
|
||||||
|
const int overlap = s->overlap;
|
||||||
|
const int offset = s->fft_size - overlap;
|
||||||
|
const int nb_samples = FFMIN(overlap, s->in->nb_samples);
|
||||||
|
ftype vad;
|
||||||
|
|
||||||
|
// shift in/out buffers
|
||||||
|
memmove(left_in, &left_in[overlap], offset * sizeof(ftype));
|
||||||
|
memmove(right_in, &right_in[overlap], offset * sizeof(ftype));
|
||||||
|
memmove(left_out, &left_out[overlap], offset * sizeof(ftype));
|
||||||
|
memmove(right_out, &right_out[overlap], offset * sizeof(ftype));
|
||||||
|
|
||||||
|
memcpy(&left_in[offset], left_samples, nb_samples * sizeof(ftype));
|
||||||
|
memcpy(&right_in[offset], right_samples, nb_samples * sizeof(ftype));
|
||||||
|
memset(&left_out[offset], 0, overlap * sizeof(ftype));
|
||||||
|
memset(&right_out[offset], 0, overlap * sizeof(ftype));
|
||||||
|
|
||||||
|
fn(apply_window)(s, left_in, windowed_left, 0);
|
||||||
|
fn(apply_window)(s, right_in, windowed_right, 0);
|
||||||
|
|
||||||
|
s->tx_fn(s->tx_ctx[0], windowed_oleft, windowed_left, sizeof(ftype));
|
||||||
|
s->tx_fn(s->tx_ctx[1], windowed_oright, windowed_right, sizeof(ftype));
|
||||||
|
|
||||||
|
fn(get_centere)((ctype *)windowed_oleft,
|
||||||
|
(ctype *)windowed_oright,
|
||||||
|
(ctype *)center,
|
||||||
|
s->fft_size / 2 + 1);
|
||||||
|
|
||||||
|
vad = fn(calc_vad)(fn(flux)(center, center_prev, s->fft_size / 2 + 1),
|
||||||
|
fn(fluxlr)(windowed_oleft, windowed_pleft,
|
||||||
|
windowed_oright, windowed_pright, s->fft_size / 2 + 1), s->voice);
|
||||||
|
vad = vad * 0.1 + 0.9 * fn(s->prev_vad);
|
||||||
|
fn(s->prev_vad) = vad;
|
||||||
|
|
||||||
|
memcpy(center_prev, center, s->fft_size * sizeof(ftype));
|
||||||
|
memcpy(windowed_pleft, windowed_oleft, s->fft_size * sizeof(ftype));
|
||||||
|
memcpy(windowed_pright, windowed_oright, s->fft_size * sizeof(ftype));
|
||||||
|
|
||||||
|
fn(get_final)(center, windowed_oleft, windowed_oright, vad, s->fft_size / 2 + 1,
|
||||||
|
s->original, s->enhance);
|
||||||
|
|
||||||
|
s->itx_fn(s->itx_ctx, windowed_oleft, center, sizeof(ctype));
|
||||||
|
|
||||||
|
fn(apply_window)(s, windowed_oleft, left_out, 1);
|
||||||
|
|
||||||
|
memcpy(left_osamples, left_in, overlap * sizeof(ftype));
|
||||||
|
memcpy(right_osamples, right_in, overlap * sizeof(ftype));
|
||||||
|
|
||||||
|
if (ctx->is_disabled)
|
||||||
|
memset(center_osamples, 0, overlap * sizeof(ftype));
|
||||||
|
else
|
||||||
|
memcpy(center_osamples, left_out, overlap * sizeof(ftype));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user