You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-10 06:10:52 +02:00
avfilter/af_headphone: speed up fast convolution
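Do the IFFT only once per output channel: accumulate the frequency-domain products of all input channels in a new per-job buffer (temp_afft) and inverse-transform the sum, instead of running one IFFT per input channel.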
This commit is contained in:
@@ -73,6 +73,7 @@ typedef struct HeadphoneContext {
|
|||||||
float *data_ir[2];
|
float *data_ir[2];
|
||||||
float *temp_src[2];
|
float *temp_src[2];
|
||||||
FFTComplex *temp_fft[2];
|
FFTComplex *temp_fft[2];
|
||||||
|
FFTComplex *temp_afft[2];
|
||||||
|
|
||||||
FFTContext *fft[2], *ifft[2];
|
FFTContext *fft[2], *ifft[2];
|
||||||
FFTComplex *data_hrtf[2];
|
FFTComplex *data_hrtf[2];
|
||||||
@@ -158,6 +159,7 @@ typedef struct ThreadData {
|
|||||||
float **ringbuffer;
|
float **ringbuffer;
|
||||||
float **temp_src;
|
float **temp_src;
|
||||||
FFTComplex **temp_fft;
|
FFTComplex **temp_fft;
|
||||||
|
FFTComplex **temp_afft;
|
||||||
} ThreadData;
|
} ThreadData;
|
||||||
|
|
||||||
static int headphone_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
static int headphone_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
||||||
@@ -251,6 +253,7 @@ static int headphone_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
|
|||||||
const int buffer_length = s->buffer_length;
|
const int buffer_length = s->buffer_length;
|
||||||
const uint32_t modulo = (uint32_t)buffer_length - 1;
|
const uint32_t modulo = (uint32_t)buffer_length - 1;
|
||||||
FFTComplex *fft_in = s->temp_fft[jobnr];
|
FFTComplex *fft_in = s->temp_fft[jobnr];
|
||||||
|
FFTComplex *fft_acc = s->temp_afft[jobnr];
|
||||||
FFTContext *ifft = s->ifft[jobnr];
|
FFTContext *ifft = s->ifft[jobnr];
|
||||||
FFTContext *fft = s->fft[jobnr];
|
FFTContext *fft = s->fft[jobnr];
|
||||||
const int n_fft = s->n_fft;
|
const int n_fft = s->n_fft;
|
||||||
@@ -273,6 +276,8 @@ static int headphone_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
|
|||||||
dst[2 * j] = 0;
|
dst[2 * j] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memset(fft_acc, 0, sizeof(FFTComplex) * n_fft);
|
||||||
|
|
||||||
for (i = 0; i < in_channels; i++) {
|
for (i = 0; i < in_channels; i++) {
|
||||||
if (i == s->lfe_channel) {
|
if (i == s->lfe_channel) {
|
||||||
for (j = 0; j < in->nb_samples; j++) {
|
for (j = 0; j < in->nb_samples; j++) {
|
||||||
@@ -297,22 +302,22 @@ static int headphone_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
|
|||||||
const float re = fft_in[j].re;
|
const float re = fft_in[j].re;
|
||||||
const float im = fft_in[j].im;
|
const float im = fft_in[j].im;
|
||||||
|
|
||||||
fft_in[j].re = re * hcomplex->re - im * hcomplex->im;
|
fft_acc[j].re += re * hcomplex->re - im * hcomplex->im;
|
||||||
fft_in[j].im = re * hcomplex->im + im * hcomplex->re;
|
fft_acc[j].im += re * hcomplex->im + im * hcomplex->re;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
av_fft_permute(ifft, fft_in);
|
av_fft_permute(ifft, fft_acc);
|
||||||
av_fft_calc(ifft, fft_in);
|
av_fft_calc(ifft, fft_acc);
|
||||||
|
|
||||||
for (j = 0; j < in->nb_samples; j++) {
|
for (j = 0; j < in->nb_samples; j++) {
|
||||||
dst[2 * j] += fft_in[j].re * fft_scale;
|
dst[2 * j] += fft_acc[j].re * fft_scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = 0; j < ir_len - 1; j++) {
|
for (j = 0; j < ir_len - 1; j++) {
|
||||||
int write_pos = (wr + j) & modulo;
|
int write_pos = (wr + j) & modulo;
|
||||||
|
|
||||||
*(ringbuffer + write_pos) += fft_in[in->nb_samples + j].re * fft_scale;
|
*(ringbuffer + write_pos) += fft_acc[in->nb_samples + j].re * fft_scale;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < out->nb_samples; i++) {
|
for (i = 0; i < out->nb_samples; i++) {
|
||||||
@@ -364,6 +369,7 @@ static int headphone_frame(HeadphoneContext *s, AVFrame *in, AVFilterLink *outli
|
|||||||
td.delay = s->delay; td.ir = s->data_ir; td.n_clippings = n_clippings;
|
td.delay = s->delay; td.ir = s->data_ir; td.n_clippings = n_clippings;
|
||||||
td.ringbuffer = s->ringbuffer; td.temp_src = s->temp_src;
|
td.ringbuffer = s->ringbuffer; td.temp_src = s->temp_src;
|
||||||
td.temp_fft = s->temp_fft;
|
td.temp_fft = s->temp_fft;
|
||||||
|
td.temp_afft = s->temp_afft;
|
||||||
|
|
||||||
if (s->type == TIME_DOMAIN) {
|
if (s->type == TIME_DOMAIN) {
|
||||||
ctx->internal->execute(ctx, headphone_convolute, &td, NULL, 2);
|
ctx->internal->execute(ctx, headphone_convolute, &td, NULL, 2);
|
||||||
@@ -439,7 +445,10 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
|
|||||||
s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float));
|
s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float));
|
||||||
s->temp_fft[0] = av_calloc(s->n_fft, sizeof(FFTComplex));
|
s->temp_fft[0] = av_calloc(s->n_fft, sizeof(FFTComplex));
|
||||||
s->temp_fft[1] = av_calloc(s->n_fft, sizeof(FFTComplex));
|
s->temp_fft[1] = av_calloc(s->n_fft, sizeof(FFTComplex));
|
||||||
if (!s->temp_fft[0] || !s->temp_fft[1]) {
|
s->temp_afft[0] = av_calloc(s->n_fft, sizeof(FFTComplex));
|
||||||
|
s->temp_afft[1] = av_calloc(s->n_fft, sizeof(FFTComplex));
|
||||||
|
if (!s->temp_fft[0] || !s->temp_fft[1] ||
|
||||||
|
!s->temp_afft[0] || !s->temp_afft[1]) {
|
||||||
ret = AVERROR(ENOMEM);
|
ret = AVERROR(ENOMEM);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
@@ -819,6 +828,8 @@ static av_cold void uninit(AVFilterContext *ctx)
|
|||||||
av_freep(&s->temp_src[1]);
|
av_freep(&s->temp_src[1]);
|
||||||
av_freep(&s->temp_fft[0]);
|
av_freep(&s->temp_fft[0]);
|
||||||
av_freep(&s->temp_fft[1]);
|
av_freep(&s->temp_fft[1]);
|
||||||
|
av_freep(&s->temp_afft[0]);
|
||||||
|
av_freep(&s->temp_afft[1]);
|
||||||
av_freep(&s->data_hrtf[0]);
|
av_freep(&s->data_hrtf[0]);
|
||||||
av_freep(&s->data_hrtf[1]);
|
av_freep(&s->data_hrtf[1]);
|
||||||
av_freep(&s->fdsp);
|
av_freep(&s->fdsp);
|
||||||
|
Reference in New Issue
Block a user