/*
* Copyright (c) 2022 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
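/**
 * @file
 * showcwt filter: convert input audio to a video showing a CWT
 * (Continuous Wavelet Transform) style spectrum, implemented here with a
 * long forward FFT, per-band Gaussian frequency-domain kernels and a
 * short inverse FFT per band.
 */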
#include <float.h>
#include <math.h>
#include "libavutil/tx.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "libavutil/cpu.h"
#include "libavutil/opt.h"
#include "libavutil/parseutils.h"
#include "audio.h"
#include "video.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"
enum FrequencyScale {
FSCALE_LINEAR,
FSCALE_LOG2,
FSCALE_BARK,
FSCALE_MEL,
FSCALE_ERBS,
FSCALE_SQRT,
FSCALE_CBRT,
FSCALE_QDRT,
NB_FSCALE
};
enum DirectionMode {
DIRECTION_LR,
DIRECTION_RL,
DIRECTION_UD,
DIRECTION_DU,
NB_DIRECTION
};
enum SlideMode {
SLIDE_REPLACE,
SLIDE_SCROLL,
SLIDE_FRAME,
NB_SLIDE
};
typedef struct ShowCWTContext {
const AVClass *class;
int w, h;
int mode;
char *rate_str;
AVRational auto_frame_rate;
AVRational frame_rate;
AVTXContext **fft;
AVTXContext **ifft;
av_tx_fn tx_fn;
av_tx_fn itx_fn;
int fft_in_size;
int fft_out_size;
int ifft_in_size;
int ifft_out_size;
int pos;
int64_t in_pts;
int64_t old_pts;
int64_t eof_pts;
float *frequency_band;
AVFrame *kernel;
unsigned *index;
int *kernel_start;
int *kernel_stop;
AVFrame *cache;
AVFrame *outpicref;
AVFrame *fft_in;
AVFrame *fft_out;
AVFrame *dst_x;
AVFrame *src_x;
AVFrame *kernel_x;
AVFrame *ifft_in;
AVFrame *ifft_out;
AVFrame *ch_out;
int nb_threads;
int nb_channels;
int nb_consumed_samples;
int pps;
int eof;
int slide;
int new_frame;
int direction;
int hop_size;
int hop_index;
int ihop_size;
int ihop_index;
int input_padding_size;
int input_sample_count;
int output_padding_size;
int output_sample_count;
int frequency_band_count;
float logarithmic_basis;
int frequency_scale;
float minimum_frequency;
float maximum_frequency;
float deviation;
AVFloatDSPContext *fdsp;
} ShowCWTContext;
#define OFFSET(x) offsetof(ShowCWTContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
static const AVOption showcwt_options[] = {
{ "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
{ "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
{ "rate", "set video rate", OFFSET(rate_str), AV_OPT_TYPE_STRING, {.str = "25"}, 0, 0, FLAGS },
{ "r", "set video rate", OFFSET(rate_str), AV_OPT_TYPE_STRING, {.str = "25"}, 0, 0, FLAGS },
{ "scale", "set frequency scale", OFFSET(frequency_scale), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_FSCALE-1, FLAGS, "scale" },
{ "linear", "linear", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_LINEAR}, 0, 0, FLAGS, "scale" },
{ "log2", "logarithmic", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_LOG2}, 0, 0, FLAGS, "scale" },
{ "bark", "bark", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_BARK}, 0, 0, FLAGS, "scale" },
{ "mel", "mel", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_MEL}, 0, 0, FLAGS, "scale" },
{ "erbs", "erbs", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_ERBS}, 0, 0, FLAGS, "scale" },
{ "sqrt", "sqrt", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_SQRT}, 0, 0, FLAGS, "scale" },
{ "cbrt", "cbrt", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_CBRT}, 0, 0, FLAGS, "scale" },
{ "qdrt", "qdrt", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_QDRT}, 0, 0, FLAGS, "scale" },
{ "min", "set minimum frequency", OFFSET(minimum_frequency), AV_OPT_TYPE_FLOAT, {.dbl = 20.}, 1, 2000, FLAGS },
{ "max", "set maximum frequency", OFFSET(maximum_frequency), AV_OPT_TYPE_FLOAT, {.dbl = 20000.}, 0, 192000, FLAGS },
{ "logb", "set logarithmic basis", OFFSET(logarithmic_basis), AV_OPT_TYPE_FLOAT, {.dbl = 0.0001}, 0, 1, FLAGS },
{ "deviation", "set frequency deviation", OFFSET(deviation), AV_OPT_TYPE_FLOAT, {.dbl = 1.}, 0, 10, FLAGS },
{ "pps", "set pixels per second", OFFSET(pps), AV_OPT_TYPE_INT, {.i64 = 64}, 1, 1024, FLAGS },
{ "mode", "set output mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 4, FLAGS, "mode" },
{ "magnitude", "magnitude", 0, AV_OPT_TYPE_CONST,{.i64=0}, 0, 0, FLAGS, "mode" },
{ "phase", "phase", 0, AV_OPT_TYPE_CONST,{.i64=1}, 0, 0, FLAGS, "mode" },
{ "magphase", "magnitude+phase", 0, AV_OPT_TYPE_CONST,{.i64=2}, 0, 0, FLAGS, "mode" },
{ "channel", "color per channel", 0, AV_OPT_TYPE_CONST,{.i64=3}, 0, 0, FLAGS, "mode" },
{ "stereo", "stereo difference", 0, AV_OPT_TYPE_CONST,{.i64=4}, 0, 0, FLAGS, "mode" },
{ "slide", "set slide mode", OFFSET(slide), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_SLIDE-1, FLAGS, "slide" },
{ "replace", "replace", 0, AV_OPT_TYPE_CONST,{.i64=SLIDE_REPLACE},0, 0, FLAGS, "slide" },
{ "scroll", "scroll", 0, AV_OPT_TYPE_CONST,{.i64=SLIDE_SCROLL}, 0, 0, FLAGS, "slide" },
{ "frame", "frame", 0, AV_OPT_TYPE_CONST,{.i64=SLIDE_FRAME}, 0, 0, FLAGS, "slide" },
{ "direction", "set direction mode", OFFSET(direction), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_DIRECTION-1, FLAGS, "direction" },
{ "lr", "left to right", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_LR}, 0, 0, FLAGS, "direction" },
{ "rl", "right to left", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_RL}, 0, 0, FLAGS, "direction" },
{ "ud", "up to down", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_UD}, 0, 0, FLAGS, "direction" },
{ "du", "down to up", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_DU}, 0, 0, FLAGS, "direction" },
{ NULL }
};
AVFILTER_DEFINE_CLASS(showcwt);
static av_cold void uninit(AVFilterContext *ctx)
{
ShowCWTContext *s = ctx->priv;
av_freep(&s->frequency_band);
av_freep(&s->kernel_start);
av_freep(&s->kernel_stop);
av_freep(&s->index);
av_frame_free(&s->kernel);
av_frame_free(&s->cache);
av_frame_free(&s->outpicref);
av_frame_free(&s->fft_in);
av_frame_free(&s->fft_out);
av_frame_free(&s->dst_x);
av_frame_free(&s->src_x);
av_frame_free(&s->kernel_x);
av_frame_free(&s->ifft_in);
av_frame_free(&s->ifft_out);
av_frame_free(&s->ch_out);
if (s->fft) {
for (int n = 0; n < s->nb_threads; n++)
av_tx_uninit(&s->fft[n]);
av_freep(&s->fft);
}
if (s->ifft) {
for (int n = 0; n < s->nb_threads; n++)
av_tx_uninit(&s->ifft[n]);
av_freep(&s->ifft);
}
av_freep(&s->fdsp);
}
static int query_formats(AVFilterContext *ctx)
{
AVFilterFormats *formats = NULL;
AVFilterChannelLayouts *layouts = NULL;
AVFilterLink *inlink = ctx->inputs[0];
AVFilterLink *outlink = ctx->outputs[0];
static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE };
int ret;
formats = ff_make_format_list(sample_fmts);
if ((ret = ff_formats_ref(formats, &inlink->outcfg.formats)) < 0)
return ret;
layouts = ff_all_channel_counts();
if ((ret = ff_channel_layouts_ref(layouts, &inlink->outcfg.channel_layouts)) < 0)
return ret;
formats = ff_all_samplerates();
if ((ret = ff_formats_ref(formats, &inlink->outcfg.samplerates)) < 0)
return ret;
formats = ff_make_format_list(pix_fmts);
if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) < 0)
return ret;
return 0;
}
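/* Compute per-band parameters: band centers are spaced evenly on the
 * selected scale, then mapped back to linear frequency; the derivative of
 * that mapping, scaled by the Heisenberg-Gabor factor and the user
 * deviation, becomes the Gaussian bandwidth stored next to each center. */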
static void frequency_band(float *frequency_band,
int frequency_band_count,
float frequency_range,
float frequency_offset,
int frequency_scale, float deviation)
{
deviation *= sqrtf(1.f / (4.f * M_PI)); // Heisenberg Gabor Limit
for (int y = 0; y < frequency_band_count; y++) {
float frequency = frequency_range * (1.f - (float)y / frequency_band_count) + frequency_offset;
float frequency_derivative = frequency_range / frequency_band_count;
switch (frequency_scale) {
case FSCALE_LOG2:
frequency = powf(2.f, frequency);
frequency_derivative *= logf(2.f) * frequency;
break;
case FSCALE_BARK:
frequency = 600.f * sinhf(frequency / 6.f);
frequency_derivative *= sqrtf(frequency * frequency + 360000.f) / 6.f;
break;
case FSCALE_MEL:
frequency = 700.f * (powf(10.f, frequency / 2595.f) - 1.f);
frequency_derivative *= (frequency + 700.f) * logf(10.f) / 2595.f;
break;
case FSCALE_ERBS:
frequency = 676170.4f / (47.06538f - expf(frequency * 0.08950404f)) - 14678.49f;
frequency_derivative *= (frequency * frequency + 14990.4 * frequency + 4577850.f) / 160514.f;
break;
case FSCALE_SQRT:
frequency = frequency * frequency;
frequency_derivative *= 2.f * sqrtf(frequency);
break;
case FSCALE_CBRT:
frequency = frequency * frequency * frequency;
frequency_derivative *= 3.f * powf(frequency, 2.f / 3.f);
break;
case FSCALE_QDRT:
frequency = frequency * frequency * frequency * frequency;
frequency_derivative *= 4.f * powf(frequency, 3.f / 4.f);
break;
}
frequency_band[y*2 ] = frequency;
frequency_band[y*2+1] = frequency_derivative * deviation;
}
}
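/* Map a linear magnitude into [0,1]: take the logarithm of the absolute
 * value, scale by 1/log(logb), clip, and invert so that larger
 * magnitudes yield values closer to 1. */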
static float remap_log(float value, float log_factor)
{
float sign = (0 < value) - (value < 0);
value = logf(value * sign) * log_factor;
return 1.f - av_clipf(value, 0.f, 1.f);
}
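/* Per-channel analysis input stage: shift the overlap cache left by the
 * new sample count, append the new samples as complex values with zero
 * imaginary part, and once a full hop has accumulated run the forward
 * FFT over the whole padded window. */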
static int run_channel_cwt_prepare(AVFilterContext *ctx, void *arg, int jobnr, int ch)
{
ShowCWTContext *s = ctx->priv;
const int hop_size = s->hop_size;
AVFrame *fin = arg;
AVComplexFloat *cache = (AVComplexFloat *)s->cache->extended_data[ch];
AVComplexFloat *src = (AVComplexFloat *)s->fft_in->extended_data[ch];
AVComplexFloat *dst = (AVComplexFloat *)s->fft_out->extended_data[ch];
if (fin) {
const float *input = (const float *)fin->extended_data[ch];
const int offset = s->input_padding_size - fin->nb_samples;
memmove(cache, cache + fin->nb_samples, sizeof(*cache) * offset);
for (int n = 0; n < fin->nb_samples; n++) {
cache[offset + n].re = input[n];
cache[offset + n].im = 0.f;
}
}
if (fin && s->hop_index + fin->nb_samples < hop_size)
return 0;
memcpy(src, cache, sizeof(*src) * s->input_padding_size);
s->tx_fn(s->fft[jobnr], dst, src, sizeof(*src));
return 0;
}
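/* Slice-threaded drawing over frequency bands: for each band take the
 * current synthesis sample from ch_out and write one pixel at the
 * current position (a column for lr/rl, a row for ud/du), colored
 * according to the selected mode; in scroll mode with lr/rl the row is
 * shifted here first. */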
static int draw(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
ShowCWTContext *s = ctx->priv;
const ptrdiff_t ylinesize = s->outpicref->linesize[0];
const ptrdiff_t ulinesize = s->outpicref->linesize[1];
const ptrdiff_t vlinesize = s->outpicref->linesize[2];
const ptrdiff_t alinesize = s->outpicref->linesize[3];
const float log_factor = 1.f/logf(s->logarithmic_basis);
const int count = s->frequency_band_count;
const int start = (count * jobnr) / nb_jobs;
const int end = (count * (jobnr+1)) / nb_jobs;
const int ihop_index = s->ihop_index;
const int ihop_size = s->ihop_size;
const int direction = s->direction;
uint8_t *dstY, *dstU, *dstV, *dstA;
const int mode = s->mode;
const int w_1 = s->w - 1;
const int x = s->pos;
float Y, U, V;
for (int y = start; y < end; y++) {
const AVComplexFloat *src = ((const AVComplexFloat *)s->ch_out->extended_data[0]) +
y * ihop_size + ihop_index;
switch (direction) {
case DIRECTION_LR:
case DIRECTION_RL:
dstY = s->outpicref->data[0] + y * ylinesize;
dstU = s->outpicref->data[1] + y * ulinesize;
dstV = s->outpicref->data[2] + y * vlinesize;
dstA = s->outpicref->data[3] ? s->outpicref->data[3] + y * alinesize : NULL;
break;
case DIRECTION_UD:
case DIRECTION_DU:
dstY = s->outpicref->data[0] + x * ylinesize + w_1 - y;
dstU = s->outpicref->data[1] + x * ulinesize + w_1 - y;
dstV = s->outpicref->data[2] + x * vlinesize + w_1 - y;
dstA = s->outpicref->data[3] ? s->outpicref->data[3] + x * alinesize + w_1 - y : NULL;
break;
}
switch (s->slide) {
case SLIDE_REPLACE:
case SLIDE_FRAME:
/* nothing to do here */
break;
case SLIDE_SCROLL:
switch (s->direction) {
case DIRECTION_RL:
memmove(dstY, dstY + 1, w_1);
memmove(dstU, dstU + 1, w_1);
memmove(dstV, dstV + 1, w_1);
if (dstA != NULL)
memmove(dstA, dstA + 1, w_1);
break;
case DIRECTION_LR:
memmove(dstY + 1, dstY, w_1);
memmove(dstU + 1, dstU, w_1);
memmove(dstV + 1, dstV, w_1);
if (dstA != NULL)
memmove(dstA + 1, dstA, w_1);
break;
}
break;
}
if (direction == DIRECTION_RL ||
direction == DIRECTION_LR) {
dstY += x;
dstU += x;
dstV += x;
if (dstA != NULL)
dstA += x;
}
switch (mode) {
case 4:
{
const AVComplexFloat *src2 = ((const AVComplexFloat *)s->ch_out->extended_data[FFMIN(1, s->nb_channels - 1)]) +
y * ihop_size + ihop_index;
float z, u, v;
z = hypotf(src[0].re + src2[0].re, src[0].im + src2[0].im);
u = hypotf(src[0].re, src[0].im);
v = hypotf(src2[0].re, src2[0].im);
z = remap_log(z, log_factor);
u = remap_log(u, log_factor);
v = remap_log(v, log_factor);
Y = z;
U = 0.5f + z * sinf((v - u) * M_PI_2);
V = 0.5f + z * sinf((u - v) * M_PI_2);
dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
dstU[0] = av_clip_uint8(lrintf(U * 255.f));
dstV[0] = av_clip_uint8(lrintf(V * 255.f));
if (dstA)
dstA[0] = dstY[0];
}
break;
case 3:
{
const int nb_channels = s->nb_channels;
const float yf = 1.f / nb_channels;
Y = 0.f;
U = V = 0.5f;
for (int ch = 0; ch < nb_channels; ch++) {
const AVComplexFloat *src = ((const AVComplexFloat *)s->ch_out->extended_data[ch]) +
y * ihop_size + ihop_index;
float z;
z = hypotf(src[0].re, src[0].im);
z = remap_log(z, log_factor);
Y += z * yf;
U += z * yf * sinf(2.f * M_PI * ch * yf);
V += z * yf * cosf(2.f * M_PI * ch * yf);
}
dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
dstU[0] = av_clip_uint8(lrintf(U * 255.f));
dstV[0] = av_clip_uint8(lrintf(V * 255.f));
if (dstA)
dstA[0] = dstY[0];
}
break;
case 2:
Y = hypotf(src[0].re, src[0].im);
Y = remap_log(Y, log_factor);
U = atan2f(src[0].im, src[0].re);
U = 0.5f + 0.5f * U * Y / M_PI;
V = 1.f - U;
dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
dstU[0] = av_clip_uint8(lrintf(U * 255.f));
dstV[0] = av_clip_uint8(lrintf(V * 255.f));
if (dstA)
dstA[0] = dstY[0];
break;
case 1:
Y = atan2f(src[0].im, src[0].re);
Y = 0.5f + 0.5f * Y / M_PI;
dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
if (dstA)
dstA[0] = dstY[0];
break;
case 0:
Y = hypotf(src[0].re, src[0].im);
Y = remap_log(Y, log_factor);
dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
if (dstA)
dstA[0] = dstY[0];
break;
}
}
return 0;
}
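/* Per-band synthesis for one channel: multiply the input spectrum by the
 * band's (real-valued) kernel over its nonzero range, fold the product
 * into the shorter inverse-FFT input using the precomputed index table,
 * run the inverse FFT and keep the centered ihop_size samples as the
 * band's output in ch_out. Per-thread scratch frames are indexed by
 * jobnr. */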
static int run_channel_cwt(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
ShowCWTContext *s = ctx->priv;
const int ch = *(int *)arg;
AVComplexFloat *dst = (AVComplexFloat *)s->fft_out->extended_data[ch];
const int output_sample_count = s->output_sample_count;
const int ihop_size = s->ihop_size;
const int ioffset = (s->output_padding_size - ihop_size) >> 1;
const int count = s->frequency_band_count;
const int start = (count * jobnr) / nb_jobs;
const int end = (count * (jobnr+1)) / nb_jobs;
for (int y = start; y < end; y++) {
AVComplexFloat *isrc = (AVComplexFloat *)s->ifft_in->extended_data[y];
AVComplexFloat *idst = (AVComplexFloat *)s->ifft_out->extended_data[y];
AVComplexFloat *chout = ((AVComplexFloat *)s->ch_out->extended_data[ch]) + y * ihop_size;
AVComplexFloat *dstx = (AVComplexFloat *)s->dst_x->extended_data[jobnr];
AVComplexFloat *srcx = (AVComplexFloat *)s->src_x->extended_data[jobnr];
AVComplexFloat *kernelx = (AVComplexFloat *)s->kernel_x->extended_data[jobnr];
const AVComplexFloat *kernel = (const AVComplexFloat *)s->kernel->extended_data[y];
const unsigned *index = (const unsigned *)s->index;
const int kernel_start = s->kernel_start[y];
const int kernel_stop = s->kernel_stop[y];
const int kernel_range = kernel_stop - kernel_start;
memcpy(kernelx, kernel + kernel_start, sizeof(*kernel) * kernel_range);
memcpy(srcx, dst + kernel_start, sizeof(*dst) * kernel_range);
s->fdsp->vector_fmul((float *)dstx, (const float *)srcx,
(const float *)kernelx, FFALIGN(kernel_range * 2, 16));
memset(isrc, 0, sizeof(*isrc) * output_sample_count);
for (int i = 0; i < kernel_range; i++) {
const unsigned n = index[i + kernel_start];
isrc[n].re += dstx[i].re;
isrc[n].im += dstx[i].im;
}
s->itx_fn(s->ifft[jobnr], idst, isrc, sizeof(*isrc));
memcpy(chout, idst + ioffset, sizeof(*chout) * ihop_size);
}
return 0;
}
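/* Build one frequency-domain kernel per band: a Gaussian weight around
 * the band's center (stored in both re and im so a plain element-wise
 * multiply scales complex bins), normalized by 1/size, with the nonzero
 * start/stop positions recorded so later multiplies can skip zeros.
 * Also fills the index table that folds FFT bins down to
 * output_sample_count. */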
static void compute_kernel(AVFilterContext *ctx)
{
ShowCWTContext *s = ctx->priv;
const int size = s->input_sample_count;
const float scale_factor = 1.f/(float)size;
const int output_sample_count = s->output_sample_count;
const int fsize = s->frequency_band_count;
unsigned *index = s->index;
for (int y = 0; y < fsize; y++) {
AVComplexFloat *kernel = (AVComplexFloat *)s->kernel->extended_data[y];
int *kernel_start = s->kernel_start;
int *kernel_stop = s->kernel_stop;
float frequency = s->frequency_band[y*2];
float deviation = 1.f / (s->frequency_band[y*2+1] *
output_sample_count);
for (int n = 0; n < size; n++) {
float ff, f = fabsf(n-frequency);
f = size - fabsf(f - size);
ff = expf(-f*f*deviation) * scale_factor;
kernel[n].re = ff;
kernel[n].im = ff;
}
for (int n = 0; n < size; n++) {
if (kernel[n].re != 0.f) {
kernel_start[y] = n;
break;
}
}
for (int n = 0; n < size; n++) {
if (kernel[size - n - 1].re != 0.f) {
kernel_stop[y] = size - n;
break;
}
}
}
for (int n = 0; n < size; n++)
index[n] = n % output_sample_count;
}
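/* Configure the output link: pick the band count from the image size and
 * direction, allocate FFT contexts, work frames and the persistent
 * output picture, convert the min/max frequencies to the selected scale,
 * compute the frequency bands and kernels, and default the "auto" frame
 * rate to sample_rate / hop_size. */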
static int config_output(AVFilterLink *outlink)
{
AVFilterContext *ctx = outlink->src;
AVFilterLink *inlink = ctx->inputs[0];
ShowCWTContext *s = ctx->priv;
float maximum_frequency = fminf(s->maximum_frequency, inlink->sample_rate * 0.5f);
float minimum_frequency = s->minimum_frequency;
float scale = 1.f, factor;
int ret;
uninit(ctx);
s->fdsp = avpriv_float_dsp_alloc(0);
if (!s->fdsp)
return AVERROR(ENOMEM);
switch (s->direction) {
case DIRECTION_LR:
case DIRECTION_RL:
s->frequency_band_count = s->h;
break;
case DIRECTION_UD:
case DIRECTION_DU:
s->frequency_band_count = s->w;
break;
}
s->new_frame = 1;
s->nb_threads = FFMIN(s->frequency_band_count, ff_filter_get_nb_threads(ctx));
s->nb_channels = inlink->ch_layout.nb_channels;
s->old_pts = AV_NOPTS_VALUE;
s->eof_pts = AV_NOPTS_VALUE;
s->nb_consumed_samples = 65536;
s->input_sample_count = s->nb_consumed_samples;
s->hop_size = s->nb_consumed_samples >> 1;
s->input_padding_size = 65536;
s->output_padding_size = FFMAX(16, av_rescale(s->input_padding_size, s->pps, inlink->sample_rate));
outlink->w = s->w;
outlink->h = s->h;
outlink->sample_aspect_ratio = (AVRational){1,1};
s->fft_in_size = FFALIGN(s->input_padding_size, av_cpu_max_align());
s->fft_out_size = FFALIGN(s->input_padding_size, av_cpu_max_align());
s->output_sample_count = s->output_padding_size;
s->ifft_in_size = FFALIGN(s->output_padding_size, av_cpu_max_align());
s->ifft_out_size = FFALIGN(s->output_padding_size, av_cpu_max_align());
s->ihop_size = s->output_padding_size >> 1;
s->fft = av_calloc(s->nb_threads, sizeof(*s->fft));
if (!s->fft)
return AVERROR(ENOMEM);
for (int n = 0; n < s->nb_threads; n++) {
ret = av_tx_init(&s->fft[n], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->input_padding_size, &scale, 0);
if (ret < 0)
return ret;
}
s->ifft = av_calloc(s->nb_threads, sizeof(*s->ifft));
if (!s->ifft)
return AVERROR(ENOMEM);
for (int n = 0; n < s->nb_threads; n++) {
ret = av_tx_init(&s->ifft[n], &s->itx_fn, AV_TX_FLOAT_FFT, 1, s->output_padding_size, &scale, 0);
if (ret < 0)
return ret;
}
s->frequency_band = av_calloc(s->frequency_band_count,
sizeof(*s->frequency_band) * 2);
s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
s->fft_in = ff_get_audio_buffer(inlink, s->fft_in_size * 2);
s->fft_out = ff_get_audio_buffer(inlink, s->fft_out_size * 2);
s->dst_x = av_frame_alloc();
s->src_x = av_frame_alloc();
s->kernel_x = av_frame_alloc();
s->cache = ff_get_audio_buffer(inlink, s->fft_in_size * 2);
s->ch_out = ff_get_audio_buffer(inlink, s->frequency_band_count * 2 * s->ihop_size);
s->ifft_in = av_frame_alloc();
s->ifft_out = av_frame_alloc();
s->kernel = av_frame_alloc();
s->index = av_calloc(s->input_padding_size, sizeof(*s->index));
s->kernel_start = av_calloc(s->frequency_band_count, sizeof(*s->kernel_start));
s->kernel_stop = av_calloc(s->frequency_band_count, sizeof(*s->kernel_stop));
if (!s->outpicref || !s->fft_in || !s->fft_out || !s->src_x || !s->dst_x || !s->kernel_x ||
!s->ifft_in || !s->ifft_out || !s->kernel_start || !s->kernel_stop ||
!s->frequency_band || !s->kernel || !s->cache || !s->index)
return AVERROR(ENOMEM);
s->ifft_in->format = inlink->format;
s->ifft_in->nb_samples = s->ifft_in_size * 2;
s->ifft_in->ch_layout.nb_channels = s->frequency_band_count;
ret = av_frame_get_buffer(s->ifft_in, 0);
if (ret < 0)
return ret;
s->ifft_out->format = inlink->format;
s->ifft_out->nb_samples = s->ifft_out_size * 2;
s->ifft_out->ch_layout.nb_channels = s->frequency_band_count;
ret = av_frame_get_buffer(s->ifft_out, 0);
if (ret < 0)
return ret;
s->kernel->format = inlink->format;
s->kernel->nb_samples = s->input_padding_size * 2;
s->kernel->ch_layout.nb_channels = s->frequency_band_count;
ret = av_frame_get_buffer(s->kernel, 0);
if (ret < 0)
return ret;
s->src_x->format = inlink->format;
s->src_x->nb_samples = s->fft_out_size * 2;
s->src_x->ch_layout.nb_channels = s->nb_threads;
ret = av_frame_get_buffer(s->src_x, 0);
if (ret < 0)
return ret;
s->dst_x->format = inlink->format;
s->dst_x->nb_samples = s->fft_out_size * 2;
s->dst_x->ch_layout.nb_channels = s->nb_threads;
ret = av_frame_get_buffer(s->dst_x, 0);
if (ret < 0)
return ret;
s->kernel_x->format = inlink->format;
s->kernel_x->nb_samples = s->fft_out_size * 2;
s->kernel_x->ch_layout.nb_channels = s->nb_threads;
ret = av_frame_get_buffer(s->kernel_x, 0);
if (ret < 0)
return ret;
s->outpicref->sample_aspect_ratio = (AVRational){1,1};
for (int y = 0; y < outlink->h; y++) {
memset(s->outpicref->data[0] + y * s->outpicref->linesize[0], 0, outlink->w);
memset(s->outpicref->data[1] + y * s->outpicref->linesize[1], 128, outlink->w);
memset(s->outpicref->data[2] + y * s->outpicref->linesize[2], 128, outlink->w);
if (s->outpicref->data[3])
memset(s->outpicref->data[3] + y * s->outpicref->linesize[3], 0, outlink->w);
}
s->outpicref->color_range = AVCOL_RANGE_JPEG;
factor = s->nb_consumed_samples / (float)inlink->sample_rate;
minimum_frequency *= factor;
maximum_frequency *= factor;
switch (s->frequency_scale) {
case FSCALE_LOG2:
minimum_frequency = logf(minimum_frequency) / logf(2.f);
maximum_frequency = logf(maximum_frequency) / logf(2.f);
break;
case FSCALE_BARK:
minimum_frequency = 6.f * asinhf(minimum_frequency / 600.f);
maximum_frequency = 6.f * asinhf(maximum_frequency / 600.f);
break;
case FSCALE_MEL:
minimum_frequency = 2595.f * log10f(1.f + minimum_frequency / 700.f);
maximum_frequency = 2595.f * log10f(1.f + maximum_frequency / 700.f);
break;
case FSCALE_ERBS:
minimum_frequency = 11.17268f * log(1.f + (46.06538f * minimum_frequency) / (minimum_frequency + 14678.49f));
maximum_frequency = 11.17268f * log(1.f + (46.06538f * maximum_frequency) / (maximum_frequency + 14678.49f));
break;
case FSCALE_SQRT:
minimum_frequency = sqrtf(minimum_frequency);
maximum_frequency = sqrtf(maximum_frequency);
break;
case FSCALE_CBRT:
minimum_frequency = cbrtf(minimum_frequency);
maximum_frequency = cbrtf(maximum_frequency);
break;
case FSCALE_QDRT:
minimum_frequency = powf(minimum_frequency, 0.25f);
maximum_frequency = powf(maximum_frequency, 0.25f);
break;
}
frequency_band(s->frequency_band,
s->frequency_band_count, maximum_frequency - minimum_frequency,
minimum_frequency, s->frequency_scale, s->deviation);
av_log(ctx, AV_LOG_DEBUG, "input_sample_count: %d\n", s->input_sample_count);
av_log(ctx, AV_LOG_DEBUG, "output_sample_count: %d\n", s->output_sample_count);
switch (s->direction) {
case DIRECTION_LR:
s->pos = 0;
break;
case DIRECTION_RL:
s->pos = s->w - 1;
break;
case DIRECTION_UD:
s->pos = 0;
break;
case DIRECTION_DU:
s->pos = s->h - 1;
break;
}
s->auto_frame_rate = av_make_q(inlink->sample_rate, s->hop_size);
if (strcmp(s->rate_str, "auto")) {
ret = av_parse_video_rate(&s->frame_rate, s->rate_str);
} else {
s->frame_rate = s->auto_frame_rate;
}
outlink->frame_rate = s->frame_rate;
outlink->time_base = av_inv_q(outlink->frame_rate);
compute_kernel(ctx);
return 0;
}
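/* Render and possibly emit one video frame: scroll the picture if
 * needed, draw the current column/row, advance the drawing position,
 * pad the not yet drawn part on EOF in frame slide mode, stamp the pts
 * and send a copy of the cached picture once its pts has advanced.
 * Returns 1 if no new frame was pushed downstream, otherwise the
 * ff_filter_frame() result or an error. */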
static int output_frame(AVFilterContext *ctx)
{
AVFilterLink *outlink = ctx->outputs[0];
AVFilterLink *inlink = ctx->inputs[0];
ShowCWTContext *s = ctx->priv;
const int nb_planes = 3 + (s->outpicref->data[3] != NULL);
int ret;
switch (s->slide) {
case SLIDE_SCROLL:
switch (s->direction) {
case DIRECTION_UD:
for (int p = 0; p < nb_planes; p++) {
ptrdiff_t linesize = s->outpicref->linesize[p];
for (int y = s->h - 1; y > 0; y--) {
uint8_t *dst = s->outpicref->data[p] + y * linesize;
memmove(dst, dst - linesize, s->w);
}
}
break;
case DIRECTION_DU:
for (int p = 0; p < nb_planes; p++) {
ptrdiff_t linesize = s->outpicref->linesize[p];
for (int y = 0; y < s->h - 1; y++) {
uint8_t *dst = s->outpicref->data[p] + y * linesize;
memmove(dst, dst + linesize, s->w);
}
}
break;
}
break;
}
ff_filter_execute(ctx, draw, NULL, NULL, s->nb_threads);
switch (s->slide) {
case SLIDE_REPLACE:
case SLIDE_FRAME:
switch (s->direction) {
case DIRECTION_LR:
s->pos++;
if (s->pos >= s->w) {
s->pos = 0;
s->new_frame = 1;
}
break;
case DIRECTION_RL:
s->pos--;
if (s->pos < 0) {
s->pos = s->w - 1;
s->new_frame = 1;
}
break;
case DIRECTION_UD:
s->pos++;
if (s->pos >= s->h) {
s->pos = 0;
s->new_frame = 1;
}
break;
case DIRECTION_DU:
s->pos--;
if (s->pos < 0) {
s->pos = s->h - 1;
s->new_frame = 1;
}
break;
}
break;
case SLIDE_SCROLL:
switch (s->direction) {
case DIRECTION_UD:
case DIRECTION_LR:
s->pos = 0;
break;
case DIRECTION_RL:
s->pos = s->w - 1;
break;
case DIRECTION_DU:
s->pos = s->h - 1;
break;
}
break;
}
if (s->slide == SLIDE_FRAME && s->eof) {
switch (s->direction) {
case DIRECTION_LR:
for (int p = 0; p < nb_planes; p++) {
ptrdiff_t linesize = s->outpicref->linesize[p];
const int size = s->w - s->pos;
const int fill = p > 0 && p < 3 ? 128 : 0;
const int x = s->pos;
for (int y = 0; y < s->h; y++) {
uint8_t *dst = s->outpicref->data[p] + y * linesize + x;
memset(dst, fill, size);
}
}
break;
case DIRECTION_RL:
for (int p = 0; p < nb_planes; p++) {
ptrdiff_t linesize = s->outpicref->linesize[p];
const int size = s->w - s->pos;
const int fill = p > 0 && p < 3 ? 128 : 0;
for (int y = 0; y < s->h; y++) {
uint8_t *dst = s->outpicref->data[p] + y * linesize;
memset(dst, fill, size);
}
}
break;
case DIRECTION_UD:
for (int p = 0; p < nb_planes; p++) {
ptrdiff_t linesize = s->outpicref->linesize[p];
const int fill = p > 0 && p < 3 ? 128 : 0;
for (int y = s->pos; y < s->h; y++) {
uint8_t *dst = s->outpicref->data[p] + y * linesize;
memset(dst, fill, s->w);
}
}
break;
case DIRECTION_DU:
for (int p = 0; p < nb_planes; p++) {
ptrdiff_t linesize = s->outpicref->linesize[p];
const int fill = p > 0 && p < 3 ? 128 : 0;
for (int y = s->h - s->pos; y >= 0; y--) {
uint8_t *dst = s->outpicref->data[p] + y * linesize;
memset(dst, fill, s->w);
}
}
break;
}
}
s->new_frame = s->slide == SLIDE_FRAME && (s->new_frame || s->eof);
if (s->slide != SLIDE_FRAME || s->new_frame == 1) {
int64_t pts_offset = s->new_frame ? 0LL : av_rescale(s->ihop_index, s->hop_size, s->ihop_size);
pts_offset = av_rescale_q(pts_offset - 16384LL, av_make_q(1, inlink->sample_rate), inlink->time_base);
s->outpicref->pts = av_rescale_q(s->in_pts + pts_offset, inlink->time_base, outlink->time_base);
s->outpicref->duration = 1;
}
s->ihop_index++;
if (s->ihop_index >= s->ihop_size)
s->ihop_index = 0;
if (s->slide == SLIDE_FRAME && s->new_frame == 0)
return 1;
if (s->old_pts < s->outpicref->pts) {
AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
if (!out)
return AVERROR(ENOMEM);
ret = av_frame_copy_props(out, s->outpicref);
if (ret < 0)
goto fail;
ret = av_frame_copy(out, s->outpicref);
if (ret < 0)
goto fail;
s->old_pts = s->outpicref->pts;
s->new_frame = 0;
ret = ff_filter_frame(outlink, out);
if (ret <= 0)
return ret;
fail:
av_frame_free(&out);
return ret;
}
return 1;
}
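/* Thread wrapper that splits the per-channel input/forward-FFT stage
 * across jobs. */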
static int run_channels_cwt_prepare(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
ShowCWTContext *s = ctx->priv;
const int count = s->nb_channels;
const int start = (count * jobnr) / nb_jobs;
const int end = (count * (jobnr+1)) / nb_jobs;
for (int ch = start; ch < end; ch++)
run_channel_cwt_prepare(ctx, arg, jobnr, ch);
return 0;
}
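/* activate(): consume up to one hop of samples, run the forward FFT and
 * the per-channel synthesis when a hop completes, then step through the
 * inverse hop one output frame at a time; on EOF keep flushing until the
 * last pts is reached and forward the status downstream. */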
static int activate(AVFilterContext *ctx)
{
AVFilterLink *inlink = ctx->inputs[0];
AVFilterLink *outlink = ctx->outputs[0];
ShowCWTContext *s = ctx->priv;
int ret = 0, status;
int64_t pts;
FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
if (s->outpicref) {
AVFrame *fin = NULL;
if (s->ihop_index == 0) {
if (!s->eof) {
ret = ff_inlink_consume_samples(inlink, 1, s->hop_size - s->hop_index, &fin);
if (ret < 0)
return ret;
}
if (ret > 0 || s->eof) {
ff_filter_execute(ctx, run_channels_cwt_prepare, fin, NULL,
FFMIN(s->nb_threads, s->nb_channels));
if (fin) {
if ((s->hop_index == 0 && s->slide != SLIDE_FRAME) || s->new_frame) {
s->in_pts = fin->pts;
s->new_frame = 0;
}
s->hop_index += fin->nb_samples;
av_frame_free(&fin);
} else {
s->hop_index = s->hop_size;
}
}
}
if (s->hop_index >= s->hop_size || s->ihop_index > 0) {
if (s->hop_index)
s->hop_index = 0;
for (int ch = 0; ch < s->nb_channels && s->ihop_index == 0; ch++) {
ff_filter_execute(ctx, run_channel_cwt, (void *)&ch, NULL,
s->nb_threads);
}
ret = output_frame(ctx);
if (ret != 1)
return ret;
}
}
if (s->eof && s->eof_pts != AV_NOPTS_VALUE &&
(s->old_pts + 1 >= s->eof_pts || (s->slide == SLIDE_FRAME))) {
if (s->slide == SLIDE_FRAME)
ret = output_frame(ctx);
ff_outlink_set_status(outlink, AVERROR_EOF, s->eof_pts);
return ret;
}
if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
if (status == AVERROR_EOF) {
s->eof = 1;
ff_filter_set_ready(ctx, 10);
s->eof_pts = av_rescale_q(pts, inlink->time_base, outlink->time_base);
return 0;
}
}
if (ff_inlink_queued_samples(inlink) > 0 || s->ihop_index ||
s->hop_index >= s->hop_size || s->eof) {
ff_filter_set_ready(ctx, 10);
return 0;
}
if (ff_outlink_frame_wanted(outlink)) {
ff_inlink_request_frame(inlink);
return 0;
}
return FFERROR_NOT_READY;
}
static const AVFilterPad showcwt_inputs[] = {
{
.name = "default",
.type = AVMEDIA_TYPE_AUDIO,
},
};
static const AVFilterPad showcwt_outputs[] = {
{
.name = "default",
.type = AVMEDIA_TYPE_VIDEO,
.config_props = config_output,
},
};
const AVFilter ff_avf_showcwt = {
.name = "showcwt",
.description = NULL_IF_CONFIG_SMALL("Convert input audio to a CWT (Continuous Wavelet Transform) spectrum video output."),
.uninit = uninit,
.priv_size = sizeof(ShowCWTContext),
FILTER_INPUTS(showcwt_inputs),
FILTER_OUTPUTS(showcwt_outputs),
FILTER_QUERY_FUNC(query_formats),
.activate = activate,
.priv_class = &showcwt_class,
.flags = AVFILTER_FLAG_SLICE_THREADS,
};