1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-10 06:10:52 +02:00

avfilter/x86/f_ebur128: implement AVX peak calculation

Stereo only, for simplicity. Slightly faster than the C code.
This commit is contained in:
Niklas Haas
2025-06-21 17:23:16 +02:00
parent 3b26b782ee
commit daef348574
4 changed files with 31 additions and 8 deletions

View File

@@ -502,6 +502,9 @@ static int config_audio_output(AVFilterLink *outlink)
return AVERROR(ENOMEM);
}
#if ARCH_X86
ff_ebur128_init_x86(&ebur128->dsp, nb_channels);
#endif
return 0;
}
@@ -581,11 +584,6 @@ static av_cold int init(AVFilterContext *ctx)
ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;
ebur128->dsp.find_peak = ff_ebur128_find_peak_c;
#if ARCH_X86
ff_ebur128_init_x86(&ebur128->dsp);
#endif
return 0;
}

View File

@@ -55,7 +55,7 @@ static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct l
static_assert(offsetof(EBUR128DSPContext, rlb) == 5 * sizeof(double), "struct layout mismatch");
static_assert(offsetof(EBUR128DSPContext, y) == 10 * sizeof(double), "struct layout mismatch");
void ff_ebur128_init_x86(EBUR128DSPContext *dsp);
void ff_ebur128_init_x86(EBUR128DSPContext *dsp, int nb_channels);
void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
double *, double *, double *, double *, int);

View File

@@ -37,6 +37,10 @@ struc DSP
.z resq 1
endstruc
SECTION_RODATA
abs_mask: dq 0x7FFFFFFFFFFFFFFF
SECTION .text
%macro MOVNQ 3 ; num, dst, src
@@ -140,4 +144,20 @@ cglobal ebur128_filter_channels, 7, 9, 14, dsp, samples, cache400, cache3000, su
jnz .loop
RET
;-----------------------------------------------------------------------
; double ff_ebur128_find_peak_2ch(double *ch_peaks, int channels,
;                                 const double *samples, int nb_samples)
;
; Stereo-only peak search: folds |sample| into the two running
; per-channel peaks stored at ch_peaks and returns the larger of the two.
;
; In:    ch_peaks   = two doubles (running peak per channel), updated in place
;        channels   = unused here (this variant is only valid for 2 channels)
;        samples    = interleaved doubles, one register-width load per iteration
;        nb_samples = iteration count; the loop body runs before the count
;                     is tested, so callers must pass nb_samples >= 1
; Out:   m0 (low qword) = max of both channel peaks; a copy of the same
;        bits is also placed in rax, as in the original implementation
; Notes: unaligned loads/stores are used throughout, so no alignment is
;        required of ch_peaks or samples.
;-----------------------------------------------------------------------
cglobal ebur128_find_peak_2ch, 4, 5, 3, ch_peaks, channels, samples, nb_samples
    ; Broadcast the sign-clearing mask to both lanes. movddup is used
    ; instead of vpbroadcastq because the latter is an AVX2 instruction and
    ; would fault on CPUs that only support AVX, which is all the C-side
    ; dispatcher checks for before installing this function.
    movddup       m2, [abs_mask]
    movupd        m0, [ch_peaksq]       ; m0 = { peak[0], peak[1] }
.loop:
    movupd        m1, [samplesq]        ; next pair of interleaved samples
    add     samplesq, mmsize
    pand          m1, m2                ; clear sign bits -> fabs()
    maxpd         m0, m1                ; per-channel running maximum
    dec  nb_samplesd
    jg .loop
    movupd [ch_peaksq], m0              ; write back updated per-channel peaks
    shufpd        m1, m0, m0, 1         ; m1 = { peak[1], peak[0] }
    maxpd         m0, m1                ; low lane = max over both channels
    movq         rax, m0
    RET
%endif ; ARCH_X86_64

View File

@@ -26,10 +26,15 @@
void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *, const double *,
double *, double *, double *, double *, int);
av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp)
double ff_ebur128_find_peak_2ch_avx(double *, int, const double *, int);
av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp, int nb_channels)
{
int cpu_flags = av_get_cpu_flags();
if (ARCH_X86_64 && EXTERNAL_AVX(cpu_flags))
if (ARCH_X86_64 && EXTERNAL_AVX(cpu_flags)) {
dsp->filter_channels = ff_ebur128_filter_channels_avx;
if (nb_channels == 2)
dsp->find_peak = ff_ebur128_find_peak_2ch_avx;
}
}