You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-10 06:10:52 +02:00
avfilter/x86/f_ebur128: implement AVX peak calculation
Stereo only, for simplicity. Slightly faster than the C code.
This commit is contained in:
@@ -502,6 +502,9 @@ static int config_audio_output(AVFilterLink *outlink)
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
#if ARCH_X86
|
||||
ff_ebur128_init_x86(&ebur128->dsp, nb_channels);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -581,11 +584,6 @@ static av_cold int init(AVFilterContext *ctx)
|
||||
|
||||
ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;
|
||||
ebur128->dsp.find_peak = ff_ebur128_find_peak_c;
|
||||
|
||||
#if ARCH_X86
|
||||
ff_ebur128_init_x86(&ebur128->dsp);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -55,7 +55,7 @@ static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct l
|
||||
static_assert(offsetof(EBUR128DSPContext, rlb) == 5 * sizeof(double), "struct layout mismatch");
|
||||
static_assert(offsetof(EBUR128DSPContext, y) == 10 * sizeof(double), "struct layout mismatch");
|
||||
|
||||
void ff_ebur128_init_x86(EBUR128DSPContext *dsp);
|
||||
void ff_ebur128_init_x86(EBUR128DSPContext *dsp, int nb_channels);
|
||||
|
||||
void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
|
||||
double *, double *, double *, double *, int);
|
||||
|
@@ -37,6 +37,10 @@ struc DSP
|
||||
.z resq 1
|
||||
endstruc
|
||||
|
||||
SECTION_RODATA
|
||||
|
||||
abs_mask: dq 0x7FFFFFFFFFFFFFFF
|
||||
|
||||
SECTION .text
|
||||
|
||||
%macro MOVNQ 3 ; num, dst, src
|
||||
@@ -140,4 +144,20 @@ cglobal ebur128_filter_channels, 7, 9, 14, dsp, samples, cache400, cache3000, su
|
||||
jnz .loop
|
||||
RET
|
||||
|
||||
;------------------------------------------------------------------------------
; double ebur128_find_peak_2ch(double *ch_peaks, int channels,
;                              const double *samples, int nb_samples)
;
; Stereo peak tracker. Keeps one running |sample| maximum per channel in
; ch_peaks[0..1], updates both from nb_samples interleaved sample pairs, and
; yields the larger of the two updated peaks.
;
;   ch_peaks   - in/out: per-channel running peaks (one vector's worth is
;                loaded on entry and stored back on exit)
;   channels   - not read by this routine (the kernel is stereo-only)
;   samples    - interleaved input; one vector is consumed per iteration
;   nb_samples - iteration count; the loop body runs before the count is
;                tested, so one vector is read even when nb_samples <= 0
;                (callers presumably always pass >= 1 -- confirm)
;
; NOTE(review): assumes an INIT_XMM avx declaration is in effect above this
; function (mmsize == 16, i.e. exactly two doubles per vector) -- it is not
; visible from here, confirm.
;------------------------------------------------------------------------------
cglobal ebur128_find_peak_2ch, 4, 5, 3, ch_peaks, channels, samples, nb_samples
    ; Broadcast the sign-clearing mask into both 64-bit lanes. movddup is
    ; used rather than vpbroadcastq: the latter is an AVX2 instruction, while
    ; the C dispatcher installs this routine under an AVX-only check.
    movddup     m2, [abs_mask]
    movupd      m0, [ch_peaksq]         ; m0 = running peaks {left, right}
.loop:
    movupd      m1, [samplesq]          ; next interleaved sample pair
    add         samplesq, mmsize
    pand        m1, m2                  ; clear sign bits -> |sample|
    maxpd       m0, m1                  ; per-channel running maximum
    dec         nb_samplesd
    jg .loop
    movupd      [ch_peaksq], m0         ; write updated per-channel peaks back
    shufpd      m1, m0, m0, 1           ; m1 = m0 with its two lanes swapped
    maxpd       m0, m1                  ; low lane = max(left, right)
    ; Low lane is also copied to rax, as in the original. The double return
    ; value itself is presumably taken from xmm0 (m0) by the caller -- confirm.
    movq        rax, m0
    RET
|
||||
|
||||
%endif ; ARCH_X86_64
|
||||
|
@@ -26,10 +26,15 @@
|
||||
void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *, const double *,
|
||||
double *, double *, double *, double *, int);
|
||||
|
||||
/*
 * x86 dispatch for the EBU R128 DSP function pointers.
 *
 * NOTE(review): this region appears to carry both sides of the change at
 * once: the one-argument signature and the brace-less `if` look like the
 * lines being replaced, and the two-argument signature and braced `if` look
 * like their replacements. Only one of each pair can be kept for the
 * function to compile -- confirm against the real file.
 */
/* Earlier signature: takes only the DSP context. */
av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp)
|
||||
/* Assembly peak finder; per its cglobal declaration the arguments are
 * (ch_peaks, channels, samples, nb_samples). */
double ff_ebur128_find_peak_2ch_avx(double *, int, const double *, int);
|
||||
|
||||
/*
 * Later signature: additionally receives the channel count so that the
 * stereo-only peak routine can be selected.
 *
 * dsp         - context whose filter_channels / find_peak pointers may be
 *               overridden here
 * nb_channels - number of audio channels; only the value 2 enables the
 *               assembly peak finder
 */
av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp, int nb_channels)
|
||||
{
|
||||
/* Runtime CPU feature flags, tested below through EXTERNAL_AVX(). */
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
/* Earlier form of the condition (single-statement body, no braces). */
if (ARCH_X86_64 && EXTERNAL_AVX(cpu_flags))
|
||||
/* Later form: braced so that the peak-finder selection shares the check. */
if (ARCH_X86_64 && EXTERNAL_AVX(cpu_flags)) {
|
||||
dsp->filter_channels = ff_ebur128_filter_channels_avx;
|
||||
/* The assembly peak finder handles exactly two channels; for any other
 * count find_peak is not touched here. */
if (nb_channels == 2)
|
||||
dsp->find_peak = ff_ebur128_find_peak_2ch_avx;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user