You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
avfilter/x86/f_ebur128: add x86 AVX implementation
Processes two channels in parallel, using 128-bit XMM registers. In theory, we could go up to YMM registers to process 4 channels, but this is not a gain except for relatively high channel counts (e.g. 7.1), and also complicates the sample load/store operations considerably. I decided to only add an AVX variant, since the C code is not slow enough to justify a separate function just for ancient CPUs.
This commit is contained in:
@ -579,6 +579,11 @@ static av_cold int init(AVFilterContext *ctx)
|
|||||||
/* summary */
|
/* summary */
|
||||||
av_log(ctx, AV_LOG_VERBOSE, "EBU +%d scale\n", ebur128->meter);
|
av_log(ctx, AV_LOG_VERBOSE, "EBU +%d scale\n", ebur128->meter);
|
||||||
|
|
||||||
|
ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;
|
||||||
|
#if ARCH_X86
|
||||||
|
ff_ebur128_init_x86(&ebur128->dsp);
|
||||||
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -692,11 +697,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
|
|||||||
MOVE_TO_NEXT_CACHED_ENTRY(400);
|
MOVE_TO_NEXT_CACHED_ENTRY(400);
|
||||||
MOVE_TO_NEXT_CACHED_ENTRY(3000);
|
MOVE_TO_NEXT_CACHED_ENTRY(3000);
|
||||||
|
|
||||||
ff_ebur128_filter_channels_c(dsp, &samples[idx_insample * nb_channels],
|
dsp->filter_channels(dsp, &samples[idx_insample * nb_channels],
|
||||||
&ebur128->i400.cache[bin_id_400 * nb_channels],
|
&ebur128->i400.cache[bin_id_400 * nb_channels],
|
||||||
&ebur128->i3000.cache[bin_id_3000 * nb_channels],
|
&ebur128->i3000.cache[bin_id_3000 * nb_channels],
|
||||||
ebur128->i400.sum, ebur128->i3000.sum,
|
ebur128->i400.sum, ebur128->i3000.sum,
|
||||||
nb_channels);
|
nb_channels);
|
||||||
|
|
||||||
#define FIND_PEAK(global, sp, ptype) do { \
|
#define FIND_PEAK(global, sp, ptype) do { \
|
||||||
int ch; \
|
int ch; \
|
||||||
|
@ -22,6 +22,9 @@
|
|||||||
#ifndef AVFILTER_F_EBUR128_H
|
#ifndef AVFILTER_F_EBUR128_H
|
||||||
#define AVFILTER_F_EBUR128_H
|
#define AVFILTER_F_EBUR128_H
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
typedef struct EBUR128Biquad {
|
typedef struct EBUR128Biquad {
|
||||||
double b0, b1, b2;
|
double b0, b1, b2;
|
||||||
double a1, a2;
|
double a1, a2;
|
||||||
@ -35,8 +38,21 @@ typedef struct EBUR128DSPContext {
|
|||||||
/* Cache of 3 samples for each channel */
|
/* Cache of 3 samples for each channel */
|
||||||
double *y; /* after pre-filter */
|
double *y; /* after pre-filter */
|
||||||
double *z; /* after RLB-filter */
|
double *z; /* after RLB-filter */
|
||||||
|
|
||||||
|
/* DSP functions */
|
||||||
|
void (*filter_channels)(const struct EBUR128DSPContext *dsp,
|
||||||
|
const double *samples,
|
||||||
|
double *cache_400, double *cache_3000,
|
||||||
|
double *sum_400, double *sum_3000,
|
||||||
|
int nb_channels);
|
||||||
} EBUR128DSPContext;
|
} EBUR128DSPContext;
|
||||||
|
|
||||||
|
/* Compile-time layout checks: the x86 assembly (struc DSP in
 * libavfilter/x86/f_ebur128.asm) addresses these members by fixed byte
 * offsets (pre at 0, rlb after 5 doubles, y after 10 doubles), so any
 * reordering or resizing of the leading members must fail the build. */
static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct layout mismatch");
|
||||||
|
static_assert(offsetof(EBUR128DSPContext, rlb) == 5 * sizeof(double), "struct layout mismatch");
|
||||||
|
static_assert(offsetof(EBUR128DSPContext, y) == 10 * sizeof(double), "struct layout mismatch");
|
||||||
|
|
||||||
|
void ff_ebur128_init_x86(EBUR128DSPContext *dsp);
|
||||||
|
|
||||||
void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
|
void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
|
||||||
double *, double *, double *, double *, int);
|
double *, double *, double *, double *, int);
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
|
|||||||
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
|
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
|
||||||
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
|
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
|
||||||
OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o
|
OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o
|
||||||
|
OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128_init.o
|
||||||
OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o
|
OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o
|
||||||
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
|
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
|
||||||
OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o
|
OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o
|
||||||
@ -52,6 +53,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
|
|||||||
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
|
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
|
||||||
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
|
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
|
||||||
X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
|
X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
|
||||||
|
X86ASM-OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128.o
|
||||||
X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
|
X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
|
||||||
X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o
|
X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o
|
||||||
X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
|
X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
|
||||||
|
143
libavfilter/x86/f_ebur128.asm
Normal file
143
libavfilter/x86/f_ebur128.asm
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
;*****************************************************************************
|
||||||
|
;* x86-optimized functions for ebur128 filter
|
||||||
|
;*
|
||||||
|
;* Copyright (C) 2025 Niklas Haas
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;*****************************************************************************
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
; Field offsets of one biquad stage: five consecutive 8-byte slots
; (b0, b1, b2, a1, a2), in the same order as the doubles declared in the
; C struct EBUR128Biquad.
struc Biquad
|
||||||
|
.b0 resq 1
|
||||||
|
.b1 resq 1
|
||||||
|
.b2 resq 1
|
||||||
|
.a1 resq 1
|
||||||
|
.a2 resq 1
|
||||||
|
endstruc
|
||||||
|
|
||||||
|
; Field offsets of the leading members of the C struct EBUR128DSPContext:
;   .pre / .rlb  two Biquad coefficient sets of 5 qwords each
;   .y / .z      one qword each, loaded below as pointers to the per-channel
;                filter state (state after the pre-filter / RLB-filter)
; The C header checks the offsets of pre, rlb and y with static_assert, so
; this layout must be kept in sync with it.
struc DSP
|
||||||
|
.pre resq 5
|
||||||
|
.rlb resq 5
|
||||||
|
.y resq 1
|
||||||
|
.z resq 1
|
||||||
|
endstruc
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
; MOVNQ num, dst, src
; Move `num` doubles between dst and src without touching more memory than
; needed: a scalar movsd (8 bytes) when num == 1, otherwise an unaligned
; 16-byte movupd. Works for both loads and stores, depending on which
; operand is the memory reference.
%macro MOVNQ 3 ; num, dst, src
|
||||||
|
%if %1 == 1
|
||||||
|
movsd %2, %3
|
||||||
|
%else
|
||||||
|
movupd %2, %3
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; FILTER y0, y1, y2, x, b0, b1, b2, a1, a2, samples, num_channels
; Run one biquad step on one or two channels at once and update the
; per-channel state in memory.
;   %1-%3  xmm registers receiving the new Y[0], Y[1], Y[2]; %2 doubles as
;          scratch and needs no defined value on entry
;   %4     xmm register holding the input X; clobbered by this macro
;   %5-%9  xmm registers holding the coefficients b0, b1, b2, a1, a2
;   %10    GPR pointing at the state of the current channel, 3 doubles
;          (24 bytes) per channel; advanced past the processed channels
;   %11    assemble-time channel count: 1 uses only the low lane, >1 also
;          uses the high lane for the channel located 24 bytes further on
; Y1 and Y2 in the formulas below are the state values read from offsets
; 8 and 16 of the channel before they are overwritten.
%macro FILTER 11 ; y0, y1, y2, x, b0, b1, b2, a1, a2, samples, num_channels
|
||||||
|
; Y[0] := b0 * X + Y1
|
||||||
|
; Y[1] := b1 * X + Y2 - a1 * Y[0]
|
||||||
|
; Y[2] := b2 * X - a2 * Y[0]
|
||||||
|
; load the previous Y1 / Y2 of the first channel into the low lanes
movsd %1, [%10 + 8]
|
||||||
|
movsd %3, [%10 + 16]
|
||||||
|
%if %11 > 1
|
||||||
|
; second channel's Y1 / Y2 go into the high lanes
movhpd %1, [%10 + 32]
|
||||||
|
movhpd %3, [%10 + 40]
|
||||||
|
%endif
|
||||||
|
|
||||||
|
; %1 = Y[0] = b0 * X + Y1
mulpd %2, %5, %4
|
||||||
|
addpd %1, %2
|
||||||
|
|
||||||
|
; %2 = Y[1] = b1 * X + (Y2 - a1 * Y[0])
mulpd %2, %8, %1
|
||||||
|
subpd %3, %2
|
||||||
|
mulpd %2, %6, %4
|
||||||
|
addpd %2, %3
|
||||||
|
|
||||||
|
; %3 = Y[2] = b2 * X - a2 * Y[0]; X (%4) is reused as scratch from here on
mulpd %3, %7, %4
|
||||||
|
mulpd %4, %9, %1
|
||||||
|
subpd %3, %4
|
||||||
|
|
||||||
|
; write the new state back: low lanes to the first channel ...
movsd [%10 + 0], %1
|
||||||
|
movsd [%10 + 8], %2
|
||||||
|
movsd [%10 + 16], %3
|
||||||
|
%if %11 > 1
|
||||||
|
; ... and high lanes to the second channel
movhpd [%10 + 24], %1
|
||||||
|
movhpd [%10 + 32], %2
|
||||||
|
movhpd [%10 + 40], %3
|
||||||
|
%endif
|
||||||
|
; step the state pointer over the channels just processed
add %10, 24 * %11
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; filter_channels num_channels
; Process one sample for `num_channels` (1 or 2) consecutive channels and
; advance all per-channel pointers past them.
; Expects, as set up by the enclosing function:
;   samplesq                 input samples, one double per channel
;   r7q / r8q                pre-filter / RLB-filter state (3 doubles/channel)
;   m4-m8 / m9-m13           broadcast pre-filter / RLB-filter coefficients
;   cache400q, cache3000q    per-channel slots holding the value to retire
;   sum400q, sum3000q        per-channel running sums
; For each channel: v = (RLB(pre(sample)))^2, sum += v - cache, cache = v.
; Clobbers m0-m3.
; Every memory access goes through MOVNQ so that the single-channel variant
; touches exactly 8 bytes per array; a 16-byte memory operand would read the
; slot after the channel being processed, which lies outside the arrays when
; only one channel was passed in.
%macro filter_channels 1 ; num_channels
|
||||||
|
MOVNQ %1, m3, [samplesq]
|
||||||
|
add samplesq, 8 * %1
|
||||||
|
|
||||||
|
; pre-filter: input m3 -> output m0; RLB-filter: input m0 -> output m3
FILTER m0, m1, m2, m3, m4, m5, m6, m7, m8, r7q, %1
|
||||||
|
FILTER m3, m1, m2, m0, m9, m10, m11, m12, m13, r8q, %1
|
||||||
|
|
||||||
|
; update sum and cache
|
||||||
|
mulpd m3, m3
|
||||||
|
; m0 / m1 = new value - value being retired from each cache
MOVNQ %1, m0, [cache400q]
|
||||||
|
MOVNQ %1, m1, [cache3000q]
|
||||||
|
subpd m0, m3, m0
|
||||||
|
subpd m1, m3, m1
|
||||||
|
MOVNQ %1, [cache400q], m3
|
||||||
|
MOVNQ %1, [cache3000q], m3
|
||||||
|
add cache400q, 8 * %1
|
||||||
|
add cache3000q, 8 * %1
|
||||||
|
; m2 is free after FILTER and serves as the load temporary for the sums
MOVNQ %1, m2, [sum400q]
|
||||||
|
addpd m0, m2
|
||||||
|
MOVNQ %1, m2, [sum3000q]
|
||||||
|
addpd m1, m2
|
||||||
|
MOVNQ %1, [sum400q], m0
|
||||||
|
MOVNQ %1, [sum3000q], m1
|
||||||
|
add sum400q, 8 * %1
|
||||||
|
add sum3000q, 8 * %1
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%if ARCH_X86_64
|
||||||
|
|
||||||
|
INIT_XMM avx
|
||||||
|
; void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *dsp,
;                                     const double *samples,
;                                     double *cache_400, double *cache_3000,
;                                     double *sum_400, double *sum_3000,
;                                     int nb_channels);
; Filters one sample of every channel through the pre-filter and the
; RLB-filter, then updates the 400 and 3000 caches and running sums.
; Channels are handled two at a time; an odd channel count is dealt with by
; processing a single channel first and then falling into the pair loop.
; NOTE(review): assumes nb_channels >= 1, as the pair loop always runs at
; least once when entered with an even count - confirm against the caller.
cglobal ebur128_filter_channels, 7, 9, 14, dsp, samples, cache400, cache3000, sum400, sum3000, channels
|
||||||
|
; broadcast the pre-filter coefficients into both lanes of m4-m8
movddup m4, [dspq + DSP.pre + Biquad.b0]
|
||||||
|
movddup m5, [dspq + DSP.pre + Biquad.b1]
|
||||||
|
movddup m6, [dspq + DSP.pre + Biquad.b2]
|
||||||
|
movddup m7, [dspq + DSP.pre + Biquad.a1]
|
||||||
|
movddup m8, [dspq + DSP.pre + Biquad.a2]
|
||||||
|
|
||||||
|
; broadcast the RLB-filter coefficients into both lanes of m9-m13
movddup m9, [dspq + DSP.rlb + Biquad.b0]
|
||||||
|
movddup m10, [dspq + DSP.rlb + Biquad.b1]
|
||||||
|
movddup m11, [dspq + DSP.rlb + Biquad.b2]
|
||||||
|
movddup m12, [dspq + DSP.rlb + Biquad.a1]
|
||||||
|
movddup m13, [dspq + DSP.rlb + Biquad.a2]
|
||||||
|
|
||||||
|
; r7 / r8 walk the per-channel filter state of the two stages
mov r7q, [dspq + DSP.y]
|
||||||
|
mov r8q, [dspq + DSP.z]
|
||||||
|
|
||||||
|
; handle odd channel count
|
||||||
|
test channelsd, 1
|
||||||
|
jnz .tail
|
||||||
|
|
||||||
|
.loop:
|
||||||
|
filter_channels 2
|
||||||
|
sub channelsd, 2
|
||||||
|
jg .loop
|
||||||
|
RET
|
||||||
|
|
||||||
|
.tail:
|
||||||
|
filter_channels 1
|
||||||
|
; dec sets ZF itself, so the remaining count can be branched on directly
dec channelsd
|
||||||
|
jnz .loop
|
||||||
|
RET
|
||||||
|
|
||||||
|
%endif ; ARCH_X86_64
|
35
libavfilter/x86/f_ebur128_init.c
Normal file
35
libavfilter/x86/f_ebur128_init.c
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018 Paul B Mahol
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavfilter/f_ebur128.h"
|
||||||
|
|
||||||
|
void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *, const double *,
|
||||||
|
double *, double *, double *, double *, int);
|
||||||
|
|
||||||
|
/**
 * Install the x86-optimized ebur128 DSP functions.
 *
 * Replaces dsp->filter_channels with the AVX implementation when this is an
 * x86-64 build and EXTERNAL_AVX() accepts the runtime CPU flags; otherwise
 * dsp is left exactly as the caller set it up.
 *
 * @param dsp DSP context whose function pointers may be overridden
 */
av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
/* ARCH_X86_64 comes first: the assembly is only built inside
     * %if ARCH_X86_64, so the symbol must not be referenced elsewhere. */
if (ARCH_X86_64 && EXTERNAL_AVX(cpu_flags))
|
||||||
|
dsp->filter_channels = ff_ebur128_filter_channels_avx;
|
||||||
|
}
|
Reference in New Issue
Block a user