mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-03-28 12:32:17 +02:00
dcadsp: split lfe_fir cases
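The x86 code runs short on registers because numerous elements are not static (for example, the decimation factor is passed as a run-time argument). In addition, splitting the two cases (decimation factor 32 and 64) into separate functions allows more optimized code, at least on x86. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>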
This commit is contained in:
parent
82ae8a44e6
commit
45854df9a5
@ -47,16 +47,43 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
|
|||||||
float out[32], const float in[32],
|
float out[32], const float in[32],
|
||||||
float scale);
|
float scale);
|
||||||
|
|
||||||
|
static void lfe_fir0_vfp(float *out, const float *in, const float *coefs,
|
||||||
|
float scale)
|
||||||
|
{
|
||||||
|
ff_dca_lfe_fir_vfp(out, in, coefs, 32, scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void lfe_fir1_vfp(float *out, const float *in, const float *coefs,
|
||||||
|
float scale)
|
||||||
|
{
|
||||||
|
ff_dca_lfe_fir_vfp(out, in, coefs, 64, scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void lfe_fir0_neon(float *out, const float *in, const float *coefs,
|
||||||
|
float scale)
|
||||||
|
{
|
||||||
|
ff_dca_lfe_fir_neon(out, in, coefs, 32, scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void lfe_fir1_neon(float *out, const float *in, const float *coefs,
|
||||||
|
float scale)
|
||||||
|
{
|
||||||
|
ff_dca_lfe_fir_neon(out, in, coefs, 64, scale);
|
||||||
|
}
|
||||||
|
|
||||||
av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
|
av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
|
||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
|
if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
|
||||||
s->lfe_fir = ff_dca_lfe_fir_vfp;
|
s->lfe_fir[0] = lfe_fir0_vfp;
|
||||||
|
s->lfe_fir[1] = lfe_fir1_vfp;
|
||||||
s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
|
s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
|
||||||
}
|
}
|
||||||
if (have_neon(cpu_flags))
|
if (have_neon(cpu_flags)) {
|
||||||
s->lfe_fir = ff_dca_lfe_fir_neon;
|
s->lfe_fir[0] = lfe_fir0_neon;
|
||||||
|
s->lfe_fir[1] = lfe_fir1_neon;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
|
av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
|
||||||
|
@ -1118,23 +1118,23 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
|
|||||||
* samples_out: An array holding interpolated samples
|
* samples_out: An array holding interpolated samples
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int decifactor;
|
int idx;
|
||||||
const float *prCoeff;
|
const float *prCoeff;
|
||||||
int deciindex;
|
int deciindex;
|
||||||
|
|
||||||
/* Select decimation filter */
|
/* Select decimation filter */
|
||||||
if (decimation_select == 1) {
|
if (decimation_select == 1) {
|
||||||
decifactor = 64;
|
idx = 1;
|
||||||
prCoeff = lfe_fir_128;
|
prCoeff = lfe_fir_128;
|
||||||
} else {
|
} else {
|
||||||
decifactor = 32;
|
idx = 0;
|
||||||
prCoeff = lfe_fir_64;
|
prCoeff = lfe_fir_64;
|
||||||
}
|
}
|
||||||
/* Interpolation */
|
/* Interpolation */
|
||||||
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
|
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
|
||||||
s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, scale);
|
s->dcadsp.lfe_fir[idx](samples_out, samples_in, prCoeff, scale);
|
||||||
samples_in++;
|
samples_in++;
|
||||||
samples_out += 2 * decifactor;
|
samples_out += 2 * 32 * (1 + idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,8 +32,9 @@ static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
|
|||||||
dst[i] = src[i] * fscale;
|
dst[i] = src[i] * fscale;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
|
static inline void
|
||||||
int decifactor, float scale)
|
dca_lfe_fir(float *out, const float *in, const float *coefs,
|
||||||
|
int decifactor, float scale)
|
||||||
{
|
{
|
||||||
float *out2 = out + decifactor;
|
float *out2 = out + decifactor;
|
||||||
const float *cf0 = coefs;
|
const float *cf0 = coefs;
|
||||||
@ -82,9 +83,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs,
|
||||||
|
float scale)
|
||||||
|
{
|
||||||
|
dca_lfe_fir(out, in, coefs, 32, scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void dca_lfe_fir1_c(float *out, const float *in, const float *coefs,
|
||||||
|
float scale)
|
||||||
|
{
|
||||||
|
dca_lfe_fir(out, in, coefs, 64, scale);
|
||||||
|
}
|
||||||
|
|
||||||
av_cold void ff_dcadsp_init(DCADSPContext *s)
|
av_cold void ff_dcadsp_init(DCADSPContext *s)
|
||||||
{
|
{
|
||||||
s->lfe_fir = dca_lfe_fir_c;
|
s->lfe_fir[0] = dca_lfe_fir0_c;
|
||||||
|
s->lfe_fir[1] = dca_lfe_fir1_c;
|
||||||
s->qmf_32_subbands = dca_qmf_32_subbands;
|
s->qmf_32_subbands = dca_qmf_32_subbands;
|
||||||
s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
|
s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
|
||||||
if (ARCH_ARM) ff_dcadsp_init_arm(s);
|
if (ARCH_ARM) ff_dcadsp_init_arm(s);
|
||||||
|
@ -23,8 +23,8 @@
|
|||||||
#include "synth_filter.h"
|
#include "synth_filter.h"
|
||||||
|
|
||||||
typedef struct DCADSPContext {
|
typedef struct DCADSPContext {
|
||||||
void (*lfe_fir)(float *out, const float *in, const float *coefs,
|
void (*lfe_fir[2])(float *out, const float *in, const float *coefs,
|
||||||
int decifactor, float scale);
|
float scale);
|
||||||
void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
|
void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
|
||||||
SynthFilterContext *synth, FFTContext *imdct,
|
SynthFilterContext *synth, FFTContext *imdct,
|
||||||
float synth_buf_ptr[512],
|
float synth_buf_ptr[512],
|
||||||
|
Loading…
x
Reference in New Issue
Block a user