mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
This commit is contained in:
parent
1e27655388
commit
284ea790d8
@ -1360,7 +1360,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
|
||||
|
||||
band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
|
||||
scale = sf[idx] / sqrtf(band_energy);
|
||||
ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
|
||||
ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
|
||||
}
|
||||
} else {
|
||||
const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
|
||||
@ -1506,7 +1506,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
|
||||
}
|
||||
} while (len -= 2);
|
||||
|
||||
ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
|
||||
ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1730,10 +1730,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p
|
||||
c *= 1 - 2 * cpe->ms_mask[idx];
|
||||
scale = c * sce1->sf[idx];
|
||||
for (group = 0; group < ics->group_len[g]; group++)
|
||||
ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
|
||||
coef0 + group * 128 + offsets[i],
|
||||
scale,
|
||||
offsets[i + 1] - offsets[i]);
|
||||
ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
|
||||
coef0 + group * 128 + offsets[i],
|
||||
scale,
|
||||
offsets[i + 1] - offsets[i]);
|
||||
}
|
||||
} else {
|
||||
int bt_run_end = sce1->band_type_run_end[idx];
|
||||
|
@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||
|
||||
void ff_vector_fmul_window_neon(float *dst, const float *src0,
|
||||
const float *src1, const float *win, int len);
|
||||
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
||||
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
||||
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
|
||||
@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
||||
}
|
||||
|
||||
c->vector_fmul_window = ff_vector_fmul_window_neon;
|
||||
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
|
||||
c->butterflies_float = ff_butterflies_float_neon;
|
||||
c->scalarproduct_float = ff_scalarproduct_float_neon;
|
||||
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
|
||||
|
@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1
|
||||
endfunc
|
||||
#endif
|
||||
|
||||
/*
 * ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, int len)
 *
 * Multiplies every float loaded through r1 by a scalar and stores the
 * result through r0.  All loads and stores use the :128 address
 * qualifier, i.e. both pointers must be 16-byte aligned.
 * Lengths of 16 or more go through an unrolled loop handling 16 floats
 * per iteration; the remainder is handled 4 floats at a time.
 * VFP/NOVFP are line-prefix macros defined outside this view; presumably
 * they select where the float argument arrives (d0 vs. r2) -- confirm in
 * the assembly support header.
 */
function ff_vector_fmul_scalar_neon, export=1
|
||||
VFP len .req r2
|
||||
NOVFP len .req r3
|
||||
VFP vdup.32 q8, d0[0]
|
||||
NOVFP vdup.32 q8, r2
|
||||
bics r12, len, #15
|
||||
beq 3f
|
||||
vld1.32 {q0},[r1,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
1: vmul.f32 q0, q0, q8
|
||||
vld1.32 {q2},[r1,:128]!
|
||||
vmul.f32 q1, q1, q8
|
||||
vld1.32 {q3},[r1,:128]!
|
||||
vmul.f32 q2, q2, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
vmul.f32 q3, q3, q8
|
||||
vst1.32 {q1},[r0,:128]!
|
||||
subs r12, r12, #16
|
||||
beq 2f
|
||||
vld1.32 {q0},[r1,:128]!
|
||||
vst1.32 {q2},[r0,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
vst1.32 {q3},[r0,:128]!
|
||||
b 1b
|
||||
2: vst1.32 {q2},[r0,:128]!
|
||||
vst1.32 {q3},[r0,:128]!
|
||||
ands len, len, #15
|
||||
it eq
|
||||
bxeq lr
|
||||
3: vld1.32 {q0},[r1,:128]!
|
||||
vmul.f32 q0, q0, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
subs len, len, #4
|
||||
bgt 3b
|
||||
bx lr
|
||||
.unreq len
|
||||
endfunc
|
||||
|
||||
function ff_butterflies_float_neon, export=1
|
||||
1: vld1.32 {q0},[r0,:128]
|
||||
vld1.32 {q1},[r1,:128]
|
||||
|
@ -2392,14 +2392,6 @@ static void vector_fmul_window_c(float *dst, const float *src0,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Plain C implementation of vector_fmul_scalar: stores src[i] * mul into
 * dst[i] for every i in [0, len).  The loop does nothing when len <= 0 and
 * does not itself rely on any alignment or length multiple.
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
|
||||
int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len; i++)
|
||||
dst[i] = src[i] * mul;
|
||||
}
|
||||
|
||||
static void butterflies_float_c(float *restrict v1, float *restrict v2,
|
||||
int len)
|
||||
{
|
||||
@ -2869,7 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
c->scalarproduct_float = ff_scalarproduct_float_c;
|
||||
c->butterflies_float = butterflies_float_c;
|
||||
c->butterflies_float_interleave = butterflies_float_interleave_c;
|
||||
c->vector_fmul_scalar = vector_fmul_scalar_c;
|
||||
|
||||
c->shrink[0]= av_image_copy_plane;
|
||||
c->shrink[1]= ff_shrink22;
|
||||
|
@ -382,16 +382,6 @@ typedef struct DSPContext {
|
||||
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
|
||||
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
|
||||
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
|
||||
/**
|
||||
* Multiply a vector of floats by a scalar float. Source and
|
||||
* destination vectors must overlap exactly or not at all.
|
||||
* @param dst result vector, 16-byte aligned
|
||||
* @param src input vector, 16-byte aligned
|
||||
* @param mul scalar value
|
||||
* @param len length of vector, multiple of 4
|
||||
*/
|
||||
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
/**
|
||||
* Calculate the scalar product of two vectors of floats.
|
||||
* @param v1 first vector, 16-byte aligned
|
||||
|
@ -28,12 +28,12 @@
|
||||
|
||||
#include "libavutil/channel_layout.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/log.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "avcodec.h"
|
||||
#include "audio_frame_queue.h"
|
||||
#include "dsputil.h"
|
||||
#include "internal.h"
|
||||
#include "mpegaudio.h"
|
||||
#include "mpegaudiodecheader.h"
|
||||
@ -50,7 +50,7 @@ typedef struct LAMEContext {
|
||||
int reservoir;
|
||||
float *samples_flt[2];
|
||||
AudioFrameQueue afq;
|
||||
DSPContext dsp;
|
||||
AVFloatDSPContext fdsp;
|
||||
} LAMEContext;
|
||||
|
||||
|
||||
@ -167,7 +167,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
ff_dsputil_init(&s->dsp, avctx);
|
||||
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||
|
||||
return 0;
|
||||
error:
|
||||
@ -205,10 +205,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
for (ch = 0; ch < avctx->channels; ch++) {
|
||||
s->dsp.vector_fmul_scalar(s->samples_flt[ch],
|
||||
(const float *)frame->data[ch],
|
||||
32768.0f,
|
||||
FFALIGN(frame->nb_samples, 8));
|
||||
s->fdsp.vector_fmul_scalar(s->samples_flt[ch],
|
||||
(const float *)frame->data[ch],
|
||||
32768.0f,
|
||||
FFALIGN(frame->nb_samples, 8));
|
||||
}
|
||||
ENCODE_BUFFER(lame_encode_buffer_float, float, s->samples_flt);
|
||||
break;
|
||||
|
@ -111,7 +111,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame)
|
||||
|
||||
for (ch = 0; ch < avctx->channels; ch++) {
|
||||
memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
|
||||
s->dsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
|
||||
s->fdsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
|
||||
s->dsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], win, len);
|
||||
s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
|
||||
mdct->mdct_calc(mdct, s->coefs[ch], s->output);
|
||||
|
@ -86,6 +86,7 @@
|
||||
* subframe in order to reconstruct the output samples.
|
||||
*/
|
||||
|
||||
#include "libavutil/float_dsp.h"
|
||||
#include "libavutil/intfloat.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "avcodec.h"
|
||||
@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx {
|
||||
AVCodecContext* avctx; ///< codec context for av_log
|
||||
AVFrame frame; ///< AVFrame for decoded output
|
||||
DSPContext dsp; ///< accelerated DSP functions
|
||||
AVFloatDSPContext fdsp;
|
||||
uint8_t frame_data[MAX_FRAMESIZE +
|
||||
FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
|
||||
PutBitContext pb; ///< context for filling the frame_data buffer
|
||||
@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
|
||||
s->avctx = avctx;
|
||||
ff_dsputil_init(&s->dsp, avctx);
|
||||
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||
|
||||
init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
|
||||
|
||||
avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
|
||||
@ -1008,12 +1012,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
|
||||
}
|
||||
} else if (s->avctx->channels == 2) {
|
||||
int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
|
||||
s->dsp.vector_fmul_scalar(ch_data[0] + sfb[0],
|
||||
ch_data[0] + sfb[0],
|
||||
181.0 / 128, len);
|
||||
s->dsp.vector_fmul_scalar(ch_data[1] + sfb[0],
|
||||
ch_data[1] + sfb[0],
|
||||
181.0 / 128, len);
|
||||
s->fdsp.vector_fmul_scalar(ch_data[0] + sfb[0],
|
||||
ch_data[0] + sfb[0],
|
||||
181.0 / 128, len);
|
||||
s->fdsp.vector_fmul_scalar(ch_data[1] + sfb[0],
|
||||
ch_data[1] + sfb[0],
|
||||
181.0 / 128, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1259,9 +1263,9 @@ static int decode_subframe(WMAProDecodeCtx *s)
|
||||
s->channel[c].scale_factor_step;
|
||||
const float quant = pow(10.0, exp / 20.0);
|
||||
int start = s->cur_sfb_offsets[b];
|
||||
s->dsp.vector_fmul_scalar(s->tmp + start,
|
||||
s->channel[c].coeffs + start,
|
||||
quant, end - start);
|
||||
s->fdsp.vector_fmul_scalar(s->tmp + start,
|
||||
s->channel[c].coeffs + start,
|
||||
quant, end - start);
|
||||
}
|
||||
|
||||
/** apply imdct (imdct_half == DCTIV with reverse) */
|
||||
|
@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l
|
||||
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
/**
 * Point the float DSP context at the NEON routines.
 * Overwrites the vector_fmul, vector_fmac_scalar and vector_fmul_scalar
 * function pointers of fdsp; nothing else is modified here.
 *
 * @param fdsp context whose function pointers are replaced
 */
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
||||
{
|
||||
fdsp->vector_fmul = ff_vector_fmul_neon;
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
|
||||
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
|
||||
}
|
||||
|
@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2
|
||||
bx lr
|
||||
.unreq len
|
||||
endfunc
|
||||
|
||||
/*
 * ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, int len)
 *
 * Multiplies every float loaded through r1 by a scalar and stores the
 * result through r0.  All loads and stores use the :128 address
 * qualifier, i.e. both pointers must be 16-byte aligned.
 * Lengths of 16 or more go through an unrolled loop handling 16 floats
 * per iteration; the remainder is handled 4 floats at a time.
 * Note: len == 0 still falls into the tail loop once, so 4 floats are
 * read and written; a remainder that is not a multiple of 4 is likewise
 * rounded up to the next 4 floats.
 * VFP/NOVFP are line-prefix macros defined outside this view; presumably
 * they select where the float argument arrives (d0 vs. r2) -- confirm in
 * the assembly support header.
 */
function ff_vector_fmul_scalar_neon, export=1
|
||||
VFP len .req r2
|
||||
NOVFP len .req r3
|
||||
/* broadcast the scalar multiplier into all four lanes of q8 */
VFP vdup.32 q8, d0[0]
|
||||
NOVFP vdup.32 q8, r2
|
||||
/* r12 = len with the low four bits cleared; zero means fewer than 16 floats, go straight to the tail loop */
bics r12, len, #15
|
||||
beq 3f
|
||||
/* preload the first 8 floats before entering the unrolled loop */
vld1.32 {q0},[r1,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
/* main loop: 16 floats per pass, with loads and stores interleaved between the multiplies */
1: vmul.f32 q0, q0, q8
|
||||
vld1.32 {q2},[r1,:128]!
|
||||
vmul.f32 q1, q1, q8
|
||||
vld1.32 {q3},[r1,:128]!
|
||||
vmul.f32 q2, q2, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
vmul.f32 q3, q3, q8
|
||||
vst1.32 {q1},[r0,:128]!
|
||||
subs r12, r12, #16
|
||||
/* last 16-float chunk: leave the loop without loading further input */
beq 2f
|
||||
vld1.32 {q0},[r1,:128]!
|
||||
vst1.32 {q2},[r0,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
vst1.32 {q3},[r0,:128]!
|
||||
b 1b
|
||||
/* drain: store the two results still held in q2 and q3 */
2: vst1.32 {q2},[r0,:128]!
|
||||
vst1.32 {q3},[r0,:128]!
|
||||
/* len &= 15; return immediately when no remainder is left */
ands len, len, #15
|
||||
it eq
|
||||
bxeq lr
|
||||
/* tail loop: 4 floats per pass, repeated while len stays above zero */
3: vld1.32 {q0},[r1,:128]!
|
||||
vmul.f32 q0, q0, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
subs len, len, #4
|
||||
bgt 3b
|
||||
bx lr
|
||||
.unreq len
|
||||
endfunc
|
||||
|
@ -36,10 +36,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
|
||||
dst[i] += src[i] * mul;
|
||||
}
|
||||
|
||||
/**
 * Plain C implementation of vector_fmul_scalar: stores src[i] * mul into
 * dst[i] for every i in [0, len).  The loop does nothing when len <= 0 and
 * does not itself rely on any alignment or length multiple.
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
|
||||
int len)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < len; i++)
|
||||
dst[i] = src[i] * mul;
|
||||
}
|
||||
|
||||
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
||||
{
|
||||
fdsp->vector_fmul = vector_fmul_c;
|
||||
fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
|
||||
fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
|
||||
|
||||
#if ARCH_ARM
|
||||
ff_float_dsp_init_arm(fdsp);
|
||||
|
@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext {
|
||||
*/
|
||||
void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
/**
|
||||
* Multiply a vector of floats by a scalar float. Source and
|
||||
* destination vectors must overlap exactly or not at all.
|
||||
*
|
||||
* @param dst result vector
|
||||
* constraints: 16-byte aligned
|
||||
* @param src input vector
|
||||
* constraints: 16-byte aligned
|
||||
* @param mul scalar value
|
||||
* @param len length of vector
|
||||
* constraints: multiple of 4
|
||||
*/
|
||||
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
} AVFloatDSPContext;
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user