mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
ARM: NEON optimisations for some dsputil functions
NEON versions of the following functions are added: vector_fmul_scalar, vector_fmul_sv_scalar, sv_fmul_scalar, butterflies_float. Originally committed as revision 19957 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
42d3fbb3f4
commit
1dee3e97c6
@ -157,6 +157,17 @@ void ff_vector_fmul_neon(float *dst, const float *src, int len);
|
||||
void ff_vector_fmul_window_neon(float *dst, const float *src0,
|
||||
const float *src1, const float *win,
|
||||
float add_bias, int len);
|
||||
/* NEON: dst[i] = src[i] * mul, handled 4 floats at a time.
 * The implementation uses 128-bit-aligned loads and stores on dst and src. */
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
/* NEON: dst = src * mul * v, where v is fetched 2 floats at a time through
 * successive pointers read from vp. The implementation consumes 4 floats
 * (two vp entries) per iteration. */
void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,
|
||||
const float **vp, float mul, int len);
|
||||
/* NEON: as the _2 variant above, but each vp entry supplies 4 floats. */
void ff_vector_fmul_sv_scalar_4_neon(float *dst, const float *src,
|
||||
const float **vp, float mul, int len);
|
||||
/* NEON: dst = v * mul, where v is fetched 2 floats at a time through
 * successive pointers read from vp (4 floats, two vp entries, per iteration). */
void ff_sv_fmul_scalar_2_neon(float *dst, const float **vp, float mul,
|
||||
int len);
|
||||
/* NEON: as the _2 variant above, but each vp entry supplies 4 floats. */
void ff_sv_fmul_scalar_4_neon(float *dst, const float **vp, float mul,
|
||||
int len);
|
||||
/* NEON: in-place butterfly; v1[i] becomes v1[i] + v2[i] and v2[i] becomes
 * v1[i] - v2[i] (both computed from the old values), 4 floats at a time. */
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
||||
|
||||
void ff_float_to_int16_neon(int16_t *, const float *, long);
|
||||
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
|
||||
@ -269,6 +280,14 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
||||
|
||||
c->vector_fmul = ff_vector_fmul_neon;
|
||||
c->vector_fmul_window = ff_vector_fmul_window_neon;
|
||||
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
|
||||
c->butterflies_float = ff_butterflies_float_neon;
|
||||
|
||||
c->vector_fmul_sv_scalar[0] = ff_vector_fmul_sv_scalar_2_neon;
|
||||
c->vector_fmul_sv_scalar[1] = ff_vector_fmul_sv_scalar_4_neon;
|
||||
|
||||
c->sv_fmul_scalar[0] = ff_sv_fmul_scalar_2_neon;
|
||||
c->sv_fmul_scalar[1] = ff_sv_fmul_scalar_4_neon;
|
||||
|
||||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
||||
c->float_to_int16 = ff_float_to_int16_neon;
|
||||
|
@ -858,3 +858,155 @@ function ff_vorbis_inverse_coupling_neon, export=1
|
||||
bx lr
|
||||
.endfunc
|
||||
#endif
|
||||
|
||||
/*
 * ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, int len)
 *
 * Stores src[i] * mul to dst[i].
 *   r0 = dst (store pointer), r1 = src (load pointer)
 *   VFP lines:   mul is taken from d0[0], len from r2
 *   NOVFP lines: mul is taken from r2,    len from r3
 * The VFP/NOVFP prefixes and the "function" macro are defined outside this
 * view; presumably they select hard-float vs. soft-float argument passing
 * (TODO confirm in the shared asm header).
 *
 * Every load/store carries a :128 alignment qualifier, so dst and src must
 * be 16-byte aligned. Data is handled in groups of 4 floats.
 */
function ff_vector_fmul_scalar_neon, export=1
|
||||
VFP len .req r2
|
||||
NOVFP len .req r3
|
||||
/* Broadcast mul into all four lanes of q8. This happens before q0 (which
 * overlaps d0) is overwritten with data. */
VFP vdup.32 q8, d0[0]
|
||||
NOVFP vdup.32 q8, r2
|
||||
/* r12 = len rounded down to a multiple of 16; fewer than 16 floats means
 * the unrolled loop is skipped entirely. */
bics r12, len, #15
|
||||
beq 3f
|
||||
/* Preload the first 8 floats so the loop can overlap loads with multiplies. */
vld1.32 {q0},[r1,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
/* Main loop: 16 floats per iteration. q0/q1 were loaded ahead of time,
 * q2/q3 are loaded here; loads, multiplies and stores are interleaved. */
1: vmul.f32 q0, q0, q8
|
||||
vld1.32 {q2},[r1,:128]!
|
||||
vmul.f32 q1, q1, q8
|
||||
vld1.32 {q3},[r1,:128]!
|
||||
vmul.f32 q2, q2, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
vmul.f32 q3, q3, q8
|
||||
vst1.32 {q1},[r0,:128]!
|
||||
subs r12, r12, #16
|
||||
/* Last unrolled iteration: skip the look-ahead loads so nothing beyond the
 * 16-float blocks is read, and flush q2/q3 at label 2. */
beq 2f
|
||||
/* Otherwise fetch the next q0/q1 while storing the pending q2/q3. */
vld1.32 {q0},[r1,:128]!
|
||||
vst1.32 {q2},[r0,:128]!
|
||||
vld1.32 {q1},[r1,:128]!
|
||||
vst1.32 {q3},[r0,:128]!
|
||||
b 1b
|
||||
/* Flush the results still held in q2/q3. */
2: vst1.32 {q2},[r0,:128]!
|
||||
vst1.32 {q3},[r0,:128]!
|
||||
/* Remaining count is len mod 16; return if there is no tail. */
ands len, len, #15
|
||||
bxeq lr
|
||||
/* Tail loop: 4 floats per iteration while the remaining count stays > 0.
 * NOTE(review): also entered directly when len < 16; the body runs once
 * before the count is tested, so len == 0 would still process 4 floats.
 * Presumably callers always pass a positive multiple of 4 -- confirm. */
3: vld1.32 {q0},[r1,:128]!
|
||||
vmul.f32 q0, q0, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
subs len, len, #4
|
||||
bgt 3b
|
||||
bx lr
|
||||
.unreq len
|
||||
.endfunc
|
||||
|
||||
/*
 * ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,
 *                                 const float **vp, float mul, int len)
 *
 * For each group of 4 floats:
 *   dst[0..1] = src[0..1] * mul * (2 floats at the 1st pointer read from vp)
 *   dst[2..3] = src[2..3] * mul * (2 floats at the 2nd pointer read from vp)
 *   r0 = dst, r1 = src, r2 = vp, r3 = remaining count
 *   VFP lines:   mul is taken from d0[0]; r3 is used as len as passed
 *   NOVFP lines: mul is taken from r3, then len is reloaded from [sp]
 * The VFP/NOVFP prefixes are macros defined outside this view; presumably
 * they select hard-float vs. soft-float argument passing (TODO confirm).
 *
 * dst, src and every vector reached through vp are accessed with :64
 * alignment qualifiers (8-byte alignment required).
 * The loop only exits when the count reaches exactly 0 (beq), so len has to
 * be a positive multiple of 4.
 */
function ff_vector_fmul_sv_scalar_2_neon, export=1
|
||||
/* Broadcast mul into both lanes of d16 (before d0 is overwritten below). */
VFP vdup.32 d16, d0[0]
|
||||
NOVFP vdup.32 d16, r3
|
||||
/* r3 held mul; now that it has been consumed, fetch len from the stack. */
NOVFP ldr r3, [sp]
|
||||
/* Preload the first 4 source floats. */
vld1.32 {d0},[r1,:64]!
|
||||
vld1.32 {d1},[r1,:64]!
|
||||
/* The flags set by this subs are tested by the beq further down; none of
 * the instructions in between update the condition flags. */
1: subs r3, r3, #4
|
||||
vmul.f32 d4, d0, d16
|
||||
vmul.f32 d5, d1, d16
|
||||
/* Read the next two 32-bit pointers from vp (post-incrementing r2 by 4
 * each time) and load 2 floats through each. */
ldr r12, [r2], #4
|
||||
vld1.32 {d2},[r12,:64]
|
||||
ldr r12, [r2], #4
|
||||
vld1.32 {d3},[r12,:64]
|
||||
vmul.f32 d4, d4, d2
|
||||
vmul.f32 d5, d5, d3
|
||||
/* Count exhausted: store the final results at label 2 without reading any
 * further source data. */
beq 2f
|
||||
/* Load the next source floats before storing the current results. */
vld1.32 {d0},[r1,:64]!
|
||||
vld1.32 {d1},[r1,:64]!
|
||||
vst1.32 {d4},[r0,:64]!
|
||||
vst1.32 {d5},[r0,:64]!
|
||||
b 1b
|
||||
2: vst1.32 {d4},[r0,:64]!
|
||||
vst1.32 {d5},[r0,:64]!
|
||||
bx lr
|
||||
.endfunc
|
||||
|
||||
/*
 * ff_vector_fmul_sv_scalar_4_neon(float *dst, const float *src,
 *                                 const float **vp, float mul, int len)
 *
 * For each group of 4 floats:
 *   dst[0..3] = src[0..3] * mul * (4 floats at the next pointer read from vp)
 *   r0 = dst, r1 = src, r2 = vp, r3 = len, lr = unrolled-loop counter
 *   VFP lines:   mul is taken from d0[0]; r3 is used as len as passed
 *   NOVFP lines: mul is taken from r3, then len is reloaded from [sp]
 * The VFP/NOVFP prefixes are macros defined outside this view; presumably
 * they select hard-float vs. soft-float argument passing (TODO confirm).
 *
 * dst, src and every vector reached through vp are accessed with :128
 * alignment qualifiers (16-byte alignment required).
 */
function ff_vector_fmul_sv_scalar_4_neon, export=1
|
||||
/* Broadcast mul into all four lanes of q10. */
VFP vdup.32 q10, d0[0]
|
||||
NOVFP vdup.32 q10, r3
|
||||
/* Must come before the push below: the stack argument is still at [sp]. */
NOVFP ldr r3, [sp]
|
||||
/* Save lr so it can serve as a scratch counter; returns use pop {pc}. */
push {lr}
|
||||
/* lr = len rounded down to a multiple of 8; fewer than 8 floats means the
 * unrolled loop is skipped. */
bics lr, r3, #7
|
||||
beq 3f
|
||||
/* Preload the first 8 source floats. */
vld1.32 {q0},[r1,:128]!
|
||||
vld1.32 {q2},[r1,:128]!
|
||||
/* Main loop: 8 floats and two vp entries per iteration. */
1: ldr r12, [r2], #4
|
||||
vld1.32 {q1},[r12,:128]
|
||||
ldr r12, [r2], #4
|
||||
vld1.32 {q3},[r12,:128]
|
||||
vmul.f32 q8, q0, q10
|
||||
vmul.f32 q8, q8, q1
|
||||
vmul.f32 q9, q2, q10
|
||||
vmul.f32 q9, q9, q3
|
||||
subs lr, lr, #8
|
||||
/* Last unrolled iteration: skip the look-ahead source loads and flush the
 * results at label 2. */
beq 2f
|
||||
vld1.32 {q0},[r1,:128]!
|
||||
vld1.32 {q2},[r1,:128]!
|
||||
vst1.32 {q8},[r0,:128]!
|
||||
vst1.32 {q9},[r0,:128]!
|
||||
b 1b
|
||||
2: vst1.32 {q8},[r0,:128]!
|
||||
vst1.32 {q9},[r0,:128]!
|
||||
/* Remaining count is len mod 8; return if there is no tail. */
ands r3, r3, #7
|
||||
popeq {pc}
|
||||
/* Tail loop: 4 floats and one vp entry per iteration while the remaining
 * count stays > 0.
 * NOTE(review): also entered directly when len < 8; the body runs once
 * before the count is tested, so len == 0 would still process 4 floats.
 * Presumably callers always pass a positive multiple of 4 -- confirm. */
3: vld1.32 {q0},[r1,:128]!
|
||||
ldr r12, [r2], #4
|
||||
vld1.32 {q1},[r12,:128]
|
||||
vmul.f32 q0, q0, q10
|
||||
vmul.f32 q0, q0, q1
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
subs r3, r3, #4
|
||||
bgt 3b
|
||||
pop {pc}
|
||||
.endfunc
|
||||
|
||||
/*
 * ff_sv_fmul_scalar_2_neon(float *dst, const float **vp, float mul, int len)
 *
 * For each group of 4 floats:
 *   dst[0..1] = mul * (2 floats at the 1st pointer read from vp)
 *   dst[2..3] = mul * (2 floats at the 2nd pointer read from vp)
 *   r0 = dst, r1 = vp
 *   VFP lines:   mul is taken from d0[0], len from r2
 *   NOVFP lines: mul is taken from r2,    len from r3
 * The VFP/NOVFP prefixes are macros defined outside this view; presumably
 * they select hard-float vs. soft-float argument passing (TODO confirm).
 *
 * Vectors reached through vp are loaded with :64 qualifiers (8-byte
 * alignment); dst is stored with :128 qualifiers (16-byte alignment).
 * The loop only exits when the count reaches exactly 0 (beq), so len has to
 * be a positive multiple of 4.
 */
function ff_sv_fmul_scalar_2_neon, export=1
|
||||
VFP len .req r2
|
||||
NOVFP len .req r3
|
||||
/* Broadcast mul into all four lanes of q8 (before d0 is overwritten below). */
VFP vdup.32 q8, d0[0]
|
||||
NOVFP vdup.32 q8, r2
|
||||
/* Preload the first two 2-float vectors into d0/d1, which together form q0. */
ldr r12, [r1], #4
|
||||
vld1.32 {d0},[r12,:64]
|
||||
ldr r12, [r1], #4
|
||||
vld1.32 {d1},[r12,:64]
|
||||
1: vmul.f32 q1, q0, q8
|
||||
subs len, len, #4
|
||||
/* Count exhausted: store the final result at label 2 without reading any
 * further vp entries. */
beq 2f
|
||||
/* Fetch the next two vectors before storing the current result. */
ldr r12, [r1], #4
|
||||
vld1.32 {d0},[r12,:64]
|
||||
ldr r12, [r1], #4
|
||||
vld1.32 {d1},[r12,:64]
|
||||
vst1.32 {q1},[r0,:128]!
|
||||
b 1b
|
||||
2: vst1.32 {q1},[r0,:128]!
|
||||
bx lr
|
||||
.unreq len
|
||||
.endfunc
|
||||
|
||||
/*
 * ff_sv_fmul_scalar_4_neon(float *dst, const float **vp, float mul, int len)
 *
 * For each group of 4 floats:
 *   dst[0..3] = mul * (4 floats at the next pointer read from vp)
 *   r0 = dst, r1 = vp
 *   VFP lines:   mul is taken from d0[0], len from r2
 *   NOVFP lines: mul is taken from r2,    len from r3
 * The VFP/NOVFP prefixes are macros defined outside this view; presumably
 * they select hard-float vs. soft-float argument passing (TODO confirm).
 *
 * dst and every vector reached through vp are accessed with :128 alignment
 * qualifiers (16-byte alignment required).
 */
function ff_sv_fmul_scalar_4_neon, export=1
|
||||
VFP len .req r2
|
||||
NOVFP len .req r3
|
||||
/* Broadcast mul into all four lanes of q8 (before q0 is overwritten below). */
VFP vdup.32 q8, d0[0]
|
||||
NOVFP vdup.32 q8, r2
|
||||
/* One vp entry (a 32-bit pointer, r1 post-incremented by 4) and 4 floats
 * per iteration. The count is tested after the body, so the loop always
 * runs at least once and continues while the remaining count is > 0. */
1: ldr r12, [r1], #4
|
||||
vld1.32 {q0},[r12,:128]
|
||||
vmul.f32 q0, q0, q8
|
||||
vst1.32 {q0},[r0,:128]!
|
||||
subs len, len, #4
|
||||
bgt 1b
|
||||
bx lr
|
||||
.unreq len
|
||||
.endfunc
|
||||
|
||||
/*
 * ff_butterflies_float_neon(float *v1, float *v2, int len)
 *
 * In-place butterfly over two float arrays, 4 floats per iteration:
 *   v1[i] <- v1[i] + v2[i]
 *   v2[i] <- v1[i] - v2[i]      (both computed from the old values)
 *   r0 = v1, r1 = v2, r2 = remaining count
 * Both arrays are accessed with :128 alignment qualifiers (16-byte
 * alignment required). The count is tested after the body, so the loop
 * always runs at least once and continues while the remaining count is > 0.
 */
function ff_butterflies_float_neon, export=1
|
||||
/* Loads do not advance the pointers; the post-incrementing stores below
 * write back to the same addresses and then step to the next 4 floats. */
1: vld1.32 {q0},[r0,:128]
|
||||
vld1.32 {q1},[r1,:128]
|
||||
/* q2 = v1 - v2 (goes to v2), q1 = v1 + v2 (goes to v1). */
vsub.f32 q2, q0, q1
|
||||
vadd.f32 q1, q0, q1
|
||||
vst1.32 {q2},[r1,:128]!
|
||||
vst1.32 {q1},[r0,:128]!
|
||||
subs r2, r2, #4
|
||||
bgt 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
|
Loading…
x
Reference in New Issue
Block a user