mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
ARM: NEON optimised vorbis_inverse_coupling
12% faster Vorbis decoding on Cortex-A8. Originally committed as revision 19637 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
737cbcde08
commit
e814015d69
@ -161,6 +161,8 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
|
||||
void ff_float_to_int16_neon(int16_t *, const float *, long);
|
||||
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
|
||||
|
||||
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
|
||||
|
||||
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
||||
{
|
||||
c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
|
||||
@ -272,4 +274,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
||||
c->float_to_int16 = ff_float_to_int16_neon;
|
||||
c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
|
||||
}
|
||||
|
||||
if (CONFIG_VORBIS_DECODER)
|
||||
c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
|
||||
}
|
||||
|
@ -19,6 +19,7 @@
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "asm.S"
|
||||
|
||||
preserve8
|
||||
@ -795,3 +796,66 @@ NOVFP ldr lr, [sp, #16]
|
||||
vst1.64 {d22,d23},[ip,:128], r5
|
||||
pop {r4,r5,pc}
|
||||
.endfunc
|
||||
|
||||
#if CONFIG_VORBIS_DECODER
|
||||
@-----------------------------------------------------------------------
@ void ff_vorbis_inverse_coupling_neon(float *mag, float *ang,
@                                      int blocksize)
@
@ In-place Vorbis inverse channel coupling, four floats per step.
@ Per element, with s = sign bit of mag and a = ang ^ s:
@     mag_out = mag + (ang <= 0 ? a : 0)
@     ang_out = mag - (ang >  0 ? a : 0)
@ The "ang <= 0" test is an integer compare on the raw float bits,
@ taken before the sign flip, so -0.0 and +0.0 both count as <= 0.
@
@ In:    r0 = mag, r1 = ang; both are accessed with :128 alignment
@        hints, so they must be 16-byte aligned
@        r2 = blocksize in floats; handled in units of 4, a count that
@        is not a multiple of 4 is not supported
@ Out:   none (mag[] and ang[] are updated in place)
@ Clobb: r0-r3, r12, q0-q3, q8-q12, flags
@
@ Register roles:
@     r0/r1   read pointers  (mag/ang)
@     r3/r12  write pointers (mag/ang), trail the read pointers
@     r2      floats remaining, biased by -4
@     q10     0x80000000 in every lane (sign-bit mask)
@
@ Control flow: the main loop is software pipelined and retires two
@ vectors (8 floats) per iteration; label 3 handles a single leftover
@ vector.  The branch to 3 on entry must depend only on the flags of
@ "subs r2, r2, #4" (exactly one vector in total).  An earlier version
@ executed "tst r2, #4" in between, which sent every blocksize equal
@ to 4 modulo 8 (12, 20, ...) straight to the tail and left all but
@ the first four floats unprocessed.
@-----------------------------------------------------------------------
function ff_vorbis_inverse_coupling_neon, export=1
        vmov.i32        q10, #1<<31             @ per-lane sign-bit mask
        subs            r2,  r2,  #4            @ r2 = blocksize - 4
        bxlt            lr                      @ fewer than 4 floats: nothing to do
        mov             r3,  r0                 @ r3  = mag write pointer
        mov             r12, r1                 @ r12 = ang write pointer
        beq             3f                      @ exactly one vector: tail only

        @ Pipeline prologue: compute vector A (q12 = mag_out, q11 = ang_out).
        vld1.32         {d24-d25},[r1,:128]!    @ q12 = ang
        vld1.32         {d22-d23},[r0,:128]!    @ q11 = mag
        vcle.s32        q8,  q12, #0            @ q8  = lanes where ang <= 0
        vand            q9,  q11, q10           @ q9  = sign(mag)
        veor            q12, q12, q9            @ a   = ang ^ sign(mag)
        vand            q2,  q12, q8            @ a where ang <= 0, else 0
        vbic            q3,  q12, q8            @ a where ang >  0, else 0
        vadd.f32        q12, q11, q2            @ mag_out
        vsub.f32        q11, q11, q3            @ ang_out

        @ Loop: compute vector B (q1/q0) while storing A, then the reverse.
1:      vld1.32         {d2-d3},  [r1,:128]!    @ q1 = ang
        vld1.32         {d0-d1},  [r0,:128]!    @ q0 = mag
        vcle.s32        q8,  q1,  #0
        vand            q9,  q0,  q10
        veor            q1,  q1,  q9
        vst1.32         {d24-d25},[r3, :128]!   @ store A: mag_out
        vst1.32         {d22-d23},[r12,:128]!   @ store A: ang_out
        vand            q2,  q1,  q8
        vbic            q3,  q1,  q8
        vadd.f32        q1,  q0,  q2
        vsub.f32        q0,  q0,  q3
        subs            r2,  r2,  #8            @ two vectors consumed
        ble             2f                      @ at most one vector left to load
        vld1.32         {d24-d25},[r1,:128]!
        vld1.32         {d22-d23},[r0,:128]!
        vcle.s32        q8,  q12, #0
        vand            q9,  q11, q10
        veor            q12, q12, q9
        vst1.32         {d2-d3},  [r3, :128]!   @ store B: mag_out
        vst1.32         {d0-d1},  [r12,:128]!   @ store B: ang_out
        vand            q2,  q12, q8
        vbic            q3,  q12, q8
        vadd.f32        q12, q11, q2
        vsub.f32        q11, q11, q3
        b               1b

        @ Pipeline epilogue: flush B.  Flags still come from the subs above
        @ (vector stores do not touch them): lt = done, eq = one vector left.
2:      vst1.32         {d2-d3},  [r3, :128]!
        vst1.32         {d0-d1},  [r12,:128]!
        bxlt            lr

        @ Tail: one vector, loaded and stored through the read pointers.
3:      vld1.32         {d2-d3},  [r1,:128]
        vld1.32         {d0-d1},  [r0,:128]
        vcle.s32        q8,  q1,  #0
        vand            q9,  q0,  q10
        veor            q1,  q1,  q9
        vand            q2,  q1,  q8
        vbic            q3,  q1,  q8
        vadd.f32        q1,  q0,  q2
        vsub.f32        q0,  q0,  q3
        vst1.32         {d2-d3},  [r0,:128]!    @ mag_out
        vst1.32         {d0-d1},  [r1,:128]!    @ ang_out
        bx              lr
.endfunc
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user