1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00

ARM: NEON optimised vector_clipf

Originally committed as revision 20031 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Måns Rullgård 2009-09-26 19:55:21 +00:00
parent 0a07e9d012
commit f331cec47d
2 changed files with 30 additions and 0 deletions

View File

@ -174,6 +174,8 @@ void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src,
void ff_vector_fmul_reverse_neon(float *dst, const float *src0, void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
const float *src1, int len); const float *src1, int len);
/*
 * NEON implementation of vector_clipf: clamps floats read from src to the
 * range [min, max] and stores them to dst.
 * The assembly body handles 8 floats per iteration with 128-bit aligned
 * loads/stores and only stops when the count reaches exactly zero, so it
 * expects 16-byte aligned src/dst and a non-zero len that is a multiple of 8.
 */
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
int len);
void ff_float_to_int16_neon(int16_t *, const float *, long); void ff_float_to_int16_neon(int16_t *, const float *, long);
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
@ -297,6 +299,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->sv_fmul_scalar[0] = ff_sv_fmul_scalar_2_neon; c->sv_fmul_scalar[0] = ff_sv_fmul_scalar_2_neon;
c->sv_fmul_scalar[1] = ff_sv_fmul_scalar_4_neon; c->sv_fmul_scalar[1] = ff_sv_fmul_scalar_4_neon;
c->vector_clipf = ff_vector_clipf_neon;
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->float_to_int16 = ff_float_to_int16_neon; c->float_to_int16 = ff_float_to_int16_neon;
c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;

View File

@ -1075,3 +1075,29 @@ function ff_vector_fmul_reverse_neon, export=1
2: vst1.32 {q8-q9}, [r0,:128]! 2: vst1.32 {q8-q9}, [r0,:128]!
bx lr bx lr
.endfunc .endfunc
@ ff_vector_clipf_neon(dst, src, min, max, len)
@
@ Clamps each float read from src to the range [min, max] and writes the
@ result to dst, eight floats (two q registers) per loop iteration.
@
@ Register use as seen in the body:
@   r0      destination pointer, post-incremented by every store
@   r1      source pointer, post-incremented by every load
@   r2      remaining element count (once the prologue has run)
@   q0      lower bound replicated into all four lanes
@   q1      upper bound replicated into all four lanes
@   q2,q3   freshly loaded input
@   q10,q11 input after the upper bound has been applied
@   q8,q9   fully clamped output awaiting store
@
@ Requirements implied by the code:
@   - every load/store carries a :128 alignment qualifier, so src and dst
@     must be 16-byte aligned;
@   - the count is decremented by 8 and the loop only exits when it hits
@     exactly zero, so len must be a non-zero multiple of 8.
@
@ NOTE(review): VFP and NOVFP are macros defined outside this view;
@ presumably they emit their instruction only for the hard-float
@ (min/max in s0/s1, len in r2) and soft-float (min/max in r2/r3, len on
@ the stack) argument layouts respectively -- confirm against asm.S.
function ff_vector_clipf_neon, export=1
@ q1 is filled from d0[1] before q0 is written, because q0 aliases d0 and
@ writing it first would destroy the second scalar.
VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0]
NOVFP vdup.32 q0, r2
NOVFP vdup.32 q1, r3
@ r2 has already been broadcast into q0, so it is reused here for the
@ word at the top of the stack (presumably len -- see note above).
NOVFP ldr r2, [sp]
@ Prologue: fetch the first 8 floats and apply the upper bound.
vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]!
vmin.f32 q11, q3, q1
@ Loop head: apply the lower bound to the batch that is in flight.
1: vmax.f32 q8, q10, q0
vmax.f32 q9, q11, q0
@ Count this batch; when nothing remains, skip the next load and go
@ straight to the final store at 2:.
subs r2, r2, #8
beq 2f
@ Load and upper-clamp the next batch before storing the current one,
@ so the loads overlap with the stores below.
vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]!
vmin.f32 q11, q3, q1
vst1.f32 {q8},[r0,:128]!
vst1.f32 {q9},[r0,:128]!
b 1b
@ Epilogue: store the last clamped batch and return.
2: vst1.f32 {q8},[r0,:128]!
vst1.f32 {q9},[r0,:128]!
bx lr
.endfunc