mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
rv34: NEON optimised dc only inverse transform
30-50% faster than the C implementation, 0.5% overall speedup on bourne.rmvb.
This commit is contained in:
parent
136ee32da3
commit
e1e369049e
@ -26,8 +26,13 @@
|
||||
/* Prototypes for the NEON routines that ff_rv34dsp_init_neon() installs into
 * rv34_inv_transform_tab[]; each receives a pointer to the coefficient block. */
void ff_rv34_inv_transform_neon(DCTELEM *block);
|
||||
void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
|
||||
|
||||
/* DC-only variants, installed into rv34_inv_transform_dc_tab[]. Their assembly
 * reads only block[0] and writes the scaled value back as a 4x4 group of
 * 16-bit entries (rows 16 bytes apart). */
void ff_rv34_inv_transform_dc_neon(DCTELEM *block);
|
||||
void ff_rv34_inv_transform_noround_dc_neon(DCTELEM *block);
|
||||
|
||||
/**
 * Install the NEON implementations of the RV34 inverse transforms.
 *
 * In both tables, index 0 receives the regular routine and index 1 the
 * "noround" routine.
 *
 * @param c   RV34 DSP context whose function-pointer tables are filled in
 * @param dsp not used by this function
 */
void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
{
    /* Full inverse transform. */
    c->rv34_inv_transform_tab[0]    = ff_rv34_inv_transform_neon;
    c->rv34_inv_transform_tab[1]    = ff_rv34_inv_transform_noround_neon;

    /* DC-only inverse transform. */
    c->rv34_inv_transform_dc_tab[0] = ff_rv34_inv_transform_dc_neon;
    c->rv34_inv_transform_dc_tab[1] = ff_rv34_inv_transform_noround_dc_neon;
}
|
||||
|
@ -107,3 +107,32 @@ function ff_rv34_inv_transform_noround_neon, export=1
|
||||
vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
/*
 * void ff_rv34_inv_transform_dc_neon(DCTELEM *block)
 * NEON counterpart of rv34_inv_transform_dc_c().
 *
 * In:       r0 = block; only block[0] (the DC coefficient) is read.
 *           The :64 hints on the stores require block to be 8-byte aligned.
 * Out:      four rows of four 16-bit values, rows 16 bytes apart starting at
 *           block, every value set to (169 * block[0] + 512) >> 10
 *           (low 16 bits of the result).
 * Clobbers: r0 (advanced by 64), r1, d0, d4, q3, d28.
 */
function ff_rv34_inv_transform_dc_neon, export=1
        vld1.16         {d28[]},  [r0,:16]       @ broadcast block[0] to all four lanes
        vmov.i16        d4,  #169                @ 13^2
        mov             r1,  #16                 @ byte step between rows
        vmull.s16       q3,  d28, d4             @ 32-bit products: 169 * block[0]
        vrshrn.s32      d0,  q3,  #10            @ rounding >> 10, narrowed to 16 bits
        vst1.16         {d0}, [r0,:64], r1       @ row 0
        vst1.16         {d0}, [r0,:64], r1       @ row 1
        vst1.16         {d0}, [r0,:64], r1       @ row 2
        vst1.16         {d0}, [r0,:64], r1       @ row 3
        bx              lr
endfunc
|
||||
|
||||
/*
 * void ff_rv34_inv_transform_noround_dc_neon(DCTELEM *block)
 * NEON counterpart of rv34_inv_transform_dc_noround_c().
 *
 * In:       r0 = block; only block[0] (the DC coefficient) is read.
 *           The :64 hints on the stores require block to be 8-byte aligned.
 * Out:      four rows of four 16-bit values, rows 16 bytes apart starting at
 *           block, every value set to (507 * block[0]) >> 11 with no rounding
 *           term (low 16 bits of the result).
 * Clobbers: r0 (advanced by 64), r1, d0, d4, q3, d28.
 */
function ff_rv34_inv_transform_noround_dc_neon, export=1
        vld1.16         {d28[]},  [r0,:16]       @ broadcast block[0] to all four lanes
        @ 507 does not fit a single 16-bit NEON immediate, so it is built as 251 | 256.
        vmov.i16        d4,  #251
        vorr.s16        d4,  #256                @ d4 = 507 = 13^2 * 3
        mov             r1,  #16                 @ byte step between rows
        vmull.s16       q3,  d28, d4             @ 32-bit products: 507 * block[0]
        vshrn.s32       d0,  q3,  #11            @ truncating >> 11, narrowed to 16 bits
        vst1.64         {d0}, [r0,:64], r1       @ row 0
        vst1.64         {d0}, [r0,:64], r1       @ row 1
        vst1.64         {d0}, [r0,:64], r1       @ row 2
        vst1.64         {d0}, [r0,:64], r1       @ row 3
        bx              lr
endfunc
|
||||
|
Loading…
Reference in New Issue
Block a user