mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
rv34: NEON optimised dc only inverse transform
30-50% faster than the C implementation, 0.5% overall speedup on bourne.rmvb.
This commit is contained in:
parent
136ee32da3
commit
e1e369049e
@ -26,8 +26,13 @@
|
|||||||
/* NEON inverse transform entry points; each receives the coefficient block. */
void ff_rv34_inv_transform_neon(DCTELEM *block);
void ff_rv34_inv_transform_noround_neon(DCTELEM *block);

/* DC-only counterparts of the two routines above. */
void ff_rv34_inv_transform_dc_neon(DCTELEM *block);
void ff_rv34_inv_transform_noround_dc_neon(DCTELEM *block);
|
||||||
|
|
||||||
/*
 * Install the NEON inverse-transform routines into the RV34 DSP context.
 *
 * Entry 0 of each table receives the plain routine and entry 1 the
 * "noround" routine. The dsp argument is not used by this function.
 */
void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
{
    /* inverse transforms */
    c->rv34_inv_transform_tab[0]    = ff_rv34_inv_transform_neon;
    c->rv34_inv_transform_tab[1]    = ff_rv34_inv_transform_noround_neon;

    /* DC-only inverse transforms */
    c->rv34_inv_transform_dc_tab[0] = ff_rv34_inv_transform_dc_neon;
    c->rv34_inv_transform_dc_tab[1] = ff_rv34_inv_transform_noround_dc_neon;
}
|
||||||
|
@ -107,3 +107,32 @@ function ff_rv34_inv_transform_noround_neon, export=1
|
|||||||
vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
|
vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
|
||||||
bx lr
|
bx lr
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
/*
 * void ff_rv34_inv_transform_dc_neon(DCTELEM *block)
 * NEON version of rv34_inv_transform_dc_c().
 *
 * In:    r0 = block (16-bit coefficients; the :64 store hint below
 *             requires the pointer to be 8-byte aligned)
 * Does:  dc = (block[0] * 169 + 512) >> 10, narrowed to 16 bits, then
 *        stores dc into four consecutive 16-bit elements at each of four
 *        addresses 16 bytes apart, starting at block.
 * Clobb: r0 (advanced by 64), r1, d0, d4, q3 (d6-d7), d28
 */
function ff_rv34_inv_transform_dc_neon, export=1
        vld1.16         {d28[]}, [r0,:16]       @ block[0] replicated to all lanes
        vmov.i16        d4,  #169               @ 13^2
        mov             r1,  #16                @ byte step between stores
        vmull.s16       q3,  d28, d4            @ widen to 32 bit: block[0] * 169
        vrshrn.s32      d0,  q3,  #10           @ rounding shift (+512) >> 10, narrow
        vst1.16         {d0}, [r0,:64], r1
        vst1.16         {d0}, [r0,:64], r1
        vst1.16         {d0}, [r0,:64], r1
        vst1.16         {d0}, [r0,:64], r1
        bx              lr
endfunc
|
||||||
|
|
||||||
|
/*
 * void ff_rv34_inv_transform_noround_dc_neon(DCTELEM *block)
 * NEON version of rv34_inv_transform_dc_noround_c().
 *
 * In:    r0 = block (16-bit coefficients; the :64 store hint below
 *             requires the pointer to be 8-byte aligned)
 * Does:  dc = (block[0] * 507) >> 11 with no rounding term, narrowed to
 *        16 bits, then stores dc into four consecutive 16-bit elements at
 *        each of four addresses 16 bytes apart, starting at block.
 * Clobb: r0 (advanced by 64), r1, d0, d4, q3 (d6-d7), d28
 */
function ff_rv34_inv_transform_noround_dc_neon, export=1
        vld1.16         {d28[]}, [r0,:16]       @ block[0] replicated to all lanes
        vmov.i16        d4,  #251               @ 507 is built in two steps:
        vorr.s16        d4,  #256               @ 251 | 256 = 507 = 13^2 * 3
        mov             r1,  #16                @ byte step between stores
        vmull.s16       q3,  d28, d4            @ widen to 32 bit: block[0] * 507
        vshrn.s32       d0,  q3,  #11           @ truncating >> 11, narrow
        vst1.64         {d0}, [r0,:64], r1
        vst1.64         {d0}, [r0,:64], r1
        vst1.64         {d0}, [r0,:64], r1
        vst1.64         {d0}, [r0,:64], r1
        bx              lr
endfunc
|
||||||
|
Loading…
Reference in New Issue
Block a user