mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
ARM: set size of asm functions in object files
Originally committed as revision 22404 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
db76ca7f35
commit
a7e7d40c2e
@ -35,6 +35,11 @@ ELF .eabi_attribute 25, \val
|
||||
.endm
|
||||
|
||||
.macro function name, export=0
|
||||
.macro endfunc
|
||||
.size \name, . - \name
|
||||
.endfunc
|
||||
.purgem endfunc
|
||||
.endm
|
||||
.if \export
|
||||
.global EXTERN_ASM\name
|
||||
EXTERN_ASM\name:
|
||||
|
@ -36,7 +36,7 @@ function ff_prefetch_arm, export=1
|
||||
add r0, r0, r1
|
||||
bne ff_prefetch_arm
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
#endif
|
||||
|
||||
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
|
||||
@ -151,7 +151,7 @@ function ff_put_pixels16_arm, export=1
|
||||
add r0, r0, r2
|
||||
bne 4b
|
||||
pop {r4-r11,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
@ ----------------------------------------------------------------
|
||||
.align 5
|
||||
@ -203,7 +203,7 @@ function ff_put_pixels8_arm, export=1
|
||||
add r0, r0, r2
|
||||
bne 4b
|
||||
pop {r4-r5,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
@ ----------------------------------------------------------------
|
||||
.align 5
|
||||
@ -263,7 +263,7 @@ function ff_put_pixels8_x2_arm, export=1
|
||||
add r0, r0, r2
|
||||
bne 4b
|
||||
pop {r4-r10,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.align 5
|
||||
function ff_put_no_rnd_pixels8_x2_arm, export=1
|
||||
@ -322,7 +322,7 @@ function ff_put_no_rnd_pixels8_x2_arm, export=1
|
||||
add r0, r0, r2
|
||||
bne 4b
|
||||
pop {r4-r10,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
|
||||
@ ----------------------------------------------------------------
|
||||
@ -422,7 +422,7 @@ function ff_put_pixels8_y2_arm, export=1
|
||||
add r0, r0, r2
|
||||
bne 6b
|
||||
pop {r4-r11,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.align 5
|
||||
function ff_put_no_rnd_pixels8_y2_arm, export=1
|
||||
@ -520,7 +520,7 @@ function ff_put_no_rnd_pixels8_y2_arm, export=1
|
||||
add r0, r0, r2
|
||||
bne 6b
|
||||
pop {r4-r11,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.ltorg
|
||||
|
||||
@ -603,7 +603,7 @@ function ff_put_pixels8_xy2_arm, export=1
|
||||
3: RND_XY2_EXPAND 2, lsl
|
||||
.align 5
|
||||
4: RND_XY2_EXPAND 3, lsl
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.align 5
|
||||
function ff_put_no_rnd_pixels8_xy2_arm, export=1
|
||||
@ -619,7 +619,7 @@ function ff_put_no_rnd_pixels8_xy2_arm, export=1
|
||||
3: RND_XY2_EXPAND 2, lsr
|
||||
.align 5
|
||||
4: RND_XY2_EXPAND 3, lsr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.align 5
|
||||
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
|
||||
@ -709,4 +709,4 @@ function ff_add_pixels_clamped_arm, export=1
|
||||
|
||||
pop {r4-r10}
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -32,7 +32,7 @@ function ff_\type\()_pixels16\subp\()_armv6, export=1
|
||||
add r0, r0, #8
|
||||
add r1, r1, #8
|
||||
b ff_\type\()_pixels8\subp\()_armv6
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
call_2x_pixels avg
|
||||
@ -61,7 +61,7 @@ function ff_put_pixels16_armv6, export=1
|
||||
|
||||
pop {r4-r11}
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_put_pixels8_armv6, export=1
|
||||
push {r4-r7}
|
||||
@ -77,7 +77,7 @@ function ff_put_pixels8_armv6, export=1
|
||||
|
||||
pop {r4-r7}
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_put_pixels8_x2_armv6, export=1
|
||||
push {r4-r11, lr}
|
||||
@ -118,7 +118,7 @@ function ff_put_pixels8_x2_armv6, export=1
|
||||
bne 1b
|
||||
|
||||
pop {r4-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_put_pixels8_y2_armv6, export=1
|
||||
push {r4-r11}
|
||||
@ -157,7 +157,7 @@ function ff_put_pixels8_y2_armv6, export=1
|
||||
|
||||
pop {r4-r11}
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_put_pixels8_x2_no_rnd_armv6, export=1
|
||||
push {r4-r9, lr}
|
||||
@ -185,7 +185,7 @@ function ff_put_pixels8_x2_no_rnd_armv6, export=1
|
||||
bne 1b
|
||||
|
||||
pop {r4-r9, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_put_pixels8_y2_no_rnd_armv6, export=1
|
||||
push {r4-r9, lr}
|
||||
@ -210,7 +210,7 @@ function ff_put_pixels8_y2_no_rnd_armv6, export=1
|
||||
bne 1b
|
||||
|
||||
pop {r4-r9, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_avg_pixels8_armv6, export=1
|
||||
pld [r1, r2]
|
||||
@ -260,7 +260,7 @@ function ff_avg_pixels8_armv6, export=1
|
||||
strd r6, r7, [r0], r2
|
||||
|
||||
pop {r4-r10, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_add_pixels_clamped_armv6, export=1
|
||||
push {r4-r8,lr}
|
||||
@ -287,7 +287,7 @@ function ff_add_pixels_clamped_armv6, export=1
|
||||
strd r6, r7, [r1], r2
|
||||
bgt 1b
|
||||
pop {r4-r8,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_get_pixels_armv6, export=1
|
||||
pld [r1, r2]
|
||||
@ -309,7 +309,7 @@ function ff_get_pixels_armv6, export=1
|
||||
bgt 1b
|
||||
|
||||
pop {r4-r8, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_diff_pixels_armv6, export=1
|
||||
pld [r1, r3]
|
||||
@ -342,7 +342,7 @@ function ff_diff_pixels_armv6, export=1
|
||||
bgt 1b
|
||||
|
||||
pop {r4-r9, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pix_abs16_armv6, export=1
|
||||
ldr r0, [sp]
|
||||
@ -371,7 +371,7 @@ function ff_pix_abs16_armv6, export=1
|
||||
2:
|
||||
add r0, r12, lr
|
||||
pop {r4-r9, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pix_abs16_x2_armv6, export=1
|
||||
ldr r12, [sp]
|
||||
@ -426,7 +426,7 @@ function ff_pix_abs16_x2_armv6, export=1
|
||||
bgt 1b
|
||||
|
||||
pop {r4-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
|
||||
ldr \n0, [r2]
|
||||
@ -484,7 +484,7 @@ function ff_pix_abs16_y2_armv6, export=1
|
||||
bgt 1b
|
||||
|
||||
pop {r4-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pix_abs8_armv6, export=1
|
||||
pld [r2, r3]
|
||||
@ -514,7 +514,7 @@ function ff_pix_abs8_armv6, export=1
|
||||
usada8 lr, r9, r7, lr
|
||||
add r0, r0, lr
|
||||
pop {r4-r9, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_sse16_armv6, export=1
|
||||
ldr r12, [sp]
|
||||
@ -565,7 +565,7 @@ function ff_sse16_armv6, export=1
|
||||
bgt 1b
|
||||
|
||||
pop {r4-r9, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pix_norm1_armv6, export=1
|
||||
push {r4-r6, lr}
|
||||
@ -595,7 +595,7 @@ function ff_pix_norm1_armv6, export=1
|
||||
|
||||
mov r0, lr
|
||||
pop {r4-r6, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pix_sum_armv6, export=1
|
||||
push {r4-r7, lr}
|
||||
@ -620,4 +620,4 @@ function ff_pix_sum_armv6, export=1
|
||||
usada8 r3, r7, lr, r3
|
||||
add r0, r2, r3
|
||||
pop {r4-r7, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -240,7 +240,7 @@
|
||||
.macro pixfunc pfx name suf rnd_op args:vararg
|
||||
function ff_\pfx\name\suf\()_neon, export=1
|
||||
\name \rnd_op \args
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
.macro pixfunc2 pfx name args:vararg
|
||||
@ -250,7 +250,7 @@ function ff_\pfx\name\suf\()_neon, export=1
|
||||
|
||||
function ff_put_h264_qpel16_mc00_neon, export=1
|
||||
mov r3, #16
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
pixfunc put_ pixels16
|
||||
pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8
|
||||
@ -259,13 +259,13 @@ function ff_put_h264_qpel16_mc00_neon, export=1
|
||||
|
||||
function ff_avg_h264_qpel16_mc00_neon, export=1
|
||||
mov r3, #16
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
pixfunc avg_ pixels16,, 1
|
||||
|
||||
function ff_put_h264_qpel8_mc00_neon, export=1
|
||||
mov r3, #8
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
pixfunc put_ pixels8
|
||||
pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
|
||||
@ -274,7 +274,7 @@ function ff_put_h264_qpel8_mc00_neon, export=1
|
||||
|
||||
function ff_avg_h264_qpel8_mc00_neon, export=1
|
||||
mov r3, #8
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
pixfunc avg_ pixels8,, 1
|
||||
|
||||
@ -300,7 +300,7 @@ function ff_put_pixels_clamped_neon, export=1
|
||||
vst1.64 {d6}, [r1,:64], r2
|
||||
vst1.64 {d7}, [r1,:64], r2
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_put_signed_pixels_clamped_neon, export=1
|
||||
vmov.u8 d31, #128
|
||||
@ -337,7 +337,7 @@ function ff_put_signed_pixels_clamped_neon, export=1
|
||||
vst1.64 {d6}, [r1,:64], r2
|
||||
vst1.64 {d7}, [r1,:64], r2
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_add_pixels_clamped_neon, export=1
|
||||
mov r3, r1
|
||||
@ -382,7 +382,7 @@ function ff_add_pixels_clamped_neon, export=1
|
||||
vst1.64 {d4}, [r3,:64], r2
|
||||
vst1.64 {d6}, [r3,:64], r2
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_float_to_int16_neon, export=1
|
||||
subs r2, r2, #8
|
||||
@ -426,7 +426,7 @@ function ff_float_to_int16_neon, export=1
|
||||
vshrn.s32 d5, q9, #16
|
||||
vst1.64 {d4-d5}, [r0,:128]!
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_float_to_int16_interleave_neon, export=1
|
||||
cmp r3, #2
|
||||
@ -719,7 +719,7 @@ function ff_float_to_int16_interleave_neon, export=1
|
||||
vld1.64 {d2-d3}, [r4,:128]!
|
||||
vcvt.s32.f32 q1, q1, #16
|
||||
b 6b
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vector_fmul_neon, export=1
|
||||
mov r3, r0
|
||||
@ -759,7 +759,7 @@ function ff_vector_fmul_neon, export=1
|
||||
vmul.f32 q9, q1, q3
|
||||
3: vst1.64 {d16-d19},[r3,:128]!
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vector_fmul_window_neon, export=1
|
||||
VFP vdup.32 q8, d0[0]
|
||||
@ -811,7 +811,7 @@ NOVFP ldr lr, [sp, #16]
|
||||
vst1.64 {d20,d21},[r0,:128]!
|
||||
vst1.64 {d22,d23},[ip,:128], r5
|
||||
pop {r4,r5,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
#if CONFIG_VORBIS_DECODER
|
||||
function ff_vorbis_inverse_coupling_neon, export=1
|
||||
@ -872,7 +872,7 @@ function ff_vorbis_inverse_coupling_neon, export=1
|
||||
vst1.32 {d2-d3}, [r0,:128]!
|
||||
vst1.32 {d0-d1}, [r1,:128]!
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
#endif
|
||||
|
||||
function ff_vector_fmul_scalar_neon, export=1
|
||||
@ -910,7 +910,7 @@ NOVFP vdup.32 q8, r2
|
||||
bgt 3b
|
||||
bx lr
|
||||
.unreq len
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vector_fmul_sv_scalar_2_neon, export=1
|
||||
VFP vdup.32 d16, d0[0]
|
||||
@ -936,7 +936,7 @@ NOVFP ldr r3, [sp]
|
||||
2: vst1.32 {d4},[r0,:64]!
|
||||
vst1.32 {d5},[r0,:64]!
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vector_fmul_sv_scalar_4_neon, export=1
|
||||
VFP vdup.32 q10, d0[0]
|
||||
@ -975,7 +975,7 @@ NOVFP ldr r3, [sp]
|
||||
subs r3, r3, #4
|
||||
bgt 3b
|
||||
pop {pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_sv_fmul_scalar_2_neon, export=1
|
||||
VFP len .req r2
|
||||
@ -998,7 +998,7 @@ NOVFP vdup.32 q8, r2
|
||||
2: vst1.32 {q1},[r0,:128]!
|
||||
bx lr
|
||||
.unreq len
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_sv_fmul_scalar_4_neon, export=1
|
||||
VFP len .req r2
|
||||
@ -1013,7 +1013,7 @@ NOVFP vdup.32 q8, r2
|
||||
bgt 1b
|
||||
bx lr
|
||||
.unreq len
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_butterflies_float_neon, export=1
|
||||
1: vld1.32 {q0},[r0,:128]
|
||||
@ -1025,7 +1025,7 @@ function ff_butterflies_float_neon, export=1
|
||||
subs r2, r2, #4
|
||||
bgt 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_scalarproduct_float_neon, export=1
|
||||
vmov.f32 q2, #0.0
|
||||
@ -1038,7 +1038,7 @@ function ff_scalarproduct_float_neon, export=1
|
||||
vpadd.f32 d0, d0, d0
|
||||
NOVFP vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_int32_to_float_fmul_scalar_neon, export=1
|
||||
VFP vdup.32 q0, d0[0]
|
||||
@ -1066,7 +1066,7 @@ NOVFP len .req r3
|
||||
vst1.32 {q10},[r0,:128]!
|
||||
bx lr
|
||||
.unreq len
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vector_fmul_reverse_neon, export=1
|
||||
add r2, r2, r3, lsl #2
|
||||
@ -1090,7 +1090,7 @@ function ff_vector_fmul_reverse_neon, export=1
|
||||
b 1b
|
||||
2: vst1.32 {q8-q9}, [r0,:128]!
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vector_fmul_add_neon, export=1
|
||||
ldr r12, [sp]
|
||||
@ -1117,7 +1117,7 @@ function ff_vector_fmul_add_neon, export=1
|
||||
b 1b
|
||||
2: vst1.32 {q12-q13},[r0,:128]!
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vector_clipf_neon, export=1
|
||||
VFP vdup.32 q1, d0[1]
|
||||
@ -1143,4 +1143,4 @@ NOVFP ldr r2, [sp]
|
||||
2: vst1.f32 {q8},[r0,:128]!
|
||||
vst1.f32 {q9},[r0,:128]!
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -78,7 +78,7 @@ function ff_vector_fmul_vfp, export=1
|
||||
fmxr fpscr, r12
|
||||
vpop {d8-d15}
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/**
|
||||
* ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
|
||||
@ -131,7 +131,7 @@ function ff_vector_fmul_reverse_vfp, export=1
|
||||
|
||||
vpop {d8-d15}
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
#if HAVE_ARMV6
|
||||
/**
|
||||
@ -185,5 +185,5 @@ function ff_float_to_int16_vfp, export=1
|
||||
|
||||
vpop {d8-d11}
|
||||
pop {r4-r8,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
#endif
|
||||
|
@ -43,7 +43,7 @@ function fft4_neon
|
||||
vst1.32 {d0-d3}, [r0,:128]
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function fft8_neon
|
||||
mov r1, r0
|
||||
@ -96,7 +96,7 @@ function fft8_neon
|
||||
vst1.32 {d0-d3}, [r0,:128]
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function fft16_neon
|
||||
movrel r1, mppm
|
||||
@ -198,7 +198,7 @@ function fft16_neon
|
||||
vst2.32 {d26-d27},[r0,:128], r1
|
||||
vst2.32 {d30-d31},[r0,:128]
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function fft_pass_neon
|
||||
push {r4-r6,lr}
|
||||
@ -274,7 +274,7 @@ function fft_pass_neon
|
||||
bne 1b
|
||||
|
||||
pop {r4-r6,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.macro def_fft n, n2, n4
|
||||
.align 6
|
||||
@ -291,7 +291,7 @@ function fft\n\()_neon
|
||||
movrel r1, X(ff_cos_\n)
|
||||
mov r2, #\n4/2
|
||||
b fft_pass_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
def_fft 32, 16, 8
|
||||
@ -314,7 +314,7 @@ function ff_fft_calc_neon, export=1
|
||||
ldr r3, [r3, r2, lsl #2]
|
||||
mov r0, r1
|
||||
bx r3
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_fft_permute_neon, export=1
|
||||
push {r4,lr}
|
||||
@ -344,7 +344,7 @@ function ff_fft_permute_neon, export=1
|
||||
bgt 1b
|
||||
|
||||
pop {r4,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.section .rodata
|
||||
.align 4
|
||||
|
@ -183,7 +183,7 @@ function ff_\type\()_h264_chroma_mc8_neon, export=1
|
||||
bgt 5b
|
||||
|
||||
pop {r4-r7, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
|
||||
@ -317,7 +317,7 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
|
||||
bgt 5b
|
||||
|
||||
pop {r4-r7, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
.macro h264_chroma_mc2 type
|
||||
@ -385,7 +385,7 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
|
||||
subs r3, r3, #2
|
||||
bgt 2b
|
||||
pop {r4-r6, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
.text
|
||||
@ -518,7 +518,7 @@ function ff_h264_v_loop_filter_luma_neon, export=1
|
||||
|
||||
align_pop_regs
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_h264_h_loop_filter_luma_neon, export=1
|
||||
h264_loop_filter_start
|
||||
@ -570,7 +570,7 @@ function ff_h264_h_loop_filter_luma_neon, export=1
|
||||
|
||||
align_pop_regs
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.macro h264_loop_filter_chroma
|
||||
vdup.8 d22, r2 @ alpha
|
||||
@ -621,7 +621,7 @@ function ff_h264_v_loop_filter_chroma_neon, export=1
|
||||
vst1.64 {d0}, [r0,:64], r1
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_h264_h_loop_filter_chroma_neon, export=1
|
||||
h264_loop_filter_start
|
||||
@ -659,7 +659,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
|
||||
vst1.32 {d2[1]}, [r0], r1
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/* H.264 qpel MC */
|
||||
|
||||
@ -774,7 +774,7 @@ function put_h264_qpel16_h_lowpass_neon_packed
|
||||
mov ip, #16
|
||||
mov lr, r4
|
||||
b put_h264_qpel8_h_lowpass_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.macro h264_qpel_h_lowpass type
|
||||
function \type\()_h264_qpel16_h_lowpass_neon
|
||||
@ -787,7 +787,7 @@ function \type\()_h264_qpel16_h_lowpass_neon
|
||||
add r1, r1, #8
|
||||
mov ip, #16
|
||||
pop {lr}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_h_lowpass_neon
|
||||
1: vld1.64 {d0, d1}, [r1], r2
|
||||
@ -805,7 +805,7 @@ function \type\()_h264_qpel8_h_lowpass_neon
|
||||
vst1.64 {d16}, [r0,:64], r3
|
||||
bne 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel_h_lowpass put
|
||||
@ -824,7 +824,7 @@ function \type\()_h264_qpel16_h_lowpass_l2_neon
|
||||
add r3, r3, #8
|
||||
mov ip, #16
|
||||
pop {lr}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_h_lowpass_l2_neon
|
||||
1: vld1.64 {d0, d1}, [r1], r2
|
||||
@ -845,7 +845,7 @@ function \type\()_h264_qpel8_h_lowpass_l2_neon
|
||||
vst1.64 {d1}, [r0,:64], r2
|
||||
bne 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel_h_lowpass_l2 put
|
||||
@ -864,7 +864,7 @@ function put_h264_qpel16_v_lowpass_neon_packed
|
||||
sub r1, r1, r3, lsl #2
|
||||
mov lr, r4
|
||||
b put_h264_qpel8_v_lowpass_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.macro h264_qpel_v_lowpass type
|
||||
function \type\()_h264_qpel16_v_lowpass_neon
|
||||
@ -880,7 +880,7 @@ function \type\()_h264_qpel16_v_lowpass_neon
|
||||
bl \type\()_h264_qpel8_v_lowpass_neon
|
||||
sub r1, r1, r3, lsl #2
|
||||
mov lr, r4
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_v_lowpass_neon
|
||||
vld1.64 {d8}, [r1], r3
|
||||
@ -934,7 +934,7 @@ function \type\()_h264_qpel8_v_lowpass_neon
|
||||
vst1.64 {d28}, [r0,:64], r2
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel_v_lowpass put
|
||||
@ -956,7 +956,7 @@ function \type\()_h264_qpel16_v_lowpass_l2_neon
|
||||
bl \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
sub r1, r1, r3, lsl #2
|
||||
mov lr, r4
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
vld1.64 {d8}, [r1], r3
|
||||
@ -1023,7 +1023,7 @@ function \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
vst1.64 {d11}, [r0,:64], r3
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel_v_lowpass_l2 put
|
||||
@ -1093,7 +1093,7 @@ function put_h264_qpel8_hv_lowpass_neon_top
|
||||
transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.macro h264_qpel8_hv_lowpass type
|
||||
function \type\()_h264_qpel8_hv_lowpass_neon
|
||||
@ -1129,7 +1129,7 @@ function \type\()_h264_qpel8_hv_lowpass_neon
|
||||
|
||||
mov lr, r10
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel8_hv_lowpass put
|
||||
@ -1178,7 +1178,7 @@ function \type\()_h264_qpel8_hv_lowpass_l2_neon
|
||||
|
||||
mov lr, r10
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel8_hv_lowpass_l2 put
|
||||
@ -1199,7 +1199,7 @@ function \type\()_h264_qpel16_hv_lowpass_neon
|
||||
sub r1, r1, r3, lsl #2
|
||||
mov lr, r9
|
||||
b \type\()_h264_qpel8_hv_lowpass_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel16_hv_lowpass_l2_neon
|
||||
mov r9, lr
|
||||
@ -1216,7 +1216,7 @@ function \type\()_h264_qpel16_hv_lowpass_l2_neon
|
||||
sub r1, r1, r3, lsl #2
|
||||
mov lr, r9
|
||||
b \type\()_h264_qpel8_hv_lowpass_l2_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel16_hv put
|
||||
@ -1229,7 +1229,7 @@ function ff_\type\()_h264_qpel8_mc10_neon, export=1
|
||||
sub r1, r1, #2
|
||||
mov ip, #8
|
||||
b \type\()_h264_qpel8_h_lowpass_l2_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc20_neon, export=1
|
||||
lowpass_const r3
|
||||
@ -1237,7 +1237,7 @@ function ff_\type\()_h264_qpel8_mc20_neon, export=1
|
||||
mov r3, r2
|
||||
mov ip, #8
|
||||
b \type\()_h264_qpel8_h_lowpass_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc30_neon, export=1
|
||||
lowpass_const r3
|
||||
@ -1245,7 +1245,7 @@ function ff_\type\()_h264_qpel8_mc30_neon, export=1
|
||||
sub r1, r1, #2
|
||||
mov ip, #8
|
||||
b \type\()_h264_qpel8_h_lowpass_l2_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc01_neon, export=1
|
||||
push {lr}
|
||||
@ -1258,7 +1258,7 @@ function ff_\type\()_h264_qpel8_mc01_neon, export=1
|
||||
bl \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
vpop {d8-d15}
|
||||
pop {pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc11_neon, export=1
|
||||
push {r0, r1, r11, lr}
|
||||
@ -1282,7 +1282,7 @@ function ff_\type\()_h264_qpel8_mc11_neon, export=1
|
||||
vpop {d8-d15}
|
||||
add sp, r11, #8
|
||||
pop {r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc21_neon, export=1
|
||||
push {r0, r1, r4, r10, r11, lr}
|
||||
@ -1307,14 +1307,14 @@ function ff_\type\()_h264_qpel8_mc21_neon, export=1
|
||||
vpop {d8-d15}
|
||||
add sp, r11, #8
|
||||
pop {r4, r10, r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc31_neon, export=1
|
||||
add r1, r1, #1
|
||||
push {r0, r1, r11, lr}
|
||||
sub r1, r1, #1
|
||||
b \type\()_h264_qpel8_mc11
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc02_neon, export=1
|
||||
push {lr}
|
||||
@ -1325,7 +1325,7 @@ function ff_\type\()_h264_qpel8_mc02_neon, export=1
|
||||
bl \type\()_h264_qpel8_v_lowpass_neon
|
||||
vpop {d8-d15}
|
||||
pop {pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc12_neon, export=1
|
||||
push {r0, r1, r4, r10, r11, lr}
|
||||
@ -1349,7 +1349,7 @@ function ff_\type\()_h264_qpel8_mc12_neon, export=1
|
||||
vpop {d8-d15}
|
||||
add sp, r11, #8
|
||||
pop {r4, r10, r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc22_neon, export=1
|
||||
push {r4, r10, r11, lr}
|
||||
@ -1365,31 +1365,31 @@ function ff_\type\()_h264_qpel8_mc22_neon, export=1
|
||||
vpop {d8-d15}
|
||||
mov sp, r11
|
||||
pop {r4, r10, r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc32_neon, export=1
|
||||
push {r0, r1, r4, r10, r11, lr}
|
||||
add r1, r1, #1
|
||||
b \type\()_h264_qpel8_mc12
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc03_neon, export=1
|
||||
push {lr}
|
||||
add ip, r1, r2
|
||||
b \type\()_h264_qpel8_mc01
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc13_neon, export=1
|
||||
push {r0, r1, r11, lr}
|
||||
add r1, r1, r2
|
||||
b \type\()_h264_qpel8_mc11
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc23_neon, export=1
|
||||
push {r0, r1, r4, r10, r11, lr}
|
||||
add r1, r1, r2
|
||||
b \type\()_h264_qpel8_mc21
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel8_mc33_neon, export=1
|
||||
add r1, r1, #1
|
||||
@ -1397,7 +1397,7 @@ function ff_\type\()_h264_qpel8_mc33_neon, export=1
|
||||
add r1, r1, r2
|
||||
sub r1, r1, #1
|
||||
b \type\()_h264_qpel8_mc11
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel8 put
|
||||
@ -1409,21 +1409,21 @@ function ff_\type\()_h264_qpel16_mc10_neon, export=1
|
||||
mov r3, r1
|
||||
sub r1, r1, #2
|
||||
b \type\()_h264_qpel16_h_lowpass_l2_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc20_neon, export=1
|
||||
lowpass_const r3
|
||||
sub r1, r1, #2
|
||||
mov r3, r2
|
||||
b \type\()_h264_qpel16_h_lowpass_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc30_neon, export=1
|
||||
lowpass_const r3
|
||||
add r3, r1, #1
|
||||
sub r1, r1, #2
|
||||
b \type\()_h264_qpel16_h_lowpass_l2_neon
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc01_neon, export=1
|
||||
push {r4, lr}
|
||||
@ -1436,7 +1436,7 @@ function ff_\type\()_h264_qpel16_mc01_neon, export=1
|
||||
bl \type\()_h264_qpel16_v_lowpass_l2_neon
|
||||
vpop {d8-d15}
|
||||
pop {r4, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc11_neon, export=1
|
||||
push {r0, r1, r4, r11, lr}
|
||||
@ -1459,7 +1459,7 @@ function ff_\type\()_h264_qpel16_mc11_neon, export=1
|
||||
vpop {d8-d15}
|
||||
add sp, r11, #8
|
||||
pop {r4, r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc21_neon, export=1
|
||||
push {r0, r1, r4-r5, r9-r11, lr}
|
||||
@ -1481,14 +1481,14 @@ function ff_\type\()_h264_qpel16_mc21_neon, export=1
|
||||
vpop {d8-d15}
|
||||
add sp, r11, #8
|
||||
pop {r4-r5, r9-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc31_neon, export=1
|
||||
add r1, r1, #1
|
||||
push {r0, r1, r4, r11, lr}
|
||||
sub r1, r1, #1
|
||||
b \type\()_h264_qpel16_mc11
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc02_neon, export=1
|
||||
push {r4, lr}
|
||||
@ -1499,7 +1499,7 @@ function ff_\type\()_h264_qpel16_mc02_neon, export=1
|
||||
bl \type\()_h264_qpel16_v_lowpass_neon
|
||||
vpop {d8-d15}
|
||||
pop {r4, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc12_neon, export=1
|
||||
push {r0, r1, r4-r5, r9-r11, lr}
|
||||
@ -1522,7 +1522,7 @@ function ff_\type\()_h264_qpel16_mc12_neon, export=1
|
||||
vpop {d8-d15}
|
||||
add sp, r11, #8
|
||||
pop {r4-r5, r9-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc22_neon, export=1
|
||||
push {r4, r9-r11, lr}
|
||||
@ -1539,31 +1539,31 @@ function ff_\type\()_h264_qpel16_mc22_neon, export=1
|
||||
vpop {d8-d15}
|
||||
mov sp, r11
|
||||
pop {r4, r9-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc32_neon, export=1
|
||||
push {r0, r1, r4-r5, r9-r11, lr}
|
||||
add r1, r1, #1
|
||||
b \type\()_h264_qpel16_mc12
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc03_neon, export=1
|
||||
push {r4, lr}
|
||||
add ip, r1, r2
|
||||
b \type\()_h264_qpel16_mc01
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc13_neon, export=1
|
||||
push {r0, r1, r4, r11, lr}
|
||||
add r1, r1, r2
|
||||
b \type\()_h264_qpel16_mc11
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc23_neon, export=1
|
||||
push {r0, r1, r4-r5, r9-r11, lr}
|
||||
add r1, r1, r2
|
||||
b \type\()_h264_qpel16_mc21
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_\type\()_h264_qpel16_mc33_neon, export=1
|
||||
add r1, r1, #1
|
||||
@ -1571,7 +1571,7 @@ function ff_\type\()_h264_qpel16_mc33_neon, export=1
|
||||
add r1, r1, r2
|
||||
sub r1, r1, #1
|
||||
b \type\()_h264_qpel16_mc11
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel16 put
|
||||
@ -1719,7 +1719,7 @@ function biweight_h264_pixels_\w\()_neon
|
||||
biweight_\w vmlsl.u8, vmlsl.u8
|
||||
40: rsb r5, r5, #0
|
||||
biweight_\w vmlsl.u8, vmlal.u8
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
.macro biweight_entry w, h, b=1
|
||||
@ -1728,7 +1728,7 @@ function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1
|
||||
.if \b
|
||||
b biweight_h264_pixels_\w\()_neon
|
||||
.endif
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
biweight_entry 16, 8
|
||||
@ -1856,7 +1856,7 @@ function weight_h264_pixels_\w\()_neon
|
||||
weight_\w vadd.s16
|
||||
10: rsb r3, r3, #0
|
||||
weight_\w vsub.s16
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
.macro weight_entry w, h, b=1
|
||||
@ -1865,7 +1865,7 @@ function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1
|
||||
.if \b
|
||||
b weight_h264_pixels_\w\()_neon
|
||||
.endif
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
weight_entry 16, 8
|
||||
|
@ -69,7 +69,7 @@ function ff_h264_idct_add_neon, export=1
|
||||
vst1.32 {d1[0]}, [r0,:32], r2
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_h264_idct_dc_add_neon, export=1
|
||||
vld1.16 {d2[],d3[]}, [r1,:16]
|
||||
@ -88,7 +88,7 @@ function ff_h264_idct_dc_add_neon, export=1
|
||||
vst1.32 {d1[0]}, [r0,:32], r2
|
||||
vst1.32 {d1[1]}, [r0,:32], r2
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_h264_idct_add16_neon, export=1
|
||||
push {r4-r8,lr}
|
||||
@ -115,7 +115,7 @@ function ff_h264_idct_add16_neon, export=1
|
||||
add r1, r1, #32
|
||||
bne 1b
|
||||
pop {r4-r8,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_h264_idct_add16intra_neon, export=1
|
||||
push {r4-r8,lr}
|
||||
@ -140,7 +140,7 @@ function ff_h264_idct_add16intra_neon, export=1
|
||||
add r1, r1, #32
|
||||
bne 1b
|
||||
pop {r4-r8,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_h264_idct_add8_neon, export=1
|
||||
push {r4-r10,lr}
|
||||
@ -167,7 +167,7 @@ function ff_h264_idct_add8_neon, export=1
|
||||
add r1, r1, #32
|
||||
bne 1b
|
||||
pop {r4-r10,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.section .rodata
|
||||
scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
|
||||
|
@ -45,7 +45,7 @@
|
||||
function ff_pred16x16_128_dc_neon, export=1
|
||||
vmov.i8 q0, #128
|
||||
b .L_pred16x16_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred16x16_top_dc_neon, export=1
|
||||
sub r2, r0, r1
|
||||
@ -54,7 +54,7 @@ function ff_pred16x16_top_dc_neon, export=1
|
||||
vrshrn.u16 d0, q0, #4
|
||||
vdup.8 q0, d0[0]
|
||||
b .L_pred16x16_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred16x16_left_dc_neon, export=1
|
||||
sub r2, r0, #1
|
||||
@ -64,7 +64,7 @@ function ff_pred16x16_left_dc_neon, export=1
|
||||
vrshrn.u16 d0, q0, #4
|
||||
vdup.8 q0, d0[0]
|
||||
b .L_pred16x16_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred16x16_dc_neon, export=1
|
||||
sub r2, r0, r1
|
||||
@ -87,7 +87,7 @@ function ff_pred16x16_dc_neon, export=1
|
||||
subs r3, r3, #1
|
||||
bne 6b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred16x16_hor_neon, export=1
|
||||
sub r2, r0, #1
|
||||
@ -97,7 +97,7 @@ function ff_pred16x16_hor_neon, export=1
|
||||
subs r3, r3, #1
|
||||
bne 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred16x16_vert_neon, export=1
|
||||
sub r0, r0, r1
|
||||
@ -108,7 +108,7 @@ function ff_pred16x16_vert_neon, export=1
|
||||
subs r3, r3, #1
|
||||
bne 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred16x16_plane_neon, export=1
|
||||
sub r3, r0, r1
|
||||
@ -164,7 +164,7 @@ function ff_pred16x16_plane_neon, export=1
|
||||
subs r3, r3, #1
|
||||
bne 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.section .rodata
|
||||
.align 4
|
||||
@ -181,7 +181,7 @@ function ff_pred8x8_hor_neon, export=1
|
||||
subs r3, r3, #1
|
||||
bne 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_vert_neon, export=1
|
||||
sub r0, r0, r1
|
||||
@ -192,7 +192,7 @@ function ff_pred8x8_vert_neon, export=1
|
||||
subs r3, r3, #1
|
||||
bne 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_plane_neon, export=1
|
||||
sub r3, r0, r1
|
||||
@ -244,12 +244,12 @@ function ff_pred8x8_plane_neon, export=1
|
||||
subs r3, r3, #1
|
||||
bne 1b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_128_dc_neon, export=1
|
||||
vmov.i8 q0, #128
|
||||
b .L_pred8x8_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_top_dc_neon, export=1
|
||||
sub r2, r0, r1
|
||||
@ -261,7 +261,7 @@ function ff_pred8x8_top_dc_neon, export=1
|
||||
vdup.8 d0, d0[0]
|
||||
vtrn.32 d0, d1
|
||||
b .L_pred8x8_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_left_dc_neon, export=1
|
||||
sub r2, r0, #1
|
||||
@ -272,7 +272,7 @@ function ff_pred8x8_left_dc_neon, export=1
|
||||
vdup.8 d1, d0[1]
|
||||
vdup.8 d0, d0[0]
|
||||
b .L_pred8x8_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_dc_neon, export=1
|
||||
sub r2, r0, r1
|
||||
@ -298,7 +298,7 @@ function ff_pred8x8_dc_neon, export=1
|
||||
subs r3, r3, #1
|
||||
bne 6b
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_l0t_dc_neon, export=1
|
||||
sub r2, r0, r1
|
||||
@ -316,7 +316,7 @@ function ff_pred8x8_l0t_dc_neon, export=1
|
||||
vdup.8 q2, d3[2]
|
||||
vtrn.32 q0, q2
|
||||
b .L_pred8x8_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_l00_dc_neon, export=1
|
||||
sub r2, r0, #1
|
||||
@ -327,7 +327,7 @@ function ff_pred8x8_l00_dc_neon, export=1
|
||||
vmov.i8 d1, #128
|
||||
vdup.8 d0, d0[0]
|
||||
b .L_pred8x8_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_0lt_dc_neon, export=1
|
||||
sub r2, r0, r1
|
||||
@ -347,7 +347,7 @@ function ff_pred8x8_0lt_dc_neon, export=1
|
||||
vdup.8 d5, d2[5]
|
||||
vtrn.32 q0, q2
|
||||
b .L_pred8x8_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_pred8x8_0l0_dc_neon, export=1
|
||||
add r2, r0, r1, lsl #2
|
||||
@ -359,4 +359,4 @@ function ff_pred8x8_0l0_dc_neon, export=1
|
||||
vmov.i8 d0, #128
|
||||
vdup.8 d1, d1[0]
|
||||
b .L_pred8x8_dc_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -75,7 +75,7 @@ function ff_scalarproduct_int16_neon, export=1
|
||||
vpaddl.s32 d3, d2
|
||||
vmov.32 r0, d3[0]
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
@ scalarproduct_and_madd_int16(/*aligned*/v0,v1,v2,order,mul)
|
||||
function ff_scalarproduct_and_madd_int16_neon, export=1
|
||||
@ -115,4 +115,4 @@ function ff_scalarproduct_and_madd_int16_neon, export=1
|
||||
vpaddl.s32 d3, d2
|
||||
vmov.32 r0, d3[0]
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -122,7 +122,7 @@ function ff_imdct_half_neon, export=1
|
||||
vst2.32 {d5,d7}, [r8,:128]
|
||||
|
||||
pop {r4-r8,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_imdct_calc_neon, export=1
|
||||
push {r4-r6,lr}
|
||||
@ -158,7 +158,7 @@ function ff_imdct_calc_neon, export=1
|
||||
bgt 1b
|
||||
|
||||
pop {r4-r6,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_mdct_calc_neon, export=1
|
||||
push {r4-r10,lr}
|
||||
@ -300,4 +300,4 @@ function ff_mdct_calc_neon, export=1
|
||||
vst2.32 {d5,d7}, [r8,:128]
|
||||
|
||||
pop {r4-r10,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -114,4 +114,4 @@ function ff_dct_unquantize_h263_armv5te, export=1
|
||||
strh r9, [r0], #2
|
||||
strh lr, [r0], #2
|
||||
pop {r4-r9,pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -147,7 +147,7 @@ row_dc_only:
|
||||
strd a3, [a1, #8]
|
||||
|
||||
ldr pc, [sp], #4
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.macro idct_col
|
||||
ldr a4, [a1] /* a4 = col[1:0] */
|
||||
@ -331,7 +331,7 @@ function idct_col_armv5te
|
||||
str a2, [a1, #(16*4)]
|
||||
|
||||
ldr pc, [sp], #4
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function idct_col_put_armv5te
|
||||
str lr, [sp, #-4]!
|
||||
@ -448,7 +448,7 @@ function idct_col_put_armv5te
|
||||
strh a2, [v2, -lr]
|
||||
|
||||
ldr pc, [sp], #4
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function idct_col_add_armv5te
|
||||
str lr, [sp, #-4]!
|
||||
@ -598,7 +598,7 @@ function idct_col_add_armv5te
|
||||
strh a2, [v2]
|
||||
|
||||
ldr pc, [sp], #4
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_simple_idct_armv5te, export=1
|
||||
stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
|
||||
@ -630,7 +630,7 @@ function ff_simple_idct_armv5te, export=1
|
||||
bl idct_col_armv5te
|
||||
|
||||
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_simple_idct_add_armv5te, export=1
|
||||
stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
|
||||
@ -665,7 +665,7 @@ function ff_simple_idct_add_armv5te, export=1
|
||||
|
||||
add sp, sp, #8
|
||||
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_simple_idct_put_armv5te, export=1
|
||||
stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
|
||||
@ -700,4 +700,4 @@ function ff_simple_idct_put_armv5te, export=1
|
||||
|
||||
add sp, sp, #8
|
||||
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -237,7 +237,7 @@ function idct_row_armv6
|
||||
strh r2, [r1, #(16*5)]
|
||||
strh r2, [r1, #(16*7)]
|
||||
pop {pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/*
|
||||
Compute IDCT of single column, read as row.
|
||||
@ -264,7 +264,7 @@ function idct_col_armv6
|
||||
strh r8, [r1, #(16*7)]
|
||||
|
||||
pop {pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/*
|
||||
Compute IDCT of single column, read as row, store saturated 8-bit.
|
||||
@ -294,7 +294,7 @@ function idct_col_put_armv6
|
||||
sub r1, r1, r2, lsl #3
|
||||
|
||||
pop {pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/*
|
||||
Compute IDCT of single column, read as row, add/store saturated 8-bit.
|
||||
@ -349,7 +349,7 @@ function idct_col_add_armv6
|
||||
sub r1, r1, r2, lsl #3
|
||||
|
||||
pop {pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/*
|
||||
Compute 8 IDCT row transforms.
|
||||
@ -396,7 +396,7 @@ function ff_simple_idct_armv6, export=1
|
||||
|
||||
add sp, sp, #128
|
||||
pop {r4-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/* ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
|
||||
function ff_simple_idct_add_armv6, export=1
|
||||
@ -413,7 +413,7 @@ function ff_simple_idct_add_armv6, export=1
|
||||
|
||||
add sp, sp, #(128+8)
|
||||
pop {r4-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/* ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
|
||||
function ff_simple_idct_put_armv6, export=1
|
||||
@ -430,4 +430,4 @@ function ff_simple_idct_put_armv6, export=1
|
||||
|
||||
add sp, sp, #(128+8)
|
||||
pop {r4-r11, pc}
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -77,7 +77,7 @@ function idct_row4_pld_neon
|
||||
add r3, r3, r1, lsl #1
|
||||
pld [r3]
|
||||
pld [r3, r1]
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function idct_row4_neon
|
||||
vmov.i32 q15, #(1<<(ROW_SHIFT-1))
|
||||
@ -147,7 +147,7 @@ function idct_row4_neon
|
||||
vst1.64 {d6-d9}, [r2,:128]!
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function idct_col4_neon
|
||||
mov ip, #16
|
||||
@ -218,7 +218,7 @@ function idct_col4_neon
|
||||
vsubhn.i32 d6, q14, q6
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.align 6
|
||||
|
||||
@ -237,7 +237,7 @@ function idct_col4_st8_neon
|
||||
vst1.32 {d5[1]}, [r0,:32], r1
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.section .rodata
|
||||
.align 4
|
||||
@ -275,7 +275,7 @@ function ff_simple_idct_put_neon, export=1
|
||||
bl idct_col4_st8_neon
|
||||
|
||||
idct_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.align 6
|
||||
|
||||
@ -312,7 +312,7 @@ function idct_col4_add8_neon
|
||||
vst1.32 {d5[1]}, [ip,:32], r1
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
|
||||
function ff_simple_idct_add_neon, export=1
|
||||
@ -330,7 +330,7 @@ function ff_simple_idct_add_neon, export=1
|
||||
bl idct_col4_add8_neon
|
||||
|
||||
idct_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.align 6
|
||||
|
||||
@ -351,7 +351,7 @@ function idct_col4_st16_neon
|
||||
vst1.64 {d9}, [r2,:64], ip
|
||||
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
/* void ff_simple_idct_neon(DCTELEM *data); */
|
||||
function ff_simple_idct_neon, export=1
|
||||
@ -370,4 +370,4 @@ function ff_simple_idct_neon, export=1
|
||||
bl idct_col4_st16_neon
|
||||
|
||||
idct_end
|
||||
.endfunc
|
||||
endfunc
|
||||
|
@ -74,7 +74,7 @@ function ff_vp3_v_loop_filter_neon, export=1
|
||||
vst1.64 {d0}, [ip,:64], r1
|
||||
vst1.64 {d1}, [ip,:64], r1
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vp3_h_loop_filter_neon, export=1
|
||||
sub ip, r0, #1
|
||||
@ -107,7 +107,7 @@ function ff_vp3_h_loop_filter_neon, export=1
|
||||
vst1.16 {d0[3]}, [ip], r1
|
||||
vst1.16 {d1[3]}, [ip], r1
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
|
||||
function vp3_idct_start_neon
|
||||
@ -120,7 +120,7 @@ function vp3_idct_start_neon
|
||||
vadd.s16 q1, q8, q12
|
||||
vsub.s16 q8, q8, q12
|
||||
vld1.64 {d28-d31}, [r2,:128]!
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function vp3_idct_core_neon
|
||||
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
|
||||
@ -211,7 +211,7 @@ function vp3_idct_core_neon
|
||||
vadd.s16 q10, q1, q2 // Ad = (A - C) * C4
|
||||
vsub.s16 q14, q4, q3 // H = ip[2] * C6 - ip[6] * C2
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
.macro VP3_IDCT_END type
|
||||
function vp3_idct_end_\type\()_neon
|
||||
@ -259,7 +259,7 @@ function vp3_idct_end_\type\()_neon
|
||||
vswp d23, d30
|
||||
.endif
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
VP3_IDCT_END row
|
||||
@ -289,7 +289,7 @@ function ff_vp3_idct_neon, export=1
|
||||
vst1.64 {d24-d27}, [r0,:128]!
|
||||
vst1.64 {d28-d31}, [r0,:128]!
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vp3_idct_put_neon, export=1
|
||||
mov ip, lr
|
||||
@ -319,7 +319,7 @@ function ff_vp3_idct_put_neon, export=1
|
||||
vst1.64 {d6}, [r0,:64], r1
|
||||
vst1.64 {d7}, [r0,:64], r1
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
||||
function ff_vp3_idct_add_neon, export=1
|
||||
mov ip, lr
|
||||
@ -373,4 +373,4 @@ function ff_vp3_idct_add_neon, export=1
|
||||
vst1.64 {d6}, [r2,:64], r1
|
||||
vst1.64 {d7}, [r2,:64], r1
|
||||
bx lr
|
||||
.endfunc
|
||||
endfunc
|
||||
|
Loading…
Reference in New Issue
Block a user