mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
arm: hpeldsp: Move half-pel assembly from dsputil to hpeldsp
Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
47e5a98174
commit
7384b7a713
@ -31,6 +31,11 @@ OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o
|
|||||||
OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
|
OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
|
||||||
OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o
|
OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o
|
||||||
|
|
||||||
|
OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \
|
||||||
|
arm/hpeldsp_arm.o
|
||||||
|
ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \
|
||||||
|
arm/hpeldsp_armv6.o
|
||||||
|
|
||||||
OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o
|
OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o
|
||||||
OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \
|
OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \
|
||||||
arm/rv40dsp_init_arm.o \
|
arm/rv40dsp_init_arm.o \
|
||||||
@ -84,6 +89,9 @@ NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_neon.o \
|
|||||||
NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \
|
NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \
|
||||||
arm/synth_filter_neon.o \
|
arm/synth_filter_neon.o \
|
||||||
|
|
||||||
|
NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_neon.o \
|
||||||
|
arm/hpeldsp_neon.o
|
||||||
|
|
||||||
NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o
|
NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o
|
||||||
NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o
|
NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o
|
||||||
NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
|
NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
|
||||||
|
@ -26,590 +26,6 @@
|
|||||||
#define pld @
|
#define pld @
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
|
|
||||||
@ Funnel-shift five source words into four destination words:
@   Rd[i] = (Rn[i] >> (8 * shift)) | (Rn[i+1] << (32 - 8 * shift)),  i = 0..3
@ For little-endian word loads this yields the 16 bytes that start
@ \shift bytes into the 20-byte window Rn0..Rn4.
@ The Rn registers are only read; the Rd registers are overwritten.
@ Invoked in this file with shift = 1, 2 and 3 only.
mov \Rd0, \Rn0, lsr #(\shift * 8)
|
|
||||||
mov \Rd1, \Rn1, lsr #(\shift * 8)
|
|
||||||
mov \Rd2, \Rn2, lsr #(\shift * 8)
|
|
||||||
mov \Rd3, \Rn3, lsr #(\shift * 8)
|
|
||||||
@ merge in the low bytes of the following word
orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
|
|
||||||
orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
|
|
||||||
orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
|
|
||||||
orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
|
|
||||||
.endm
|
|
||||||
.macro ALIGN_DWORD shift, R0, R1, R2
|
|
||||||
@ In-place funnel shift of a three-word window down to two words:
@   R0 = (R0 >> (8 * shift)) | (R1 << (32 - 8 * shift))
@   R1 = (R1 >> (8 * shift)) | (R2 << (32 - 8 * shift))
@ R0 and R1 are overwritten with the result; R2 is only read.
@ R0 must be finished before R1 is modified, hence the instruction order.
mov \R0, \R0, lsr #(\shift * 8)
|
|
||||||
orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
|
|
||||||
mov \R1, \R1, lsr #(\shift * 8)
|
|
||||||
orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
|
|
||||||
.endm
|
|
||||||
.macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
|
|
||||||
@ Same funnel shift as ALIGN_DWORD, but written to separate destinations:
@   Rdst0 = (Rsrc0 >> (8 * shift)) | (Rsrc1 << (32 - 8 * shift))
@   Rdst1 = (Rsrc1 >> (8 * shift)) | (Rsrc2 << (32 - 8 * shift))
@ Rsrc0 is read only by the first instruction, before Rdst1 is written,
@ so Rdst1 may be the same register as Rsrc0. Rsrc1 and Rsrc2 are read
@ after both destinations have been written and must not alias them.
mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
|
|
||||||
mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
|
|
||||||
orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
|
|
||||||
orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
|
|
||||||
@ Byte-wise average, rounding up, of two 8-byte operands held in
@ register pairs (Rn0,Rn1) and (Rm0,Rm1); result goes to (Rd0,Rd1).
@ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
|
|
||||||
@ Rmask must hold 0xFEFEFEFE; masking before the shift keeps bits from
@ crossing into the neighbouring byte.
|
|
||||||
@ Rn0/Rn1 are clobbered (they end up holding Rn | Rm); Rm0/Rm1 are only read.
|
|
||||||
eor \Rd0, \Rn0, \Rm0
|
|
||||||
eor \Rd1, \Rn1, \Rm1
|
|
||||||
orr \Rn0, \Rn0, \Rm0
|
|
||||||
orr \Rn1, \Rn1, \Rm1
|
|
||||||
and \Rd0, \Rd0, \Rmask
|
|
||||||
and \Rd1, \Rd1, \Rmask
|
|
||||||
sub \Rd0, \Rn0, \Rd0, lsr #1
|
|
||||||
sub \Rd1, \Rn1, \Rd1, lsr #1
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
|
|
||||||
@ Byte-wise average, rounding down, of two 8-byte operands held in
@ register pairs (Rn0,Rn1) and (Rm0,Rm1); result goes to (Rd0,Rd1).
@ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
|
|
||||||
@ Rmask must hold 0xFEFEFEFE; masking before the shift keeps bits from
@ crossing into the neighbouring byte.
|
|
||||||
@ Rn0/Rn1 are clobbered (they end up holding Rn & Rm); Rm0/Rm1 are only read.
|
|
||||||
eor \Rd0, \Rn0, \Rm0
|
|
||||||
eor \Rd1, \Rn1, \Rm1
|
|
||||||
and \Rn0, \Rn0, \Rm0
|
|
||||||
and \Rn1, \Rn1, \Rm1
|
|
||||||
and \Rd0, \Rd0, \Rmask
|
|
||||||
and \Rd1, \Rd1, \Rmask
|
|
||||||
add \Rd0, \Rn0, \Rd0, lsr #1
|
|
||||||
add \Rd1, \Rn1, \Rd1, lsr #1
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro JMP_ALIGN tmp, reg
|
|
||||||
@ Dispatch on the byte offset of pointer \reg within its 32-bit word.
@ \reg is rounded down to a word boundary, then control goes to the
@ next local label 1:, 2:, 3: or 4: for an offset of 0, 1, 2 or 3 bytes.
@ The code following the macro must define all four labels.
@ Clobbers \tmp and the condition flags.
ands \tmp, \reg, #3
|
|
||||||
@ bic leaves the flags from ands intact for the beq below
bic \reg, \reg, #3
|
|
||||||
beq 1f
|
|
||||||
subs \tmp, \tmp, #1
|
|
||||||
beq 2f
|
|
||||||
subs \tmp, \tmp, #1
|
|
||||||
beq 3f
|
|
||||||
b 4f
|
|
||||||
.endm
|
|
||||||
|
|
||||||
@ ----------------------------------------------------------------
|
|
||||||
.align 5
|
|
||||||
function ff_put_pixels16_arm, export=1
|
|
||||||
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
|
||||||
@ block = word aligned, pixels = unaligned
|
|
||||||
pld [r1]
|
|
||||||
push {r4-r11, lr}
|
|
||||||
JMP_ALIGN r5, r1
|
|
||||||
1:
|
|
||||||
ldm r1, {r4-r7}
|
|
||||||
add r1, r1, r2
|
|
||||||
stm r0, {r4-r7}
|
|
||||||
pld [r1]
|
|
||||||
subs r3, r3, #1
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 1b
|
|
||||||
pop {r4-r11, pc}
|
|
||||||
.align 5
|
|
||||||
2:
|
|
||||||
ldm r1, {r4-r8}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
|
|
||||||
pld [r1]
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r9-r12}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 2b
|
|
||||||
pop {r4-r11, pc}
|
|
||||||
.align 5
|
|
||||||
3:
|
|
||||||
ldm r1, {r4-r8}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
|
|
||||||
pld [r1]
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r9-r12}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 3b
|
|
||||||
pop {r4-r11, pc}
|
|
||||||
.align 5
|
|
||||||
4:
|
|
||||||
ldm r1, {r4-r8}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
|
|
||||||
pld [r1]
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r9-r12}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 4b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
@ ----------------------------------------------------------------
|
|
||||||
.align 5
|
|
||||||
function ff_put_pixels8_arm, export=1
|
|
||||||
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
|
||||||
@ block = word aligned, pixels = unaligned
|
|
||||||
pld [r1]
|
|
||||||
push {r4-r5,lr}
|
|
||||||
JMP_ALIGN r5, r1
|
|
||||||
1:
|
|
||||||
ldm r1, {r4-r5}
|
|
||||||
add r1, r1, r2
|
|
||||||
subs r3, r3, #1
|
|
||||||
pld [r1]
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 1b
|
|
||||||
pop {r4-r5,pc}
|
|
||||||
.align 5
|
|
||||||
2:
|
|
||||||
ldm r1, {r4-r5, r12}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD 1, r4, r5, r12
|
|
||||||
pld [r1]
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 2b
|
|
||||||
pop {r4-r5,pc}
|
|
||||||
.align 5
|
|
||||||
3:
|
|
||||||
ldm r1, {r4-r5, r12}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD 2, r4, r5, r12
|
|
||||||
pld [r1]
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 3b
|
|
||||||
pop {r4-r5,pc}
|
|
||||||
.align 5
|
|
||||||
4:
|
|
||||||
ldm r1, {r4-r5, r12}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD 3, r4, r5, r12
|
|
||||||
pld [r1]
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 4b
|
|
||||||
pop {r4-r5,pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
@ ----------------------------------------------------------------
|
|
||||||
.align 5
|
|
||||||
function ff_put_pixels8_x2_arm, export=1
|
|
||||||
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
|
||||||
@ block = word aligned, pixels = unaligned
|
|
||||||
pld [r1]
|
|
||||||
push {r4-r10,lr}
|
|
||||||
ldr r12, =0xfefefefe
|
|
||||||
JMP_ALIGN r5, r1
|
|
||||||
1:
|
|
||||||
ldm r1, {r4-r5, r10}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
|
|
||||||
pld [r1]
|
|
||||||
RND_AVG32 r8, r9, r4, r5, r6, r7, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r8-r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 1b
|
|
||||||
pop {r4-r10,pc}
|
|
||||||
.align 5
|
|
||||||
2:
|
|
||||||
ldm r1, {r4-r5, r10}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
|
|
||||||
ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
|
|
||||||
pld [r1]
|
|
||||||
RND_AVG32 r4, r5, r6, r7, r8, r9, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 2b
|
|
||||||
pop {r4-r10,pc}
|
|
||||||
.align 5
|
|
||||||
3:
|
|
||||||
ldm r1, {r4-r5, r10}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
|
|
||||||
ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
|
|
||||||
pld [r1]
|
|
||||||
RND_AVG32 r4, r5, r6, r7, r8, r9, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 3b
|
|
||||||
pop {r4-r10,pc}
|
|
||||||
.align 5
|
|
||||||
4:
|
|
||||||
ldm r1, {r4-r5, r10}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
|
|
||||||
pld [r1]
|
|
||||||
RND_AVG32 r8, r9, r6, r7, r5, r10, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r8-r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 4b
|
|
||||||
pop {r4-r10,pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
.align 5
|
|
||||||
function ff_put_no_rnd_pixels8_x2_arm, export=1
|
|
||||||
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
|
||||||
@ block = word aligned, pixels = unaligned
|
|
||||||
pld [r1]
|
|
||||||
push {r4-r10,lr}
|
|
||||||
ldr r12, =0xfefefefe
|
|
||||||
JMP_ALIGN r5, r1
|
|
||||||
1:
|
|
||||||
ldm r1, {r4-r5, r10}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
|
|
||||||
pld [r1]
|
|
||||||
NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r8-r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 1b
|
|
||||||
pop {r4-r10,pc}
|
|
||||||
.align 5
|
|
||||||
2:
|
|
||||||
ldm r1, {r4-r5, r10}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
|
|
||||||
ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
|
|
||||||
pld [r1]
|
|
||||||
NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 2b
|
|
||||||
pop {r4-r10,pc}
|
|
||||||
.align 5
|
|
||||||
3:
|
|
||||||
ldm r1, {r4-r5, r10}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
|
|
||||||
ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
|
|
||||||
pld [r1]
|
|
||||||
NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 3b
|
|
||||||
pop {r4-r10,pc}
|
|
||||||
.align 5
|
|
||||||
4:
|
|
||||||
ldm r1, {r4-r5, r10}
|
|
||||||
add r1, r1, r2
|
|
||||||
ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
|
|
||||||
pld [r1]
|
|
||||||
NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r8-r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 4b
|
|
||||||
pop {r4-r10,pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
|
|
||||||
@ ----------------------------------------------------------------
|
|
||||||
.align 5
|
|
||||||
function ff_put_pixels8_y2_arm, export=1
|
|
||||||
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
|
||||||
@ block = word aligned, pixels = unaligned
|
|
||||||
pld [r1]
|
|
||||||
push {r4-r11,lr}
|
|
||||||
mov r3, r3, lsr #1
|
|
||||||
ldr r12, =0xfefefefe
|
|
||||||
JMP_ALIGN r5, r1
|
|
||||||
1:
|
|
||||||
ldm r1, {r4-r5}
|
|
||||||
add r1, r1, r2
|
|
||||||
6: ldm r1, {r6-r7}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
RND_AVG32 r8, r9, r4, r5, r6, r7, r12
|
|
||||||
ldm r1, {r4-r5}
|
|
||||||
add r1, r1, r2
|
|
||||||
stm r0, {r8-r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
pld [r1]
|
|
||||||
RND_AVG32 r8, r9, r6, r7, r4, r5, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r8-r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 6b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
.align 5
|
|
||||||
2:
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 1, r4, r5, r6
|
|
||||||
6: ldm r1, {r7-r9}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 1, r7, r8, r9
|
|
||||||
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 1, r4, r5, r6
|
|
||||||
subs r3, r3, #1
|
|
||||||
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 6b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
.align 5
|
|
||||||
3:
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 2, r4, r5, r6
|
|
||||||
6: ldm r1, {r7-r9}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 2, r7, r8, r9
|
|
||||||
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 2, r4, r5, r6
|
|
||||||
subs r3, r3, #1
|
|
||||||
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 6b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
.align 5
|
|
||||||
4:
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 3, r4, r5, r6
|
|
||||||
6: ldm r1, {r7-r9}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 3, r7, r8, r9
|
|
||||||
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 3, r4, r5, r6
|
|
||||||
subs r3, r3, #1
|
|
||||||
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 6b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
.align 5
|
|
||||||
function ff_put_no_rnd_pixels8_y2_arm, export=1
|
|
||||||
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
|
||||||
@ block = word aligned, pixels = unaligned
|
|
||||||
pld [r1]
|
|
||||||
push {r4-r11,lr}
|
|
||||||
mov r3, r3, lsr #1
|
|
||||||
ldr r12, =0xfefefefe
|
|
||||||
JMP_ALIGN r5, r1
|
|
||||||
1:
|
|
||||||
ldm r1, {r4-r5}
|
|
||||||
add r1, r1, r2
|
|
||||||
6: ldm r1, {r6-r7}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
|
|
||||||
ldm r1, {r4-r5}
|
|
||||||
add r1, r1, r2
|
|
||||||
stm r0, {r8-r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
pld [r1]
|
|
||||||
NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
|
|
||||||
subs r3, r3, #1
|
|
||||||
stm r0, {r8-r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 6b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
.align 5
|
|
||||||
2:
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 1, r4, r5, r6
|
|
||||||
6: ldm r1, {r7-r9}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 1, r7, r8, r9
|
|
||||||
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 1, r4, r5, r6
|
|
||||||
subs r3, r3, #1
|
|
||||||
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 6b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
.align 5
|
|
||||||
3:
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 2, r4, r5, r6
|
|
||||||
6: ldm r1, {r7-r9}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 2, r7, r8, r9
|
|
||||||
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 2, r4, r5, r6
|
|
||||||
subs r3, r3, #1
|
|
||||||
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 6b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
.align 5
|
|
||||||
4:
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 3, r4, r5, r6
|
|
||||||
6: ldm r1, {r7-r9}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 3, r7, r8, r9
|
|
||||||
NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
ldm r1, {r4-r6}
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
ALIGN_DWORD 3, r4, r5, r6
|
|
||||||
subs r3, r3, #1
|
|
||||||
NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
|
||||||
stm r0, {r10-r11}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 6b
|
|
||||||
pop {r4-r11,pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
.ltorg
|
|
||||||
|
|
||||||
@ ----------------------------------------------------------------
|
|
||||||
.macro RND_XY2_IT align, rnd
|
|
||||||
@ One source row of the 2x2 (xy2) half-pel filter.
@ Loads three words from the word-aligned pointer r1, advances r1 by r2,
@ and forms a = 8 bytes at byte offset \align and b = 8 bytes at
@ offset \align + 1. It then splits a and b into low/high bit fields:
@ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
|
|
||||||
@ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
|
|
||||||
@ Results: l1 in r8/r9, h1 in r10/r11.
@ \rnd is a shift mnemonic (lsl or lsr) that selects the rounding bias:
@   lsl: 0x03030303 & (0x03030303 << 1) = 0x02020202
@   lsr: 0x03030303 & (0x03030303 >> 1) = 0x01010101
@ The bias is added only when bit 0 of r3 is clear, i.e. on every other
@ row, so a sum of two consecutive rows contains it exactly once.
@ r3 is decremented last and the flags are left set for the caller.
@ Clobbers r4-r12, r14 and the flags.
.if \align == 0
|
|
||||||
ldm r1, {r6-r8}
|
|
||||||
.elseif \align == 3
|
|
||||||
ldm r1, {r5-r7}
|
|
||||||
.else
|
|
||||||
ldm r1, {r8-r10}
|
|
||||||
.endif
|
|
||||||
add r1, r1, r2
|
|
||||||
pld [r1]
|
|
||||||
@ after this block the two operands are in r4/r5 and r6/r7
.if \align == 0
|
|
||||||
ALIGN_DWORD_D 1, r4, r5, r6, r7, r8
|
|
||||||
.elseif \align == 1
|
|
||||||
ALIGN_DWORD_D 1, r4, r5, r8, r9, r10
|
|
||||||
ALIGN_DWORD_D 2, r6, r7, r8, r9, r10
|
|
||||||
.elseif \align == 2
|
|
||||||
ALIGN_DWORD_D 2, r4, r5, r8, r9, r10
|
|
||||||
ALIGN_DWORD_D 3, r6, r7, r8, r9, r10
|
|
||||||
.elseif \align == 3
|
|
||||||
@ offset + 1 is word aligned here, so r6/r7 are used as loaded
ALIGN_DWORD_D 3, r4, r5, r5, r6, r7
|
|
||||||
.endif
|
|
||||||
ldr r14, =0x03030303
|
|
||||||
@ Z is set when r3 is even; it gates the bias below
tst r3, #1
|
|
||||||
and r8, r4, r14
|
|
||||||
and r9, r5, r14
|
|
||||||
and r10, r6, r14
|
|
||||||
and r11, r7, r14
|
|
||||||
it eq
|
|
||||||
@ turn r14 into the rounding bias (see the header comment)
andeq r14, r14, r14, \rnd #1
|
|
||||||
add r8, r8, r10
|
|
||||||
add r9, r9, r11
|
|
||||||
@ 0xfcfcfcfc >> 2 = 0x3f3f3f3f: mask applied after the shift by 2
ldr r12, =0xfcfcfcfc >> 2
|
|
||||||
itt eq
|
|
||||||
addeq r8, r8, r14
|
|
||||||
addeq r9, r9, r14
|
|
||||||
and r4, r12, r4, lsr #2
|
|
||||||
and r5, r12, r5, lsr #2
|
|
||||||
and r6, r12, r6, lsr #2
|
|
||||||
and r7, r12, r7, lsr #2
|
|
||||||
add r10, r4, r6
|
|
||||||
add r11, r5, r7
|
|
||||||
@ row counter; flags from this subs are consumed by the caller's loop branch
subs r3, r3, #1
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro RND_XY2_EXPAND align, rnd
|
|
||||||
@ Complete xy2 loop body for one source alignment, including the
@ function return. Expects r0 = dst, r1 = word-aligned src, r2 = stride,
@ r3 = row counter, and r4-r11, lr already pushed by the caller.
@ Each output row combines the RND_XY2_IT sums of two consecutive rows:
@   out = h_prev + h_cur + (((l_prev + l_cur) >> 2) & 0x0f0f0f0f)
RND_XY2_IT \align, \rnd
|
|
||||||
@ keep the previous row's sums (l in r8/r9, h in r10/r11) on the stack
6: push {r8-r11}
|
|
||||||
RND_XY2_IT \align, \rnd
|
|
||||||
@ previous row comes back as l in r4/r5 and h in r6/r7
pop {r4-r7}
|
|
||||||
add r4, r4, r8
|
|
||||||
add r5, r5, r9
|
|
||||||
ldr r14, =0x0f0f0f0f
|
|
||||||
add r6, r6, r10
|
|
||||||
add r7, r7, r11
|
|
||||||
and r4, r14, r4, lsr #2
|
|
||||||
and r5, r14, r5, lsr #2
|
|
||||||
add r4, r4, r6
|
|
||||||
add r5, r5, r7
|
|
||||||
stm r0, {r4-r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
@ flags are still those of the subs r3 at the end of RND_XY2_IT:
@ nothing in between sets them
bge 6b
|
|
||||||
@ restores the registers saved by the enclosing function and returns
pop {r4-r11,pc}
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.align 5
|
|
||||||
function ff_put_pixels8_xy2_arm, export=1
|
|
||||||
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
|
||||||
@ block = word aligned, pixels = unaligned
|
|
||||||
@ 8-pixel-wide xy2 (2x2 neighbourhood) put with rounding: the lsl
@ argument makes RND_XY2_IT use the 0x02020202 bias.
@ JMP_ALIGN word-aligns r1 and selects one of four expansions of
@ RND_XY2_EXPAND by source byte offset; each expansion ends with its
@ own pop {r4-r11,pc}, so control never falls through to the next label.
pld [r1]
|
|
||||||
push {r4-r11,lr} @ R14 is also called LR
|
|
||||||
JMP_ALIGN r5, r1
|
|
||||||
1: RND_XY2_EXPAND 0, lsl
|
|
||||||
.align 5
|
|
||||||
2: RND_XY2_EXPAND 1, lsl
|
|
||||||
.align 5
|
|
||||||
3: RND_XY2_EXPAND 2, lsl
|
|
||||||
.align 5
|
|
||||||
4: RND_XY2_EXPAND 3, lsl
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
.align 5
|
|
||||||
function ff_put_no_rnd_pixels8_xy2_arm, export=1
|
|
||||||
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
|
||||||
@ block = word aligned, pixels = unaligned
|
|
||||||
@ 8-pixel-wide xy2 (2x2 neighbourhood) put, "no rounding" variant: the
@ lsr argument makes RND_XY2_IT use the smaller 0x01010101 bias.
@ JMP_ALIGN word-aligns r1 and selects one of four expansions of
@ RND_XY2_EXPAND by source byte offset; each expansion ends with its
@ own pop {r4-r11,pc}, so control never falls through to the next label.
pld [r1]
|
|
||||||
push {r4-r11,lr}
|
|
||||||
JMP_ALIGN r5, r1
|
|
||||||
1: RND_XY2_EXPAND 0, lsr
|
|
||||||
.align 5
|
|
||||||
2: RND_XY2_EXPAND 1, lsr
|
|
||||||
.align 5
|
|
||||||
3: RND_XY2_EXPAND 2, lsr
|
|
||||||
.align 5
|
|
||||||
4: RND_XY2_EXPAND 3, lsr
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
.align 5
|
.align 5
|
||||||
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
|
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
|
||||||
function ff_add_pixels_clamped_arm, export=1
|
function ff_add_pixels_clamped_arm, export=1
|
||||||
|
@ -20,244 +20,6 @@
|
|||||||
|
|
||||||
#include "libavutil/arm/asm.S"
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
.macro call_2x_pixels type, subp
|
|
||||||
@ Emit ff_<type>_pixels16<subp>_armv6 as two calls to the 8-pixel-wide
@ routine ff_<type>_pixels8<subp>_armv6: first on the left 8 columns,
@ then on the right 8 columns.
@ \subp may be empty (as in "call_2x_pixels avg").
function ff_\type\()_pixels16\subp\()_armv6, export=1
|
|
||||||
@ save the four arguments and the return address across the first call
@ NOTE(review): this pushes 5 words, so sp is not 8-byte aligned at the
@ bl - confirm the callees do not depend on that.
push {r0-r3, lr}
|
|
||||||
bl ff_\type\()_pixels8\subp\()_armv6
|
|
||||||
pop {r0-r3, lr}
|
|
||||||
@ advance dst (r0) and src (r1) by 8 bytes to reach the right half
add r0, r0, #8
|
|
||||||
add r1, r1, #8
|
|
||||||
@ tail call: lr was restored above, so the callee returns to our caller
b ff_\type\()_pixels8\subp\()_armv6
|
|
||||||
endfunc
|
|
||||||
.endm
|
|
||||||
|
|
||||||
call_2x_pixels avg
|
|
||||||
call_2x_pixels put, _x2
|
|
||||||
call_2x_pixels put, _y2
|
|
||||||
call_2x_pixels put, _x2_no_rnd
|
|
||||||
call_2x_pixels put, _y2_no_rnd
|
|
||||||
|
|
||||||
@ Copy a 16-byte-wide block, two rows per loop iteration.
@ r0 = dst, r1 = src, r2 = stride in bytes, r3 = row count.
@ The counter drops by 2 per iteration and the loop exits only on zero,
@ so the row count is presumably always a positive even number - confirm
@ with callers.
@ ldr_post / strd_post are macros from the included asm.S; presumably
@ they access [base] and then add r2 to the base - confirm there.
function ff_put_pixels16_armv6, export=1
|
|
||||||
push {r4-r11}
|
|
||||||
1:
|
|
||||||
@ first row: words 1..3 via offsets, word 0 via ldr_post
ldr r5, [r1, #4]
|
|
||||||
ldr r6, [r1, #8]
|
|
||||||
ldr r7, [r1, #12]
|
|
||||||
ldr_post r4, r1, r2
|
|
||||||
strd r6, r7, [r0, #8]
|
|
||||||
ldr r9, [r1, #4]
|
|
||||||
strd_post r4, r5, r0, r2
|
|
||||||
@ second row, same pattern in r8-r11
ldr r10, [r1, #8]
|
|
||||||
ldr r11, [r1, #12]
|
|
||||||
ldr_post r8, r1, r2
|
|
||||||
strd r10, r11, [r0, #8]
|
|
||||||
subs r3, r3, #2
|
|
||||||
strd_post r8, r9, r0, r2
|
|
||||||
bne 1b
|
|
||||||
|
|
|
||||||
pop {r4-r11}
|
|
||||||
bx lr
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
@ Copy an 8-byte-wide block, two rows per loop iteration.
@ r0 = dst, r1 = src, r2 = stride in bytes, r3 = row count.
@ The counter drops by 2 per iteration and the loop exits only on zero,
@ so the row count is presumably always a positive even number - confirm
@ with callers.
@ ldr_post / strd_post are macros from the included asm.S; presumably
@ they access [base] and then add r2 to the base - confirm there.
function ff_put_pixels8_armv6, export=1
|
|
||||||
push {r4-r7}
|
|
||||||
1:
|
|
||||||
@ first row in r4/r5
ldr r5, [r1, #4]
|
|
||||||
ldr_post r4, r1, r2
|
|
||||||
@ second row in r6/r7, interleaved with the store of the first
ldr r7, [r1, #4]
|
|
||||||
strd_post r4, r5, r0, r2
|
|
||||||
ldr_post r6, r1, r2
|
|
||||||
subs r3, r3, #2
|
|
||||||
strd_post r6, r7, r0, r2
|
|
||||||
bne 1b
|
|
||||||
|
|
|
||||||
pop {r4-r7}
|
|
||||||
bx lr
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_put_pixels8_x2_armv6, export=1
|
|
||||||
push {r4-r11, lr}
|
|
||||||
mov r12, #1
|
|
||||||
orr r12, r12, r12, lsl #8
|
|
||||||
orr r12, r12, r12, lsl #16
|
|
||||||
1:
|
|
||||||
ldr r4, [r1]
|
|
||||||
subs r3, r3, #2
|
|
||||||
ldr r5, [r1, #4]
|
|
||||||
ldr r7, [r1, #5]
|
|
||||||
lsr r6, r4, #8
|
|
||||||
ldr_pre r8, r1, r2
|
|
||||||
orr r6, r6, r5, lsl #24
|
|
||||||
ldr r9, [r1, #4]
|
|
||||||
ldr r11, [r1, #5]
|
|
||||||
lsr r10, r8, #8
|
|
||||||
add r1, r1, r2
|
|
||||||
orr r10, r10, r9, lsl #24
|
|
||||||
eor r14, r4, r6
|
|
||||||
uhadd8 r4, r4, r6
|
|
||||||
eor r6, r5, r7
|
|
||||||
uhadd8 r5, r5, r7
|
|
||||||
and r14, r14, r12
|
|
||||||
and r6, r6, r12
|
|
||||||
uadd8 r4, r4, r14
|
|
||||||
eor r14, r8, r10
|
|
||||||
uadd8 r5, r5, r6
|
|
||||||
eor r6, r9, r11
|
|
||||||
uhadd8 r8, r8, r10
|
|
||||||
and r14, r14, r12
|
|
||||||
uhadd8 r9, r9, r11
|
|
||||||
and r6, r6, r12
|
|
||||||
uadd8 r8, r8, r14
|
|
||||||
strd_post r4, r5, r0, r2
|
|
||||||
uadd8 r9, r9, r6
|
|
||||||
strd_post r8, r9, r0, r2
|
|
||||||
bne 1b
|
|
||||||
|
|
||||||
pop {r4-r11, pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_put_pixels8_y2_armv6, export=1
|
|
||||||
push {r4-r11}
|
|
||||||
mov r12, #1
|
|
||||||
orr r12, r12, r12, lsl #8
|
|
||||||
orr r12, r12, r12, lsl #16
|
|
||||||
ldr r4, [r1]
|
|
||||||
ldr r5, [r1, #4]
|
|
||||||
ldr_pre r6, r1, r2
|
|
||||||
ldr r7, [r1, #4]
|
|
||||||
1:
|
|
||||||
subs r3, r3, #2
|
|
||||||
uhadd8 r8, r4, r6
|
|
||||||
eor r10, r4, r6
|
|
||||||
uhadd8 r9, r5, r7
|
|
||||||
eor r11, r5, r7
|
|
||||||
and r10, r10, r12
|
|
||||||
ldr_pre r4, r1, r2
|
|
||||||
uadd8 r8, r8, r10
|
|
||||||
and r11, r11, r12
|
|
||||||
uadd8 r9, r9, r11
|
|
||||||
ldr r5, [r1, #4]
|
|
||||||
uhadd8 r10, r4, r6
|
|
||||||
eor r6, r4, r6
|
|
||||||
uhadd8 r11, r5, r7
|
|
||||||
and r6, r6, r12
|
|
||||||
eor r7, r5, r7
|
|
||||||
uadd8 r10, r10, r6
|
|
||||||
and r7, r7, r12
|
|
||||||
ldr_pre r6, r1, r2
|
|
||||||
uadd8 r11, r11, r7
|
|
||||||
strd_post r8, r9, r0, r2
|
|
||||||
ldr r7, [r1, #4]
|
|
||||||
strd_post r10, r11, r0, r2
|
|
||||||
bne 1b
|
|
||||||
|
|
||||||
pop {r4-r11}
|
|
||||||
bx lr
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_put_pixels8_x2_no_rnd_armv6, export=1
|
|
||||||
push {r4-r9, lr}
|
|
||||||
1:
|
|
||||||
subs r3, r3, #2
|
|
||||||
ldr r4, [r1]
|
|
||||||
ldr r5, [r1, #4]
|
|
||||||
ldr r7, [r1, #5]
|
|
||||||
ldr_pre r8, r1, r2
|
|
||||||
ldr r9, [r1, #4]
|
|
||||||
ldr r14, [r1, #5]
|
|
||||||
add r1, r1, r2
|
|
||||||
lsr r6, r4, #8
|
|
||||||
orr r6, r6, r5, lsl #24
|
|
||||||
lsr r12, r8, #8
|
|
||||||
orr r12, r12, r9, lsl #24
|
|
||||||
uhadd8 r4, r4, r6
|
|
||||||
uhadd8 r5, r5, r7
|
|
||||||
uhadd8 r8, r8, r12
|
|
||||||
uhadd8 r9, r9, r14
|
|
||||||
stm r0, {r4,r5}
|
|
||||||
add r0, r0, r2
|
|
||||||
stm r0, {r8,r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 1b
|
|
||||||
|
|
||||||
pop {r4-r9, pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_put_pixels8_y2_no_rnd_armv6, export=1
|
|
||||||
push {r4-r9, lr}
|
|
||||||
ldr r4, [r1]
|
|
||||||
ldr r5, [r1, #4]
|
|
||||||
ldr_pre r6, r1, r2
|
|
||||||
ldr r7, [r1, #4]
|
|
||||||
1:
|
|
||||||
subs r3, r3, #2
|
|
||||||
uhadd8 r8, r4, r6
|
|
||||||
ldr_pre r4, r1, r2
|
|
||||||
uhadd8 r9, r5, r7
|
|
||||||
ldr r5, [r1, #4]
|
|
||||||
uhadd8 r12, r4, r6
|
|
||||||
ldr_pre r6, r1, r2
|
|
||||||
uhadd8 r14, r5, r7
|
|
||||||
ldr r7, [r1, #4]
|
|
||||||
stm r0, {r8,r9}
|
|
||||||
add r0, r0, r2
|
|
||||||
stm r0, {r12,r14}
|
|
||||||
add r0, r0, r2
|
|
||||||
bne 1b
|
|
||||||
|
|
||||||
pop {r4-r9, pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_avg_pixels8_armv6, export=1
|
|
||||||
pld [r1, r2]
|
|
||||||
push {r4-r10, lr}
|
|
||||||
mov lr, #1
|
|
||||||
orr lr, lr, lr, lsl #8
|
|
||||||
orr lr, lr, lr, lsl #16
|
|
||||||
ldrd r4, r5, [r0]
|
|
||||||
ldr r10, [r1, #4]
|
|
||||||
ldr_post r9, r1, r2
|
|
||||||
subs r3, r3, #2
|
|
||||||
1:
|
|
||||||
pld [r1, r2]
|
|
||||||
eor r8, r4, r9
|
|
||||||
uhadd8 r4, r4, r9
|
|
||||||
eor r12, r5, r10
|
|
||||||
ldrd_reg r6, r7, r0, r2
|
|
||||||
uhadd8 r5, r5, r10
|
|
||||||
and r8, r8, lr
|
|
||||||
ldr r10, [r1, #4]
|
|
||||||
and r12, r12, lr
|
|
||||||
uadd8 r4, r4, r8
|
|
||||||
ldr_post r9, r1, r2
|
|
||||||
eor r8, r6, r9
|
|
||||||
uadd8 r5, r5, r12
|
|
||||||
pld [r1, r2, lsl #1]
|
|
||||||
eor r12, r7, r10
|
|
||||||
uhadd8 r6, r6, r9
|
|
||||||
strd_post r4, r5, r0, r2
|
|
||||||
uhadd8 r7, r7, r10
|
|
||||||
beq 2f
|
|
||||||
and r8, r8, lr
|
|
||||||
ldrd_reg r4, r5, r0, r2
|
|
||||||
uadd8 r6, r6, r8
|
|
||||||
ldr r10, [r1, #4]
|
|
||||||
and r12, r12, lr
|
|
||||||
subs r3, r3, #2
|
|
||||||
uadd8 r7, r7, r12
|
|
||||||
ldr_post r9, r1, r2
|
|
||||||
strd_post r6, r7, r0, r2
|
|
||||||
b 1b
|
|
||||||
2:
|
|
||||||
and r8, r8, lr
|
|
||||||
and r12, r12, lr
|
|
||||||
uadd8 r6, r6, r8
|
|
||||||
uadd8 r7, r7, r12
|
|
||||||
strd_post r6, r7, r0, r2
|
|
||||||
|
|
||||||
pop {r4-r10, pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_add_pixels_clamped_armv6, export=1
|
function ff_add_pixels_clamped_armv6, export=1
|
||||||
push {r4-r8,lr}
|
push {r4-r8,lr}
|
||||||
mov r3, #8
|
mov r3, #8
|
||||||
|
@ -30,24 +30,6 @@ void ff_simple_idct_arm(int16_t *data);
|
|||||||
static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
|
static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
|
||||||
static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
|
static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
|
||||||
|
|
||||||
void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
|
||||||
void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
|
||||||
void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
|
||||||
void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
|
||||||
|
|
||||||
void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
|
||||||
void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
|
||||||
void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
|
||||||
|
|
||||||
void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
|
||||||
|
|
||||||
CALL_2X_PIXELS(ff_put_pixels16_x2_arm, ff_put_pixels8_x2_arm, 8)
|
|
||||||
CALL_2X_PIXELS(ff_put_pixels16_y2_arm, ff_put_pixels8_y2_arm, 8)
|
|
||||||
CALL_2X_PIXELS(ff_put_pixels16_xy2_arm, ff_put_pixels8_xy2_arm, 8)
|
|
||||||
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm, ff_put_no_rnd_pixels8_x2_arm, 8)
|
|
||||||
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm, ff_put_no_rnd_pixels8_y2_arm, 8)
|
|
||||||
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm,8)
|
|
||||||
|
|
||||||
void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
|
void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
|
||||||
int line_size);
|
int line_size);
|
||||||
|
|
||||||
@ -76,7 +58,6 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block)
|
|||||||
|
|
||||||
av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
|
av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
|
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
ff_put_pixels_clamped = c->put_pixels_clamped;
|
ff_put_pixels_clamped = c->put_pixels_clamped;
|
||||||
@ -99,26 +80,6 @@ av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
|
|||||||
|
|
||||||
c->add_pixels_clamped = ff_add_pixels_clamped_arm;
|
c->add_pixels_clamped = ff_add_pixels_clamped_arm;
|
||||||
|
|
||||||
if (!high_bit_depth) {
|
|
||||||
c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
|
|
||||||
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
|
|
||||||
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
|
|
||||||
c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_arm;
|
|
||||||
c->put_pixels_tab[1][0] = ff_put_pixels8_arm;
|
|
||||||
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_arm;
|
|
||||||
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_arm;
|
|
||||||
c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_arm;
|
|
||||||
|
|
||||||
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_arm;
|
|
||||||
c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_arm;
|
|
||||||
c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_arm;
|
|
||||||
c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_arm;
|
|
||||||
c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_arm;
|
|
||||||
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;
|
|
||||||
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
|
|
||||||
c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (have_armv5te(cpu_flags)) ff_dsputil_init_armv5te(c, avctx);
|
if (have_armv5te(cpu_flags)) ff_dsputil_init_armv5te(c, avctx);
|
||||||
if (have_armv6(cpu_flags)) ff_dsputil_init_armv6(c, avctx);
|
if (have_armv6(cpu_flags)) ff_dsputil_init_armv6(c, avctx);
|
||||||
if (have_neon(cpu_flags)) ff_dsputil_init_neon(c, avctx);
|
if (have_neon(cpu_flags)) ff_dsputil_init_neon(c, avctx);
|
||||||
|
@ -27,24 +27,6 @@ void ff_simple_idct_armv6(int16_t *data);
|
|||||||
void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
|
void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
|
||||||
void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);
|
void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);
|
||||||
|
|
||||||
void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
|
|
||||||
void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
|
|
||||||
void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
|
|
||||||
void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
|
|
||||||
void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
|
|
||||||
void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
|
|
||||||
void ff_add_pixels_clamped_armv6(const int16_t *block,
|
void ff_add_pixels_clamped_armv6(const int16_t *block,
|
||||||
uint8_t *restrict pixels,
|
uint8_t *restrict pixels,
|
||||||
int line_size);
|
int line_size);
|
||||||
@ -82,29 +64,6 @@ av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
|
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!high_bit_depth) {
|
|
||||||
c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
|
|
||||||
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
|
|
||||||
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
|
|
||||||
/* c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_armv6; */
|
|
||||||
c->put_pixels_tab[1][0] = ff_put_pixels8_armv6;
|
|
||||||
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_armv6;
|
|
||||||
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_armv6;
|
|
||||||
/* c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_armv6; */
|
|
||||||
|
|
||||||
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_armv6;
|
|
||||||
c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_armv6;
|
|
||||||
c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_armv6;
|
|
||||||
/* c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_armv6; */
|
|
||||||
c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_armv6;
|
|
||||||
c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_armv6;
|
|
||||||
c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_armv6;
|
|
||||||
/* c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_armv6; */
|
|
||||||
|
|
||||||
c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6;
|
|
||||||
c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!high_bit_depth)
|
if (!high_bit_depth)
|
||||||
c->get_pixels = ff_get_pixels_armv6;
|
c->get_pixels = ff_get_pixels_armv6;
|
||||||
c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
|
c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
|
||||||
|
@ -32,33 +32,6 @@ void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
|
|||||||
void ff_clear_block_neon(int16_t *block);
|
void ff_clear_block_neon(int16_t *block);
|
||||||
void ff_clear_blocks_neon(int16_t *blocks);
|
void ff_clear_blocks_neon(int16_t *blocks);
|
||||||
|
|
||||||
void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
|
|
||||||
void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
|
||||||
|
|
||||||
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
||||||
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
||||||
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
|
||||||
@ -92,38 +65,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
|||||||
if (!high_bit_depth) {
|
if (!high_bit_depth) {
|
||||||
c->clear_block = ff_clear_block_neon;
|
c->clear_block = ff_clear_block_neon;
|
||||||
c->clear_blocks = ff_clear_blocks_neon;
|
c->clear_blocks = ff_clear_blocks_neon;
|
||||||
|
|
||||||
c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
|
|
||||||
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
|
|
||||||
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
|
|
||||||
c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
|
|
||||||
c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
|
|
||||||
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
|
|
||||||
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
|
|
||||||
c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
|
|
||||||
|
|
||||||
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
|
|
||||||
c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
|
|
||||||
c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
|
|
||||||
c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
|
|
||||||
c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
|
|
||||||
c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
|
|
||||||
c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
|
|
||||||
c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
|
|
||||||
|
|
||||||
c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
|
|
||||||
c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
|
|
||||||
c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
|
|
||||||
c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
|
|
||||||
c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
|
|
||||||
c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
|
|
||||||
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
|
|
||||||
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;
|
|
||||||
|
|
||||||
c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
|
|
||||||
c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
|
|
||||||
c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
|
|
||||||
c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
c->add_pixels_clamped = ff_add_pixels_clamped_neon;
|
c->add_pixels_clamped = ff_add_pixels_clamped_neon;
|
||||||
|
@ -37,394 +37,6 @@ function ff_clear_blocks_neon, export=1
|
|||||||
bx lr
|
bx lr
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
.macro pixels16 rnd=1, avg=0
|
|
||||||
.if \avg
|
|
||||||
mov r12, r0
|
|
||||||
.endif
|
|
||||||
1: vld1.8 {q0}, [r1], r2
|
|
||||||
vld1.8 {q1}, [r1], r2
|
|
||||||
vld1.8 {q2}, [r1], r2
|
|
||||||
pld [r1, r2, lsl #2]
|
|
||||||
vld1.8 {q3}, [r1], r2
|
|
||||||
pld [r1]
|
|
||||||
pld [r1, r2]
|
|
||||||
pld [r1, r2, lsl #1]
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {q8}, [r12,:128], r2
|
|
||||||
vrhadd.u8 q0, q0, q8
|
|
||||||
vld1.8 {q9}, [r12,:128], r2
|
|
||||||
vrhadd.u8 q1, q1, q9
|
|
||||||
vld1.8 {q10}, [r12,:128], r2
|
|
||||||
vrhadd.u8 q2, q2, q10
|
|
||||||
vld1.8 {q11}, [r12,:128], r2
|
|
||||||
vrhadd.u8 q3, q3, q11
|
|
||||||
.endif
|
|
||||||
subs r3, r3, #4
|
|
||||||
vst1.64 {q0}, [r0,:128], r2
|
|
||||||
vst1.64 {q1}, [r0,:128], r2
|
|
||||||
vst1.64 {q2}, [r0,:128], r2
|
|
||||||
vst1.64 {q3}, [r0,:128], r2
|
|
||||||
bne 1b
|
|
||||||
bx lr
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixels16_x2 rnd=1, avg=0
|
|
||||||
1: vld1.8 {d0-d2}, [r1], r2
|
|
||||||
vld1.8 {d4-d6}, [r1], r2
|
|
||||||
pld [r1]
|
|
||||||
pld [r1, r2]
|
|
||||||
subs r3, r3, #2
|
|
||||||
vext.8 q1, q0, q1, #1
|
|
||||||
avg q0, q0, q1
|
|
||||||
vext.8 q3, q2, q3, #1
|
|
||||||
avg q2, q2, q3
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {q1}, [r0,:128], r2
|
|
||||||
vld1.8 {q3}, [r0,:128]
|
|
||||||
vrhadd.u8 q0, q0, q1
|
|
||||||
vrhadd.u8 q2, q2, q3
|
|
||||||
sub r0, r0, r2
|
|
||||||
.endif
|
|
||||||
vst1.8 {q0}, [r0,:128], r2
|
|
||||||
vst1.8 {q2}, [r0,:128], r2
|
|
||||||
bne 1b
|
|
||||||
bx lr
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixels16_y2 rnd=1, avg=0
|
|
||||||
sub r3, r3, #2
|
|
||||||
vld1.8 {q0}, [r1], r2
|
|
||||||
vld1.8 {q1}, [r1], r2
|
|
||||||
1: subs r3, r3, #2
|
|
||||||
avg q2, q0, q1
|
|
||||||
vld1.8 {q0}, [r1], r2
|
|
||||||
avg q3, q0, q1
|
|
||||||
vld1.8 {q1}, [r1], r2
|
|
||||||
pld [r1]
|
|
||||||
pld [r1, r2]
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {q8}, [r0,:128], r2
|
|
||||||
vld1.8 {q9}, [r0,:128]
|
|
||||||
vrhadd.u8 q2, q2, q8
|
|
||||||
vrhadd.u8 q3, q3, q9
|
|
||||||
sub r0, r0, r2
|
|
||||||
.endif
|
|
||||||
vst1.8 {q2}, [r0,:128], r2
|
|
||||||
vst1.8 {q3}, [r0,:128], r2
|
|
||||||
bne 1b
|
|
||||||
|
|
||||||
avg q2, q0, q1
|
|
||||||
vld1.8 {q0}, [r1], r2
|
|
||||||
avg q3, q0, q1
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {q8}, [r0,:128], r2
|
|
||||||
vld1.8 {q9}, [r0,:128]
|
|
||||||
vrhadd.u8 q2, q2, q8
|
|
||||||
vrhadd.u8 q3, q3, q9
|
|
||||||
sub r0, r0, r2
|
|
||||||
.endif
|
|
||||||
vst1.8 {q2}, [r0,:128], r2
|
|
||||||
vst1.8 {q3}, [r0,:128], r2
|
|
||||||
|
|
||||||
bx lr
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixels16_xy2 rnd=1, avg=0
|
|
||||||
sub r3, r3, #2
|
|
||||||
vld1.8 {d0-d2}, [r1], r2
|
|
||||||
vld1.8 {d4-d6}, [r1], r2
|
|
||||||
NRND vmov.i16 q13, #1
|
|
||||||
pld [r1]
|
|
||||||
pld [r1, r2]
|
|
||||||
vext.8 q1, q0, q1, #1
|
|
||||||
vext.8 q3, q2, q3, #1
|
|
||||||
vaddl.u8 q8, d0, d2
|
|
||||||
vaddl.u8 q10, d1, d3
|
|
||||||
vaddl.u8 q9, d4, d6
|
|
||||||
vaddl.u8 q11, d5, d7
|
|
||||||
1: subs r3, r3, #2
|
|
||||||
vld1.8 {d0-d2}, [r1], r2
|
|
||||||
vadd.u16 q12, q8, q9
|
|
||||||
pld [r1]
|
|
||||||
NRND vadd.u16 q12, q12, q13
|
|
||||||
vext.8 q15, q0, q1, #1
|
|
||||||
vadd.u16 q1 , q10, q11
|
|
||||||
shrn d28, q12, #2
|
|
||||||
NRND vadd.u16 q1, q1, q13
|
|
||||||
shrn d29, q1, #2
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {q8}, [r0,:128]
|
|
||||||
vrhadd.u8 q14, q14, q8
|
|
||||||
.endif
|
|
||||||
vaddl.u8 q8, d0, d30
|
|
||||||
vld1.8 {d2-d4}, [r1], r2
|
|
||||||
vaddl.u8 q10, d1, d31
|
|
||||||
vst1.8 {q14}, [r0,:128], r2
|
|
||||||
vadd.u16 q12, q8, q9
|
|
||||||
pld [r1, r2]
|
|
||||||
NRND vadd.u16 q12, q12, q13
|
|
||||||
vext.8 q2, q1, q2, #1
|
|
||||||
vadd.u16 q0, q10, q11
|
|
||||||
shrn d30, q12, #2
|
|
||||||
NRND vadd.u16 q0, q0, q13
|
|
||||||
shrn d31, q0, #2
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {q9}, [r0,:128]
|
|
||||||
vrhadd.u8 q15, q15, q9
|
|
||||||
.endif
|
|
||||||
vaddl.u8 q9, d2, d4
|
|
||||||
vaddl.u8 q11, d3, d5
|
|
||||||
vst1.8 {q15}, [r0,:128], r2
|
|
||||||
bgt 1b
|
|
||||||
|
|
||||||
vld1.8 {d0-d2}, [r1], r2
|
|
||||||
vadd.u16 q12, q8, q9
|
|
||||||
NRND vadd.u16 q12, q12, q13
|
|
||||||
vext.8 q15, q0, q1, #1
|
|
||||||
vadd.u16 q1 , q10, q11
|
|
||||||
shrn d28, q12, #2
|
|
||||||
NRND vadd.u16 q1, q1, q13
|
|
||||||
shrn d29, q1, #2
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {q8}, [r0,:128]
|
|
||||||
vrhadd.u8 q14, q14, q8
|
|
||||||
.endif
|
|
||||||
vaddl.u8 q8, d0, d30
|
|
||||||
vaddl.u8 q10, d1, d31
|
|
||||||
vst1.8 {q14}, [r0,:128], r2
|
|
||||||
vadd.u16 q12, q8, q9
|
|
||||||
NRND vadd.u16 q12, q12, q13
|
|
||||||
vadd.u16 q0, q10, q11
|
|
||||||
shrn d30, q12, #2
|
|
||||||
NRND vadd.u16 q0, q0, q13
|
|
||||||
shrn d31, q0, #2
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {q9}, [r0,:128]
|
|
||||||
vrhadd.u8 q15, q15, q9
|
|
||||||
.endif
|
|
||||||
vst1.8 {q15}, [r0,:128], r2
|
|
||||||
|
|
||||||
bx lr
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixels8 rnd=1, avg=0
|
|
||||||
1: vld1.8 {d0}, [r1], r2
|
|
||||||
vld1.8 {d1}, [r1], r2
|
|
||||||
vld1.8 {d2}, [r1], r2
|
|
||||||
pld [r1, r2, lsl #2]
|
|
||||||
vld1.8 {d3}, [r1], r2
|
|
||||||
pld [r1]
|
|
||||||
pld [r1, r2]
|
|
||||||
pld [r1, r2, lsl #1]
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {d4}, [r0,:64], r2
|
|
||||||
vrhadd.u8 d0, d0, d4
|
|
||||||
vld1.8 {d5}, [r0,:64], r2
|
|
||||||
vrhadd.u8 d1, d1, d5
|
|
||||||
vld1.8 {d6}, [r0,:64], r2
|
|
||||||
vrhadd.u8 d2, d2, d6
|
|
||||||
vld1.8 {d7}, [r0,:64], r2
|
|
||||||
vrhadd.u8 d3, d3, d7
|
|
||||||
sub r0, r0, r2, lsl #2
|
|
||||||
.endif
|
|
||||||
subs r3, r3, #4
|
|
||||||
vst1.8 {d0}, [r0,:64], r2
|
|
||||||
vst1.8 {d1}, [r0,:64], r2
|
|
||||||
vst1.8 {d2}, [r0,:64], r2
|
|
||||||
vst1.8 {d3}, [r0,:64], r2
|
|
||||||
bne 1b
|
|
||||||
bx lr
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixels8_x2 rnd=1, avg=0
|
|
||||||
1: vld1.8 {q0}, [r1], r2
|
|
||||||
vext.8 d1, d0, d1, #1
|
|
||||||
vld1.8 {q1}, [r1], r2
|
|
||||||
vext.8 d3, d2, d3, #1
|
|
||||||
pld [r1]
|
|
||||||
pld [r1, r2]
|
|
||||||
subs r3, r3, #2
|
|
||||||
vswp d1, d2
|
|
||||||
avg q0, q0, q1
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {d4}, [r0,:64], r2
|
|
||||||
vld1.8 {d5}, [r0,:64]
|
|
||||||
vrhadd.u8 q0, q0, q2
|
|
||||||
sub r0, r0, r2
|
|
||||||
.endif
|
|
||||||
vst1.8 {d0}, [r0,:64], r2
|
|
||||||
vst1.8 {d1}, [r0,:64], r2
|
|
||||||
bne 1b
|
|
||||||
bx lr
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixels8_y2 rnd=1, avg=0
|
|
||||||
sub r3, r3, #2
|
|
||||||
vld1.8 {d0}, [r1], r2
|
|
||||||
vld1.8 {d1}, [r1], r2
|
|
||||||
1: subs r3, r3, #2
|
|
||||||
avg d4, d0, d1
|
|
||||||
vld1.8 {d0}, [r1], r2
|
|
||||||
avg d5, d0, d1
|
|
||||||
vld1.8 {d1}, [r1], r2
|
|
||||||
pld [r1]
|
|
||||||
pld [r1, r2]
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {d2}, [r0,:64], r2
|
|
||||||
vld1.8 {d3}, [r0,:64]
|
|
||||||
vrhadd.u8 q2, q2, q1
|
|
||||||
sub r0, r0, r2
|
|
||||||
.endif
|
|
||||||
vst1.8 {d4}, [r0,:64], r2
|
|
||||||
vst1.8 {d5}, [r0,:64], r2
|
|
||||||
bne 1b
|
|
||||||
|
|
||||||
avg d4, d0, d1
|
|
||||||
vld1.8 {d0}, [r1], r2
|
|
||||||
avg d5, d0, d1
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {d2}, [r0,:64], r2
|
|
||||||
vld1.8 {d3}, [r0,:64]
|
|
||||||
vrhadd.u8 q2, q2, q1
|
|
||||||
sub r0, r0, r2
|
|
||||||
.endif
|
|
||||||
vst1.8 {d4}, [r0,:64], r2
|
|
||||||
vst1.8 {d5}, [r0,:64], r2
|
|
||||||
|
|
||||||
bx lr
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixels8_xy2 rnd=1, avg=0
|
|
||||||
sub r3, r3, #2
|
|
||||||
vld1.8 {q0}, [r1], r2
|
|
||||||
vld1.8 {q1}, [r1], r2
|
|
||||||
NRND vmov.i16 q11, #1
|
|
||||||
pld [r1]
|
|
||||||
pld [r1, r2]
|
|
||||||
vext.8 d4, d0, d1, #1
|
|
||||||
vext.8 d6, d2, d3, #1
|
|
||||||
vaddl.u8 q8, d0, d4
|
|
||||||
vaddl.u8 q9, d2, d6
|
|
||||||
1: subs r3, r3, #2
|
|
||||||
vld1.8 {q0}, [r1], r2
|
|
||||||
pld [r1]
|
|
||||||
vadd.u16 q10, q8, q9
|
|
||||||
vext.8 d4, d0, d1, #1
|
|
||||||
NRND vadd.u16 q10, q10, q11
|
|
||||||
vaddl.u8 q8, d0, d4
|
|
||||||
shrn d5, q10, #2
|
|
||||||
vld1.8 {q1}, [r1], r2
|
|
||||||
vadd.u16 q10, q8, q9
|
|
||||||
pld [r1, r2]
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {d7}, [r0,:64]
|
|
||||||
vrhadd.u8 d5, d5, d7
|
|
||||||
.endif
|
|
||||||
NRND vadd.u16 q10, q10, q11
|
|
||||||
vst1.8 {d5}, [r0,:64], r2
|
|
||||||
shrn d7, q10, #2
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {d5}, [r0,:64]
|
|
||||||
vrhadd.u8 d7, d7, d5
|
|
||||||
.endif
|
|
||||||
vext.8 d6, d2, d3, #1
|
|
||||||
vaddl.u8 q9, d2, d6
|
|
||||||
vst1.8 {d7}, [r0,:64], r2
|
|
||||||
bgt 1b
|
|
||||||
|
|
||||||
vld1.8 {q0}, [r1], r2
|
|
||||||
vadd.u16 q10, q8, q9
|
|
||||||
vext.8 d4, d0, d1, #1
|
|
||||||
NRND vadd.u16 q10, q10, q11
|
|
||||||
vaddl.u8 q8, d0, d4
|
|
||||||
shrn d5, q10, #2
|
|
||||||
vadd.u16 q10, q8, q9
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {d7}, [r0,:64]
|
|
||||||
vrhadd.u8 d5, d5, d7
|
|
||||||
.endif
|
|
||||||
NRND vadd.u16 q10, q10, q11
|
|
||||||
vst1.8 {d5}, [r0,:64], r2
|
|
||||||
shrn d7, q10, #2
|
|
||||||
.if \avg
|
|
||||||
vld1.8 {d5}, [r0,:64]
|
|
||||||
vrhadd.u8 d7, d7, d5
|
|
||||||
.endif
|
|
||||||
vst1.8 {d7}, [r0,:64], r2
|
|
||||||
|
|
||||||
bx lr
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixfunc pfx, name, suf, rnd=1, avg=0
|
|
||||||
.if \rnd
|
|
||||||
.macro avg rd, rn, rm
|
|
||||||
vrhadd.u8 \rd, \rn, \rm
|
|
||||||
.endm
|
|
||||||
.macro shrn rd, rn, rm
|
|
||||||
vrshrn.u16 \rd, \rn, \rm
|
|
||||||
.endm
|
|
||||||
.macro NRND insn:vararg
|
|
||||||
.endm
|
|
||||||
.else
|
|
||||||
.macro avg rd, rn, rm
|
|
||||||
vhadd.u8 \rd, \rn, \rm
|
|
||||||
.endm
|
|
||||||
.macro shrn rd, rn, rm
|
|
||||||
vshrn.u16 \rd, \rn, \rm
|
|
||||||
.endm
|
|
||||||
.macro NRND insn:vararg
|
|
||||||
\insn
|
|
||||||
.endm
|
|
||||||
.endif
|
|
||||||
function ff_\pfx\name\suf\()_neon, export=1
|
|
||||||
\name \rnd, \avg
|
|
||||||
endfunc
|
|
||||||
.purgem avg
|
|
||||||
.purgem shrn
|
|
||||||
.purgem NRND
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro pixfunc2 pfx, name, avg=0
|
|
||||||
pixfunc \pfx, \name, rnd=1, avg=\avg
|
|
||||||
pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg
|
|
||||||
.endm
|
|
||||||
|
|
||||||
function ff_put_h264_qpel16_mc00_neon, export=1
|
|
||||||
mov r3, #16
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
pixfunc put_, pixels16, avg=0
|
|
||||||
pixfunc2 put_, pixels16_x2, avg=0
|
|
||||||
pixfunc2 put_, pixels16_y2, avg=0
|
|
||||||
pixfunc2 put_, pixels16_xy2, avg=0
|
|
||||||
|
|
||||||
function ff_avg_h264_qpel16_mc00_neon, export=1
|
|
||||||
mov r3, #16
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
pixfunc avg_, pixels16, avg=1
|
|
||||||
pixfunc2 avg_, pixels16_x2, avg=1
|
|
||||||
pixfunc2 avg_, pixels16_y2, avg=1
|
|
||||||
pixfunc2 avg_, pixels16_xy2, avg=1
|
|
||||||
|
|
||||||
function ff_put_h264_qpel8_mc00_neon, export=1
|
|
||||||
mov r3, #8
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
pixfunc put_, pixels8, avg=0
|
|
||||||
pixfunc2 put_, pixels8_x2, avg=0
|
|
||||||
pixfunc2 put_, pixels8_y2, avg=0
|
|
||||||
pixfunc2 put_, pixels8_xy2, avg=0
|
|
||||||
|
|
||||||
function ff_avg_h264_qpel8_mc00_neon, export=1
|
|
||||||
mov r3, #8
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
pixfunc avg_, pixels8, avg=1
|
|
||||||
pixfunc avg_, pixels8_x2, avg=1
|
|
||||||
pixfunc avg_, pixels8_y2, avg=1
|
|
||||||
pixfunc avg_, pixels8_xy2, avg=1
|
|
||||||
|
|
||||||
function ff_put_pixels_clamped_neon, export=1
|
function ff_put_pixels_clamped_neon, export=1
|
||||||
vld1.16 {d16-d19}, [r0,:128]!
|
vld1.16 {d16-d19}, [r0,:128]!
|
||||||
vqmovun.s16 d0, q8
|
vqmovun.s16 d0, q8
|
||||||
|
611
libavcodec/arm/hpeldsp_arm.S
Normal file
611
libavcodec/arm/hpeldsp_arm.S
Normal file
@ -0,0 +1,611 @@
|
|||||||
|
@
|
||||||
|
@ ARMv4 optimized DSP utils
|
||||||
|
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
|
||||||
|
@
|
||||||
|
@ This file is part of Libav.
|
||||||
|
@
|
||||||
|
@ Libav is free software; you can redistribute it and/or
|
||||||
|
@ modify it under the terms of the GNU Lesser General Public
|
||||||
|
@ License as published by the Free Software Foundation; either
|
||||||
|
@ version 2.1 of the License, or (at your option) any later version.
|
||||||
|
@
|
||||||
|
@ Libav is distributed in the hope that it will be useful,
|
||||||
|
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
@ Lesser General Public License for more details.
|
||||||
|
@
|
||||||
|
@ You should have received a copy of the GNU Lesser General Public
|
||||||
|
@ License along with Libav; if not, write to the Free Software
|
||||||
|
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
@
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
|
#if !HAVE_ARMV5TE_EXTERNAL
|
||||||
|
#define pld @
|
||||||
|
#endif
|
||||||
|
|
||||||
|
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
|
||||||
|
mov \Rd0, \Rn0, lsr #(\shift * 8)
|
||||||
|
mov \Rd1, \Rn1, lsr #(\shift * 8)
|
||||||
|
mov \Rd2, \Rn2, lsr #(\shift * 8)
|
||||||
|
mov \Rd3, \Rn3, lsr #(\shift * 8)
|
||||||
|
orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
|
||||||
|
orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
|
||||||
|
orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
|
||||||
|
orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
|
||||||
|
.endm
|
||||||
|
.macro ALIGN_DWORD shift, R0, R1, R2
|
||||||
|
mov \R0, \R0, lsr #(\shift * 8)
|
||||||
|
orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
|
||||||
|
mov \R1, \R1, lsr #(\shift * 8)
|
||||||
|
orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
|
||||||
|
.endm
|
||||||
|
.macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
|
||||||
|
mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
|
||||||
|
mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
|
||||||
|
orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
|
||||||
|
orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
|
||||||
|
@ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
|
||||||
|
@ Rmask = 0xFEFEFEFE
|
||||||
|
@ Rn is destroyed (overwritten with Rn | Rm)
|
||||||
|
eor \Rd0, \Rn0, \Rm0
|
||||||
|
eor \Rd1, \Rn1, \Rm1
|
||||||
|
orr \Rn0, \Rn0, \Rm0
|
||||||
|
orr \Rn1, \Rn1, \Rm1
|
||||||
|
and \Rd0, \Rd0, \Rmask
|
||||||
|
and \Rd1, \Rd1, \Rmask
|
||||||
|
sub \Rd0, \Rn0, \Rd0, lsr #1
|
||||||
|
sub \Rd1, \Rn1, \Rd1, lsr #1
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
|
||||||
|
@ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
|
||||||
|
@ Rmask = 0xFEFEFEFE
|
||||||
|
@ Rn is destroyed (overwritten with Rn & Rm)
|
||||||
|
eor \Rd0, \Rn0, \Rm0
|
||||||
|
eor \Rd1, \Rn1, \Rm1
|
||||||
|
and \Rn0, \Rn0, \Rm0
|
||||||
|
and \Rn1, \Rn1, \Rm1
|
||||||
|
and \Rd0, \Rd0, \Rmask
|
||||||
|
and \Rd1, \Rd1, \Rmask
|
||||||
|
add \Rd0, \Rn0, \Rd0, lsr #1
|
||||||
|
add \Rd1, \Rn1, \Rd1, lsr #1
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro JMP_ALIGN tmp, reg
|
||||||
|
ands \tmp, \reg, #3
|
||||||
|
bic \reg, \reg, #3
|
||||||
|
beq 1f
|
||||||
|
subs \tmp, \tmp, #1
|
||||||
|
beq 2f
|
||||||
|
subs \tmp, \tmp, #1
|
||||||
|
beq 3f
|
||||||
|
b 4f
|
||||||
|
.endm
|
||||||
|
|
||||||
|
@ ----------------------------------------------------------------
|
||||||
|
.align 5
|
||||||
|
function ff_put_pixels16_arm, export=1
|
||||||
|
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
|
@ block = word aligned, pixels = unaligned
|
||||||
|
pld [r1]
|
||||||
|
push {r4-r11, lr}
|
||||||
|
JMP_ALIGN r5, r1
|
||||||
|
1:
|
||||||
|
ldm r1, {r4-r7}
|
||||||
|
add r1, r1, r2
|
||||||
|
stm r0, {r4-r7}
|
||||||
|
pld [r1]
|
||||||
|
subs r3, r3, #1
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 1b
|
||||||
|
pop {r4-r11, pc}
|
||||||
|
.align 5
|
||||||
|
2:
|
||||||
|
ldm r1, {r4-r8}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
|
||||||
|
pld [r1]
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r9-r12}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 2b
|
||||||
|
pop {r4-r11, pc}
|
||||||
|
.align 5
|
||||||
|
3:
|
||||||
|
ldm r1, {r4-r8}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
|
||||||
|
pld [r1]
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r9-r12}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 3b
|
||||||
|
pop {r4-r11, pc}
|
||||||
|
.align 5
|
||||||
|
4:
|
||||||
|
ldm r1, {r4-r8}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
|
||||||
|
pld [r1]
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r9-r12}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 4b
|
||||||
|
pop {r4-r11,pc}
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
@ ----------------------------------------------------------------
|
||||||
|
.align 5
|
||||||
|
function ff_put_pixels8_arm, export=1
|
||||||
|
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
|
@ block = word aligned, pixels = unaligned
|
||||||
|
pld [r1]
|
||||||
|
push {r4-r5,lr}
|
||||||
|
JMP_ALIGN r5, r1
|
||||||
|
1:
|
||||||
|
ldm r1, {r4-r5}
|
||||||
|
add r1, r1, r2
|
||||||
|
subs r3, r3, #1
|
||||||
|
pld [r1]
|
||||||
|
stm r0, {r4-r5}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 1b
|
||||||
|
pop {r4-r5,pc}
|
||||||
|
.align 5
|
||||||
|
2:
|
||||||
|
ldm r1, {r4-r5, r12}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD 1, r4, r5, r12
|
||||||
|
pld [r1]
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r4-r5}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 2b
|
||||||
|
pop {r4-r5,pc}
|
||||||
|
.align 5
|
||||||
|
3:
|
||||||
|
ldm r1, {r4-r5, r12}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD 2, r4, r5, r12
|
||||||
|
pld [r1]
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r4-r5}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 3b
|
||||||
|
pop {r4-r5,pc}
|
||||||
|
.align 5
|
||||||
|
4:
|
||||||
|
ldm r1, {r4-r5, r12}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD 3, r4, r5, r12
|
||||||
|
pld [r1]
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r4-r5}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 4b
|
||||||
|
pop {r4-r5,pc}
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
@ ----------------------------------------------------------------
|
||||||
|
.align 5
|
||||||
|
function ff_put_pixels8_x2_arm, export=1
|
||||||
|
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
|
@ block = word aligned, pixels = unaligned
|
||||||
|
pld [r1]
|
||||||
|
push {r4-r10,lr}
|
||||||
|
ldr r12, =0xfefefefe
|
||||||
|
JMP_ALIGN r5, r1
|
||||||
|
1:
|
||||||
|
ldm r1, {r4-r5, r10}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
|
||||||
|
pld [r1]
|
||||||
|
RND_AVG32 r8, r9, r4, r5, r6, r7, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r8-r9}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 1b
|
||||||
|
pop {r4-r10,pc}
|
||||||
|
.align 5
|
||||||
|
2:
|
||||||
|
ldm r1, {r4-r5, r10}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
|
||||||
|
ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
|
||||||
|
pld [r1]
|
||||||
|
RND_AVG32 r4, r5, r6, r7, r8, r9, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r4-r5}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 2b
|
||||||
|
pop {r4-r10,pc}
|
||||||
|
.align 5
|
||||||
|
3:
|
||||||
|
ldm r1, {r4-r5, r10}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
|
||||||
|
ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
|
||||||
|
pld [r1]
|
||||||
|
RND_AVG32 r4, r5, r6, r7, r8, r9, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r4-r5}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 3b
|
||||||
|
pop {r4-r10,pc}
|
||||||
|
.align 5
|
||||||
|
4:
|
||||||
|
ldm r1, {r4-r5, r10}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
|
||||||
|
pld [r1]
|
||||||
|
RND_AVG32 r8, r9, r6, r7, r5, r10, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r8-r9}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 4b
|
||||||
|
pop {r4-r10,pc}
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
function ff_put_no_rnd_pixels8_x2_arm, export=1
|
||||||
|
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
|
@ block = word aligned, pixels = unaligned
|
||||||
|
pld [r1]
|
||||||
|
push {r4-r10,lr}
|
||||||
|
ldr r12, =0xfefefefe
|
||||||
|
JMP_ALIGN r5, r1
|
||||||
|
1:
|
||||||
|
ldm r1, {r4-r5, r10}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
|
||||||
|
pld [r1]
|
||||||
|
NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r8-r9}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 1b
|
||||||
|
pop {r4-r10,pc}
|
||||||
|
.align 5
|
||||||
|
2:
|
||||||
|
ldm r1, {r4-r5, r10}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
|
||||||
|
ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
|
||||||
|
pld [r1]
|
||||||
|
NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r4-r5}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 2b
|
||||||
|
pop {r4-r10,pc}
|
||||||
|
.align 5
|
||||||
|
3:
|
||||||
|
ldm r1, {r4-r5, r10}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
|
||||||
|
ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
|
||||||
|
pld [r1]
|
||||||
|
NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r4-r5}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 3b
|
||||||
|
pop {r4-r10,pc}
|
||||||
|
.align 5
|
||||||
|
4:
|
||||||
|
ldm r1, {r4-r5, r10}
|
||||||
|
add r1, r1, r2
|
||||||
|
ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
|
||||||
|
pld [r1]
|
||||||
|
NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r8-r9}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 4b
|
||||||
|
pop {r4-r10,pc}
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
|
||||||
|
@ ----------------------------------------------------------------
|
||||||
|
.align 5
|
||||||
|
function ff_put_pixels8_y2_arm, export=1
|
||||||
|
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
|
@ block = word aligned, pixels = unaligned
|
||||||
|
pld [r1]
|
||||||
|
push {r4-r11,lr}
|
||||||
|
mov r3, r3, lsr #1
|
||||||
|
ldr r12, =0xfefefefe
|
||||||
|
JMP_ALIGN r5, r1
|
||||||
|
1:
|
||||||
|
ldm r1, {r4-r5}
|
||||||
|
add r1, r1, r2
|
||||||
|
6: ldm r1, {r6-r7}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
RND_AVG32 r8, r9, r4, r5, r6, r7, r12
|
||||||
|
ldm r1, {r4-r5}
|
||||||
|
add r1, r1, r2
|
||||||
|
stm r0, {r8-r9}
|
||||||
|
add r0, r0, r2
|
||||||
|
pld [r1]
|
||||||
|
RND_AVG32 r8, r9, r6, r7, r4, r5, r12
|
||||||
|
subs r3, r3, #1
|
||||||
|
stm r0, {r8-r9}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 6b
|
||||||
|
pop {r4-r11,pc}
|
||||||
|
.align 5
|
||||||
|
2:
|
||||||
|
ldm r1, {r4-r6}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 1, r4, r5, r6
|
||||||
|
6: ldm r1, {r7-r9}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 1, r7, r8, r9
|
||||||
|
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
||||||
|
stm r0, {r10-r11}
|
||||||
|
add r0, r0, r2
|
||||||
|
ldm r1, {r4-r6}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 1, r4, r5, r6
|
||||||
|
subs r3, r3, #1
|
||||||
|
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
||||||
|
stm r0, {r10-r11}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 6b
|
||||||
|
pop {r4-r11,pc}
|
||||||
|
.align 5
|
||||||
|
3:
|
||||||
|
ldm r1, {r4-r6}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 2, r4, r5, r6
|
||||||
|
6: ldm r1, {r7-r9}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 2, r7, r8, r9
|
||||||
|
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
||||||
|
stm r0, {r10-r11}
|
||||||
|
add r0, r0, r2
|
||||||
|
ldm r1, {r4-r6}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 2, r4, r5, r6
|
||||||
|
subs r3, r3, #1
|
||||||
|
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
||||||
|
stm r0, {r10-r11}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 6b
|
||||||
|
pop {r4-r11,pc}
|
||||||
|
.align 5
|
||||||
|
4:
|
||||||
|
ldm r1, {r4-r6}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 3, r4, r5, r6
|
||||||
|
6: ldm r1, {r7-r9}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 3, r7, r8, r9
|
||||||
|
RND_AVG32 r10, r11, r4, r5, r7, r8, r12
|
||||||
|
stm r0, {r10-r11}
|
||||||
|
add r0, r0, r2
|
||||||
|
ldm r1, {r4-r6}
|
||||||
|
add r1, r1, r2
|
||||||
|
pld [r1]
|
||||||
|
ALIGN_DWORD 3, r4, r5, r6
|
||||||
|
subs r3, r3, #1
|
||||||
|
RND_AVG32 r10, r11, r7, r8, r4, r5, r12
|
||||||
|
stm r0, {r10-r11}
|
||||||
|
add r0, r0, r2
|
||||||
|
bne 6b
|
||||||
|
pop {r4-r11,pc}
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
@ Shared body of the three misaligned-source paths of
@ ff_put_no_rnd_pixels8_y2_arm. The shift argument is forwarded to
@ ALIGN_DWORD (defined elsewhere in this file) for every row that is loaded.
@ Three words are loaded per row and ALIGN_DWORD leaves the usable pair in
@ the first two registers. Each loop pass stores two output rows.
.macro  NO_RND_Y2_MISALIGNED shift
        ldm             r1,  {r4-r6}            @ first source row
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r4,  r5,  r6
6:      ldm             r1,  {r7-r9}            @ next source row
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r7,  r8,  r9
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}            @ row after that, kept for the next pass
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r4,  r5,  r6
        subs            r3,  r3,  #1            @ flags survive until the bne below
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
.endm

        .align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ r0 = block (word aligned), r1 = pixels (may be unaligned),
        @ r2 = line_size, r3 = h
        @ Writes 8 bytes per row, each row combining two vertically adjacent
        @ source rows through NO_RND_AVG32 (defined elsewhere in this file,
        @ presumably the truncating average using the mask held in r12).
        pld             [r1]
        push            {r4-r11,lr}
        mov             r3,  r3,  lsr #1        @ two rows are produced per loop pass
        ldr             r12, =0xfefefefe
        @ JMP_ALIGN is defined elsewhere. It is expected to branch to one of
        @ the labels 1..4 below depending on the alignment of r1.
        JMP_ALIGN       r5,  r1
1:
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
6:      ldm             r1,  {r6-r7}
        add             r1,  r1,  r2
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
2:
        NO_RND_Y2_MISALIGNED 1
        .align 5
3:
        NO_RND_Y2_MISALIGNED 2
        .align 5
4:
        NO_RND_Y2_MISALIGNED 3
endfunc
|
||||||
|
|
||||||
|
.ltorg
|
||||||
|
|
||||||
|
@ ----------------------------------------------------------------
|
||||||
|
.macro  RND_XY2_IT align, rnd
        @ One row of the xy2 (2x2 average) filter.
        @ Loads three words from r1, advances r1 by r2 and builds two 8-byte
        @ views of the row in r4/r5 and r6/r7 with ALIGN_DWORD_D (defined
        @ elsewhere in this file). With a and b being those two views:
        @   l1 =  (a & 0x03030303) + (b & 0x03030303) ?(+ rounding)
        @   h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
        @ Results: r8/r9 = l1, r10/r11 = h1.
        @ The rounding term is only added when bit 0 of r3 is clear, so a
        @ pair of consecutive rows receives it exactly once. Its value is
        @ 0x03030303 AND (0x03030303 shifted by one), that is 0x02020202 when
        @ rnd is lsl and 0x01010101 when rnd is lsr.
        @ Ends with subs on r3, so the caller can branch on the flags.
        @ Clobbers r4-r12 and r14.
.if \align == 0
        ldm             r1,  {r6-r8}
.elseif \align == 3
        ldm             r1,  {r5-r7}
.else
        ldm             r1,  {r8-r10}
.endif
        add             r1,  r1,  r2
        pld             [r1]
.if \align == 0
        ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
.elseif \align == 1
        ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
        ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
.elseif \align == 2
        ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
        ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
.elseif \align == 3
        ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
.endif
        ldr             r14, =0x03030303
        tst             r3,  #1                 @ eq: this row carries the rounding term
        and             r8,  r4,  r14           @ low two bits of every byte
        and             r9,  r5,  r14
        and             r10, r6,  r14
        and             r11, r7,  r14
        it              eq
        andeq           r14, r14, r14, \rnd #1  @ derive the rounding constant
        add             r8,  r8,  r10
        add             r9,  r9,  r11
        ldr             r12, =0xfcfcfcfc >> 2
        itt             eq
        addeq           r8,  r8,  r14
        addeq           r9,  r9,  r14
        and             r4,  r12, r4,  lsr #2   @ high six bits of every byte
        and             r5,  r12, r5,  lsr #2
        and             r6,  r12, r6,  lsr #2
        and             r7,  r12, r7,  lsr #2
        add             r10, r4,  r6
        add             r11, r5,  r7
        subs            r3,  r3,  #1
.endm
|
||||||
|
|
||||||
|
.macro  RND_XY2_EXPAND align, rnd
        @ Complete xy2 loop for one source alignment.
        @ RND_XY2_IT leaves the partial sums of one row in r8-r11. They are
        @ kept on the stack while the next row is processed, then both rows
        @ are merged into one 8-byte output row stored at r0.
        @ The loop condition uses the flags of the subs that ends
        @ RND_XY2_IT. None of the instructions in between updates the flags.
        @ Finishes by restoring r4-r11 and returning through pc.
        RND_XY2_IT      \align, \rnd
6:      push            {r8-r11}                @ keep the sums of the previous row
        RND_XY2_IT      \align, \rnd
        pop             {r4-r7}                 @ previous row: r4/r5 low, r6/r7 high
        add             r4,  r4,  r8            @ low sums of both rows
        add             r5,  r5,  r9
        ldr             r14, =0x0f0f0f0f
        add             r6,  r6,  r10           @ high sums of both rows
        add             r7,  r7,  r11
        and             r4,  r14, r4,  lsr #2
        and             r5,  r14, r5,  lsr #2
        add             r4,  r4,  r6
        add             r5,  r5,  r7
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bge             6b
        pop             {r4-r11,pc}
.endm
|
||||||
|
|
||||||
|
@ 8-pixel-wide xy2 half-pel put, rounding variant.
        .align 5
function ff_put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ The lsl argument makes RND_XY2_IT use 0x02020202 as rounding term.
        pld             [r1]
        push            {r4-r11,lr}     @ R14 is also called LR
        @ JMP_ALIGN is defined elsewhere. It is expected to branch to one of
        @ the labels 1..4 depending on the alignment of r1.
        JMP_ALIGN       r5,  r1
1:      RND_XY2_EXPAND  0, lsl
        .align 5
2:      RND_XY2_EXPAND  1, lsl
        .align 5
3:      RND_XY2_EXPAND  2, lsl
        .align 5
4:      RND_XY2_EXPAND  3, lsl
endfunc
|
||||||
|
|
||||||
|
@ 8-pixel-wide xy2 half-pel put, no-rounding variant.
        .align 5
function ff_put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ The lsr argument makes RND_XY2_IT use 0x01010101 as rounding term.
        pld             [r1]
        push            {r4-r11,lr}
        @ JMP_ALIGN is defined elsewhere. It is expected to branch to one of
        @ the labels 1..4 depending on the alignment of r1.
        JMP_ALIGN       r5,  r1
1:      RND_XY2_EXPAND  0, lsr
        .align 5
2:      RND_XY2_EXPAND  1, lsr
        .align 5
3:      RND_XY2_EXPAND  2, lsr
        .align 5
4:      RND_XY2_EXPAND  3, lsr
endfunc
|
27
libavcodec/arm/hpeldsp_arm.h
Normal file
27
libavcodec/arm/hpeldsp_arm.h
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* The guard is named after this file (hpeldsp_arm.h) so that it cannot
 * collide with a header called arm/hpeldsp.h. */
#ifndef AVCODEC_ARM_HPELDSP_ARM_H
#define AVCODEC_ARM_HPELDSP_ARM_H

#include "libavcodec/hpeldsp.h"

/**
 * Install the ARMv6 half-pel functions into the function tables of c.
 * The flags argument is accepted for interface symmetry.
 */
void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags);

/**
 * Install the NEON half-pel functions into the function tables of c.
 * The flags argument is accepted for interface symmetry.
 */
void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags);

#endif /* AVCODEC_ARM_HPELDSP_ARM_H */
|
259
libavcodec/arm/hpeldsp_armv6.S
Normal file
259
libavcodec/arm/hpeldsp_armv6.S
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
|
@ Builds a 16-pixel-wide function out of the matching 8-pixel-wide one.
@ The 8-wide routine is called for the left half with the original
@ arguments, then r0-r3 are restored, both pointers are moved 8 bytes to
@ the right and the 8-wide routine is tail-called for the right half.
.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}             @ arguments are needed again below
        bl              ff_\type\()_pixels8\subp\()_armv6
        pop             {r0-r3, lr}
        add             r0,  r0,  #8
        add             r1,  r1,  #8
        b               ff_\type\()_pixels8\subp\()_armv6
endfunc
.endm

@ 16-wide wrappers generated from the 8-wide functions of this file.
call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd
|
||||||
|
|
||||||
|
@ Plain copy of a 16-byte-wide block.
@ r0 = destination, r1 = source, r2 = line size, r3 = row count.
@ Two rows are copied per loop pass, so r3 has to be a non-zero multiple
@ of two for the loop to terminate.
@ ldr_post and strd_post are helper macros defined outside this file
@ (presumably post-indexed accesses that advance the pointer by r2).
function ff_put_pixels16_armv6, export=1
        push            {r4-r11}
1:
        ldr             r5,  [r1, #4]           @ first row of the pair: r4-r7
        ldr             r6,  [r1, #8]
        ldr             r7,  [r1, #12]
        ldr_post        r4,  r1,  r2
        strd            r6,  r7,  [r0, #8]
        ldr             r9,  [r1, #4]           @ second row of the pair: r8-r11
        strd_post       r4,  r5,  r0,  r2
        ldr             r10, [r1, #8]
        ldr             r11, [r1, #12]
        ldr_post        r8,  r1,  r2
        strd            r10, r11, [r0, #8]
        subs            r3,  r3,  #2
        strd_post       r8,  r9,  r0,  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
|
||||||
|
|
||||||
|
@ Plain copy of an 8-byte-wide block.
@ r0 = destination, r1 = source, r2 = line size, r3 = row count.
@ Two rows are copied per loop pass, so r3 has to be a non-zero multiple
@ of two for the loop to terminate.
function ff_put_pixels8_armv6, export=1
        push            {r4-r7}
1:
        ldr             r5,  [r1, #4]           @ first row of the pair: r4/r5
        ldr_post        r4,  r1,  r2
        ldr             r7,  [r1, #4]           @ second row of the pair: r6/r7
        strd_post       r4,  r5,  r0,  r2
        ldr_post        r6,  r1,  r2
        subs            r3,  r3,  #2
        strd_post       r6,  r7,  r0,  r2
        bne             1b

        pop             {r4-r7}
        bx              lr
endfunc
|
||||||
|
|
||||||
|
@ Horizontal half-pel put with rounding, 8 bytes wide.
@ r0 = destination, r1 = source, r2 = line size, r3 = row count.
@ Every output byte is the rounded average of a source byte and its right
@ neighbour. uhadd8 yields the truncated average and the lowest bit of the
@ exclusive or of both inputs (masked with r12 = 0x01010101) is added back
@ with uadd8 to round up.
@ The right-hand neighbours of bytes 0..3 are assembled from the two loaded
@ words, those of bytes 4..7 are read with a load at offset 5.
@ Two rows are handled per loop pass.
function ff_put_pixels8_x2_armv6, export=1
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101
1:
        ldr             r4,  [r1]
        subs            r3,  r3,  #2            @ flags are consumed by the final bne
        ldr             r5,  [r1, #4]
        ldr             r7,  [r1, #5]
        lsr             r6,  r4,  #8
        ldr_pre         r8,  r1,  r2
        orr             r6,  r6,  r5,  lsl #24
        ldr             r9,  [r1, #4]
        ldr             r11, [r1, #5]
        lsr             r10, r8,  #8
        add             r1,  r1,  r2
        orr             r10, r10, r9,  lsl #24
        eor             r14, r4,  r6
        uhadd8          r4,  r4,  r6
        eor             r6,  r5,  r7
        uhadd8          r5,  r5,  r7
        and             r14, r14, r12
        and             r6,  r6,  r12
        uadd8           r4,  r4,  r14
        eor             r14, r8,  r10
        uadd8           r5,  r5,  r6
        eor             r6,  r9,  r11
        uhadd8          r8,  r8,  r10
        and             r14, r14, r12
        uhadd8          r9,  r9,  r11
        and             r6,  r6,  r12
        uadd8           r8,  r8,  r14
        strd_post       r4,  r5,  r0,  r2
        uadd8           r9,  r9,  r6
        strd_post       r8,  r9,  r0,  r2
        bne             1b

        pop             {r4-r11, pc}
endfunc
|
||||||
|
|
||||||
|
@ Vertical half-pel put with rounding, 8 bytes wide.
@ r0 = destination, r1 = source, r2 = line size, r3 = row count.
@ Every output row is the rounded average of two consecutive source rows:
@ uhadd8 gives the truncated average and the lowest bit of the exclusive or
@ (masked with r12 = 0x01010101) is added with uadd8 to round up.
@ Two source rows are loaded ahead of the loop and two more in every pass.
@ NOTE(review): the last pass therefore still loads one row more than the
@ averages need. Confirm that callers tolerate this read.
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101
        ldr             r4,  [r1]               @ row 0 in r4/r5
        ldr             r5,  [r1, #4]
        ldr_pre         r6,  r1,  r2            @ row 1 in r6/r7
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            @ flags are consumed by the final bne
        uhadd8          r8,  r4,  r6
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
        ldr_pre         r4,  r1,  r2
        uadd8           r8,  r8,  r10
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]
        uhadd8          r10, r4,  r6
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6
        and             r7,  r7,  r12
        ldr_pre         r6,  r1,  r2
        uadd8           r11, r11, r7
        strd_post       r8,  r9,  r0,  r2
        ldr             r7,  [r1, #4]
        strd_post       r10, r11, r0,  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
|
||||||
|
|
||||||
|
@ Horizontal half-pel put without rounding, 8 bytes wide.
@ r0 = destination, r1 = source, r2 = line size, r3 = row count.
@ Every output byte is the truncated average (uhadd8) of a source byte and
@ its right neighbour. Two rows are handled per loop pass.
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
1:
        subs            r3,  r3,  #2            @ flags are consumed by the final bne
        ldr             r4,  [r1]
        ldr             r5,  [r1, #4]
        ldr             r7,  [r1, #5]
        ldr_pre         r8,  r1,  r2
        ldr             r9,  [r1, #4]
        ldr             r14, [r1, #5]
        add             r1,  r1,  r2
        lsr             r6,  r4,  #8            @ r6 = bytes 1..4 of the first row
        orr             r6,  r6,  r5,  lsl #24
        lsr             r12, r8,  #8            @ r12 = bytes 1..4 of the second row
        orr             r12, r12, r9,  lsl #24
        uhadd8          r4,  r4,  r6
        uhadd8          r5,  r5,  r7
        uhadd8          r8,  r8,  r12
        uhadd8          r9,  r9,  r14
        stm             r0,  {r4,r5}
        add             r0,  r0,  r2
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
|
||||||
|
|
||||||
|
@ Vertical half-pel put without rounding, 8 bytes wide.
@ r0 = destination, r1 = source, r2 = line size, r3 = row count.
@ Every output row is the truncated average (uhadd8) of two consecutive
@ source rows. Two source rows are loaded ahead of the loop and two more
@ in every pass.
@ NOTE(review): the last pass therefore still loads one row more than the
@ averages need. Confirm that callers tolerate this read.
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]               @ row 0 in r4/r5
        ldr             r5,  [r1, #4]
        ldr_pre         r6,  r1,  r2            @ row 1 in r6/r7
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            @ flags are consumed by the final bne
        uhadd8          r8,  r4,  r6
        ldr_pre         r4,  r1,  r2
        uhadd8          r9,  r5,  r7
        ldr             r5,  [r1, #4]
        uhadd8          r12, r4,  r6
        ldr_pre         r6,  r1,  r2
        uhadd8          r14, r5,  r7
        ldr             r7,  [r1, #4]
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
|
||||||
|
|
||||||
|
@ Rounded average of an 8-byte-wide source block with the destination.
@ r0 = destination (read and written), r1 = source, r2 = line size,
@ r3 = row count.
@ uhadd8 gives the truncated average, the lowest bit of the exclusive or
@ (masked with lr = 0x01010101) is added with uadd8 to round up.
@ The loop is software pipelined: the loads for the following rows are
@ issued while the current rows are still being combined. The row counter
@ is decremented ahead of the loop and again in its second half. Label 2
@ finishes the last row without issuing further loads.
function ff_avg_pixels8_armv6, export=1
        pld             [r1, r2]
        push            {r4-r10, lr}
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
        ldrd            r4,  r5,  [r0]          @ first destination row
        ldr             r10, [r1, #4]           @ first source row in r9/r10
        ldr_post        r9,  r1,  r2
        subs            r3,  r3,  #2
1:
        pld             [r1, r2]
        eor             r8,  r4,  r9
        uhadd8          r4,  r4,  r9
        eor             r12, r5,  r10
        ldrd_reg        r6,  r7,  r0,  r2       @ second destination row
        uhadd8          r5,  r5,  r10
        and             r8,  r8,  lr
        ldr             r10, [r1, #4]
        and             r12, r12, lr
        uadd8           r4,  r4,  r8
        ldr_post        r9,  r1,  r2
        eor             r8,  r6,  r9
        uadd8           r5,  r5,  r12
        pld             [r1, r2,  lsl #1]
        eor             r12, r7,  r10
        uhadd8          r6,  r6,  r9
        strd_post       r4,  r5,  r0,  r2
        uhadd8          r7,  r7,  r10
        beq             2f                      @ row counter reached zero
        and             r8,  r8,  lr
        ldrd_reg        r4,  r5,  r0,  r2
        uadd8           r6,  r6,  r8
        ldr             r10, [r1, #4]
        and             r12, r12, lr
        subs            r3,  r3,  #2
        uadd8           r7,  r7,  r12
        ldr_post        r9,  r1,  r2
        strd_post       r6,  r7,  r0,  r2
        b               1b
2:
        and             r8,  r8,  lr
        and             r12, r12, lr
        uadd8           r6,  r6,  r8
        uadd8           r7,  r7,  r12
        strd_post       r6,  r7,  r0,  r2

        pop             {r4-r10, pc}
endfunc
|
71
libavcodec/arm/hpeldsp_init_arm.c
Normal file
71
libavcodec/arm/hpeldsp_init_arm.c
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
/*
|
||||||
|
* ARM optimized DSP utils
|
||||||
|
* Copyright (c) 2001 Lionel Ulmer
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/arm/cpu.h"
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavcodec/rnd_avg.h"
|
||||||
|
#include "hpeldsp_arm.h"
|
||||||
|
|
||||||
|
void ff_put_pixels8_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
||||||
|
void ff_put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
||||||
|
void ff_put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
||||||
|
void ff_put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
||||||
|
|
||||||
|
void ff_put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
||||||
|
void ff_put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
||||||
|
void ff_put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
||||||
|
|
||||||
|
void ff_put_pixels16_arm(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h);
|
||||||
|
|
||||||
|
CALL_2X_PIXELS(ff_put_pixels16_x2_arm, ff_put_pixels8_x2_arm, 8)
|
||||||
|
CALL_2X_PIXELS(ff_put_pixels16_y2_arm, ff_put_pixels8_y2_arm, 8)
|
||||||
|
CALL_2X_PIXELS(ff_put_pixels16_xy2_arm, ff_put_pixels8_xy2_arm, 8)
|
||||||
|
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_x2_arm, ff_put_no_rnd_pixels8_x2_arm, 8)
|
||||||
|
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_y2_arm, ff_put_no_rnd_pixels8_y2_arm, 8)
|
||||||
|
CALL_2X_PIXELS(ff_put_no_rnd_pixels16_xy2_arm, ff_put_no_rnd_pixels8_xy2_arm,8)
|
||||||
|
|
||||||
|
/**
 * Fill the put and put_no_rnd tables of c with the generic ARM routines,
 * then let the ARMv6 and NEON initialisers replace entries when the
 * running CPU reports those extensions.
 *
 * Table layout as used here: first index 0 holds the 16-pixel-wide
 * functions and index 1 the 8-pixel-wide ones; the second index selects
 * full-pel (0), x2 (1), y2 (2) or xy2 (3).
 */
av_cold void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags)
{
    const int cpu = av_get_cpu_flags();

    /* Full-pel: a plain copy serves both the rounding and no-rounding tables. */
    c->put_pixels_tab[0][0]        = ff_put_pixels16_arm;
    c->put_pixels_tab[1][0]        = ff_put_pixels8_arm;
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_arm;
    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_arm;

    /* Horizontal half-pel. */
    c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_arm;
    c->put_pixels_tab[1][1]        = ff_put_pixels8_x2_arm;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_arm;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;

    /* Vertical half-pel. */
    c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_arm;
    c->put_pixels_tab[1][2]        = ff_put_pixels8_y2_arm;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_arm;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;

    /* Diagonal half-pel. */
    c->put_pixels_tab[0][3]        = ff_put_pixels16_xy2_arm;
    c->put_pixels_tab[1][3]        = ff_put_pixels8_xy2_arm;
    c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_arm;
    c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;

    /* Must run after the assignments above so that they can override them;
     * NEON goes last and therefore takes precedence over ARMv6. */
    if (have_armv6(cpu))
        ff_hpeldsp_init_armv6(c, flags);
    if (have_neon(cpu))
        ff_hpeldsp_init_neon(c, flags);
}
|
67
libavcodec/arm/hpeldsp_init_armv6.c
Normal file
67
libavcodec/arm/hpeldsp_init_armv6.c
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "hpeldsp_arm.h"
|
||||||
|
|
||||||
|
void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
|
||||||
|
void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
|
||||||
|
void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
|
||||||
|
void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
|
||||||
|
void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
|
||||||
|
void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
|
||||||
|
/**
 * Install the ARMv6 half-pel routines into c.
 *
 * Only full-pel, x2 and y2 entries of the put tables and the full-pel
 * entries of the avg table are written. The xy2 slots (second index 3) are
 * deliberately left as they are, since no ARMv6 xy2 routine is declared in
 * this file. The flags argument is not used.
 */
av_cold void ff_hpeldsp_init_armv6(HpelDSPContext *c, int flags)
{
    /* Full-pel copy, shared by the rounding and no-rounding tables. */
    c->put_pixels_tab[0][0]        = ff_put_pixels16_armv6;
    c->put_pixels_tab[1][0]        = ff_put_pixels8_armv6;
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_armv6;
    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_armv6;

    /* Horizontal half-pel. */
    c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_armv6;
    c->put_pixels_tab[1][1]        = ff_put_pixels8_x2_armv6;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_armv6;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_armv6;

    /* Vertical half-pel. */
    c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_armv6;
    c->put_pixels_tab[1][2]        = ff_put_pixels8_y2_armv6;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_armv6;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_armv6;

    /* Full-pel averaging. */
    c->avg_pixels_tab[0][0]        = ff_avg_pixels16_armv6;
    c->avg_pixels_tab[1][0]        = ff_avg_pixels8_armv6;
}
|
88
libavcodec/arm/hpeldsp_init_neon.c
Normal file
88
libavcodec/arm/hpeldsp_init_neon.c
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
/*
|
||||||
|
* ARM NEON optimised DSP functions
|
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "hpeldsp_arm.h"
|
||||||
|
|
||||||
|
void ff_put_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
|
||||||
|
void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels16_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels16_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels16_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels8_x2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels8_y2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, ptrdiff_t, int);
|
||||||
|
|
||||||
|
/**
 * Install the NEON half-pel routines into c.
 *
 * Every entry of the put, put_no_rnd and avg tables for 16- and 8-pixel
 * widths is written, as are the four avg_no_rnd entries (16-pixel width
 * only). The flags argument is not used.
 */
av_cold void ff_hpeldsp_init_neon(HpelDSPContext *c, int flags)
{
    /* Full-pel: the no-rounding tables reuse the ordinary routines. */
    c->put_pixels_tab[0][0]        = ff_put_pixels16_neon;
    c->put_pixels_tab[1][0]        = ff_put_pixels8_neon;
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
    c->avg_pixels_tab[0][0]        = ff_avg_pixels16_neon;
    c->avg_pixels_tab[1][0]        = ff_avg_pixels8_neon;
    c->avg_no_rnd_pixels_tab[0]    = ff_avg_pixels16_neon;

    /* Horizontal half-pel. */
    c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_neon;
    c->put_pixels_tab[1][1]        = ff_put_pixels8_x2_neon;
    c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
    c->avg_pixels_tab[0][1]        = ff_avg_pixels16_x2_neon;
    c->avg_pixels_tab[1][1]        = ff_avg_pixels8_x2_neon;
    c->avg_no_rnd_pixels_tab[1]    = ff_avg_pixels16_x2_no_rnd_neon;

    /* Vertical half-pel. */
    c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_neon;
    c->put_pixels_tab[1][2]        = ff_put_pixels8_y2_neon;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
    c->avg_pixels_tab[0][2]        = ff_avg_pixels16_y2_neon;
    c->avg_pixels_tab[1][2]        = ff_avg_pixels8_y2_neon;
    c->avg_no_rnd_pixels_tab[2]    = ff_avg_pixels16_y2_no_rnd_neon;

    /* Diagonal half-pel. */
    c->put_pixels_tab[0][3]        = ff_put_pixels16_xy2_neon;
    c->put_pixels_tab[1][3]        = ff_put_pixels8_xy2_neon;
    c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
    c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
    c->avg_pixels_tab[0][3]        = ff_avg_pixels16_xy2_neon;
    c->avg_pixels_tab[1][3]        = ff_avg_pixels8_xy2_neon;
    c->avg_no_rnd_pixels_tab[3]    = ff_avg_pixels16_xy2_no_rnd_neon;
}
|
410
libavcodec/arm/hpeldsp_neon.S
Normal file
410
libavcodec/arm/hpeldsp_neon.S
Normal file
@ -0,0 +1,410 @@
|
|||||||
|
/*
|
||||||
|
* ARM NEON optimised DSP functions
|
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
|
/*
 * pixels16: body of a 16-byte-wide row copy (avg=0) or copy-and-average
 * with the existing destination (avg=1).
 *
 * Registers as used by the code below:
 *   r0  = destination pointer (stored to with a :128 alignment qualifier)
 *   r1  = source pointer (loaded without an alignment qualifier)
 *   r2  = byte step added to both pointers after every row
 *   r3  = row counter; four rows are handled per iteration and the loop
 *         repeats until r3 reaches exactly zero, so r3 is expected to be a
 *         non-zero multiple of 4 on entry
 *   r12 = read cursor over the destination rows (avg=1 only)
 *
 * The rnd argument is accepted for a uniform macro signature but is not
 * referenced in this body.
 */
.macro pixels16 rnd=1, avg=0
|
||||||
|
.if \avg
|
||||||
|
/* separate destination read pointer so r0 stays at the store position */
mov r12, r0
|
||||||
|
.endif
|
||||||
|
/* load four source rows; the pld lines are prefetch hints for later rows */
1: vld1.8 {q0}, [r1], r2
|
||||||
|
vld1.8 {q1}, [r1], r2
|
||||||
|
vld1.8 {q2}, [r1], r2
|
||||||
|
pld [r1, r2, lsl #2]
|
||||||
|
vld1.8 {q3}, [r1], r2
|
||||||
|
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
pld [r1, r2, lsl #1]
|
||||||
|
.if \avg
|
||||||
|
/* rounding average of each source row with the matching destination row */
vld1.8 {q8}, [r12,:128], r2
|
||||||
|
vrhadd.u8 q0, q0, q8
|
||||||
|
vld1.8 {q9}, [r12,:128], r2
|
||||||
|
vrhadd.u8 q1, q1, q9
|
||||||
|
vld1.8 {q10}, [r12,:128], r2
|
||||||
|
vrhadd.u8 q2, q2, q10
|
||||||
|
vld1.8 {q11}, [r12,:128], r2
|
||||||
|
vrhadd.u8 q3, q3, q11
|
||||||
|
.endif
|
||||||
|
/* flags set here are consumed by the bne after the four stores */
subs r3, r3, #4
|
||||||
|
vst1.64 {q0}, [r0,:128], r2
|
||||||
|
vst1.64 {q1}, [r0,:128], r2
|
||||||
|
vst1.64 {q2}, [r0,:128], r2
|
||||||
|
vst1.64 {q3}, [r0,:128], r2
|
||||||
|
bne 1b
|
||||||
|
bx lr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixels16_x2: body of a 16-byte-wide horizontal half-sample filter.
 * Every output byte is the average of a source byte and its right-hand
 * neighbour; with avg=1 the result is additionally rounding-averaged with
 * the existing destination bytes.
 *
 * Registers as used below: r0 = destination (:128 qualified), r1 = source,
 * r2 = byte step per row for both pointers, r3 = row counter (two rows per
 * iteration; the loop repeats until r3 is exactly zero).
 *
 * The avg instruction used here is a helper macro expected to be defined
 * by the instantiating macro (rounding or truncating halving add); the
 * rnd argument itself is not referenced in this body.
 */
.macro pixels16_x2 rnd=1, avg=0
|
||||||
|
/* 24 bytes are loaded per row so that byte 16 is available as neighbour */
1: vld1.8 {d0-d2}, [r1], r2
|
||||||
|
vld1.8 {d4-d6}, [r1], r2
|
||||||
|
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
subs r3, r3, #2
|
||||||
|
/* q1 = first row shifted left by one byte, then average with the row */
vext.8 q1, q0, q1, #1
|
||||||
|
avg q0, q0, q1
|
||||||
|
/* same for the second row */
vext.8 q3, q2, q3, #1
|
||||||
|
avg q2, q2, q3
|
||||||
|
.if \avg
|
||||||
|
/* read both destination rows, blend, then step r0 back to the first row */
vld1.8 {q1}, [r0,:128], r2
|
||||||
|
vld1.8 {q3}, [r0,:128]
|
||||||
|
vrhadd.u8 q0, q0, q1
|
||||||
|
vrhadd.u8 q2, q2, q3
|
||||||
|
sub r0, r0, r2
|
||||||
|
.endif
|
||||||
|
vst1.8 {q0}, [r0,:128], r2
|
||||||
|
vst1.8 {q2}, [r0,:128], r2
|
||||||
|
/* branches on the flags from the subs above */
bne 1b
|
||||||
|
bx lr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixels16_y2: body of a 16-byte-wide vertical half-sample filter.
 * Every output row is the average of a source row and the row after it;
 * with avg=1 the result is additionally rounding-averaged with the
 * existing destination rows.
 *
 * Registers as used below: r0 = destination (:128 qualified), r1 = source,
 * r2 = byte step per row for both pointers, r3 = row counter.
 *
 * Structure: two source rows are preloaded, the loop produces two output
 * rows per iteration while loading two more source rows, and the last two
 * output rows are produced after the loop with only one further source
 * load. The counter is reduced by 2 up front to account for that tail.
 * Because the loop ends on bne, r3 is expected to be even and at least 4.
 *
 * The avg instruction is a helper macro expected to be defined by the
 * instantiating macro; the rnd argument is not referenced in this body.
 */
.macro pixels16_y2 rnd=1, avg=0
|
||||||
|
sub r3, r3, #2
|
||||||
|
vld1.8 {q0}, [r1], r2
|
||||||
|
vld1.8 {q1}, [r1], r2
|
||||||
|
1: subs r3, r3, #2
|
||||||
|
/* q0 and q1 alternate as the older and the newer row */
avg q2, q0, q1
|
||||||
|
vld1.8 {q0}, [r1], r2
|
||||||
|
avg q3, q0, q1
|
||||||
|
vld1.8 {q1}, [r1], r2
|
||||||
|
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
.if \avg
|
||||||
|
/* read both destination rows, blend, then step r0 back to the first row */
vld1.8 {q8}, [r0,:128], r2
|
||||||
|
vld1.8 {q9}, [r0,:128]
|
||||||
|
vrhadd.u8 q2, q2, q8
|
||||||
|
vrhadd.u8 q3, q3, q9
|
||||||
|
sub r0, r0, r2
|
||||||
|
.endif
|
||||||
|
vst1.8 {q2}, [r0,:128], r2
|
||||||
|
vst1.8 {q3}, [r0,:128], r2
|
||||||
|
bne 1b
|
||||||
|
|
||||||
|
|
||||||
|
/* tail: final two output rows, loading only one more source row */
avg q2, q0, q1
|
||||||
|
vld1.8 {q0}, [r1], r2
|
||||||
|
avg q3, q0, q1
|
||||||
|
.if \avg
|
||||||
|
vld1.8 {q8}, [r0,:128], r2
|
||||||
|
vld1.8 {q9}, [r0,:128]
|
||||||
|
vrhadd.u8 q2, q2, q8
|
||||||
|
vrhadd.u8 q3, q3, q9
|
||||||
|
sub r0, r0, r2
|
||||||
|
.endif
|
||||||
|
vst1.8 {q2}, [r0,:128], r2
|
||||||
|
vst1.8 {q3}, [r0,:128], r2
|
||||||
|
|
||||||
|
|
||||||
|
bx lr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixels16_xy2: body of a 16-byte-wide two-dimensional half-sample filter.
 * Every output byte is derived from the sum of a 2x2 neighbourhood
 * (a byte, its right neighbour, and the same two bytes one row down),
 * narrowed with a shift right by 2. With avg=1 the result is additionally
 * rounding-averaged with the existing destination row.
 *
 * Registers as used below: r0 = destination (:128 qualified), r1 = source,
 * r2 = byte step per row for both pointers, r3 = row counter.
 *
 * Working registers:
 *   q8/q10  = 16-bit horizontal pair sums (low/high 8 bytes) of one row
 *   q9/q11  = 16-bit horizontal pair sums (low/high 8 bytes) of the
 *             neighbouring row
 *   q13     = constant 1 per 16-bit lane, only set up through NRND
 *   q14/q15 = narrowed output rows
 *
 * shrn and NRND are helper macros expected to be defined by the
 * instantiating macro: shrn is the narrowing shift (rounding or
 * truncating) and NRND emits its argument only in the no-rounding
 * variant, where 1 is added to each sum before the truncating shift.
 * The rnd argument itself is not referenced in this body.
 *
 * Structure: two rows are preloaded and summed, the loop emits two output
 * rows per iteration (ending on bgt), and the last two output rows are
 * emitted after the loop with a single additional source load.
 */
.macro pixels16_xy2 rnd=1, avg=0
|
||||||
|
sub r3, r3, #2
|
||||||
|
/* 24 bytes per row so that byte 16 is available as right neighbour */
vld1.8 {d0-d2}, [r1], r2
|
||||||
|
vld1.8 {d4-d6}, [r1], r2
|
||||||
|
NRND vmov.i16 q13, #1
|
||||||
|
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
/* q1/q3 = rows shifted left by one byte */
vext.8 q1, q0, q1, #1
|
||||||
|
vext.8 q3, q2, q3, #1
|
||||||
|
/* widening horizontal pair sums of the two preloaded rows */
vaddl.u8 q8, d0, d2
|
||||||
|
vaddl.u8 q10, d1, d3
|
||||||
|
vaddl.u8 q9, d4, d6
|
||||||
|
vaddl.u8 q11, d5, d7
|
||||||
|
1: subs r3, r3, #2
|
||||||
|
/* first output row of the pair; next source row is loaded meanwhile */
vld1.8 {d0-d2}, [r1], r2
|
||||||
|
vadd.u16 q12, q8, q9
|
||||||
|
pld [r1]
|
||||||
|
NRND vadd.u16 q12, q12, q13
|
||||||
|
vext.8 q15, q0, q1, #1
|
||||||
|
vadd.u16 q1 , q10, q11
|
||||||
|
shrn d28, q12, #2
|
||||||
|
NRND vadd.u16 q1, q1, q13
|
||||||
|
shrn d29, q1, #2
|
||||||
|
.if \avg
|
||||||
|
/* q8 is free here: its sums were already consumed into q12 */
vld1.8 {q8}, [r0,:128]
|
||||||
|
vrhadd.u8 q14, q14, q8
|
||||||
|
.endif
|
||||||
|
/* new pair sums for the row just loaded replace q8/q10 */
vaddl.u8 q8, d0, d30
|
||||||
|
vld1.8 {d2-d4}, [r1], r2
|
||||||
|
vaddl.u8 q10, d1, d31
|
||||||
|
vst1.8 {q14}, [r0,:128], r2
|
||||||
|
/* second output row of the pair */
vadd.u16 q12, q8, q9
|
||||||
|
pld [r1, r2]
|
||||||
|
NRND vadd.u16 q12, q12, q13
|
||||||
|
vext.8 q2, q1, q2, #1
|
||||||
|
vadd.u16 q0, q10, q11
|
||||||
|
shrn d30, q12, #2
|
||||||
|
NRND vadd.u16 q0, q0, q13
|
||||||
|
shrn d31, q0, #2
|
||||||
|
.if \avg
|
||||||
|
vld1.8 {q9}, [r0,:128]
|
||||||
|
vrhadd.u8 q15, q15, q9
|
||||||
|
.endif
|
||||||
|
/* new pair sums for the second loaded row replace q9/q11 */
vaddl.u8 q9, d2, d4
|
||||||
|
vaddl.u8 q11, d3, d5
|
||||||
|
vst1.8 {q15}, [r0,:128], r2
|
||||||
|
bgt 1b
|
||||||
|
|
||||||
|
|
||||||
|
/* tail: last two output rows, only one further source row is loaded */
vld1.8 {d0-d2}, [r1], r2
|
||||||
|
vadd.u16 q12, q8, q9
|
||||||
|
NRND vadd.u16 q12, q12, q13
|
||||||
|
vext.8 q15, q0, q1, #1
|
||||||
|
vadd.u16 q1 , q10, q11
|
||||||
|
shrn d28, q12, #2
|
||||||
|
NRND vadd.u16 q1, q1, q13
|
||||||
|
shrn d29, q1, #2
|
||||||
|
.if \avg
|
||||||
|
vld1.8 {q8}, [r0,:128]
|
||||||
|
vrhadd.u8 q14, q14, q8
|
||||||
|
.endif
|
||||||
|
vaddl.u8 q8, d0, d30
|
||||||
|
vaddl.u8 q10, d1, d31
|
||||||
|
vst1.8 {q14}, [r0,:128], r2
|
||||||
|
vadd.u16 q12, q8, q9
|
||||||
|
NRND vadd.u16 q12, q12, q13
|
||||||
|
vadd.u16 q0, q10, q11
|
||||||
|
shrn d30, q12, #2
|
||||||
|
NRND vadd.u16 q0, q0, q13
|
||||||
|
shrn d31, q0, #2
|
||||||
|
.if \avg
|
||||||
|
vld1.8 {q9}, [r0,:128]
|
||||||
|
vrhadd.u8 q15, q15, q9
|
||||||
|
.endif
|
||||||
|
vst1.8 {q15}, [r0,:128], r2
|
||||||
|
|
||||||
|
|
||||||
|
bx lr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixels8: body of an 8-byte-wide row copy (avg=0) or copy-and-average
 * with the existing destination (avg=1).
 *
 * Registers as used below: r0 = destination (:64 qualified), r1 = source,
 * r2 = byte step per row for both pointers, r3 = row counter (four rows
 * per iteration; the loop repeats until r3 is exactly zero, so r3 is
 * expected to be a non-zero multiple of 4).
 *
 * Unlike the 16-byte variant, the destination rows are read through r0
 * itself, which is then rewound by four rows before the stores.
 * The rnd argument is not referenced in this body.
 */
.macro pixels8 rnd=1, avg=0
|
||||||
|
/* load four source rows; the pld lines are prefetch hints for later rows */
1: vld1.8 {d0}, [r1], r2
|
||||||
|
vld1.8 {d1}, [r1], r2
|
||||||
|
vld1.8 {d2}, [r1], r2
|
||||||
|
pld [r1, r2, lsl #2]
|
||||||
|
vld1.8 {d3}, [r1], r2
|
||||||
|
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
pld [r1, r2, lsl #1]
|
||||||
|
.if \avg
|
||||||
|
/* rounding average of each source row with the matching destination row */
vld1.8 {d4}, [r0,:64], r2
|
||||||
|
vrhadd.u8 d0, d0, d4
|
||||||
|
vld1.8 {d5}, [r0,:64], r2
|
||||||
|
vrhadd.u8 d1, d1, d5
|
||||||
|
vld1.8 {d6}, [r0,:64], r2
|
||||||
|
vrhadd.u8 d2, d2, d6
|
||||||
|
vld1.8 {d7}, [r0,:64], r2
|
||||||
|
vrhadd.u8 d3, d3, d7
|
||||||
|
/* rewind r0 by the four rows just read */
sub r0, r0, r2, lsl #2
|
||||||
|
.endif
|
||||||
|
subs r3, r3, #4
|
||||||
|
vst1.8 {d0}, [r0,:64], r2
|
||||||
|
vst1.8 {d1}, [r0,:64], r2
|
||||||
|
vst1.8 {d2}, [r0,:64], r2
|
||||||
|
vst1.8 {d3}, [r0,:64], r2
|
||||||
|
bne 1b
|
||||||
|
bx lr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixels8_x2: body of an 8-byte-wide horizontal half-sample filter.
 * Every output byte is the average of a source byte and its right-hand
 * neighbour; with avg=1 the result is additionally rounding-averaged with
 * the existing destination bytes.
 *
 * Registers as used below: r0 = destination (:64 qualified), r1 = source,
 * r2 = byte step per row for both pointers, r3 = row counter (two rows per
 * iteration; the loop repeats until r3 is exactly zero).
 *
 * 16 bytes are loaded per source row although only the first 9 are used.
 * The avg instruction is a helper macro expected to be defined by the
 * instantiating macro; the rnd argument is not referenced in this body.
 */
.macro pixels8_x2 rnd=1, avg=0
|
||||||
|
1: vld1.8 {q0}, [r1], r2
|
||||||
|
/* d1 = first row shifted left by one byte */
vext.8 d1, d0, d1, #1
|
||||||
|
vld1.8 {q1}, [r1], r2
|
||||||
|
/* d3 = second row shifted left by one byte */
vext.8 d3, d2, d3, #1
|
||||||
|
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
subs r3, r3, #2
|
||||||
|
/* regroup: q0 = both rows, q1 = both shifted rows, so one avg does both */
vswp d1, d2
|
||||||
|
avg q0, q0, q1
|
||||||
|
.if \avg
|
||||||
|
/* read both destination rows into q2, blend, then step r0 back one row */
vld1.8 {d4}, [r0,:64], r2
|
||||||
|
vld1.8 {d5}, [r0,:64]
|
||||||
|
vrhadd.u8 q0, q0, q2
|
||||||
|
sub r0, r0, r2
|
||||||
|
.endif
|
||||||
|
vst1.8 {d0}, [r0,:64], r2
|
||||||
|
vst1.8 {d1}, [r0,:64], r2
|
||||||
|
bne 1b
|
||||||
|
bx lr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixels8_y2: body of an 8-byte-wide vertical half-sample filter.
 * Every output row is the average of a source row and the row after it;
 * with avg=1 the result is additionally rounding-averaged with the
 * existing destination rows.
 *
 * Registers as used below: r0 = destination (:64 qualified), r1 = source,
 * r2 = byte step per row for both pointers, r3 = row counter.
 *
 * Structure: two source rows are preloaded, the loop produces two output
 * rows per iteration, and the last two output rows are produced after the
 * loop with only one further source load. The counter is reduced by 2 up
 * front to account for that tail; because the loop ends on bne, r3 is
 * expected to be even and at least 4.
 *
 * The avg instruction is a helper macro expected to be defined by the
 * instantiating macro; the rnd argument is not referenced in this body.
 */
.macro pixels8_y2 rnd=1, avg=0
|
||||||
|
sub r3, r3, #2
|
||||||
|
vld1.8 {d0}, [r1], r2
|
||||||
|
vld1.8 {d1}, [r1], r2
|
||||||
|
1: subs r3, r3, #2
|
||||||
|
/* d0 and d1 alternate as older and newer row; outputs land in q2 (d4,d5) */
avg d4, d0, d1
|
||||||
|
vld1.8 {d0}, [r1], r2
|
||||||
|
avg d5, d0, d1
|
||||||
|
vld1.8 {d1}, [r1], r2
|
||||||
|
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
.if \avg
|
||||||
|
/* read both destination rows into q1, blend, then step r0 back one row */
vld1.8 {d2}, [r0,:64], r2
|
||||||
|
vld1.8 {d3}, [r0,:64]
|
||||||
|
vrhadd.u8 q2, q2, q1
|
||||||
|
sub r0, r0, r2
|
||||||
|
.endif
|
||||||
|
vst1.8 {d4}, [r0,:64], r2
|
||||||
|
vst1.8 {d5}, [r0,:64], r2
|
||||||
|
bne 1b
|
||||||
|
|
||||||
|
|
||||||
|
/* tail: final two output rows, loading only one more source row */
avg d4, d0, d1
|
||||||
|
vld1.8 {d0}, [r1], r2
|
||||||
|
avg d5, d0, d1
|
||||||
|
.if \avg
|
||||||
|
vld1.8 {d2}, [r0,:64], r2
|
||||||
|
vld1.8 {d3}, [r0,:64]
|
||||||
|
vrhadd.u8 q2, q2, q1
|
||||||
|
sub r0, r0, r2
|
||||||
|
.endif
|
||||||
|
vst1.8 {d4}, [r0,:64], r2
|
||||||
|
vst1.8 {d5}, [r0,:64], r2
|
||||||
|
|
||||||
|
|
||||||
|
bx lr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixels8_xy2: body of an 8-byte-wide two-dimensional half-sample filter.
 * Every output byte is derived from the sum of a 2x2 neighbourhood
 * (a byte, its right neighbour, and the same two bytes one row down),
 * narrowed with a shift right by 2. With avg=1 the result is additionally
 * rounding-averaged with the existing destination row.
 *
 * Registers as used below: r0 = destination (:64 qualified), r1 = source,
 * r2 = byte step per row for both pointers, r3 = row counter.
 *
 * Working registers:
 *   q8  = 16-bit horizontal pair sums of one row
 *   q9  = 16-bit horizontal pair sums of the neighbouring row
 *   q10 = 2x2 sum before narrowing
 *   q11 = constant 1 per 16-bit lane, only set up through NRND
 *   d5/d7 = narrowed output rows
 *
 * shrn and NRND are helper macros expected to be defined by the
 * instantiating macro: shrn is the narrowing shift (rounding or
 * truncating) and NRND emits its argument only in the no-rounding
 * variant, where 1 is added to each sum before the truncating shift.
 * The rnd argument itself is not referenced in this body.
 *
 * 16 bytes are loaded per source row although only the first 9 are used.
 * The loop emits two output rows per iteration (ending on bgt); the last
 * two output rows are emitted after the loop with one further source load.
 */
.macro pixels8_xy2 rnd=1, avg=0
|
||||||
|
sub r3, r3, #2
|
||||||
|
vld1.8 {q0}, [r1], r2
|
||||||
|
vld1.8 {q1}, [r1], r2
|
||||||
|
NRND vmov.i16 q11, #1
|
||||||
|
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
/* d4/d6 = preloaded rows shifted left by one byte */
vext.8 d4, d0, d1, #1
|
||||||
|
vext.8 d6, d2, d3, #1
|
||||||
|
/* widening horizontal pair sums of the two preloaded rows */
vaddl.u8 q8, d0, d4
|
||||||
|
vaddl.u8 q9, d2, d6
|
||||||
|
1: subs r3, r3, #2
|
||||||
|
vld1.8 {q0}, [r1], r2
|
||||||
|
pld [r1]
|
||||||
|
/* first output row of the pair from the current q8 and q9 */
vadd.u16 q10, q8, q9
|
||||||
|
vext.8 d4, d0, d1, #1
|
||||||
|
NRND vadd.u16 q10, q10, q11
|
||||||
|
/* q8 now holds the pair sums of the row just loaded */
vaddl.u8 q8, d0, d4
|
||||||
|
shrn d5, q10, #2
|
||||||
|
vld1.8 {q1}, [r1], r2
|
||||||
|
/* second output row of the pair: new q8 with the still-current q9 */
vadd.u16 q10, q8, q9
|
||||||
|
pld [r1, r2]
|
||||||
|
.if \avg
|
||||||
|
vld1.8 {d7}, [r0,:64]
|
||||||
|
vrhadd.u8 d5, d5, d7
|
||||||
|
.endif
|
||||||
|
NRND vadd.u16 q10, q10, q11
|
||||||
|
vst1.8 {d5}, [r0,:64], r2
|
||||||
|
shrn d7, q10, #2
|
||||||
|
.if \avg
|
||||||
|
/* d5 was already stored, so it is reused as the destination scratch */
vld1.8 {d5}, [r0,:64]
|
||||||
|
vrhadd.u8 d7, d7, d5
|
||||||
|
.endif
|
||||||
|
/* q9 now holds the pair sums of the second loaded row */
vext.8 d6, d2, d3, #1
|
||||||
|
vaddl.u8 q9, d2, d6
|
||||||
|
vst1.8 {d7}, [r0,:64], r2
|
||||||
|
bgt 1b
|
||||||
|
|
||||||
|
|
||||||
|
/* tail: last two output rows, only one further source row is loaded */
vld1.8 {q0}, [r1], r2
|
||||||
|
vadd.u16 q10, q8, q9
|
||||||
|
vext.8 d4, d0, d1, #1
|
||||||
|
NRND vadd.u16 q10, q10, q11
|
||||||
|
vaddl.u8 q8, d0, d4
|
||||||
|
shrn d5, q10, #2
|
||||||
|
vadd.u16 q10, q8, q9
|
||||||
|
.if \avg
|
||||||
|
vld1.8 {d7}, [r0,:64]
|
||||||
|
vrhadd.u8 d5, d5, d7
|
||||||
|
.endif
|
||||||
|
NRND vadd.u16 q10, q10, q11
|
||||||
|
vst1.8 {d5}, [r0,:64], r2
|
||||||
|
shrn d7, q10, #2
|
||||||
|
.if \avg
|
||||||
|
vld1.8 {d5}, [r0,:64]
|
||||||
|
vrhadd.u8 d7, d7, d5
|
||||||
|
.endif
|
||||||
|
vst1.8 {d7}, [r0,:64], r2
|
||||||
|
|
||||||
|
|
||||||
|
bx lr
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixfunc: emit one exported function named ff_<pfx><name><suf>_neon
 * whose body is the macro called <name>, invoked with the rnd and avg
 * arguments.
 *
 * Before the function is emitted, three helper macros are defined
 * according to rnd, and they are purged again afterwards so that the next
 * pixfunc invocation can redefine them:
 *   avg  - halving add of two byte vectors: vrhadd.u8 (rounding) when rnd
 *          is non-zero, vhadd.u8 (truncating) otherwise
 *   shrn - narrowing right shift from 16-bit lanes: vrshrn.u16 (rounding)
 *          when rnd is non-zero, vshrn.u16 (truncating) otherwise
 *   NRND - expands to nothing when rnd is non-zero, and to its argument
 *          unchanged otherwise (used for no-rounding-only instructions)
 *
 * Arguments: pfx = name prefix, name = body macro and middle of the
 * symbol name, suf = optional name suffix, rnd (default 1), avg (default 0).
 * The function and endfunc macros come from the included asm.S.
 */
.macro pixfunc pfx, name, suf, rnd=1, avg=0
|
||||||
|
.if \rnd
|
||||||
|
/* rounding variants of the helpers */
.macro avg rd, rn, rm
|
||||||
|
vrhadd.u8 \rd, \rn, \rm
|
||||||
|
.endm
|
||||||
|
.macro shrn rd, rn, rm
|
||||||
|
vrshrn.u16 \rd, \rn, \rm
|
||||||
|
.endm
|
||||||
|
/* NRND swallows its argument in the rounding case */
.macro NRND insn:vararg
|
||||||
|
.endm
|
||||||
|
.else
|
||||||
|
/* truncating variants of the helpers */
.macro avg rd, rn, rm
|
||||||
|
vhadd.u8 \rd, \rn, \rm
|
||||||
|
.endm
|
||||||
|
.macro shrn rd, rn, rm
|
||||||
|
vshrn.u16 \rd, \rn, \rm
|
||||||
|
.endm
|
||||||
|
/* NRND passes its argument through in the no-rounding case */
.macro NRND insn:vararg
|
||||||
|
\insn
|
||||||
|
.endm
|
||||||
|
.endif
|
||||||
|
/* the function body is the expansion of the selected body macro */
function ff_\pfx\name\suf\()_neon, export=1
|
||||||
|
\name \rnd, \avg
|
||||||
|
endfunc
|
||||||
|
/* drop the helpers so a later invocation can define them afresh */
.purgem avg
|
||||||
|
.purgem shrn
|
||||||
|
.purgem NRND
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * pixfunc2: emit two functions for one body macro through pixfunc, first
 * the rounding variant (no suffix, rnd=1) and then the no-rounding
 * variant (suffix _no_rnd, rnd=0). The avg argument is forwarded
 * unchanged to both.
 */
.macro pixfunc2 pfx, name, avg=0
|
||||||
|
/* suf is left empty here, giving ff_<pfx><name>_neon */
pixfunc \pfx, \name, rnd=1, avg=\avg
|
||||||
|
/* gives ff_<pfx><name>_no_rnd_neon */
pixfunc \pfx, \name, _no_rnd, rnd=0, avg=\avg
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * Entry point that only sets the row counter r3 to 16. There is no return
 * or branch before endfunc, so execution is evidently meant to continue
 * into the function emitted by the pixfunc line that follows
 * (ff_put_pixels16_neon). NOTE(review): this relies on endfunc and
 * function from asm.S not placing anything executable or any padding that
 * breaks the fall-through between the two - confirm before reordering.
 */
function ff_put_h264_qpel16_mc00_neon, export=1
|
||||||
|
mov r3, #16
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * 16-byte-wide store variants. pixfunc emits the rounding-agnostic copy;
 * each pixfunc2 line emits a rounding and a _no_rnd function.
 */
pixfunc put_, pixels16, avg=0
|
||||||
|
pixfunc2 put_, pixels16_x2, avg=0
|
||||||
|
pixfunc2 put_, pixels16_y2, avg=0
|
||||||
|
pixfunc2 put_, pixels16_xy2, avg=0
|
||||||
|
|
||||||
|
/*
 * Entry point that only sets the row counter r3 to 16. There is no return
 * or branch before endfunc, so execution is evidently meant to continue
 * into the function emitted by the pixfunc line that follows
 * (ff_avg_pixels16_neon). NOTE(review): this relies on endfunc and
 * function from asm.S not placing anything executable or any padding that
 * breaks the fall-through between the two - confirm before reordering.
 */
function ff_avg_h264_qpel16_mc00_neon, export=1
|
||||||
|
mov r3, #16
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * 16-byte-wide variants that average with the existing destination
 * (avg=1). Each pixfunc2 line emits a rounding and a _no_rnd function;
 * in the body macros the final blend with the destination uses vrhadd.u8
 * in both cases, so _no_rnd only affects the interpolation step.
 */
pixfunc avg_, pixels16, avg=1
|
||||||
|
pixfunc2 avg_, pixels16_x2, avg=1
|
||||||
|
pixfunc2 avg_, pixels16_y2, avg=1
|
||||||
|
pixfunc2 avg_, pixels16_xy2, avg=1
|
||||||
|
|
||||||
|
/*
 * Entry point that only sets the row counter r3 to 8. There is no return
 * or branch before endfunc, so execution is evidently meant to continue
 * into the function emitted by the pixfunc line that follows
 * (ff_put_pixels8_neon). NOTE(review): this relies on endfunc and
 * function from asm.S not placing anything executable or any padding that
 * breaks the fall-through between the two - confirm before reordering.
 */
function ff_put_h264_qpel8_mc00_neon, export=1
|
||||||
|
mov r3, #8
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * 8-byte-wide store variants. pixfunc emits the rounding-agnostic copy;
 * each pixfunc2 line emits a rounding and a _no_rnd function.
 */
pixfunc put_, pixels8, avg=0
|
||||||
|
pixfunc2 put_, pixels8_x2, avg=0
|
||||||
|
pixfunc2 put_, pixels8_y2, avg=0
|
||||||
|
pixfunc2 put_, pixels8_xy2, avg=0
|
||||||
|
|
||||||
|
/*
 * Entry point that only sets the row counter r3 to 8. There is no return
 * or branch before endfunc, so execution is evidently meant to continue
 * into the function emitted by the pixfunc line that follows
 * (ff_avg_pixels8_neon). NOTE(review): this relies on endfunc and
 * function from asm.S not placing anything executable or any padding that
 * breaks the fall-through between the two - confirm before reordering.
 */
function ff_avg_h264_qpel8_mc00_neon, export=1
|
||||||
|
mov r3, #8
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * 8-byte-wide variants that average with the existing destination
 * (avg=1). These use pixfunc rather than pixfunc2, so only the rounding
 * functions are emitted and no _no_rnd counterparts exist for this group.
 */
pixfunc avg_, pixels8, avg=1
|
||||||
|
pixfunc avg_, pixels8_x2, avg=1
|
||||||
|
pixfunc avg_, pixels8_y2, avg=1
|
||||||
|
pixfunc avg_, pixels8_xy2, avg=1
|
@ -54,6 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
|
|||||||
hpel_funcs(avg, [3], 2);
|
hpel_funcs(avg, [3], 2);
|
||||||
hpel_funcs(avg_no_rnd,, 16);
|
hpel_funcs(avg_no_rnd,, 16);
|
||||||
|
|
||||||
|
if (ARCH_ARM)
|
||||||
|
ff_hpeldsp_init_arm(c, flags);
|
||||||
if (ARCH_PPC)
|
if (ARCH_PPC)
|
||||||
ff_hpeldsp_init_ppc(c, flags);
|
ff_hpeldsp_init_ppc(c, flags);
|
||||||
if (ARCH_X86)
|
if (ARCH_X86)
|
||||||
|
@ -94,6 +94,7 @@ typedef struct HpelDSPContext {
|
|||||||
|
|
||||||
void ff_hpeldsp_init(HpelDSPContext *c, int flags);
|
void ff_hpeldsp_init(HpelDSPContext *c, int flags);
|
||||||
|
|
||||||
|
void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
|
||||||
void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
|
void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
|
||||||
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
|
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user