mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
VP8: ARM optimised decode_block_coeffs_internal
Approximately 5% faster on Cortex-A8. Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
7da48fd011
commit
a7878c9f73
2
Makefile
2
Makefile
@ -66,7 +66,7 @@ config.h: .config
|
||||
|
||||
SUBDIR_VARS := OBJS FFLIBS CLEANFILES DIRS TESTPROGS EXAMPLES SKIPHEADERS \
|
||||
ALTIVEC-OBJS MMX-OBJS NEON-OBJS X86-OBJS YASM-OBJS-FFT YASM-OBJS \
|
||||
HOSTPROGS BUILT_HEADERS TESTOBJS ARCH_HEADERS
|
||||
HOSTPROGS BUILT_HEADERS TESTOBJS ARCH_HEADERS ARMV6-OBJS
|
||||
|
||||
define RESET
|
||||
$(1) :=
|
||||
|
@ -3,6 +3,7 @@ OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \
|
||||
OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o
|
||||
OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o
|
||||
OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o
|
||||
ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o
|
||||
|
||||
OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o
|
||||
OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
|
||||
@ -23,6 +24,7 @@ OBJS-$(HAVE_ARMV5TE) += arm/dsputil_init_armv5te.o \
|
||||
OBJS-$(HAVE_ARMV6) += arm/dsputil_init_armv6.o \
|
||||
arm/dsputil_armv6.o \
|
||||
arm/simple_idct_armv6.o \
|
||||
$(ARMV6-OBJS-yes)
|
||||
|
||||
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o \
|
||||
|
||||
|
29
libavcodec/arm/vp8.h
Normal file
29
libavcodec/arm/vp8.h
Normal file
@ -0,0 +1,29 @@
|
||||
/**
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_ARM_VP8_H
|
||||
#define AVCODEC_ARM_VP8_H
|
||||
|
||||
/*
 * ARM overrides for the VP8 decoder.
 *
 * NOTE(review): this header uses VP56RangeCoder, DCTELEM and NUM_DCT_TOKENS
 * without including anything, so it presumably has to be included after the
 * headers that define them -- confirm against the including file.
 */
#if HAVE_ARMV6
|
||||
/*
 * Defining this name as a macro makes every use of
 * decode_block_coeffs_internal in the including file refer to the ARMv6
 * assembly routine declared below.
 */
#define decode_block_coeffs_internal ff_decode_block_coeffs_armv6
|
||||
/**
 * ARMv6 assembly routine that decodes the coefficients of one block.
 *
 * @param rc         range coder state
 * @param block      destination for the 16 coefficients
 * @param probs      token probability table
 * @param i          index of the first coefficient to decode
 * @param token_prob probabilities to use for the first token
 * @param qmul       two 16-bit multipliers applied to decoded values
 * @return the coefficient index at which decoding stopped
 */
int ff_decode_block_coeffs_armv6(VP56RangeCoder *rc, DCTELEM block[16],
|
||||
uint8_t probs[8][3][NUM_DCT_TOKENS-1],
|
||||
int i, uint8_t *token_prob, int16_t qmul[2]);
|
||||
#endif
|
||||
|
||||
#endif
|
220
libavcodec/arm/vp8_armv6.S
Normal file
220
libavcodec/arm/vp8_armv6.S
Normal file
@ -0,0 +1,220 @@
|
||||
/**
|
||||
* Copyright (C) 2010 Mans Rullgard
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "asm.S"
|
||||
|
||||
.syntax unified
|
||||
|
||||
/*
 * rac_get_prob: decode one binary decision from the range coder using the
 * probability held in register pr.
 *
 * Operands (all registers):
 *   h    range coder "high" value, updated in place
 *   bs   bit counter, updated in place
 *   buf  input pointer, post-incremented by 2 when a refill is done
 *   cw   code word, updated in place
 *   pr   probability (only read)
 *   t0   on entry the renormalisation shift; callers in this file load it
 *        from ff_vp56_norm_shift indexed by h.  Clobbered.
 *   t1   scratch, receives the refill halfword
 *
 * Result: the condition flags left by the final cmp.  "ge" means the code
 * word was at or above the split (cw and h have then been reduced by the
 * conditional subtracts), "lt" means it was below.  Callers branch on, or
 * predicate with, these flags directly.
 */
.macro rac_get_prob h, bs, buf, cw, pr, t0, t1
|
||||
/* The carry produced here selects the conditional refill (the cs ops). */
adds \bs, \bs, \t0
|
||||
lsl \cw, \cw, \t0
|
||||
/* t0 becomes the renormalised high value. */
lsl \t0, \h, \t0
|
||||
/* h = 256 - pr, used as the addend of the multiply-accumulate below. */
rsb \h, \pr, #256
|
||||
ldrhcs \t1, [\buf], #2
|
||||
/* h = t0 * pr + (256 - pr), using the low halfwords of t0 and pr. */
smlabb \h, \t0, \pr, \h
|
||||
/* Swap the two bytes of the halfword that was just loaded. */
rev16cs \t1, \t1
|
||||
orrcs \cw, \cw, \t1, lsl \bs
|
||||
subcs \bs, \bs, #16
|
||||
/* h is now the split value. */
lsr \h, \h, #8
|
||||
/* Compare the code word against the split aligned to bit 16. */
cmp \cw, \h, lsl #16
|
||||
subge \cw, \cw, \h, lsl #16
|
||||
subge \h, \t0, \h
|
||||
.endm
|
||||
|
||||
/*
 * rac_get_128: same operation as rac_get_prob but with the probability fixed
 * at 128, so no probability register is needed.  The split is computed as
 * (128 + (t0 << 7)) >> 8 instead of with a multiply.
 *
 * Operands (all registers):
 *   h    range coder "high" value, updated in place
 *   bs   bit counter, updated in place
 *   buf  input pointer, post-incremented by 2 when a refill is done
 *   cw   code word, updated in place
 *   t0   on entry the renormalisation shift, clobbered
 *   t1   scratch, receives the refill halfword
 *
 * Result: condition flags from the final cmp; "ge" means the code word was
 * at or above the split, "lt" below.
 */
.macro rac_get_128 h, bs, buf, cw, t0, t1
|
||||
/* The carry produced here selects the conditional refill (the cs ops). */
adds \bs, \bs, \t0
|
||||
lsl \cw, \cw, \t0
|
||||
/* t0 becomes the renormalised high value. */
lsl \t0, \h, \t0
|
||||
ldrhcs \t1, [\buf], #2
|
||||
mov \h, #128
|
||||
/* Swap the two bytes of the halfword that was just loaded. */
rev16cs \t1, \t1
|
||||
/* h = 128 + t0 * 128 */
add \h, \h, \t0, lsl #7
|
||||
orrcs \cw, \cw, \t1, lsl \bs
|
||||
subcs \bs, \bs, #16
|
||||
/* h is now the split value. */
lsr \h, \h, #8
|
||||
cmp \cw, \h, lsl #16
|
||||
subge \cw, \cw, \h, lsl #16
|
||||
subge \h, \t0, \h
|
||||
.endm
|
||||
|
||||
/*
 * ff_decode_block_coeffs_armv6
 *
 * Register arguments on entry: r0 = range coder state, r1 = destination
 * block, r2 = probs table, r3 = starting coefficient index.  Two further
 * arguments (token_prob and qmul) are read from the stack.
 *
 * Register use inside the function:
 *   r3   coefficient index, returned in r0
 *   r4   pointer to the token probabilities currently in use
 *   r5   high, r6 bits, r7 buf, r8 code_word (range coder state)
 *   r9, r10  scratch for the range coder macros
 *   r11  two packed 16-bit multipliers loaded from qmul
 *   r12  value of the coefficient being decoded
 *   lr   address of ff_vp56_norm_shift
 *
 * r0 and r1 are pushed together with the callee-saved registers so that they
 * can be reloaded from [sp] and [sp, #4] after being reused as scratch.
 */
function ff_decode_block_coeffs_armv6, export=1
|
||||
push {r0,r1,r4-r11,lr}
|
||||
movrel lr, ff_vp56_norm_shift
|
||||
/* 11 registers were pushed, so the stack arguments start at sp + 44. */
ldrd r4, r5, [sp, #44] @ token_prob, qmul
|
||||
cmp r3, #0
|
||||
/* Load both 16-bit qmul entries with one word load. */
ldr r11, [r5]
|
||||
ldm r0, {r5-r7} @ high, bits, buf
|
||||
/* If the start index is not 0, copy the top halfword of r11 into the bottom. */
pkhtbne r11, r11, r11, asr #16
|
||||
ldr r8, [r0, #16] @ code_word
|
||||
/* Top of the per-coefficient loop: advance the index, test token_prob[1]. */
0:
|
||||
ldrb r9, [lr, r5]
|
||||
add r3, r3, #1
|
||||
ldrb r0, [r4, #1]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
blt 2f
|
||||
|
||||
ldrb r9, [lr, r5]
|
||||
ldrb r0, [r4, #2]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
ldrb r9, [lr, r5]
|
||||
bge 3f
|
||||
|
||||
/*
 * Magnitude-one path: the stored value is the low halfword of r11, negated
 * if the sign decision is "ge".  The sign is read with an inline copy of the
 * rac_get_128 sequence, interleaved with computing the next probability
 * pointer (r2 + r3 * 33 + 11) and the store offset.
 */
add r4, r3, r3, lsl #5
|
||||
sxth r12, r11
|
||||
add r4, r2, r4
|
||||
adds r6, r6, r9
|
||||
add r4, r4, #11
|
||||
lsl r8, r8, r9
|
||||
ldrhcs r10, [r7], #2
|
||||
lsl r9, r5, r9
|
||||
mov r5, #128
|
||||
rev16cs r10, r10
|
||||
add r5, r5, r9, lsl #7
|
||||
orrcs r8, r8, r10, lsl r6
|
||||
subcs r6, r6, #16
|
||||
lsr r5, r5, #8
|
||||
cmp r8, r5, lsl #16
|
||||
/* r3 was already incremented, hence the -1 bias on the table address. */
movrel r10, zigzag_scan-1
|
||||
subge r8, r8, r5, lsl #16
|
||||
subge r5, r9, r5
|
||||
ldrb r10, [r10, r3]
|
||||
rsbge r12, r12, #0
|
||||
cmp r3, #16
|
||||
strh r12, [r1, r10]
|
||||
bge 6f
|
||||
/*
 * After a non-zero coefficient: test token_prob[0]; "ge" continues with the
 * next coefficient, "lt" falls through to the exit.
 */
5:
|
||||
ldrb r9, [lr, r5]
|
||||
ldrb r0, [r4]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
/* From here on both halfwords of r11 hold the former top halfword. */
pkhtb r11, r11, r11, asr #16
|
||||
bge 0b
|
||||
|
||||
/*
 * Exit: reload the range coder pointer saved on the stack, limit buf to the
 * value stored at offset 12 of the state (presumably the end-of-buffer
 * pointer -- confirm against the VP56RangeCoder layout), write the state
 * back and return the index.
 */
6:
|
||||
ldr r0, [sp]
|
||||
ldr r9, [r0, #12]
|
||||
cmp r7, r9
|
||||
movhi r7, r9
|
||||
stm r0, {r5-r7} @ high, bits, buf
|
||||
str r8, [r0, #16] @ code_word
|
||||
|
||||
|
||||
/* Drop the saved r0/r1 slots, then restore the callee-saved registers. */
add sp, sp, #8
|
||||
mov r0, r3
|
||||
pop {r4-r11,pc}
|
||||
/*
 * The token_prob[1] test gave "lt": nothing is stored for this position.
 * Select the probabilities at r2 + r3 * 33 and loop without the
 * token_prob[0] test, unless index 16 has been reached.
 */
2:
|
||||
add r4, r3, r3, lsl #5
|
||||
cmp r3, #16
|
||||
add r4, r2, r4
|
||||
pkhtb r11, r11, r11, asr #16
|
||||
bne 0b
|
||||
b 6b
|
||||
/* Magnitude greater than one: r12 accumulates the magnitude. */
3:
|
||||
ldrb r0, [r4, #3]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
ldrb r9, [lr, r5]
|
||||
bge 1f
|
||||
|
||||
/* Magnitude 2, 3 or 4, decided by token_prob[4] and token_prob[5]. */
mov r12, #2
|
||||
ldrb r0, [r4, #4]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
addge r12, #1
|
||||
ldrb r9, [lr, r5]
|
||||
blt 4f
|
||||
ldrb r0, [r4, #5]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
addge r12, #1
|
||||
ldrb r9, [lr, r5]
|
||||
b 4f
|
||||
1:
|
||||
ldrb r0, [r4, #6]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
ldrb r9, [lr, r5]
|
||||
bge 3f
|
||||
|
||||
ldrb r0, [r4, #7]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
ldrb r9, [lr, r5]
|
||||
bge 2f
|
||||
|
||||
/* Base 5 plus one extra decision read with the constant probability 159. */
mov r12, #5
|
||||
mov r0, #159
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
addge r12, r12, #1
|
||||
ldrb r9, [lr, r5]
|
||||
b 4f
|
||||
/* Base 7 plus two extra decisions read with constant probabilities 165, 145. */
2:
|
||||
mov r12, #7
|
||||
mov r0, #165
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
addge r12, r12, #2
|
||||
ldrb r9, [lr, r5]
|
||||
mov r0, #145
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
addge r12, r12, #1
|
||||
ldrb r9, [lr, r5]
|
||||
b 4f
|
||||
/*
 * Larger categories: two decisions (token_prob[8], then token_prob[9] or
 * token_prob[10]) form a 2-bit index in r12.  The base value is
 * 3 + (8 << index) and the extra bits are read using the zero-terminated
 * probability list found at ff_vp8_dct_cat_prob[index].
 */
3:
|
||||
ldrb r0, [r4, #8]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
/* Bump r4 so that the load at offset 9 below reads offset 10 instead. */
addge r4, r4, #1
|
||||
ldrb r9, [lr, r5]
|
||||
movge r12, #2
|
||||
movlt r12, #0
|
||||
ldrb r0, [r4, #9]
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
mov r9, #8
|
||||
addge r12, r12, #1
|
||||
movrel r4, ff_vp8_dct_cat_prob
|
||||
lsl r9, r9, r12
|
||||
ldr r4, [r4, r12, lsl #2]
|
||||
add r12, r9, #3
|
||||
/* r1 is reused as the extra-bits accumulator; it is reloaded after the loop. */
mov r1, #0
|
||||
ldrb r0, [r4], #1
|
||||
/* One decision per list entry, most significant bit first, until a 0 entry. */
1:
|
||||
ldrb r9, [lr, r5]
|
||||
lsl r1, r1, #1
|
||||
rac_get_prob r5, r6, r7, r8, r0, r9, r10
|
||||
ldrb r0, [r4], #1
|
||||
addge r1, r1, #1
|
||||
cmp r0, #0
|
||||
bne 1b
|
||||
ldrb r9, [lr, r5]
|
||||
add r12, r12, r1
|
||||
/* Restore the block pointer from its stack slot. */
ldr r1, [sp, #4]
|
||||
/*
 * Common tail for magnitudes above one: select the probabilities at
 * r2 + r3 * 33 + 22, read the sign, multiply by the low halfword of r11 and
 * store at the byte offset taken from zigzag_scan.
 */
4:
|
||||
add r4, r3, r3, lsl #5
|
||||
add r4, r2, r4
|
||||
add r4, r4, #22
|
||||
rac_get_128 r5, r6, r7, r8, r9, r10
|
||||
rsbge r12, r12, #0
|
||||
smulbb r12, r12, r11
|
||||
movrel r9, zigzag_scan-1
|
||||
ldrb r9, [r9, r3]
|
||||
cmp r3, #16
|
||||
strh r12, [r1, r9]
|
||||
bge 6b
|
||||
b 5b
|
||||
endfunc
|
||||
|
||||
/*
 * Store offsets for the 16 coefficients of a block, indexed by coefficient
 * position.  The entries are byte offsets: ff_decode_block_coeffs_armv6 uses
 * the loaded byte directly as the offset of a halfword store into the block.
 * That function addresses the table as zigzag_scan-1 because its index has
 * already been incremented when the lookup is made.
 */
.section .rodata
|
||||
zigzag_scan:
|
||||
.byte 0, 2, 8, 16
|
||||
.byte 10, 4, 6, 12
|
||||
.byte 18, 24, 26, 20
|
||||
.byte 14, 22, 28, 30
|
@ -30,6 +30,10 @@
|
||||
#include "h264pred.h"
|
||||
#include "rectangle.h"
|
||||
|
||||
#if ARCH_ARM
|
||||
# include "arm/vp8.h"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint8_t filter_level;
|
||||
uint8_t inner_limit;
|
||||
@ -801,6 +805,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef decode_block_coeffs_internal
|
||||
/**
|
||||
* @param c arithmetic bitstream reader context
|
||||
* @param block destination for block coefficients
|
||||
@ -854,7 +859,7 @@ skip_eob:
|
||||
int b = vp56_rac_get_prob(c, token_prob[9+a]);
|
||||
int cat = (a<<1) + b;
|
||||
coeff = 3 + (8<<cat);
|
||||
coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
|
||||
coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
|
||||
}
|
||||
}
|
||||
token_prob = probs[i+1][2];
|
||||
@ -864,6 +869,7 @@ skip_eob:
|
||||
|
||||
return i;
|
||||
}
|
||||
#endif
|
||||
|
||||
static av_always_inline
|
||||
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
|
||||
|
@ -313,7 +313,7 @@ static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 };
|
||||
static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
|
||||
|
||||
// only used for cat3 and above; cat 1 and 2 are referenced directly
|
||||
static const uint8_t * const vp8_dct_cat_prob[] =
|
||||
const uint8_t * const ff_vp8_dct_cat_prob[] =
|
||||
{
|
||||
vp8_dct_cat3_prob,
|
||||
vp8_dct_cat4_prob,
|
||||
|
Loading…
Reference in New Issue
Block a user