mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
dca: ARMv6 optimised decode_blockcode()
This is a hand-tuned version of the code with impossible parts of the FASTDIV function omitted. 2-5% faster overall on Cortex-A8. Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
035af998ad
commit
00a856e3f9
@ -23,6 +23,61 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "libavutil/intmath.h"
|
||||
|
||||
#if HAVE_ARMV6 && HAVE_INLINE_ASM
|
||||
|
||||
/* Defining the name to itself lets later code test "#ifndef decode_blockcodes"
 * to find out whether this architecture-specific version is present. */
#define decode_blockcodes decode_blockcodes
|
||||
/**
 * ARMv6 inline-asm decoder for a pair of block codes.
 *
 * code1 and code2 are processed in parallel, with the two instruction
 * streams interleaved. Each code goes through four divide-by-levels steps;
 * each step's remainder, minus (levels - 1) >> 1, is stored to values[0..3]
 * for code1 and values[4..7] for code2.
 *
 * Asm operand map:
 *   %0..%7   values[0]..values[7]   (memory outputs)
 *   %8..%10  v0..v2                 (scratch quotients for code1)
 *   %11..%13 v3..v5                 (scratch quotients for code2)
 *   %14, %15 code1, code2           (read-write)
 *   %16      levels - 1
 *   %17      -levels
 *   %18      ff_inverse[levels]     (presumably the fixed-point reciprocal
 *                                    of levels used by FASTDIV - confirm
 *                                    against libavutil/intmath.h)
 *
 * @return code1 | code2 after the asm, i.e. the OR of the two quotients left
 *         over after the fourth step.
 */
static inline int decode_blockcodes(int code1, int code2, int levels,
|
||||
int *values)
|
||||
{
|
||||
int v0, v1, v2, v3, v4, v5;
|
||||
|
||||
/* smmul keeps the upper 32 bits of the signed 64-bit product, so
 * v0 / v3 become the first quotients (code * ff_inverse[levels]) >> 32. */
__asm__ ("smmul %8, %14, %18 \n"
|
||||
"smmul %11, %15, %18 \n"
|
||||
/* smlabb multiplies the bottom 16-bit halves and accumulates:
 * code += quotient * -levels, leaving the remainder in code1 / code2. */
"smlabb %14, %8, %17, %14 \n"
|
||||
"smlabb %15, %11, %17, %15 \n"
|
||||
/* Second quotients, computed from the first ones. */
"smmul %9, %8, %18 \n"
|
||||
"smmul %12, %11, %18 \n"
|
||||
/* Subtract (levels - 1) >> 1 from the first remainders. */
"sub %14, %14, %16, lsr #1 \n"
|
||||
"sub %15, %15, %16, lsr #1 \n"
|
||||
/* Second remainders, left in v0 / v3. */
"smlabb %8, %9, %17, %8 \n"
|
||||
"smlabb %11, %12, %17, %11 \n"
|
||||
/* Third quotients. */
"smmul %10, %9, %18 \n"
|
||||
"smmul %13, %12, %18 \n"
|
||||
/* Store values[0] and values[4]. */
"str %14, %0 \n"
|
||||
"str %15, %4 \n"
|
||||
"sub %8, %8, %16, lsr #1 \n"
|
||||
"sub %11, %11, %16, lsr #1 \n"
|
||||
/* Third remainders, left in v1 / v4. */
"smlabb %9, %10, %17, %9 \n"
|
||||
"smlabb %12, %13, %17, %12 \n"
/* Fourth quotients overwrite code1 / code2; these registers are not
 * written again, so they are what the function returns. */
|
||||
"smmul %14, %10, %18 \n"
|
||||
"smmul %15, %13, %18 \n"
|
||||
/* Store values[1] and values[5]. */
"str %8, %1 \n"
|
||||
"str %11, %5 \n"
|
||||
"sub %9, %9, %16, lsr #1 \n"
|
||||
"sub %12, %12, %16, lsr #1 \n"
|
||||
/* Fourth remainders, left in v2 / v5. */
"smlabb %10, %14, %17, %10 \n"
|
||||
"smlabb %13, %15, %17, %13 \n"
|
||||
/* Store values[2] and values[6]. */
"str %9, %2 \n"
|
||||
"str %12, %6 \n"
|
||||
"sub %10, %10, %16, lsr #1 \n"
|
||||
"sub %13, %13, %16, lsr #1 \n"
|
||||
/* Store values[3] and values[7]. */
"str %10, %3 \n"
|
||||
"str %13, %7 \n"
|
||||
: "=m"(values[0]), "=m"(values[1]),
|
||||
"=m"(values[2]), "=m"(values[3]),
|
||||
"=m"(values[4]), "=m"(values[5]),
|
||||
"=m"(values[6]), "=m"(values[7]),
|
||||
/* Early-clobber (&) keeps the scratch and code registers distinct from
 * the input registers, which are still read after the first writes. */
"=&r"(v0), "=&r"(v1), "=&r"(v2),
|
||||
"=&r"(v3), "=&r"(v4), "=&r"(v5),
|
||||
"+&r"(code1), "+&r"(code2)
|
||||
: "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels]));
|
||||
|
||||
/* Non-zero if either code still had a quotient left after four steps. */
return code1 | code2;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON && HAVE_INLINE_ASM && HAVE_ASM_MOD_Y
|
||||
|
||||
|
@ -1038,6 +1038,7 @@ static void dca_downmix(float *samples, int srcfmt,
|
||||
}
|
||||
|
||||
|
||||
#ifndef decode_blockcodes
|
||||
/* Very compact version of the block code decoder that does not use table
|
||||
* look-up but is slightly slower */
|
||||
static int decode_blockcode(int code, int levels, int *values)
|
||||
@ -1051,14 +1052,16 @@ static int decode_blockcode(int code, int levels, int *values)
|
||||
code = div;
|
||||
}
|
||||
|
||||
if (code == 0)
|
||||
return 0;
|
||||
else {
|
||||
av_log(NULL, AV_LOG_ERROR, "ERROR: block code look-up failed\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
/**
 * Generic fallback: decode two block codes with decode_blockcode().
 *
 * The output for code1 is written starting at values, the output for code2
 * starting at values + 4.
 *
 * @return bitwise OR of the two decode_blockcode() results, so the result is
 *         non-zero when either call returned non-zero.
 */
static int decode_blockcodes(int code1, int code2, int levels, int *values)
{
    int first_result  = decode_blockcode(code1, levels, values);
    int second_result = decode_blockcode(code2, levels, &values[4]);

    return first_result | second_result;
}
|
||||
#endif
|
||||
|
||||
/* Number of bits read from the bitstream for one block code, indexed by abits - 1. */
static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 };
|
||||
/* The "levels" argument passed to the block code decoder, indexed by abits - 1. */
static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 };
|
||||
|
||||
@ -1125,16 +1128,20 @@ static int dca_subsubframe(DCAContext * s, int base_channel, int block_index)
|
||||
if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table){
|
||||
if (abits <= 7){
|
||||
/* Block code */
|
||||
int block_code1, block_code2, size, levels;
|
||||
int block_code1, block_code2, size, levels, err;
|
||||
|
||||
size = abits_sizes[abits-1];
|
||||
levels = abits_levels[abits-1];
|
||||
|
||||
block_code1 = get_bits(&s->gb, size);
|
||||
/* FIXME Should test return value */
|
||||
decode_blockcode(block_code1, levels, block);
|
||||
block_code2 = get_bits(&s->gb, size);
|
||||
decode_blockcode(block_code2, levels, &block[4]);
|
||||
err = decode_blockcodes(block_code1, block_code2,
|
||||
levels, block);
|
||||
if (err) {
|
||||
av_log(s->avctx, AV_LOG_ERROR,
|
||||
"ERROR: block code look-up failed\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
}else{
|
||||
/* no coding */
|
||||
for (m = 0; m < 8; m++)
|
||||
|
Loading…
Reference in New Issue
Block a user