mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Inline asm for VP56 arith coder
Inline asm is a much more reliable way to get cmov than trying to trick gcc into generating it, which is useful since it's 2% faster overall. Patch by Eli Friedman <eli.friedman at gmail>. Originally committed as revision 24471 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
ca18a478e3
commit
3611e7a309
@ -208,23 +208,25 @@ static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c)
|
|||||||
return code_word;
|
return code_word;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if ARCH_X86
|
||||||
|
#include "x86/vp56_arith.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef vp56_rac_get_prob
|
||||||
|
#define vp56_rac_get_prob vp56_rac_get_prob
|
||||||
/**
 * Generic C implementation: decode one bit from the range coder.
 *
 * Renormalizes the coder, derives a split point from c->high and prob,
 * and compares the current code word against that split (scaled by 8 bits).
 *
 * @param c    range coder state; c->high and c->code_word are updated
 * @param prob 8-bit weight used to place the split inside the range
 * @return 1 if the code word is at or above the scaled split, 0 otherwise
 */
static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
{
    /* c->high is read straight from the struct on purpose: caching it in a
     * local has been observed to make gcc emit a branch here. */
    const unsigned int value        = vp56_rac_renorm(c);
    const unsigned int split        = 1 + (((c->high - 1) * prob) >> 8);
    const unsigned int split_scaled = split << 8;
    const int          decoded      = value >= split_scaled;

    /* Both updates are plain selects on the same condition so the compiler
     * can use conditional moves; the outcome is essentially unpredictable,
     * which makes a real branch expensive. */
    c->code_word = decoded ? value - split_scaled : value;
    c->high      = decoded ? c->high - split      : split;

    return decoded;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// branchy variant, to be used where there's a branch based on the bit decoded
|
// branchy variant, to be used where there's a branch based on the bit decoded
|
||||||
static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
|
static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
|
||||||
|
54
libavcodec/x86/vp56_arith.h
Normal file
54
libavcodec/x86/vp56_arith.h
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
/**
|
||||||
|
* VP5 and VP6 compatible video decoder (arith decoder)
|
||||||
|
*
|
||||||
|
* Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
|
||||||
|
* Copyright (C) 2010 Eli Friedman
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVCODEC_X86_VP56_ARITH_H
|
||||||
|
#define AVCODEC_X86_VP56_ARITH_H
|
||||||
|
|
||||||
|
#if HAVE_FAST_CMOV
|
||||||
|
#define vp56_rac_get_prob vp56_rac_get_prob
|
||||||
|
/**
 * x86 implementation of the range-coder bit decode using cmov.
 *
 * Computes the same result as the generic C version: after renormalizing,
 * the split point `low` is derived from c->high and prob, and the decoded
 * bit is (code_word >= low << 8). The two state updates are done with
 * conditional moves in inline asm instead of relying on the compiler to
 * turn C selects into cmov.
 *
 * @param c    range coder state; c->high and c->code_word are updated
 * @param prob 8-bit weight used to place the split inside the range
 * @return 1 if the code word is at or above the scaled split, 0 otherwise
 */
static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
{
    unsigned int code_word = vp56_rac_renorm(c);
    unsigned int high      = c->high;
    unsigned int low       = 1 + (((high - 1) * prob) >> 8);
    unsigned int low_shift = low << 8;
    int bit                = 0; /* setae only writes the low byte */

    /*
     * Operands: %0 = bit, %1 = high, %2 = code_word, %3 = low_shift, %4 = low.
     *
     * Every read-write operand is written before the last read of the
     * input %4, so each one is marked earlyclobber ('&') to forbid the
     * compiler from ever placing it in the same register as an input.
     * The sequence modifies EFLAGS, so "cc" is listed explicitly instead
     * of relying on the target's implicit flags clobber.
     */
    __asm__(
        "subl  %4, %1       \n\t" /* high      -= low                       */
        "subl  %3, %2       \n\t" /* code_word -= low_shift, CF = borrow    */
        "leal  (%2, %3), %3 \n\t" /* %3 = original code_word (flags intact) */
        "setae %b0          \n\t" /* bit = !CF, i.e. code_word >= low_shift */
        "cmovb %4, %1       \n\t" /* on borrow: high = low                  */
        "cmovb %3, %2       \n\t" /* on borrow: restore original code_word  */
        : "+&q"(bit), "+&r"(high), "+&r"(code_word), "+&r"(low_shift)
        : "r"(low)
        : "cc"
    );

    c->high      = high;
    c->code_word = code_word;
    return bit;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* AVCODEC_X86_VP56_ARITH_H */
|
Loading…
Reference in New Issue
Block a user