Inline asm for VP56 arith coder

Using inline asm is a much more reliable way to get cmov than trying to trick gcc into generating it, which is worthwhile since the result is 2% faster overall. Patch by Eli Friedman <eli.friedman at gmail>. Originally committed as revision 24471 to svn://svn.ffmpeg.org/ffmpeg/trunk
2024-12-23 12:43:46 +02:00 · 2010-07-23 21:46:30 +00:00 · 2010-07-23 21:46:30 +00:00 · 3611e7a309
commit 3611e7a309
parent ca18a478e3
2 changed files with 61 additions and 5 deletions
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -208,23 +208,25 @@ static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c)
    return code_word;
 }
 #if ARCH_X86
 #include "x86/vp56_arith.h"
 #endif
 #ifndef vp56_rac_get_prob
 #define vp56_rac_get_prob vp56_rac_get_prob
 /**
  * Portable C fallback: read one bit from the range coder.
  *
  * The current range c->high is split at
  *     split = 1 + (((c->high - 1) * prob) >> 8)
  * and the renormalised code word is compared against split << 8.
  *
  * @param c    range coder state; c->high and c->code_word are updated
  * @param prob 8-bit weight used to place the split point
  * @return 1 if the code word is >= (split << 8), otherwise 0
  */
 static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
 {
    /* c->high is deliberately re-read from the struct at every use and
     * never cached in a local: caching it was observed to make gcc fall
     * back to a branch for the selects at the bottom of this function. */
    const unsigned int cw            = vp56_rac_renorm(c);
    const unsigned int split         = 1 + (((c->high - 1) * prob) >> 8);
    const unsigned int split_shifted = split << 8;
    const int bit = cw >= split_shifted;

    /* Keep both updates as plain ternary selects. The decoded bit is
     * essentially unpredictable, so the intent is for the compiler to
     * emit conditional moves here rather than a branch. */
    c->high      = bit ? c->high - split    : split;
    c->code_word = bit ? cw - split_shifted : cw;
    return bit;
 }
 #endif
 // branchy variant, to be used where there's a branch based on the bit decoded
 static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
--- a/libavcodec/x86/vp56_arith.h
+++ b/libavcodec/x86/vp56_arith.h
@@ -0,0 +1,54 @@
 /**
 * VP5 and VP6 compatible video decoder (arith decoder)
 *
 * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
 * Copyright (C) 2010  Eli Friedman
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef AVCODEC_X86_VP56_ARITH_H
 #define AVCODEC_X86_VP56_ARITH_H
 #if HAVE_FAST_CMOV
 #define vp56_rac_get_prob vp56_rac_get_prob
 /**
  * x86 inline-asm variant: read one bit from the range coder using
  * conditional moves instead of a branch.
  *
  * Computes the same result as the expression
  *     bit       = code_word >= low_shift;
  *     high      = bit ? high - low             : low;
  *     code_word = bit ? code_word - low_shift  : code_word;
  * with low = 1 + (((high - 1) * prob) >> 8) and low_shift = low << 8.
  *
  * @param c    range coder state; c->high and c->code_word are updated
  * @param prob 8-bit weight used to place the split point
  * @return 1 if the renormalised code word is >= low_shift, otherwise 0
  */
 static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
 {
    unsigned int code_word = vp56_rac_renorm(c);
    unsigned int high = c->high;
    unsigned int low = 1 + (((high - 1) * prob) >> 8);
    unsigned int low_shift = low << 8;
    /* Pre-cleared because setae only writes the low byte of the register. */
    int bit = 0;

    /* Operands: %0 = bit, %1 = high, %2 = code_word, %3 = low_shift,
     *           %4 = low (input only).
     *
     * Every output is written before the last read of %4 (the first
     * cmovb), so all outputs are marked earlyclobber ('&') to keep the
     * compiler from ever placing %4 in the same register as one of them.
     * The subl instructions modify the flags, hence the "cc" clobber. */
    __asm__(
        "subl  %4, %1      \n\t" /* high -= low (speculative bit==1 value)   */
        "subl  %3, %2      \n\t" /* code_word -= low_shift; CF set on borrow */
        "leal (%2, %3), %3 \n\t" /* %3 = original code_word; flags untouched */
        "setae %b0         \n\t" /* bit = !CF, i.e. code_word >= low_shift   */
        "cmovb %4, %1      \n\t" /* bit == 0: high = low                     */
        "cmovb %3, %2      \n\t" /* bit == 0: restore original code_word     */
        : "+&q"(bit), "+&r"(high), "+&r"(code_word), "+&r"(low_shift)
        : "r"(low)
        : "cc"
    );

    c->high      = high;
    c->code_word = code_word;
    return bit;
 }
 #endif
 #endif /* AVCODEC_X86_VP56_ARITH_H */