mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
182663a58a
While the inline cabac assembly has worked correctly in i386 builds historically, modern compiler updates has started showing issues with it, when the function gets inlined into larger contexts that fail to provide the amount of free registers as this function requires. This was an issue with Clang on Windows on i386, which was fixed in c6d284b945324a7bc70ea8b9056040c8148aa835. However, recently the same issues also have started showing up with GCC (both for Windows and Linux). Whether the issue appears seems dependent on a lot of optimizer tuning (e.g. the issue appears or goes away depenent on the combinaton of -march= and -mtune= options), potentially due to the compiler making different decisions on how much to inline. Fixes: https://trac.ffmpeg.org/ticket/8903 Signed-off-by: Martin Storsjö <martin@martin.st>
309 lines
14 KiB
C
309 lines
14 KiB
C
/*
|
|
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#ifndef AVCODEC_X86_CABAC_H
|
|
#define AVCODEC_X86_CABAC_H
|
|
|
|
#include <stddef.h>
|
|
|
|
#include "libavcodec/cabac.h"
|
|
#include "libavutil/attributes.h"
|
|
#include "libavutil/macros.h"
|
|
#include "libavutil/x86/asm.h"
|
|
#include "config.h"
|
|
|
|
#if (defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
|
|
|| ( !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)\
|
|
|| (defined(__INTEL_COMPILER) && defined(_MSC_VER))
|
|
# define BROKEN_COMPILER 1
|
|
#else
|
|
# define BROKEN_COMPILER 0
|
|
#endif
|
|
|
|
#if HAVE_INLINE_ASM
|
|
|
|
#ifndef UNCHECKED_BITSTREAM_READER
|
|
#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
|
|
#endif
|
|
|
|
#if UNCHECKED_BITSTREAM_READER
|
|
#define END_CHECK(end) ""
|
|
#else
|
|
#define END_CHECK(end) \
|
|
"cmp "end" , %%"FF_REG_c" \n\t"\
|
|
"jge 1f \n\t"
|
|
#endif
|
|
|
|
#ifdef BROKEN_RELOCATIONS
|
|
#define TABLES_ARG , "r"(tables)
|
|
|
|
#if HAVE_FAST_CMOV
|
|
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
|
|
"cmp "low" , "tmp" \n\t"\
|
|
"cmova %%ecx , "range" \n\t"\
|
|
"sbb %%rcx , %%rcx \n\t"\
|
|
"and %%ecx , "tmp" \n\t"\
|
|
"xor %%rcx , "retq" \n\t"\
|
|
"sub "tmp" , "low" \n\t"
|
|
#else /* HAVE_FAST_CMOV */
|
|
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
|
|
/* P4 Prescott has crappy cmov,sbb,64-bit shift so avoid them */ \
|
|
"sub "low" , "tmp" \n\t"\
|
|
"sar $31 , "tmp" \n\t"\
|
|
"sub %%ecx , "range" \n\t"\
|
|
"and "tmp" , "range" \n\t"\
|
|
"add %%ecx , "range" \n\t"\
|
|
"shl $17 , %%ecx \n\t"\
|
|
"and "tmp" , %%ecx \n\t"\
|
|
"sub %%ecx , "low" \n\t"\
|
|
"xor "tmp" , "ret" \n\t"\
|
|
"movslq "ret" , "retq" \n\t"
|
|
#endif /* HAVE_FAST_CMOV */
|
|
|
|
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
|
|
"movzbl "statep" , "ret" \n\t"\
|
|
"mov "range" , "tmp" \n\t"\
|
|
"and $0xC0 , "range" \n\t"\
|
|
"lea ("ret", "range", 2), %%ecx \n\t"\
|
|
"movzbl "lps_off"("tables", %%rcx), "range" \n\t"\
|
|
"sub "range" , "tmp" \n\t"\
|
|
"mov "tmp" , %%ecx \n\t"\
|
|
"shl $17 , "tmp" \n\t"\
|
|
BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
|
|
"movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t"\
|
|
"shl %%cl , "range" \n\t"\
|
|
"movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t"\
|
|
"shl %%cl , "low" \n\t"\
|
|
"mov "tmpbyte" , "statep" \n\t"\
|
|
"test "lowword" , "lowword" \n\t"\
|
|
"jnz 2f \n\t"\
|
|
"mov "byte" , %%"FF_REG_c" \n\t"\
|
|
END_CHECK(end)\
|
|
"add"FF_OPSIZE" $2 , "byte" \n\t"\
|
|
"1: \n\t"\
|
|
"movzwl (%%"FF_REG_c") , "tmp" \n\t"\
|
|
"lea -1("low") , %%ecx \n\t"\
|
|
"xor "low" , %%ecx \n\t"\
|
|
"shr $15 , %%ecx \n\t"\
|
|
"bswap "tmp" \n\t"\
|
|
"shr $15 , "tmp" \n\t"\
|
|
"movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\
|
|
"sub $0xFFFF , "tmp" \n\t"\
|
|
"neg %%ecx \n\t"\
|
|
"add $7 , %%ecx \n\t"\
|
|
"shl %%cl , "tmp" \n\t"\
|
|
"add "tmp" , "low" \n\t"\
|
|
"2: \n\t"
|
|
|
|
#else /* BROKEN_RELOCATIONS */
|
|
#define TABLES_ARG NAMED_CONSTRAINTS_ARRAY_ADD(ff_h264_cabac_tables)
|
|
#define RIP_ARG
|
|
|
|
#if HAVE_FAST_CMOV
|
|
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
|
|
"mov "tmp" , %%ecx \n\t"\
|
|
"shl $17 , "tmp" \n\t"\
|
|
"cmp "low" , "tmp" \n\t"\
|
|
"cmova %%ecx , "range" \n\t"\
|
|
"sbb %%ecx , %%ecx \n\t"\
|
|
"and %%ecx , "tmp" \n\t"\
|
|
"xor %%ecx , "ret" \n\t"\
|
|
"sub "tmp" , "low" \n\t"
|
|
#else /* HAVE_FAST_CMOV */
|
|
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
|
|
"mov "tmp" , %%ecx \n\t"\
|
|
"shl $17 , "tmp" \n\t"\
|
|
"sub "low" , "tmp" \n\t"\
|
|
"sar $31 , "tmp" \n\t" /*lps_mask*/\
|
|
"sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
|
|
"and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
|
|
"add %%ecx , "range" \n\t" /*new range*/\
|
|
"shl $17 , %%ecx \n\t"\
|
|
"and "tmp" , %%ecx \n\t"\
|
|
"sub %%ecx , "low" \n\t"\
|
|
"xor "tmp" , "ret" \n\t"
|
|
#endif /* HAVE_FAST_CMOV */
|
|
|
|
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
|
|
"movzbl "statep" , "ret" \n\t"\
|
|
"mov "range" , "tmp" \n\t"\
|
|
"and $0xC0 , "range" \n\t"\
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
|
|
"sub "range" , "tmp" \n\t"\
|
|
BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\
|
|
"shl %%cl , "range" \n\t"\
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\
|
|
"shl %%cl , "low" \n\t"\
|
|
"mov "tmpbyte" , "statep" \n\t"\
|
|
"test "lowword" , "lowword" \n\t"\
|
|
" jnz 2f \n\t"\
|
|
"mov "byte" , %%"FF_REG_c" \n\t"\
|
|
END_CHECK(end)\
|
|
"add"FF_OPSIZE" $2 , "byte" \n\t"\
|
|
"1: \n\t"\
|
|
"movzwl (%%"FF_REG_c") , "tmp" \n\t"\
|
|
"lea -1("low") , %%ecx \n\t"\
|
|
"xor "low" , %%ecx \n\t"\
|
|
"shr $15 , %%ecx \n\t"\
|
|
"bswap "tmp" \n\t"\
|
|
"shr $15 , "tmp" \n\t"\
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
|
|
"sub $0xFFFF , "tmp" \n\t"\
|
|
"neg %%ecx \n\t"\
|
|
"add $7 , %%ecx \n\t"\
|
|
"shl %%cl , "tmp" \n\t"\
|
|
"add "tmp" , "low" \n\t"\
|
|
"2: \n\t"
|
|
|
|
#endif /* BROKEN_RELOCATIONS */
|
|
|
|
#if HAVE_7REGS && !BROKEN_COMPILER
|
|
#define get_cabac_inline get_cabac_inline_x86
|
|
static
|
|
#if ARCH_X86_32
|
|
av_noinline
|
|
#else
|
|
av_always_inline
|
|
#endif
|
|
int get_cabac_inline_x86(CABACContext *c, uint8_t *const state)
|
|
{
|
|
int bit, tmp;
|
|
#ifdef BROKEN_RELOCATIONS
|
|
void *tables;
|
|
|
|
__asm__ volatile(
|
|
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
|
|
: "=&r"(tables)
|
|
: NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
|
|
);
|
|
#endif
|
|
|
|
__asm__ volatile(
|
|
BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
|
|
"%2", "%q2", "%3", "%b3",
|
|
"%c6(%5)", "%c7(%5)",
|
|
AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
|
|
AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
|
|
AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
|
|
"%8")
|
|
: "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
|
|
: "r"(state), "r"(c),
|
|
"i"(offsetof(CABACContext, bytestream)),
|
|
"i"(offsetof(CABACContext, bytestream_end))
|
|
TABLES_ARG
|
|
,"1"(c->low), "2"(c->range)
|
|
: "%"FF_REG_c, "memory"
|
|
);
|
|
return bit & 1;
|
|
}
|
|
#endif /* HAVE_7REGS && !BROKEN_COMPILER */
|
|
|
|
#if !BROKEN_COMPILER
|
|
#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
|
|
static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
|
|
{
|
|
x86_reg tmp;
|
|
__asm__ volatile(
|
|
"movl %c6(%2), %k1 \n\t"
|
|
"movl %c3(%2), %%eax \n\t"
|
|
"shl $17, %k1 \n\t"
|
|
"add %%eax, %%eax \n\t"
|
|
"sub %k1, %%eax \n\t"
|
|
"cdq \n\t"
|
|
"and %%edx, %k1 \n\t"
|
|
"add %k1, %%eax \n\t"
|
|
"xor %%edx, %%ecx \n\t"
|
|
"sub %%edx, %%ecx \n\t"
|
|
"test %%ax, %%ax \n\t"
|
|
"jnz 1f \n\t"
|
|
"mov %c4(%2), %1 \n\t"
|
|
"subl $0xFFFF, %%eax \n\t"
|
|
"movzwl (%1), %%edx \n\t"
|
|
"bswap %%edx \n\t"
|
|
"shrl $15, %%edx \n\t"
|
|
#if UNCHECKED_BITSTREAM_READER
|
|
"add $2, %1 \n\t"
|
|
"addl %%edx, %%eax \n\t"
|
|
"mov %1, %c4(%2) \n\t"
|
|
#else
|
|
"addl %%edx, %%eax \n\t"
|
|
"cmp %c5(%2), %1 \n\t"
|
|
"jge 1f \n\t"
|
|
"add"FF_OPSIZE" $2, %c4(%2) \n\t"
|
|
#endif
|
|
"1: \n\t"
|
|
"movl %%eax, %c3(%2) \n\t"
|
|
|
|
: "+c"(val), "=&r"(tmp)
|
|
: "r"(c),
|
|
"i"(offsetof(CABACContext, low)),
|
|
"i"(offsetof(CABACContext, bytestream)),
|
|
"i"(offsetof(CABACContext, bytestream_end)),
|
|
"i"(offsetof(CABACContext, range))
|
|
: "%eax", "%edx", "memory"
|
|
);
|
|
return val;
|
|
}
|
|
|
|
#define get_cabac_bypass get_cabac_bypass_x86
|
|
static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
|
|
{
|
|
x86_reg tmp;
|
|
int res;
|
|
__asm__ volatile(
|
|
"movl %c6(%2), %k1 \n\t"
|
|
"movl %c3(%2), %%eax \n\t"
|
|
"shl $17, %k1 \n\t"
|
|
"add %%eax, %%eax \n\t"
|
|
"sub %k1, %%eax \n\t"
|
|
"cdq \n\t"
|
|
"and %%edx, %k1 \n\t"
|
|
"add %k1, %%eax \n\t"
|
|
"inc %%edx \n\t"
|
|
"test %%ax, %%ax \n\t"
|
|
"jnz 1f \n\t"
|
|
"mov %c4(%2), %1 \n\t"
|
|
"subl $0xFFFF, %%eax \n\t"
|
|
"movzwl (%1), %%ecx \n\t"
|
|
"bswap %%ecx \n\t"
|
|
"shrl $15, %%ecx \n\t"
|
|
"addl %%ecx, %%eax \n\t"
|
|
"cmp %c5(%2), %1 \n\t"
|
|
"jge 1f \n\t"
|
|
"add"FF_OPSIZE" $2, %c4(%2) \n\t"
|
|
"1: \n\t"
|
|
"movl %%eax, %c3(%2) \n\t"
|
|
|
|
: "=&d"(res), "=&r"(tmp)
|
|
: "r"(c),
|
|
"i"(offsetof(CABACContext, low)),
|
|
"i"(offsetof(CABACContext, bytestream)),
|
|
"i"(offsetof(CABACContext, bytestream_end)),
|
|
"i"(offsetof(CABACContext, range))
|
|
: "%eax", "%ecx", "memory"
|
|
);
|
|
return res;
|
|
}
|
|
#endif /* !BROKEN_COMPILER */
|
|
|
|
#endif /* HAVE_INLINE_ASM */
|
|
#endif /* AVCODEC_X86_CABAC_H */
|