mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-03 05:10:03 +02:00
8ec0204ee4
This fixes two issues preventing suncc from building this code. The undocumented 'a' operand modifier, causing gcc to omit a $ in front of immediate operands (as required in addresses), is not supported by suncc. Luckily, the also undocumented 'c' modifier has the same effect and is supported. On some asm statements with a large number of operands, suncc for no obvious reason fails to correctly substitute some of the operands. Fortunately, some of the operands in these statements are plain numbers which can be inserted directly into the code block instead of passed as operands. With these changes, the code builds correctly with both gcc and suncc. Signed-off-by: Mans Rullgard <mans@mansr.com>
234 lines
12 KiB
C
234 lines
12 KiB
C
/*
|
|
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
|
|
*
|
|
* This file is part of Libav.
|
|
*
|
|
* Libav is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* Libav is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with Libav; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#ifndef AVCODEC_X86_CABAC_H
|
|
#define AVCODEC_X86_CABAC_H
|
|
|
|
#include "libavcodec/cabac.h"
|
|
#include "libavutil/attributes.h"
|
|
#include "libavutil/x86/asm.h"
|
|
#include "libavutil/internal.h"
|
|
#include "config.h"
|
|
|
|
#if HAVE_INLINE_ASM
|
|
|
|
#ifdef BROKEN_RELOCATIONS
|
|
/*
 * BROKEN_RELOCATIONS build: appended to the input-operand list of the asm
 * statement in get_cabac_inline_x86() so that the local `tables` pointer is
 * passed in as an additional register operand.
 */
#define TABLES_ARG , "r"(tables)
|
|
|
|
#if HAVE_FAST_CMOV
|
|
/*
 * Update step pasted into BRANCHLESS_GET_CABAC (cmov/sbb flavour).
 *
 * "cmp low, tmp" sets the flags from tmp - low. cmova then copies %ecx into
 * range when tmp is above low (unsigned); cmova itself leaves the flags
 * untouched, so the following sbb turns the carry flag into an all-zeros or
 * all-ones value in %rcx. That value masks tmp, is XORed into retq, and the
 * masked tmp is subtracted from low.
 *
 * The `ret` parameter is not referenced by this variant.
 */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
|
|
"cmp "low" , "tmp" \n\t"\
|
|
"cmova %%ecx , "range" \n\t"\
|
|
"sbb %%rcx , %%rcx \n\t"\
|
|
"and %%ecx , "tmp" \n\t"\
|
|
"xor %%rcx , "retq" \n\t"\
|
|
"sub "tmp" , "low" \n\t"
|
|
#else /* HAVE_FAST_CMOV */
|
|
/*
 * Alternative update step that avoids cmov and sbb.
 *
 * tmp - low is shifted arithmetically right by 31 to form a mask (0 or -1).
 * The mask is used with sub/and/add to choose between the incoming range and
 * the value held in %ecx, to conditionally subtract (%ecx << 17) from low,
 * and is XORed into ret. The last instruction sign-extends ret into retq.
 */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
|
|
/* P4 Prescott has crappy cmov,sbb,64bit shift so avoid them */ \
|
|
"sub "low" , "tmp" \n\t"\
|
|
"sar $31 , "tmp" \n\t"\
|
|
"sub %%ecx , "range" \n\t"\
|
|
"and "tmp" , "range" \n\t"\
|
|
"add %%ecx , "range" \n\t"\
|
|
"shl $17 , %%ecx \n\t"\
|
|
"and "tmp" , %%ecx \n\t"\
|
|
"sub %%ecx , "low" \n\t"\
|
|
"xor "tmp" , "ret" \n\t"\
|
|
"movslq "ret" , "retq" \n\t"
|
|
#endif /* HAVE_FAST_CMOV */
|
|
|
|
/*
 * Assembly template used by get_cabac_inline_x86() when BROKEN_RELOCATIONS
 * is defined: every table access goes through a base register (`tables`)
 * plus a constant offset instead of referencing the table symbol directly.
 *
 * All macro arguments are strings pasted into the asm template:
 *   ret / retq       32/64-bit names of the register that first receives the
 *                    zero-extended state byte loaded from statep
 *   statep           memory operand of the state byte; overwritten with
 *                    tmpbyte after the table lookups
 *   low / lowword    32/16-bit names of the register holding low
 *   range / rangeq   32/64-bit names of the register holding range
 *   tmp / tmpbyte    32/8-bit names of a scratch register
 *   byte, end        memory operands for the bytestream position and its end
 *   norm_off, lps_off, mlps_off
 *                    constant offsets added to the table base
 *   tables           register holding the table base address
 *
 * %ecx/%rcx (REG_c) is used as scratch and as the shift count throughout.
 *
 * Outline of the sequence:
 *   - index the lps_off table with ret + 2 * (range & 0xC0), subtract the
 *     looked-up byte from the old range (kept in tmp, copied to %ecx), and
 *     shift tmp left by 17;
 *   - run BRANCHLESS_GET_CABAC_UPDATE;
 *   - look up a shift count at norm_off indexed by range, shift range and
 *     low by it, and store the byte found at mlps_off + 128 + retq back to
 *     statep;
 *   - if the low 16 bits of low are non-zero, jump to label 2 and finish;
 *     otherwise read 16 bits from the bytestream position, byte-swap and
 *     shift them, and add them into low. The stored position is advanced by
 *     2 only if the loaded position compares less than end.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
|
|
"movzbl "statep" , "ret" \n\t"\
|
|
"mov "range" , "tmp" \n\t"\
|
|
"and $0xC0 , "range" \n\t"\
|
|
"lea ("ret", "range", 2), %%ecx \n\t"\
|
|
"movzbl "lps_off"("tables", %%rcx), "range" \n\t"\
|
|
"sub "range" , "tmp" \n\t"\
|
|
"mov "tmp" , %%ecx \n\t"\
|
|
"shl $17 , "tmp" \n\t"\
|
|
BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
|
|
"movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t"\
|
|
"shl %%cl , "range" \n\t"\
|
|
"movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t"\
|
|
"shl %%cl , "low" \n\t"\
|
|
"mov "tmpbyte" , "statep" \n\t"\
|
|
"test "lowword" , "lowword" \n\t"\
|
|
"jnz 2f \n\t"\
|
|
"mov "byte" , %%"REG_c" \n\t"\
|
|
"cmp "end" , %%"REG_c" \n\t"\
|
|
"jge 1f \n\t"\
|
|
"add"OPSIZE" $2 , "byte" \n\t"\
|
|
"1: \n\t"\
|
|
"movzwl (%%"REG_c") , "tmp" \n\t"\
|
|
"lea -1("low") , %%ecx \n\t"\
|
|
"xor "low" , %%ecx \n\t"\
|
|
"shr $15 , %%ecx \n\t"\
|
|
"bswap "tmp" \n\t"\
|
|
"shr $15 , "tmp" \n\t"\
|
|
"movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\
|
|
"sub $0xFFFF , "tmp" \n\t"\
|
|
"neg %%ecx \n\t"\
|
|
"add $7 , %%ecx \n\t"\
|
|
"shl %%cl , "tmp" \n\t"\
|
|
"add "tmp" , "low" \n\t"\
|
|
"2: \n\t"
|
|
|
|
#else /* BROKEN_RELOCATIONS */
|
|
/*
 * Without BROKEN_RELOCATIONS the table symbol is referenced directly from
 * the asm template, so no extra operand is appended.
 */
#define TABLES_ARG
|
|
|
|
#if HAVE_FAST_CMOV
|
|
/*
 * Update step pasted into BRANCHLESS_GET_CABAC (cmov/sbb flavour).
 *
 * Unlike the BROKEN_RELOCATIONS variant, this macro itself saves tmp in
 * %ecx and shifts tmp left by 17 before comparing. "cmp low, tmp" sets the
 * flags from tmp - low; cmova copies %ecx into range when tmp is above low
 * (unsigned), and sbb turns the carry flag into 0 or -1 in %ecx. That value
 * masks tmp, is XORed into ret, and the masked tmp is subtracted from low.
 */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
|
|
"mov "tmp" , %%ecx \n\t"\
|
|
"shl $17 , "tmp" \n\t"\
|
|
"cmp "low" , "tmp" \n\t"\
|
|
"cmova %%ecx , "range" \n\t"\
|
|
"sbb %%ecx , %%ecx \n\t"\
|
|
"and %%ecx , "tmp" \n\t"\
|
|
"xor %%ecx , "ret" \n\t"\
|
|
"sub "tmp" , "low" \n\t"
|
|
#else /* HAVE_FAST_CMOV */
|
|
/*
 * Alternative update step that avoids cmov and sbb.
 *
 * tmp is saved in %ecx and shifted left by 17; (tmp - low) >> 31 (arithmetic)
 * then forms lps_mask, which is applied with sub/and/add to produce the new
 * range, to conditionally subtract (%ecx << 17) from low, and is XORed into
 * ret.
 */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
|
|
"mov "tmp" , %%ecx \n\t"\
|
|
"shl $17 , "tmp" \n\t"\
|
|
"sub "low" , "tmp" \n\t"\
|
|
"sar $31 , "tmp" \n\t" /*lps_mask*/\
|
|
"sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
|
|
"and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
|
|
"add %%ecx , "range" \n\t" /*new range*/\
|
|
"shl $17 , %%ecx \n\t"\
|
|
"and "tmp" , %%ecx \n\t"\
|
|
"sub %%ecx , "low" \n\t"\
|
|
"xor "tmp" , "ret" \n\t"
|
|
#endif /* HAVE_FAST_CMOV */
|
|
|
|
/*
 * Assembly template used by get_cabac_inline_x86() when BROKEN_RELOCATIONS
 * is not defined: tables are addressed as
 * MANGLE(ff_h264_cabac_tables) + offset, with an index register.
 *
 * The parameter list matches the BROKEN_RELOCATIONS variant so the call site
 * is shared; the retq, rangeq and tables arguments are not referenced here.
 *
 * All used arguments are strings pasted into the asm template:
 *   ret              register that first receives the zero-extended state
 *                    byte loaded from statep
 *   statep           memory operand of the state byte; overwritten with
 *                    tmpbyte after the table lookups
 *   low / lowword    32/16-bit names of the register holding low
 *   range            register holding range
 *   tmp / tmpbyte    32/8-bit names of a scratch register
 *   byte, end        memory operands for the bytestream position and its end
 *   norm_off, lps_off, mlps_off
 *                    constant offsets added to the table symbol
 *
 * %ecx (REG_c) is used as scratch and as the shift count throughout.
 *
 * Outline of the sequence:
 *   - look up a byte at lps_off indexed by ret + 2 * (range & 0xC0) and
 *     subtract it from the old range kept in tmp;
 *   - run BRANCHLESS_GET_CABAC_UPDATE;
 *   - look up a shift count at norm_off indexed by range, shift range and
 *     low by it, and store the byte found at mlps_off + 128 + ret back to
 *     statep;
 *   - if the low 16 bits of low are non-zero, jump to label 2 and finish;
 *     otherwise read 16 bits from the bytestream position, byte-swap and
 *     shift them, and add them into low. The stored position is advanced by
 *     2 only if the loaded position compares less than end.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
|
|
"movzbl "statep" , "ret" \n\t"\
|
|
"mov "range" , "tmp" \n\t"\
|
|
"and $0xC0 , "range" \n\t"\
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
|
|
"sub "range" , "tmp" \n\t"\
|
|
BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\
|
|
"shl %%cl , "range" \n\t"\
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\
|
|
"shl %%cl , "low" \n\t"\
|
|
"mov "tmpbyte" , "statep" \n\t"\
|
|
"test "lowword" , "lowword" \n\t"\
|
|
" jnz 2f \n\t"\
|
|
"mov "byte" , %%"REG_c" \n\t"\
|
|
"cmp "end" , %%"REG_c" \n\t"\
|
|
"jge 1f \n\t"\
|
|
"add"OPSIZE" $2 , "byte" \n\t"\
|
|
"1: \n\t"\
|
|
"movzwl (%%"REG_c") , "tmp" \n\t"\
|
|
"lea -1("low") , %%ecx \n\t"\
|
|
"xor "low" , %%ecx \n\t"\
|
|
"shr $15 , %%ecx \n\t"\
|
|
"bswap "tmp" \n\t"\
|
|
"shr $15 , "tmp" \n\t"\
|
|
"movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
|
|
"sub $0xFFFF , "tmp" \n\t"\
|
|
"neg %%ecx \n\t"\
|
|
"add $7 , %%ecx \n\t"\
|
|
"shl %%cl , "tmp" \n\t"\
|
|
"add "tmp" , "low" \n\t"\
|
|
"2: \n\t"
|
|
|
|
#endif /* BROKEN_RELOCATIONS */
|
|
|
|
|
|
#if HAVE_7REGS
|
|
/* Make the x86 inline-asm implementation the one used for get_cabac_inline. */
#define get_cabac_inline get_cabac_inline_x86
|
|
/**
 * x86 inline-assembly implementation of get_cabac_inline.
 *
 * Runs the BRANCHLESS_GET_CABAC template on the context and the given state
 * byte. c->low and c->range are read-write operands of the asm statement;
 * *state and the bytestream position inside *c are modified through memory
 * (hence the "memory" clobber).
 *
 * Operand map of the main asm statement:
 *   %0 bit (output)        %1 c->low (in/out)     %2 c->range (in/out)
 *   %3 tmp (output, "q" so its 8-bit name %b3 can be used)
 *   %4 state               %5 c
 *   %6 offsetof(CABACContext, bytestream)
 *   %7 offsetof(CABACContext, bytestream_end)
 *   %8 tables (present only when BROKEN_RELOCATIONS is defined)
 *
 * @param c     CABAC context
 * @param state pointer to the state byte, read and rewritten by the asm
 * @return the lowest bit of the value left in `bit` by the asm (0 or 1)
 */
static av_always_inline int get_cabac_inline_x86(CABACContext *c,
|
|
uint8_t *const state)
|
|
{
|
|
int bit, tmp;
|
|
#ifdef BROKEN_RELOCATIONS
|
|
void *tables;
|
|
|
|
/* Load the address of ff_h264_cabac_tables into a register so the main
 * asm statement can address the tables relative to it. */
__asm__ volatile(
|
|
"lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
|
|
: "=&r"(tables)
|
|
);
|
|
#endif
|
|
|
|
__asm__ volatile(
|
|
BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
|
|
"%2", "%q2", "%3", "%b3",
|
|
"%c6(%5)", "%c7(%5)",
|
|
AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
|
|
AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
|
|
AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
|
|
"%8")
|
|
: "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
|
|
: "r"(state), "r"(c),
|
|
"i"(offsetof(CABACContext, bytestream)),
|
|
"i"(offsetof(CABACContext, bytestream_end))
|
|
TABLES_ARG
|
|
: "%"REG_c, "memory"
|
|
);
|
|
return bit & 1;
|
|
}
|
|
#endif /* HAVE_7REGS */
|
|
|
|
/* Make the x86 inline-asm implementation the one used for get_cabac_bypass_sign. */
#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
|
|
/**
 * x86 inline-assembly implementation of get_cabac_bypass_sign.
 *
 * The asm reads c->range and c->low from memory, doubles low and subtracts
 * (range << 17). cltd spreads the sign of that result into %edx (0 or -1);
 * the mask is used to add (range << 17) back when the result was negative
 * and, via xor/sub on %ecx, to negate val in that same case. If the low
 * 16 bits of the new low are zero, 16 bits are read from the bytestream
 * position, byte-swapped, shifted and added into low (after subtracting
 * 0xFFFF), and the stored position is advanced by 2 only if it compares
 * less than bytestream_end. The new low is written back to c->low.
 *
 * Operand map:
 *   %0 val (in/out, fixed to %ecx by the "c" constraint)
 *   %1 tmp (scratch output)
 *   %2 c
 *   %3 offsetof(CABACContext, low)
 *   %4 offsetof(CABACContext, bytestream)
 *   %5 offsetof(CABACContext, bytestream_end)
 *   %6 offsetof(CABACContext, range)
 * %eax and %edx are clobbered; the context is updated through memory.
 *
 * @param c   CABAC context
 * @param val value whose sign is conditionally flipped
 * @return val, either unchanged or negated
 */
static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
|
|
{
|
|
x86_reg tmp;
|
|
__asm__ volatile(
|
|
"movl %c6(%2), %k1 \n\t"
|
|
"movl %c3(%2), %%eax \n\t"
|
|
"shl $17, %k1 \n\t"
|
|
"add %%eax, %%eax \n\t"
|
|
"sub %k1, %%eax \n\t"
|
|
"cltd \n\t"
|
|
"and %%edx, %k1 \n\t"
|
|
"add %k1, %%eax \n\t"
|
|
"xor %%edx, %%ecx \n\t"
|
|
"sub %%edx, %%ecx \n\t"
|
|
"test %%ax, %%ax \n\t"
|
|
"jnz 1f \n\t"
|
|
"mov %c4(%2), %1 \n\t"
|
|
"subl $0xFFFF, %%eax \n\t"
|
|
"movzwl (%1), %%edx \n\t"
|
|
"bswap %%edx \n\t"
|
|
"shrl $15, %%edx \n\t"
|
|
"addl %%edx, %%eax \n\t"
|
|
"cmp %c5(%2), %1 \n\t"
|
|
"jge 1f \n\t"
|
|
"add"OPSIZE" $2, %c4(%2) \n\t"
|
|
"1: \n\t"
|
|
"movl %%eax, %c3(%2) \n\t"
|
|
|
|
: "+c"(val), "=&r"(tmp)
|
|
: "r"(c),
|
|
"i"(offsetof(CABACContext, low)),
|
|
"i"(offsetof(CABACContext, bytestream)),
|
|
"i"(offsetof(CABACContext, bytestream_end)),
|
|
"i"(offsetof(CABACContext, range))
|
|
: "%eax", "%edx", "memory"
|
|
);
|
|
return val;
|
|
}
|
|
|
|
#endif /* HAVE_INLINE_ASM */
|
|
#endif /* AVCODEC_X86_CABAC_H */
|