mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
Merge remote-tracking branch 'qatar/master'
* qatar/master: get_bits: remove x86 inline asm in A32 bitstream reader doc: Remove outdated information about our issue tracker avidec: Factor out the sync fucntionality. fate-aac: Expand coverage. ac3dsp: add x86-optimized versions of ac3dsp.extract_exponents(). ac3dsp: simplify extract_exponents() now that it does not need to do clipping. ac3enc: clip coefficients after MDCT. ac3enc: add int32_t array clipping function to DSPUtil, including x86 versions. swscale: for >8bit scaling, read in native bit-depth. matroskadec: matroska_read_seek after after EBML_STOP leads to failure. doxygen: fix usage of @file directive in libavutil/{dict,file}.h doxygen: Help doxygen parser to understand the DECLARE_ALIGNED and offsetof macros Conflicts: doc/issue_tracker.txt libavformat/avidec.c libavutil/dict.h libswscale/swscale.c libswscale/utils.c tests/ref/lavfi/pixfmts_scale Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
3074f03a07
1
Doxyfile
1
Doxyfile
@ -1160,6 +1160,7 @@ INCLUDE_FILE_PATTERNS =
|
||||
|
||||
PREDEFINED = __attribute__(x)="" "RENAME(x)=x ## _TMPL" "DEF(x)=x ## _TMPL" \
|
||||
HAVE_AV_CONFIG_H HAVE_MMX HAVE_MMX2 HAVE_AMD3DNOW \
|
||||
"DECLARE_ALIGNED(a,t,n)=t n" "offsetof(x,y)=0x42" \
|
||||
|
||||
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
|
||||
# this tag can be used to specify a list of macro names that should be expanded.
|
||||
|
@ -164,21 +164,8 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nb_coefs; i++) {
|
||||
int e;
|
||||
int v = abs(coef[i]);
|
||||
if (v == 0)
|
||||
e = 24;
|
||||
else {
|
||||
e = 23 - av_log2(v);
|
||||
if (e >= 24) {
|
||||
e = 24;
|
||||
coef[i] = 0;
|
||||
} else if (e < 0) {
|
||||
e = 0;
|
||||
coef[i] = av_clip(coef[i], -16777215, 16777215);
|
||||
}
|
||||
}
|
||||
exp[i] = e;
|
||||
exp[i] = v ? 23 - av_log2(v) : 24;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -50,12 +50,16 @@
|
||||
#if CONFIG_AC3ENC_FLOAT
|
||||
#define AC3_NAME(x) ff_ac3_float_ ## x
|
||||
#define MAC_COEF(d,a,b) ((d)+=(a)*(b))
|
||||
#define COEF_MIN (-16777215.0/16777216.0)
|
||||
#define COEF_MAX ( 16777215.0/16777216.0)
|
||||
typedef float SampleType;
|
||||
typedef float CoefType;
|
||||
typedef float CoefSumType;
|
||||
#else
|
||||
#define AC3_NAME(x) ff_ac3_fixed_ ## x
|
||||
#define MAC_COEF(d,a,b) MAC64(d,a,b)
|
||||
#define COEF_MIN -16777215
|
||||
#define COEF_MAX 16777215
|
||||
typedef int16_t SampleType;
|
||||
typedef int32_t CoefType;
|
||||
typedef int64_t CoefSumType;
|
||||
|
@ -104,6 +104,15 @@ static void scale_coefficients(AC3EncodeContext *s)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Clip MDCT coefficients to allowable range.
|
||||
*/
|
||||
static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len)
|
||||
{
|
||||
dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
|
||||
}
|
||||
|
||||
|
||||
static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx)
|
||||
{
|
||||
AC3EncodeContext *s = avctx->priv_data;
|
||||
|
@ -111,6 +111,15 @@ static void scale_coefficients(AC3EncodeContext *s)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Clip MDCT coefficients to allowable range.
|
||||
*/
|
||||
static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len)
|
||||
{
|
||||
dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
|
||||
}
|
||||
|
||||
|
||||
#if CONFIG_AC3_ENCODER
|
||||
AVCodec ff_ac3_float_encoder = {
|
||||
"ac3_float",
|
||||
|
@ -41,6 +41,8 @@ static void apply_window(DSPContext *dsp, SampleType *output,
|
||||
|
||||
static int normalize_samples(AC3EncodeContext *s);
|
||||
|
||||
static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len);
|
||||
|
||||
|
||||
int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s)
|
||||
{
|
||||
@ -171,8 +173,8 @@ static void apply_channel_coupling(AC3EncodeContext *s)
|
||||
cpl_coef[i] += ch_coef[i];
|
||||
}
|
||||
|
||||
/* coefficients must be clipped to +/- 1.0 in order to be encoded */
|
||||
s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs);
|
||||
/* coefficients must be clipped in order to be encoded */
|
||||
clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs);
|
||||
|
||||
/* scale coupling coefficients from float to 24-bit fixed-point */
|
||||
s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start],
|
||||
@ -300,6 +302,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
|
||||
if (!block->cpl_in_use || !block->new_cpl_coords)
|
||||
continue;
|
||||
|
||||
clip_coefficients(&s->dsp, cpl_coords[blk][1], s->fbw_channels * 16);
|
||||
s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1],
|
||||
cpl_coords[blk][1],
|
||||
s->fbw_channels * 16);
|
||||
@ -433,7 +436,11 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
|
||||
|
||||
apply_mdct(s);
|
||||
|
||||
scale_coefficients(s);
|
||||
if (s->fixed_point)
|
||||
scale_coefficients(s);
|
||||
|
||||
clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
|
||||
AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
|
||||
|
||||
s->cpl_on = s->cpl_enabled;
|
||||
ff_ac3_compute_coupling_strategy(s);
|
||||
@ -443,6 +450,9 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
|
||||
|
||||
compute_rematrixing_strategy(s);
|
||||
|
||||
if (!s->fixed_point)
|
||||
scale_coefficients(s);
|
||||
|
||||
ff_ac3_apply_rematrixing(s);
|
||||
|
||||
ff_ac3_process_exponents(s);
|
||||
|
@ -2664,6 +2664,22 @@ static void apply_window_int16_c(int16_t *output, const int16_t *input,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C reference implementation of DSPContext.vector_clip_int32.
 *
 * Clips every element of src to [min, max] and stores it in dst.
 * Elements are handled in groups of 8 and the group loop always runs at
 * least once, so len has to be a non-zero multiple of 8.
 * dst may be the same buffer as src.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int k;

        /* one group of 8, read/write order identical to an unrolled copy */
        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);

        dst += 8;
        src += 8;
        len -= 8;
    } while (len > 0);
}
|
||||
|
||||
#define W0 2048
|
||||
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
|
||||
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
|
||||
@ -3106,6 +3122,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||
c->scalarproduct_int16 = scalarproduct_int16_c;
|
||||
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
|
||||
c->apply_window_int16 = apply_window_int16_c;
|
||||
c->vector_clip_int32 = vector_clip_int32_c;
|
||||
c->scalarproduct_float = scalarproduct_float_c;
|
||||
c->butterflies_float = butterflies_float_c;
|
||||
c->vector_fmul_scalar = vector_fmul_scalar_c;
|
||||
|
@ -553,6 +553,22 @@ typedef struct DSPContext {
|
||||
void (*apply_window_int16)(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
|
||||
/**
|
||||
* Clip each element in an array of int32_t to a given minimum and maximum value.
|
||||
* @param dst destination array
|
||||
* constraints: 16-byte aligned
|
||||
* @param src source array
|
||||
* constraints: 16-byte aligned
|
||||
* @param min minimum value
|
||||
* constraints: must be in the range [-(1<<24), 1<<24]
|
||||
* @param max maximum value
|
||||
* constraints: must be in the range [-(1<<24), 1<<24]
|
||||
* @param len number of elements in the array
|
||||
* constraints: multiple of 32 greater than zero
|
||||
*/
|
||||
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
|
||||
/* rv30 functions */
|
||||
qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
|
||||
qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
|
||||
|
@ -201,19 +201,11 @@ static inline void skip_bits_long(GetBitContext *s, int n){
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#if ARCH_X86
|
||||
# define SKIP_CACHE(name, gb, num) \
|
||||
__asm__("shldl %2, %1, %0 \n\t" \
|
||||
"shll %2, %1 \n\t" \
|
||||
: "+r" (name##_cache0), "+r" (name##_cache1) \
|
||||
: "Ic" ((uint8_t)(num)))
|
||||
#else
|
||||
# define SKIP_CACHE(name, gb, num) do { \
|
||||
name##_cache0 <<= (num); \
|
||||
name##_cache0 |= NEG_USR32(name##_cache1,num); \
|
||||
name##_cache1 <<= (num); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
# define SKIP_COUNTER(name, gb, num) name##_bit_count += (num)
|
||||
|
||||
|
@ -32,6 +32,11 @@ cextern ac3_bap_bits
|
||||
pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
|
||||
pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
|
||||
|
||||
; used in ff_ac3_extract_exponents()
|
||||
pd_1: times 4 dd 1
|
||||
pd_151: times 4 dd 151
|
||||
pb_shuf_4dwb: db 0, 4, 8, 12
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
|
||||
movd eax, m0
|
||||
add eax, sumd
|
||||
RET
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
; Packed 32-bit absolute value without pabsd: %1 = abs(%1).
; %2 is a scratch register and is overwritten with the sign mask.
%macro PABSD_MMX 2 ; src/dst, tmp
|
||||
pxor %2, %2
|
||||
; tmp = all-ones in every dword where 0 > src, i.e. where src is negative
pcmpgtd %2, %1
|
||||
; negative lanes: (x ^ -1) - (-1) = ~x + 1 = -x ; other lanes: (x ^ 0) - 0 = x
pxor %1, %2
|
||||
psubd %1, %2
|
||||
%endmacro
|
||||
|
||||
%macro PABSD_SSSE3 1-2 ; src/dst, unused
|
||||
pabsd %1, %1
|
||||
%endmacro
|
||||
|
||||
; 3DNow! version of ff_ac3_extract_exponents(exp, coef, nb_coefs).
; For every coefficient it stores 151 - (biased float exponent of 2*|coef|+1),
; which is 24 for coef == 0 and 23 - floor(log2(|coef|)) otherwise.
; Four coefficients are handled per loop iteration.
; NOTE(review): nb_coefs is presumably a multiple of 4 - confirm against callers.
%ifdef HAVE_AMD3DNOW
|
||||
INIT_MMX
|
||||
cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
|
||||
; point both arrays at their ends and count len from -nb_coefs up to 0
add expq, lenq
|
||||
lea coefq, [coefq+4*lenq]
|
||||
neg lenq
|
||||
movq m3, [pd_1]
|
||||
movq m4, [pd_151]
|
||||
.loop:
|
||||
; load 2+2 32-bit coefficients
movq m0, [coefq+4*lenq ]
|
||||
movq m1, [coefq+4*lenq+8]
|
||||
PABSD_MMX m0, m2
|
||||
PABSD_MMX m1, m2
|
||||
; v = 2*|coef| + 1 so that zero still has a defined float exponent
pslld m0, 1
|
||||
por m0, m3
|
||||
; convert to float and keep only the biased exponent field
pi2fd m2, m0
|
||||
psrld m2, 23
|
||||
; exponent = 151 - biased exponent
movq m0, m4
|
||||
psubd m0, m2
|
||||
; same computation for the second pair
pslld m1, 1
|
||||
por m1, m3
|
||||
pi2fd m2, m1
|
||||
psrld m2, 23
|
||||
movq m1, m4
|
||||
psubd m1, m2
|
||||
; narrow each pair of dwords to two bytes (with saturation)
packssdw m0, m0
|
||||
packuswb m0, m0
|
||||
packssdw m1, m1
|
||||
packuswb m1, m1
|
||||
; low dword of m0 = the four exponent bytes in coefficient order
punpcklwd m0, m1
|
||||
movd [expq+lenq], m0
|
||||
add lenq, 4
|
||||
jl .loop
|
||||
REP_RET
|
||||
%endif
|
||||
|
||||
; Expands to ff_ac3_extract_exponents_<%1>(uint8_t *exp, int32_t *coef,
;                                          int nb_coefs).
; %1 is the function-name suffix (sse2 / ssse3); PABSD must be %define'd to
; the matching absolute-value macro before instantiation.
;
; For every coefficient the exponent is 151 minus the biased float exponent
; of 2*|coef|+1, i.e. 24 for coef == 0 and 23 - floor(log2(|coef|)) otherwise.
; Four coefficients are handled per loop iteration.
%macro AC3_EXTRACT_EXPONENTS 1
cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
    ; point both arrays at their ends and count len from -nb_coefs up to 0
    add       expq, lenq
    lea      coefq, [coefq+4*lenq]
    neg       lenq
    mova        m2, [pd_1]
    mova        m3, [pd_151]
.loop:
    ; move 4 32-bit coefs to xmm0
    mova        m0, [coefq+4*lenq]
    ; absolute value
    PABSD       m0, m1
    ; convert to float and extract exponents
    pslld       m0, 1
    por         m0, m2
    cvtdq2ps    m1, m0
    psrld       m1, 23
    mova        m0, m3
    psubd       m0, m1
    ; move the lowest byte in each of 4 dwords to the low dword
    ; NOTE: the low bytes must not simply be gathered with pshufb. For
    ;       |coef| = 16777215 the value 2*|coef|+1 = 2^25-1 is rounded up to
    ;       2^25 by cvtdq2ps, so the dword result is -1 and its low byte is
    ;       0xFF. The saturating packs clip -1 to 0, which is the correct
    ;       exponent, so they are used for every variant.
    packssdw    m0, m0
    packuswb    m0, m0
    movd  [expq+lenq], m0

    add       lenq, 4
    jl .loop
    REP_RET
%endmacro
|
||||
|
||||
%ifdef HAVE_SSE
|
||||
INIT_XMM
|
||||
%define PABSD PABSD_MMX
|
||||
AC3_EXTRACT_EXPONENTS sse2
|
||||
%ifdef HAVE_SSSE3
|
||||
%define PABSD PABSD_SSSE3
|
||||
AC3_EXTRACT_EXPONENTS ssse3
|
||||
%endif
|
||||
%endif
|
||||
|
@ -44,6 +44,10 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i
|
||||
|
||||
extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
|
||||
|
||||
extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
|
||||
extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
|
||||
extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
|
||||
|
||||
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
|
||||
{
|
||||
int mm_flags = av_get_cpu_flags();
|
||||
@ -56,6 +60,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
|
||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
|
||||
}
|
||||
if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
|
||||
c->extract_exponents = ff_ac3_extract_exponents_3dnow;
|
||||
if (!bit_exact) {
|
||||
c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
|
||||
}
|
||||
@ -72,6 +77,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
|
||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
|
||||
c->float_to_fixed24 = ff_float_to_fixed24_sse2;
|
||||
c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
|
||||
c->extract_exponents = ff_ac3_extract_exponents_sse2;
|
||||
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
||||
c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
|
||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
|
||||
@ -79,6 +85,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
|
||||
}
|
||||
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
|
||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
|
||||
if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
|
||||
c->extract_exponents = ff_ac3_extract_exponents_ssse3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -2333,6 +2333,15 @@ int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i
|
||||
|
||||
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
||||
|
||||
void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
|
||||
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
{
|
||||
int mm_flags = av_get_cpu_flags();
|
||||
@ -2473,6 +2482,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
|
||||
c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
|
||||
c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
|
||||
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
|
||||
#endif
|
||||
|
||||
if (mm_flags & AV_CPU_FLAG_MMX2) {
|
||||
@ -2756,6 +2767,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
#if HAVE_YASM
|
||||
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
|
||||
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
|
||||
if (mm_flags & AV_CPU_FLAG_ATOM) {
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_sse2_int;
|
||||
} else {
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
|
||||
}
|
||||
if (avctx->flags & CODEC_FLAG_BITEXACT) {
|
||||
c->apply_window_int16 = ff_apply_window_int16_sse2_ba;
|
||||
} else {
|
||||
@ -2781,6 +2797,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
|
||||
#if HAVE_YASM
|
||||
c->vector_clip_int32 = ff_vector_clip_int32_sse41;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HAVE_AVX && HAVE_YASM
|
||||
if (mm_flags & AV_CPU_FLAG_AVX) {
|
||||
if (bit_depth == 10) {
|
||||
|
@ -1048,3 +1048,118 @@ emu_edge sse
|
||||
%ifdef ARCH_X86_32
|
||||
emu_edge mmx
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
|
||||
; int32_t max, unsigned int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; Packed signed 32-bit minimum without pminsd: %1 = min(%1, %2).
; %3 is a scratch register and is overwritten with the comparison mask.
%macro PMINSD_MMX 3 ; dst, src, tmp
|
||||
mova %3, %2
|
||||
; tmp = all-ones in every dword where src > dst (dst is already the minimum)
pcmpgtd %3, %1
|
||||
; dst = ((dst ^ src) & tmp) ^ src: keeps dst where tmp is set, picks src elsewhere
pxor %1, %2
|
||||
pand %1, %3
|
||||
pxor %1, %2
|
||||
%endmacro
|
||||
|
||||
; Packed signed 32-bit maximum without pmaxsd: %1 = max(%1, %2).
; %3 is a scratch register and is overwritten.
%macro PMAXSD_MMX 3 ; dst, src, tmp
|
||||
mova %3, %1
|
||||
; tmp = all-ones in every dword where dst > src
pcmpgtd %3, %2
|
||||
; keep dst only in the lanes where it is the larger value
pand %1, %3
|
||||
; tmp = src in the remaining lanes, zero elsewhere
pandn %3, %2
|
||||
; merge the two selections
por %1, %3
|
||||
%endmacro
|
||||
|
||||
%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
|
||||
PMINSD_MMX %1, %3, %4
|
||||
PMAXSD_MMX %1, %2, %4
|
||||
%endmacro
|
||||
|
||||
; Clip packed signed 32-bit integers by going through single-precision float:
; %1 = int(max(min(float(%1), %3), %2)).
; %2 and %3 must already hold the limits as packed floats, not integers.
; NOTE(review): floats represent integers exactly only up to 2^24 in
; magnitude, so this presumably relies on the limits staying within that
; range - confirm against the vector_clip_int32 contract.
%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
|
||||
cvtdq2ps %1, %1
|
||||
minps %1, %3
|
||||
maxps %1, %2
|
||||
cvtps2dq %1, %1
|
||||
%endmacro
|
||||
|
||||
%macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused
|
||||
pminsd %1, %3
|
||||
pmaxsd %1, %2
|
||||
%endmacro
|
||||
|
||||
%macro SPLATD_MMX 1
|
||||
punpckldq %1, %1
|
||||
%endmacro
|
||||
|
||||
%macro SPLATD_SSE2 1
|
||||
pshufd %1, %1, 0
|
||||
%endmacro
|
||||
|
||||
; Expands to ff_vector_clip_int32_<%1>(dst, src, min, max, len).
;   %1  function-name suffix; "sse2" selects the float-based path, where the
;       limits are converted to float with cvtsi2ss
;   %2  number of XMM registers used (passed to cglobal)
;   %3  number of unrolled repetitions per loop iteration
;   %4  1 to also use m7-m10 in each repetition (8 vectors instead of 4);
;       only valid where m8-m10 exist
; SPLATD and CLIPD must be %define'd to the matching variants before
; instantiation. m4/m5 hold the broadcast min/max, m6 is CLIPD scratch.
;
; Each repetition handles 4*(1+%4) consecutive vectors. %%i is the index of
; the first vector of the current repetition and is added to the per-vector
; index, so successive repetitions cover successive vectors. (A multiplicative
; offset would make the second repetition touch vectors 0, 2, 4, 6 and never
; process vectors 5 and 7.)
%macro VECTOR_CLIP_INT32 4
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%ifidn %1, sse2
    cvtsi2ss  m4, minm
    cvtsi2ss  m5, maxm
%else
    movd      m4, minm
    movd      m5, maxm
%endif
    SPLATD    m4
    SPLATD    m5
.loop:
%assign %%i 0
%rep %3
    mova      m0,  [srcq+mmsize*(0+%%i)]
    mova      m1,  [srcq+mmsize*(1+%%i)]
    mova      m2,  [srcq+mmsize*(2+%%i)]
    mova      m3,  [srcq+mmsize*(3+%%i)]
%if %4
    mova      m7,  [srcq+mmsize*(4+%%i)]
    mova      m8,  [srcq+mmsize*(5+%%i)]
    mova      m9,  [srcq+mmsize*(6+%%i)]
    mova      m10, [srcq+mmsize*(7+%%i)]
%endif
    CLIPD  m0,  m4, m5, m6
    CLIPD  m1,  m4, m5, m6
    CLIPD  m2,  m4, m5, m6
    CLIPD  m3,  m4, m5, m6
%if %4
    CLIPD  m7,  m4, m5, m6
    CLIPD  m8,  m4, m5, m6
    CLIPD  m9,  m4, m5, m6
    CLIPD  m10, m4, m5, m6
%endif
    mova  [dstq+mmsize*(0+%%i)], m0
    mova  [dstq+mmsize*(1+%%i)], m1
    mova  [dstq+mmsize*(2+%%i)], m2
    mova  [dstq+mmsize*(3+%%i)], m3
%if %4
    mova  [dstq+mmsize*(4+%%i)], m7
    mova  [dstq+mmsize*(5+%%i)], m8
    mova  [dstq+mmsize*(6+%%i)], m9
    mova  [dstq+mmsize*(7+%%i)], m10
%endif
%assign %%i %%i+4*(1+%4)
%endrep
    ; bytes consumed per iteration: %3 repetitions * 4*(1+%4) vectors * mmsize
    add     srcq, mmsize*4*%3*(1+%4)
    add     dstq, mmsize*4*%3*(1+%4)
    ; elements consumed per iteration: bytes / 4
    sub     lend, mmsize*%3*(1+%4)
    jg .loop
    REP_RET
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
%define SPLATD SPLATD_MMX
|
||||
%define CLIPD CLIPD_MMX
|
||||
VECTOR_CLIP_INT32 mmx, 0, 1, 0
|
||||
INIT_XMM
|
||||
%define SPLATD SPLATD_SSE2
|
||||
VECTOR_CLIP_INT32 sse2_int, 6, 1, 0
|
||||
%define CLIPD CLIPD_SSE2
|
||||
VECTOR_CLIP_INT32 sse2, 6, 2, 0
|
||||
%define CLIPD CLIPD_SSE41
|
||||
%ifdef m8
|
||||
VECTOR_CLIP_INT32 sse41, 11, 1, 1
|
||||
%else
|
||||
VECTOR_CLIP_INT32 sse41, 6, 1, 0
|
||||
%endif
|
||||
|
@ -861,13 +861,137 @@ static int get_stream_idx(int *d){
|
||||
}
|
||||
}
|
||||
|
||||
static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
|
||||
/**
 * Scan forward from the current read position for the next packet chunk.
 *
 * Bytes are shifted one at a time through an 8-entry window: d[0..3] is the
 * candidate chunk tag and d[4..7] its little-endian size. ix##, ##ix, JUNK
 * and idx1 chunks are skipped, a stray LIST header is stepped over, chunks of
 * discarded streams are skipped and ##pc palette chunks are consumed into the
 * stream's palette. After each of these the scan restarts.
 *
 * When an acceptable ##xx chunk is found, avi->stream_index and the stream's
 * packet_size / remaining are set, an index entry is added if the chunk lies
 * beyond the last indexed position, and the reader is left just after the
 * 8-byte chunk header.
 *
 * @return 0 if a packet chunk was found, AVERROR_EOF if url_feof() became
 *         true first
 */
static int avi_sync(AVFormatContext *s)
{
    AVIContext *avi = s->priv_data;
    AVIOContext *pb = s->pb;
    int n, d[8];
    unsigned int size;
    int64_t i, sync;

start_sync:
    /* every entry becomes -1, marking the window as not yet filled */
    memset(d, -1, sizeof(d));
    for(i=sync=avio_tell(pb); !url_feof(pb); i++) {
        int j;

        for(j=0; j<7; j++)
            d[j]= d[j+1];
        d[7]= avio_r8(pb);

        /* Assemble the size in unsigned arithmetic: shifting a byte >= 0x80
         * by 24, or the -1 "unfilled" marker by anything, is undefined for
         * signed int. The value is the same modulo 2^32. */
        size= (unsigned)d[4] + ((unsigned)d[5]<<8) + ((unsigned)d[6]<<16) + ((unsigned)d[7]<<24);

        n= get_stream_idx(d+2);
//av_log(s, AV_LOG_DEBUG, "%X %X %X %X %X %X %X %X %"PRId64" %d %d\n", d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], i, size, n);
        /* reject sizes running past the file and windows that are not full yet */
        if(i + (uint64_t)size > avi->fsize || d[0]<0)
            continue;

        //parse ix##
        if(  (d[0] == 'i' && d[1] == 'x' && n < s->nb_streams)
        //parse JUNK
           ||(d[0] == 'J' && d[1] == 'U' && d[2] == 'N' && d[3] == 'K')
           ||(d[0] == 'i' && d[1] == 'd' && d[2] == 'x' && d[3] == '1')){
            avio_skip(pb, size);
//av_log(s, AV_LOG_DEBUG, "SKIP\n");
            goto start_sync;
        }

        //parse stray LIST
        if(d[0] == 'L' && d[1] == 'I' && d[2] == 'S' && d[3] == 'T'){
            avio_skip(pb, 4);
            goto start_sync;
        }

        n= get_stream_idx(d);

        if(!((i-avi->last_pkt_pos)&1) && get_stream_idx(d+1) < s->nb_streams)
            continue;

        //detect ##ix chunk and skip
        if(d[2] == 'i' && d[3] == 'x' && n < s->nb_streams){
            avio_skip(pb, size);
            goto start_sync;
        }

        //parse ##dc/##wb
        if(n < s->nb_streams){
            AVStream *st;
            AVIStream *ast;
            st = s->streams[n];
            ast = st->priv_data;

            if(s->nb_streams>=2){
                AVStream *st1  = s->streams[1];
                AVIStream *ast1= st1->priv_data;
                //workaround for broken small-file-bug402.avi
                if(   d[2] == 'w' && d[3] == 'b'
                   && n==0
                   && st ->codec->codec_type == AVMEDIA_TYPE_VIDEO
                   && st1->codec->codec_type == AVMEDIA_TYPE_AUDIO
                   && ast->prefix == 'd'*256+'c'
                   && (d[2]*256+d[3] == ast1->prefix || !ast1->prefix_count)
                  ){
                    n=1;
                    st = st1;
                    ast = ast1;
                    av_log(s, AV_LOG_WARNING, "Invalid stream + prefix combination, assuming audio.\n");
                }
            }


            if(   (st->discard >= AVDISCARD_DEFAULT && size==0)
               /*|| (st->discard >= AVDISCARD_NONKEY && !(pkt->flags & AV_PKT_FLAG_KEY))*/ //FIXME needs a little reordering
               || st->discard >= AVDISCARD_ALL){
                ast->frame_offset += get_duration(ast, size);
                avio_skip(pb, size);
                goto start_sync;
            }

            if (d[2] == 'p' && d[3] == 'c' && size<=4*256+4) {
                int k = avio_r8(pb);
                int last = (k + avio_r8(pb) - 1) & 0xFF;

                avio_rl16(pb); //flags

                for (; k <= last; k++)
                    ast->pal[k] = avio_rb32(pb)>>8;// b + (g << 8) + (r << 16);
                ast->has_pal= 1;
                goto start_sync;
            } else if(   ((ast->prefix_count<5 || sync+9 > i) && d[2]<128 && d[3]<128) ||
                         d[2]*256+d[3] == ast->prefix /*||
                         (d[2] == 'd' && d[3] == 'c') ||
                         (d[2] == 'w' && d[3] == 'b')*/) {

//av_log(s, AV_LOG_DEBUG, "OK\n");
                if(d[2]*256+d[3] == ast->prefix)
                    ast->prefix_count++;
                else{
                    ast->prefix= d[2]*256+d[3];
                    ast->prefix_count= 0;
                }

                avi->stream_index= n;
                ast->packet_size= size + 8;
                ast->remaining= size;

                if(size || !ast->sample_size){
                    /* position of the chunk header that was just consumed */
                    uint64_t pos= avio_tell(pb) - 8;
                    if(!st->index_entries || !st->nb_index_entries || st->index_entries[st->nb_index_entries - 1].pos < pos){
                        av_add_index_entry(st, pos, ast->frame_offset, size, 0, AVINDEX_KEYFRAME);
                    }
                }
                return 0;
            }
        }
    }

    return AVERROR_EOF;
}
|
||||
|
||||
static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
|
||||
{
|
||||
AVIContext *avi = s->priv_data;
|
||||
AVIOContext *pb = s->pb;
|
||||
int err;
|
||||
void* dstr;
|
||||
|
||||
if (CONFIG_DV_DEMUXER && avi->dv_demux) {
|
||||
@ -1041,121 +1165,9 @@ resync:
|
||||
return size;
|
||||
}
|
||||
|
||||
memset(d, -1, sizeof(int)*8);
|
||||
for(i=sync=avio_tell(pb); !url_feof(pb); i++) {
|
||||
int j;
|
||||
|
||||
for(j=0; j<7; j++)
|
||||
d[j]= d[j+1];
|
||||
d[7]= avio_r8(pb);
|
||||
|
||||
size= d[4] + (d[5]<<8) + (d[6]<<16) + (d[7]<<24);
|
||||
|
||||
n= get_stream_idx(d+2);
|
||||
//av_log(s, AV_LOG_DEBUG, "%X %X %X %X %X %X %X %X %"PRId64" %d %d\n", d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], i, size, n);
|
||||
if(i + (uint64_t)size > avi->fsize || d[0]<0)
|
||||
continue;
|
||||
|
||||
//parse ix##
|
||||
if( (d[0] == 'i' && d[1] == 'x' && n < s->nb_streams)
|
||||
//parse JUNK
|
||||
||(d[0] == 'J' && d[1] == 'U' && d[2] == 'N' && d[3] == 'K')
|
||||
||(d[0] == 'i' && d[1] == 'd' && d[2] == 'x' && d[3] == '1')){
|
||||
avio_skip(pb, size);
|
||||
//av_log(s, AV_LOG_DEBUG, "SKIP\n");
|
||||
goto resync;
|
||||
}
|
||||
|
||||
//parse stray LIST
|
||||
if(d[0] == 'L' && d[1] == 'I' && d[2] == 'S' && d[3] == 'T'){
|
||||
avio_skip(pb, 4);
|
||||
goto resync;
|
||||
}
|
||||
|
||||
n= get_stream_idx(d);
|
||||
|
||||
if(!((i-avi->last_pkt_pos)&1) && get_stream_idx(d+1) < s->nb_streams)
|
||||
continue;
|
||||
|
||||
//detect ##ix chunk and skip
|
||||
if(d[2] == 'i' && d[3] == 'x' && n < s->nb_streams){
|
||||
avio_skip(pb, size);
|
||||
goto resync;
|
||||
}
|
||||
|
||||
//parse ##dc/##wb
|
||||
if(n < s->nb_streams){
|
||||
AVStream *st;
|
||||
AVIStream *ast;
|
||||
st = s->streams[n];
|
||||
ast = st->priv_data;
|
||||
|
||||
if(s->nb_streams>=2){
|
||||
AVStream *st1 = s->streams[1];
|
||||
AVIStream *ast1= st1->priv_data;
|
||||
//workaround for broken small-file-bug402.avi
|
||||
if( d[2] == 'w' && d[3] == 'b'
|
||||
&& n==0
|
||||
&& st ->codec->codec_type == AVMEDIA_TYPE_VIDEO
|
||||
&& st1->codec->codec_type == AVMEDIA_TYPE_AUDIO
|
||||
&& ast->prefix == 'd'*256+'c'
|
||||
&& (d[2]*256+d[3] == ast1->prefix || !ast1->prefix_count)
|
||||
){
|
||||
n=1;
|
||||
st = st1;
|
||||
ast = ast1;
|
||||
av_log(s, AV_LOG_WARNING, "Invalid stream + prefix combination, assuming audio.\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (st->discard >= AVDISCARD_DEFAULT && size==0)
|
||||
/*|| (st->discard >= AVDISCARD_NONKEY && !(pkt->flags & AV_PKT_FLAG_KEY))*/ //FIXME needs a little reordering
|
||||
|| st->discard >= AVDISCARD_ALL){
|
||||
ast->frame_offset += get_duration(ast, size);
|
||||
avio_skip(pb, size);
|
||||
goto resync;
|
||||
}
|
||||
|
||||
if (d[2] == 'p' && d[3] == 'c' && size<=4*256+4) {
|
||||
int k = avio_r8(pb);
|
||||
int last = (k + avio_r8(pb) - 1) & 0xFF;
|
||||
|
||||
avio_rl16(pb); //flags
|
||||
|
||||
for (; k <= last; k++)
|
||||
ast->pal[k] = avio_rb32(pb)>>8;// b + (g << 8) + (r << 16);
|
||||
ast->has_pal= 1;
|
||||
goto resync;
|
||||
} else if( ((ast->prefix_count<5 || sync+9 > i) && d[2]<128 && d[3]<128) ||
|
||||
d[2]*256+d[3] == ast->prefix /*||
|
||||
(d[2] == 'd' && d[3] == 'c') ||
|
||||
(d[2] == 'w' && d[3] == 'b')*/) {
|
||||
|
||||
//av_log(s, AV_LOG_DEBUG, "OK\n");
|
||||
if(d[2]*256+d[3] == ast->prefix)
|
||||
ast->prefix_count++;
|
||||
else{
|
||||
ast->prefix= d[2]*256+d[3];
|
||||
ast->prefix_count= 0;
|
||||
}
|
||||
|
||||
avi->stream_index= n;
|
||||
ast->packet_size= size + 8;
|
||||
ast->remaining= size;
|
||||
|
||||
if(size || !ast->sample_size){
|
||||
uint64_t pos= avio_tell(pb) - 8;
|
||||
if(!st->index_entries || !st->nb_index_entries || st->index_entries[st->nb_index_entries - 1].pos < pos){
|
||||
av_add_index_entry(st, pos, ast->frame_offset, size, 0, AVINDEX_KEYFRAME);
|
||||
}
|
||||
}
|
||||
goto resync;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return AVERROR_EOF;
|
||||
if ((err = avi_sync(s)) < 0)
|
||||
return err;
|
||||
goto resync;
|
||||
}
|
||||
|
||||
/* XXX: We make the implicit supposition that the positions are sorted
|
||||
|
@ -1960,6 +1960,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
|
||||
|
||||
if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
|
||||
avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
|
||||
matroska->current_id = 0;
|
||||
while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
|
||||
matroska_clear_queue(matroska);
|
||||
if (matroska_parse_cluster(matroska) < 0)
|
||||
@ -1988,6 +1989,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
|
||||
}
|
||||
|
||||
avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET);
|
||||
matroska->current_id = 0;
|
||||
matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
|
||||
matroska->skip_to_timecode = st->index_entries[index].timestamp;
|
||||
matroska->done = 0;
|
||||
|
@ -18,7 +18,8 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file Public dictionary API.
|
||||
* @file
|
||||
* Public dictionary API.
|
||||
* @deprecated
|
||||
* AVDictionary is provided for compatibility with libav. It is both in
|
||||
* implementation as well as API inefficient. It does not scale and is
|
||||
|
@ -22,7 +22,8 @@
|
||||
#include "avutil.h"
|
||||
|
||||
/**
|
||||
* @file misc file utilities
|
||||
* @file
|
||||
* Misc file utilities.
|
||||
*/
|
||||
|
||||
/**
|
||||
|
@ -1783,53 +1783,6 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
|
||||
|
||||
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
|
||||
|
||||
// FIXME Maybe dither instead.
|
||||
static av_always_inline void
|
||||
yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
|
||||
const uint8_t *_srcU, const uint8_t *_srcV,
|
||||
int width, enum PixelFormat origin, int depth)
|
||||
{
|
||||
int i;
|
||||
const uint16_t *srcU = (const uint16_t *) _srcU;
|
||||
const uint16_t *srcV = (const uint16_t *) _srcV;
|
||||
|
||||
for (i = 0; i < width; i++) {
|
||||
dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
|
||||
dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
|
||||
}
|
||||
}
|
||||
|
||||
static av_always_inline void
|
||||
yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
|
||||
int width, enum PixelFormat origin, int depth)
|
||||
{
|
||||
int i;
|
||||
const uint16_t *srcY = (const uint16_t*)_srcY;
|
||||
|
||||
for (i = 0; i < width; i++)
|
||||
dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
|
||||
}
|
||||
|
||||
#undef input_pixel
|
||||
|
||||
#define YUV_NBPS(depth, BE_LE, origin) \
|
||||
static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
|
||||
const uint8_t *srcU, const uint8_t *srcV, \
|
||||
int width, uint32_t *unused) \
|
||||
{ \
|
||||
yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
|
||||
} \
|
||||
static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
|
||||
int width, uint32_t *unused) \
|
||||
{ \
|
||||
yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
|
||||
}
|
||||
|
||||
YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
|
||||
YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
|
||||
YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
|
||||
YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
|
||||
|
||||
static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
|
||||
int width, uint32_t *unused)
|
||||
{
|
||||
|
@ -2,10 +2,22 @@ FATE_AAC += fate-aac-al04_44
|
||||
fate-aac-al04_44: CMD = pcm -i $(SAMPLES)/aac/al04_44.mp4
|
||||
fate-aac-al04_44: REF = $(SAMPLES)/aac/al04_44.s16
|
||||
|
||||
FATE_AAC += fate-aac-al05_44
|
||||
fate-aac-al05_44: CMD = pcm -i $(SAMPLES)/aac/al05_44.mp4
|
||||
fate-aac-al05_44: REF = $(SAMPLES)/aac/al05_44.s16
|
||||
|
||||
FATE_AAC += fate-aac-al06_44
|
||||
fate-aac-al06_44: CMD = pcm -i $(SAMPLES)/aac/al06_44.mp4
|
||||
fate-aac-al06_44: REF = $(SAMPLES)/aac/al06_44.s16
|
||||
|
||||
FATE_AAC += fate-aac-al07_96
|
||||
fate-aac-al07_96: CMD = pcm -i $(SAMPLES)/aac/al07_96.mp4
|
||||
fate-aac-al07_96: REF = $(SAMPLES)/aac/al07_96.s16
|
||||
|
||||
FATE_AAC += fate-aac-al17_44
|
||||
fate-aac-al17_44: CMD = pcm -i $(SAMPLES)/aac/al17_44.mp4
|
||||
fate-aac-al17_44: REF = $(SAMPLES)/aac/al17_44.s16
|
||||
|
||||
FATE_AAC += fate-aac-am00_88
|
||||
fate-aac-am00_88: CMD = pcm -i $(SAMPLES)/aac/am00_88.mp4
|
||||
fate-aac-am00_88: REF = $(SAMPLES)/aac/am00_88.s16
|
||||
|
Loading…
Reference in New Issue
Block a user