1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

Merge remote-tracking branch 'qatar/master'

* qatar/master: (35 commits)
  flvdec: Do not call parse_keyframes_index with a NULL stream
  libspeexdec: include system headers before local headers
  libspeexdec: return meaningful error codes
  libspeexdec: cosmetics: reindent
  libspeexdec: decode one frame at a time.
  swscale: fix signed shift overflows in ff_yuv2rgb_c_init_tables()
  Move timefilter code from lavf to lavd.
  mov: add support for hdvd and pgapmetadata atoms
  mov: rename function _stik, some indentation cosmetics
  mov: rename function _int8 to remove ambiguity, some indentation cosmetics
  mov: parse the gnre atom
  mp3on4: check for allocation failures in decode_init_mp3on4()
  mp3on4: create a separate flush function for MP3onMP4.
  mp3on4: ensure that the frame channel count does not exceed the codec channel count.
  mp3on4: set channel layout
  mp3on4: fix the output channel order
  mp3on4: allocate temp buffer with av_malloc() instead of on the stack.
  mp3on4: copy MPADSPContext from first context to all contexts.
  fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
  fmtconvert: port int32_to_float_fmul_scalar() x86 inline asm to yasm
  ...

Conflicts:
	libavcodec/arm/h264dsp_init_arm.c
	libavcodec/h264.c
	libavcodec/h264.h
	libavcodec/h264_cabac.c
	libavcodec/h264_cavlc.c
	libavcodec/h264_ps.c
	libavcodec/h264dsp_template.c
	libavcodec/h264idct_template.c
	libavcodec/h264pred.c
	libavcodec/h264pred_template.c
	libavcodec/x86/h264dsp_mmx.c
	libavdevice/Makefile
	libavdevice/jack_audio.c
	libavformat/Makefile
	libavformat/flvdec.c
	libavformat/flvenc.c
	libavutil/pixfmt.h
	libswscale/utils.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2011-10-22 01:03:27 +02:00
commit aedc908601
47 changed files with 1112 additions and 991 deletions

View File

@ -67,6 +67,7 @@ easier to use. The changes are:
- aevalsrc audio source added - aevalsrc audio source added
- Ut Video decoder - Ut Video decoder
- Speex encoding via libspeex - Speex encoding via libspeex
- 4:2:2 H.264 decoding support
version 0.8: version 0.8:

View File

@ -32,47 +32,22 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0); int beta, int8_t *tc0);
void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den, void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
int weight, int offset); int log2_den, int weight, int offset);
void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den, void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height,
int weight, int offset); int log2_den, int weight, int offset);
void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den, void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height,
int weight, int offset); int log2_den, int weight, int offset);
void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den,
int weight, int offset);
void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride, void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights, int height, int log2_den, int weightd,
int offset); int weights, int offset);
void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride, void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights, int height, int log2_den, int weightd,
int offset); int weights, int offset);
void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride, void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights, int height, int log2_den, int weightd,
int offset); int weights, int offset);
void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride,
int log2_den, int weightd, int weights,
int offset);
void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
@ -101,23 +76,14 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
} }
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon;
c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon;
c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon;
c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon; c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon; c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon; c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon; c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon; c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon;
c->h264_idct_add = ff_h264_idct_add_neon; c->h264_idct_add = ff_h264_idct_add_neon;
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;

View File

@ -1592,7 +1592,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q2, q8 vmov q2, q8
vmov q3, q8 vmov q3, q8
1: subs ip, ip, #2 1: subs r3, r3, #2
vld1.8 {d20-d21},[r0,:128], r2 vld1.8 {d20-d21},[r0,:128], r2
\macd q2, d0, d20 \macd q2, d0, d20
pld [r0] pld [r0]
@ -1632,7 +1632,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #2 1: subs r3, r3, #2
vld1.8 {d4},[r0,:64], r2 vld1.8 {d4},[r0,:64], r2
\macd q1, d0, d4 \macd q1, d0, d4
pld [r0] pld [r0]
@ -1662,7 +1662,7 @@ endfunc
vdup.8 d1, r5 vdup.8 d1, r5
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #4 1: subs r3, r3, #4
vld1.32 {d4[0]},[r0,:32], r2 vld1.32 {d4[0]},[r0,:32], r2
vld1.32 {d4[1]},[r0,:32], r2 vld1.32 {d4[1]},[r0,:32], r2
\macd q1, d0, d4 \macd q1, d0, d4
@ -1700,16 +1700,17 @@ endfunc
.endm .endm
.macro biweight_func w .macro biweight_func w
function biweight_h264_pixels_\w\()_neon function ff_biweight_h264_pixels_\w\()_neon, export=1
push {r4-r6, lr} push {r4-r6, lr}
add r4, sp, #16 ldr r12, [sp, #16]
add r4, sp, #20
ldm r4, {r4-r6} ldm r4, {r4-r6}
lsr lr, r4, #31 lsr lr, r4, #31
add r6, r6, #1 add r6, r6, #1
eors lr, lr, r5, lsr #30 eors lr, lr, r5, lsr #30
orr r6, r6, #1 orr r6, r6, #1
vdup.16 q9, r3 vdup.16 q9, r12
lsl r6, r6, r3 lsl r6, r6, r12
vmvn q9, q9 vmvn q9, q9
vdup.16 q8, r6 vdup.16 q8, r6
mov r6, r0 mov r6, r0
@ -1730,34 +1731,15 @@ function biweight_h264_pixels_\w\()_neon
endfunc endfunc
.endm .endm
.macro biweight_entry w, h, b=1
function ff_biweight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b biweight_h264_pixels_\w\()_neon
.endif
endfunc
.endm
biweight_entry 16, 8
biweight_entry 16, 16, b=0
biweight_func 16 biweight_func 16
biweight_entry 8, 16
biweight_entry 8, 4
biweight_entry 8, 8, b=0
biweight_func 8 biweight_func 8
biweight_entry 4, 8
biweight_entry 4, 2
biweight_entry 4, 4, b=0
biweight_func 4 biweight_func 4
@ Weighted prediction @ Weighted prediction
.macro weight_16 add .macro weight_16 add
vdup.8 d0, r3 vdup.8 d0, r12
1: subs ip, ip, #2 1: subs r2, r2, #2
vld1.8 {d20-d21},[r0,:128], r1 vld1.8 {d20-d21},[r0,:128], r1
vmull.u8 q2, d0, d20 vmull.u8 q2, d0, d20
pld [r0] pld [r0]
@ -1785,8 +1767,8 @@ endfunc
.endm .endm
.macro weight_8 add .macro weight_8 add
vdup.8 d0, r3 vdup.8 d0, r12
1: subs ip, ip, #2 1: subs r2, r2, #2
vld1.8 {d4},[r0,:64], r1 vld1.8 {d4},[r0,:64], r1
vmull.u8 q1, d0, d4 vmull.u8 q1, d0, d4
pld [r0] pld [r0]
@ -1806,10 +1788,10 @@ endfunc
.endm .endm
.macro weight_4 add .macro weight_4 add
vdup.8 d0, r3 vdup.8 d0, r12
vmov q1, q8 vmov q1, q8
vmov q10, q8 vmov q10, q8
1: subs ip, ip, #4 1: subs r2, r2, #4
vld1.32 {d4[0]},[r0,:32], r1 vld1.32 {d4[0]},[r0,:32], r1
vld1.32 {d4[1]},[r0,:32], r1 vld1.32 {d4[1]},[r0,:32], r1
vmull.u8 q1, d0, d4 vmull.u8 q1, d0, d4
@ -1842,50 +1824,32 @@ endfunc
.endm .endm
.macro weight_func w .macro weight_func w
function weight_h264_pixels_\w\()_neon function ff_weight_h264_pixels_\w\()_neon, export=1
push {r4, lr} push {r4, lr}
ldr r4, [sp, #8] ldr r12, [sp, #8]
cmp r2, #1 ldr r4, [sp, #12]
lsl r4, r4, r2 cmp r3, #1
lsl r4, r4, r3
vdup.16 q8, r4 vdup.16 q8, r4
mov r4, r0 mov r4, r0
ble 20f ble 20f
rsb lr, r2, #1 rsb lr, r3, #1
vdup.16 q9, lr vdup.16 q9, lr
cmp r3, #0 cmp r12, #0
blt 10f blt 10f
weight_\w vhadd.s16 weight_\w vhadd.s16
10: rsb r3, r3, #0 10: rsb r12, r12, #0
weight_\w vhsub.s16 weight_\w vhsub.s16
20: rsb lr, r2, #0 20: rsb lr, r3, #0
vdup.16 q9, lr vdup.16 q9, lr
cmp r3, #0 cmp r12, #0
blt 10f blt 10f
weight_\w vadd.s16 weight_\w vadd.s16
10: rsb r3, r3, #0 10: rsb r12, r12, #0
weight_\w vsub.s16 weight_\w vsub.s16
endfunc endfunc
.endm .endm
.macro weight_entry w, h, b=1
function ff_weight_h264_pixels_\w\()x\h\()_neon, export=1
mov ip, #\h
.if \b
b weight_h264_pixels_\w\()_neon
.endif
endfunc
.endm
weight_entry 16, 8
weight_entry 16, 16, b=0
weight_func 16 weight_func 16
weight_entry 8, 16
weight_entry 8, 4
weight_entry 8, 8, b=0
weight_func 8 weight_func 8
weight_entry 4, 8
weight_entry 4, 2
weight_entry 4, 4, b=0
weight_func 4 weight_func 4

View File

@ -70,7 +70,15 @@ typedef struct FmtConvertContext {
long len, int channels); long len, int channels);
/** /**
* Convert an array of interleaved float to multiple arrays of float. * Convert multiple arrays of float to an array of interleaved float.
*
* @param dst destination array of interleaved float.
* constraints: 16-byte aligned
* @param src source array of float arrays, one for each channel.
* constraints: 16-byte aligned
* @param len number of elements to convert.
* constraints: multiple of 8
* @param channels number of channels
*/ */
void (*float_interleave)(float *dst, const float **src, unsigned int len, void (*float_interleave)(float *dst, const float **src, unsigned int len,
int channels); int channels);

View File

@ -460,11 +460,14 @@ static void chroma_dc_dct_c(DCTELEM *block){
} }
#endif #endif
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, static av_always_inline void
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, mc_dir_part(H264Context *h, Picture *pic, int n, int square,
int src_x_offset, int src_y_offset, int height, int delta, int list,
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int pixel_shift, int chroma444){ int src_x_offset, int src_y_offset,
qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
int pixel_shift, int chroma_idc)
{
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
@ -479,6 +482,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
const int full_my= my>>2; const int full_my= my>>2;
const int pic_width = 16*s->mb_width; const int pic_width = 16*s->mb_width;
const int pic_height = 16*s->mb_height >> MB_FIELD; const int pic_height = 16*s->mb_height >> MB_FIELD;
int ysh;
if(mx&7) extra_width -= 3; if(mx&7) extra_width -= 3;
if(my&7) extra_height -= 3; if(my&7) extra_height -= 3;
@ -487,7 +491,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
|| full_my < 0-extra_height || full_my < 0-extra_height
|| full_mx + 16/*FIXME*/ > pic_width + extra_width || full_mx + 16/*FIXME*/ > pic_width + extra_width
|| full_my + 16/*FIXME*/ > pic_height + extra_height){ || full_my + 16/*FIXME*/ > pic_height + extra_height){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
emu=1; emu=1;
} }
@ -499,7 +504,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
if(chroma444){ if(chroma_idc == 3 /* yuv444 */){
src_cb = pic->f.data[1] + offset; src_cb = pic->f.data[1] + offset;
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
@ -524,42 +529,55 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
return; return;
} }
if(MB_FIELD){ ysh = 3 - (chroma_idc == 2 /* yuv422 */);
if(chroma_idc == 1 /* yuv420 */ && MB_FIELD){
// chroma offset when predicting from a field of opposite parity // chroma offset when predicting from a field of opposite parity
my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1)); my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
} }
src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> 3) * h->mb_uvlinesize;
src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> 3) * h->mb_uvlinesize; src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
src_cb= s->edge_emu_buffer; src_cb= s->edge_emu_buffer;
} }
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
if(emu){ if(emu){
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
src_cr= s->edge_emu_buffer; src_cr= s->edge_emu_buffer;
} }
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
} }
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, static av_always_inline void
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, mc_part_std(H264Context *h, int n, int square, int height, int delta,
int x_offset, int y_offset, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, int x_offset, int y_offset,
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
int list0, int list1, int pixel_shift, int chroma444){ qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
int list0, int list1, int pixel_shift, int chroma_idc)
{
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
qpel_mc_func *qpix_op= qpix_put; qpel_mc_func *qpix_op= qpix_put;
h264_chroma_mc_func chroma_op= chroma_put; h264_chroma_mc_func chroma_op= chroma_put;
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
if(chroma444){ if (chroma_idc == 3 /* yuv444 */) {
dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
}else{ } else if (chroma_idc == 2 /* yuv422 */) {
dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
} else /* yuv420 */ {
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
} }
@ -568,9 +586,9 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
if(list0){ if(list0){
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 0, mc_dir_part(h, ref, n, square, height, delta, 0,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op, pixel_shift, chroma444); qpix_op, chroma_op, pixel_shift, chroma_idc);
qpix_op= qpix_avg; qpix_op= qpix_avg;
chroma_op= chroma_avg; chroma_op= chroma_avg;
@ -578,28 +596,36 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
if(list1){ if(list1){
Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
mc_dir_part(h, ref, n, square, chroma_height, delta, 1, mc_dir_part(h, ref, n, square, height, delta, 1,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op, pixel_shift, chroma444); qpix_op, chroma_op, pixel_shift, chroma_idc);
} }
} }
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, static av_always_inline void
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, mc_part_weighted(H264Context *h, int n, int square, int height, int delta,
int x_offset, int y_offset, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, int x_offset, int y_offset,
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
int list0, int list1, int pixel_shift, int chroma444){ h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
int list0, int list1, int pixel_shift, int chroma_idc){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
int chroma_height;
dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
if(chroma444){ if (chroma_idc == 3 /* yuv444 */) {
chroma_height = height;
chroma_weight_avg = luma_weight_avg; chroma_weight_avg = luma_weight_avg;
chroma_weight_op = luma_weight_op; chroma_weight_op = luma_weight_op;
dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
}else{ } else if (chroma_idc == 2 /* yuv422 */) {
chroma_height = height;
dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
} else /* yuv420 */ {
chroma_height = height >> 1;
dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;
} }
@ -615,27 +641,32 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
int refn0 = h->ref_cache[0][ scan8[n] ]; int refn0 = h->ref_cache[0][ scan8[n] ];
int refn1 = h->ref_cache[1][ scan8[n] ]; int refn1 = h->ref_cache[1][ scan8[n] ];
mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
dest_y, dest_cb, dest_cr, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); x_offset, y_offset, qpix_put, chroma_put,
mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, pixel_shift, chroma_idc);
mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
tmp_y, tmp_cb, tmp_cr, tmp_y, tmp_cb, tmp_cr,
x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444); x_offset, y_offset, qpix_put, chroma_put,
pixel_shift, chroma_idc);
if(h->use_weight == 2){ if(h->use_weight == 2){
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1]; int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
int weight1 = 64 - weight0; int weight1 = 64 - weight0;
luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); luma_weight_avg( dest_y, tmp_y, h-> mb_linesize,
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); height, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
chroma_height, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
chroma_height, 5, weight0, weight1, 0);
}else{ }else{
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom,
h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]); h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
} }
@ -643,42 +674,46 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
int list = list1 ? 1 : 0; int list = list1 ? 1 : 0;
int refn = h->ref_cache[list][ scan8[n] ]; int refn = h->ref_cache[list][ scan8[n] ];
Picture *ref= &h->ref_list[list][refn]; Picture *ref= &h->ref_list[list][refn];
mc_dir_part(h, ref, n, square, chroma_height, delta, list, mc_dir_part(h, ref, n, square, height, delta, list,
dest_y, dest_cb, dest_cr, x_offset, y_offset, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put, chroma_put, pixel_shift, chroma444); qpix_put, chroma_put, pixel_shift, chroma_idc);
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom,
h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]); h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
if(h->use_weight_chroma){ if(h->use_weight_chroma){
chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
} }
} }
} }
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, static av_always_inline void
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, mc_part(H264Context *h, int n, int square, int height, int delta,
int x_offset, int y_offset, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, int x_offset, int y_offset,
qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
h264_weight_func *weight_op, h264_biweight_func *weight_avg, qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
int list0, int list1, int pixel_shift, int chroma444){ h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int list0, int list1, int pixel_shift, int chroma_idc)
{
if((h->use_weight==2 && list0 && list1 if((h->use_weight==2 && list0 && list1
&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32)) && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
|| h->use_weight==1) || h->use_weight==1)
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, x_offset, y_offset, qpix_put, chroma_put,
weight_op[0], weight_op[3], weight_avg[0], weight_op[0], weight_op[1], weight_avg[0],
weight_avg[3], list0, list1, pixel_shift, chroma444); weight_avg[1], list0, list1, pixel_shift, chroma_idc);
else else
mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, qpix_avg, x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
chroma_avg, list0, list1, pixel_shift, chroma444); chroma_avg, list0, list1, pixel_shift, chroma_idc);
} }
static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){ static av_always_inline void
prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma_idc)
{
/* fetch pixels for estimated mv 4 macroblocks ahead /* fetch pixels for estimated mv 4 macroblocks ahead
* optimized for 64byte cache lines */ * optimized for 64byte cache lines */
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
@ -689,7 +724,7 @@ static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, in
uint8_t **src = h->ref_list[list][refn].f.data; uint8_t **src = h->ref_list[list][refn].f.data;
int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift); int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
s->dsp.prefetch(src[0]+off, s->linesize, 4); s->dsp.prefetch(src[0]+off, s->linesize, 4);
if(chroma444){ if (chroma_idc == 3 /* yuv444 */) {
s->dsp.prefetch(src[1]+off, s->linesize, 4); s->dsp.prefetch(src[1]+off, s->linesize, 4);
s->dsp.prefetch(src[2]+off, s->linesize, 4); s->dsp.prefetch(src[2]+off, s->linesize, 4);
}else{ }else{
@ -703,7 +738,8 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
h264_weight_func *weight_op, h264_biweight_func *weight_avg, h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int pixel_shift, int chroma444){ int pixel_shift, int chroma_idc)
{
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy; const int mb_xy= h->mb_xy;
const int mb_type = s->current_picture.f.mb_type[mb_xy]; const int mb_type = s->current_picture.f.mb_type[mb_xy];
@ -712,36 +748,36 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
await_references(h); await_references(h);
prefetch_motion(h, 0, pixel_shift, chroma444); prefetch_motion(h, 0, pixel_shift, chroma_idc);
if(IS_16X16(mb_type)){ if(IS_16X16(mb_type)){
mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
weight_op, weight_avg, weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
}else if(IS_16X8(mb_type)){ }else if(IS_16X8(mb_type)){
mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0, mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1], weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4, mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
&weight_op[1], &weight_avg[1], weight_op, weight_avg,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
}else if(IS_8X16(mb_type)){ }else if(IS_8X16(mb_type)){
mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2], &weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[2], &weight_avg[2], &weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
}else{ }else{
int i; int i;
@ -754,50 +790,72 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
int y_offset= (i&2)<<1; int y_offset= (i&2)<<1;
if(IS_SUB_8X8(sub_mb_type)){ if(IS_SUB_8X8(sub_mb_type)){
mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[3], &weight_avg[3], &weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
}else if(IS_SUB_8X4(sub_mb_type)){ }else if(IS_SUB_8X4(sub_mb_type)){
mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset, mc_part(h, n , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4], &weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[4], &weight_avg[4], &weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
}else if(IS_SUB_4X8(sub_mb_type)){ }else if(IS_SUB_4X8(sub_mb_type)){
mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, mc_part(h, n , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5], &weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[5], &weight_avg[5], &weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
}else{ }else{
int j; int j;
assert(IS_SUB_4X4(sub_mb_type)); assert(IS_SUB_4X4(sub_mb_type));
for(j=0; j<4; j++){ for(j=0; j<4; j++){
int sub_x_offset= x_offset + 2*(j&1); int sub_x_offset= x_offset + 2*(j&1);
int sub_y_offset= y_offset + (j&2); int sub_y_offset= y_offset + (j&2);
mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[6], &weight_avg[6], &weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma444); pixel_shift, chroma_idc);
} }
} }
} }
} }
prefetch_motion(h, 1, pixel_shift, chroma444); prefetch_motion(h, 1, pixel_shift, chroma_idc);
}
static av_always_inline void
hl_motion_420(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int pixel_shift)
{
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1);
}
static av_always_inline void
hl_motion_422(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int pixel_shift)
{
hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2);
} }
static void free_tables(H264Context *h, int free_rbsp){ static void free_tables(H264Context *h, int free_rbsp){
@ -1468,7 +1526,10 @@ static void decode_postinit(H264Context *h, int setup_finished){
ff_thread_finish_setup(s->avctx); ff_thread_finish_setup(s->avctx);
} }
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
uint8_t *src_cb, uint8_t *src_cr,
int linesize, int uvlinesize, int simple)
{
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
uint8_t *top_border; uint8_t *top_border;
int top_idx = 1; int top_idx = 1;
@ -1813,7 +1874,8 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
} }
} }
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift)
{
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mb_x= s->mb_x; const int mb_x= s->mb_x;
const int mb_y= s->mb_y; const int mb_y= s->mb_y;
@ -1827,7 +1889,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
/* is_h264 should always be true if SVQ3 is disabled. */ /* is_h264 should always be true if SVQ3 is disabled. */
const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
const int block_h = 16>>s->chroma_y_shift; const int block_h = 16 >> s->chroma_y_shift;
const int chroma422 = CHROMA422;
dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
@ -1844,8 +1907,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
block_offset = &h->block_offset[48]; block_offset = &h->block_offset[48];
if(mb_y&1){ //FIXME move out of this function? if(mb_y&1){ //FIXME move out of this function?
dest_y -= s->linesize*15; dest_y -= s->linesize*15;
dest_cb-= s->uvlinesize*(block_h-1); dest_cb-= s->uvlinesize * (block_h - 1);
dest_cr-= s->uvlinesize*(block_h-1); dest_cr-= s->uvlinesize * (block_h - 1);
} }
if(FRAME_MBAFF) { if(FRAME_MBAFF) {
int list; int list;
@ -1884,7 +1947,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
} }
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if (!h->sps.chroma_format_idc) { if (!h->sps.chroma_format_idc) {
for (i = 0; i < 8; i++) { for (i = 0; i < block_h; i++) {
uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
for (j = 0; j < 8; j++) { for (j = 0; j < 8; j++) {
@ -1911,13 +1974,13 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if (!h->sps.chroma_format_idc) { if (!h->sps.chroma_format_idc) {
for (i=0; i<8; i++) { for (i=0; i<8; i++) {
memset(dest_cb+ i*uvlinesize, 1 << (bit_depth - 1), 8); memset(dest_cb + i*uvlinesize, 1 << (bit_depth - 1), 8);
memset(dest_cr+ i*uvlinesize, 1 << (bit_depth - 1), 8); memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8);
} }
} else { } else {
for (i=0; i<block_h; i++) { for (i=0; i<block_h; i++) {
memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8);
memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8);
} }
} }
} }
@ -1937,11 +2000,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
if(h->deblocking_filter) if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift); xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
}else if(is_h264){ }else if(is_h264){
hl_motion(h, dest_y, dest_cb, dest_cr, if (chroma422) {
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, hl_motion_422(h, dest_y, dest_cb, dest_cr,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab, s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 0); h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab,
pixel_shift);
} else {
hl_motion_420(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab,
pixel_shift);
}
} }
hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0); hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
@ -1959,14 +2032,20 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
} }
if (chroma422) {
for(i=j*16+4; i<j*16+8; i++){
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
idct_add (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize);
}
}
} }
} }
}else{ }else{
if(is_h264){ if(is_h264){
int qp[2]; int qp[2];
if (CHROMA422) { if (chroma422) {
qp[0] = h->chroma_qp[0]+3; qp[0] = h->chroma_qp[0] + 3;
qp[1] = h->chroma_qp[1]+3; qp[1] = h->chroma_qp[1] + 3;
} else { } else {
qp[0] = h->chroma_qp[0]; qp[0] = h->chroma_qp[0];
qp[1] = h->chroma_qp[1]; qp[1] = h->chroma_qp[1];
@ -2086,7 +2165,7 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simpl
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab, h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 1); h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3);
} }
for (p = 0; p < plane_count; p++) for (p = 0; p < plane_count; p++)
@ -2690,6 +2769,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
case 9 : case 9 :
if (CHROMA444) if (CHROMA444)
s->avctx->pix_fmt = PIX_FMT_YUV444P9; s->avctx->pix_fmt = PIX_FMT_YUV444P9;
else if (CHROMA422)
s->avctx->pix_fmt = PIX_FMT_YUV422P9;
else else
s->avctx->pix_fmt = PIX_FMT_YUV420P9; s->avctx->pix_fmt = PIX_FMT_YUV420P9;
break; break;
@ -2708,7 +2789,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
s->avctx->pix_fmt = PIX_FMT_GBR24P; s->avctx->pix_fmt = PIX_FMT_GBR24P;
av_log(h->s.avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n"); av_log(h->s.avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n");
} }
}else if (CHROMA422) { } else if (CHROMA422) {
s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P; s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P;
}else{ }else{
s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
@ -3384,7 +3465,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
const int end_mb_y= s->mb_y + FRAME_MBAFF; const int end_mb_y= s->mb_y + FRAME_MBAFF;
const int old_slice_type= h->slice_type; const int old_slice_type= h->slice_type;
const int pixel_shift = h->pixel_shift; const int pixel_shift = h->pixel_shift;
const int block_h = 16>>s->chroma_y_shift; const int block_h = 16 >> s->chroma_y_shift;
if(h->deblocking_filter) { if(h->deblocking_filter) {
for(mb_x= start_x; mb_x<end_x; mb_x++){ for(mb_x= start_x; mb_x<end_x; mb_x++){
@ -3401,8 +3482,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
s->mb_x= mb_x; s->mb_x= mb_x;
s->mb_y= mb_y; s->mb_y= mb_y;
dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16;
dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h; dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*(8<<CHROMA444) + mb_y * s->uvlinesize * block_h; dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
//FIXME simplify above //FIXME simplify above
if (MB_FIELD) { if (MB_FIELD) {
@ -3410,8 +3491,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
if(mb_y&1){ //FIXME move out of this function? if(mb_y&1){ //FIXME move out of this function?
dest_y -= s->linesize*15; dest_y -= s->linesize*15;
dest_cb-= s->uvlinesize*(block_h-1); dest_cb-= s->uvlinesize * (block_h - 1);
dest_cr-= s->uvlinesize*(block_h-1); dest_cr-= s->uvlinesize * (block_h - 1);
} }
} else { } else {
linesize = h->mb_linesize = s->linesize; linesize = h->mb_linesize = s->linesize;

View File

@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
}; };
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) { static av_always_inline void
decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
const uint32_t *qmul, int max_coeff,
int is_dc, int chroma422)
{
static const int significant_coeff_flag_offset[2][14] = { static const int significant_coeff_flag_offset[2][14] = {
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 }, { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
{ 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 } { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
@ -1593,7 +1598,10 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
* map node ctx => cabac ctx for level=1 */ * map node ctx => cabac ctx for level=1 */
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
/* map node ctx => cabac ctx for level>1 */ /* map node ctx => cabac ctx for level>1 */
static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 }; static const uint8_t coeff_abs_levelgt1_ctx[2][8] = {
{ 5, 5, 5, 5, 6, 7, 8, 9 },
{ 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case
};
static const uint8_t coeff_abs_level_transition[2][8] = { static const uint8_t coeff_abs_level_transition[2][8] = {
/* update node ctx after decoding a level=1 */ /* update node ctx after decoding a level=1 */
{ 1, 2, 3, 3, 4, 5, 6, 7 }, { 1, 2, 3, 3, 4, 5, 6, 7 },
@ -1652,7 +1660,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
last_coeff_ctx_base, sig_off); last_coeff_ctx_base, sig_off);
} else { } else {
if (is_dc && max_coeff == 8) { // dc 422 if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else { } else {
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
@ -1661,7 +1669,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
#else #else
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
} else { } else {
if (is_dc && max_coeff == 8) { // dc 422 if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
} else { } else {
DECODE_SIGNIFICANCE(max_coeff - 1, last, last); DECODE_SIGNIFICANCE(max_coeff - 1, last, last);
@ -1701,9 +1709,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} \ } \
} else { \ } else { \
int coeff_abs = 2; \ int coeff_abs = 2; \
if (is_dc && max_coeff == 8) \ ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \
node_ctx = FFMIN(node_ctx, 6); \
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
node_ctx = coeff_abs_level_transition[1][node_ctx]; \ node_ctx = coeff_abs_level_transition[1][node_ctx]; \
\ \
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \ while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
@ -1745,11 +1751,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} }
static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1); decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0);
}
static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
int max_coeff)
{
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 1);
} }
static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0); decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0);
} }
/* cat: 0-> DC 16x16 n = 0 /* cat: 0-> DC 16x16 n = 0
@ -1773,6 +1786,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM *
decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff ); decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff );
} }
static av_always_inline void
decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
int max_coeff)
{
/* read coded block flag */
if (get_cabac(&h->cabac, &h->cabac_state[get_cabac_cbf_ctx(h, cat, n, max_coeff, 1)]) == 0) {
h->non_zero_count_cache[scan8[n]] = 0;
return;
}
decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff);
}
static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* read coded block flag */ /* read coded block flag */
if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) { if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
@ -2325,17 +2351,14 @@ decode_intra_mb:
if(CHROMA444){ if(CHROMA444){
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1);
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2);
} else { } else if (CHROMA422) {
const int num_c8x8 = h->sps.chroma_format_idc;
if( cbp&0x30 ){ if( cbp&0x30 ){
int c; int c;
for( c = 0; c < 2; c++ ) { for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3,
CHROMA_DC_BLOCK_INDEX+c, CHROMA_DC_BLOCK_INDEX + c,
CHROMA422 ? chroma422_dc_scan : chroma_dc_scan, chroma422_dc_scan, 8);
4*num_c8x8);
} }
} }
@ -2344,7 +2367,7 @@ decode_intra_mb:
for( c = 0; c < 2; c++ ) { for( c = 0; c < 2; c++ ) {
DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift); DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift);
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for (i8x8 = 0; i8x8 < num_c8x8; i8x8++) { for (i8x8 = 0; i8x8 < 2; i8x8++) {
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
const int index = 16 + 16 * c + 8*i8x8 + i; const int index = 16 + 16 * c + 8*i8x8 + i;
//av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16); //av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16);
@ -2357,6 +2380,29 @@ decode_intra_mb:
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
} }
} else /* yuv420 */ {
if( cbp&0x30 ){
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
decode_cabac_residual_dc(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
}
}
if( cbp&0x20 ) {
int c, i;
for( c = 0; c < 2; c++ ) {
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for( i = 0; i < 4; i++ ) {
const int index = 16 + 16 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
decode_cabac_residual_nondc(h, h->mb + (16*index << pixel_shift), 4, index, scan + 1, qmul, 15);
}
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
}
} }
} else { } else {
fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1); fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);

View File

@ -415,7 +415,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
#endif #endif
sps->crop= get_bits1(&s->gb); sps->crop= get_bits1(&s->gb);
if(sps->crop){ if(sps->crop){
int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8; int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8;
int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8; int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8;
sps->crop_left = get_ue_golomb(&s->gb); sps->crop_left = get_ue_golomb(&s->gb);
sps->crop_right = get_ue_golomb(&s->gb); sps->crop_right = get_ue_golomb(&s->gb);

View File

@ -64,26 +64,14 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo
else\ else\
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\ c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
\ \
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\
c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\ c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels4, depth);\
c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\ c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels2, depth);\
c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\ c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16, depth);\
c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\ c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels8, depth);\
c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\ c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels4, depth);\
c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\ c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels2, depth);\
c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
\ \
c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\ c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\ c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\

View File

@ -31,16 +31,18 @@
#include "dsputil.h" #include "dsputil.h"
//typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); //typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height,
int log2_denom, int weightd, int weights, int offset);
/** /**
* Context for storing H.264 DSP functions * Context for storing H.264 DSP functions
*/ */
typedef struct H264DSPContext{ typedef struct H264DSPContext{
/* weighted MC */ /* weighted MC */
h264_weight_func weight_h264_pixels_tab[10]; h264_weight_func weight_h264_pixels_tab[4];
h264_biweight_func biweight_h264_pixels_tab[10]; h264_biweight_func biweight_h264_pixels_tab[4];
/* loop filter */ /* loop filter */
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);

View File

@ -29,14 +29,16 @@
#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \ #define H264_WEIGHT(W) \
static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int stride, int log2_denom, int weight, int offset){ \ static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \
int log2_denom, int weight, int offset) \
{ \
int y; \ int y; \
pixel *block = (pixel*)p_block; \ pixel *block = (pixel*)_block; \
stride >>= sizeof(pixel)-1; \ stride >>= sizeof(pixel)-1; \
offset <<= (log2_denom + (BIT_DEPTH-8)); \ offset <<= (log2_denom + (BIT_DEPTH-8)); \
if(log2_denom) offset += 1<<(log2_denom-1); \ if(log2_denom) offset += 1<<(log2_denom-1); \
for(y=0; y<H; y++, block += stride){ \ for (y = 0; y < height; y++, block += stride) { \
op_scale1(0); \ op_scale1(0); \
op_scale1(1); \ op_scale1(1); \
if(W==2) continue; \ if(W==2) continue; \
@ -58,14 +60,16 @@ static void FUNCC(weight_h264_pixels ## W ## x ## H)(uint8_t *p_block, int strid
op_scale1(15); \ op_scale1(15); \
} \ } \
} \ } \
static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_src, int stride, int log2_denom, int weightd, int weights, int offset){ \ static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \
int log2_denom, int weightd, int weights, int offset) \
{ \
int y; \ int y; \
pixel *dst = (pixel*)_dst; \ pixel *dst = (pixel*)_dst; \
pixel *src = (pixel*)_src; \ pixel *src = (pixel*)_src; \
stride >>= sizeof(pixel)-1; \ stride >>= sizeof(pixel)-1; \
offset <<= (BIT_DEPTH-8); \ offset <<= (BIT_DEPTH-8); \
offset = ((offset + 1) | 1) << log2_denom; \ offset = ((offset + 1) | 1) << log2_denom; \
for(y=0; y<H; y++, dst += stride, src += stride){ \ for (y = 0; y < height; y++, dst += stride, src += stride) { \
op_scale2(0); \ op_scale2(0); \
op_scale2(1); \ op_scale2(1); \
if(W==2) continue; \ if(W==2) continue; \
@ -88,16 +92,10 @@ static void FUNCC(biweight_h264_pixels ## W ## x ## H)(uint8_t *_dst, uint8_t *_
} \ } \
} }
H264_WEIGHT(16,16) H264_WEIGHT(16)
H264_WEIGHT(16,8) H264_WEIGHT(8)
H264_WEIGHT(8,16) H264_WEIGHT(4)
H264_WEIGHT(8,8) H264_WEIGHT(2)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)
#undef op_scale1 #undef op_scale1
#undef op_scale2 #undef op_scale2

View File

@ -228,16 +228,6 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
int i, j; int i, j;
#if 0
av_log(NULL, AV_LOG_INFO, "idct\n");
int32_t *b = block;
for (int i = 0; i < 256; i++) {
av_log(NULL, AV_LOG_INFO, "%5d ", b[i+256]);
if (!((i+1) % 16))
av_log(NULL, AV_LOG_INFO, "\n");
}
#endif
for(j=1; j<3; j++){ for(j=1; j<3; j++){
for(i=j*16; i<j*16+4; i++){ for(i=j*16; i<j*16+4; i++){
if(nnzc[ scan8[i] ]) if(nnzc[ scan8[i] ])
@ -296,13 +286,13 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, in
#undef stride #undef stride
} }
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2; const int stride= 16*2;
const int xStride= 16; const int xStride= 16;
int i; int i;
int temp[8]; int temp[8];
static const uint8_t x_offset[2]={0, 16}; static const uint8_t x_offset[2]={0, 16};
dctcoef *block = (dctcoef*)p_block; dctcoef *block = (dctcoef*)_block;
for(i=0; i<4; i++){ for(i=0; i<4; i++){
temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1]; temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
@ -321,22 +311,13 @@ void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){
block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8; block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8; block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
} }
#if 0
av_log(NULL, AV_LOG_INFO, "after chroma dc\n");
for (int i = 0; i < 256; i++) {
av_log(NULL, AV_LOG_INFO, "%5d ", block[i]);
if (!((i+1) % 16))
av_log(NULL, AV_LOG_INFO, "\n");
}
#endif
} }
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *p_block, int qmul){ void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
const int stride= 16*2; const int stride= 16*2;
const int xStride= 16; const int xStride= 16;
int a,b,c,d,e; int a,b,c,d,e;
dctcoef *block = (dctcoef*)p_block; dctcoef *block = (dctcoef*)_block;
a= block[stride*0 + xStride*0]; a= block[stride*0 + xStride*0];
b= block[stride*0 + xStride*1]; b= block[stride*0 + xStride*1];

View File

@ -462,10 +462,10 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\
}\ }\
}else{\ }else{\
h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
@ -510,8 +510,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co
h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\
h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\
h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\
if (chroma_format_idc == 1) {\
h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\
h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\
} else {\
h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add , depth);\
h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add , depth);\
}\
h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\ h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\
h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\ h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\

View File

@ -663,23 +663,45 @@ static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
FUNCC(pred4x4_dc)(src, NULL, stride); FUNCC(pred4x4_dc)(src, NULL, stride);
} }
/**
 * 8x16 variant of pred8x8_mad_cow_dc_l0t.
 *
 * Runs the 8x16 top-DC predictor over the block and then calls pred4x4_dc
 * on the 4x4 sub-block that starts at src (presumably re-predicting the
 * top-left sub-block with the full DC rule -- confirm against pred4x4_dc).
 *
 * @param src    start of the block to predict
 * @param stride line stride passed through unchanged to both predictors
 */
static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){
FUNCC(pred8x16_top_dc)(src, stride);
FUNCC(pred4x4_dc)(src, NULL, stride);
}
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
FUNCC(pred8x8_dc)(src, stride); FUNCC(pred8x8_dc)(src, stride);
FUNCC(pred4x4_top_dc)(src, NULL, stride); FUNCC(pred4x4_top_dc)(src, NULL, stride);
} }
/**
 * 8x16 variant of pred8x8_mad_cow_dc_0lt.
 *
 * Runs the 8x16 DC predictor over the block and then calls pred4x4_top_dc
 * on the 4x4 sub-block that starts at src (presumably replacing the
 * top-left sub-block with a top-only DC value -- confirm against
 * pred4x4_top_dc).
 *
 * @param src    start of the block to predict
 * @param stride line stride passed through unchanged to both predictors
 */
static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){
FUNCC(pred8x16_dc)(src, stride);
FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
FUNCC(pred8x8_left_dc)(src, stride); FUNCC(pred8x8_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
} }
/**
 * 8x16 variant of pred8x8_mad_cow_dc_l00.
 *
 * Runs the 8x16 left-DC predictor over the block, then calls pred4x4_128_dc
 * on the two 4x4 sub-blocks that start four rows below src: one at the left
 * edge and one four pixels to the right.
 *
 * @param src    start of the block to predict
 * @param stride line stride; also used here to step four rows down
 */
static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){
FUNCC(pred8x16_left_dc)(src, stride);
/* rows 4..7, left 4x4 sub-block */
FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
/* rows 4..7, right 4x4 sub-block (offset of four pixels) */
FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
}
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
FUNCC(pred8x8_left_dc)(src, stride); FUNCC(pred8x8_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src , NULL, stride); FUNCC(pred4x4_128_dc)(src , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
} }
/**
 * 8x16 variant of pred8x8_mad_cow_dc_0l0.
 *
 * Runs the 8x16 left-DC predictor over the block, then calls pred4x4_128_dc
 * on the two 4x4 sub-blocks in the first four rows: one at src and one four
 * pixels to the right.
 *
 * @param src    start of the block to predict
 * @param stride line stride passed through unchanged to the predictors
 */
static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){
FUNCC(pred8x16_left_dc)(src, stride);
/* rows 0..3, left 4x4 sub-block */
FUNCC(pred4x4_128_dc)(src , NULL, stride);
/* rows 0..3, right 4x4 sub-block (offset of four pixels) */
FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
}
static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
int j, k; int j, k;
int a; int a;
@ -1126,8 +1148,24 @@ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, c
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
} }
/**
 * Run pred4x4_vertical_add on eight consecutive 4x4 sub-blocks.
 *
 * Coefficients for sub-block n are taken at block + n*16*sizeof(pixel).
 * The destination offset comes from block_offset[]: sub-blocks 0..3 use
 * entries 0..3, sub-blocks 4..7 use entries 8..11.
 *
 * @param pix          base of the destination picture data
 * @param block_offset table of per-sub-block offsets relative to pix
 * @param block        residual coefficients for the eight sub-blocks
 * @param stride       line stride forwarded to the 4x4 routine
 */
static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int n;

    for (n = 0; n < 8; n++) {
        /* the second group of four skips entries 4..7 of block_offset[] */
        const int off_idx = n < 4 ? n : n + 4;

        FUNCC(pred4x4_vertical_add)(pix + block_offset[off_idx],
                                    block + n*16*sizeof(pixel), stride);
    }
}
static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
int i; int i;
for(i=0; i<4; i++) for(i=0; i<4; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
} }
/**
 * Run pred4x4_horizontal_add on eight consecutive 4x4 sub-blocks.
 *
 * Coefficients for sub-block n are taken at block + n*16*sizeof(pixel).
 * The destination offset comes from block_offset[]: sub-blocks 0..3 use
 * entries 0..3, sub-blocks 4..7 use entries 8..11.
 *
 * @param pix          base of the destination picture data
 * @param block_offset table of per-sub-block offsets relative to pix
 * @param block        residual coefficients for the eight sub-blocks
 * @param stride       line stride forwarded to the 4x4 routine
 */
static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int n;

    for (n = 0; n < 8; n++) {
        /* the second group of four skips entries 4..7 of block_offset[] */
        const int off_idx = n < 4 ? n : n + 4;

        FUNCC(pred4x4_horizontal_add)(pix + block_offset[off_idx],
                                      block + n*16*sizeof(pixel), stride);
    }
}

View File

@ -18,11 +18,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "avcodec.h"
#include <speex/speex.h> #include <speex/speex.h>
#include <speex/speex_header.h> #include <speex/speex_header.h>
#include <speex/speex_stereo.h> #include <speex/speex_stereo.h>
#include <speex/speex_callbacks.h> #include <speex/speex_callbacks.h>
#include "avcodec.h"
typedef struct { typedef struct {
SpeexBits bits; SpeexBits bits;
@ -60,14 +60,14 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx)
mode = speex_lib_get_mode(s->header->mode); mode = speex_lib_get_mode(s->header->mode);
if (!mode) { if (!mode) {
av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", s->header->mode); av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", s->header->mode);
return -1; return AVERROR_INVALIDDATA;
} }
} else } else
av_log(avctx, AV_LOG_INFO, "Missing Speex header, assuming defaults.\n"); av_log(avctx, AV_LOG_INFO, "Missing Speex header, assuming defaults.\n");
if (avctx->channels > 2) { if (avctx->channels > 2) {
av_log(avctx, AV_LOG_ERROR, "Only stereo and mono are supported.\n"); av_log(avctx, AV_LOG_ERROR, "Only stereo and mono are supported.\n");
return -1; return AVERROR(EINVAL);
} }
speex_bits_init(&s->bits); speex_bits_init(&s->bits);
@ -99,32 +99,42 @@ static int libspeex_decode_frame(AVCodecContext *avctx,
uint8_t *buf = avpkt->data; uint8_t *buf = avpkt->data;
int buf_size = avpkt->size; int buf_size = avpkt->size;
LibSpeexContext *s = avctx->priv_data; LibSpeexContext *s = avctx->priv_data;
int16_t *output = data, *end; int16_t *output = data;
int i, num_samples; int out_size, ret, consumed = 0;
num_samples = s->frame_size * avctx->channels; /* check output buffer size */
end = output + *data_size / sizeof(*output); out_size = s->frame_size * avctx->channels *
av_get_bytes_per_sample(avctx->sample_fmt);
speex_bits_read_from(&s->bits, buf, buf_size); if (*data_size < out_size) {
av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
for (i = 0; speex_bits_remaining(&s->bits) && output + num_samples < end; i++) { return AVERROR(EINVAL);
int ret = speex_decode_int(s->dec_state, &s->bits, output);
if (ret <= -2) {
av_log(avctx, AV_LOG_ERROR, "Error decoding Speex frame.\n");
return -1;
} else if (ret == -1)
// end of stream
break;
if (avctx->channels == 2)
speex_decode_stereo_int(output, s->frame_size, &s->stereo);
output += num_samples;
} }
avctx->frame_size = s->frame_size * i; /* if there is not enough data left for the smallest possible frame,
*data_size = avctx->channels * avctx->frame_size * sizeof(*output); reset the libspeex buffer using the current packet, otherwise ignore
return buf_size; the current packet and keep decoding frames from the libspeex buffer. */
if (speex_bits_remaining(&s->bits) < 43) {
/* check for flush packet */
if (!buf || !buf_size) {
*data_size = 0;
return buf_size;
}
/* set new buffer */
speex_bits_read_from(&s->bits, buf, buf_size);
consumed = buf_size;
}
/* decode a single frame */
ret = speex_decode_int(s->dec_state, &s->bits, output);
if (ret <= -2) {
av_log(avctx, AV_LOG_ERROR, "Error decoding Speex frame.\n");
return AVERROR_INVALIDDATA;
}
if (avctx->channels == 2)
speex_decode_stereo_int(output, s->frame_size, &s->stereo);
*data_size = out_size;
return consumed;
} }
static av_cold int libspeex_decode_close(AVCodecContext *avctx) static av_cold int libspeex_decode_close(AVCodecContext *avctx)
@ -138,6 +148,12 @@ static av_cold int libspeex_decode_close(AVCodecContext *avctx)
return 0; return 0;
} }
/**
 * Flush callback of the libspeex decoder.
 *
 * Resets the SpeexBits reader kept in the private context, so bitstream
 * data left over from earlier packets is not used by later decode calls.
 *
 * @param avctx codec context whose priv_data is a LibSpeexContext
 */
static av_cold void libspeex_decode_flush(AVCodecContext *avctx)
{
    LibSpeexContext *ctx = avctx->priv_data;

    speex_bits_reset(&ctx->bits);
}
AVCodec ff_libspeex_decoder = { AVCodec ff_libspeex_decoder = {
.name = "libspeex", .name = "libspeex",
.type = AVMEDIA_TYPE_AUDIO, .type = AVMEDIA_TYPE_AUDIO,
@ -146,5 +162,7 @@ AVCodec ff_libspeex_decoder = {
.init = libspeex_decode_init, .init = libspeex_decode_init,
.close = libspeex_decode_close, .close = libspeex_decode_close,
.decode = libspeex_decode_frame, .decode = libspeex_decode_frame,
.flush = libspeex_decode_flush,
.capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DELAY,
.long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"), .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
}; };

View File

@ -1893,24 +1893,50 @@ typedef struct MP3On4DecodeContext {
int syncword; ///< syncword patch int syncword; ///< syncword patch
const uint8_t *coff; ///< channels offsets in output buffer const uint8_t *coff; ///< channels offsets in output buffer
MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
OUT_INT *decoded_buf; ///< output buffer for decoded samples
} MP3On4DecodeContext; } MP3On4DecodeContext;
#include "mpeg4audio.h" #include "mpeg4audio.h"
/* Next 3 arrays are indexed by channel config number (passed via codecdata) */ /* Next 3 arrays are indexed by channel config number (passed via codecdata) */
static const uint8_t mp3Frames[8] = {0,1,1,2,3,3,4,5}; /* number of mp3 decoder instances */ static const uint8_t mp3Frames[8] = {0,1,1,2,3,3,4,5}; /* number of mp3 decoder instances */
/* offsets into output buffer, assume output order is FL FR BL BR C LFE */ /* offsets into output buffer, assume output order is FL FR C LFE BL BR SL SR */
static const uint8_t chan_offset[8][5] = { static const uint8_t chan_offset[8][5] = {
{0}, {0},
{0}, // C {0}, // C
{0}, // FLR {0}, // FLR
{2,0}, // C FLR {2,0}, // C FLR
{2,0,3}, // C FLR BS {2,0,3}, // C FLR BS
{4,0,2}, // C FLR BLRS {2,0,3}, // C FLR BLRS
{4,0,2,5}, // C FLR BLRS LFE {2,0,4,3}, // C FLR BLRS LFE
{4,0,2,6,5}, // C FLR BLRS BLR LFE {2,0,6,4,3}, // C FLR BLRS BLR LFE
}; };
/*
 * mp3on4 channel layouts, indexed by channel config number like the
 * tables above. Config 0 has no layout.
 * NOTE(review): the element type is int16_t -- confirm every layout mask
 * listed here fits in 16 signed bits before adding wider layouts.
 */
static const int16_t chan_layout[8] = {
    [0] = 0,
    [1] = AV_CH_LAYOUT_MONO,
    [2] = AV_CH_LAYOUT_STEREO,
    [3] = AV_CH_LAYOUT_SURROUND,
    [4] = AV_CH_LAYOUT_4POINT0,
    [5] = AV_CH_LAYOUT_5POINT0,
    [6] = AV_CH_LAYOUT_5POINT1,
    [7] = AV_CH_LAYOUT_7POINT1,
};
/**
 * Free the per-frame MPADecodeContext instances and the interleaving
 * buffer owned by the MP3onMP4 decoder.
 *
 * Each pointer is cleared as it is freed. decode_init_mp3on4() calls this
 * function from its allocation-failure path, so stale pointers left in the
 * context would be freed a second time if teardown ran again.
 *
 * @param avctx codec context whose priv_data is an MP3On4DecodeContext
 * @return always 0
 */
static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
{
    MP3On4DecodeContext *s = avctx->priv_data;
    int i;

    for (i = 0; i < s->frames; i++)
        av_freep(&s->mp3decctx[i]);

    av_freep(&s->decoded_buf);

    return 0;
}
static int decode_init_mp3on4(AVCodecContext * avctx) static int decode_init_mp3on4(AVCodecContext * avctx)
{ {
@ -1931,6 +1957,7 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
s->frames = mp3Frames[cfg.chan_config]; s->frames = mp3Frames[cfg.chan_config];
s->coff = chan_offset[cfg.chan_config]; s->coff = chan_offset[cfg.chan_config];
avctx->channels = ff_mpeg4audio_channels[cfg.chan_config]; avctx->channels = ff_mpeg4audio_channels[cfg.chan_config];
avctx->channel_layout = chan_layout[cfg.chan_config];
if (cfg.sample_rate < 16000) if (cfg.sample_rate < 16000)
s->syncword = 0xffe00000; s->syncword = 0xffe00000;
@ -1944,6 +1971,8 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
*/ */
// Allocate zeroed memory for the first decoder context // Allocate zeroed memory for the first decoder context
s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext)); s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext));
if (!s->mp3decctx[0])
goto alloc_fail;
// Put decoder context in place to make init_decode() happy // Put decoder context in place to make init_decode() happy
avctx->priv_data = s->mp3decctx[0]; avctx->priv_data = s->mp3decctx[0];
decode_init(avctx); decode_init(avctx);
@ -1956,23 +1985,38 @@ static int decode_init_mp3on4(AVCodecContext * avctx)
*/ */
for (i = 1; i < s->frames; i++) { for (i = 1; i < s->frames; i++) {
s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext)); s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext));
if (!s->mp3decctx[i])
goto alloc_fail;
s->mp3decctx[i]->adu_mode = 1; s->mp3decctx[i]->adu_mode = 1;
s->mp3decctx[i]->avctx = avctx; s->mp3decctx[i]->avctx = avctx;
s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp;
}
/* Allocate buffer for multi-channel output if needed */
if (s->frames > 1) {
s->decoded_buf = av_malloc(MPA_FRAME_SIZE * MPA_MAX_CHANNELS *
sizeof(*s->decoded_buf));
if (!s->decoded_buf)
goto alloc_fail;
} }
return 0; return 0;
alloc_fail:
decode_close_mp3on4(avctx);
return AVERROR(ENOMEM);
} }
static av_cold int decode_close_mp3on4(AVCodecContext * avctx) static void flush_mp3on4(AVCodecContext *avctx)
{ {
MP3On4DecodeContext *s = avctx->priv_data;
int i; int i;
MP3On4DecodeContext *s = avctx->priv_data;
for (i = 0; i < s->frames; i++) for (i = 0; i < s->frames; i++) {
av_free(s->mp3decctx[i]); MPADecodeContext *m = s->mp3decctx[i];
memset(m->synth_buf, 0, sizeof(m->synth_buf));
return 0; m->last_buf_size = 0;
}
} }
@ -1987,12 +2031,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
int fsize, len = buf_size, out_size = 0; int fsize, len = buf_size, out_size = 0;
uint32_t header; uint32_t header;
OUT_INT *out_samples = data; OUT_INT *out_samples = data;
OUT_INT decoded_buf[MPA_FRAME_SIZE * MPA_MAX_CHANNELS];
OUT_INT *outptr, *bp; OUT_INT *outptr, *bp;
int fr, j, n; int fr, j, n, ch;
if(*data_size < MPA_FRAME_SIZE * MPA_MAX_CHANNELS * s->frames * sizeof(OUT_INT)) if (*data_size < MPA_FRAME_SIZE * avctx->channels * sizeof(OUT_INT)) {
return -1; av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n");
return AVERROR(EINVAL);
}
*data_size = 0; *data_size = 0;
// Discard too short frames // Discard too short frames
@ -2000,10 +2045,11 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
return -1; return -1;
// If only one decoder interleave is not needed // If only one decoder interleave is not needed
outptr = s->frames == 1 ? out_samples : decoded_buf; outptr = s->frames == 1 ? out_samples : s->decoded_buf;
avctx->bit_rate = 0; avctx->bit_rate = 0;
ch = 0;
for (fr = 0; fr < s->frames; fr++) { for (fr = 0; fr < s->frames; fr++) {
fsize = AV_RB16(buf) >> 4; fsize = AV_RB16(buf) >> 4;
fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE); fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE);
@ -2016,6 +2062,14 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
break; break;
avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header); avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
if (ch + m->nb_channels > avctx->channels) {
av_log(avctx, AV_LOG_ERROR, "frame channel count exceeds codec "
"channel count\n");
return AVERROR_INVALIDDATA;
}
ch += m->nb_channels;
out_size += mp_decode_frame(m, outptr, buf, fsize); out_size += mp_decode_frame(m, outptr, buf, fsize);
buf += fsize; buf += fsize;
len -= fsize; len -= fsize;
@ -2026,13 +2080,13 @@ static int decode_frame_mp3on4(AVCodecContext * avctx,
bp = out_samples + s->coff[fr]; bp = out_samples + s->coff[fr];
if(m->nb_channels == 1) { if(m->nb_channels == 1) {
for(j = 0; j < n; j++) { for(j = 0; j < n; j++) {
*bp = decoded_buf[j]; *bp = s->decoded_buf[j];
bp += avctx->channels; bp += avctx->channels;
} }
} else { } else {
for(j = 0; j < n; j++) { for(j = 0; j < n; j++) {
bp[0] = decoded_buf[j++]; bp[0] = s->decoded_buf[j++];
bp[1] = decoded_buf[j]; bp[1] = s->decoded_buf[j];
bp += avctx->channels; bp += avctx->channels;
} }
} }
@ -2110,7 +2164,7 @@ AVCodec ff_mp3on4_decoder = {
.init = decode_init_mp3on4, .init = decode_init_mp3on4,
.close = decode_close_mp3on4, .close = decode_close_mp3on4,
.decode = decode_frame_mp3on4, .decode = decode_frame_mp3on4,
.flush = flush, .flush = flush_mp3on4,
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
}; };
#endif #endif

View File

@ -83,7 +83,7 @@ AVCodec ff_mp3on4float_decoder = {
.init = decode_init_mp3on4, .init = decode_init_mp3on4,
.close = decode_close_mp3on4, .close = decode_close_mp3on4,
.decode = decode_frame_mp3on4, .decode = decode_frame_mp3on4,
.flush = flush, .flush = flush_mp3on4,
.long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"), .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
}; };
#endif #endif

View File

@ -843,7 +843,8 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
} }
static av_always_inline static av_always_inline
void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h) void weight_h264_W_altivec(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset, int w)
{ {
int y, aligned; int y, aligned;
vec_u8 vblock; vec_u8 vblock;
@ -864,7 +865,7 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
voffset = vec_splat(vtemp, 5); voffset = vec_splat(vtemp, 5);
aligned = !((unsigned long)block & 0xf); aligned = !((unsigned long)block & 0xf);
for (y=0; y<h; y++) { for (y = 0; y < height; y++) {
vblock = vec_ld(0, block); vblock = vec_ld(0, block);
v0 = (vec_s16)vec_mergeh(zero_u8v, vblock); v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
@ -888,8 +889,8 @@ void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int wei
} }
static av_always_inline static av_always_inline
void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height,
int weightd, int weights, int offset, int w, int h) int log2_denom, int weightd, int weights, int offset, int w)
{ {
int y, dst_aligned, src_aligned; int y, dst_aligned, src_aligned;
vec_u8 vsrc, vdst; vec_u8 vsrc, vdst;
@ -912,7 +913,7 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
dst_aligned = !((unsigned long)dst & 0xf); dst_aligned = !((unsigned long)dst & 0xf);
src_aligned = !((unsigned long)src & 0xf); src_aligned = !((unsigned long)src & 0xf);
for (y=0; y<h; y++) { for (y = 0; y < height; y++) {
vdst = vec_ld(0, dst); vdst = vec_ld(0, dst);
vsrc = vec_ld(0, src); vsrc = vec_ld(0, src);
@ -952,19 +953,18 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_
} }
} }
#define H264_WEIGHT(W,H) \ #define H264_WEIGHT(W) \
static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ static void ff_weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \
weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \ int log2_denom, int weight, int offset){ \
weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
}\ }\
static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \ static void ff_biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \
biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \ int log2_denom, int weightd, int weights, int offset){ \
biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \
} }
H264_WEIGHT(16,16) H264_WEIGHT(16)
H264_WEIGHT(16, 8) H264_WEIGHT( 8)
H264_WEIGHT( 8,16)
H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
const int high_bit_depth = avctx->bits_per_raw_sample > 8; const int high_bit_depth = avctx->bits_per_raw_sample > 8;
@ -1015,16 +1015,10 @@ void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec; c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec; c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_altivec;
c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_altivec;
c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec; c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_altivec;
c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec; c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_altivec;
c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
} }
} }
} }

View File

@ -158,6 +158,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, int l
case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P10BE: case PIX_FMT_YUV420P10BE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV422P10BE: case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV444P9LE: case PIX_FMT_YUV444P9LE:

View File

@ -41,24 +41,57 @@ static void free_buffers(VP8Context *s)
av_freep(&s->top_nnz); av_freep(&s->top_nnz);
av_freep(&s->edge_emu_buffer); av_freep(&s->edge_emu_buffer);
av_freep(&s->top_border); av_freep(&s->top_border);
av_freep(&s->segmentation_map);
s->macroblocks = NULL; s->macroblocks = NULL;
} }
static void vp8_decode_flush(AVCodecContext *avctx) static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
int ret;
if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
return ret;
if (!s->maps_are_invalid && s->num_maps_to_be_freed) {
f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
} else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
ff_thread_release_buffer(s->avctx, f);
return AVERROR(ENOMEM);
}
return 0;
}
/**
 * Release a frame buffer together with the map held in f->ref_index[0].
 *
 * On close the map is freed immediately. Otherwise a non-NULL map is
 * detached from the frame and appended to s->segmentation_maps[], where
 * it is either reused or freed later.
 *
 * @param s        decoder context owning the queue of detached maps
 * @param f        frame to release
 * @param is_close non-zero to free the map right away instead of queueing it
 */
static void vp8_release_frame(VP8Context *s, AVFrame *f, int is_close)
{
    if (is_close) {
        av_freep(&f->ref_index[0]);
    } else if (f->ref_index[0]) {
        /* the queue is a fixed-size array; it must have a free slot */
        assert(s->num_maps_to_be_freed < FF_ARRAY_ELEMS(s->segmentation_maps));
        s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
        f->ref_index[0] = NULL;
    }

    ff_thread_release_buffer(s->avctx, f);
}
static void vp8_decode_flush_impl(AVCodecContext *avctx, int force, int is_close)
{ {
VP8Context *s = avctx->priv_data; VP8Context *s = avctx->priv_data;
int i; int i;
if (!avctx->is_copy) { if (!avctx->is_copy || force) {
for (i = 0; i < 5; i++) for (i = 0; i < 5; i++)
if (s->frames[i].data[0]) if (s->frames[i].data[0])
ff_thread_release_buffer(avctx, &s->frames[i]); vp8_release_frame(s, &s->frames[i], is_close);
} }
memset(s->framep, 0, sizeof(s->framep)); memset(s->framep, 0, sizeof(s->framep));
free_buffers(s); free_buffers(s);
s->maps_are_invalid = 1;
}
static void vp8_decode_flush(AVCodecContext *avctx)
{
vp8_decode_flush_impl(avctx, 0, 0);
} }
static int update_dimensions(VP8Context *s, int width, int height) static int update_dimensions(VP8Context *s, int width, int height)
@ -68,7 +101,7 @@ static int update_dimensions(VP8Context *s, int width, int height)
if (av_image_check_size(width, height, 0, s->avctx)) if (av_image_check_size(width, height, 0, s->avctx))
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
vp8_decode_flush(s->avctx); vp8_decode_flush_impl(s->avctx, 1, 0);
avcodec_set_dimensions(s->avctx, width, height); avcodec_set_dimensions(s->avctx, width, height);
} }
@ -81,10 +114,9 @@ static int update_dimensions(VP8Context *s, int width, int height)
s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
!s->top_nnz || !s->top_border || !s->segmentation_map) !s->top_nnz || !s->top_border)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
s->macroblocks = s->macroblocks_base + 1; s->macroblocks = s->macroblocks_base + 1;
@ -1508,6 +1540,14 @@ static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
} }
} }
/**
 * Free maps that were queued in s->segmentation_maps[].
 *
 * One queued map is left in place when the decoder is not closing and the
 * maps are still marked valid; in every other case the queue is emptied.
 * The maps_are_invalid flag is cleared before returning.
 *
 * @param s        decoder context holding the queue
 * @param is_close non-zero when called during decoder teardown
 */
static void release_queued_segmaps(VP8Context *s, int is_close)
{
    int keep = 0;

    if (!is_close && !s->maps_are_invalid)
        keep = 1;

    while (s->num_maps_to_be_freed > keep) {
        s->num_maps_to_be_freed--;
        av_freep(&s->segmentation_maps[s->num_maps_to_be_freed]);
    }

    s->maps_are_invalid = 0;
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt) AVPacket *avpkt)
{ {
@ -1516,6 +1556,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
enum AVDiscard skip_thresh; enum AVDiscard skip_thresh;
AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT]; AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
release_queued_segmaps(s, 0);
if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
return ret; return ret;
@ -1538,7 +1580,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
&s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
&s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
ff_thread_release_buffer(avctx, &s->frames[i]); vp8_release_frame(s, &s->frames[i], 0);
// find a free buffer // find a free buffer
for (i = 0; i < 5; i++) for (i = 0; i < 5; i++)
@ -1559,8 +1601,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
curframe->key_frame = s->keyframe; curframe->key_frame = s->keyframe;
curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
curframe->reference = referenced ? 3 : 0; curframe->reference = referenced ? 3 : 0;
curframe->ref_index[0] = s->segmentation_map; if ((ret = vp8_alloc_frame(s, curframe))) {
if ((ret = ff_thread_get_buffer(avctx, curframe))) {
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
return ret; return ret;
} }
@ -1652,8 +1693,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy, decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL); prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
@ -1736,7 +1777,8 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
static av_cold int vp8_decode_free(AVCodecContext *avctx) static av_cold int vp8_decode_free(AVCodecContext *avctx)
{ {
vp8_decode_flush(avctx); vp8_decode_flush_impl(avctx, 0, 1);
release_queued_segmaps(avctx->priv_data, 1);
return 0; return 0;
} }

View File

@ -130,7 +130,6 @@ typedef struct {
uint8_t *intra4x4_pred_mode_top; uint8_t *intra4x4_pred_mode_top;
uint8_t intra4x4_pred_mode_left[4]; uint8_t intra4x4_pred_mode_left[4];
uint8_t *segmentation_map;
/** /**
* Macroblocks can have one of 4 different quants in a frame when * Macroblocks can have one of 4 different quants in a frame when
@ -237,6 +236,16 @@ typedef struct {
H264PredContext hpc; H264PredContext hpc;
vp8_mc_func put_pixels_tab[3][3][3]; vp8_mc_func put_pixels_tab[3][3][3];
AVFrame frames[5]; AVFrame frames[5];
/**
* A list of segmentation_map buffers that are to be free()'ed in
* the next decoding iteration. We can't free() them right away
* because the map may still be used by subsequent decoding threads.
* Unused if frame threading is off.
*/
uint8_t *segmentation_maps[5];
int num_maps_to_be_freed;
int maps_are_invalid;
} VP8Context; } VP8Context;
#endif /* AVCODEC_VP8_H */ #endif /* AVCODEC_VP8_H */

View File

@ -1055,14 +1055,6 @@ emu_edge mmx
; int32_t max, unsigned int len) ; int32_t max, unsigned int len)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro SPLATD_MMX 1
punpckldq %1, %1
%endmacro
%macro SPLATD_SSE2 1
pshufd %1, %1, 0
%endmacro
%macro VECTOR_CLIP_INT32 4 %macro VECTOR_CLIP_INT32 4
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%ifidn %1, sse2 %ifidn %1, sse2

View File

@ -24,6 +24,146 @@
SECTION_TEXT SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
; Convert len int32 samples from src to float, multiply each by the scalar
; mul and store the products to dst.
;   %1 = instruction-set suffix of the emitted symbol (sse or sse2)
;   %2 = XMM register count handed to cglobal
; SPLATD must be %define'd by the instantiation site to a macro that
; replicates the low dword of a register across the whole register.
; Every loop pass handles 8 samples (32 bytes) and the body always runs at
; least once, so callers presumably pass a positive multiple of 8 -- TODO
; confirm. The stores use mova, so dst presumably has to be 16-byte aligned.
%macro INT32_TO_FLOAT_FMUL_SCALAR 2
%ifdef ARCH_X86_64
%ifdef WIN64
; Win64 assigns argument slots by position: the float third argument owns
; slot 2 (xmm2) and len is the fourth argument. All four slots are declared
; so that lenq maps to the register that really carries len.
; NOTE(review): relies on x86inc.asm defining WIN64 for Windows x64 object
; formats; if it is not defined this assembles exactly like the Unix path.
cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
    SWAP 0, 2                   ; from here on m0 names xmm2, which holds mul
%else
; SysV x86-64: mul arrives in xmm0 and consumes no integer register, so len
; is the third integer argument.
cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
%endif
%else
; x86-32: every argument lives on the stack, fetch mul from its slot.
cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
    movss m0, mulm
%endif
    SPLATD m0                   ; m0 = mul in all four lanes
    ; len is a C int: a 32-bit shift zero-extends into the full register, so
    ; undefined upper bits of the argument cannot leak into the address math.
    shl lend, 2                 ; element count -> byte count
    add srcq, lenq              ; point both buffers at their ends ...
    add dstq, lenq
    neg lenq                    ; ... and index upwards from -bytes to 0
.loop:
%ifidn %1, sse2
    cvtdq2ps m1, [srcq+lenq   ] ; 4 ints -> 4 floats per instruction
    cvtdq2ps m2, [srcq+lenq+16]
%else
    cvtpi2ps m1, [srcq+lenq   ] ; SSE1 converts 2 ints into the low half only
    cvtpi2ps m3, [srcq+lenq+ 8]
    cvtpi2ps m2, [srcq+lenq+16]
    cvtpi2ps m4, [srcq+lenq+24]
    movlhps m1, m3              ; glue the two halves into full vectors
    movlhps m2, m4
%endif
    mulps m1, m0
    mulps m2, m0
    mova [dstq+lenq   ], m1
    mova [dstq+lenq+16], m2
    add lenq, 32
    jl .loop                    ; continue while the byte index is negative
    REP_RET
%endmacro
INIT_XMM
%define SPLATD SPLATD_SSE
%define movdqa movaps
INT32_TO_FLOAT_FMUL_SCALAR sse, 5
%undef movdqa
%define SPLATD SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
%undef SPLATD
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;------------------------------------------------------------------------------
; Convert len floats from src to saturated int16 values in dst.
;   %1 = instruction-set suffix of the emitted symbol (sse2, sse or 3dnow)
;   %2 = XMM register count handed to cglobal
; Every loop pass converts 8 samples (32 source bytes -> 16 output bytes)
; and the body always runs at least once, so callers presumably pass a
; positive multiple of 8 -- TODO confirm.
%macro FLOAT_TO_INT16 2
cglobal float_to_int16_%1, 3,3,%2, dst, src, len
; lenq becomes the output size in bytes (2 per sample); the source is twice
; as wide, hence the 2*lenq scaling on every src address below.
add lenq, lenq
lea srcq, [srcq+2*lenq]
add dstq, lenq
; Both pointers now sit at the end of their buffers; index from -bytes to 0.
neg lenq
.loop:
%ifidn %1, sse2
; XMM path: two conversions of 4 floats each, packed to 8 words.
cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16]
; packssdw narrows dwords to words with signed saturation.
packssdw m0, m1
mova [dstq+lenq], m0
%else
; MMX-register path: each conversion yields 2 dwords, so four are needed.
cvtps2pi m0, [srcq+2*lenq ]
cvtps2pi m1, [srcq+2*lenq+ 8]
cvtps2pi m2, [srcq+2*lenq+16]
cvtps2pi m3, [srcq+2*lenq+24]
packssdw m0, m1
packssdw m2, m3
mova [dstq+lenq ], m0
mova [dstq+lenq+8], m2
%endif
add lenq, 16
; Continue while the byte index is still negative.
js .loop
%ifnidn %1, sse2
; The non-sse2 variants used the MMX registers; emms hands them back to x87.
emms
%endif
REP_RET
%endmacro
INIT_XMM
FLOAT_TO_INT16 sse2, 2
INIT_MMX
FLOAT_TO_INT16 sse, 0
; The 3DNow! variant reuses the MMX-register path with pf2id substituted for
; cvtps2pi. pf2id rounds differently from the SSE conversions, so this
; variant is not bit-exact with the others.
%define cvtps2pi pf2id
FLOAT_TO_INT16 3dnow, 0
%undef cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;-------------------------------------------------------------------------------
; Convert two planar float channels (src[0], src[1]) to int16 with signed
; saturation and write them interleaved (c0 c1 c0 c1 ...) to dst.
;   %1 = instruction-set suffix of the emitted symbol (3dnow, sse or sse2)
; Every loop pass consumes 4 samples per channel (16 bytes each) and emits
; 16 output bytes; the body always runs at least once, so callers presumably
; pass a positive multiple of 4 samples -- TODO confirm.
%macro FLOAT_TO_INT16_INTERLEAVE2 1
; Only three arguments exist (dst, src, len) but four registers are named:
; on entry r1 (src0) holds the src pointer array and r2 (src1) holds len.
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
; Read len out of r2 before r2 is reused as the second channel pointer.
; 4*len is both the per-channel input size (4-byte floats) and the output
; size (2 channels * 2 bytes), so a single index serves all three buffers.
lea lenq, [4*r2q]
; Fetch src[1] first: the next instruction overwrites the array pointer.
mov src1q, [src0q+gprsize]
mov src0q, [src0q]
add dstq, lenq
add src0q, lenq
add src1q, lenq
; All pointers now sit at the end of their buffers; index from -bytes to 0.
neg lenq
.loop:
%ifidn %1, sse2
cvtps2dq m0, [src0q+lenq]
cvtps2dq m1, [src1q+lenq]
; m0 = a0 a1 a2 a3 b0 b1 b2 b3 as words (a = channel 0, b = channel 1).
packssdw m0, m1
; Copy the channel-1 half down so punpcklwd can zip it with channel 0.
movhlps m1, m0
punpcklwd m0, m1
mova [dstq+lenq], m0
%else
cvtps2pi m0, [src0q+lenq ]
cvtps2pi m1, [src0q+lenq+8]
cvtps2pi m2, [src1q+lenq ]
cvtps2pi m3, [src1q+lenq+8]
; m0 = four channel-0 words, m2 = four channel-1 words.
packssdw m0, m1
packssdw m2, m3
; Zip the low and high word pairs of both channels into two output qwords.
mova m1, m0
punpcklwd m0, m2
punpckhwd m1, m2
mova [dstq+lenq ], m0
mova [dstq+lenq+8], m1
%endif
add lenq, 16
; Continue while the byte index is still negative.
js .loop
%ifnidn %1, sse2
; The non-sse2 variants used the MMX registers; emms hands them back to x87.
emms
%endif
REP_RET
%endmacro
INIT_MMX
; 3DNow! variant: same MMX-register path with pf2id standing in for cvtps2pi.
%define cvtps2pi pf2id
FLOAT_TO_INT16_INTERLEAVE2 3dnow
%undef cvtps2pi
%define movdqa movaps
FLOAT_TO_INT16_INTERLEAVE2 sse
%undef movdqa
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro PSWAPD_SSE 2 %macro PSWAPD_SSE 2
pshufw %1, %2, 0x4e pshufw %1, %2, 0x4e
%endmacro %endmacro

View File

@ -26,133 +26,32 @@
#include "libavutil/x86_cpu.h" #include "libavutil/x86_cpu.h"
#include "libavcodec/fmtconvert.h" #include "libavcodec/fmtconvert.h"
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len) #if HAVE_YASM
{
x86_reg i = -4*len;
__asm__ volatile(
"movss %3, %%xmm4 \n"
"shufps $0, %%xmm4, %%xmm4 \n"
"1: \n"
"cvtpi2ps (%2,%0), %%xmm0 \n"
"cvtpi2ps 8(%2,%0), %%xmm1 \n"
"cvtpi2ps 16(%2,%0), %%xmm2 \n"
"cvtpi2ps 24(%2,%0), %%xmm3 \n"
"movlhps %%xmm1, %%xmm0 \n"
"movlhps %%xmm3, %%xmm2 \n"
"mulps %%xmm4, %%xmm0 \n"
"mulps %%xmm4, %%xmm2 \n"
"movaps %%xmm0, (%1,%0) \n"
"movaps %%xmm2, 16(%1,%0) \n"
"add $32, %0 \n"
"jl 1b \n"
:"+r"(i)
:"r"(dst+len), "r"(src+len), "m"(mul)
);
}
static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len) void ff_int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len);
{ void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len);
x86_reg i = -4*len;
__asm__ volatile(
"movss %3, %%xmm4 \n"
"shufps $0, %%xmm4, %%xmm4 \n"
"1: \n"
"cvtdq2ps (%2,%0), %%xmm0 \n"
"cvtdq2ps 16(%2,%0), %%xmm1 \n"
"mulps %%xmm4, %%xmm0 \n"
"mulps %%xmm4, %%xmm1 \n"
"movaps %%xmm0, (%1,%0) \n"
"movaps %%xmm1, 16(%1,%0) \n"
"add $32, %0 \n"
"jl 1b \n"
:"+r"(i)
:"r"(dst+len), "r"(src+len), "m"(mul)
);
}
static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
x86_reg reglen = len; void ff_float_to_int16_sse (int16_t *dst, const float *src, long len);
// not bit-exact: pf2id uses different rounding than C and SSE void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
__asm__ volatile(
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
"neg %0 \n\t"
"1: \n\t"
"pf2id (%2,%0,2) , %%mm0 \n\t"
"pf2id 8(%2,%0,2) , %%mm1 \n\t"
"pf2id 16(%2,%0,2) , %%mm2 \n\t"
"pf2id 24(%2,%0,2) , %%mm3 \n\t"
"packssdw %%mm1 , %%mm0 \n\t"
"packssdw %%mm3 , %%mm2 \n\t"
"movq %%mm0 , (%1,%0) \n\t"
"movq %%mm2 , 8(%1,%0) \n\t"
"add $16 , %0 \n\t"
" js 1b \n\t"
"femms \n\t"
:"+r"(reglen), "+r"(dst), "+r"(src)
);
}
static void float_to_int16_sse(int16_t *dst, const float *src, long len){ void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
x86_reg reglen = len; void ff_float_to_int16_interleave2_sse (int16_t *dst, const float **src, long len);
__asm__ volatile( void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
"neg %0 \n\t"
"1: \n\t"
"cvtps2pi (%2,%0,2) , %%mm0 \n\t"
"cvtps2pi 8(%2,%0,2) , %%mm1 \n\t"
"cvtps2pi 16(%2,%0,2) , %%mm2 \n\t"
"cvtps2pi 24(%2,%0,2) , %%mm3 \n\t"
"packssdw %%mm1 , %%mm0 \n\t"
"packssdw %%mm3 , %%mm2 \n\t"
"movq %%mm0 , (%1,%0) \n\t"
"movq %%mm2 , 8(%1,%0) \n\t"
"add $16 , %0 \n\t"
" js 1b \n\t"
"emms \n\t"
:"+r"(reglen), "+r"(dst), "+r"(src)
);
}
static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
x86_reg reglen = len;
__asm__ volatile(
"add %0 , %0 \n\t"
"lea (%2,%0,2) , %2 \n\t"
"add %0 , %1 \n\t"
"neg %0 \n\t"
"1: \n\t"
"cvtps2dq (%2,%0,2) , %%xmm0 \n\t"
"cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t"
"packssdw %%xmm1 , %%xmm0 \n\t"
"movdqa %%xmm0 , (%1,%0) \n\t"
"add $16 , %0 \n\t"
" js 1b \n\t"
:"+r"(reglen), "+r"(dst), "+r"(src)
);
}
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
#if !HAVE_YASM
#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#endif
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ #define FLOAT_TO_INT16_INTERLEAVE(cpu) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
DECLARE_ALIGNED(16, int16_t, tmp)[len];\ DECLARE_ALIGNED(16, int16_t, tmp)[len];\
int i,j,c;\ int i,j,c;\
for(c=0; c<channels; c++){\ for(c=0; c<channels; c++){\
float_to_int16_##cpu(tmp, src[c], len);\ ff_float_to_int16_##cpu(tmp, src[c], len);\
for(i=0, j=c; i<len; i++, j+=channels)\ for(i=0, j=c; i<len; i++, j+=channels)\
dst[j] = tmp[i];\ dst[j] = tmp[i];\
}\ }\
@ -160,73 +59,18 @@ static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
\ \
static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
if(channels==1)\ if(channels==1)\
float_to_int16_##cpu(dst, src[0], len);\ ff_float_to_int16_##cpu(dst, src[0], len);\
else if(channels==2){\ else if(channels==2){\
x86_reg reglen = len; \ ff_float_to_int16_interleave2_##cpu(dst, src, len);\
const float *src0 = src[0];\
const float *src1 = src[1];\
__asm__ volatile(\
"shl $2, %0 \n"\
"add %0, %1 \n"\
"add %0, %2 \n"\
"add %0, %3 \n"\
"neg %0 \n"\
body\
:"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
);\
}else if(channels==6){\ }else if(channels==6){\
ff_float_to_int16_interleave6_##cpu(dst, src, len);\ ff_float_to_int16_interleave6_##cpu(dst, src, len);\
}else\ }else\
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\ float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
} }
FLOAT_TO_INT16_INTERLEAVE(3dnow, FLOAT_TO_INT16_INTERLEAVE(3dnow)
"1: \n" FLOAT_TO_INT16_INTERLEAVE(sse)
"pf2id (%2,%0), %%mm0 \n" FLOAT_TO_INT16_INTERLEAVE(sse2)
"pf2id 8(%2,%0), %%mm1 \n"
"pf2id (%3,%0), %%mm2 \n"
"pf2id 8(%3,%0), %%mm3 \n"
"packssdw %%mm1, %%mm0 \n"
"packssdw %%mm3, %%mm2 \n"
"movq %%mm0, %%mm1 \n"
"punpcklwd %%mm2, %%mm0 \n"
"punpckhwd %%mm2, %%mm1 \n"
"movq %%mm0, (%1,%0)\n"
"movq %%mm1, 8(%1,%0)\n"
"add $16, %0 \n"
"js 1b \n"
"femms \n"
)
FLOAT_TO_INT16_INTERLEAVE(sse,
"1: \n"
"cvtps2pi (%2,%0), %%mm0 \n"
"cvtps2pi 8(%2,%0), %%mm1 \n"
"cvtps2pi (%3,%0), %%mm2 \n"
"cvtps2pi 8(%3,%0), %%mm3 \n"
"packssdw %%mm1, %%mm0 \n"
"packssdw %%mm3, %%mm2 \n"
"movq %%mm0, %%mm1 \n"
"punpcklwd %%mm2, %%mm0 \n"
"punpckhwd %%mm2, %%mm1 \n"
"movq %%mm0, (%1,%0)\n"
"movq %%mm1, 8(%1,%0)\n"
"add $16, %0 \n"
"js 1b \n"
"emms \n"
)
FLOAT_TO_INT16_INTERLEAVE(sse2,
"1: \n"
"cvtps2dq (%2,%0), %%xmm0 \n"
"cvtps2dq (%3,%0), %%xmm1 \n"
"packssdw %%xmm1, %%xmm0 \n"
"movhlps %%xmm0, %%xmm1 \n"
"punpcklwd %%xmm1, %%xmm0 \n"
"movdqa %%xmm0, (%1,%0) \n"
"add $16, %0 \n"
"js 1b \n"
)
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
if(channels==6) if(channels==6)
@ -235,7 +79,6 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
float_to_int16_interleave_3dnow(dst, src, len, channels); float_to_int16_interleave_3dnow(dst, src, len, channels);
} }
#if HAVE_YASM
void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len); void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len); void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
@ -269,34 +112,32 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
{ {
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX) {
#if HAVE_YASM #if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) {
c->float_interleave = float_interleave_mmx; c->float_interleave = float_interleave_mmx;
#endif
if(mm_flags & AV_CPU_FLAG_3DNOW){ if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16 = float_to_int16_3dnow; c->float_to_int16 = ff_float_to_int16_3dnow;
c->float_to_int16_interleave = float_to_int16_interleave_3dnow; c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
} }
} }
if(mm_flags & AV_CPU_FLAG_3DNOWEXT){ if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
c->float_to_int16_interleave = float_to_int16_interleave_3dn2; c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
} }
} }
if(mm_flags & AV_CPU_FLAG_SSE){ if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
c->float_to_int16 = float_to_int16_sse; c->float_to_int16 = ff_float_to_int16_sse;
c->float_to_int16_interleave = float_to_int16_interleave_sse; c->float_to_int16_interleave = float_to_int16_interleave_sse;
#if HAVE_YASM
c->float_interleave = float_interleave_sse; c->float_interleave = float_interleave_sse;
#endif
} }
if(mm_flags & AV_CPU_FLAG_SSE2){ if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) {
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
c->float_to_int16 = float_to_int16_sse2; c->float_to_int16 = ff_float_to_int16_sse2;
c->float_to_int16_interleave = float_to_int16_interleave_sse2; c->float_to_int16_interleave = float_to_int16_interleave_sse2;
} }
} }
#endif
} }

View File

@ -28,21 +28,20 @@ SECTION .text
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; biweight pred: ; biweight pred:
; ;
; void h264_biweight_16x16_sse2(uint8_t *dst, uint8_t *src, int stride, ; void h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride,
; int log2_denom, int weightd, int weights, ; int height, int log2_denom, int weightd,
; int offset); ; int weights, int offset);
; and ; and
; void h264_weight_16x16_sse2(uint8_t *dst, int stride, ; void h264_weight_16_sse2(uint8_t *dst, int stride, int height,
; int log2_denom, int weight, ; int log2_denom, int weight, int offset);
; int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro WEIGHT_SETUP 0 %macro WEIGHT_SETUP 0
add r4, r4 add r5, r5
inc r4 inc r5
movd m3, r3d movd m3, r4d
movd m5, r4d movd m5, r5d
movd m6, r2d movd m6, r3d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
%if mmsize == 16 %if mmsize == 16
@ -71,60 +70,41 @@ SECTION .text
packuswb m0, m1 packuswb m0, m1
%endmacro %endmacro
%macro WEIGHT_FUNC_DBL_MM 1 INIT_MMX
cglobal h264_weight_16x%1_mmx2, 5, 5, 0 cglobal h264_weight_16_mmx2, 6, 6, 0
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %1
%if %1 == 16
.nextrow .nextrow
WEIGHT_OP 0, 4 WEIGHT_OP 0, 4
mova [r0 ], m0 mova [r0 ], m0
WEIGHT_OP 8, 12 WEIGHT_OP 8, 12
mova [r0+8], m0 mova [r0+8], m0
add r0, r1 add r0, r1
dec r2 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_16x16_mmx2.nextrow)
%endif
%endmacro
INIT_MMX %macro WEIGHT_FUNC_MM 3
WEIGHT_FUNC_DBL_MM 16 cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_FUNC_DBL_MM 8
%macro WEIGHT_FUNC_MM 4
cglobal h264_weight_%1x%2_%4, 7, 7, %3
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %2
%if %2 == 16
.nextrow .nextrow
WEIGHT_OP 0, mmsize/2 WEIGHT_OP 0, mmsize/2
mova [r0], m0 mova [r0], m0
add r0, r1 add r0, r1
dec r2 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_%1x16_%4.nextrow)
%endif
%endmacro %endmacro
INIT_MMX INIT_MMX
WEIGHT_FUNC_MM 8, 16, 0, mmx2 WEIGHT_FUNC_MM 8, 0, mmx2
WEIGHT_FUNC_MM 8, 8, 0, mmx2
WEIGHT_FUNC_MM 8, 4, 0, mmx2
INIT_XMM INIT_XMM
WEIGHT_FUNC_MM 16, 16, 8, sse2 WEIGHT_FUNC_MM 16, 8, sse2
WEIGHT_FUNC_MM 16, 8, 8, sse2
%macro WEIGHT_FUNC_HALF_MM 5 %macro WEIGHT_FUNC_HALF_MM 3
cglobal h264_weight_%1x%2_%5, 5, 5, %4 cglobal h264_weight_%1_%3, 6, 6, %2
WEIGHT_SETUP WEIGHT_SETUP
mov r2, %2/2 sar r2d, 1
lea r3, [r1*2] lea r3, [r1*2]
%if %2 == mmsize
.nextrow .nextrow
WEIGHT_OP 0, r1 WEIGHT_OP 0, r1
movh [r0], m0 movh [r0], m0
@ -135,31 +115,34 @@ cglobal h264_weight_%1x%2_%5, 5, 5, %4
movh [r0+r1], m0 movh [r0+r1], m0
%endif %endif
add r0, r3 add r0, r3
dec r2 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_weight_%1x%3_%5.nextrow)
%endif
%endmacro %endmacro
INIT_MMX INIT_MMX
WEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2 WEIGHT_FUNC_HALF_MM 4, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2 WEIGHT_FUNC_HALF_MM 4, 0, mmx2
WEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2 WEIGHT_FUNC_HALF_MM 4, 0, mmx2
INIT_XMM INIT_XMM
WEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2 WEIGHT_FUNC_HALF_MM 8, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2 WEIGHT_FUNC_HALF_MM 8, 8, sse2
WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2 WEIGHT_FUNC_HALF_MM 8, 8, sse2
%macro BIWEIGHT_SETUP 0 %macro BIWEIGHT_SETUP 0
add r6, 1 %ifdef ARCH_X86_64
or r6, 1 %define off_regd r11d
add r3, 1 %else
movd m3, r4d %define off_regd r3d
movd m4, r5d %endif
movd m5, r6d mov off_regd, r7m
movd m6, r3d add off_regd, 1
or off_regd, 1
add r4, 1
movd m3, r5d
movd m4, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
%if mmsize == 16 %if mmsize == 16
@ -195,11 +178,10 @@ WEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m1 packuswb m0, m1
%endmacro %endmacro
%macro BIWEIGHT_FUNC_DBL_MM 1 INIT_MMX
cglobal h264_biweight_16x%1_mmx2, 7, 7, 0 cglobal h264_biweight_16_mmx2, 7, 7, 0
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %1 movifnidn r3d, r3m
%if %1 == 16
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, 4 BIWEIGHT_STEPA 1, 2, 4
@ -211,23 +193,14 @@ cglobal h264_biweight_16x%1_mmx2, 7, 7, 0
mova [r0+8], m0 mova [r0+8], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_mmx2.nextrow)
%endif
%endmacro
INIT_MMX %macro BIWEIGHT_FUNC_MM 3
BIWEIGHT_FUNC_DBL_MM 16 cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_FUNC_DBL_MM 8
%macro BIWEIGHT_FUNC_MM 4
cglobal h264_biweight_%1x%2_%4, 7, 7, %3
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %2 movifnidn r3d, r3m
%if %2 == 16
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, mmsize/2 BIWEIGHT_STEPA 1, 2, mmsize/2
@ -235,28 +208,22 @@ cglobal h264_biweight_%1x%2_%4, 7, 7, %3
mova [r0], m0 mova [r0], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_%1x16_%4.nextrow)
%endif
%endmacro %endmacro
INIT_MMX INIT_MMX
BIWEIGHT_FUNC_MM 8, 16, 0, mmx2 BIWEIGHT_FUNC_MM 8, 0, mmx2
BIWEIGHT_FUNC_MM 8, 8, 0, mmx2
BIWEIGHT_FUNC_MM 8, 4, 0, mmx2
INIT_XMM INIT_XMM
BIWEIGHT_FUNC_MM 16, 16, 8, sse2 BIWEIGHT_FUNC_MM 16, 8, sse2
BIWEIGHT_FUNC_MM 16, 8, 8, sse2
%macro BIWEIGHT_FUNC_HALF_MM 5 %macro BIWEIGHT_FUNC_HALF_MM 3
cglobal h264_biweight_%1x%2_%5, 7, 7, %4 cglobal h264_biweight_%1_%3, 7, 7, %2
BIWEIGHT_SETUP BIWEIGHT_SETUP
mov r3, %2/2 movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2] lea r4, [r2*2]
%if %2 == mmsize
.nextrow .nextrow
BIWEIGHT_STEPA 0, 1, 0 BIWEIGHT_STEPA 0, 1, 0
BIWEIGHT_STEPA 1, 2, r2 BIWEIGHT_STEPA 1, 2, r2
@ -270,31 +237,30 @@ cglobal h264_biweight_%1x%2_%5, 7, 7, %4
%endif %endif
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_%1x%3_%5.nextrow)
%endif
%endmacro %endmacro
INIT_MMX INIT_MMX
BIWEIGHT_FUNC_HALF_MM 4, 8, 8, 0, mmx2 BIWEIGHT_FUNC_HALF_MM 4, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 4, 8, 0, mmx2
BIWEIGHT_FUNC_HALF_MM 4, 2, 8, 0, mmx2
INIT_XMM INIT_XMM
BIWEIGHT_FUNC_HALF_MM 8, 16, 16, 8, sse2 BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 8, 16, 8, sse2
BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
%macro BIWEIGHT_SSSE3_SETUP 0 %macro BIWEIGHT_SSSE3_SETUP 0
add r6, 1 %ifdef ARCH_X86_64
or r6, 1 %define off_regd r11d
add r3, 1 %else
movd m4, r4d %define off_regd r3d
movd m0, r5d %endif
movd m5, r6d mov off_regd, r7m
movd m6, r3d add off_regd, 1
or off_regd, 1
add r4, 1
movd m4, r5d
movd m0, r6d
movd m5, off_regd
movd m6, r4d
pslld m5, m6 pslld m5, m6
psrld m5, 1 psrld m5, 1
punpcklbw m4, m0 punpcklbw m4, m0
@ -314,12 +280,11 @@ BIWEIGHT_FUNC_HALF_MM 8, 4, 16, 8, sse2
packuswb m0, m2 packuswb m0, m2
%endmacro %endmacro
%macro BIWEIGHT_SSSE3_16 1 INIT_XMM
cglobal h264_biweight_16x%1_ssse3, 7, 7, 8 cglobal h264_biweight_16_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SSSE3_SETUP
mov r3, %1 movifnidn r3d, r3m
%if %1 == 16
.nextrow .nextrow
movh m0, [r0] movh m0, [r0]
movh m2, [r0+8] movh m2, [r0+8]
@ -330,25 +295,17 @@ cglobal h264_biweight_16x%1_ssse3, 7, 7, 8
mova [r0], m0 mova [r0], m0
add r0, r2 add r0, r2
add r1, r2 add r1, r2
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_16x16_ssse3.nextrow)
%endif
%endmacro
INIT_XMM INIT_XMM
BIWEIGHT_SSSE3_16 16 cglobal h264_biweight_8_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_16 8
%macro BIWEIGHT_SSSE3_8 1
cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
BIWEIGHT_SSSE3_SETUP BIWEIGHT_SSSE3_SETUP
mov r3, %1/2 movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2] lea r4, [r2*2]
%if %1 == 16
.nextrow .nextrow
movh m0, [r0] movh m0, [r0]
movh m1, [r1] movh m1, [r1]
@ -361,15 +318,6 @@ cglobal h264_biweight_8x%1_ssse3, 7, 7, 8
movhps [r0+r2], m0 movhps [r0+r2], m0
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec r3 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
%else
jmp mangle(ff_h264_biweight_8x16_ssse3.nextrow)
%endif
%endmacro
INIT_XMM
BIWEIGHT_SSSE3_8 16
BIWEIGHT_SSSE3_8 8
BIWEIGHT_SSSE3_8 4

View File

@ -36,33 +36,26 @@ cextern pw_1
SECTION .text SECTION .text
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void h264_weight(uint8_t *dst, int stride, int log2_denom, ; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
; int weight, int offset); ; int weight, int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32 %macro WEIGHT_PROLOGUE 0
DECLARE_REG_TMP 2
%else
DECLARE_REG_TMP 10
%endif
%macro WEIGHT_PROLOGUE 1
mov t0, %1
.prologue .prologue
PROLOGUE 0,5,8 PROLOGUE 0,6,8
movifnidn r0, r0mp movifnidn r0, r0mp
movifnidn r1d, r1m movifnidn r1d, r1m
movifnidn r3d, r3m
movifnidn r4d, r4m movifnidn r4d, r4m
movifnidn r5d, r5m
%endmacro %endmacro
%macro WEIGHT_SETUP 1 %macro WEIGHT_SETUP 1
mova m0, [pw_1] mova m0, [pw_1]
movd m2, r2m movd m2, r3m
pslld m0, m2 ; 1<<log2_denom pslld m0, m2 ; 1<<log2_denom
SPLATW m0, m0 SPLATW m0, m0
shl r4, 19 ; *8, move to upper half of dword shl r5, 19 ; *8, move to upper half of dword
lea r4, [r4+r3*2+0x10000] lea r5, [r5+r4*2+0x10000]
movd m3, r4d ; weight<<1 | 1+(offset<<(3)) movd m3, r5d ; weight<<1 | 1+(offset<<(3))
pshufd m3, m3, 0 pshufd m3, m3, 0
mova m4, [pw_pixel_max] mova m4, [pw_pixel_max]
paddw m2, [sq_1] ; log2_denom+1 paddw m2, [sq_1] ; log2_denom+1
@ -96,8 +89,8 @@ DECLARE_REG_TMP 10
%endmacro %endmacro
%macro WEIGHT_FUNC_DBL 1 %macro WEIGHT_FUNC_DBL 1
cglobal h264_weight_16x16_10_%1 cglobal h264_weight_16_10_%1
WEIGHT_PROLOGUE 16 WEIGHT_PROLOGUE
WEIGHT_SETUP %1 WEIGHT_SETUP %1
.nextrow .nextrow
WEIGHT_OP %1, 0 WEIGHT_OP %1, 0
@ -105,13 +98,9 @@ cglobal h264_weight_16x16_10_%1
WEIGHT_OP %1, 16 WEIGHT_OP %1, 16
mova [r0+16], m5 mova [r0+16], m5
add r0, r1 add r0, r1
dec t0 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_weight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_16x16_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
@ -120,24 +109,16 @@ WEIGHT_FUNC_DBL sse4
%macro WEIGHT_FUNC_MM 1 %macro WEIGHT_FUNC_MM 1
cglobal h264_weight_8x16_10_%1 cglobal h264_weight_8_10_%1
WEIGHT_PROLOGUE 16 WEIGHT_PROLOGUE
WEIGHT_SETUP %1 WEIGHT_SETUP %1
.nextrow .nextrow
WEIGHT_OP %1, 0 WEIGHT_OP %1, 0
mova [r0], m5 mova [r0], m5
add r0, r1 add r0, r1
dec t0 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_weight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
cglobal h264_weight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_weight_8x16_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
@ -146,8 +127,9 @@ WEIGHT_FUNC_MM sse4
%macro WEIGHT_FUNC_HALF_MM 1 %macro WEIGHT_FUNC_HALF_MM 1
cglobal h264_weight_4x8_10_%1 cglobal h264_weight_4_10_%1
WEIGHT_PROLOGUE 4 WEIGHT_PROLOGUE
sar r2d, 1
WEIGHT_SETUP %1 WEIGHT_SETUP %1
lea r3, [r1*2] lea r3, [r1*2]
.nextrow .nextrow
@ -155,17 +137,9 @@ cglobal h264_weight_4x8_10_%1
movh [r0], m5 movh [r0], m5
movhps [r0+r1], m5 movhps [r0+r1], m5
add r0, r3 add r0, r3
dec t0 dec r2d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_weight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
cglobal h264_weight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_weight_4x8_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
@ -174,40 +148,40 @@ WEIGHT_FUNC_HALF_MM sse4
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int log2_denom, ; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
; int weightd, int weights, int offset); ; int log2_denom, int weightd, int weights, int offset);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%ifdef ARCH_X86_32 %ifdef ARCH_X86_32
DECLARE_REG_TMP 2,3 DECLARE_REG_TMP 3
%else %else
DECLARE_REG_TMP 10,2 DECLARE_REG_TMP 10
%endif %endif
%macro BIWEIGHT_PROLOGUE 1 %macro BIWEIGHT_PROLOGUE 0
mov t0, %1
.prologue .prologue
PROLOGUE 0,7,8 PROLOGUE 0,7,8
movifnidn r0, r0mp movifnidn r0, r0mp
movifnidn r1, r1mp movifnidn r1, r1mp
movifnidn t1d, r2m movifnidn r2d, r2m
movifnidn r4d, r4m
movifnidn r5d, r5m movifnidn r5d, r5m
movifnidn r6d, r6m movifnidn r6d, r6m
movifnidn t0d, r7m
%endmacro %endmacro
%macro BIWEIGHT_SETUP 1 %macro BIWEIGHT_SETUP 1
lea r6, [r6*4+1] ; (offset<<2)+1 lea t0, [t0*4+1] ; (offset<<2)+1
or r6, 1 or t0, 1
shl r5, 16 shl r6, 16
or r4, r5 or r5, r6
movd m4, r4d ; weightd | weights movd m4, r5d ; weightd | weights
movd m5, r6d ; (offset+1)|1 movd m5, t0d ; (offset+1)|1
movd m6, r3m ; log2_denom movd m6, r4m ; log2_denom
pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom pslld m5, m6 ; (((offset<<2)+1)|1)<<log2_denom
paddd m6, [sq_1] paddd m6, [sq_1]
pshufd m4, m4, 0 pshufd m4, m4, 0
pshufd m5, m5, 0 pshufd m5, m5, 0
mova m3, [pw_pixel_max] mova m3, [pw_pixel_max]
movifnidn r3d, r3m
%ifnidn %1, sse4 %ifnidn %1, sse4
pxor m7, m7 pxor m7, m7
%endif %endif
@ -243,23 +217,19 @@ DECLARE_REG_TMP 10,2
%endmacro %endmacro
%macro BIWEIGHT_FUNC_DBL 1 %macro BIWEIGHT_FUNC_DBL 1
cglobal h264_biweight_16x16_10_%1 cglobal h264_biweight_16_10_%1
BIWEIGHT_PROLOGUE 16 BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
.nextrow .nextrow
BIWEIGHT %1, 0 BIWEIGHT %1, 0
mova [r0 ], m0 mova [r0 ], m0
BIWEIGHT %1, 16 BIWEIGHT %1, 16
mova [r0+16], m0 mova [r0+16], m0
add r0, t1 add r0, r2
add r1, t1 add r1, r2
dec t0 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_biweight_16x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_16x16_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
@ -267,25 +237,17 @@ BIWEIGHT_FUNC_DBL sse2
BIWEIGHT_FUNC_DBL sse4 BIWEIGHT_FUNC_DBL sse4
%macro BIWEIGHT_FUNC 1 %macro BIWEIGHT_FUNC 1
cglobal h264_biweight_8x16_10_%1 cglobal h264_biweight_8_10_%1
BIWEIGHT_PROLOGUE 16 BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
.nextrow .nextrow
BIWEIGHT %1, 0 BIWEIGHT %1, 0
mova [r0], m0 mova [r0], m0
add r0, t1 add r0, r2
add r1, t1 add r1, r2
dec t0 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_biweight_8x8_10_%1
mov t0, 8
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
cglobal h264_biweight_8x4_10_%1
mov t0, 4
jmp mangle(ff_h264_biweight_8x16_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM
@ -293,27 +255,20 @@ BIWEIGHT_FUNC sse2
BIWEIGHT_FUNC sse4 BIWEIGHT_FUNC sse4
%macro BIWEIGHT_FUNC_HALF 1 %macro BIWEIGHT_FUNC_HALF 1
cglobal h264_biweight_4x8_10_%1 cglobal h264_biweight_4_10_%1
BIWEIGHT_PROLOGUE 4 BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP %1 BIWEIGHT_SETUP %1
lea r4, [t1*2] sar r3d, 1
lea r4, [r2*2]
.nextrow .nextrow
BIWEIGHT %1, 0, t1 BIWEIGHT %1, 0, r2
movh [r0 ], m0 movh [r0 ], m0
movhps [r0+t1], m0 movhps [r0+r2], m0
add r0, r4 add r0, r4
add r1, r4 add r1, r4
dec t0 dec r3d
jnz .nextrow jnz .nextrow
REP_RET REP_RET
cglobal h264_biweight_4x4_10_%1
mov t0, 2
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
cglobal h264_biweight_4x2_10_%1
mov t0, 1
jmp mangle(ff_h264_biweight_4x8_10_%1.prologue)
%endmacro %endmacro
INIT_XMM INIT_XMM

View File

@ -298,63 +298,53 @@ LF_IFUNC(v, luma_intra, 10, mmxext)
/***********************************/ /***********************************/
/* weighted prediction */ /* weighted prediction */
#define H264_WEIGHT(W, H, OPT) \ #define H264_WEIGHT(W, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \ void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, \
int stride, int log2_denom, int weight, int offset); int stride, int height, int log2_denom, int weight, int offset);
#define H264_BIWEIGHT(W, H, OPT) \ #define H264_BIWEIGHT(W, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \ void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, \
uint8_t *src, int stride, int log2_denom, int weightd, \ uint8_t *src, int stride, int height, int log2_denom, int weightd, \
int weights, int offset); int weights, int offset);
#define H264_BIWEIGHT_MMX(W,H) \ #define H264_BIWEIGHT_MMX(W) \
H264_WEIGHT (W, H, mmx2) \ H264_WEIGHT (W, mmx2) \
H264_BIWEIGHT(W, H, mmx2) H264_BIWEIGHT(W, mmx2)
#define H264_BIWEIGHT_MMX_SSE(W,H) \ #define H264_BIWEIGHT_MMX_SSE(W) \
H264_BIWEIGHT_MMX(W, H) \ H264_BIWEIGHT_MMX(W) \
H264_WEIGHT (W, H, sse2) \ H264_WEIGHT (W, sse2) \
H264_BIWEIGHT (W, H, sse2) \ H264_BIWEIGHT (W, sse2) \
H264_BIWEIGHT (W, H, ssse3) H264_BIWEIGHT (W, ssse3)
H264_BIWEIGHT_MMX_SSE(16, 16) H264_BIWEIGHT_MMX_SSE(16)
H264_BIWEIGHT_MMX_SSE(16, 8) H264_BIWEIGHT_MMX_SSE( 8)
H264_BIWEIGHT_MMX_SSE( 8, 16) H264_BIWEIGHT_MMX ( 4)
H264_BIWEIGHT_MMX_SSE( 8, 8)
H264_BIWEIGHT_MMX_SSE( 8, 4)
H264_BIWEIGHT_MMX ( 4, 8)
H264_BIWEIGHT_MMX ( 4, 4)
H264_BIWEIGHT_MMX ( 4, 2)
#define H264_WEIGHT_10(W, H, DEPTH, OPT) \ #define H264_WEIGHT_10(W, DEPTH, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
int stride, int log2_denom, int weight, int offset); int stride, int height, int log2_denom, int weight, int offset);
#define H264_BIWEIGHT_10(W, H, DEPTH, OPT) \ #define H264_BIWEIGHT_10(W, DEPTH, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## DEPTH ## _ ## OPT \ void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, int stride, int log2_denom, \ (uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, \
int weightd, int weights, int offset); int weightd, int weights, int offset);
#define H264_BIWEIGHT_10_SSE(W, H, DEPTH) \ #define H264_BIWEIGHT_10_SSE(W, DEPTH) \
H264_WEIGHT_10 (W, H, DEPTH, sse2) \ H264_WEIGHT_10 (W, DEPTH, sse2) \
H264_WEIGHT_10 (W, H, DEPTH, sse4) \ H264_WEIGHT_10 (W, DEPTH, sse4) \
H264_BIWEIGHT_10(W, H, DEPTH, sse2) \ H264_BIWEIGHT_10(W, DEPTH, sse2) \
H264_BIWEIGHT_10(W, H, DEPTH, sse4) H264_BIWEIGHT_10(W, DEPTH, sse4)
H264_BIWEIGHT_10_SSE(16, 16, 10) H264_BIWEIGHT_10_SSE(16, 10)
H264_BIWEIGHT_10_SSE(16, 8, 10) H264_BIWEIGHT_10_SSE( 8, 10)
H264_BIWEIGHT_10_SSE( 8, 16, 10) H264_BIWEIGHT_10_SSE( 4, 10)
H264_BIWEIGHT_10_SSE( 8, 8, 10)
H264_BIWEIGHT_10_SSE( 8, 4, 10)
H264_BIWEIGHT_10_SSE( 4, 8, 10)
H264_BIWEIGHT_10_SSE( 4, 4, 10)
H264_BIWEIGHT_10_SSE( 4, 2, 10)
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{ {
int mm_flags = av_get_cpu_flags(); int mm_flags = av_get_cpu_flags();
if (mm_flags & AV_CPU_FLAG_MMX2) { if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2) {
c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
} }
@ -394,23 +384,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
#endif #endif
c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; c->weight_h264_pixels_tab[0]= ff_h264_weight_16_mmx2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; c->weight_h264_pixels_tab[1]= ff_h264_weight_8_mmx2;
c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; c->weight_h264_pixels_tab[2]= ff_h264_weight_4_mmx2;
c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_mmx2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_mmx2;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; c->biweight_h264_pixels_tab[2]= ff_h264_biweight_4_mmx2;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
if (mm_flags&AV_CPU_FLAG_SSE2) { if (mm_flags&AV_CPU_FLAG_SSE2) {
c->h264_idct8_add = ff_h264_idct8_add_8_sse2; c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
@ -422,17 +402,11 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;
c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2; c->weight_h264_pixels_tab[0]= ff_h264_weight_16_sse2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_sse2; c->weight_h264_pixels_tab[1]= ff_h264_weight_8_sse2;
c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_sse2;
c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_sse2;
c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_sse2;
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2; c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_sse2;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_sse2; c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_sse2;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_sse2;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2; c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
@ -442,11 +416,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
#endif #endif
} }
if (mm_flags&AV_CPU_FLAG_SSSE3) { if (mm_flags&AV_CPU_FLAG_SSSE3) {
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3; c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_ssse3;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_ssse3; c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_ssse3;
c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_ssse3;
c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
} }
if (HAVE_AVX && mm_flags&AV_CPU_FLAG_AVX) { if (HAVE_AVX && mm_flags&AV_CPU_FLAG_AVX) {
#if HAVE_ALIGNED_STACK #if HAVE_ALIGNED_STACK
@ -485,23 +456,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
#endif #endif
c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse2; c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse2; c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse2; c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse2;
c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse2;
c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse2;
c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse2;
c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse2;
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse2; c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse2; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse2; c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse2;
c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse2;
c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse2;
c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse2;
c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse2;
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2; c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2; c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2;
@ -513,23 +474,13 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
#endif #endif
} }
if (mm_flags&AV_CPU_FLAG_SSE4) { if (mm_flags&AV_CPU_FLAG_SSE4) {
c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse4; c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse4; c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse4; c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse4;
c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse4;
c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse4;
c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse4;
c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse4;
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse4; c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse4; c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse4; c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse4;
c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse4;
c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse4;
c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse4;
c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse4;
} }
#if HAVE_AVX #if HAVE_AVX
if (mm_flags&AV_CPU_FLAG_AVX) { if (mm_flags&AV_CPU_FLAG_AVX) {

View File

@ -10,7 +10,7 @@ OBJS = alldevices.o avdevice.o
# input/output devices # input/output devices
OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \ OBJS-$(CONFIG_ALSA_INDEV) += alsa-audio-common.o \
alsa-audio-dec.o alsa-audio-dec.o timefilter.o
OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \ OBJS-$(CONFIG_ALSA_OUTDEV) += alsa-audio-common.o \
alsa-audio-enc.o alsa-audio-enc.o
OBJS-$(CONFIG_BKTR_INDEV) += bktr.o OBJS-$(CONFIG_BKTR_INDEV) += bktr.o
@ -19,7 +19,7 @@ OBJS-$(CONFIG_DSHOW_INDEV) += dshow.o dshow_enummediatypes.o \
dshow_pin.o dshow_common.o dshow_pin.o dshow_common.o
OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o OBJS-$(CONFIG_DV1394_INDEV) += dv1394.o
OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o OBJS-$(CONFIG_FBDEV_INDEV) += fbdev.o
OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o OBJS-$(CONFIG_JACK_INDEV) += jack_audio.o timefilter.o
OBJS-$(CONFIG_LAVFI_INDEV) += lavfi.o OBJS-$(CONFIG_LAVFI_INDEV) += lavfi.o
OBJS-$(CONFIG_OPENAL_INDEV) += openal-dec.o OBJS-$(CONFIG_OPENAL_INDEV) += openal-dec.o
OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o OBJS-$(CONFIG_OSS_INDEV) += oss_audio.o
@ -39,4 +39,6 @@ OBJS-$(CONFIG_LIBDC1394_INDEV) += libdc1394.o
SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H) += alsa-audio.h SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H) += alsa-audio.h
SKIPHEADERS-$(HAVE_SNDIO_H) += sndio_common.h SKIPHEADERS-$(HAVE_SNDIO_H) += sndio_common.h
TESTPROGS = timefilter
include $(SRC_PATH)/subdir.mak include $(SRC_PATH)/subdir.mak

View File

@ -33,7 +33,7 @@
#include <alsa/asoundlib.h> #include <alsa/asoundlib.h>
#include "config.h" #include "config.h"
#include "libavutil/log.h" #include "libavutil/log.h"
#include "libavformat/timefilter.h" #include "timefilter.h"
#include "avdevice.h" #include "avdevice.h"
/* XXX: we make the assumption that the soundcard accepts this format */ /* XXX: we make the assumption that the soundcard accepts this format */

View File

@ -28,7 +28,8 @@
#include "libavutil/fifo.h" #include "libavutil/fifo.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavformat/timefilter.h" #include "libavformat/avformat.h"
#include "timefilter.h"
#include "avdevice.h" #include "avdevice.h"
/** /**

View File

@ -24,8 +24,8 @@
#include "config.h" #include "config.h"
#include "avformat.h"
#include "timefilter.h" #include "timefilter.h"
#include "libavutil/mem.h"
struct TimeFilter { struct TimeFilter {
/// Delay Locked Loop data. These variables refer to mathematical /// Delay Locked Loop data. These variables refer to mathematical

View File

@ -22,8 +22,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#ifndef AVFORMAT_TIMEFILTER_H #ifndef AVDEVICE_TIMEFILTER_H
#define AVFORMAT_TIMEFILTER_H #define AVDEVICE_TIMEFILTER_H
/** /**
* Opaque type representing a time filter state * Opaque type representing a time filter state
@ -94,4 +94,4 @@ void ff_timefilter_reset(TimeFilter *);
*/ */
void ff_timefilter_destroy(TimeFilter *); void ff_timefilter_destroy(TimeFilter *);
#endif /* AVFORMAT_TIMEFILTER_H */ #endif /* AVDEVICE_TIMEFILTER_H */

View File

@ -354,11 +354,8 @@ OBJS-$(CONFIG_RTP_PROTOCOL) += rtpproto.o
OBJS-$(CONFIG_TCP_PROTOCOL) += tcp.o OBJS-$(CONFIG_TCP_PROTOCOL) += tcp.o
OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o OBJS-$(CONFIG_UDP_PROTOCOL) += udp.o
# libavdevice dependencies
OBJS-$(CONFIG_ALSA_INDEV) += timefilter.o
OBJS-$(CONFIG_JACK_INDEV) += timefilter.o
TESTPROGS = seek timefilter TESTPROGS = seek
TOOLS = pktdumper probetest TOOLS = pktdumper probetest
include $(SRC_PATH)/subdir.mak include $(SRC_PATH)/subdir.mak

View File

@ -228,8 +228,9 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream, AVStream *vst
case AMF_DATA_TYPE_OBJECT: { case AMF_DATA_TYPE_OBJECT: {
unsigned int keylen; unsigned int keylen;
if (vstream && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1) if ((vstream || astream) && ioc->seekable && key && !strcmp(KEYFRAMES_TAG, key) && depth == 1)
if (parse_keyframes_index(s, ioc, vstream, max_pos) < 0) if (parse_keyframes_index(s, ioc, vstream ? vstream : astream,
max_pos) < 0)
av_log(s, AV_LOG_ERROR, "Keyframe index parsing failed\n"); av_log(s, AV_LOG_ERROR, "Keyframe index parsing failed\n");
while(avio_tell(ioc) < max_pos - 2 && (keylen = avio_rb16(ioc))) { while(avio_tell(ioc) < max_pos - 2 && (keylen = avio_rb16(ioc))) {

View File

@ -60,10 +60,13 @@ typedef struct FLVContext {
int64_t duration_offset; int64_t duration_offset;
int64_t filesize_offset; int64_t filesize_offset;
int64_t duration; int64_t duration;
int delay; ///< first dts delay for AVC
int64_t last_ts;
} FLVContext; } FLVContext;
typedef struct FLVStreamContext {
int delay; ///< first dts delay for each stream (needed for AVC & Speex)
int64_t last_ts; ///< last timestamp for each stream
} FLVStreamContext;
static int get_audio_flags(AVCodecContext *enc){ static int get_audio_flags(AVCodecContext *enc){
int flags = (enc->bits_per_coded_sample == 16) ? FLV_SAMPLESSIZE_16BIT : FLV_SAMPLESSIZE_8BIT; int flags = (enc->bits_per_coded_sample == 16) ? FLV_SAMPLESSIZE_16BIT : FLV_SAMPLESSIZE_8BIT;
@ -182,6 +185,7 @@ static int flv_write_header(AVFormatContext *s)
for(i=0; i<s->nb_streams; i++){ for(i=0; i<s->nb_streams; i++){
AVCodecContext *enc = s->streams[i]->codec; AVCodecContext *enc = s->streams[i]->codec;
FLVStreamContext *sc;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO) { if (enc->codec_type == AVMEDIA_TYPE_VIDEO) {
if (s->streams[i]->r_frame_rate.den && s->streams[i]->r_frame_rate.num) { if (s->streams[i]->r_frame_rate.den && s->streams[i]->r_frame_rate.num) {
framerate = av_q2d(s->streams[i]->r_frame_rate); framerate = av_q2d(s->streams[i]->r_frame_rate);
@ -199,6 +203,12 @@ static int flv_write_header(AVFormatContext *s)
return -1; return -1;
} }
av_set_pts_info(s->streams[i], 32, 1, 1000); /* 32 bit pts in ms */ av_set_pts_info(s->streams[i], 32, 1, 1000); /* 32 bit pts in ms */
sc = av_mallocz(sizeof(FLVStreamContext));
if (!sc)
return AVERROR(ENOMEM);
s->streams[i]->priv_data = sc;
sc->last_ts = -1;
} }
avio_write(pb, "FLV", 3); avio_write(pb, "FLV", 3);
avio_w8(pb,1); avio_w8(pb,1);
@ -218,8 +228,6 @@ static int flv_write_header(AVFormatContext *s)
} }
} }
flv->last_ts = -1;
/* write meta_tag */ /* write meta_tag */
avio_w8(pb, 18); // tag type META avio_w8(pb, 18); // tag type META
metadata_size_pos= avio_tell(pb); metadata_size_pos= avio_tell(pb);
@ -361,9 +369,10 @@ static int flv_write_trailer(AVFormatContext *s)
/* Add EOS tag */ /* Add EOS tag */
for (i = 0; i < s->nb_streams; i++) { for (i = 0; i < s->nb_streams; i++) {
AVCodecContext *enc = s->streams[i]->codec; AVCodecContext *enc = s->streams[i]->codec;
FLVStreamContext *sc = s->streams[i]->priv_data;
if (enc->codec_type == AVMEDIA_TYPE_VIDEO && if (enc->codec_type == AVMEDIA_TYPE_VIDEO &&
(enc->codec_id == CODEC_ID_H264 || enc->codec_id == CODEC_ID_MPEG4)) { (enc->codec_id == CODEC_ID_H264 || enc->codec_id == CODEC_ID_MPEG4)) {
put_avc_eos_tag(pb, flv->last_ts); put_avc_eos_tag(pb, sc->last_ts);
} }
} }
@ -384,6 +393,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
AVIOContext *pb = s->pb; AVIOContext *pb = s->pb;
AVCodecContext *enc = s->streams[pkt->stream_index]->codec; AVCodecContext *enc = s->streams[pkt->stream_index]->codec;
FLVContext *flv = s->priv_data; FLVContext *flv = s->priv_data;
FLVStreamContext *sc = s->streams[pkt->stream_index]->priv_data;
unsigned ts; unsigned ts;
int size= pkt->size; int size= pkt->size;
uint8_t *data= NULL; uint8_t *data= NULL;
@ -434,20 +444,20 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
av_log(s, AV_LOG_ERROR, "malformated aac bitstream, use -absf aac_adtstoasc\n"); av_log(s, AV_LOG_ERROR, "malformated aac bitstream, use -absf aac_adtstoasc\n");
return -1; return -1;
} }
if (!flv->delay && pkt->dts < 0) if (!sc->delay && pkt->dts < 0)
flv->delay = -pkt->dts; sc->delay = -pkt->dts;
ts = pkt->dts + flv->delay; // add delay to force positive dts ts = pkt->dts + sc->delay; // add delay to force positive dts
/* check Speex packet duration */ /* check Speex packet duration */
if (enc->codec_id == CODEC_ID_SPEEX && ts - flv->last_ts > 160) { if (enc->codec_id == CODEC_ID_SPEEX && ts - sc->last_ts > 160) {
av_log(s, AV_LOG_WARNING, "Warning: Speex stream has more than " av_log(s, AV_LOG_WARNING, "Warning: Speex stream has more than "
"8 frames per packet. Adobe Flash " "8 frames per packet. Adobe Flash "
"Player cannot handle this!\n"); "Player cannot handle this!\n");
} }
if (flv->last_ts < ts) if (sc->last_ts < ts)
flv->last_ts = ts; sc->last_ts = ts;
avio_wb24(pb,size + flags_size); avio_wb24(pb,size + flags_size);
avio_wb24(pb,ts); avio_wb24(pb,ts);
@ -471,7 +481,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
avio_write(pb, data ? data : pkt->data, size); avio_write(pb, data ? data : pkt->data, size);
avio_wb32(pb,size+flags_size+11); // previous tag size avio_wb32(pb,size+flags_size+11); // previous tag size
flv->duration = FFMAX(flv->duration, pkt->pts + flv->delay + pkt->duration); flv->duration = FFMAX(flv->duration, pkt->pts + sc->delay + pkt->duration);
avio_flush(pb); avio_flush(pb);

View File

@ -35,6 +35,7 @@
#include "riff.h" #include "riff.h"
#include "isom.h" #include "isom.h"
#include "libavcodec/get_bits.h" #include "libavcodec/get_bits.h"
#include "id3v1.h"
#if CONFIG_ZLIB #if CONFIG_ZLIB
#include <zlib.h> #include <zlib.h>
@ -99,31 +100,48 @@ static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb,
return 0; return 0;
} }
static int mov_metadata_int8(MOVContext *c, AVIOContext *pb, static int mov_metadata_int8_bypass_padding(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key) unsigned len, const char *key)
{ {
char buf[16]; char buf[16];
/* bypass padding bytes */ /* bypass padding bytes */
avio_r8(pb); avio_r8(pb);
avio_r8(pb); avio_r8(pb);
avio_r8(pb); avio_r8(pb);
snprintf(buf, sizeof(buf), "%hu", avio_r8(pb)); snprintf(buf, sizeof(buf), "%hu", avio_r8(pb));
av_dict_set(&c->fc->metadata, key, buf, 0); av_dict_set(&c->fc->metadata, key, buf, 0);
return 0; return 0;
} }
static int mov_metadata_stik(MOVContext *c, AVIOContext *pb, static int mov_metadata_int8_no_padding(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key)
{
char buf[16];
snprintf(buf, sizeof(buf), "%hu", avio_r8(pb));
av_dict_set(&c->fc->metadata, key, buf, 0);
return 0;
}
static int mov_metadata_gnre(MOVContext *c, AVIOContext *pb,
unsigned len, const char *key) unsigned len, const char *key)
{ {
char buf[16]; short genre;
char buf[20];
snprintf(buf, sizeof(buf), "%hu", avio_r8(pb)); avio_r8(pb); // unknown
av_dict_set(&c->fc->metadata, key, buf, 0);
return 0; genre = avio_r8(pb);
if (genre < 1 || genre > ID3v1_GENRE_MAX)
return 0;
snprintf(buf, sizeof(buf), "%s", ff_id3v1_genre_str[genre-1]);
av_dict_set(&c->fc->metadata, key, buf, 0);
return 0;
} }
static const uint32_t mac_to_unicode[128] = { static const uint32_t mac_to_unicode[128] = {
@ -189,6 +207,8 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
case MKTAG(0xa9,'a','l','b'): key = "album"; break; case MKTAG(0xa9,'a','l','b'): key = "album"; break;
case MKTAG(0xa9,'d','a','y'): key = "date"; break; case MKTAG(0xa9,'d','a','y'): key = "date"; break;
case MKTAG(0xa9,'g','e','n'): key = "genre"; break; case MKTAG(0xa9,'g','e','n'): key = "genre"; break;
case MKTAG( 'g','n','r','e'): key = "genre";
parse = mov_metadata_gnre; break;
case MKTAG(0xa9,'t','o','o'): case MKTAG(0xa9,'t','o','o'):
case MKTAG(0xa9,'s','w','r'): key = "encoder"; break; case MKTAG(0xa9,'s','w','r'): key = "encoder"; break;
case MKTAG(0xa9,'e','n','c'): key = "encoder"; break; case MKTAG(0xa9,'e','n','c'): key = "encoder"; break;
@ -202,11 +222,15 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
case MKTAG( 'd','i','s','k'): key = "disc"; case MKTAG( 'd','i','s','k'): key = "disc";
parse = mov_metadata_track_or_disc_number; break; parse = mov_metadata_track_or_disc_number; break;
case MKTAG( 't','v','e','s'): key = "episode_sort"; case MKTAG( 't','v','e','s'): key = "episode_sort";
parse = mov_metadata_int8; break; parse = mov_metadata_int8_bypass_padding; break;
case MKTAG( 't','v','s','n'): key = "season_number"; case MKTAG( 't','v','s','n'): key = "season_number";
parse = mov_metadata_int8; break; parse = mov_metadata_int8_bypass_padding; break;
case MKTAG( 's','t','i','k'): key = "media_type"; case MKTAG( 's','t','i','k'): key = "media_type";
parse = mov_metadata_stik; break; parse = mov_metadata_int8_no_padding; break;
case MKTAG( 'h','d','v','d'): key = "hd_video";
parse = mov_metadata_int8_no_padding; break;
case MKTAG( 'p','g','a','p'): key = "gapless_playback";
parse = mov_metadata_int8_no_padding; break;
} }
if (c->itunes_metadata && atom.size > 8) { if (c->itunes_metadata && atom.size > 8) {

View File

@ -859,6 +859,29 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
}, },
.flags = PIX_FMT_BE, .flags = PIX_FMT_BE,
}, },
[PIX_FMT_YUV422P9LE] = {
.name = "yuv422p9le",
.nb_components= 3,
.log2_chroma_w= 1,
.log2_chroma_h= 0,
.comp = {
{0,1,1,0,8}, /* Y */
{1,1,1,0,8}, /* U */
{2,1,1,0,8}, /* V */
},
},
[PIX_FMT_YUV422P9BE] = {
.name = "yuv422p9be",
.nb_components= 3,
.log2_chroma_w= 1,
.log2_chroma_h= 0,
.comp = {
{0,1,1,0,8}, /* Y */
{1,1,1,0,8}, /* U */
{2,1,1,0,8}, /* V */
},
.flags = PIX_FMT_BE,
},
[PIX_FMT_YUV422P10LE] = { [PIX_FMT_YUV422P10LE] = {
.name = "yuv422p10le", .name = "yuv422p10le",
.nb_components= 3, .nb_components= 3,

View File

@ -149,12 +149,15 @@ enum PixelFormat {
PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian PIX_FMT_RGBA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian PIX_FMT_RGBA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian PIX_FMT_BGRA64BE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian PIX_FMT_BGRA64LE, ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
PIX_FMT_GBR24P, ///< planar GBR, 24bpp, 8G, 8B, 8R. PIX_FMT_GBR24P, ///< planar GBR, 24bpp, 8G, 8B, 8R.
PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
}; };
@ -182,6 +185,7 @@ enum PixelFormat {
#define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE) #define PIX_FMT_BGR444 PIX_FMT_NE(BGR444BE, BGR444LE)
#define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE) #define PIX_FMT_YUV420P9 PIX_FMT_NE(YUV420P9BE , YUV420P9LE)
#define PIX_FMT_YUV422P9 PIX_FMT_NE(YUV422P9BE , YUV422P9LE)
#define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE) #define PIX_FMT_YUV444P9 PIX_FMT_NE(YUV444P9BE , YUV444P9LE)
#define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE) #define PIX_FMT_YUV420P10 PIX_FMT_NE(YUV420P10BE, YUV420P10LE)
#define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE) #define PIX_FMT_YUV422P10 PIX_FMT_NE(YUV422P10BE, YUV422P10LE)

View File

@ -536,6 +536,18 @@
%endif %endif
%endmacro %endmacro
%macro SPLATD_MMX 1
punpckldq %1, %1
%endmacro
%macro SPLATD_SSE 1
shufps %1, %1, 0
%endmacro
%macro SPLATD_SSE2 1
pshufd %1, %1, 0
%endmacro
%macro CLIPW 3 ;(dst, min, max) %macro CLIPW 3 ;(dst, min, max)
pmaxsw %1, %2 pmaxsw %1, %2
pminsw %1, %3 pminsw %1, %3

View File

@ -2843,6 +2843,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break; case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE: case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV420P9LE: case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P10LE:
@ -2852,6 +2853,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break; case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
#else #else
case PIX_FMT_YUV444P9BE: case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE: case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE: case PIX_FMT_YUV422P10BE:
@ -2912,6 +2914,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
switch (srcFormat) { switch (srcFormat) {
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
case PIX_FMT_YUV444P9LE: case PIX_FMT_YUV444P9LE:
case PIX_FMT_YUV422P9LE:
case PIX_FMT_YUV420P9LE: case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE: case PIX_FMT_YUV420P10LE:
@ -2922,6 +2925,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break; case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
#else #else
case PIX_FMT_YUV444P9BE: case PIX_FMT_YUV444P9BE:
case PIX_FMT_YUV422P9BE:
case PIX_FMT_YUV420P9BE: case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV444P10BE: case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE: case PIX_FMT_YUV422P10BE:

View File

@ -547,6 +547,8 @@ const char *sws_format_name(enum PixelFormat format);
#define isNBPS(x) ( \ #define isNBPS(x) ( \
(x)==PIX_FMT_YUV420P9LE \ (x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV420P9BE \ || (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV422P9LE \
|| (x)==PIX_FMT_YUV422P9BE \
|| (x)==PIX_FMT_YUV444P9BE \ || (x)==PIX_FMT_YUV444P9BE \
|| (x)==PIX_FMT_YUV444P9LE \ || (x)==PIX_FMT_YUV444P9LE \
|| (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV422P10BE \
@ -574,6 +576,7 @@ const char *sws_format_name(enum PixelFormat format);
#define isPlanarYUV(x) ( \ #define isPlanarYUV(x) ( \
isPlanar8YUV(x) \ isPlanar8YUV(x) \
|| (x)==PIX_FMT_YUV420P9LE \ || (x)==PIX_FMT_YUV420P9LE \
|| (x)==PIX_FMT_YUV422P9LE \
|| (x)==PIX_FMT_YUV444P9LE \ || (x)==PIX_FMT_YUV444P9LE \
|| (x)==PIX_FMT_YUV420P10LE \ || (x)==PIX_FMT_YUV420P10LE \
|| (x)==PIX_FMT_YUV422P10LE \ || (x)==PIX_FMT_YUV422P10LE \
@ -583,6 +586,7 @@ const char *sws_format_name(enum PixelFormat format);
|| (x)==PIX_FMT_YUV422P16LE \ || (x)==PIX_FMT_YUV422P16LE \
|| (x)==PIX_FMT_YUV444P16LE \ || (x)==PIX_FMT_YUV444P16LE \
|| (x)==PIX_FMT_YUV420P9BE \ || (x)==PIX_FMT_YUV420P9BE \
|| (x)==PIX_FMT_YUV422P9BE \
|| (x)==PIX_FMT_YUV444P9BE \ || (x)==PIX_FMT_YUV444P9BE \
|| (x)==PIX_FMT_YUV420P10BE \ || (x)==PIX_FMT_YUV420P10BE \
|| (x)==PIX_FMT_YUV422P10BE \ || (x)==PIX_FMT_YUV422P10BE \

View File

@ -136,6 +136,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = {
[PIX_FMT_YUV420P9LE] = { 1 , 1 }, [PIX_FMT_YUV420P9LE] = { 1 , 1 },
[PIX_FMT_YUV420P10BE] = { 1 , 1 }, [PIX_FMT_YUV420P10BE] = { 1 , 1 },
[PIX_FMT_YUV420P10LE] = { 1 , 1 }, [PIX_FMT_YUV420P10LE] = { 1 , 1 },
[PIX_FMT_YUV422P9BE] = { 1 , 1 },
[PIX_FMT_YUV422P9LE] = { 1 , 1 },
[PIX_FMT_YUV422P10BE] = { 1 , 1 }, [PIX_FMT_YUV422P10BE] = { 1 , 1 },
[PIX_FMT_YUV422P10LE] = { 1 , 1 }, [PIX_FMT_YUV422P10LE] = { 1 , 1 },
[PIX_FMT_YUV444P9BE] = { 1 , 1 }, [PIX_FMT_YUV444P9BE] = { 1 , 1 },
@ -280,15 +282,18 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
if (flags & SWS_BICUBIC) { if (flags & SWS_BICUBIC) {
int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24); int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1<<24);
int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24); int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
int64_t dd = ( d*d)>>30;
int64_t ddd= (dd*d)>>30;
if (d < 1LL<<30) if (d >= 1LL<<31) {
coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30); coeff = 0.0;
else if (d < 1LL<<31) } else {
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30); int64_t dd = (d * d) >> 30;
else int64_t ddd = (dd * d) >> 30;
coeff=0.0;
if (d < 1LL<<30)
coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
else
coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
}
coeff *= fone>>(30+24); coeff *= fone>>(30+24);
} }
/* else if (flags & SWS_X) { /* else if (flags & SWS_X) {

View File

@ -790,8 +790,8 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
y_table32 = c->yuvTable; y_table32 = c->yuvTable;
yb = -(384<<16) - oy; yb = -(384<<16) - oy;
for (i = 0; i < 1024; i++) { for (i = 0; i < 1024; i++) {
uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16); unsigned yval = av_clip_uint8((yb + 0x8000) >> 16);
y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255 << abase)); y_table32[i ] = (yval << rbase) + (needAlpha ? 0 : (255u << abase));
y_table32[i+1024] = yval << gbase; y_table32[i+1024] = yval << gbase;
y_table32[i+2048] = yval << bbase; y_table32[i+2048] = yval << bbase;
yb += cy; yb += cy;

View File

@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6

View File

@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6

View File

@ -42,6 +42,8 @@ yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71
yuv422p10le d0607c260a45c973e6639f4e449730ad yuv422p10le d0607c260a45c973e6639f4e449730ad
yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed
yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv422p16le f87c81bf16916b64d201359be0b4b6f4
yuv422p9be 29b71579946940a8c00fa844c9dff507
yuv422p9le 062b7f9cbb972bf36b5bdb1a7623701a
yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf
yuv444p 0a98447b78fd476aa39686da6a74fa2e yuv444p 0a98447b78fd476aa39686da6a74fa2e
yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6 yuv444p10be e65cbae7e4f1892c23defbc8e8052cf6

View File

@ -42,6 +42,8 @@ yuv422p10be cea7ca6b0e66d6f29539885896c88603
yuv422p10le a10c4a5837547716f13cd61918b145f9 yuv422p10le a10c4a5837547716f13cd61918b145f9
yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c
yuv422p16le 61bfcee8e54465f760164f5a75d40b5e yuv422p16le 61bfcee8e54465f760164f5a75d40b5e
yuv422p9be 82494823944912f73cebc58ad2979bbd
yuv422p9le fc69c8a21f473916a4b4225636b97e06
yuv440p 461503fdb9b90451020aa3b25ddf041c yuv440p 461503fdb9b90451020aa3b25ddf041c
yuv444p 81b2eba962d12e8d64f003ac56f6faf2 yuv444p 81b2eba962d12e8d64f003ac56f6faf2
yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9 yuv444p10be e9d3c8e744b8b0d8187ca092fa203fc9

View File

@ -42,6 +42,8 @@ yuv422p10be 588fe319b96513c32e21d3e32b45447f
yuv422p10le 11b57f2bd9661024153f3973b9090cdb yuv422p10le 11b57f2bd9661024153f3973b9090cdb
yuv422p16be c092d083548c2a144c372a98c46875c7 yuv422p16be c092d083548c2a144c372a98c46875c7
yuv422p16le c071b9397a416d51cbe339345cbcba84 yuv422p16le c071b9397a416d51cbe339345cbcba84
yuv422p9be 7c6f1e140b3999ee7d923854e507752a
yuv422p9le 51f10d79c07989060dd06e767e6d7d60
yuv440p 876385e96165acf51271b20e5d85a416 yuv440p 876385e96165acf51271b20e5d85a416
yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7 yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7
yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd yuv444p10be 944a4997c4edb3a8dd0f0493cfd5a1fd