diff --git a/cmdutils.c b/cmdutils.c
index 3d5c5f2bcc..2879a420c1 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -38,6 +38,7 @@
 #if CONFIG_POSTPROC
 #include "libpostproc/postprocess.h"
 #endif
+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/parseutils.h"
@@ -1075,7 +1076,7 @@ int check_stream_specifier(AVFormatContext *s, AVStream *st, const char *spec)
     case 's': type = AVMEDIA_TYPE_SUBTITLE;   break;
     case 'd': type = AVMEDIA_TYPE_DATA;       break;
     case 't': type = AVMEDIA_TYPE_ATTACHMENT; break;
-    default:  abort(); // never reached, silence warning
+    default:  av_assert0(0);
     }
     if (type != st->codec->codec_type)
         return 0;
diff --git a/configure b/configure
index 4313bdd8db..270de65f28 100755
--- a/configure
+++ b/configure
@@ -2662,7 +2662,7 @@ case $target_os in
         SHFLAGS='-shared -Wl,-h,$$(@F)'
         enabled x86 && SHFLAGS="-mimpure-text $SHFLAGS"
         network_extralibs="-lsocket -lnsl"
-        add_cppflags -D__EXTENSIONS__
+        add_cppflags -D__EXTENSIONS__ -D_XOPEN_SOURCE=600
         # When using suncc to build, the Solaris linker will mark
         # an executable with each instruction set encountered by
         # the Solaris assembler. As our libraries contain their own
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index 05639f0b33..c0064208ad 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -393,7 +393,7 @@ static inline int range_get_symbol(APEContext *ctx,
 }
 /** @} */ // group rangecoder
 
-static inline void update_rice(APERice *rice, int x)
+static inline void update_rice(APERice *rice, unsigned int x)
 {
     int lim = rice->k ? (1 << (rice->k + 4)) : 0;
     rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);
@@ -406,7 +406,7 @@ static inline void update_rice(APERice *rice, int x)
 
 static inline int ape_decode_value(APEContext *ctx, APERice *rice)
 {
-    int x, overflow;
+    unsigned int x, overflow;
 
     if (ctx->fileversion < 3990) {
         int tmpk;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index c660cb0d4c..7b301707d9 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -95,6 +95,7 @@ endfunc
 .endm
 
 .macro pixels16_y2 rnd=1, avg=0
+        sub r3, r3, #2
         vld1.64 {q0}, [r1], r2
         vld1.64 {q1}, [r1], r2
 1:      subs r3, r3, #2
@@ -114,10 +115,25 @@ endfunc
         vst1.64 {q2}, [r0,:128], r2
         vst1.64 {q3}, [r0,:128], r2
         bne 1b
+
+        avg q2, q0, q1
+        vld1.64 {q0}, [r1], r2
+        avg q3, q0, q1
+  .if \avg
+        vld1.8 {q8}, [r0,:128], r2
+        vld1.8 {q9}, [r0,:128]
+        vrhadd.u8 q2, q2, q8
+        vrhadd.u8 q3, q3, q9
+        sub r0, r0, r2
+  .endif
+        vst1.64 {q2}, [r0,:128], r2
+        vst1.64 {q3}, [r0,:128], r2
+        bx lr
 .endm
 
 .macro pixels16_xy2 rnd=1, avg=0
+        sub r3, r3, #2
         vld1.64 {d0-d2}, [r1], r2
         vld1.64 {d4-d6}, [r1], r2
   .ifeq \rnd
@@ -173,6 +189,42 @@ endfunc
         vaddl.u8 q11, d3, d5
         vst1.64 {q15}, [r0,:128], r2
         bgt 1b
+
+        vld1.64 {d0-d2}, [r1], r2
+        vadd.u16 q12, q8, q9
+  .ifeq \rnd
+        vadd.u16 q12, q12, q13
+  .endif
+        vext.8 q15, q0, q1, #1
+        vadd.u16 q1 , q10, q11
+        shrn d28, q12, #2
+  .ifeq \rnd
+        vadd.u16 q1, q1, q13
+  .endif
+        shrn d29, q1, #2
+  .if \avg
+        vld1.8 {q8}, [r0,:128]
+        vrhadd.u8 q14, q14, q8
+  .endif
+        vaddl.u8 q8, d0, d30
+        vaddl.u8 q10, d1, d31
+        vst1.64 {q14}, [r0,:128], r2
+        vadd.u16 q12, q8, q9
+  .ifeq \rnd
+        vadd.u16 q12, q12, q13
+  .endif
+        vadd.u16 q0, q10, q11
+        shrn d30, q12, #2
+  .ifeq \rnd
+        vadd.u16 q0, q0, q13
+  .endif
+        shrn d31, q0, #2
+  .if \avg
+        vld1.8 {q9}, [r0,:128]
+        vrhadd.u8 q15, q15, q9
+  .endif
+        vst1.64 {q15}, [r0,:128], r2
+        bx lr
 .endm
@@ -228,6 +280,7 @@ endfunc
 .endm
 
 .macro pixels8_y2 rnd=1, avg=0
+        sub r3, r3,
#2 vld1.64 {d0}, [r1], r2 vld1.64 {d1}, [r1], r2 1: subs r3, r3, #2 @@ -246,10 +299,24 @@ endfunc vst1.64 {d4}, [r0,:64], r2 vst1.64 {d5}, [r0,:64], r2 bne 1b + + avg d4, d0, d1 + vld1.64 {d0}, [r1], r2 + avg d5, d0, d1 + .if \avg + vld1.8 {d2}, [r0,:64], r2 + vld1.8 {d3}, [r0,:64] + vrhadd.u8 q2, q2, q1 + sub r0, r0, r2 + .endif + vst1.64 {d4}, [r0,:64], r2 + vst1.64 {d5}, [r0,:64], r2 + bx lr .endm .macro pixels8_xy2 rnd=1, avg=0 + sub r3, r3, #2 vld1.64 {q0}, [r1], r2 vld1.64 {q1}, [r1], r2 .ifeq \rnd @@ -291,6 +358,31 @@ endfunc vaddl.u8 q9, d2, d6 vst1.64 {d7}, [r0,:64], r2 bgt 1b + + vld1.64 {q0}, [r1], r2 + vadd.u16 q10, q8, q9 + vext.8 d4, d0, d1, #1 + .ifeq \rnd + vadd.u16 q10, q10, q11 + .endif + vaddl.u8 q8, d0, d4 + shrn d5, q10, #2 + vadd.u16 q10, q8, q9 + .if \avg + vld1.8 {d7}, [r0,:64] + vrhadd.u8 d5, d5, d7 + .endif + .ifeq \rnd + vadd.u16 q10, q10, q11 + .endif + vst1.64 {d5}, [r0,:64], r2 + shrn d7, q10, #2 + .if \avg + vld1.8 {d5}, [r0,:64] + vrhadd.u8 d7, d7, d5 + .endif + vst1.64 {d7}, [r0,:64], r2 + bx lr .endm diff --git a/libavcodec/h264.h b/libavcodec/h264.h index f5542bfb1d..0e11e304ee 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -37,14 +37,14 @@ #include "rectangle.h" #define interlaced_dct interlaced_dct_is_a_bad_name -#define mb_intra mb_intra_is_not_initialized_see_mb_type +#define mb_intra mb_intra_is_not_initialized_see_mb_type -#define MAX_SPS_COUNT 32 -#define MAX_PPS_COUNT 256 +#define MAX_SPS_COUNT 32 +#define MAX_PPS_COUNT 256 -#define MAX_MMCO_COUNT 66 +#define MAX_MMCO_COUNT 66 -#define MAX_DELAYED_PIC_COUNT 16 +#define MAX_DELAYED_PIC_COUNT 16 #define MAX_MBPAIR_SIZE (256*1024) // a tighter bound could be calculated if someone cares about a few bytes @@ -61,25 +61,25 @@ #define MAX_SLICES 16 #ifdef ALLOW_INTERLACE -#define MB_MBAFF h->mb_mbaff -#define MB_FIELD h->mb_field_decoding_flag +#define MB_MBAFF h->mb_mbaff +#define MB_FIELD h->mb_field_decoding_flag #define FRAME_MBAFF h->mb_aff_frame #define FIELD_PICTURE (s->picture_structure != PICT_FRAME) #define LEFT_MBS 2 -#define LTOP 0 -#define LBOT 1 -#define LEFT(i) (i) +#define LTOP 0 +#define LBOT 1 +#define LEFT(i) (i) #else -#define MB_MBAFF 0 -#define MB_FIELD 0 -#define FRAME_MBAFF 0 +#define MB_MBAFF 0 +#define MB_FIELD 0 +#define FRAME_MBAFF 0 #define FIELD_PICTURE 0 #undef IS_INTERLACED #define IS_INTERLACED(mb_type) 0 #define LEFT_MBS 1 -#define LTOP 0 -#define LBOT 0 -#define LEFT(i) 0 +#define LTOP 0 +#define LBOT 0 +#define LEFT(i) 0 #endif #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) @@ -91,9 +91,9 @@ #define CHROMA422 (h->sps.chroma_format_idc == 2) #define CHROMA444 (h->sps.chroma_format_idc == 3) -#define EXTENDED_SAR 255 +#define EXTENDED_SAR 255 -#define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16 bit +#define MB_TYPE_REF0 MB_TYPE_ACPRED // dirty but it fits in 16 bit #define MB_TYPE_8x8DCT 0x01000000 #define IS_REF0(a) ((a) & MB_TYPE_REF0) #define IS_8x8DCT(a) ((a) & MB_TYPE_8x8DCT) @@ -108,7 +108,7 @@ /* NAL unit types */ enum { - NAL_SLICE=1, + NAL_SLICE = 1, NAL_DPA, NAL_DPB, NAL_DPC, @@ -121,17 +121,17 @@ enum { NAL_END_STREAM, NAL_FILLER_DATA, NAL_SPS_EXT, - NAL_AUXILIARY_SLICE=19 + NAL_AUXILIARY_SLICE = 19 }; /** * SEI message types */ typedef enum { - SEI_BUFFERING_PERIOD = 0, ///< buffering period (H.264, D.1.1) - SEI_TYPE_PIC_TIMING = 1, ///< picture timing - SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data - SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) + SEI_BUFFERING_PERIOD = 0, 
///< buffering period (H.264, D.1.1) + SEI_TYPE_PIC_TIMING = 1, ///< picture timing + SEI_TYPE_USER_DATA_UNREGISTERED = 5, ///< unregistered user data + SEI_TYPE_RECOVERY_POINT = 6 ///< recovery point (frame # to decoder sync) } SEI_Type; /** @@ -152,8 +152,7 @@ typedef enum { /** * Sequence parameter set */ -typedef struct SPS{ - +typedef struct SPS { int profile_idc; int level_idc; int chroma_format_idc; @@ -170,9 +169,9 @@ typedef struct SPS{ int mb_width; ///< pic_width_in_mbs_minus1 + 1 int mb_height; ///< pic_height_in_map_units_minus1 + 1 int frame_mbs_only_flag; - int mb_aff; ///b4_stride + int b_stride; // FIXME use s->b4_stride - int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff + int mb_linesize; ///< may be equal to s->linesize or s->linesize * 2, for mbaff int mb_uvlinesize; int emu_edge_width; @@ -338,32 +337,32 @@ typedef struct H264Context{ /** * current pps */ - PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? + PPS pps; // FIXME move to Picture perhaps? (->no) do we need that? - uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down? - uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64]; - uint32_t (*dequant4_coeff[6])[16]; - uint32_t (*dequant8_coeff[6])[64]; + uint32_t dequant4_buffer[6][QP_MAX_NUM + 1][16]; // FIXME should these be moved down? + uint32_t dequant8_buffer[6][QP_MAX_NUM + 1][64]; + uint32_t(*dequant4_coeff[6])[16]; + uint32_t(*dequant8_coeff[6])[64]; int slice_num; - uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 + uint16_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 int slice_type; - int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) + int slice_type_nos; ///< S free slice type (SI/SP are remapped to I/P) int slice_type_fixed; - //interlacing specific flags + // interlacing specific flags int mb_aff_frame; int mb_field_decoding_flag; - int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag + int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4]; - //Weighted pred stuff + // Weighted pred stuff int use_weight; int use_weight_chroma; int luma_log2_weight_denom; int chroma_log2_weight_denom; - //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss + // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss int luma_weight[48][2][2]; int chroma_weight[48][2][2][2]; int implicit_weight[48][48][2]; @@ -373,48 +372,48 @@ typedef struct H264Context{ int col_fieldoff; int dist_scale_factor[16]; int dist_scale_factor_field[2][32]; - int map_col_to_list0[2][16+32]; - int map_col_to_list0_field[2][2][16+32]; + int map_col_to_list0[2][16 + 32]; + int map_col_to_list0_field[2][2][16 + 32]; /** * num_ref_idx_l0/1_active_minus1 + 1 */ - uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type - unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode + unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode unsigned int list_count; - Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. - Reordered version of default_ref_list - according to picture reordering in slice header */ - int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 + uint8_t *list_counts; ///< Array of list_count per MB specifying the slice type + Picture ref_list[2][48]; /**< 0..15: frame refs, 16..47: mbaff field refs. 
+ * Reordered version of default_ref_list + * according to picture reordering in slice header */ + int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 - //data partitioning + // data partitioning GetBitContext intra_gb; GetBitContext inter_gb; GetBitContext *intra_gb_ptr; GetBitContext *inter_gb_ptr; - DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. - DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2]; - DCTELEM mb_padding[256*2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb + DECLARE_ALIGNED(16, DCTELEM, mb)[16 * 48 * 2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space. + DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16 * 2]; + DCTELEM mb_padding[256 * 2]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb /** * Cabac */ CABACContext cabac; - uint8_t cabac_state[1024]; + uint8_t cabac_state[1024]; - /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ - uint16_t *cbp_table; + /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0, 1, 2), 0x0? luma_cbp */ + uint16_t *cbp_table; int cbp; int top_cbp; int left_cbp; /* chroma_pred_mode for i4x4 or i16x16, else 0 */ - uint8_t *chroma_pred_mode_table; - int last_qscale_diff; - uint8_t (*mvd_table[2])[2]; - DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2]; - uint8_t *direct_table; - uint8_t direct_cache[5*8]; + uint8_t *chroma_pred_mode_table; + int last_qscale_diff; + uint8_t (*mvd_table[2])[2]; + DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5 * 8][2]; + uint8_t *direct_table; + uint8_t direct_cache[5 * 8]; uint8_t zigzag_scan[16]; uint8_t zigzag_scan8x8[64]; @@ -435,13 +434,13 @@ typedef struct H264Context{ int is_complex; - //deblock - int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 + // deblock + int deblocking_filter; ///< disable_deblocking_filter_idc with 1 <-> 0 int slice_alpha_c0_offset; int slice_beta_offset; -//============================================================= - //Things below are not used in the MB or more inner code + // ============================================================= + // Things below are not used in the MB or more inner code int nal_ref_idc; int nal_unit_type; @@ -451,37 +450,36 @@ typedef struct H264Context{ /** * Used to parse AVC variant of h264 */ - int is_avc; ///< this flag is != 0 if codec is avc1 - int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) - int got_first; ///< this flag is != 0 if we've parsed a frame + int is_avc; ///< this flag is != 0 if codec is avc1 + int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) + int got_first; ///< this flag is != 0 if we've parsed a frame SPS *sps_buffers[MAX_SPS_COUNT]; PPS *pps_buffers[MAX_PPS_COUNT]; - int dequant_coeff_pps; ///< reinit tables when pps changes + int dequant_coeff_pps; ///< reinit tables when pps changes uint16_t *slice_table_base; - - //POC stuff + // POC stuff int poc_lsb; int poc_msb; int delta_poc_bottom; int delta_poc[2]; int frame_num; - int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 - int prev_poc_lsb; ///< poc_lsb of the last reference pic 
for POC type 0 - int frame_num_offset; ///< for POC type 2 - int prev_frame_num_offset; ///< for POC type 2 - int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 + int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 + int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 + int frame_num_offset; ///< for POC type 2 + int prev_frame_num_offset; ///< for POC type 2 + int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 /** - * frame_num for frames or 2*frame_num+1 for field pics. + * frame_num for frames or 2 * frame_num + 1 for field pics. */ int curr_pic_num; /** - * max_frame_num or 2*max_frame_num for field pics. + * max_frame_num or 2 * max_frame_num for field pics. */ int max_pic_num; @@ -490,7 +488,7 @@ typedef struct H264Context{ Picture *short_ref[32]; Picture *long_ref[32]; Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture - Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? + Picture *delayed_pic[MAX_DELAYED_PIC_COUNT + 2]; // FIXME size? int last_pocs[MAX_DELAYED_PIC_COUNT]; Picture *next_output_pic; int outputed_poc; @@ -503,10 +501,10 @@ typedef struct H264Context{ int mmco_index; int mmco_reset; - int long_ref_count; ///< number of actual long term references - int short_ref_count; ///< number of actual short term references + int long_ref_count; ///< number of actual long term references + int short_ref_count; ///< number of actual short term references - int cabac_init_idc; + int cabac_init_idc; /** * @name Members for slice based multithreading @@ -582,12 +580,12 @@ typedef struct H264Context{ */ int recovery_frame; - int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag - int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag + int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag + int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag // Timestamp stuff - int sei_buffering_period_present; ///< Buffering period SEI flag - int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs + int sei_buffering_period_present; ///< Buffering period SEI flag + int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs int cur_chroma_format_idc; @@ -598,10 +596,9 @@ typedef struct H264Context{ uint8_t parse_history[4]; int parse_history_count; int parse_last_mb; -}H264Context; +} H264Context; - -extern const uint8_t ff_h264_chroma_qp[5][QP_MAX_NUM+1]; ///< One chroma qp table for each possible bit depth (8-12). +extern const uint8_t ff_h264_chroma_qp[5][QP_MAX_NUM + 1]; ///< One chroma qp table for each possible bit depth (8-12). extern const uint16_t ff_h264_mb_sizes[4]; /** @@ -628,13 +625,16 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length); * Decode a network abstraction layer unit. * @param consumed is the number of bytes used as input * @param length is the length of the array - * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing? + * @param dst_length is the number of decoded bytes FIXME here + * or a decode rbsp tailing? * @return decoded bytes, might be src+1 if no escapes */ -const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length); +const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, + int *dst_length, int *consumed, int length); /** - * Free any data that may have been allocated in the H264 context like SPS, PPS etc. 
+ * Free any data that may have been allocated in the H264 context + * like SPS, PPS etc. */ av_cold void ff_h264_free_context(H264Context *h); @@ -667,12 +667,16 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb); void ff_generate_sliding_window_mmcos(H264Context *h); - /** - * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks. + * Check if the top & left blocks are available if needed & change the + * dc mode so it only uses the available blocks. */ int ff_h264_check_intra4x4_pred_mode(H264Context *h); +/** + * Check if the top & left blocks are available if needed & change the + * dc mode so it only uses the available blocks. + */ int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma); void ff_h264_hl_decode_mb(H264Context *h); @@ -683,24 +687,28 @@ av_cold void ff_h264_decode_init_vlc(void); /** * Decode a macroblock - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error */ int ff_h264_decode_mb_cavlc(H264Context *h); /** * Decode a CABAC coded macroblock - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error */ int ff_h264_decode_mb_cabac(H264Context *h); void ff_h264_init_cabac_states(H264Context *h); -void ff_h264_direct_dist_scale_factor(H264Context * const h); -void ff_h264_direct_ref_list_init(H264Context * const h); -void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type); +void ff_h264_direct_dist_scale_factor(H264Context *const h); +void ff_h264_direct_ref_list_init(H264Context *const h); +void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type); -void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); -void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); +void ff_h264_filter_mb_fast(H264Context *h, int mb_x, int mb_y, + uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, + unsigned int linesize, unsigned int uvlinesize); +void ff_h264_filter_mb(H264Context *h, int mb_x, int mb_y, + uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, + unsigned int linesize, unsigned int uvlinesize); /** * Reset SEI values at the beginning of the frame. 
@@ -709,16 +717,15 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint */ void ff_h264_reset_sei(H264Context *h); - /* -o-o o-o - / / / -o-o o-o - ,---' -o-o o-o - / / / -o-o o-o -*/ + * o-o o-o + * / / / + * o-o o-o + * ,---' + * o-o o-o + * / / / + * o-o o-o + */ /* Scan8 organization: * 0 1 2 3 4 5 6 7 @@ -743,156 +750,173 @@ o-o o-o #define LUMA_DC_BLOCK_INDEX 48 #define CHROMA_DC_BLOCK_INDEX 49 -//This table must be here because scan8[constant] must be known at compiletime -static const uint8_t scan8[16*3 + 3]={ - 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8, - 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8, - 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8, - 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8, - 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8, - 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8, - 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8, - 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8, - 4+11*8, 5+11*8, 4+12*8, 5+12*8, - 6+11*8, 7+11*8, 6+12*8, 7+12*8, - 4+13*8, 5+13*8, 4+14*8, 5+14*8, - 6+13*8, 7+13*8, 6+14*8, 7+14*8, - 0+ 0*8, 0+ 5*8, 0+10*8 +// This table must be here because scan8[constant] must be known at compiletime +static const uint8_t scan8[16 * 3 + 3] = { + 4 + 1 * 8, 5 + 1 * 8, 4 + 2 * 8, 5 + 2 * 8, + 6 + 1 * 8, 7 + 1 * 8, 6 + 2 * 8, 7 + 2 * 8, + 4 + 3 * 8, 5 + 3 * 8, 4 + 4 * 8, 5 + 4 * 8, + 6 + 3 * 8, 7 + 3 * 8, 6 + 4 * 8, 7 + 4 * 8, + 4 + 6 * 8, 5 + 6 * 8, 4 + 7 * 8, 5 + 7 * 8, + 6 + 6 * 8, 7 + 6 * 8, 6 + 7 * 8, 7 + 7 * 8, + 4 + 8 * 8, 5 + 8 * 8, 4 + 9 * 8, 5 + 9 * 8, + 6 + 8 * 8, 7 + 8 * 8, 6 + 9 * 8, 7 + 9 * 8, + 4 + 11 * 8, 5 + 11 * 8, 4 + 12 * 8, 5 + 12 * 8, + 6 + 11 * 8, 7 + 11 * 8, 6 + 12 * 8, 7 + 12 * 8, + 4 + 13 * 8, 5 + 13 * 8, 4 + 14 * 8, 5 + 14 * 8, + 6 + 13 * 8, 7 + 13 * 8, 6 + 14 * 8, 7 + 14 * 8, + 0 + 0 * 8, 0 + 5 * 8, 0 + 10 * 8 }; -static av_always_inline uint32_t pack16to32(int a, int b){ +static av_always_inline uint32_t pack16to32(int a, int b) +{ #if HAVE_BIGENDIAN - return (b&0xFFFF) + (a<<16); + return (b & 0xFFFF) + (a << 16); #else - return (a&0xFFFF) + (b<<16); + return (a & 0xFFFF) + (b << 16); #endif } -static av_always_inline uint16_t pack8to16(int a, int b){ +static av_always_inline uint16_t pack8to16(int a, int b) +{ #if HAVE_BIGENDIAN - return (b&0xFF) + (a<<8); + return (b & 0xFF) + (a << 8); #else - return (a&0xFF) + (b<<8); + return (a & 0xFF) + (b << 8); #endif } /** * Get the chroma qp. */ -static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){ +static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale) +{ return h->pps.chroma_qp_table[t][qscale]; } /** * Get the predicted intra4x4 prediction mode. 
*/ -static av_always_inline int pred_intra_mode(H264Context *h, int n){ - const int index8= scan8[n]; - const int left= h->intra4x4_pred_mode_cache[index8 - 1]; - const int top = h->intra4x4_pred_mode_cache[index8 - 8]; - const int min= FFMIN(left, top); +static av_always_inline int pred_intra_mode(H264Context *h, int n) +{ + const int index8 = scan8[n]; + const int left = h->intra4x4_pred_mode_cache[index8 - 1]; + const int top = h->intra4x4_pred_mode_cache[index8 - 8]; + const int min = FFMIN(left, top); - tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); + tprintf(h->s.avctx, "mode:%d %d min:%d\n", left, top, min); - if(min<0) return DC_PRED; - else return min; + if (min < 0) + return DC_PRED; + else + return min; } -static av_always_inline void write_back_intra_pred_mode(H264Context *h){ - int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; - int8_t *i4x4_cache= h->intra4x4_pred_mode_cache; +static av_always_inline void write_back_intra_pred_mode(H264Context *h) +{ + int8_t *i4x4 = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; + int8_t *i4x4_cache = h->intra4x4_pred_mode_cache; - AV_COPY32(i4x4, i4x4_cache + 4 + 8*4); - i4x4[4]= i4x4_cache[7+8*3]; - i4x4[5]= i4x4_cache[7+8*2]; - i4x4[6]= i4x4_cache[7+8*1]; + AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4); + i4x4[4] = i4x4_cache[7 + 8 * 3]; + i4x4[5] = i4x4_cache[7 + 8 * 2]; + i4x4[6] = i4x4_cache[7 + 8 * 1]; } -static av_always_inline void write_back_non_zero_count(H264Context *h){ - const int mb_xy= h->mb_xy; - uint8_t *nnz = h->non_zero_count[mb_xy]; +static av_always_inline void write_back_non_zero_count(H264Context *h) +{ + const int mb_xy = h->mb_xy; + uint8_t *nnz = h->non_zero_count[mb_xy]; uint8_t *nnz_cache = h->non_zero_count_cache; - AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]); - AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]); - AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]); - AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]); - AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]); - AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]); - AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); - AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); + AV_COPY32(&nnz[ 0], &nnz_cache[4 + 8 * 1]); + AV_COPY32(&nnz[ 4], &nnz_cache[4 + 8 * 2]); + AV_COPY32(&nnz[ 8], &nnz_cache[4 + 8 * 3]); + AV_COPY32(&nnz[12], &nnz_cache[4 + 8 * 4]); + AV_COPY32(&nnz[16], &nnz_cache[4 + 8 * 6]); + AV_COPY32(&nnz[20], &nnz_cache[4 + 8 * 7]); + AV_COPY32(&nnz[32], &nnz_cache[4 + 8 * 11]); + AV_COPY32(&nnz[36], &nnz_cache[4 + 8 * 12]); - if(!h->s.chroma_y_shift){ - AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); - AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); - AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); - AV_COPY32(&nnz[44], &nnz_cache[4+8*14]); + if (!h->s.chroma_y_shift) { + AV_COPY32(&nnz[24], &nnz_cache[4 + 8 * 8]); + AV_COPY32(&nnz[28], &nnz_cache[4 + 8 * 9]); + AV_COPY32(&nnz[40], &nnz_cache[4 + 8 * 13]); + AV_COPY32(&nnz[44], &nnz_cache[4 + 8 * 14]); } } -static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride, - int b_xy, int b8_xy, int mb_type, int list ) +static av_always_inline void write_back_motion_list(H264Context *h, + MpegEncContext *const s, + int b_stride, + int b_xy, int b8_xy, + int mb_type, int list) { - int16_t (*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; - int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]]; - AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0); - AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1); - AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2); - AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3); - if( CABAC ) { - uint8_t (*mvd_dst)[2] = 
&h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]]; - uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; - if(IS_SKIP(mb_type)) + int16_t(*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy]; + int16_t(*mv_src)[2] = &h->mv_cache[list][scan8[0]]; + AV_COPY128(mv_dst + 0 * b_stride, mv_src + 8 * 0); + AV_COPY128(mv_dst + 1 * b_stride, mv_src + 8 * 1); + AV_COPY128(mv_dst + 2 * b_stride, mv_src + 8 * 2); + AV_COPY128(mv_dst + 3 * b_stride, mv_src + 8 * 3); + if (CABAC) { + uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8 * h->mb_xy + : h->mb2br_xy[h->mb_xy]]; + uint8_t(*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; + if (IS_SKIP(mb_type)) { AV_ZERO128(mvd_dst); - else{ - AV_COPY64(mvd_dst, mvd_src + 8*3); - AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0); - AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1); - AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2); + } else { + AV_COPY64(mvd_dst, mvd_src + 8 * 3); + AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8 * 0); + AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8 * 1); + AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8 * 2); } } { int8_t *ref_index = &s->current_picture.f.ref_index[list][b8_xy]; int8_t *ref_cache = h->ref_cache[list]; - ref_index[0+0*2]= ref_cache[scan8[0]]; - ref_index[1+0*2]= ref_cache[scan8[4]]; - ref_index[0+1*2]= ref_cache[scan8[8]]; - ref_index[1+1*2]= ref_cache[scan8[12]]; + ref_index[0 + 0 * 2] = ref_cache[scan8[0]]; + ref_index[1 + 0 * 2] = ref_cache[scan8[4]]; + ref_index[0 + 1 * 2] = ref_cache[scan8[8]]; + ref_index[1 + 1 * 2] = ref_cache[scan8[12]]; } } -static av_always_inline void write_back_motion(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int b_stride = h->b_stride; - const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy - const int b8_xy= 4*h->mb_xy; +static av_always_inline void write_back_motion(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; + const int b_stride = h->b_stride; + const int b_xy = 4 * s->mb_x + 4 * s->mb_y * h->b_stride; // try mb2b(8)_xy + const int b8_xy = 4 * h->mb_xy; - if(USES_LIST(mb_type, 0)){ + if (USES_LIST(mb_type, 0)) { write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0); - }else{ + } else { fill_rectangle(&s->current_picture.f.ref_index[0][b8_xy], 2, 2, 2, (uint8_t)LIST_NOT_USED, 1); } - if(USES_LIST(mb_type, 1)){ + if (USES_LIST(mb_type, 1)) write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1); - } - if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){ - if(IS_8X8(mb_type)){ - uint8_t *direct_table = &h->direct_table[4*h->mb_xy]; - direct_table[1] = h->sub_mb_type[1]>>1; - direct_table[2] = h->sub_mb_type[2]>>1; - direct_table[3] = h->sub_mb_type[3]>>1; + if (h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC) { + if (IS_8X8(mb_type)) { + uint8_t *direct_table = &h->direct_table[4 * h->mb_xy]; + direct_table[1] = h->sub_mb_type[1] >> 1; + direct_table[2] = h->sub_mb_type[2] >> 1; + direct_table[3] = h->sub_mb_type[3] >> 1; } } } -static av_always_inline int get_dct8x8_allowed(H264Context *h){ - if(h->sps.direct_8x8_inference_flag) - return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); +static av_always_inline int get_dct8x8_allowed(H264Context *h) +{ + if (h->sps.direct_8x8_inference_flag) + return !(AV_RN64A(h->sub_mb_type) & + ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8) * + 0x0001000100010001ULL)); else - return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); + return 
!(AV_RN64A(h->sub_mb_type) & + ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_DIRECT2) * + 0x0001000100010001ULL)); } #endif /* AVCODEC_H264_H */ diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h index 85405c1542..12064c81e7 100644 --- a/libavcodec/h264_mvpred.h +++ b/libavcodec/h264_mvpred.h @@ -35,53 +35,53 @@ //#undef NDEBUG #include -static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ - const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; - MpegEncContext *s = &h->s; +static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, + int i, int list, int part_width) +{ + const int topright_ref = h->ref_cache[list][i - 8 + part_width]; + MpegEncContext *s = &h->s; /* there is no consistent mapping of mvs to neighboring locations that will * make mbaff happy, so we can't move all this logic to fill_caches */ - if(FRAME_MBAFF){ + if (FRAME_MBAFF) { +#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4) \ + const int xy = XY, y4 = Y4; \ + const int mb_type = mb_types[xy + (y4 >> 2) * s->mb_stride]; \ + if (!USES_LIST(mb_type, list)) \ + return LIST_NOT_USED; \ + mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \ + h->mv_cache[list][scan8[0] - 2][0] = mv[0]; \ + h->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP; \ + return s->current_picture_ptr->f.ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP; -#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)\ - const int xy = XY, y4 = Y4;\ - const int mb_type = mb_types[xy+(y4>>2)*s->mb_stride];\ - if(!USES_LIST(mb_type,list))\ - return LIST_NOT_USED;\ - mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4*h->b_stride];\ - h->mv_cache[list][scan8[0]-2][0] = mv[0];\ - h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\ - return s->current_picture_ptr->f.ref_index[list][4*xy + 1 + (y4 & ~1)] REF_OP; - - if(topright_ref == PART_NOT_AVAILABLE - && i >= scan8[0]+8 && (i&7)==4 - && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ + if (topright_ref == PART_NOT_AVAILABLE + && i >= scan8[0] + 8 && (i & 7) == 4 + && h->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) { const uint32_t *mb_types = s->current_picture_ptr->f.mb_type; const int16_t *mv; - AV_ZERO32(h->mv_cache[list][scan8[0]-2]); - *C = h->mv_cache[list][scan8[0]-2]; + AV_ZERO32(h->mv_cache[list][scan8[0] - 2]); + *C = h->mv_cache[list][scan8[0] - 2]; - if(!MB_FIELD - && IS_INTERLACED(h->left_type[0])){ - SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5)); + if (!MB_FIELD && IS_INTERLACED(h->left_type[0])) { + SET_DIAG_MV(* 2, >> 1, h->left_mb_xy[0] + s->mb_stride, + (s->mb_y & 1) * 2 + (i >> 5)); } - if(MB_FIELD - && !IS_INTERLACED(h->left_type[0])){ + if (MB_FIELD && !IS_INTERLACED(h->left_type[0])) { // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK. 
- SET_DIAG_MV(/2, <<1, h->left_mb_xy[i>=36], ((i>>2))&3); + SET_DIAG_MV(/ 2, << 1, h->left_mb_xy[i >= 36], ((i >> 2)) & 3); } } #undef SET_DIAG_MV } - if(topright_ref != PART_NOT_AVAILABLE){ - *C= h->mv_cache[list][ i - 8 + part_width ]; + if (topright_ref != PART_NOT_AVAILABLE) { + *C = h->mv_cache[list][i - 8 + part_width]; return topright_ref; - }else{ + } else { tprintf(s->avctx, "topright MV not available\n"); - *C= h->mv_cache[list][ i - 8 - 1 ]; - return h->ref_cache[list][ i - 8 - 1 ]; + *C = h->mv_cache[list][i - 8 - 1]; + return h->ref_cache[list][i - 8 - 1]; } } @@ -92,53 +92,61 @@ static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ - const int index8= scan8[n]; - const int top_ref= h->ref_cache[list][ index8 - 8 ]; - const int left_ref= h->ref_cache[list][ index8 - 1 ]; - const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; - const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; - const int16_t * C; +static av_always_inline void pred_motion(H264Context *const h, int n, + int part_width, int list, int ref, + int *const mx, int *const my) +{ + const int index8 = scan8[n]; + const int top_ref = h->ref_cache[list][index8 - 8]; + const int left_ref = h->ref_cache[list][index8 - 1]; + const int16_t *const A = h->mv_cache[list][index8 - 1]; + const int16_t *const B = h->mv_cache[list][index8 - 8]; + const int16_t *C; int diagonal_ref, match_count; - assert(part_width==1 || part_width==2 || part_width==4); + assert(part_width == 1 || part_width == 2 || part_width == 4); /* mv_cache - B . . A T T T T - U . . L . . , . - U . . L . . . . - U . . L . . , . - . . . L . . . . -*/ + * B . . A T T T T + * U . . L . . , . + * U . . L . . . . + * U . . L . . , . + * . . . L . . . . 
+ */ - diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); - match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); + diagonal_ref = fetch_diagonal_mv(h, &C, index8, list, part_width); + match_count = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref); tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count); - if(match_count > 1){ //most common - *mx= mid_pred(A[0], B[0], C[0]); - *my= mid_pred(A[1], B[1], C[1]); - }else if(match_count==1){ - if(left_ref==ref){ - *mx= A[0]; - *my= A[1]; - }else if(top_ref==ref){ - *mx= B[0]; - *my= B[1]; - }else{ - *mx= C[0]; - *my= C[1]; + if (match_count > 1) { //most common + *mx = mid_pred(A[0], B[0], C[0]); + *my = mid_pred(A[1], B[1], C[1]); + } else if (match_count == 1) { + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; + } else if (top_ref == ref) { + *mx = B[0]; + *my = B[1]; + } else { + *mx = C[0]; + *my = C[1]; } - }else{ - if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ - *mx= A[0]; - *my= A[1]; - }else{ - *mx= mid_pred(A[0], B[0], C[0]); - *my= mid_pred(A[1], B[1], C[1]); + } else { + if (top_ref == PART_NOT_AVAILABLE && + diagonal_ref == PART_NOT_AVAILABLE && + left_ref != PART_NOT_AVAILABLE) { + *mx = A[0]; + *my = A[1]; + } else { + *mx = mid_pred(A[0], B[0], C[0]); + *my = mid_pred(A[1], B[1], C[1]); } } - tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, + "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", + top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, + A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); } /** @@ -147,27 +155,32 @@ static av_always_inline void pred_motion(H264Context * const h, int n, int part_ * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ - if(n==0){ - const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; - const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; +static av_always_inline void pred_16x8_motion(H264Context *const h, + int n, int list, int ref, + int *const mx, int *const my) +{ + if (n == 0) { + const int top_ref = h->ref_cache[list][scan8[0] - 8]; + const int16_t *const B = h->mv_cache[list][scan8[0] - 8]; - tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", + top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); - if(top_ref == ref){ - *mx= B[0]; - *my= B[1]; + if (top_ref == ref) { + *mx = B[0]; + *my = B[1]; return; } - }else{ - const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; - const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; + } else { + const int left_ref = h->ref_cache[list][scan8[8] - 1]; + const int16_t *const A = h->mv_cache[list][scan8[8] - 1]; - tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", + left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); - if(left_ref == ref){ - *mx= A[0]; - *my= A[1]; + if 
(left_ref == ref) { + *mx = A[0]; + *my = A[1]; return; } } @@ -182,29 +195,34 @@ static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int * @param mx the x component of the predicted motion vector * @param my the y component of the predicted motion vector */ -static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ - if(n==0){ - const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; - const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; +static av_always_inline void pred_8x16_motion(H264Context *const h, + int n, int list, int ref, + int *const mx, int *const my) +{ + if (n == 0) { + const int left_ref = h->ref_cache[list][scan8[0] - 1]; + const int16_t *const A = h->mv_cache[list][scan8[0] - 1]; - tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", + left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); - if(left_ref == ref){ - *mx= A[0]; - *my= A[1]; + if (left_ref == ref) { + *mx = A[0]; + *my = A[1]; return; } - }else{ - const int16_t * C; + } else { + const int16_t *C; int diagonal_ref; - diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); + diagonal_ref = fetch_diagonal_mv(h, &C, scan8[4], list, 2); - tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", + diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); - if(diagonal_ref == ref){ - *mx= C[0]; - *my= C[1]; + if (diagonal_ref == ref) { + *mx = C[0]; + *my = C[1]; return; } } @@ -213,168 +231,174 @@ static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int pred_motion(h, n, 2, list, ref, mx, my); } -#define FIX_MV_MBAFF(type, refn, mvn, idx)\ - if(FRAME_MBAFF){\ - if(MB_FIELD){\ - if(!IS_INTERLACED(type)){\ - refn <<= 1;\ - AV_COPY32(mvbuf[idx], mvn);\ - mvbuf[idx][1] /= 2;\ - mvn = mvbuf[idx];\ - }\ - }else{\ - if(IS_INTERLACED(type)){\ - refn >>= 1;\ - AV_COPY32(mvbuf[idx], mvn);\ - mvbuf[idx][1] <<= 1;\ - mvn = mvbuf[idx];\ - }\ - }\ +#define FIX_MV_MBAFF(type, refn, mvn, idx) \ + if (FRAME_MBAFF) { \ + if (MB_FIELD) { \ + if (!IS_INTERLACED(type)) { \ + refn <<= 1; \ + AV_COPY32(mvbuf[idx], mvn); \ + mvbuf[idx][1] /= 2; \ + mvn = mvbuf[idx]; \ + } \ + } else { \ + if (IS_INTERLACED(type)) { \ + refn >>= 1; \ + AV_COPY32(mvbuf[idx], mvn); \ + mvbuf[idx][1] <<= 1; \ + mvn = mvbuf[idx]; \ + } \ + } \ } -static av_always_inline void pred_pskip_motion(H264Context * const h){ - DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0}; +static av_always_inline void pred_pskip_motion(H264Context *const h) +{ + DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = { 0 }; DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2]; - MpegEncContext * const s = &h->s; - int8_t *ref = s->current_picture.f.ref_index[0]; - int16_t (*mv)[2] = s->current_picture.f.motion_val[0]; + MpegEncContext *const s = &h->s; + int8_t *ref = s->current_picture.f.ref_index[0]; + int16_t(*mv)[2] = s->current_picture.f.motion_val[0]; int top_ref, left_ref, diagonal_ref, match_count, mx, my; const int16_t *A, *B, *C; int b_stride = h->b_stride; fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); - /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here. 
- * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's - * faster this way. Is there a way to avoid this duplication? + /* To avoid doing an entire fill_decode_caches, we inline the relevant + * parts here. + * FIXME: this is a partial duplicate of the logic in fill_decode_caches, + * but it's faster this way. Is there a way to avoid this duplication? */ - if(USES_LIST(h->left_type[LTOP], 0)){ - left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)]; - A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]]; + if (USES_LIST(h->left_type[LTOP], 0)) { + left_ref = ref[4 * h->left_mb_xy[LTOP] + 1 + (h->left_block[0] & ~1)]; + A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride * h->left_block[0]]; FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0); - if(!(left_ref | AV_RN32A(A))){ + if (!(left_ref | AV_RN32A(A))) goto zeromv; - } - }else if(h->left_type[LTOP]){ + } else if (h->left_type[LTOP]) { left_ref = LIST_NOT_USED; - A = zeromv; - }else{ + A = zeromv; + } else { goto zeromv; } - if(USES_LIST(h->top_type, 0)){ - top_ref = ref[4*h->top_mb_xy + 2]; - B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride]; + if (USES_LIST(h->top_type, 0)) { + top_ref = ref[4 * h->top_mb_xy + 2]; + B = mv[h->mb2b_xy[h->top_mb_xy] + 3 * b_stride]; FIX_MV_MBAFF(h->top_type, top_ref, B, 1); - if(!(top_ref | AV_RN32A(B))){ + if (!(top_ref | AV_RN32A(B))) goto zeromv; - } - }else if(h->top_type){ + } else if (h->top_type) { top_ref = LIST_NOT_USED; - B = zeromv; - }else{ + B = zeromv; + } else { goto zeromv; } - tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); + tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", + top_ref, left_ref, h->s.mb_x, h->s.mb_y); - if(USES_LIST(h->topright_type, 0)){ - diagonal_ref = ref[4*h->topright_mb_xy + 2]; - C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride]; + if (USES_LIST(h->topright_type, 0)) { + diagonal_ref = ref[4 * h->topright_mb_xy + 2]; + C = mv[h->mb2b_xy[h->topright_mb_xy] + 3 * b_stride]; FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2); - }else if(h->topright_type){ + } else if (h->topright_type) { diagonal_ref = LIST_NOT_USED; C = zeromv; - }else{ - if(USES_LIST(h->topleft_type, 0)){ - diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)]; - C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)]; + } else { + if (USES_LIST(h->topleft_type, 0)) { + diagonal_ref = ref[4 * h->topleft_mb_xy + 1 + + (h->topleft_partition & 2)]; + C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + + (h->topleft_partition & 2 * b_stride)]; FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2); - }else if(h->topleft_type){ + } else if (h->topleft_type) { diagonal_ref = LIST_NOT_USED; - C = zeromv; - }else{ + C = zeromv; + } else { diagonal_ref = PART_NOT_AVAILABLE; - C = zeromv; + C = zeromv; } } - match_count= !diagonal_ref + !top_ref + !left_ref; + match_count = !diagonal_ref + !top_ref + !left_ref; tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count); - if(match_count > 1){ + if (match_count > 1) { mx = mid_pred(A[0], B[0], C[0]); my = mid_pred(A[1], B[1], C[1]); - }else if(match_count==1){ - if(!left_ref){ + } else if (match_count == 1) { + if (!left_ref) { mx = A[0]; my = A[1]; - }else if(!top_ref){ + } else if (!top_ref) { mx = B[0]; my = B[1]; - }else{ + } else { mx = C[0]; my = C[1]; } - }else{ + } else { mx = mid_pred(A[0], B[0], C[0]); my = mid_pred(A[1], B[1], C[1]); } - fill_rectangle( 
h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); + fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4); return; + zeromv: - fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); return; } -static void fill_decode_neighbors(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; +static void fill_decode_neighbors(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; + const int mb_xy = h->mb_xy; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS]; - static const uint8_t left_block_options[4][32]={ - {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4}, - {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4}, - {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}, - {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4} + static const uint8_t left_block_options[4][32] = { + { 0, 1, 2, 3, 7, 10, 8, 11, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 5 * 4, 1 + 9 * 4 }, + { 2, 2, 3, 3, 8, 11, 8, 11, 3 + 2 * 4, 3 + 2 * 4, 3 + 3 * 4, 3 + 3 * 4, 1 + 5 * 4, 1 + 9 * 4, 1 + 5 * 4, 1 + 9 * 4 }, + { 0, 0, 1, 1, 7, 10, 7, 10, 3 + 0 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 1 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 }, + { 0, 2, 0, 2, 7, 10, 7, 10, 3 + 0 * 4, 3 + 2 * 4, 3 + 0 * 4, 3 + 2 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 } }; - h->topleft_partition= -1; + h->topleft_partition = -1; - top_xy = mb_xy - (s->mb_stride << MB_FIELD); + top_xy = mb_xy - (s->mb_stride << MB_FIELD); /* Wow, what a mess, why didn't they simplify the interlacing & intra * stuff, I can't imagine that these complex rules are worth it. 
*/ - topleft_xy = top_xy - 1; - topright_xy= top_xy + 1; - left_xy[LBOT] = left_xy[LTOP] = mb_xy-1; + topleft_xy = top_xy - 1; + topright_xy = top_xy + 1; + left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1; h->left_block = left_block_options[0]; - if(FRAME_MBAFF){ + if (FRAME_MBAFF) { const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]); const int curr_mb_field_flag = IS_INTERLACED(mb_type); - if(s->mb_y&1){ + if (s->mb_y & 1) { if (left_mb_field_flag != curr_mb_field_flag) { left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1; if (curr_mb_field_flag) { left_xy[LBOT] += s->mb_stride; - h->left_block = left_block_options[3]; + h->left_block = left_block_options[3]; } else { topleft_xy += s->mb_stride; - // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition + /* take top left mv from the middle of the mb, as opposed + * to all other modes which use the bottom right partition */ h->topleft_partition = 0; - h->left_block = left_block_options[1]; + h->left_block = left_block_options[1]; } } - }else{ - if(curr_mb_field_flag){ + } else { + if (curr_mb_field_flag) { topleft_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy - 1] >> 7) & 1) - 1); topright_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy + 1] >> 7) & 1) - 1); - top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy ] >> 7) & 1) - 1); + top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1); } if (left_mb_field_flag != curr_mb_field_flag) { if (curr_mb_field_flag) { left_xy[LBOT] += s->mb_stride; - h->left_block = left_block_options[3]; + h->left_block = left_block_options[3]; } else { h->left_block = left_block_options[2]; } @@ -382,9 +406,9 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){ } } - h->topleft_mb_xy = topleft_xy; - h->top_mb_xy = top_xy; - h->topright_mb_xy= topright_xy; + h->topleft_mb_xy = topleft_xy; + h->top_mb_xy = top_xy; + h->topright_mb_xy = topright_xy; h->left_mb_xy[LTOP] = left_xy[LTOP]; h->left_mb_xy[LBOT] = left_xy[LBOT]; //FIXME do we need all in the context? 
@@ -395,351 +419,372 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){ h->left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]]; h->left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]]; - if(FMO){ - if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0; - }else{ - if(h->slice_table[topleft_xy ] != h->slice_num){ + if (FMO) { + if (h->slice_table[topleft_xy] != h->slice_num) h->topleft_type = 0; - if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; - if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0; + if (h->slice_table[top_xy] != h->slice_num) + h->top_type = 0; + if (h->slice_table[left_xy[LTOP]] != h->slice_num) + h->left_type[LTOP] = h->left_type[LBOT] = 0; + } else { + if (h->slice_table[topleft_xy] != h->slice_num) { + h->topleft_type = 0; + if (h->slice_table[top_xy] != h->slice_num) + h->top_type = 0; + if (h->slice_table[left_xy[LTOP]] != h->slice_num) + h->left_type[LTOP] = h->left_type[LBOT] = 0; } } - if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0; + if (h->slice_table[topright_xy] != h->slice_num) + h->topright_type = 0; } -static void fill_decode_caches(H264Context *h, int mb_type){ - MpegEncContext * const s = &h->s; +static void fill_decode_caches(H264Context *h, int mb_type) +{ + MpegEncContext *const s = &h->s; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS]; int topleft_type, top_type, topright_type, left_type[LEFT_MBS]; - const uint8_t * left_block= h->left_block; + const uint8_t *left_block = h->left_block; int i; uint8_t *nnz; uint8_t *nnz_cache; - topleft_xy = h->topleft_mb_xy; - top_xy = h->top_mb_xy; - topright_xy = h->topright_mb_xy; - left_xy[LTOP] = h->left_mb_xy[LTOP]; - left_xy[LBOT] = h->left_mb_xy[LBOT]; - topleft_type = h->topleft_type; - top_type = h->top_type; - topright_type = h->topright_type; - left_type[LTOP]= h->left_type[LTOP]; - left_type[LBOT]= h->left_type[LBOT]; + topleft_xy = h->topleft_mb_xy; + top_xy = h->top_mb_xy; + topright_xy = h->topright_mb_xy; + left_xy[LTOP] = h->left_mb_xy[LTOP]; + left_xy[LBOT] = h->left_mb_xy[LBOT]; + topleft_type = h->topleft_type; + top_type = h->top_type; + topright_type = h->topright_type; + left_type[LTOP] = h->left_type[LTOP]; + left_type[LBOT] = h->left_type[LBOT]; - if(!IS_SKIP(mb_type)){ - if(IS_INTRA(mb_type)){ - int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; - h->topleft_samples_available= - h->top_samples_available= - h->left_samples_available= 0xFFFF; - h->topright_samples_available= 0xEEEA; + if (!IS_SKIP(mb_type)) { + if (IS_INTRA(mb_type)) { + int type_mask = h->pps.constrained_intra_pred ? 
IS_INTRA(-1) : -1; + h->topleft_samples_available = + h->top_samples_available = + h->left_samples_available = 0xFFFF; + h->topright_samples_available = 0xEEEA; - if(!(top_type & type_mask)){ - h->topleft_samples_available= 0xB3FF; - h->top_samples_available= 0x33FF; - h->topright_samples_available= 0x26EA; + if (!(top_type & type_mask)) { + h->topleft_samples_available = 0xB3FF; + h->top_samples_available = 0x33FF; + h->topright_samples_available = 0x26EA; } - if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){ - if(IS_INTERLACED(mb_type)){ - if(!(left_type[LTOP] & type_mask)){ - h->topleft_samples_available&= 0xDFFF; - h->left_samples_available&= 0x5FFF; + if (IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])) { + if (IS_INTERLACED(mb_type)) { + if (!(left_type[LTOP] & type_mask)) { + h->topleft_samples_available &= 0xDFFF; + h->left_samples_available &= 0x5FFF; } - if(!(left_type[LBOT] & type_mask)){ - h->topleft_samples_available&= 0xFF5F; - h->left_samples_available&= 0xFF5F; + if (!(left_type[LBOT] & type_mask)) { + h->topleft_samples_available &= 0xFF5F; + h->left_samples_available &= 0xFF5F; } - }else{ + } else { int left_typei = s->current_picture.f.mb_type[left_xy[LTOP] + s->mb_stride]; assert(left_xy[LTOP] == left_xy[LBOT]); - if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; + if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) { + h->topleft_samples_available &= 0xDF5F; + h->left_samples_available &= 0x5F5F; } } - }else{ - if(!(left_type[LTOP] & type_mask)){ - h->topleft_samples_available&= 0xDF5F; - h->left_samples_available&= 0x5F5F; + } else { + if (!(left_type[LTOP] & type_mask)) { + h->topleft_samples_available &= 0xDF5F; + h->left_samples_available &= 0x5F5F; } } - if(!(topleft_type & type_mask)) - h->topleft_samples_available&= 0x7FFF; + if (!(topleft_type & type_mask)) + h->topleft_samples_available &= 0x7FFF; - if(!(topright_type & type_mask)) - h->topright_samples_available&= 0xFBFF; + if (!(topright_type & type_mask)) + h->topright_samples_available &= 0xFBFF; - if(IS_INTRA4x4(mb_type)){ - if(IS_INTRA4x4(top_type)){ - AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); - }else{ - h->intra4x4_pred_mode_cache[4+8*0]= - h->intra4x4_pred_mode_cache[5+8*0]= - h->intra4x4_pred_mode_cache[6+8*0]= - h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); + if (IS_INTRA4x4(mb_type)) { + if (IS_INTRA4x4(top_type)) { + AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]); + } else { + h->intra4x4_pred_mode_cache[4 + 8 * 0] = + h->intra4x4_pred_mode_cache[5 + 8 * 0] = + h->intra4x4_pred_mode_cache[6 + 8 * 0] = + h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask); } - for(i=0; i<2; i++){ - if(IS_INTRA4x4(left_type[LEFT(i)])){ - int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]]; - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]]; - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]]; - }else{ - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask); + for (i = 0; i < 2; i++) { + if (IS_INTRA4x4(left_type[LEFT(i)])) { + int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]]; + h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]]; + h->intra4x4_pred_mode_cache[3 + 8 * 2 
+ 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]]; + } else { + h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = + h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask); } } } } - -/* -0 . T T. T T T T -1 L . .L . . . . -2 L . .L . . . . -3 . T TL . . . . -4 L . .L . . . . -5 L . .. . . . . -*/ -//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) - nnz_cache = h->non_zero_count_cache; - if(top_type){ - nnz = h->non_zero_count[top_xy]; - AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); - if(!s->chroma_y_shift){ - AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); - AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); - }else{ - AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 5]); - AV_COPY32(&nnz_cache[4+8*10], &nnz[4* 9]); - } - }else{ - uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; - AV_WN32A(&nnz_cache[4+8* 0], top_empty); - AV_WN32A(&nnz_cache[4+8* 5], top_empty); - AV_WN32A(&nnz_cache[4+8*10], top_empty); - } - - for (i=0; i<2; i++) { - if(left_type[LEFT(i)]){ - nnz = h->non_zero_count[left_xy[LEFT(i)]]; - nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]]; - nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]]; - if(CHROMA444){ - nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]+4*4]; - nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; - nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; - nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; - }else if(CHROMA422) { - nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4]; - nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4]; - nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4]; - nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4]; - }else{ - nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; - nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; + /* + * 0 . T T. T T T T + * 1 L . .L . . . . + * 2 L . .L . . . . + * 3 . T TL . . . . + * 4 L . .L . . . . + * 5 L . .. . . . . + */ + /* FIXME: constraint_intra_pred & partitioning & nnz + * (let us hope this is just a typo in the spec) */ + nnz_cache = h->non_zero_count_cache; + if (top_type) { + nnz = h->non_zero_count[top_xy]; + AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[4 * 3]); + if (!s->chroma_y_shift) { + AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 7]); + AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 11]); + } else { + AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 5]); + AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 9]); + } + } else { + uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 
0 : 0x40404040; + AV_WN32A(&nnz_cache[4 + 8 * 0], top_empty); + AV_WN32A(&nnz_cache[4 + 8 * 5], top_empty); + AV_WN32A(&nnz_cache[4 + 8 * 10], top_empty); + } + + for (i = 0; i < 2; i++) { + if (left_type[LEFT(i)]) { + nnz = h->non_zero_count[left_xy[LEFT(i)]]; + nnz_cache[3 + 8 * 1 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i]]; + nnz_cache[3 + 8 * 2 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i]]; + if (CHROMA444) { + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 4 * 4]; + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 4 * 4]; + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 8 * 4]; + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 8 * 4]; + } else if (CHROMA422) { + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 4 * 4]; + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 4 * 4]; + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 8 * 4]; + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 8 * 4]; + } else { + nnz_cache[3 + 8 * 6 + 8 * i] = nnz[left_block[8 + 4 + 2 * i]]; + nnz_cache[3 + 8 * 11 + 8 * i] = nnz[left_block[8 + 5 + 2 * i]]; + } + } else { + nnz_cache[3 + 8 * 1 + 2 * 8 * i] = + nnz_cache[3 + 8 * 2 + 2 * 8 * i] = + nnz_cache[3 + 8 * 6 + 2 * 8 * i] = + nnz_cache[3 + 8 * 7 + 2 * 8 * i] = + nnz_cache[3 + 8 * 11 + 2 * 8 * i] = + nnz_cache[3 + 8 * 12 + 2 * 8 * i] = CABAC && !IS_INTRA(mb_type) ? 0 : 64; + } + } + + if (CABAC) { + // top_cbp + if (top_type) + h->top_cbp = h->cbp_table[top_xy]; + else + h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; + // left_cbp + if (left_type[LTOP]) { + h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) | + ((h->cbp_table[left_xy[LTOP]] >> (left_block[0] & (~1))) & 2) | + (((h->cbp_table[left_xy[LBOT]] >> (left_block[2] & (~1))) & 2) << 2); + } else { + h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; } - }else{ - nnz_cache[3+8* 1 + 2*8*i]= - nnz_cache[3+8* 2 + 2*8*i]= - nnz_cache[3+8* 6 + 2*8*i]= - nnz_cache[3+8* 7 + 2*8*i]= - nnz_cache[3+8*11 + 2*8*i]= - nnz_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; } } - if( CABAC ) { - // top_cbp - if(top_type) { - h->top_cbp = h->cbp_table[top_xy]; - } else { - h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; - } - // left_cbp - if (left_type[LTOP]) { - h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) - | ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2) - | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2); - } else { - h->left_cbp = IS_INTRA(mb_type) ? 
0x7CF : 0x00F; - } - } - } - - if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ + if (IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)) { int list; int b_stride = h->b_stride; - for(list=0; listlist_count; list++){ + for (list = 0; list < h->list_count; list++) { int8_t *ref_cache = &h->ref_cache[list][scan8[0]]; int8_t *ref = s->current_picture.f.ref_index[list]; - int16_t (*mv_cache)[2] = &h->mv_cache[list][scan8[0]]; - int16_t (*mv)[2] = s->current_picture.f.motion_val[list]; - if(!USES_LIST(mb_type, list)){ + int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]]; + int16_t(*mv)[2] = s->current_picture.f.motion_val[list]; + if (!USES_LIST(mb_type, list)) continue; - } assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)); - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride; - AV_COPY128(mv_cache[0 - 1*8], mv[b_xy + 0]); - ref_cache[0 - 1*8]= - ref_cache[1 - 1*8]= ref[4*top_xy + 2]; - ref_cache[2 - 1*8]= - ref_cache[3 - 1*8]= ref[4*top_xy + 3]; - }else{ - AV_ZERO128(mv_cache[0 - 1*8]); - AV_WN32A(&ref_cache[0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101u); + if (USES_LIST(top_type, list)) { + const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride; + AV_COPY128(mv_cache[0 - 1 * 8], mv[b_xy + 0]); + ref_cache[0 - 1 * 8] = + ref_cache[1 - 1 * 8] = ref[4 * top_xy + 2]; + ref_cache[2 - 1 * 8] = + ref_cache[3 - 1 * 8] = ref[4 * top_xy + 3]; + } else { + AV_ZERO128(mv_cache[0 - 1 * 8]); + AV_WN32A(&ref_cache[0 - 1 * 8], + ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE) & 0xFF) * 0x01010101u); } - if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ - for(i=0; i<2; i++){ - int cache_idx = -1 + i*2*8; - if(USES_LIST(left_type[LEFT(i)], list)){ - const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3; - const int b8_xy= 4*left_xy[LEFT(i)] + 1; - AV_COPY32(mv_cache[cache_idx ], mv[b_xy + b_stride*left_block[0+i*2]]); - AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]); - ref_cache[cache_idx ]= ref[b8_xy + (left_block[0+i*2]&~1)]; - ref_cache[cache_idx+8]= ref[b8_xy + (left_block[1+i*2]&~1)]; - }else{ - AV_ZERO32(mv_cache[cache_idx ]); - AV_ZERO32(mv_cache[cache_idx+8]); - ref_cache[cache_idx ]= - ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (mb_type & (MB_TYPE_16x8 | MB_TYPE_8x8)) { + for (i = 0; i < 2; i++) { + int cache_idx = -1 + i * 2 * 8; + if (USES_LIST(left_type[LEFT(i)], list)) { + const int b_xy = h->mb2b_xy[left_xy[LEFT(i)]] + 3; + const int b8_xy = 4 * left_xy[LEFT(i)] + 1; + AV_COPY32(mv_cache[cache_idx], + mv[b_xy + b_stride * left_block[0 + i * 2]]); + AV_COPY32(mv_cache[cache_idx + 8], + mv[b_xy + b_stride * left_block[1 + i * 2]]); + ref_cache[cache_idx] = ref[b8_xy + (left_block[0 + i * 2] & ~1)]; + ref_cache[cache_idx + 8] = ref[b8_xy + (left_block[1 + i * 2] & ~1)]; + } else { + AV_ZERO32(mv_cache[cache_idx]); + AV_ZERO32(mv_cache[cache_idx + 8]); + ref_cache[cache_idx] = + ref_cache[cache_idx + 8] = (left_type[LEFT(i)]) ? 
LIST_NOT_USED + : PART_NOT_AVAILABLE; + } } - } - }else{ - if(USES_LIST(left_type[LTOP], list)){ - const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3; - const int b8_xy= 4*left_xy[LTOP] + 1; - AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]); - ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)]; - }else{ + } else { + if (USES_LIST(left_type[LTOP], list)) { + const int b_xy = h->mb2b_xy[left_xy[LTOP]] + 3; + const int b8_xy = 4 * left_xy[LTOP] + 1; + AV_COPY32(mv_cache[-1], mv[b_xy + b_stride * left_block[0]]); + ref_cache[-1] = ref[b8_xy + (left_block[0] & ~1)]; + } else { AV_ZERO32(mv_cache[-1]); - ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE; + ref_cache[-1] = left_type[LTOP] ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } } - if(USES_LIST(topright_type, list)){ - const int b_xy= h->mb2b_xy[topright_xy] + 3*b_stride; - AV_COPY32(mv_cache[4 - 1*8], mv[b_xy]); - ref_cache[4 - 1*8]= ref[4*topright_xy + 2]; - }else{ - AV_ZERO32(mv_cache[4 - 1*8]); - ref_cache[4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (USES_LIST(topright_type, list)) { + const int b_xy = h->mb2b_xy[topright_xy] + 3 * b_stride; + AV_COPY32(mv_cache[4 - 1 * 8], mv[b_xy]); + ref_cache[4 - 1 * 8] = ref[4 * topright_xy + 2]; + } else { + AV_ZERO32(mv_cache[4 - 1 * 8]); + ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED + : PART_NOT_AVAILABLE; } if(ref_cache[2 - 1*8] < 0 || ref_cache[4 - 1*8] < 0){ - if(USES_LIST(topleft_type, list)){ - const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride); - const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2); - AV_COPY32(mv_cache[-1 - 1*8], mv[b_xy]); - ref_cache[-1 - 1*8]= ref[b8_xy]; - }else{ - AV_ZERO32(mv_cache[-1 - 1*8]); - ref_cache[-1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + if (USES_LIST(topleft_type, list)) { + const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + + (h->topleft_partition & 2 * b_stride); + const int b8_xy = 4 * topleft_xy + 1 + (h->topleft_partition & 2); + AV_COPY32(mv_cache[-1 - 1 * 8], mv[b_xy]); + ref_cache[-1 - 1 * 8] = ref[b8_xy]; + } else { + AV_ZERO32(mv_cache[-1 - 1 * 8]); + ref_cache[-1 - 1 * 8] = topleft_type ? 
LIST_NOT_USED + : PART_NOT_AVAILABLE; } } - if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) + if ((mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2)) && !FRAME_MBAFF) continue; - if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))){ - uint8_t (*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]]; - uint8_t (*mvd)[2] = h->mvd_table[list]; - ref_cache[2+8*0] = - ref_cache[2+8*2] = PART_NOT_AVAILABLE; - AV_ZERO32(mv_cache[2+8*0]); - AV_ZERO32(mv_cache[2+8*2]); + if (!(mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2))) { + uint8_t(*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]]; + uint8_t(*mvd)[2] = h->mvd_table[list]; + ref_cache[2 + 8 * 0] = + ref_cache[2 + 8 * 2] = PART_NOT_AVAILABLE; + AV_ZERO32(mv_cache[2 + 8 * 0]); + AV_ZERO32(mv_cache[2 + 8 * 2]); - if( CABAC ) { - if(USES_LIST(top_type, list)){ - const int b_xy= h->mb2br_xy[top_xy]; - AV_COPY64(mvd_cache[0 - 1*8], mvd[b_xy + 0]); - }else{ - AV_ZERO64(mvd_cache[0 - 1*8]); + if (CABAC) { + if (USES_LIST(top_type, list)) { + const int b_xy = h->mb2br_xy[top_xy]; + AV_COPY64(mvd_cache[0 - 1 * 8], mvd[b_xy + 0]); + } else { + AV_ZERO64(mvd_cache[0 - 1 * 8]); } - if(USES_LIST(left_type[LTOP], list)){ - const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6; - AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]); - AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]); - }else{ - AV_ZERO16(mvd_cache[-1 + 0*8]); - AV_ZERO16(mvd_cache[-1 + 1*8]); + if (USES_LIST(left_type[LTOP], list)) { + const int b_xy = h->mb2br_xy[left_xy[LTOP]] + 6; + AV_COPY16(mvd_cache[-1 + 0 * 8], mvd[b_xy - left_block[0]]); + AV_COPY16(mvd_cache[-1 + 1 * 8], mvd[b_xy - left_block[1]]); + } else { + AV_ZERO16(mvd_cache[-1 + 0 * 8]); + AV_ZERO16(mvd_cache[-1 + 1 * 8]); } - if(USES_LIST(left_type[LBOT], list)){ - const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6; - AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]); - AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]); - }else{ - AV_ZERO16(mvd_cache[-1 + 2*8]); - AV_ZERO16(mvd_cache[-1 + 3*8]); + if (USES_LIST(left_type[LBOT], list)) { + const int b_xy = h->mb2br_xy[left_xy[LBOT]] + 6; + AV_COPY16(mvd_cache[-1 + 2 * 8], mvd[b_xy - left_block[2]]); + AV_COPY16(mvd_cache[-1 + 3 * 8], mvd[b_xy - left_block[3]]); + } else { + AV_ZERO16(mvd_cache[-1 + 2 * 8]); + AV_ZERO16(mvd_cache[-1 + 3 * 8]); } - AV_ZERO16(mvd_cache[2+8*0]); - AV_ZERO16(mvd_cache[2+8*2]); - if(h->slice_type_nos == AV_PICTURE_TYPE_B){ + AV_ZERO16(mvd_cache[2 + 8 * 0]); + AV_ZERO16(mvd_cache[2 + 8 * 2]); + if (h->slice_type_nos == AV_PICTURE_TYPE_B) { uint8_t *direct_cache = &h->direct_cache[scan8[0]]; uint8_t *direct_table = h->direct_table; - fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16>>1, 1); + fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16 >> 1, 1); - if(IS_DIRECT(top_type)){ - AV_WN32A(&direct_cache[-1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1)); - }else if(IS_8X8(top_type)){ - int b8_xy = 4*top_xy; - direct_cache[0 - 1*8]= direct_table[b8_xy + 2]; - direct_cache[2 - 1*8]= direct_table[b8_xy + 3]; - }else{ - AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1)); + if (IS_DIRECT(top_type)) { + AV_WN32A(&direct_cache[-1 * 8], + 0x01010101u * (MB_TYPE_DIRECT2 >> 1)); + } else if (IS_8X8(top_type)) { + int b8_xy = 4 * top_xy; + direct_cache[0 - 1 * 8] = direct_table[b8_xy + 2]; + direct_cache[2 - 1 * 8] = direct_table[b8_xy + 3]; + } else { + AV_WN32A(&direct_cache[-1 * 8], + 0x01010101 * (MB_TYPE_16x16 >> 1)); } - if(IS_DIRECT(left_type[LTOP])) - direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[LTOP])) - direct_cache[-1 + 
0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)]; + if (IS_DIRECT(left_type[LTOP])) + direct_cache[-1 + 0 * 8] = MB_TYPE_DIRECT2 >> 1; + else if (IS_8X8(left_type[LTOP])) + direct_cache[-1 + 0 * 8] = direct_table[4 * left_xy[LTOP] + 1 + (left_block[0] & ~1)]; else - direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1; + direct_cache[-1 + 0 * 8] = MB_TYPE_16x16 >> 1; - if(IS_DIRECT(left_type[LBOT])) - direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1; - else if(IS_8X8(left_type[LBOT])) - direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)]; + if (IS_DIRECT(left_type[LBOT])) + direct_cache[-1 + 2 * 8] = MB_TYPE_DIRECT2 >> 1; + else if (IS_8X8(left_type[LBOT])) + direct_cache[-1 + 2 * 8] = direct_table[4 * left_xy[LBOT] + 1 + (left_block[2] & ~1)]; else - direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1; + direct_cache[-1 + 2 * 8] = MB_TYPE_16x16 >> 1; } } } - if(FRAME_MBAFF){ -#define MAP_MVS\ - MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ - MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ - MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ - MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\ - MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\ - MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\ - MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT]) - if(MB_FIELD){ -#define MAP_F2F(idx, mb_type)\ - if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] <<= 1;\ - h->mv_cache[list][idx][1] /= 2;\ - h->mvd_cache[list][idx][1] >>=1;\ - } + +#define MAP_MVS \ + MAP_F2F(scan8[0] - 1 - 1 * 8, topleft_type) \ + MAP_F2F(scan8[0] + 0 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 1 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 2 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 3 - 1 * 8, top_type) \ + MAP_F2F(scan8[0] + 4 - 1 * 8, topright_type) \ + MAP_F2F(scan8[0] - 1 + 0 * 8, left_type[LTOP]) \ + MAP_F2F(scan8[0] - 1 + 1 * 8, left_type[LTOP]) \ + MAP_F2F(scan8[0] - 1 + 2 * 8, left_type[LBOT]) \ + MAP_F2F(scan8[0] - 1 + 3 * 8, left_type[LBOT]) + + if (FRAME_MBAFF) { + if (MB_FIELD) { + +#define MAP_F2F(idx, mb_type) \ + if (!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) { \ + h->ref_cache[list][idx] <<= 1; \ + h->mv_cache[list][idx][1] /= 2; \ + h->mvd_cache[list][idx][1] >>= 1; \ + } + MAP_MVS + } else { + #undef MAP_F2F - }else{ -#define MAP_F2F(idx, mb_type)\ - if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ - h->ref_cache[list][idx] >>= 1;\ - h->mv_cache[list][idx][1] <<= 1;\ - h->mvd_cache[list][idx][1] <<= 1;\ - } +#define MAP_F2F(idx, mb_type) \ + if (IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) { \ + h->ref_cache[list][idx] >>= 1; \ + h->mv_cache[list][idx][1] <<= 1; \ + h->mvd_cache[list][idx][1] <<= 1; \ + } + MAP_MVS #undef MAP_F2F } @@ -747,36 +792,34 @@ static void fill_decode_caches(H264Context *h, int mb_type){ } } - h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]); + h->neighbor_transform_size = !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]); } /** * decodes a P_SKIP or B_SKIP macroblock */ -static void av_unused decode_mb_skip(H264Context *h){ - MpegEncContext * const s = &h->s; - const int mb_xy= h->mb_xy; - int mb_type=0; +static void av_unused decode_mb_skip(H264Context *h) +{ + MpegEncContext *const s = &h->s; + const int mb_xy = h->mb_xy; + int mb_type = 0; memset(h->non_zero_count[mb_xy], 0, 48); - if(MB_FIELD) - mb_type|= MB_TYPE_INTERLACED; + if (MB_FIELD) + mb_type |= MB_TYPE_INTERLACED; - if( 
h->slice_type_nos == AV_PICTURE_TYPE_B ) - { + if (h->slice_type_nos == AV_PICTURE_TYPE_B) { // just for fill_caches. pred_direct_motion will set the real mb_type - mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; - if(h->direct_spatial_mv_pred){ + mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | MB_TYPE_SKIP; + if (h->direct_spatial_mv_pred) { fill_decode_neighbors(h, mb_type); - fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... + fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... } ff_h264_pred_direct_motion(h, &mb_type); - mb_type|= MB_TYPE_SKIP; - } - else - { - mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + mb_type |= MB_TYPE_SKIP; + } else { + mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP; fill_decode_neighbors(h, mb_type); pred_pskip_motion(h); @@ -785,8 +828,8 @@ static void av_unused decode_mb_skip(H264Context *h){ write_back_motion(h, mb_type); s->current_picture.f.mb_type[mb_xy] = mb_type; s->current_picture.f.qscale_table[mb_xy] = s->qscale; - h->slice_table[ mb_xy ]= h->slice_num; - h->prev_mb_skipped= 1; + h->slice_table[mb_xy] = h->slice_num; + h->prev_mb_skipped = 1; } #endif /* AVCODEC_H264_MVPRED_H */ diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h index a5ed069e94..60df532658 100644 --- a/libavcodec/h264data.h +++ b/libavcodec/h264data.h @@ -30,240 +30,243 @@ #define AVCODEC_H264DATA_H #include + #include "libavutil/rational.h" #include "mpegvideo.h" #include "h264.h" - -static const uint8_t golomb_to_pict_type[5]= -{AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI}; - -static const uint8_t golomb_to_intra4x4_cbp[48]={ - 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, - 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, - 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 +static const uint8_t golomb_to_pict_type[5] = { + AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, + AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI }; -static const uint8_t golomb_to_inter_cbp[48]={ - 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, - 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, - 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 +static const uint8_t golomb_to_intra4x4_cbp[48] = { + 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, + 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, + 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 }; -static const uint8_t zigzag_scan[16]={ - 0+0*4, 1+0*4, 0+1*4, 0+2*4, - 1+1*4, 2+0*4, 3+0*4, 2+1*4, - 1+2*4, 0+3*4, 1+3*4, 2+2*4, - 3+1*4, 3+2*4, 2+3*4, 3+3*4, +static const uint8_t golomb_to_inter_cbp[48] = { + 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, + 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, + 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 }; -static const uint8_t field_scan[16]={ - 0+0*4, 0+1*4, 1+0*4, 0+2*4, - 0+3*4, 1+1*4, 1+2*4, 1+3*4, - 2+0*4, 2+1*4, 2+2*4, 2+3*4, - 3+0*4, 3+1*4, 3+2*4, 3+3*4, +static const uint8_t zigzag_scan[16] = { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, }; -static const uint8_t luma_dc_zigzag_scan[16]={ - 0*16 + 0*64, 1*16 + 0*64, 2*16 + 0*64, 0*16 + 2*64, - 3*16 + 0*64, 0*16 + 1*64, 1*16 + 1*64, 2*16 + 1*64, - 1*16 + 2*64, 2*16 + 2*64, 3*16 + 2*64, 0*16 + 3*64, - 3*16 + 1*64, 1*16 
+ 3*64, 2*16 + 3*64, 3*16 + 3*64, +static const uint8_t field_scan[16] = { + 0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4, + 0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4, + 2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4, + 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, }; -static const uint8_t luma_dc_field_scan[16]={ - 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64, - 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64, - 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64, - 1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64, +static const uint8_t luma_dc_zigzag_scan[16] = { + 0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64, + 3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64, + 1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64, + 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64, }; -static const uint8_t chroma_dc_scan[4]={ - (0+0*2)*16, (1+0*2)*16, - (0+1*2)*16, (1+1*2)*16, +static const uint8_t luma_dc_field_scan[16] = { + 0 * 16 + 0 * 64, 2 * 16 + 0 * 64, 1 * 16 + 0 * 64, 0 * 16 + 2 * 64, + 2 * 16 + 2 * 64, 3 * 16 + 0 * 64, 1 * 16 + 2 * 64, 3 * 16 + 2 * 64, + 0 * 16 + 1 * 64, 2 * 16 + 1 * 64, 0 * 16 + 3 * 64, 2 * 16 + 3 * 64, + 1 * 16 + 1 * 64, 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 3 * 16 + 3 * 64, }; -static const uint8_t chroma422_dc_scan[8]={ - (0+0*2)*16, (0+1*2)*16, - (1+0*2)*16, (0+2*2)*16, - (0+3*2)*16, (1+1*2)*16, - (1+2*2)*16, (1+3*2)*16, +static const uint8_t chroma_dc_scan[4] = { + (0 + 0 * 2) * 16, (1 + 0 * 2) * 16, + (0 + 1 * 2) * 16, (1 + 1 * 2) * 16, +}; + +static const uint8_t chroma422_dc_scan[8] = { + (0 + 0 * 2) * 16, (0 + 1 * 2) * 16, + (1 + 0 * 2) * 16, (0 + 2 * 2) * 16, + (0 + 3 * 2) * 16, (1 + 1 * 2) * 16, + (1 + 2 * 2) * 16, (1 + 3 * 2) * 16, }; // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] -static const uint8_t zigzag_scan8x8_cavlc[64]={ - 0+0*8, 1+1*8, 1+2*8, 2+2*8, - 4+1*8, 0+5*8, 3+3*8, 7+0*8, - 3+4*8, 1+7*8, 5+3*8, 6+3*8, - 2+7*8, 6+4*8, 5+6*8, 7+5*8, - 1+0*8, 2+0*8, 0+3*8, 3+1*8, - 3+2*8, 0+6*8, 4+2*8, 6+1*8, - 2+5*8, 2+6*8, 6+2*8, 5+4*8, - 3+7*8, 7+3*8, 4+7*8, 7+6*8, - 0+1*8, 3+0*8, 0+4*8, 4+0*8, - 2+3*8, 1+5*8, 5+1*8, 5+2*8, - 1+6*8, 3+5*8, 7+1*8, 4+5*8, - 4+6*8, 7+4*8, 5+7*8, 6+7*8, - 0+2*8, 2+1*8, 1+3*8, 5+0*8, - 1+4*8, 2+4*8, 6+0*8, 4+3*8, - 0+7*8, 4+4*8, 7+2*8, 3+6*8, - 5+5*8, 6+5*8, 6+6*8, 7+7*8, +static const uint8_t zigzag_scan8x8_cavlc[64] = { + 0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8, + 4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8, + 3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8, + 2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8, + 1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8, + 3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8, + 2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8, + 3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8, + 0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8, + 2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8, + 1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8, + 4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8, + 0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8, + 1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8, + 0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8, + 5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8, }; -static const uint8_t field_scan8x8[64]={ - 0+0*8, 0+1*8, 0+2*8, 1+0*8, - 1+1*8, 0+3*8, 0+4*8, 1+2*8, - 2+0*8, 1+3*8, 0+5*8, 0+6*8, - 0+7*8, 1+4*8, 2+1*8, 3+0*8, - 2+2*8, 1+5*8, 1+6*8, 1+7*8, - 2+3*8, 3+1*8, 4+0*8, 3+2*8, - 2+4*8, 2+5*8, 2+6*8, 2+7*8, - 3+3*8, 4+1*8, 5+0*8, 4+2*8, - 3+4*8, 3+5*8, 3+6*8, 3+7*8, - 4+3*8, 5+1*8, 6+0*8, 5+2*8, - 4+4*8, 4+5*8, 4+6*8, 4+7*8, - 5+3*8, 6+1*8, 6+2*8, 5+4*8, - 5+5*8, 
5+6*8, 5+7*8, 6+3*8, - 7+0*8, 7+1*8, 6+4*8, 6+5*8, - 6+6*8, 6+7*8, 7+2*8, 7+3*8, - 7+4*8, 7+5*8, 7+6*8, 7+7*8, +static const uint8_t field_scan8x8[64] = { + 0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8, + 1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8, + 2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8, + 0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8, + 2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8, + 2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8, + 2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8, + 3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8, + 3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8, + 4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8, + 4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8, + 5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8, + 5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8, + 7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8, + 6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8, + 7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8, }; -static const uint8_t field_scan8x8_cavlc[64]={ - 0+0*8, 1+1*8, 2+0*8, 0+7*8, - 2+2*8, 2+3*8, 2+4*8, 3+3*8, - 3+4*8, 4+3*8, 4+4*8, 5+3*8, - 5+5*8, 7+0*8, 6+6*8, 7+4*8, - 0+1*8, 0+3*8, 1+3*8, 1+4*8, - 1+5*8, 3+1*8, 2+5*8, 4+1*8, - 3+5*8, 5+1*8, 4+5*8, 6+1*8, - 5+6*8, 7+1*8, 6+7*8, 7+5*8, - 0+2*8, 0+4*8, 0+5*8, 2+1*8, - 1+6*8, 4+0*8, 2+6*8, 5+0*8, - 3+6*8, 6+0*8, 4+6*8, 6+2*8, - 5+7*8, 6+4*8, 7+2*8, 7+6*8, - 1+0*8, 1+2*8, 0+6*8, 3+0*8, - 1+7*8, 3+2*8, 2+7*8, 4+2*8, - 3+7*8, 5+2*8, 4+7*8, 5+4*8, - 6+3*8, 6+5*8, 7+3*8, 7+7*8, +static const uint8_t field_scan8x8_cavlc[64] = { + 0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8, + 2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8, + 3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8, + 5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8, + 0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8, + 1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8, + 3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8, + 5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8, + 0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8, + 1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8, + 3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8, + 5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8, + 1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8, + 1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8, + 3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8, + 6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8, }; -typedef struct IMbInfo{ +typedef struct IMbInfo { uint16_t type; uint8_t pred_mode; uint8_t cbp; } IMbInfo; -static const IMbInfo i_mb_type_info[26]={ -{MB_TYPE_INTRA4x4 , -1, -1}, -{MB_TYPE_INTRA16x16, 2, 0}, -{MB_TYPE_INTRA16x16, 1, 0}, -{MB_TYPE_INTRA16x16, 0, 0}, -{MB_TYPE_INTRA16x16, 3, 0}, -{MB_TYPE_INTRA16x16, 2, 16}, -{MB_TYPE_INTRA16x16, 1, 16}, -{MB_TYPE_INTRA16x16, 0, 16}, -{MB_TYPE_INTRA16x16, 3, 16}, -{MB_TYPE_INTRA16x16, 2, 32}, -{MB_TYPE_INTRA16x16, 1, 32}, -{MB_TYPE_INTRA16x16, 0, 32}, -{MB_TYPE_INTRA16x16, 3, 32}, -{MB_TYPE_INTRA16x16, 2, 15+0}, -{MB_TYPE_INTRA16x16, 1, 15+0}, -{MB_TYPE_INTRA16x16, 0, 15+0}, -{MB_TYPE_INTRA16x16, 3, 15+0}, -{MB_TYPE_INTRA16x16, 2, 15+16}, -{MB_TYPE_INTRA16x16, 1, 15+16}, -{MB_TYPE_INTRA16x16, 0, 15+16}, -{MB_TYPE_INTRA16x16, 3, 15+16}, -{MB_TYPE_INTRA16x16, 2, 15+32}, -{MB_TYPE_INTRA16x16, 1, 15+32}, -{MB_TYPE_INTRA16x16, 0, 15+32}, -{MB_TYPE_INTRA16x16, 3, 15+32}, -{MB_TYPE_INTRA_PCM , -1, -1}, +static const IMbInfo i_mb_type_info[26] = { + { MB_TYPE_INTRA4x4, -1, -1 }, + { MB_TYPE_INTRA16x16, 2, 0 }, + { MB_TYPE_INTRA16x16, 1, 0 }, + { MB_TYPE_INTRA16x16, 0, 0 }, + { MB_TYPE_INTRA16x16, 3, 0 }, + { MB_TYPE_INTRA16x16, 2, 16 }, + { MB_TYPE_INTRA16x16, 1, 16 }, + { MB_TYPE_INTRA16x16, 0, 16 }, + { MB_TYPE_INTRA16x16, 3, 16 }, + { MB_TYPE_INTRA16x16, 2, 32 }, + { 
MB_TYPE_INTRA16x16, 1, 32 }, + { MB_TYPE_INTRA16x16, 0, 32 }, + { MB_TYPE_INTRA16x16, 3, 32 }, + { MB_TYPE_INTRA16x16, 2, 15 + 0 }, + { MB_TYPE_INTRA16x16, 1, 15 + 0 }, + { MB_TYPE_INTRA16x16, 0, 15 + 0 }, + { MB_TYPE_INTRA16x16, 3, 15 + 0 }, + { MB_TYPE_INTRA16x16, 2, 15 + 16 }, + { MB_TYPE_INTRA16x16, 1, 15 + 16 }, + { MB_TYPE_INTRA16x16, 0, 15 + 16 }, + { MB_TYPE_INTRA16x16, 3, 15 + 16 }, + { MB_TYPE_INTRA16x16, 2, 15 + 32 }, + { MB_TYPE_INTRA16x16, 1, 15 + 32 }, + { MB_TYPE_INTRA16x16, 0, 15 + 32 }, + { MB_TYPE_INTRA16x16, 3, 15 + 32 }, + { MB_TYPE_INTRA_PCM, -1, -1 }, }; -typedef struct PMbInfo{ +typedef struct PMbInfo { uint16_t type; uint8_t partition_count; } PMbInfo; -static const PMbInfo p_mb_type_info[5]={ -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 4}, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4}, +static const PMbInfo p_mb_type_info[5] = { + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2 }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 4 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_REF0, 4 }, }; -static const PMbInfo p_sub_mb_type_info[4]={ -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, -{MB_TYPE_16x8 |MB_TYPE_P0L0 , 2}, -{MB_TYPE_8x16 |MB_TYPE_P0L0 , 2}, -{MB_TYPE_8x8 |MB_TYPE_P0L0 , 4}, +static const PMbInfo p_sub_mb_type_info[4] = { + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 }, + { MB_TYPE_16x8 | MB_TYPE_P0L0, 2 }, + { MB_TYPE_8x16 | MB_TYPE_P0L0, 2 }, + { MB_TYPE_8x8 | MB_TYPE_P0L0, 4 }, }; -static const PMbInfo b_mb_type_info[23]={ -{MB_TYPE_DIRECT2|MB_TYPE_L0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, -{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +static const PMbInfo b_mb_type_info[23] = { + { MB_TYPE_DIRECT2 | MB_TYPE_L0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, }, + { 
MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, }, }; -static const PMbInfo b_sub_mb_type_info[13]={ -{MB_TYPE_DIRECT2 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, -{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 4, }, -{MB_TYPE_8x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 4, }, -{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +static const PMbInfo b_sub_mb_type_info[13] = { + { MB_TYPE_DIRECT2, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_16x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P1L0, 4, }, + { MB_TYPE_8x8 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 4, }, + { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, }, }; -static const uint8_t dequant4_coeff_init[6][3]={ - {10,13,16}, - {11,14,18}, - {13,16,20}, - {14,18,23}, - {16,20,25}, - {18,23,29}, +static const uint8_t dequant4_coeff_init[6][3] = { + { 10, 13, 16 }, + { 11, 14, 18 }, + { 13, 16, 20 }, + { 14, 18, 23 }, + { 16, 20, 25 }, + { 18, 23, 29 }, }; static const uint8_t dequant8_coeff_init_scan[16] = { - 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1 + 0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1 }; -static const uint8_t dequant8_coeff_init[6][6]={ - {20,18,32,19,25,24}, - {22,19,35,21,28,26}, - {26,23,42,24,33,31}, - {28,25,45,26,35,33}, - {32,28,51,30,40,38}, - {36,32,58,34,46,43}, + +static const uint8_t dequant8_coeff_init[6][6] = { + { 20, 18, 32, 19, 25, 24 }, + { 22, 19, 35, 21, 28, 26 }, + { 26, 23, 42, 24, 33, 31 }, + { 28, 25, 45, 26, 35, 33 }, + { 32, 28, 51, 30, 40, 38 }, + { 36, 32, 58, 34, 46, 43 }, }; #endif /* AVCODEC_H264DATA_H */ diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index d7c192012a..45f81a09c8 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -28,56 +28,90 @@ #define AVCODEC_H264DSP_H #include + #include "dsputil.h" typedef void 
(*h264_weight_func)(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset); -typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height, - int log2_denom, int weightd, int weights, int offset); +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, + int stride, int height, int log2_denom, + int weightd, int weights, int offset); /** * Context for storing H.264 DSP functions */ -typedef struct H264DSPContext{ +typedef struct H264DSPContext { /* weighted MC */ h264_weight_func weight_h264_pixels_tab[4]; h264_biweight_func biweight_h264_pixels_tab[4]; /* loop filter */ - void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride, + int alpha, int beta, int8_t *tc0); /* v/h_loop_filter_luma_intra: align 16 */ - void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta); - void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); + void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, + int alpha, int beta); + void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/, + int stride, int alpha, int beta); + void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride, + int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta, + int8_t *tc0); + void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/, + int stride, int alpha, int beta); // h264_loop_filter_strength: simd only. 
the C version is inlined in h264.c - void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], - int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); + void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], + int8_t ref[2][40], int16_t mv[2][40][2], + int bidir, int edges, int step, + int mask_mv0, int mask_mv1, int field); /* IDCT */ - void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); + void (*h264_idct_add)(uint8_t *dst /*align 4*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct8_add)(uint8_t *dst /*align 8*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/, + DCTELEM *block /*align 16*/, int stride); + void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/, + DCTELEM *block /*align 16*/, int stride); - void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); - void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul); + void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, int stride, + const uint8_t nnzc[15 * 8]); + void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset, + DCTELEM *block /*align 16*/, + int stride, const uint8_t nnzc[15 * 8]); + void (*h264_luma_dc_dequant_idct)(DCTELEM *output, + DCTELEM *input /*align 16*/, int qmul); void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); -}H264DSPContext; +} H264DSPContext; -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, + const int chroma_format_idc); #endif /* AVCODEC_H264DSP_H */ diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index 599cdb228b..d68f39bf8c 100644 --- 
a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -35,18 +35,18 @@ * Prediction types */ //@{ -#define VERT_PRED 0 -#define HOR_PRED 1 -#define DC_PRED 2 -#define DIAG_DOWN_LEFT_PRED 3 -#define DIAG_DOWN_RIGHT_PRED 4 -#define VERT_RIGHT_PRED 5 -#define HOR_DOWN_PRED 6 -#define VERT_LEFT_PRED 7 -#define HOR_UP_PRED 8 +#define VERT_PRED 0 +#define HOR_PRED 1 +#define DC_PRED 2 +#define DIAG_DOWN_LEFT_PRED 3 +#define DIAG_DOWN_RIGHT_PRED 4 +#define VERT_RIGHT_PRED 5 +#define HOR_DOWN_PRED 6 +#define VERT_LEFT_PRED 7 +#define HOR_UP_PRED 8 // DC edge (not for VP8) -#define LEFT_DC_PRED 9 +#define LEFT_DC_PRED 9 #define TOP_DC_PRED 10 #define DC_128_PRED 11 @@ -56,7 +56,7 @@ #define VERT_LEFT_PRED_RV40_NODOWN 14 // VP8 specific -#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane +#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane #define VERT_VP8_PRED 10 ///< for VP8, #VERT_PRED is the average of ///< (left col+cur col x2+right col) / 4; ///< this is the "unaveraged" one @@ -65,44 +65,53 @@ #define DC_127_PRED 12 #define DC_129_PRED 13 -#define DC_PRED8x8 0 -#define HOR_PRED8x8 1 -#define VERT_PRED8x8 2 -#define PLANE_PRED8x8 3 +#define DC_PRED8x8 0 +#define HOR_PRED8x8 1 +#define VERT_PRED8x8 2 +#define PLANE_PRED8x8 3 // DC edge -#define LEFT_DC_PRED8x8 4 -#define TOP_DC_PRED8x8 5 -#define DC_128_PRED8x8 6 +#define LEFT_DC_PRED8x8 4 +#define TOP_DC_PRED8x8 5 +#define DC_128_PRED8x8 6 // H264/SVQ3 (8x8) specific -#define ALZHEIMER_DC_L0T_PRED8x8 7 -#define ALZHEIMER_DC_0LT_PRED8x8 8 -#define ALZHEIMER_DC_L00_PRED8x8 9 +#define ALZHEIMER_DC_L0T_PRED8x8 7 +#define ALZHEIMER_DC_0LT_PRED8x8 8 +#define ALZHEIMER_DC_L00_PRED8x8 9 #define ALZHEIMER_DC_0L0_PRED8x8 10 // VP8 specific -#define DC_127_PRED8x8 7 -#define DC_129_PRED8x8 8 +#define DC_127_PRED8x8 7 +#define DC_129_PRED8x8 8 //@} /** * Context for storing H.264 prediction functions */ -typedef struct H264PredContext{ - void (*pred4x4 [9+3+3])(uint8_t *src, const uint8_t *topright, int stride);//FIXME move to dsp? - void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride); - void (*pred8x8 [4+3+4])(uint8_t *src, int stride); - void (*pred16x16[4+3+2])(uint8_t *src, int stride); +typedef struct H264PredContext { + void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, int stride); //FIXME move to dsp? 
+ void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, int stride); + void(*pred8x8[4 + 3 + 4])(uint8_t *src, int stride); + void(*pred16x16[4 + 3 + 2])(uint8_t *src, int stride); - void (*pred4x4_add [2])(uint8_t *pix/*align 4*/, const DCTELEM *block/*align 16*/, int stride); - void (*pred8x8l_add [2])(uint8_t *pix/*align 8*/, const DCTELEM *block/*align 16*/, int stride); - void (*pred8x8_add [3])(uint8_t *pix/*align 8*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); - void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); -}H264PredContext; + void(*pred4x4_add[2])(uint8_t *pix /*align 4*/, + const DCTELEM *block /*align 16*/, int stride); + void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, + const DCTELEM *block /*align 16*/, int stride); + void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, + const int *block_offset, + const DCTELEM *block /*align 16*/, int stride); + void(*pred16x16_add[3])(uint8_t *pix /*align 16*/, + const int *block_offset, + const DCTELEM *block /*align 16*/, int stride); +} H264PredContext; -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, + const int bit_depth, const int chroma_format_idc); #endif /* AVCODEC_H264PRED_H */ diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c index 2a25982531..13aded9122 100644 --- a/libavcodec/vcr1.c +++ b/libavcodec/vcr1.c @@ -21,39 +21,57 @@ /** * @file - * ati vcr1 codec. + * ATI VCR1 codec */ #include "avcodec.h" #include "dsputil.h" -//#undef NDEBUG -//#include - -/* Disable the encoder. 
*/ -#undef CONFIG_VCR1_ENCODER -#define CONFIG_VCR1_ENCODER 0 - -typedef struct VCR1Context{ - AVCodecContext *avctx; +typedef struct VCR1Context { AVFrame picture; int delta[16]; int offset[4]; } VCR1Context; -static int decode_frame(AVCodecContext *avctx, - void *data, int *data_size, - AVPacket *avpkt) +static av_cold void common_init(AVCodecContext *avctx) { - const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; - VCR1Context * const a = avctx->priv_data; - AVFrame *picture = data; - AVFrame * const p = &a->picture; - const uint8_t *bytestream= buf; + VCR1Context *const a = avctx->priv_data; + + avctx->coded_frame = &a->picture; + avcodec_get_frame_defaults(&a->picture); +} + +static av_cold int decode_init(AVCodecContext *avctx) +{ + common_init(avctx); + + avctx->pix_fmt = PIX_FMT_YUV410P; + + return 0; +} + +static av_cold int decode_end(AVCodecContext *avctx) +{ + VCR1Context *s = avctx->priv_data; + + if (s->picture.data[0]) + avctx->release_buffer(avctx, &s->picture); + + return 0; +} + +static int decode_frame(AVCodecContext *avctx, void *data, + int *data_size, AVPacket *avpkt) +{ + const uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + VCR1Context *const a = avctx->priv_data; + AVFrame *picture = data; + AVFrame *const p = &a->picture; + const uint8_t *bytestream = buf; int i, x, y; - if(p->data[0]) + if (p->data[0]) avctx->release_buffer(avctx, p); if(buf_size < 16 + avctx->height + avctx->width*avctx->height*5/8){ @@ -61,57 +79,57 @@ static int decode_frame(AVCodecContext *avctx, return AVERROR(EINVAL); } - p->reference= 0; - if(avctx->get_buffer(avctx, p) < 0){ + p->reference = 0; + if (avctx->get_buffer(avctx, p) < 0) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } - p->pict_type= AV_PICTURE_TYPE_I; - p->key_frame= 1; + p->pict_type = AV_PICTURE_TYPE_I; + p->key_frame = 1; - for(i=0; i<16; i++){ - a->delta[i]= *(bytestream++); + for (i = 0; i < 16; i++) { + a->delta[i] = *bytestream++; bytestream++; } - for(y=0; yheight; y++){ + for (y = 0; y < avctx->height; y++) { int offset; - uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ]; + uint8_t *luma = &a->picture.data[0][y * a->picture.linesize[0]]; - if((y&3) == 0){ - uint8_t *cb= &a->picture.data[1][ (y>>2)*a->picture.linesize[1] ]; - uint8_t *cr= &a->picture.data[2][ (y>>2)*a->picture.linesize[2] ]; + if ((y & 3) == 0) { + uint8_t *cb = &a->picture.data[1][(y >> 2) * a->picture.linesize[1]]; + uint8_t *cr = &a->picture.data[2][(y >> 2) * a->picture.linesize[2]]; - for(i=0; i<4; i++) - a->offset[i]= *(bytestream++); + for (i = 0; i < 4; i++) + a->offset[i] = *bytestream++; - offset= a->offset[0] - a->delta[ bytestream[2]&0xF ]; - for(x=0; xwidth; x+=4){ - luma[0]=( offset += a->delta[ bytestream[2]&0xF ]); - luma[1]=( offset += a->delta[ bytestream[2]>>4 ]); - luma[2]=( offset += a->delta[ bytestream[0]&0xF ]); - luma[3]=( offset += a->delta[ bytestream[0]>>4 ]); - luma += 4; + offset = a->offset[0] - a->delta[bytestream[2] & 0xF]; + for (x = 0; x < avctx->width; x += 4) { + luma[0] = offset += a->delta[bytestream[2] & 0xF]; + luma[1] = offset += a->delta[bytestream[2] >> 4]; + luma[2] = offset += a->delta[bytestream[0] & 0xF]; + luma[3] = offset += a->delta[bytestream[0] >> 4]; + luma += 4; - *(cb++) = bytestream[3]; - *(cr++) = bytestream[1]; + *cb++ = bytestream[3]; + *cr++ = bytestream[1]; - bytestream+= 4; + bytestream += 4; } - }else{ - offset= a->offset[y&3] - a->delta[ bytestream[2]&0xF ]; + } else { + offset = a->offset[y & 3] - a->delta[bytestream[2] & 0xF]; - 
for(x=0; xwidth; x+=8){ - luma[0]=( offset += a->delta[ bytestream[2]&0xF ]); - luma[1]=( offset += a->delta[ bytestream[2]>>4 ]); - luma[2]=( offset += a->delta[ bytestream[3]&0xF ]); - luma[3]=( offset += a->delta[ bytestream[3]>>4 ]); - luma[4]=( offset += a->delta[ bytestream[0]&0xF ]); - luma[5]=( offset += a->delta[ bytestream[0]>>4 ]); - luma[6]=( offset += a->delta[ bytestream[1]&0xF ]); - luma[7]=( offset += a->delta[ bytestream[1]>>4 ]); - luma += 8; - bytestream+= 4; + for (x = 0; x < avctx->width; x += 8) { + luma[0] = offset += a->delta[bytestream[2] & 0xF]; + luma[1] = offset += a->delta[bytestream[2] >> 4]; + luma[2] = offset += a->delta[bytestream[3] & 0xF]; + luma[3] = offset += a->delta[bytestream[3] >> 4]; + luma[4] = offset += a->delta[bytestream[0] & 0xF]; + luma[5] = offset += a->delta[bytestream[0] >> 4]; + luma[6] = offset += a->delta[bytestream[1] & 0xF]; + luma[7] = offset += a->delta[bytestream[1] >> 4]; + luma += 8; + bytestream += 4; } } } @@ -122,62 +140,6 @@ static int decode_frame(AVCodecContext *avctx, return buf_size; } -#if CONFIG_VCR1_ENCODER -static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ - VCR1Context * const a = avctx->priv_data; - AVFrame *pict = data; - AVFrame * const p = &a->picture; - int size; - - *p = *pict; - p->pict_type= AV_PICTURE_TYPE_I; - p->key_frame= 1; - - avpriv_align_put_bits(&a->pb); - while(get_bit_count(&a->pb)&31) - put_bits(&a->pb, 8, 0); - - size= get_bit_count(&a->pb)/32; - - return size*4; -} -#endif - -static av_cold void common_init(AVCodecContext *avctx){ - VCR1Context * const a = avctx->priv_data; - - avctx->coded_frame = &a->picture; - avcodec_get_frame_defaults(&a->picture); - a->avctx= avctx; -} - -static av_cold int decode_init(AVCodecContext *avctx){ - - common_init(avctx); - - avctx->pix_fmt= PIX_FMT_YUV410P; - - return 0; -} - -static av_cold int decode_end(AVCodecContext *avctx){ - VCR1Context *s = avctx->priv_data; - - if (s->picture.data[0]) - avctx->release_buffer(avctx, &s->picture); - - return 0; -} - -#if CONFIG_VCR1_ENCODER -static av_cold int encode_init(AVCodecContext *avctx){ - - common_init(avctx); - - return 0; -} -#endif - AVCodec ff_vcr1_decoder = { .name = "vcr1", .type = AVMEDIA_TYPE_VIDEO, @@ -190,14 +152,39 @@ AVCodec ff_vcr1_decoder = { .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), }; +/* Disable the encoder. 
*/ +#undef CONFIG_VCR1_ENCODER +#define CONFIG_VCR1_ENCODER 0 + #if CONFIG_VCR1_ENCODER +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, + int buf_size, void *data) +{ + VCR1Context *const a = avctx->priv_data; + AVFrame *pict = data; + AVFrame *const p = &a->picture; + int size; + + *p = *pict; + p->pict_type = AV_PICTURE_TYPE_I; + p->key_frame = 1; + + avpriv_align_put_bits(&a->pb); + while (get_bit_count(&a->pb) & 31) + put_bits(&a->pb, 8, 0); + + size = get_bit_count(&a->pb) / 32; + + return size * 4; +} + AVCodec ff_vcr1_encoder = { .name = "vcr1", .type = AVMEDIA_TYPE_VIDEO, .id = CODEC_ID_VCR1, .priv_data_size = sizeof(VCR1Context), - .init = encode_init, + .init = common_init, .encode = encode_frame, .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"), }; -#endif +#endif /* CONFIG_VCR1_ENCODER */ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 68df954897..e6cd161bfd 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1792,6 +1792,22 @@ QPEL_2TAP(avg_, 16, 3dnow) QPEL_2TAP(put_, 8, 3dnow) QPEL_2TAP(avg_, 8, 3dnow) +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + put_pixels8_xy2_mmx(dst, src, stride, 8); +} +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + put_pixels16_xy2_mmx(dst, src, stride, 16); +} +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + avg_pixels8_xy2_mmx(dst, src, stride, 8); +} +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) +{ + avg_pixels16_xy2_mmx(dst, src, stride, 16); +} #if HAVE_YASM typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src, diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index 6ba5ea83bc..91940f6ee8 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -199,6 +199,11 @@ void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); + void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index e8acfb25fe..9b50940d4d 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -1,5 +1,7 @@ ;****************************************************************************** ;* MMX/SSE2-optimized functions for the RV40 decoder +;* Copyright (c) 2010 Ronald S. Bultje +;* Copyright (c) 2010 Jason Garrett-Glaser ;* Copyright (C) 2012 Christophe Gisquet ;* ;* This file is part of Libav. 
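[Usage note, not part of the patch] The ff_{put,avg}_rv40_qpel{8,16}_mc33_mmx wrappers added to dsputil_mmx.c a few hunks above only forward to the existing *_pixels*_xy2_mmx half-pel routines; the sketch below shows how such wrappers are typically hooked into the RV40 motion-compensation tables. The helper name hook_up_mc33_mmx is hypothetical, and the table indexing mirrors the QPEL_FUNC_SET convention that appears later in this patch (tab[2 - SIZE / 8][4 * PV + PH]).

    /* Illustrative sketch only -- not from the patch.  Assumes the
     * RV34DSPContext tables declared in libavcodec/rv34dsp.h.        */
    #include "libavcodec/rv34dsp.h"
    #include "libavcodec/x86/dsputil_mmx.h"

    static void hook_up_mc33_mmx(RV34DSPContext *c)
    {
        /* (PH, PV) = (3, 3)  ->  index 4 * 3 + 3 = 15 */
        c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx; /* 16x16 */
        c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx;  /*  8x8  */
        c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx;
        c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx;
    }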
@@ -25,11 +27,319 @@ SECTION_RODATA align 16 -shift_round: times 8 dw 1 << (16 - 6) -cextern pw_16 +pw_1024: times 8 dw 1 << (16 - 6) ; pw_1024 + +sixtap_filter_hb_m: times 8 db 1, -5 + times 8 db 52, 20 + ; multiplied by 2 to have the same shift + times 8 db 2, -10 + times 8 db 40, 40 + ; back to normal + times 8 db 1, -5 + times 8 db 20, 52 + +sixtap_filter_v_m: times 8 dw 1 + times 8 dw -5 + times 8 dw 52 + times 8 dw 20 + ; multiplied by 2 to have the same shift + times 8 dw 2 + times 8 dw -10 + times 8 dw 40 + times 8 dw 40 + ; back to normal + times 8 dw 1 + times 8 dw -5 + times 8 dw 20 + times 8 dw 52 + +%ifdef PIC +%define sixtap_filter_hw picregq +%define sixtap_filter_hb picregq +%define sixtap_filter_v picregq +%define npicregs 1 +%else +%define sixtap_filter_hw sixtap_filter_hw_m +%define sixtap_filter_hb sixtap_filter_hb_m +%define sixtap_filter_v sixtap_filter_v_m +%define npicregs 0 +%endif + +filter_h6_shuf1: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +filter_h6_shuf2: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 +filter_h6_shuf3: db 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11 + +cextern pw_32 +cextern pw_16 +cextern pw_512 SECTION .text +;----------------------------------------------------------------------------- +; subpel MC functions: +; +; void [put|rv40]_rv40_qpel_[h|v]_(uint8_t *dst, int deststride, +; uint8_t *src, int srcstride, +; int len, int m); +;---------------------------------------------------------------------- +%macro LOAD 2 +%if WIN64 + movsxd %1q, %1d +%endif +%ifdef PIC + add %1q, picregq +%else + add %1q, %2 +%endif +%endmacro + +%macro STORE 3 +%ifidn %3, avg + movh %2, [dstq] +%endif + packuswb %1, %1 +%ifidn %3, avg +%if cpuflag(3dnow) + pavgusb %1, %2 +%else + pavgb %1, %2 +%endif +%endif + movh [dstq], %1 +%endmacro + +%macro FILTER_V 1 +cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, my, picreg +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + pxor m7, m7 + LOAD my, sixtap_filter_v + + ; read 5 lines + sub srcq, srcstrideq + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + punpcklbw m0, m7 + punpcklbw m1, m7 + punpcklbw m2, m7 + punpcklbw m3, m7 + punpcklbw m4, m7 + +%ifdef m8 + mova m8, [myq+ 0] + mova m9, [myq+16] + mova m10, [myq+32] + mova m11, [myq+48] +%define COEFF05 m8 +%define COEFF14 m9 +%define COEFF2 m10 +%define COEFF3 m11 +%else +%define COEFF05 [myq+ 0] +%define COEFF14 [myq+16] +%define COEFF2 [myq+32] +%define COEFF3 [myq+48] +%endif +.nextrow: + mova m6, m1 + movh m5, [srcq+2*srcstrideq] ; read new row + paddw m6, m4 + punpcklbw m5, m7 + pmullw m6, COEFF14 + paddw m0, m5 + pmullw m0, COEFF05 + paddw m6, m0 + mova m0, m1 + paddw m6, [pw_32] + mova m1, m2 + pmullw m2, COEFF2 + paddw m6, m2 + mova m2, m3 + pmullw m3, COEFF3 + paddw m6, m3 + + ; round/clip/store + mova m3, m4 + psraw m6, 6 + mova m4, m5 + STORE m6, m5, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +%macro FILTER_H 1 +cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, height, mx, picreg +%ifdef PIC + lea picregq, [sixtap_filter_v_m] +%endif + pxor m7, m7 + LOAD mx, sixtap_filter_v + mova m6, [pw_32] +%ifdef m8 + mova m8, [mxq+ 0] + mova m9, [mxq+16] + mova m10, [mxq+32] + mova m11, [mxq+48] +%define COEFF05 m8 +%define COEFF14 m9 +%define 
COEFF2 m10 +%define COEFF3 m11 +%else +%define COEFF05 [mxq+ 0] +%define COEFF14 [mxq+16] +%define COEFF2 [mxq+32] +%define COEFF3 [mxq+48] +%endif +.nextrow: + movq m0, [srcq-2] + movq m5, [srcq+3] + movq m1, [srcq-1] + movq m4, [srcq+2] + punpcklbw m0, m7 + punpcklbw m5, m7 + punpcklbw m1, m7 + punpcklbw m4, m7 + movq m2, [srcq-0] + movq m3, [srcq+1] + paddw m0, m5 + paddw m1, m4 + punpcklbw m2, m7 + punpcklbw m3, m7 + pmullw m0, COEFF05 + pmullw m1, COEFF14 + pmullw m2, COEFF2 + pmullw m3, COEFF3 + paddw m0, m6 + paddw m1, m2 + paddw m0, m3 + paddw m0, m1 + psraw m0, 6 + STORE m0, m1, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +FILTER_V put +FILTER_H put + +INIT_MMX mmx2 +FILTER_V avg +FILTER_H avg + +INIT_MMX 3dnow +FILTER_V avg +FILTER_H avg +%endif + +INIT_XMM sse2 +FILTER_H put +FILTER_H avg +FILTER_V put +FILTER_V avg + +%macro FILTER_SSSE3 1 +cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, my, picreg +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + + ; read 5 lines + sub srcq, srcstrideq + LOAD my, sixtap_filter_hb + sub srcq, srcstrideq + movh m0, [srcq] + movh m1, [srcq+srcstrideq] + movh m2, [srcq+srcstrideq*2] + lea srcq, [srcq+srcstrideq*2] + add srcq, srcstrideq + mova m5, [myq] + movh m3, [srcq] + movh m4, [srcq+srcstrideq] + lea srcq, [srcq+2*srcstrideq] + +.nextrow: + mova m6, m2 + punpcklbw m0, m1 + punpcklbw m6, m3 + pmaddubsw m0, m5 + pmaddubsw m6, [myq+16] + movh m7, [srcq] ; read new row + paddw m6, m0 + mova m0, m1 + mova m1, m2 + mova m2, m3 + mova m3, m4 + mova m4, m7 + punpcklbw m7, m3 + pmaddubsw m7, m5 + paddw m6, m7 + pmulhrsw m6, [pw_512] + STORE m6, m7, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET + +cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg +%ifdef PIC + lea picregq, [sixtap_filter_hb_m] +%endif + mova m3, [filter_h6_shuf2] + mova m4, [filter_h6_shuf3] + LOAD mx, sixtap_filter_hb + mova m5, [mxq] ; set up 6tap filter in bytes + mova m6, [mxq+16] + mova m7, [filter_h6_shuf1] + +.nextrow: + movu m0, [srcq-2] + mova m1, m0 + mova m2, m0 + pshufb m0, m7 + pshufb m1, m3 + pshufb m2, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m6 + pmaddubsw m2, m5 + paddw m0, m1 + paddw m0, m2 + pmulhrsw m0, [pw_512] + STORE m0, m1, %1 + + ; go to next line + add dstq, dststrideq + add srcq, srcstrideq + dec heightd ; next row + jg .nextrow + REP_RET +%endmacro + +INIT_XMM ssse3 +FILTER_SSSE3 put +FILTER_SSSE3 avg + ; %1=5bits weights?, %2=dst %3=src1 %4=src3 %5=stride if sse2 %macro RV40_WCORE 4-5 movh m4, [%3 + r6 + 0] @@ -143,7 +453,7 @@ SECTION .text %macro RV40_WEIGHT 3 cglobal rv40_weight_func_%1_%2, 6, 7, 8 %if cpuflag(ssse3) - mova m1, [shift_round] + mova m1, [pw_1024] %else mova m1, [pw_16] %endif diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index df468aa9e5..3f42363e4e 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -22,8 +22,11 @@ /** * @file * RV40 decoder motion compensation functions x86-optimised + * 2,0 and 0,2 have h264 equivalents. 
+ * 3,3 is bugged in the rv40 format and maps to _xy2 version */ +#include "libavcodec/x86/dsputil_mmx.h" #include "libavcodec/rv34dsp.h" void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, @@ -53,6 +56,132 @@ DECLARE_WEIGHT(mmx) DECLARE_WEIGHT(sse2) DECLARE_WEIGHT(ssse3) +/** @{ */ +/** + * Define one qpel function. + * LOOPSIZE must be already set to the number of pixels processed per + * iteration in the inner loop of the called functions. + * COFF(x) must be already defined so as to provide the offset into any + * array of coeffs used by the called function for the qpel position x. + */ +#define QPEL_FUNC_DECL(OP, SIZE, PH, PV, OPT) \ +static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst, \ + uint8_t *src, \ + int stride) \ +{ \ + int i; \ + if (PH && PV) { \ + DECLARE_ALIGNED(16, uint8_t, tmp)[SIZE * (SIZE + 5)]; \ + uint8_t *tmpptr = tmp + SIZE * 2; \ + src -= stride * 2; \ + \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_put_rv40_qpel_h ##OPT(tmp + i, SIZE, src + i, stride, \ + SIZE + 5, HCOFF(PH)); \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_v ##OPT(dst + i, stride, tmpptr + i, \ + SIZE, SIZE, VCOFF(PV)); \ + } else if (PV) { \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_v ## OPT(dst + i, stride, src + i, \ + stride, SIZE, VCOFF(PV)); \ + } else { \ + for (i = 0; i < SIZE; i += LOOPSIZE) \ + ff_ ##OP ##rv40_qpel_h ## OPT(dst + i, stride, src + i, \ + stride, SIZE, HCOFF(PH)); \ + } \ +}; + +/** Declare functions for sizes 8 and 16 and given operations + * and qpel position. */ +#define QPEL_FUNCS_DECL(OP, PH, PV, OPT) \ + QPEL_FUNC_DECL(OP, 8, PH, PV, OPT) \ + QPEL_FUNC_DECL(OP, 16, PH, PV, OPT) + +/** Declare all functions for all sizes and qpel positions */ +#define QPEL_MC_DECL(OP, OPT) \ +void ff_ ##OP ##rv40_qpel_h ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ + const uint8_t *src, \ + ptrdiff_t srcStride, \ + int len, int m); \ +void ff_ ##OP ##rv40_qpel_v ##OPT(uint8_t *dst, ptrdiff_t dstStride, \ + const uint8_t *src, \ + ptrdiff_t srcStride, \ + int len, int m); \ +QPEL_FUNCS_DECL(OP, 0, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 0, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 0, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 2, OPT) \ +QPEL_FUNCS_DECL(OP, 1, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 2, OPT) \ +QPEL_FUNCS_DECL(OP, 2, 3, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 0, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 1, OPT) \ +QPEL_FUNCS_DECL(OP, 3, 2, OPT) +/** @} */ + +#define LOOPSIZE 8 +#define HCOFF(x) (32 * (x - 1)) +#define VCOFF(x) (32 * (x - 1)) +QPEL_MC_DECL(put_, _ssse3) +QPEL_MC_DECL(avg_, _ssse3) + +#undef LOOPSIZE +#undef HCOFF +#undef VCOFF +#define LOOPSIZE 8 +#define HCOFF(x) (64 * (x - 1)) +#define VCOFF(x) (64 * (x - 1)) +QPEL_MC_DECL(put_, _sse2) +QPEL_MC_DECL(avg_, _sse2) + +#if ARCH_X86_32 +#undef LOOPSIZE +#undef HCOFF +#undef VCOFF +#define LOOPSIZE 4 +#define HCOFF(x) (64 * (x - 1)) +#define VCOFF(x) (64 * (x - 1)) + +QPEL_MC_DECL(put_, _mmx) + +#define ff_put_rv40_qpel_h_mmx2 ff_put_rv40_qpel_h_mmx +#define ff_put_rv40_qpel_v_mmx2 ff_put_rv40_qpel_v_mmx +QPEL_MC_DECL(avg_, _mmx2) + +#define ff_put_rv40_qpel_h_3dnow ff_put_rv40_qpel_h_mmx +#define ff_put_rv40_qpel_v_3dnow ff_put_rv40_qpel_v_mmx +QPEL_MC_DECL(avg_, _3dnow) +#endif + +/** @{ */ +/** Set one function */ +#define QPEL_FUNC_SET(OP, SIZE, PH, PV, OPT) \ + c-> OP ## pixels_tab[2 - SIZE / 8][4 * PV + PH] = OP ## rv40_qpel ##SIZE ## _mc ##PH ##PV ##OPT; + +/** Set functions put and avg for sizes 8 and 16 and a given qpel 
position */ +#define QPEL_FUNCS_SET(OP, PH, PV, OPT) \ + QPEL_FUNC_SET(OP, 8, PH, PV, OPT) \ + QPEL_FUNC_SET(OP, 16, PH, PV, OPT) + +/** Set all functions for all sizes and qpel positions */ +#define QPEL_MC_SET(OP, OPT) \ +QPEL_FUNCS_SET (OP, 0, 1, OPT) \ +QPEL_FUNCS_SET (OP, 0, 3, OPT) \ +QPEL_FUNCS_SET (OP, 1, 0, OPT) \ +QPEL_FUNCS_SET (OP, 1, 1, OPT) \ +QPEL_FUNCS_SET (OP, 1, 2, OPT) \ +QPEL_FUNCS_SET (OP, 1, 3, OPT) \ +QPEL_FUNCS_SET (OP, 2, 1, OPT) \ +QPEL_FUNCS_SET (OP, 2, 2, OPT) \ +QPEL_FUNCS_SET (OP, 2, 3, OPT) \ +QPEL_FUNCS_SET (OP, 3, 0, OPT) \ +QPEL_FUNCS_SET (OP, 3, 1, OPT) \ +QPEL_FUNCS_SET (OP, 3, 2, OPT) +/** @} */ + void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) { #if HAVE_YASM @@ -65,25 +194,42 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx; + c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx; + c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx; + c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx; + c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx; +#if ARCH_X86_32 + QPEL_MC_SET(put_, _mmx) +#endif } if (mm_flags & AV_CPU_FLAG_MMX2) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; +#if ARCH_X86_32 + QPEL_MC_SET(avg_, _mmx2) +#endif } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; +#if ARCH_X86_32 + QPEL_MC_SET(avg_, _3dnow) +#endif } if (mm_flags & AV_CPU_FLAG_SSE2) { c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_sse2; + QPEL_MC_SET(put_, _sse2) + QPEL_MC_SET(avg_, _sse2) } if (mm_flags & AV_CPU_FLAG_SSSE3) { c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_ssse3; + QPEL_MC_SET(put_, _ssse3) + QPEL_MC_SET(avg_, _ssse3) } #endif } diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 0647772128..962dbf63a9 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -21,6 +21,7 @@ HEADERS = asrc_abuffer.h \ vsrc_buffer.h \ OBJS = allfilters.o \ + audio.o \ avfilter.o \ avfiltergraph.o \ buffersink.o \ diff --git a/libavfilter/af_aconvert.c b/libavfilter/af_aconvert.c index 8c1b5dc346..e0f347413b 100644 --- a/libavfilter/af_aconvert.c +++ b/libavfilter/af_aconvert.c @@ -144,7 +144,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref AConvertContext *aconvert = inlink->dst->priv; const int n = insamplesref->audio->nb_samples; AVFilterLink *const outlink = inlink->dst->outputs[0]; - AVFilterBufferRef *outsamplesref = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, n); + AVFilterBufferRef *outsamplesref = ff_get_audio_buffer(outlink, AV_PERM_WRITE, n); swr_convert(aconvert->swr, outsamplesref->data, n, (void *)insamplesref->data, n); @@ -153,7 +153,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref outsamplesref->audio->channel_layout = 
outlink->channel_layout; outsamplesref->audio->planar = outlink->planar; - avfilter_filter_samples(outlink, outsamplesref); + ff_filter_samples(outlink, outsamplesref); avfilter_unref_buffer(insamplesref); } diff --git a/libavfilter/af_aformat.c b/libavfilter/af_aformat.c index e7ef7f062a..ab64840c76 100644 --- a/libavfilter/af_aformat.c +++ b/libavfilter/af_aformat.c @@ -89,7 +89,7 @@ static int query_formats(AVFilterContext *ctx) static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref) { - avfilter_filter_samples(inlink->dst->outputs[0], insamplesref); + ff_filter_samples(inlink->dst->outputs[0], insamplesref); } AVFilter avfilter_af_aformat = { diff --git a/libavfilter/af_amerge.c b/libavfilter/af_amerge.c index a160391a57..6d141fef8b 100644 --- a/libavfilter/af_amerge.c +++ b/libavfilter/af_amerge.c @@ -208,7 +208,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) nb_samples = FFMIN(am->queue[0].nb_samples, am->queue[1].nb_samples); - outbuf = avfilter_get_audio_buffer(ctx->outputs[0], AV_PERM_WRITE, + outbuf = ff_get_audio_buffer(ctx->outputs[0], AV_PERM_WRITE, nb_samples); outs = outbuf->data[0]; for (i = 0; i < 2; i++) { @@ -264,7 +264,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) am->queue[i].nb_buf * sizeof(**inbuf)); } } - avfilter_filter_samples(ctx->outputs[0], outbuf); + ff_filter_samples(ctx->outputs[0], outbuf); } AVFilter avfilter_af_amerge = { diff --git a/libavfilter/af_anull.c b/libavfilter/af_anull.c index 8419f57cfb..140ecb3759 100644 --- a/libavfilter/af_anull.c +++ b/libavfilter/af_anull.c @@ -21,6 +21,7 @@ * null audio filter */ +#include "audio.h" #include "avfilter.h" AVFilter avfilter_af_anull = { @@ -31,8 +32,8 @@ AVFilter avfilter_af_anull = { .inputs = (const AVFilterPad[]) {{ .name = "default", .type = AVMEDIA_TYPE_AUDIO, - .get_audio_buffer = avfilter_null_get_audio_buffer, - .filter_samples = avfilter_null_filter_samples }, + .get_audio_buffer = ff_null_get_audio_buffer, + .filter_samples = ff_null_filter_samples }, { .name = NULL}}, .outputs = (const AVFilterPad[]) {{ .name = "default", diff --git a/libavfilter/af_aresample.c b/libavfilter/af_aresample.c index 786fb8565b..23980d1556 100644 --- a/libavfilter/af_aresample.c +++ b/libavfilter/af_aresample.c @@ -92,7 +92,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref const int n_in = insamplesref->audio->nb_samples; int n_out = n_in * aresample->ratio; AVFilterLink *const outlink = inlink->dst->outputs[0]; - AVFilterBufferRef *outsamplesref = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, n_out); + AVFilterBufferRef *outsamplesref = ff_get_audio_buffer(outlink, AV_PERM_WRITE, n_out); n_out = swr_convert(aresample->swr, outsamplesref->data, n_out, (void *)insamplesref->data, n_in); @@ -103,7 +103,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref outsamplesref->pts = insamplesref->pts == AV_NOPTS_VALUE ? 
AV_NOPTS_VALUE : av_rescale(outlink->sample_rate, insamplesref->pts, inlink ->sample_rate); - avfilter_filter_samples(outlink, outsamplesref); + ff_filter_samples(outlink, outsamplesref); avfilter_unref_buffer(insamplesref); } diff --git a/libavfilter/af_ashowinfo.c b/libavfilter/af_ashowinfo.c index 12d0315f0e..c336c52c0b 100644 --- a/libavfilter/af_ashowinfo.c +++ b/libavfilter/af_ashowinfo.c @@ -83,7 +83,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) av_log(ctx, AV_LOG_INFO, "]\n"); showinfo->frame++; - avfilter_filter_samples(inlink->dst->outputs[0], samplesref); + ff_filter_samples(inlink->dst->outputs[0], samplesref); } AVFilter avfilter_af_ashowinfo = { @@ -95,7 +95,7 @@ AVFilter avfilter_af_ashowinfo = { .inputs = (const AVFilterPad[]) {{ .name = "default", .type = AVMEDIA_TYPE_AUDIO, - .get_audio_buffer = avfilter_null_get_audio_buffer, + .get_audio_buffer = ff_null_get_audio_buffer, .filter_samples = filter_samples, .min_perms = AV_PERM_READ, }, { .name = NULL}}, diff --git a/libavfilter/af_asplit.c b/libavfilter/af_asplit.c index ec5032bd6c..3bdbeac19f 100644 --- a/libavfilter/af_asplit.c +++ b/libavfilter/af_asplit.c @@ -27,9 +27,9 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) { - avfilter_filter_samples(inlink->dst->outputs[0], + ff_filter_samples(inlink->dst->outputs[0], avfilter_ref_buffer(insamples, ~AV_PERM_WRITE)); - avfilter_filter_samples(inlink->dst->outputs[1], + ff_filter_samples(inlink->dst->outputs[1], avfilter_ref_buffer(insamples, ~AV_PERM_WRITE)); avfilter_unref_buffer(insamples); } @@ -41,7 +41,7 @@ AVFilter avfilter_af_asplit = { .inputs = (const AVFilterPad[]) { { .name = "default", .type = AVMEDIA_TYPE_AUDIO, - .get_audio_buffer = avfilter_null_get_audio_buffer, + .get_audio_buffer = ff_null_get_audio_buffer, .filter_samples = filter_samples, }, { .name = NULL} }, diff --git a/libavfilter/af_astreamsync.c b/libavfilter/af_astreamsync.c index ccf55c0ce0..2e8a02cd55 100644 --- a/libavfilter/af_astreamsync.c +++ b/libavfilter/af_astreamsync.c @@ -119,7 +119,7 @@ static void send_out(AVFilterContext *ctx, int out_id) av_q2d(ctx->outputs[out_id]->time_base) * buf->pts; as->var_values[VAR_T1 + out_id] += buf->audio->nb_samples / (double)ctx->inputs[out_id]->sample_rate; - avfilter_filter_samples(ctx->outputs[out_id], buf); + ff_filter_samples(ctx->outputs[out_id], buf); queue->nb--; queue->tail = (queue->tail + 1) % QUEUE_SIZE; if (as->req[out_id]) diff --git a/libavfilter/af_earwax.c b/libavfilter/af_earwax.c index 7caeb2f6c3..da5c06eae1 100644 --- a/libavfilter/af_earwax.c +++ b/libavfilter/af_earwax.c @@ -122,7 +122,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) AVFilterLink *outlink = inlink->dst->outputs[0]; int16_t *taps, *endin, *in, *out; AVFilterBufferRef *outsamples = - avfilter_get_audio_buffer(inlink, AV_PERM_WRITE, + ff_get_audio_buffer(inlink, AV_PERM_WRITE, insamples->audio->nb_samples); avfilter_copy_buffer_ref_props(outsamples, insamples); @@ -141,7 +141,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) // save part of input for next round memcpy(taps, endin, NUMTAPS * sizeof(*taps)); - avfilter_filter_samples(outlink, outsamples); + ff_filter_samples(outlink, outsamples); avfilter_unref_buffer(insamples); } diff --git a/libavfilter/af_pan.c b/libavfilter/af_pan.c index 30d5d1b979..dba930e576 100644 --- a/libavfilter/af_pan.c +++ b/libavfilter/af_pan.c @@ -340,7 +340,7 @@ static void 
filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) { int n = insamples->audio->nb_samples; AVFilterLink *const outlink = inlink->dst->outputs[0]; - AVFilterBufferRef *outsamples = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, n); + AVFilterBufferRef *outsamples = ff_get_audio_buffer(outlink, AV_PERM_WRITE, n); PanContext *pan = inlink->dst->priv; swr_convert(pan->swr, outsamples->data, n, (void *)insamples->data, n); @@ -348,7 +348,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) outsamples->audio->channel_layout = outlink->channel_layout; outsamples->audio->planar = outlink->planar; - avfilter_filter_samples(outlink, outsamples); + ff_filter_samples(outlink, outsamples); avfilter_unref_buffer(insamples); } diff --git a/libavfilter/af_silencedetect.c b/libavfilter/af_silencedetect.c index 4f95b54f82..94197d7b85 100644 --- a/libavfilter/af_silencedetect.c +++ b/libavfilter/af_silencedetect.c @@ -123,7 +123,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) } } - avfilter_filter_samples(inlink->dst->outputs[0], insamples); + ff_filter_samples(inlink->dst->outputs[0], insamples); } static int query_formats(AVFilterContext *ctx) @@ -163,7 +163,7 @@ AVFilter avfilter_af_silencedetect = { .inputs = (const AVFilterPad[]) { { .name = "default", .type = AVMEDIA_TYPE_AUDIO, - .get_audio_buffer = avfilter_null_get_audio_buffer, + .get_audio_buffer = ff_null_get_audio_buffer, .filter_samples = filter_samples, }, { .name = NULL } }, diff --git a/libavfilter/af_volume.c b/libavfilter/af_volume.c index 99ae8b82fe..881a4516dd 100644 --- a/libavfilter/af_volume.c +++ b/libavfilter/af_volume.c @@ -167,7 +167,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples) } } } - avfilter_filter_samples(outlink, insamples); + ff_filter_samples(outlink, insamples); } AVFilter avfilter_af_volume = { diff --git a/libavfilter/asrc_aevalsrc.c b/libavfilter/asrc_aevalsrc.c index 7bd6a89eb3..11750bb1ce 100644 --- a/libavfilter/asrc_aevalsrc.c +++ b/libavfilter/asrc_aevalsrc.c @@ -205,7 +205,7 @@ static int request_frame(AVFilterLink *outlink) if (eval->duration >= 0 && t > eval->duration) return AVERROR_EOF; - samplesref = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, eval->nb_samples); + samplesref = ff_get_audio_buffer(outlink, AV_PERM_WRITE, eval->nb_samples); /* evaluate expression for each single sample and for each channel */ for (i = 0; i < eval->nb_samples; i++, eval->n++) { @@ -223,7 +223,7 @@ static int request_frame(AVFilterLink *outlink) samplesref->audio->sample_rate = eval->sample_rate; eval->pts += eval->nb_samples; - avfilter_filter_samples(outlink, samplesref); + ff_filter_samples(outlink, samplesref); return 0; } diff --git a/libavfilter/asrc_anullsrc.c b/libavfilter/asrc_anullsrc.c index 288e2bf910..86497b9b9e 100644 --- a/libavfilter/asrc_anullsrc.c +++ b/libavfilter/asrc_anullsrc.c @@ -108,13 +108,13 @@ static int request_frame(AVFilterLink *outlink) AVFilterBufferRef *samplesref; samplesref = - avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, null->nb_samples); + ff_get_audio_buffer(outlink, AV_PERM_WRITE, null->nb_samples); samplesref->pts = null->pts; samplesref->pos = -1; samplesref->audio->channel_layout = null->channel_layout; samplesref->audio->sample_rate = outlink->sample_rate; - avfilter_filter_samples(outlink, avfilter_ref_buffer(samplesref, ~0)); + ff_filter_samples(outlink, avfilter_ref_buffer(samplesref, ~0)); avfilter_unref_buffer(samplesref); null->pts += 
null->nb_samples; diff --git a/libavfilter/audio.c b/libavfilter/audio.c new file mode 100644 index 0000000000..31f6796437 --- /dev/null +++ b/libavfilter/audio.c @@ -0,0 +1,291 @@ +/* + * Copyright (c) Stefano Sabatini | stefasab at gmail.com + * Copyright (c) S.N. Hemanth Meenakshisundaram | smeenaks at ucsd.edu + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/audioconvert.h" + +#include "audio.h" +#include "avfilter.h" +#include "internal.h" + +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + return ff_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); +} + +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + AVFilterBufferRef *samplesref = NULL; + int linesize[8] = {0}; + uint8_t *data[8] = {0}; + int ch, nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); + + /* right now we don't support more than 8 channels */ + av_assert0(nb_channels <= 8); + + /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */ + if (av_samples_alloc(data, linesize, + nb_channels, nb_samples, + av_get_alt_sample_fmt(link->format, link->planar), + 16) < 0) + return NULL; + + for (ch = 1; link->planar && ch < nb_channels; ch++) + linesize[ch] = linesize[0]; + samplesref = + avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, + nb_samples, link->format, + link->channel_layout, link->planar); + if (!samplesref) { + av_free(data[0]); + return NULL; + } + + return samplesref; +} + +static AVFilterBufferRef *ff_default_get_audio_buffer_alt(AVFilterLink *link, int perms, + int nb_samples) +{ + AVFilterBufferRef *samplesref = NULL; + uint8_t **data; + int planar = av_sample_fmt_is_planar(link->format); + int nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); + int planes = planar ? 
nb_channels : 1; + int linesize; + + if (!(data = av_mallocz(sizeof(*data) * planes))) + goto fail; + + if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, link->format, 0) < 0) + goto fail; + + samplesref = avfilter_get_audio_buffer_ref_from_arrays_alt(data, linesize, perms, + nb_samples, link->format, + link->channel_layout); + if (!samplesref) + goto fail; + + av_freep(&data); + +fail: + if (data) + av_freep(&data[0]); + av_freep(&data); + return samplesref; +} + +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples) +{ + AVFilterBufferRef *ret = NULL; + + if (link->dstpad->get_audio_buffer) + ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); + + if (!ret) + ret = ff_default_get_audio_buffer(link, perms, nb_samples); + + if (ret) + ret->type = AVMEDIA_TYPE_AUDIO; + + return ret; +} + +AVFilterBufferRef * +avfilter_get_audio_buffer_ref_from_arrays(uint8_t *data[8], int linesize[8], int perms, + int nb_samples, enum AVSampleFormat sample_fmt, + uint64_t channel_layout, int planar) +{ + AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer)); + AVFilterBufferRef *samplesref = av_mallocz(sizeof(AVFilterBufferRef)); + + if (!samples || !samplesref) + goto fail; + + samplesref->buf = samples; + samplesref->buf->free = ff_avfilter_default_free_buffer; + if (!(samplesref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps)))) + goto fail; + + samplesref->audio->nb_samples = nb_samples; + samplesref->audio->channel_layout = channel_layout; + samplesref->audio->planar = planar; + + /* make sure the buffer gets read permission or it's useless for output */ + samplesref->perms = perms | AV_PERM_READ; + + samples->refcount = 1; + samplesref->type = AVMEDIA_TYPE_AUDIO; + samplesref->format = sample_fmt; + + memcpy(samples->data, data, sizeof(samples->data)); + memcpy(samples->linesize, linesize, sizeof(samples->linesize)); + memcpy(samplesref->data, data, sizeof(samplesref->data)); + memcpy(samplesref->linesize, linesize, sizeof(samplesref->linesize)); + + return samplesref; + +fail: + if (samplesref && samplesref->audio) + av_freep(&samplesref->audio); + av_freep(&samplesref); + av_freep(&samples); + return NULL; +} + +AVFilterBufferRef* avfilter_get_audio_buffer_ref_from_arrays_alt(uint8_t **data, + int linesize,int perms, + int nb_samples, + enum AVSampleFormat sample_fmt, + uint64_t channel_layout) +{ + int planes; + AVFilterBuffer *samples = av_mallocz(sizeof(*samples)); + AVFilterBufferRef *samplesref = av_mallocz(sizeof(*samplesref)); + + if (!samples || !samplesref) + goto fail; + + samplesref->buf = samples; + samplesref->buf->free = ff_avfilter_default_free_buffer; + if (!(samplesref->audio = av_mallocz(sizeof(*samplesref->audio)))) + goto fail; + + samplesref->audio->nb_samples = nb_samples; + samplesref->audio->channel_layout = channel_layout; + samplesref->audio->planar = av_sample_fmt_is_planar(sample_fmt); + + planes = samplesref->audio->planar ? 
av_get_channel_layout_nb_channels(channel_layout) : 1; + + /* make sure the buffer gets read permission or it's useless for output */ + samplesref->perms = perms | AV_PERM_READ; + + samples->refcount = 1; + samplesref->type = AVMEDIA_TYPE_AUDIO; + samplesref->format = sample_fmt; + + memcpy(samples->data, data, + FFMIN(FF_ARRAY_ELEMS(samples->data), planes)*sizeof(samples->data[0])); + memcpy(samplesref->data, samples->data, sizeof(samples->data)); + + samples->linesize[0] = samplesref->linesize[0] = linesize; + + if (planes > FF_ARRAY_ELEMS(samples->data)) { + samples-> extended_data = av_mallocz(sizeof(*samples->extended_data) * + planes); + samplesref->extended_data = av_mallocz(sizeof(*samplesref->extended_data) * + planes); + + if (!samples->extended_data || !samplesref->extended_data) + goto fail; + + memcpy(samples-> extended_data, data, sizeof(*data)*planes); + memcpy(samplesref->extended_data, data, sizeof(*data)*planes); + } else { + samples->extended_data = samples->data; + samplesref->extended_data = samplesref->data; + } + + return samplesref; + +fail: + if (samples && samples->extended_data != samples->data) + av_freep(&samples->extended_data); + if (samplesref) { + av_freep(&samplesref->audio); + if (samplesref->extended_data != samplesref->data) + av_freep(&samplesref->extended_data); + } + av_freep(&samplesref); + av_freep(&samples); + return NULL; +} + +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +{ + ff_filter_samples(link->dst->outputs[0], samplesref); +} + +/* FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter. */ +void ff_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) +{ + AVFilterLink *outlink = NULL; + + if (inlink->dst->output_count) + outlink = inlink->dst->outputs[0]; + + if (outlink) { + outlink->out_buf = ff_default_get_audio_buffer(inlink, AV_PERM_WRITE, + samplesref->audio->nb_samples); + outlink->out_buf->pts = samplesref->pts; + outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; + ff_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); + avfilter_unref_buffer(outlink->out_buf); + outlink->out_buf = NULL; + } + avfilter_unref_buffer(samplesref); + inlink->cur_buf = NULL; +} + +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) +{ + void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); + AVFilterPad *dst = link->dstpad; + int64_t pts; + + FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); + + if (!(filter_samples = dst->filter_samples)) + filter_samples = ff_default_filter_samples; + + /* prepare to copy the samples if the buffer has insufficient permissions */ + if ((dst->min_perms & samplesref->perms) != dst->min_perms || + dst->rej_perms & samplesref->perms) { + int i, planar = av_sample_fmt_is_planar(samplesref->format); + int planes = !planar ? 
1: + av_get_channel_layout_nb_channels(samplesref->audio->channel_layout); + + av_log(link->dst, AV_LOG_DEBUG, + "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", + samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); + + link->cur_buf = ff_default_get_audio_buffer(link, dst->min_perms, + samplesref->audio->nb_samples); + link->cur_buf->pts = samplesref->pts; + link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; + + /* Copy actual data into new samples buffer */ + for (i = 0; samplesref->data[i] && i < 8; i++) + memcpy(link->cur_buf->data[i], samplesref->data[i], samplesref->linesize[0]); + for (i = 0; i < planes; i++) + memcpy(link->cur_buf->extended_data[i], samplesref->extended_data[i], samplesref->linesize[0]); + + avfilter_unref_buffer(samplesref); + } else + link->cur_buf = samplesref; + + pts = link->cur_buf->pts; + filter_samples(link, link->cur_buf); + ff_update_link_current_pts(link, pts); +} diff --git a/libavfilter/audio.h b/libavfilter/audio.h new file mode 100644 index 0000000000..051efb2024 --- /dev/null +++ b/libavfilter/audio.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) Stefano Sabatini | stefasab at gmail.com + * Copyright (c) S.N. Hemanth Meenakshisundaram | smeenaks at ucsd.edu + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_AUDIO_H +#define AVFILTER_AUDIO_H + +#include "avfilter.h" + + +/** default handler for get_audio_buffer() for audio inputs */ +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** get_audio_buffer() handler for filters which simply pass audio along */ +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** + * Request an audio samples buffer with a specific set of permissions. + * + * @param link the output link to the filter from which the buffer will + * be requested + * @param perms the required access permissions + * @param nb_samples the number of samples per channel + * @return A reference to the samples. This must be unreferenced with + * avfilter_unref_buffer when you are finished with it. + */ +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms, + int nb_samples); + +/** default handler for filter_samples() for audio inputs */ +void ff_default_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +/** filter_samples() handler for filters which simply pass audio along */ +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +/** + * Send a buffer of audio samples to the next filter. + * + * @param link the output link over which the audio samples are being sent + * @param samplesref a reference to the buffer of audio samples being sent. 
The + * receiving filter will free this reference when it no longer + * needs it or pass it on to the next filter. + */ +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); + +#endif /* AVFILTER_AUDIO_H */ diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c index 89afee060f..073b259949 100644 --- a/libavfilter/avfilter.c +++ b/libavfilter/avfilter.c @@ -27,6 +27,7 @@ #include "libavutil/imgutils.h" #include "libavutil/avassert.h" #include "libavutil/avstring.h" + #include "avfilter.h" #include "internal.h" @@ -410,7 +411,7 @@ static void ff_dlog_ref(void *ctx, AVFilterBufferRef *ref, int end) av_dlog(ctx, "]%s", end ? "\n" : ""); } -static void ff_dlog_link(void *ctx, AVFilterLink *link, int end) +void ff_dlog_link(void *ctx, AVFilterLink *link, int end) { if (link->type == AVMEDIA_TYPE_VIDEO) { av_dlog(ctx, @@ -434,8 +435,6 @@ static void ff_dlog_link(void *ctx, AVFilterLink *link, int end) } } -#define FF_DPRINTF_START(ctx, func) av_dlog(NULL, "%-16s: ", #func) - AVFilterBufferRef *avfilter_get_video_buffer(AVFilterLink *link, int perms, int w, int h) { AVFilterBufferRef *ret = NULL; @@ -501,133 +500,6 @@ fail: return NULL; } -AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, - int perms, int nb_samples) -{ - AVFilterBufferRef *ret = NULL; - - if (link->dstpad->get_audio_buffer) - ret = link->dstpad->get_audio_buffer(link, perms, nb_samples); - - if (!ret) - ret = avfilter_default_get_audio_buffer(link, perms, nb_samples); - - if (ret) - ret->type = AVMEDIA_TYPE_AUDIO; - - return ret; -} - -AVFilterBufferRef * -avfilter_get_audio_buffer_ref_from_arrays(uint8_t *data[8], int linesize[8], int perms, - int nb_samples, enum AVSampleFormat sample_fmt, - uint64_t channel_layout, int planar) -{ - AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer)); - AVFilterBufferRef *samplesref = av_mallocz(sizeof(AVFilterBufferRef)); - - if (!samples || !samplesref) - goto fail; - - samplesref->buf = samples; - samplesref->buf->free = ff_avfilter_default_free_buffer; - if (!(samplesref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps)))) - goto fail; - - samplesref->audio->nb_samples = nb_samples; - samplesref->audio->channel_layout = channel_layout; - samplesref->audio->planar = planar; - - /* make sure the buffer gets read permission or it's useless for output */ - samplesref->perms = perms | AV_PERM_READ; - - samples->refcount = 1; - samplesref->type = AVMEDIA_TYPE_AUDIO; - samplesref->format = sample_fmt; - - memcpy(samples->data, data, sizeof(samples->data)); - memcpy(samples->linesize, linesize, sizeof(samples->linesize)); - memcpy(samplesref->data, data, sizeof(samplesref->data)); - memcpy(samplesref->linesize, linesize, sizeof(samplesref->linesize)); - - return samplesref; - -fail: - if (samplesref && samplesref->audio) - av_freep(&samplesref->audio); - av_freep(&samplesref); - av_freep(&samples); - return NULL; -} - -AVFilterBufferRef *avfilter_get_audio_buffer_ref_from_arrays_alt(uint8_t **data, - int linesize, int perms, - int nb_samples, - enum AVSampleFormat sample_fmt, - uint64_t channel_layout) -{ - int planes; - AVFilterBuffer *samples = av_mallocz(sizeof(*samples)); - AVFilterBufferRef *samplesref = av_mallocz(sizeof(*samplesref)); - - if (!samples || !samplesref) - goto fail; - - samplesref->buf = samples; - samplesref->buf->free = ff_avfilter_default_free_buffer; - if (!(samplesref->audio = av_mallocz(sizeof(*samplesref->audio)))) - goto fail; - - samplesref->audio->nb_samples = nb_samples; - samplesref->audio->channel_layout = 
channel_layout; - samplesref->audio->planar = av_sample_fmt_is_planar(sample_fmt); - - planes = samplesref->audio->planar ? av_get_channel_layout_nb_channels(channel_layout) : 1; - - /* make sure the buffer gets read permission or it's useless for output */ - samplesref->perms = perms | AV_PERM_READ; - - samples->refcount = 1; - samplesref->type = AVMEDIA_TYPE_AUDIO; - samplesref->format = sample_fmt; - - memcpy(samples->data, data, - FFMIN(FF_ARRAY_ELEMS(samples->data), planes)*sizeof(samples->data[0])); - memcpy(samplesref->data, samples->data, sizeof(samples->data)); - - samples->linesize[0] = samplesref->linesize[0] = linesize; - - if (planes > FF_ARRAY_ELEMS(samples->data)) { - samples-> extended_data = av_mallocz(sizeof(*samples->extended_data) * - planes); - samplesref->extended_data = av_mallocz(sizeof(*samplesref->extended_data) * - planes); - - if (!samples->extended_data || !samplesref->extended_data) - goto fail; - - memcpy(samples-> extended_data, data, sizeof(*data)*planes); - memcpy(samplesref->extended_data, data, sizeof(*data)*planes); - } else { - samples->extended_data = samples->data; - samplesref->extended_data = samplesref->data; - } - - return samplesref; - -fail: - if (samples && samples->extended_data != samples->data) - av_freep(&samples->extended_data); - if (samplesref) { - av_freep(&samplesref->audio); - if (samplesref->extended_data != samplesref->data) - av_freep(&samplesref->extended_data); - } - av_freep(&samplesref); - av_freep(&samples); - return NULL; -} - int avfilter_request_frame(AVFilterLink *link) { FF_DPRINTF_START(NULL, request_frame); ff_dlog_link(NULL, link, 1); @@ -657,7 +529,7 @@ int avfilter_poll_frame(AVFilterLink *link) return min; } -static void update_link_current_pts(AVFilterLink *link, int64_t pts) +void ff_update_link_current_pts(AVFilterLink *link, int64_t pts) { if (pts == AV_NOPTS_VALUE) return; @@ -706,7 +578,7 @@ void avfilter_start_frame(AVFilterLink *link, AVFilterBufferRef *picref) } start_frame(link, link->cur_buf); - update_link_current_pts(link, link->cur_buf->pts); + ff_update_link_current_pts(link, link->cur_buf->pts); } void avfilter_end_frame(AVFilterLink *link) @@ -778,44 +650,6 @@ int avfilter_process_command(AVFilterContext *filter, const char *cmd, const cha return AVERROR(ENOSYS); } -void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) -{ - void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *); - AVFilterPad *dst = link->dstpad; - int i; - int64_t pts; - - FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1); - - if (!(filter_samples = dst->filter_samples)) - filter_samples = avfilter_default_filter_samples; - - /* prepare to copy the samples if the buffer has insufficient permissions */ - if ((dst->min_perms & samplesref->perms) != dst->min_perms || - dst->rej_perms & samplesref->perms) { - - av_log(link->dst, AV_LOG_DEBUG, - "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n", - samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms); - - link->cur_buf = avfilter_default_get_audio_buffer(link, dst->min_perms, - samplesref->audio->nb_samples); - link->cur_buf->pts = samplesref->pts; - link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate; - - /* Copy actual data into new samples buffer */ - for (i = 0; samplesref->data[i] && i < 8; i++) - memcpy(link->cur_buf->data[i], samplesref->data[i], samplesref->linesize[0]); - - avfilter_unref_buffer(samplesref); - } else - link->cur_buf = samplesref; - - pts = link->cur_buf->pts; - 
filter_samples(link, link->cur_buf); - update_link_current_pts(link, pts); -} - #define MAX_REGISTERED_AVFILTERS_NB 128 static AVFilter *registered_avfilters[MAX_REGISTERED_AVFILTERS_NB + 1]; diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h index d7d4c16ec0..9fa6ab4023 100644 --- a/libavfilter/avfilter.h +++ b/libavfilter/avfilter.h @@ -430,7 +430,8 @@ struct AVFilterPad { * * Input audio pads only. */ - AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms, int nb_samples); + AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms, + int nb_samples); /** * Callback called after the slices of a frame are completely sent. If @@ -508,16 +509,10 @@ void avfilter_default_draw_slice(AVFilterLink *link, int y, int h, int slice_dir /** default handler for end_frame() for video inputs */ void avfilter_default_end_frame(AVFilterLink *link); -/** default handler for filter_samples() for audio inputs */ -void avfilter_default_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - /** default handler for get_video_buffer() for video inputs */ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int perms, int w, int h); -/** default handler for get_audio_buffer() for audio inputs */ -AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, - int perms, int nb_samples); /** * Helpers for query_formats() which set all links to the same list of @@ -541,17 +536,10 @@ void avfilter_null_draw_slice(AVFilterLink *link, int y, int h, int slice_dir); /** end_frame() handler for filters which simply pass video along */ void avfilter_null_end_frame(AVFilterLink *link); -/** filter_samples() handler for filters which simply pass audio along */ -void avfilter_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - /** get_video_buffer() handler for filters which simply pass video along */ AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, int w, int h); -/** get_audio_buffer() handler for filters which simply pass audio along */ -AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, - int perms, int nb_samples); - /** * Filter definition. This defines the pads a filter contains, and all the * callback functions used to interact with the filter. @@ -665,7 +653,7 @@ struct AVFilterLink { AVRational sample_aspect_ratio; ///< agreed upon sample aspect ratio /* These parameters apply only to audio */ uint64_t channel_layout; ///< channel layout of current buffer (see libavutil/audioconvert.h) -#if LIBAVFILTER_VERSION_MAJOR < 3 +#if FF_API_SAMPLERATE64 int64_t sample_rate; ///< samples per second #else int sample_rate; ///< samples per second @@ -790,19 +778,6 @@ AVFilterBufferRef * avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int linesize[4], int perms, int w, int h, enum PixelFormat format); -/** - * Request an audio samples buffer with a specific set of permissions. - * - * @param link the output link to the filter from which the buffer will - * be requested - * @param perms the required access permissions - * @param nb_samples the number of samples per channel - * @return A reference to the samples. This must be unreferenced with - * avfilter_unref_buffer when you are finished with it. - */ -AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples); - /** * Create an audio buffer reference wrapped around an already * allocated samples buffer. 
@@ -904,17 +879,7 @@ void avfilter_draw_slice(AVFilterLink *link, int y, int h, int slice_dir); */ int avfilter_process_command(AVFilterContext *filter, const char *cmd, const char *arg, char *res, int res_len, int flags); -/** - * Send a buffer of audio samples to the next filter. - * - * @param link the output link over which the audio samples are being sent - * @param samplesref a reference to the buffer of audio samples being sent. The - * receiving filter will free this reference when it no longer - * needs it or pass it on to the next filter. - */ -void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref); - -/** Initialize the filter system. Register all built-in filters. */ +/** Initialize the filter system. Register all builtin filters. */ void avfilter_register_all(void); /** Uninitialize the filter system. Unregister all filters. */ @@ -1024,4 +989,6 @@ static inline void avfilter_insert_outpad(AVFilterContext *f, unsigned index, &f->output_pads, &f->outputs, p); } +#include "audio.h" + #endif /* AVFILTER_AVFILTER_H */ diff --git a/libavfilter/defaults.c b/libavfilter/defaults.c index e57a1290aa..d2e01499be 100644 --- a/libavfilter/defaults.c +++ b/libavfilter/defaults.c @@ -23,6 +23,7 @@ #include "libavutil/audioconvert.h" #include "libavutil/imgutils.h" #include "libavutil/samplefmt.h" + #include "avfilter.h" #include "internal.h" @@ -87,38 +88,6 @@ AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link, int per return picref; } -AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) -{ - AVFilterBufferRef *samplesref = NULL; - int linesize[8] = {0}; - uint8_t *data[8] = {0}; - int ch, nb_channels = av_get_channel_layout_nb_channels(link->channel_layout); - - /* right now we don't support more than 8 channels */ - av_assert0(nb_channels <= 8); - - /* Calculate total buffer size, round to multiple of 16 to be SIMD friendly */ - if (av_samples_alloc(data, linesize, - nb_channels, nb_samples, - av_get_alt_sample_fmt(link->format, link->planar), - 16) < 0) - return NULL; - - for (ch = 1; link->planar && ch < nb_channels; ch++) - linesize[ch] = linesize[0]; - samplesref = - avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms, - nb_samples, link->format, - link->channel_layout, link->planar); - if (!samplesref) { - av_free(data[0]); - return NULL; - } - - return samplesref; -} - void avfilter_default_start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref) { AVFilterLink *outlink = NULL; @@ -163,27 +132,6 @@ void avfilter_default_end_frame(AVFilterLink *inlink) } } -/* FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter. 
*/ -void avfilter_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref) -{ - AVFilterLink *outlink = NULL; - - if (inlink->dst->output_count) - outlink = inlink->dst->outputs[0]; - - if (outlink) { - outlink->out_buf = avfilter_default_get_audio_buffer(inlink, AV_PERM_WRITE, - samplesref->audio->nb_samples); - outlink->out_buf->pts = samplesref->pts; - outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate; - avfilter_filter_samples(outlink, avfilter_ref_buffer(outlink->out_buf, ~0)); - avfilter_unref_buffer(outlink->out_buf); - outlink->out_buf = NULL; - } - avfilter_unref_buffer(samplesref); - inlink->cur_buf = NULL; -} - static void set_common_formats(AVFilterContext *ctx, AVFilterFormats *fmts, enum AVMediaType type, int offin, int offout) { @@ -258,18 +206,7 @@ void avfilter_null_end_frame(AVFilterLink *link) avfilter_end_frame(link->dst->outputs[0]); } -void avfilter_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref) -{ - avfilter_filter_samples(link->dst->outputs[0], samplesref); -} - AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link, int perms, int w, int h) { return avfilter_get_video_buffer(link->dst->outputs[0], perms, w, h); } - -AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link, int perms, - int nb_samples) -{ - return avfilter_get_audio_buffer(link->dst->outputs[0], perms, nb_samples); -} diff --git a/libavfilter/internal.h b/libavfilter/internal.h index e8516c3ba1..09d605541f 100644 --- a/libavfilter/internal.h +++ b/libavfilter/internal.h @@ -149,4 +149,10 @@ static inline void ff_null_start_frame_keep_ref(AVFilterLink *inlink, avfilter_start_frame(inlink->dst->outputs[0], avfilter_ref_buffer(picref, ~0)); } +void ff_update_link_current_pts(AVFilterLink *link, int64_t pts); + +#define FF_DPRINTF_START(ctx, func) av_dlog(NULL, "%-16s: ", #func) + +void ff_dlog_link(void *ctx, AVFilterLink *link, int end); + #endif /* AVFILTER_INTERNAL_H */ diff --git a/libavfilter/src_buffer.c b/libavfilter/src_buffer.c index 19c49a3a9b..f6c67f4db0 100644 --- a/libavfilter/src_buffer.c +++ b/libavfilter/src_buffer.c @@ -297,7 +297,7 @@ static AVFilterBufferRef *copy_buffer_ref(AVFilterContext *ctx, break; case AVMEDIA_TYPE_AUDIO: - buf = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, + buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, ref->audio->nb_samples); channels = av_get_channel_layout_nb_channels(ref->audio->channel_layout); data_size = av_samples_get_buffer_size(NULL, channels, @@ -562,7 +562,7 @@ static int request_frame(AVFilterLink *link) avfilter_unref_buffer(buf); break; case AVMEDIA_TYPE_AUDIO: - avfilter_filter_samples(link, avfilter_ref_buffer(buf, ~0)); + ff_filter_samples(link, avfilter_ref_buffer(buf, ~0)); avfilter_unref_buffer(buf); break; default: diff --git a/libavfilter/src_movie.c b/libavfilter/src_movie.c index 1d65ade5c8..15b82ab08d 100644 --- a/libavfilter/src_movie.c +++ b/libavfilter/src_movie.c @@ -437,7 +437,7 @@ static int amovie_get_samples(AVFilterLink *outlink) if (data_size < 0) return data_size; movie->samplesref = - avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples); + ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples); memcpy(movie->samplesref->data[0], movie->frame->data[0], data_size); movie->samplesref->pts = movie->pkt.pts; movie->samplesref->pos = movie->pkt.pos; @@ -463,7 +463,7 @@ static int amovie_request_frame(AVFilterLink *outlink) return ret; } while (!movie->samplesref); - avfilter_filter_samples(outlink, 
avfilter_ref_buffer(movie->samplesref, ~0)); + ff_filter_samples(outlink, avfilter_ref_buffer(movie->samplesref, ~0)); avfilter_unref_buffer(movie->samplesref); movie->samplesref = NULL; diff --git a/libavfilter/version.h b/libavfilter/version.h index 1aee0374fe..82817a7115 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -47,5 +47,8 @@ #ifndef FF_API_GRAPH_AVCLASS #define FF_API_GRAPH_AVCLASS (LIBAVFILTER_VERSION_MAJOR > 2) #endif +#ifndef FF_API_SAMPLERATE64 +#define FF_API_SAMPLERATE64 (LIBAVFILTER_VERSION_MAJOR < 3) +#endif #endif // AVFILTER_VERSION_H diff --git a/libavformat/mov.c b/libavformat/mov.c index 240bccf402..9a2a59f36c 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -1051,7 +1051,7 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom) return AVERROR_INVALIDDATA; if (atom.size >= 10) { - // Broken files created by legacy versions of Libav and FFmpeg will + // Broken files created by legacy versions of libavformat will // wrap a whole fiel atom inside of a glbl atom. unsigned size = avio_rb32(pb); unsigned type = avio_rl32(pb); diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c index 2feb2409ba..c7db91de9e 100644 --- a/libavformat/rtmpproto.c +++ b/libavformat/rtmpproto.c @@ -44,6 +44,8 @@ #define APP_MAX_LENGTH 128 #define PLAYPATH_MAX_LENGTH 256 +#define TCURL_MAX_LENGTH 512 +#define FLASHVER_MAX_LENGTH 64 /** RTMP protocol handler state */ typedef enum { @@ -82,6 +84,9 @@ typedef struct RTMPContext { int flv_header_bytes; ///< number of initialized bytes in flv_header int nb_invokes; ///< keeps track of invoke messages int create_stream_invoke; ///< invoke id for the create stream command + char* tcurl; ///< url of the target stream + char* flashver; ///< version of the flash plugin + char* swfurl; ///< url of the swf player } RTMPContext; #define PLAYER_KEY_OPEN_PART_LEN 30 ///< length of partial key used for first client digest signing @@ -110,35 +115,34 @@ static const uint8_t rtmp_server_key[] = { /** * Generate 'connect' call and send it to the server. 
*/ -static void gen_connect(URLContext *s, RTMPContext *rt, const char *proto, - const char *host, int port) +static void gen_connect(URLContext *s, RTMPContext *rt) { RTMPPacket pkt; - uint8_t ver[64], *p; - char tcurl[512]; + uint8_t *p; ff_rtmp_packet_create(&pkt, RTMP_SYSTEM_CHANNEL, RTMP_PT_INVOKE, 0, 4096); p = pkt.data; - ff_url_join(tcurl, sizeof(tcurl), proto, NULL, host, port, "/%s", rt->app); ff_amf_write_string(&p, "connect"); ff_amf_write_number(&p, ++rt->nb_invokes); ff_amf_write_object_start(&p); ff_amf_write_field_name(&p, "app"); ff_amf_write_string(&p, rt->app); - if (rt->is_input) { - snprintf(ver, sizeof(ver), "%s %d,%d,%d,%d", RTMP_CLIENT_PLATFORM, RTMP_CLIENT_VER1, - RTMP_CLIENT_VER2, RTMP_CLIENT_VER3, RTMP_CLIENT_VER4); - } else { - snprintf(ver, sizeof(ver), "FMLE/3.0 (compatible; %s)", LIBAVFORMAT_IDENT); + if (!rt->is_input) { ff_amf_write_field_name(&p, "type"); ff_amf_write_string(&p, "nonprivate"); } ff_amf_write_field_name(&p, "flashVer"); - ff_amf_write_string(&p, ver); + ff_amf_write_string(&p, rt->flashver); + + if (rt->swfurl) { + ff_amf_write_field_name(&p, "swfUrl"); + ff_amf_write_string(&p, rt->swfurl); + } + ff_amf_write_field_name(&p, "tcUrl"); - ff_amf_write_string(&p, tcurl); + ff_amf_write_string(&p, rt->tcurl); if (rt->is_input) { ff_amf_write_field_name(&p, "fpad"); ff_amf_write_bool(&p, 0); @@ -368,6 +372,25 @@ static void gen_server_bw(URLContext *s, RTMPContext *rt) ff_rtmp_packet_destroy(&pkt); } +/** + * Generate check bandwidth message and send it to the server. + */ +static void gen_check_bw(URLContext *s, RTMPContext *rt) +{ + RTMPPacket pkt; + uint8_t *p; + + ff_rtmp_packet_create(&pkt, RTMP_SYSTEM_CHANNEL, RTMP_PT_INVOKE, 0, 21); + + p = pkt.data; + ff_amf_write_string(&p, "_checkbw"); + ff_amf_write_number(&p, ++rt->nb_invokes); + ff_amf_write_null(&p); + + ff_rtmp_packet_write(rt->stream, &pkt, rt->chunk_size, rt->prev_pkt[1]); + ff_rtmp_packet_destroy(&pkt); +} + /** * Generate report on bytes read so far and send it to the server. 
*/ @@ -687,6 +710,8 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt) if (!t && !strcmp(tmpstr, "NetStream.Play.Stop")) rt->state = STATE_STOPPED; if (!t && !strcmp(tmpstr, "NetStream.Play.UnpublishNotify")) rt->state = STATE_STOPPED; if (!t && !strcmp(tmpstr, "NetStream.Publish.Start")) rt->state = STATE_PUBLISHING; + } else if (!memcmp(pkt->data, "\002\000\010onBWDone", 11)) { + gen_check_bw(s, rt); } break; } @@ -910,13 +935,31 @@ static int rtmp_open(URLContext *s, const char *uri, int flags) strncat(rt->playpath, fname, PLAYPATH_MAX_LENGTH - 5); } + if (!rt->tcurl) { + rt->tcurl = av_malloc(TCURL_MAX_LENGTH); + ff_url_join(rt->tcurl, TCURL_MAX_LENGTH, proto, NULL, hostname, + port, "/%s", rt->app); + } + + if (!rt->flashver) { + rt->flashver = av_malloc(FLASHVER_MAX_LENGTH); + if (rt->is_input) { + snprintf(rt->flashver, FLASHVER_MAX_LENGTH, "%s %d,%d,%d,%d", + RTMP_CLIENT_PLATFORM, RTMP_CLIENT_VER1, RTMP_CLIENT_VER2, + RTMP_CLIENT_VER3, RTMP_CLIENT_VER4); + } else { + snprintf(rt->flashver, FLASHVER_MAX_LENGTH, + "FMLE/3.0 (compatible; %s)", LIBAVFORMAT_IDENT); + } + } + rt->client_report_size = 1048576; rt->bytes_read = 0; rt->last_bytes_read = 0; av_log(s, AV_LOG_DEBUG, "Proto = %s, path = %s, app = %s, fname = %s\n", proto, path, rt->app, rt->playpath); - gen_connect(s, rt, proto, hostname, port); + gen_connect(s, rt); do { ret = get_packet(s, 1); @@ -1052,11 +1095,14 @@ static int rtmp_write(URLContext *s, const uint8_t *buf, int size) static const AVOption rtmp_options[] = { {"rtmp_app", "Name of application to connect to on the RTMP server", OFFSET(app), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_flashver", "Version of the Flash plugin used to run the SWF player.", OFFSET(flashver), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, {"rtmp_live", "Specify that the media is a live stream.", OFFSET(live), AV_OPT_TYPE_INT, {-2}, INT_MIN, INT_MAX, DEC, "rtmp_live"}, {"any", "both", 0, AV_OPT_TYPE_CONST, {-2}, 0, 0, DEC, "rtmp_live"}, {"live", "live stream", 0, AV_OPT_TYPE_CONST, {-1}, 0, 0, DEC, "rtmp_live"}, {"recorded", "recorded stream", 0, AV_OPT_TYPE_CONST, {0}, 0, 0, DEC, "rtmp_live"}, {"rtmp_playpath", "Stream identifier to play or to publish", OFFSET(playpath), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_swfurl", "URL of the SWF player. By default no value will be sent", OFFSET(swfurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, + {"rtmp_tcurl", "URL of the target stream. 
Defaults to rtmp://host[:port]/app.", OFFSET(tcurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC}, { NULL }, }; diff --git a/libavformat/sctp.c b/libavformat/sctp.c index 3823e03ebe..817b0049a9 100644 --- a/libavformat/sctp.c +++ b/libavformat/sctp.c @@ -227,7 +227,7 @@ static int sctp_open(URLContext *h, const char *uri, int flags) if (s->max_streams) { initparams.sinit_max_instreams = s->max_streams; initparams.sinit_num_ostreams = s->max_streams; - if (setsockopt(fd, SOL_SCTP, SCTP_INITMSG, &initparams, + if (setsockopt(fd, IPPROTO_SCTP, SCTP_INITMSG, &initparams, sizeof(initparams)) < 0) av_log(h, AV_LOG_ERROR, "SCTP ERROR: Unable to initialize socket max streams %d\n", diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 809c5d1378..ba59f3314f 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -54,26 +54,24 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len mova m3, [srcq+src3q] mova m4, [srcq+src4q] mova m5, [srcq+src5q] -%if cpuflag(sse) +%if cpuflag(sse4) SBUTTERFLYPS 0, 1, 6 SBUTTERFLYPS 2, 3, 6 SBUTTERFLYPS 4, 5, 6 - movaps m6, m4 - shufps m4, m0, q3210 + blendps m6, m4, m0, 1100b movlhps m0, m2 - movhlps m6, m2 - movaps [dstq ], m0 - movaps [dstq+16], m4 - movaps [dstq+32], m6 - - movaps m6, m5 - shufps m5, m1, q3210 + movhlps m4, m2 + blendps m2, m5, m1, 1100b movlhps m1, m3 - movhlps m6, m3 + movhlps m5, m3 + + movaps [dstq ], m0 + movaps [dstq+16], m6 + movaps [dstq+32], m4 movaps [dstq+48], m1 - movaps [dstq+64], m5 - movaps [dstq+80], m6 + movaps [dstq+64], m2 + movaps [dstq+80], m5 %else ; mmx SBUTTERFLY dq, 0, 1, 6 SBUTTERFLY dq, 2, 3, 6 @@ -100,5 +98,9 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len INIT_MMX mmx CONV_FLTP_TO_FLT_6CH -INIT_XMM sse +INIT_XMM sse4 CONV_FLTP_TO_FLT_6CH +%if HAVE_AVX +INIT_XMM avx +CONV_FLTP_TO_FLT_6CH +%endif diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index 6883f10a21..206aede751 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -22,8 +22,9 @@ #include "libavutil/cpu.h" #include "libavresample/audio_convert.h" -extern void ff_conv_fltp_to_flt_6ch_mmx(float *dst, float *const *src, int len); -extern void ff_conv_fltp_to_flt_6ch_sse(float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len); +extern void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int len); av_cold void ff_audio_convert_init_x86(AudioConvert *ac) { @@ -34,9 +35,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); } - if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { + if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, - 6, 16, 4, "SSE", ff_conv_fltp_to_flt_6ch_sse); + 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); + } + if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { + ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } #endif } diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 8c8d4856e0..258626af21 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -42,10 +42,9 @@ 
 %endmacro

 %macro SBUTTERFLYPS 3
-    movaps   m%3, m%1
-    unpcklps m%1, m%2
-    unpckhps m%3, m%2
-    SWAP %2, %3
+    unpcklps m%3, m%1, m%2
+    unpckhps m%1, m%1, m%2
+    SWAP %1, %3, %2
 %endmacro

 %macro TRANSPOSE4x4B 5