diff --git a/ffmpeg.c b/ffmpeg.c index 0724984678..8476a0b03d 100644 --- a/ffmpeg.c +++ b/ffmpeg.c @@ -119,8 +119,10 @@ static int use_obmc = 0; static int use_aic = 0; static int use_aiv = 0; static int use_umv = 0; +static int use_alt_scan = 0; static int do_deinterlace = 0; -static int do_interlace = 0; +static int do_interlace_dct = 0; +static int do_interlace_me = 0; static int workaround_bugs = FF_BUG_AUTODETECT; static int error_resilience = 2; static int error_concealment = 3; @@ -130,6 +132,8 @@ static int use_part = 0; static int packet_size = 0; static int error_rate = 0; static int strict = 0; +static int top_field_first = -1; +static int noise_reduction = 0; static int debug = 0; static int debug_mv = 0; extern int loop_input; /* currently a hack */ @@ -635,7 +639,12 @@ static void do_video_out(AVFormatContext *s, /* better than nothing: use input picture interlaced settings */ big_picture.interlaced_frame = in_picture->interlaced_frame; - big_picture.top_field_first = in_picture->top_field_first; + if(do_interlace_me || do_interlace_dct){ + if(top_field_first == -1) + big_picture.top_field_first = in_picture->top_field_first; + else + big_picture.top_field_first = 1; + } /* handles sameq here. This is not correct because it may not be a global option */ @@ -1946,6 +1955,16 @@ static void opt_strict(const char *arg) strict= atoi(arg); } +static void opt_top_field_first(const char *arg) +{ + top_field_first= atoi(arg); +} + +static void opt_noise_reduction(const char *arg) +{ + noise_reduction= atoi(arg); +} + static void opt_audio_bitrate(const char *arg) { audio_bit_rate = atoi(arg) * 1000; @@ -2373,14 +2392,20 @@ static void opt_output_file(const char *filename) if(use_part) { video_enc->flags |= CODEC_FLAG_PART; } + if (use_alt_scan) { + video_enc->flags |= CODEC_FLAG_ALT_SCAN; + } if (b_frames) { video_enc->max_b_frames = b_frames; video_enc->b_frame_strategy = 0; video_enc->b_quant_factor = 2.0; } - if (do_interlace) { + if (do_interlace_dct) { video_enc->flags |= CODEC_FLAG_INTERLACED_DCT; } + if (do_interlace_me) { + video_enc->flags |= CODEC_FLAG_INTERLACED_ME; + } video_enc->qmin = video_qmin; video_enc->qmax = video_qmax; video_enc->mb_qmin = video_mb_qmin; @@ -2430,6 +2455,7 @@ static void opt_output_file(const char *filename) video_enc->idct_algo = idct_algo; video_enc->strict_std_compliance = strict; video_enc->error_rate = error_rate; + video_enc->noise_reduction= noise_reduction; if(packet_size){ video_enc->rtp_mode= 1; video_enc->rtp_payload_size= packet_size; @@ -2992,16 +3018,21 @@ const OptionDef options[] = { { "passlogfile", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void*)&pass_logfilename}, "select two pass log file name", "file" }, { "deinterlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_deinterlace}, "deinterlace pictures" }, - { "interlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace}, - "force interlacing support in encoder (MPEG2/MPEG4)" }, + { "ildct", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace_dct}, + "force interlaced dct support in encoder (MPEG2/MPEG4)" }, + { "ilme", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_interlace_me}, + "force interlacied me support in encoder MPEG2" }, { "psnr", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_psnr}, "calculate PSNR of compressed frames" }, { "vstats", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_vstats}, "dump video coding statistics to file" }, { "vhook", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)add_frame_hooker}, "insert video processing module", "module" }, { "aic", 
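The new -ildct, -ilme, -alt, -top and -nr switches above end up as plain flag and field writes on the encoder context; in do_video_out() above, -top -1 keeps the input frame's field order while any other value forces top_field_first to 1 on the encoded frame. A minimal sketch of the same wiring from an application's point of view, assuming the libavcodec API of this build (the helper name and its parameters are made up for illustration):

    #include "avcodec.h"

    /* Hypothetical helper mirroring the flag wiring in opt_output_file() above. */
    static void setup_interlaced_encoding(AVCodecContext *enc,
                                          int ildct, int ilme,
                                          int alt_scan, int nr)
    {
        if (ildct)
            enc->flags |= CODEC_FLAG_INTERLACED_DCT;
        if (ilme)
            enc->flags |= CODEC_FLAG_INTERLACED_ME;  /* flag added by this patch */
        if (alt_scan)
            enc->flags |= CODEC_FLAG_ALT_SCAN;
        enc->noise_reduction = nr;
    }
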
OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_aic}, "enable Advanced intra coding (h263+)" }, { "aiv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_aiv}, "enable Alternative inter vlc (h263+)" }, { "umv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_umv}, "enable Unlimited Motion Vector (h263+)" }, + { "alt", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_alt_scan}, "enable alternate scantable (mpeg2)" }, { "intra_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_intra_matrix}, "specify intra matrix coeffs", "matrix" }, { "inter_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_inter_matrix}, "specify inter matrix coeffs", "matrix" }, + { "top", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" }, + { "nr", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_noise_reduction}, "noise reduction", "" }, /* audio options */ { "ab", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_bitrate}, "set audio bitrate (in kbit/s)", "bitrate", }, diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c index 82ff7db668..496f461203 100644 --- a/libavcodec/alpha/dsputil_alpha.c +++ b/libavcodec/alpha/dsputil_alpha.c @@ -39,11 +39,11 @@ void get_pixels_mvi(DCTELEM *restrict block, const uint8_t *restrict pixels, int line_size); void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); -int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); #if 0 /* These functions were the base for the optimized assembler routines, @@ -290,11 +290,6 @@ static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride) return pix_abs16x16_mvi_asm(a, b, stride); } -static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride) -{ - return pix_abs8x8_mvi(a, b, stride); -} - void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; @@ -347,12 +342,13 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) c->get_pixels = get_pixels_mvi; c->diff_pixels = diff_pixels_mvi; c->sad[0] = sad16x16_mvi; - c->sad[1] = sad8x8_mvi; - c->pix_abs8x8 = pix_abs8x8_mvi; - c->pix_abs16x16 = pix_abs16x16_mvi_asm; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi; + c->sad[1] = pix_abs8x8_mvi; +// c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed + c->pix_abs[0][0] = sad16x16_mvi; + c->pix_abs[1][0] = pix_abs8x8_mvi; + c->pix_abs[0][1] = pix_abs16x16_x2_mvi; + c->pix_abs[0][2] = pix_abs16x16_y2_mvi; + c->pix_abs[0][3] = pix_abs16x16_xy2_mvi; } put_pixels_clamped_axp_p = c->put_pixels_clamped; diff --git a/libavcodec/alpha/motion_est_alpha.c b/libavcodec/alpha/motion_est_alpha.c index 804e1d2b6b..8b8a0a25c5 100644 --- a/libavcodec/alpha/motion_est_alpha.c +++ b/libavcodec/alpha/motion_est_alpha.c @@ -84,10 +84,9 @@ 
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) return r1 + r2; } -int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 8; if ((size_t) pix2 & 0x7) { /* works only when pix2 is actually unaligned */ @@ -160,10 +159,9 @@ int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) } #endif -int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; uint64_t disalign = (size_t) pix2 & 0x7; switch (disalign) { @@ -234,10 +232,9 @@ int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) return result; } -int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; if ((size_t) pix2 & 0x7) { uint64_t t, p2_l, p2_r; @@ -288,10 +285,9 @@ int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) return result; } -int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; uint64_t p1_l, p1_r; uint64_t p2_l, p2_r, p2_x; diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index ef9950bf43..7d9f8c46ed 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -17,7 +17,7 @@ extern "C" { #define FFMPEG_VERSION_INT 0x000408 #define FFMPEG_VERSION "0.4.8" -#define LIBAVCODEC_BUILD 4697 +#define LIBAVCODEC_BUILD 4698 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT #define LIBAVCODEC_VERSION FFMPEG_VERSION @@ -263,7 +263,8 @@ static const __attribute__((unused)) int Motion_Est_QTab[] = #define CODEC_FLAG_H263P_AIV 0x00000008 ///< H263 Alternative inter vlc #define CODEC_FLAG_OBMC 0x00000001 ///< OBMC #define CODEC_FLAG_LOOP_FILTER 0x00000800 ///< loop filter -#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000 +#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000 +#define CODEC_FLAG_INTERLACED_ME 0x20000000 ///< interlaced motion estimation /* Unsupported options : * Syntax Arithmetic coding (SAC) * Reference Picture Selection diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index e516d7ee99..bc2ef8cf07 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -218,13 +218,13 @@ static void bswap_buf(uint32_t *dst, uint32_t *src, int w){ } } -static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) +static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int s, i; uint32_t *sq = squareTbl + 256; s = 0; - for (i = 0; i < 8; i++) { + for (i = 0; i < h; i++) { s += sq[pix1[0] - pix2[0]]; s += sq[pix1[1] - pix2[1]]; s += sq[pix1[2] - pix2[2]]; @@ -239,13 +239,13 @@ static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) return s; } -static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size) +static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int s, i; uint32_t *sq = squareTbl + 256; s = 0; - for (i = 0; i < 16; i++) { + for (i = 0; i < h; i++) { s += sq[pix1[ 0] - pix2[ 0]]; s += sq[pix1[ 1] - pix2[ 1]]; s += sq[pix1[ 2] - pix2[ 2]]; @@ -2331,12 +2331,12 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ } } -static inline int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size) +static inline int 
pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int s, i; s = 0; - for(i=0;i<16;i++) { + for(i=0;idsp.diff_pixels(temp, src1, src2, stride); s->dsp.fdct(temp); @@ -2752,13 +2748,14 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2 void simple_idct(DCTELEM *block); //FIXME -static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ +static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ MpegEncContext * const s= (MpegEncContext *)c; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8]; DCTELEM * const temp= (DCTELEM*)aligned_temp; DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; int sum=0, i; + assert(h==8); s->mb_intra=0; s->dsp.diff_pixels(temp, src1, src2, stride); @@ -2775,7 +2772,7 @@ static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *s return sum; } -static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ +static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ MpegEncContext * const s= (MpegEncContext *)c; const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; @@ -2787,6 +2784,8 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int uint8_t * length; uint8_t * last_length; + assert(h==8); + for(i=0; i<8; i++){ ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; @@ -2847,12 +2846,12 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int s->dsp.idct_add(bak, stride, temp); - distoration= s->dsp.sse[1](NULL, bak, src1, stride); + distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8); return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); } -static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ +static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ MpegEncContext * const s= (MpegEncContext *)c; const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; @@ -2861,6 +2860,8 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in const int esc_length= s->ac_esc_length; uint8_t * length; uint8_t * last_length; + + assert(h==8); s->dsp.diff_pixels(temp, src1, src2, stride); @@ -2910,12 +2911,11 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in return bits; } - -WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c) -WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c) -WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c) -WARPER88_1616(rd8x8_c, rd16x16_c) -WARPER88_1616(bit8x8_c, bit16x16_c) +WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) +WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) +WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) +WARPER8_16_SQ(rd8x8_c, rd16_c) +WARPER8_16_SQ(bit8x8_c, bit16_c) /* XXX: those functions should be suppressed ASAP when all IDCTs are converted */ @@ -2989,18 +2989,16 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->clear_blocks = clear_blocks_c; c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; - c->sse[0]= sse16_c; - c->sse[1]= sse8_c; /* TODO [0] 16 [1] 8 */ - c->pix_abs16x16 = pix_abs16x16_c; - c->pix_abs16x16_x2 = pix_abs16x16_x2_c; - c->pix_abs16x16_y2 = pix_abs16x16_y2_c; - c->pix_abs16x16_xy2 = 
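The fixed 8- and 16-row loops in the pixel comparison functions are generalized to an explicit row count h (8x8-only metrics such as rd8x8_c keep an assert(h==8) instead). A minimal generic example of the new shape, with a hypothetical function name:

    #include <inttypes.h>

    /* SSE over an 8-pixel-wide block of h rows, in the new calling convention. */
    static int sse_w8(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    {
        int i, j, s = 0;
        for (i = 0; i < h; i++) {
            for (j = 0; j < 8; j++) {
                const int d = pix1[j] - pix2[j];
                s += d * d;
            }
            pix1 += line_size;
            pix2 += line_size;
        }
        return s;
    }
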
pix_abs16x16_xy2_c; - c->pix_abs8x8 = pix_abs8x8_c; - c->pix_abs8x8_x2 = pix_abs8x8_x2_c; - c->pix_abs8x8_y2 = pix_abs8x8_y2_c; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c; + c->pix_abs[0][0] = pix_abs16_c; + c->pix_abs[0][1] = pix_abs16_x2_c; + c->pix_abs[0][2] = pix_abs16_y2_c; + c->pix_abs[0][3] = pix_abs16_xy2_c; + c->pix_abs[1][0] = pix_abs8_c; + c->pix_abs[1][1] = pix_abs8_x2_c; + c->pix_abs[1][2] = pix_abs8_y2_c; + c->pix_abs[1][3] = pix_abs8_xy2_c; #define dspfunc(PFX, IDX, NUM) \ c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ @@ -3097,24 +3095,21 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; - c->hadamard8_diff[0]= hadamard8_diff16_c; - c->hadamard8_diff[1]= hadamard8_diff_c; c->hadamard8_abs = hadamard8_abs_c; - - c->dct_sad[0]= dct_sad16x16_c; - c->dct_sad[1]= dct_sad8x8_c; - - c->sad[0]= sad16x16_c; - c->sad[1]= sad8x8_c; - - c->quant_psnr[0]= quant_psnr16x16_c; - c->quant_psnr[1]= quant_psnr8x8_c; - c->rd[0]= rd16x16_c; - c->rd[1]= rd8x8_c; - - c->bit[0]= bit16x16_c; - c->bit[1]= bit8x8_c; +#define SET_CMP_FUNC(name) \ + c->name[0]= name ## 16_c;\ + c->name[1]= name ## 8x8_c; + + SET_CMP_FUNC(hadamard8_diff) + SET_CMP_FUNC(dct_sad) + c->sad[0]= pix_abs16_c; + c->sad[1]= pix_abs8_c; + c->sse[0]= sse16_c; + c->sse[1]= sse8_c; + SET_CMP_FUNC(quant_psnr) + SET_CMP_FUNC(rd) + SET_CMP_FUNC(bit) c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 79b6c59c70..0bd85e19ec 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -110,9 +110,7 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ /* motion estimation */ -typedef int (*op_pixels_abs_func)(uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/; - -typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/; +typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; /** @@ -136,19 +134,21 @@ typedef struct DSPContext { void (*clear_blocks)(DCTELEM *blocks/*align 16*/); int (*pix_sum)(uint8_t * pix, int line_size); int (*pix_norm1)(uint8_t * pix, int line_size); - me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */ - me_cmp_func sse[2]; - me_cmp_func hadamard8_diff[2]; - me_cmp_func dct_sad[2]; - me_cmp_func quant_psnr[2]; - me_cmp_func bit[2]; - me_cmp_func rd[2]; +// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 + + me_cmp_func sad[4]; /* identical to pix_absAxA except additional void * */ + me_cmp_func sse[4]; + me_cmp_func hadamard8_diff[4]; + me_cmp_func dct_sad[4]; + me_cmp_func quant_psnr[4]; + me_cmp_func bit[4]; + me_cmp_func rd[4]; int (*hadamard8_abs )(uint8_t *src, int stride, int mean); - me_cmp_func me_pre_cmp[11]; - me_cmp_func me_cmp[11]; - me_cmp_func me_sub_cmp[11]; - me_cmp_func mb_cmp[11]; + me_cmp_func me_pre_cmp[5]; + me_cmp_func me_cmp[5]; + me_cmp_func me_sub_cmp[5]; + me_cmp_func mb_cmp[5]; /* maybe create an array for 16/8/4/2 functions */ /** @@ -226,14 +226,7 @@ typedef struct DSPContext { qpel_mc_func put_h264_qpel_pixels_tab[3][16]; qpel_mc_func avg_h264_qpel_pixels_tab[3][16]; - op_pixels_abs_func pix_abs16x16; - op_pixels_abs_func pix_abs16x16_x2; - op_pixels_abs_func pix_abs16x16_y2; - 
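All block comparison functions now share one me_cmp_func signature, taking an opaque context and an explicit height, and are grouped into small per-size arrays: index 0 is the 16-pixel-wide variant, index 1 the 8-pixel-wide one, and for pix_abs the second index selects full-pel, x-half, y-half and xy-half pel. A small usage sketch under those assumptions, with a hypothetical helper name:

    #include "dsputil.h"

    /* Hypothetical wrapper showing the new table indexing: size 0/1 picks the
     * 16- or 8-pixel-wide routine, dxy = dx + 2*dy picks the half-pel variant,
     * and the row count h is passed explicitly. */
    static int sad_hpel(DSPContext *c, uint8_t *cur, uint8_t *ref,
                        int stride, int size, int dx, int dy, int h)
    {
        const int dxy = dx + 2*dy;
        return c->pix_abs[size][dxy](NULL, cur, ref, stride, h);
    }

    /* An 8x8 metric is promoted to the 16-wide slot with the new
     * WARPER8_16_SQ(name8, name16) macro, which sums the two 8x8 sub-blocks
     * of the top row and, when h == 16, the two of the bottom row as well. */
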
op_pixels_abs_func pix_abs16x16_xy2; - op_pixels_abs_func pix_abs8x8; - op_pixels_abs_func pix_abs8x8_x2; - op_pixels_abs_func pix_abs8x8_y2; - op_pixels_abs_func pix_abs8x8_xy2; + me_cmp_func pix_abs[2][4]; /* huffyuv specific */ void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); @@ -484,12 +477,24 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input, FFTSample *tmp); void ff_mdct_end(MDCTContext *s); -#define WARPER88_1616(name8, name16)\ -static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\ - return name8(s, dst , src , stride)\ - +name8(s, dst+8 , src+8 , stride)\ - +name8(s, dst +8*stride, src +8*stride, stride)\ - +name8(s, dst+8+8*stride, src+8+8*stride, stride);\ +#define WARPER8_16(name8, name16)\ +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ + return name8(s, dst , src , stride, h)\ + +name8(s, dst+8 , src+8 , stride, h);\ +} + +#define WARPER8_16_SQ(name8, name16)\ +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ + int score=0;\ + score +=name8(s, dst , src , stride, 8);\ + score +=name8(s, dst+8 , src+8 , stride, 8);\ + if(h==16){\ + dst += 8*stride;\ + src += 8*stride;\ + score +=name8(s, dst , src , stride, 8);\ + score +=name8(s, dst+8 , src+8 , stride, 8);\ + }\ + return score;\ } #ifndef HAVE_LRINTF diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c index fd39926b01..ee3b2816af 100644 --- a/libavcodec/error_resilience.c +++ b/libavcodec/error_resilience.c @@ -582,8 +582,8 @@ static int is_intra_more_likely(MpegEncContext *s){ uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize; - is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize); - is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize); + is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16); + is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16); }else{ if(IS_INTRA(s->current_picture.mb_type[mb_xy])) is_intra_likely++; diff --git a/libavcodec/h263.c b/libavcodec/h263.c index 916cb764a5..f2ab381c1f 100644 --- a/libavcodec/h263.c +++ b/libavcodec/h263.c @@ -479,9 +479,9 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){ for(i=1; imb_num; i++){ int mb_xy= s->mb_index2xy[i]; - if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&MB_TYPE_INTER4V)){ - s->mb_type[mb_xy]&= ~MB_TYPE_INTER4V; - s->mb_type[mb_xy]|= MB_TYPE_INTER; + if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){ + s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V; + s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER; } } @@ -508,9 +508,9 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){ for(i=1; imb_num; i++){ int mb_xy= s->mb_index2xy[i]; - if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&MB_TYPE_DIRECT)){ - s->mb_type[mb_xy]&= ~MB_TYPE_DIRECT; - s->mb_type[mb_xy]|= MB_TYPE_BIDIR; + if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){ + s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT; + s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR; } } } @@ -523,7 +523,7 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){ */ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int 
my){ const int mb_index= s->mb_x + s->mb_y*s->mb_stride; - const int colocated_mb_type= s->next_picture.mb_type[mb_index]; //FIXME or next? + const int colocated_mb_type= s->next_picture.mb_type[mb_index]; int xy= s->block_index[0]; uint16_t time_pp= s->pp_time; uint16_t time_pb= s->pb_time; @@ -547,18 +547,18 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){ s->mv_type = MV_TYPE_FIELD; for(i=0; i<2; i++){ if(s->top_field_first){ - time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i; - time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i; + time_pp= s->pp_field_time - s->p_field_select_table[i][mb_index] + i; + time_pb= s->pb_field_time - s->p_field_select_table[i][mb_index] + i; }else{ - time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i; - time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i; + time_pp= s->pp_field_time + s->p_field_select_table[i][mb_index] - i; + time_pb= s->pb_field_time + s->p_field_select_table[i][mb_index] - i; } - s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx; - s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my; - s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0] - : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp; - s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] - : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp; + s->mv[0][i][0] = s->p_field_mv_table[i][0][mb_index][0]*time_pb/time_pp + mx; + s->mv[0][i][1] = s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0] + : s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp; + s->mv[1][i][1] = my ? 
s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1] + : s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp; } return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED; }else{ @@ -598,9 +598,9 @@ void ff_h263_update_motion_val(MpegEncContext * s){ motion_y = s->mv[0][0][1] + s->mv[0][1][1]; motion_x = (motion_x>>1) | (motion_x&1); for(i=0; i<2; i++){ - s->field_mv_table[mb_xy][i][0]= s->mv[0][i][0]; - s->field_mv_table[mb_xy][i][1]= s->mv[0][i][1]; - s->field_select_table[mb_xy][i]= s->field_select[0][i]; + s->p_field_mv_table[i][0][mb_xy][0]= s->mv[0][i][0]; + s->p_field_mv_table[i][0][mb_xy][1]= s->mv[0][i][1]; + s->p_field_select_table[i][mb_xy]= s->field_select[0][i]; } } @@ -744,12 +744,14 @@ void mpeg4_encode_mb(MpegEncContext * s, if(s->pict_type==B_TYPE){ static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */ int mb_type= mb_type_table[s->mv_dir]; - + if(s->mb_x==0){ - s->last_mv[0][0][0]= - s->last_mv[0][0][1]= - s->last_mv[1][0][0]= - s->last_mv[1][0][1]= 0; + for(i=0; i<2; i++){ + s->last_mv[i][0][0]= + s->last_mv[i][0][1]= + s->last_mv[i][1][0]= + s->last_mv[i][1][1]= 0; + } } assert(s->dquant>=-2 && s->dquant<=2); @@ -803,50 +805,64 @@ void mpeg4_encode_mb(MpegEncContext * s, if(cbp) put_bits(&s->pb, 1, s->interlaced_dct); if(mb_type) // not diect mode - put_bits(&s->pb, 1, 0); // no interlaced ME yet + put_bits(&s->pb, 1, s->mv_type == MV_TYPE_FIELD); } if(interleaved_stats){ s->misc_bits+= get_bits_diff(s); } - switch(mb_type) - { - case 0: /* direct */ + if(mb_type == 0){ + assert(s->mv_dir & MV_DIRECT); h263_encode_motion(s, motion_x, 1); h263_encode_motion(s, motion_y, 1); s->b_count++; s->f_count++; - break; - case 1: /* bidir */ - h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); - h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); - h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); - h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); - s->last_mv[0][0][0]= s->mv[0][0][0]; - s->last_mv[0][0][1]= s->mv[0][0][1]; - s->last_mv[1][0][0]= s->mv[1][0][0]; - s->last_mv[1][0][1]= s->mv[1][0][1]; - s->b_count++; - s->f_count++; - break; - case 2: /* backward */ - h263_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code); - h263_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code); - s->last_mv[1][0][0]= motion_x; - s->last_mv[1][0][1]= motion_y; - s->b_count++; - break; - case 3: /* forward */ - h263_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); - h263_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); - s->last_mv[0][0][0]= motion_x; - s->last_mv[0][0][1]= motion_y; - s->f_count++; - break; - default: - av_log(s->avctx, AV_LOG_ERROR, "unknown mb type\n"); - return; + }else{ + assert(mb_type > 0 && mb_type < 4); + if(s->mv_type != MV_TYPE_FIELD){ + if(s->mv_dir & MV_DIR_FORWARD){ + h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); + h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); + s->last_mv[0][0][0]= s->last_mv[0][1][0]= s->mv[0][0][0]; + s->last_mv[0][0][1]= s->last_mv[0][1][1]= s->mv[0][0][1]; + s->f_count++; + } + if(s->mv_dir & MV_DIR_BACKWARD){ + h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); + h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); + s->last_mv[1][0][0]= s->last_mv[1][1][0]= s->mv[1][0][0]; + s->last_mv[1][0][1]= s->last_mv[1][1][1]= s->mv[1][0][1]; + s->b_count++; + } + 
}else{ + if(s->mv_dir & MV_DIR_FORWARD){ + put_bits(&s->pb, 1, s->field_select[0][0]); + put_bits(&s->pb, 1, s->field_select[0][1]); + } + if(s->mv_dir & MV_DIR_BACKWARD){ + put_bits(&s->pb, 1, s->field_select[1][0]); + put_bits(&s->pb, 1, s->field_select[1][1]); + } + if(s->mv_dir & MV_DIR_FORWARD){ + for(i=0; i<2; i++){ + h263_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code); + h263_encode_motion(s, s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code); + s->last_mv[0][i][0]= s->mv[0][i][0]; + s->last_mv[0][i][1]= s->mv[0][i][1]*2; + } + s->f_count++; + } + if(s->mv_dir & MV_DIR_BACKWARD){ + for(i=0; i<2; i++){ + h263_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0] , s->b_code); + h263_encode_motion(s, s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code); + s->last_mv[1][i][0]= s->mv[1][i][0]; + s->last_mv[1][i][1]= s->mv[1][i][1]*2; + } + s->b_count++; + } + } } if(interleaved_stats){ @@ -861,6 +877,7 @@ void mpeg4_encode_mb(MpegEncContext * s, if(interleaved_stats){ s->p_tex_bits+= get_bits_diff(s); } + }else{ /* s->pict_type==B_TYPE */ cbp= get_p_cbp(s, block, motion_x, motion_y); @@ -889,7 +906,7 @@ void mpeg4_encode_mb(MpegEncContext * s, if(pic==NULL || pic->pict_type!=B_TYPE) break; b_pic= pic->data[0] + offset + 16; //FIXME +16 - diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize); + diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16); if(diff>s->qscale*70){ //FIXME check that 70 is optimal s->mb_skiped=0; break; @@ -929,7 +946,7 @@ void mpeg4_encode_mb(MpegEncContext * s, if(!s->progressive_sequence){ if(cbp) put_bits(pb2, 1, s->interlaced_dct); - put_bits(pb2, 1, 0); // no interlaced ME yet + put_bits(pb2, 1, 0); } if(interleaved_stats){ @@ -941,7 +958,38 @@ void mpeg4_encode_mb(MpegEncContext * s, h263_encode_motion(s, motion_x - pred_x, s->f_code); h263_encode_motion(s, motion_y - pred_y, s->f_code); + }else if(s->mv_type==MV_TYPE_FIELD){ + if(s->dquant) cbpc+= 8; + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + + put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(pb2, 2, dquant_code[s->dquant+2]); + + assert(!s->progressive_sequence); + if(cbp) + put_bits(pb2, 1, s->interlaced_dct); + put_bits(pb2, 1, 1); + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + /* motion vectors: 16x8 interlaced mode */ + h263_pred_motion(s, 0, &pred_x, &pred_y); + pred_y /=2; + + put_bits(&s->pb, 1, s->field_select[0][0]); + put_bits(&s->pb, 1, s->field_select[0][1]); + + h263_encode_motion(s, s->mv[0][0][0] - pred_x, s->f_code); + h263_encode_motion(s, s->mv[0][0][1] - pred_y, s->f_code); + h263_encode_motion(s, s->mv[0][1][0] - pred_x, s->f_code); + h263_encode_motion(s, s->mv[0][1][1] - pred_y, s->f_code); }else{ + assert(s->mv_type==MV_TYPE_8X8); put_bits(&s->pb, inter_MCBPC_bits[cbpc+16], inter_MCBPC_code[cbpc+16]); diff --git a/libavcodec/h263data.h b/libavcodec/h263data.h index 25435adb59..4da105ffc5 100644 --- a/libavcodec/h263data.h +++ b/libavcodec/h263data.h @@ -61,8 +61,8 @@ static const int h263_mb_type_b_map[15]= { MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_16x16, MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16, 0, //stuffing - MB_TYPE_INTRA | MB_TYPE_CBP, - MB_TYPE_INTRA | MB_TYPE_CBP | MB_TYPE_QUANT, + MB_TYPE_INTRA4x4 | MB_TYPE_CBP, + MB_TYPE_INTRA4x4 | MB_TYPE_CBP | MB_TYPE_QUANT, }; const uint8_t cbpc_b_tab[4][2] = { diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index faafca223f..341aa0a235 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ 
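In the interlaced B-frame path above, the vertical MV predictor is kept in frame units: it is halved before the field difference is coded and the stored value is doubled afterwards. A minimal sketch of that bookkeeping for one field's vertical component (the helper is hypothetical; h263_encode_motion() is the entropy coder used above and is local to h263.c):

    /* Mirrors the pattern in mpeg4_encode_mb() above. */
    static void code_field_mv_y(MpegEncContext *s, int mv_y, int *last_mv_y,
                                int f_code)
    {
        h263_encode_motion(s, mv_y - *last_mv_y / 2, f_code); /* coded difference */
        *last_mv_y = mv_y * 2;                                 /* predictor, frame units */
    }
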
b/libavcodec/i386/dsputil_mmx.c @@ -687,10 +687,10 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) { return tmp; } -static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) { +static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int tmp; asm volatile ( - "movl $16,%%ecx\n" + "movl %4,%%ecx\n" "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ "1:\n" @@ -741,7 +741,9 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size) { "psrlq $32, %%mm7\n" /* shift hi dword to lo */ "paddd %%mm7,%%mm1\n" "movd %%mm1,%2\n" - : "+r" (pix1), "+r" (pix2), "=r"(tmp) : "r" (line_size) : "%ecx"); + : "+r" (pix1), "+r" (pix2), "=r"(tmp) + : "r" (line_size) , "m" (h) + : "%ecx"); return tmp; } @@ -866,9 +868,11 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t "movq "#c", "#o"+32(%1) \n\t"\ "movq "#d", "#o"+48(%1) \n\t"\ -static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){ +static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ uint64_t temp[16] __align8; int sum=0; + + assert(h==8); diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); @@ -951,9 +955,11 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride) return sum&0xFFFF; } -static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride){ +static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){ uint64_t temp[16] __align8; int sum=0; + + assert(h==8); diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); @@ -1037,8 +1043,8 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride } -WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx) -WARPER88_1616(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) +WARPER8_16_SQ(hadamard8_diff_mmx, hadamard8_diff16_mmx) +WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2) #endif //CONFIG_ENCODERS #define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d) diff --git a/libavcodec/i386/motion_est_mmx.c b/libavcodec/i386/motion_est_mmx.c index d71453a4bd..c36d081b1b 100644 --- a/libavcodec/i386/motion_est_mmx.c +++ b/libavcodec/i386/motion_est_mmx.c @@ -28,9 +28,9 @@ static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ static __attribute__ ((aligned(8), unused)) uint64_t bone= 0x0101010101010101LL; -static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - int len= -(stride<pix_abs16x16 = pix_abs16x16_mmx; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; - c->pix_abs8x8 = pix_abs8x8_mmx; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx; + c->pix_abs[0][0] = sad16_mmx; + c->pix_abs[0][1] = sad16_x2_mmx; + c->pix_abs[0][2] = sad16_y2_mmx; + c->pix_abs[0][3] = sad16_xy2_mmx; + c->pix_abs[1][0] = sad8_mmx; + c->pix_abs[1][1] = sad8_x2_mmx; + c->pix_abs[1][2] = sad8_y2_mmx; + c->pix_abs[1][3] = sad8_xy2_mmx; - c->sad[0]= sad16x16_mmx; - c->sad[1]= sad8x8_mmx; + c->sad[0]= sad16_mmx; + c->sad[1]= sad8_mmx; } if (mm_flags & MM_MMXEXT) { - c->pix_abs16x16 = pix_abs16x16_mmx2; - c->pix_abs8x8 = pix_abs8x8_mmx2; + c->pix_abs[0][0] = sad16_mmx2; + c->pix_abs[1][0] = sad8_mmx2; - c->sad[0]= sad16x16_mmx2; - c->sad[1]= sad8x8_mmx2; + c->sad[0]= 
sad16_mmx2; + c->sad[1]= sad8_mmx2; if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; + c->pix_abs[0][1] = sad16_x2_mmx2; + c->pix_abs[0][2] = sad16_y2_mmx2; + c->pix_abs[0][3] = sad16_xy2_mmx2; + c->pix_abs[1][1] = sad8_x2_mmx2; + c->pix_abs[1][2] = sad8_y2_mmx2; + c->pix_abs[1][3] = sad8_xy2_mmx2; } } } diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c index 1a449cedd0..cfdbea9c0c 100644 --- a/libavcodec/motion_est.c +++ b/libavcodec/motion_est.c @@ -46,9 +46,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *picture, - int n, int size, uint8_t * const mv_penalty); + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty); static inline int update_map_generation(MpegEncContext * s) { @@ -78,20 +78,21 @@ static int minima_cmp(const void *a, const void *b){ #define RENAME(a) simple_ ## a #define CMP(d, x, y, size)\ -d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride); +d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h); #define CMP_HPEL(d, dx, dy, x, y, size)\ {\ const int dxy= (dx) + 2*(dy);\ - hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\ - d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ } + #define CMP_QPEL(d, dx, dy, x, y, size)\ {\ const int dxy= (dx) + 4*(dy);\ qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ - d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ } #include "motion_est_template.c" @@ -105,29 +106,29 @@ d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride); #define RENAME(a) simple_chroma_ ## a #define CMP(d, x, y, size)\ -d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\ +d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);\ if(chroma_cmp){\ int dxy= ((x)&1) + 2*((y)&1);\ int c= ((x)>>1) + ((y)>>1)*uvstride;\ \ - chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ - d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\ - chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ - d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\ + chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\ + d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride, h>>1);\ + chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\ + d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride, h>>1);\ } #define CMP_HPEL(d, dx, dy, x, y, size)\ {\ const int dxy= (dx) + 2*(dy);\ - hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\ - d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ if(chroma_cmp_sub){\ int cxy= (dxy) | ((x)&1) | (2*((y)&1));\ int c= ((x)>>1) + ((y)>>1)*uvstride;\ - chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ - d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\ - 
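The reworked motion_est.c routines below no longer derive pixel addresses from absolute mb_x/mb_y positions; callers pass per-macroblock plane pointers plus luma and chroma strides. A sketch of how those arrays are packed for a 16x16 macroblock, mirroring ff_estimate_p_frame_motion() further down (the helper name is hypothetical):

    #include "mpegvideo.h"

    static void pack_mb_pointers(MpegEncContext *s, int mb_x, int mb_y,
                                 uint8_t *src_data[3], uint8_t *ref_data[3])
    {
        const int stride   = s->linesize;
        const int uvstride = s->uvlinesize;

        src_data[0] = s->new_picture.data[0]  + 16*(mb_x + stride  *mb_y);
        src_data[1] = s->new_picture.data[1]  +  8*(mb_x + uvstride*mb_y);
        src_data[2] = s->new_picture.data[2]  +  8*(mb_x + uvstride*mb_y);
        ref_data[0] = s->last_picture.data[0] + 16*(mb_x + stride  *mb_y);
        ref_data[1] = s->last_picture.data[1] +  8*(mb_x + uvstride*mb_y);
        ref_data[2] = s->last_picture.data[2] +  8*(mb_x + uvstride*mb_y);
    }
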
chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ - d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\ }\ } @@ -135,7 +136,7 @@ if(chroma_cmp){\ {\ const int dxy= (dx) + 4*(dy);\ qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ - d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\ if(chroma_cmp_sub){\ int cxy, c;\ int cx= (4*(x) + (dx))/2;\ @@ -144,10 +145,10 @@ if(chroma_cmp){\ cy= (cy>>1)|(cy&1);\ cxy= (cx&1) + 2*(cy&1);\ c= ((cx)>>1) + ((cy)>>1)*uvstride;\ - chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ - d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\ - chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ - d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\ }\ } @@ -178,7 +179,7 @@ if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*yma \ uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\ hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\ - hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\ + hpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 8);\ }\ }else{\ int fx = s->me.direct_basis_mv[0][0] + hx;\ @@ -198,9 +199,9 @@ if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*yma assert((by>>1) + 16*s->mb_y <= s->height);\ \ hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\ - hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\ + hpel_avg[0][bxy](s->me.scratchpad, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 16);\ }\ - d = cmp_func(s, s->me.scratchpad, src_y, stride);\ + d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\ }else\ d= 256*256*256*32; @@ -238,7 +239,7 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma \ uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\ qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\ - qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\ + qpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>2) + (by>>2)*(stride), stride);\ }\ }else{\ int fx = s->me.direct_basis_mv[0][0] + qx;\ @@ -252,12 +253,12 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma qpel_put[1][fxy](s->me.scratchpad + 8 , (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 , stride);\ qpel_put[1][fxy](s->me.scratchpad + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8*stride, stride);\ qpel_put[1][fxy](s->me.scratchpad + 8 + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 + 8*stride, stride);\ - qpel_avg[1][bxy](s->me.scratchpad , (ref2_y) + (bx>>2) + (by>>2)*(stride) , stride);\ - qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\ - qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8*stride, 
stride);\ - qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\ + qpel_avg[1][bxy](s->me.scratchpad , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) , stride);\ + qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\ + qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8*stride, stride);\ + qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\ }\ - d = cmp_func(s, s->me.scratchpad, src_y, stride);\ + d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\ }else\ d= 256*256*256*32; @@ -277,7 +278,7 @@ if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*yma #undef CMP__DIRECT -static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){ +static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ return 0; } @@ -285,44 +286,37 @@ static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){ DSPContext* c= &s->dsp; int i; - memset(cmp, 0, sizeof(void*)*11); - - switch(type&0xFF){ - case FF_CMP_SAD: - cmp[0]= c->sad[0]; - cmp[1]= c->sad[1]; - break; - case FF_CMP_SATD: - cmp[0]= c->hadamard8_diff[0]; - cmp[1]= c->hadamard8_diff[1]; - break; - case FF_CMP_SSE: - cmp[0]= c->sse[0]; - cmp[1]= c->sse[1]; - break; - case FF_CMP_DCT: - cmp[0]= c->dct_sad[0]; - cmp[1]= c->dct_sad[1]; - break; - case FF_CMP_PSNR: - cmp[0]= c->quant_psnr[0]; - cmp[1]= c->quant_psnr[1]; - break; - case FF_CMP_BIT: - cmp[0]= c->bit[0]; - cmp[1]= c->bit[1]; - break; - case FF_CMP_RD: - cmp[0]= c->rd[0]; - cmp[1]= c->rd[1]; - break; - case FF_CMP_ZERO: - for(i=0; i<7; i++){ + memset(cmp, 0, sizeof(void*)*5); + + for(i=0; i<4; i++){ + switch(type&0xFF){ + case FF_CMP_SAD: + cmp[i]= c->sad[i]; + break; + case FF_CMP_SATD: + cmp[i]= c->hadamard8_diff[i]; + break; + case FF_CMP_SSE: + cmp[i]= c->sse[i]; + break; + case FF_CMP_DCT: + cmp[i]= c->dct_sad[i]; + break; + case FF_CMP_PSNR: + cmp[i]= c->quant_psnr[i]; + break; + case FF_CMP_BIT: + cmp[i]= c->bit[i]; + break; + case FF_CMP_RD: + cmp[i]= c->rd[i]; + break; + case FF_CMP_ZERO: cmp[i]= zero_cmp; + break; + default: + av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n"); } - break; - default: - av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n"); } } @@ -362,7 +356,7 @@ void ff_init_me(MpegEncContext *s){ else if( s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx-> me_cmp == FF_CMP_SAD && s->avctx-> mb_cmp == FF_CMP_SAD) - s->me.sub_motion_search= sad_hpel_motion_search; + s->me.sub_motion_search= sad_hpel_motion_search; // 2050 vs. 
2450 cycles else s->me.sub_motion_search= simple_hpel_motion_search; } @@ -370,9 +364,11 @@ void ff_init_me(MpegEncContext *s){ if(s->avctx->me_cmp&FF_CMP_CHROMA){ s->me.motion_search[0]= simple_chroma_epzs_motion_search; s->me.motion_search[1]= simple_chroma_epzs_motion_search4; + s->me.motion_search[4]= simple_chroma_epzs_motion_search2; }else{ s->me.motion_search[0]= simple_epzs_motion_search; s->me.motion_search[1]= simple_epzs_motion_search4; + s->me.motion_search[4]= simple_epzs_motion_search2; } if(s->avctx->me_pre_cmp&FF_CMP_CHROMA){ @@ -453,8 +449,8 @@ static int full_motion_search(MpegEncContext * s, my = 0; for (y = y1; y <= y2; y++) { for (x = x1; x <= x2; x++) { - d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, - s->linesize); + d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, + s->linesize, 16); if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < @@ -518,7 +514,7 @@ static int log_motion_search(MpegEncContext * s, do { for (y = y1; y <= y2; y += range) { for (x = x1; x <= x2; x += range) { - d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); + d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dmin = d; mx = x; @@ -598,7 +594,7 @@ static int phods_motion_search(MpegEncContext * s, lastx = x; for (x = x1; x <= x2; x += range) { - d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); + d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dminx = d; mx = x; @@ -607,7 +603,7 @@ static int phods_motion_search(MpegEncContext * s, x = lastx; for (y = y1; y <= y2; y += range) { - d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); + d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16); if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dminy = d; my = y; @@ -651,35 +647,25 @@ static int phods_motion_search(MpegEncContext * s, #define CHECK_SAD_HALF_MV(suffix, x, y) \ {\ - d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ + d= s->dsp.pix_abs[size][(x?1:0)+(y?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\ d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\ COPY3_IF_LT(dminh, d, dx, x, dy, y)\ } static inline int sad_hpel_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *picture, - int n, int size, uint8_t * const mv_penalty) + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty) { - uint8_t *ref_picture= picture->data[0]; uint32_t *score_map= s->me.score_map; const int penalty_factor= s->me.sub_penalty_factor; - int mx, my, xx, yy, dminh; + int mx, my, dminh; uint8_t *pix, *ptr; - op_pixels_abs_func pix_abs_x2; - op_pixels_abs_func pix_abs_y2; - op_pixels_abs_func pix_abs_xy2; - - if(size==0){ - pix_abs_x2 = s->dsp.pix_abs16x16_x2; - pix_abs_y2 = s->dsp.pix_abs16x16_y2; - pix_abs_xy2= s->dsp.pix_abs16x16_xy2; - }else{ - pix_abs_x2 = s->dsp.pix_abs8x8_x2; - pix_abs_y2 = s->dsp.pix_abs8x8_y2; - pix_abs_xy2= s->dsp.pix_abs8x8_xy2; - } + const int xmin= s->me.xmin; + const int ymin= s->me.ymin; + const int xmax= 
s->me.xmax; + const int ymax= s->me.ymax; if(s->me.skip){ // printf("S"); @@ -689,13 +675,11 @@ static inline int sad_hpel_motion_search(MpegEncContext * s, } // printf("N"); - xx = 16 * s->mb_x + 8*(n&1); - yy = 16 * s->mb_y + 8*(n>>1); - pix = s->new_picture.data[0] + (yy * s->linesize) + xx; + pix = src_data[0]; mx = *mx_ptr; my = *my_ptr; - ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx); + ptr = ref_data[0] + (my * stride) + mx; dminh = dmin; @@ -715,16 +699,16 @@ static inline int sad_hpel_motion_search(MpegEncContext * s, pen_x= pred_x + mx; pen_y= pred_y + my; - ptr-= s->linesize; + ptr-= stride; if(t<=b){ CHECK_SAD_HALF_MV(y2 , 0, -1) if(l<=r){ CHECK_SAD_HALF_MV(xy2, -1, -1) if(t+r<=b+l){ CHECK_SAD_HALF_MV(xy2, +1, -1) - ptr+= s->linesize; + ptr+= stride; }else{ - ptr+= s->linesize; + ptr+= stride; CHECK_SAD_HALF_MV(xy2, -1, +1) } CHECK_SAD_HALF_MV(x2 , -1, 0) @@ -732,9 +716,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s, CHECK_SAD_HALF_MV(xy2, +1, -1) if(t+l<=b+r){ CHECK_SAD_HALF_MV(xy2, -1, -1) - ptr+= s->linesize; + ptr+= stride; }else{ - ptr+= s->linesize; + ptr+= stride; CHECK_SAD_HALF_MV(xy2, +1, +1) } CHECK_SAD_HALF_MV(x2 , +1, 0) @@ -743,9 +727,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s, if(l<=r){ if(t+l<=b+r){ CHECK_SAD_HALF_MV(xy2, -1, -1) - ptr+= s->linesize; + ptr+= stride; }else{ - ptr+= s->linesize; + ptr+= stride; CHECK_SAD_HALF_MV(xy2, +1, +1) } CHECK_SAD_HALF_MV(x2 , -1, 0) @@ -753,9 +737,9 @@ static inline int sad_hpel_motion_search(MpegEncContext * s, }else{ if(t+r<=b+l){ CHECK_SAD_HALF_MV(xy2, +1, -1) - ptr+= s->linesize; + ptr+= stride; }else{ - ptr+= s->linesize; + ptr+= stride; CHECK_SAD_HALF_MV(xy2, -1, +1) } CHECK_SAD_HALF_MV(x2 , +1, 0) @@ -802,35 +786,41 @@ static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4) /** * get fullpel ME search limits. 
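get_limits(), rewritten in the hunk that follows, now stores the search window relative to the current macroblock's top-left pixel position in s->me.xmin/ymin/xmax/ymax instead of returning absolute picture limits. A standalone sketch of the new convention (hypothetical helper; both branches match the hunk below):

    /* Relative search limits for a macroblock whose top-left pixel is (x, y). */
    static void relative_limits(int x, int y, int mb_width, int mb_height,
                                int unrestricted_mv,
                                int *xmin, int *ymin, int *xmax, int *ymax)
    {
        if (unrestricted_mv) {
            *xmin = -x - 16;
            *ymin = -y - 16;
            *xmax = -x + mb_width  * 16;
            *ymax = -y + mb_height * 16;
        } else {
            *xmin = -x;
            *ymin = -y;
            *xmax = -x + mb_width  * 16 - 16;
            *ymax = -y + mb_height * 16 - 16;
        }
    }
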
- * @param range the approximate search range for the old ME code, unused for EPZS and newer */ -static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax) +static inline void get_limits(MpegEncContext *s, int x, int y) { - if(s->avctx->me_range) *range= s->avctx->me_range >> 1; - else *range= 16; - +/* + if(s->avctx->me_range) s->me.range= s->avctx->me_range >> 1; + else s->me.range= 16; +*/ if (s->unrestricted_mv) { - *xmin = -16; - *ymin = -16; - *xmax = s->mb_width*16; - *ymax = s->mb_height*16; + s->me.xmin = - x - 16; + s->me.ymin = - y - 16; + s->me.xmax = - x + s->mb_width *16; + s->me.ymax = - y + s->mb_height*16; } else { - *xmin = 0; - *ymin = 0; - *xmax = s->mb_width*16 - 16; - *ymax = s->mb_height*16 - 16; + s->me.xmin = - x; + s->me.ymin = - y; + s->me.xmax = - x + s->mb_width *16 - 16; + s->me.ymax = - y + s->mb_height*16 - 16; } - - //FIXME try to limit x/y min/max if me_range is set } -static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) +static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) { + const int size= 1; + const int h=8; int block; int P[10][2]; int dmin_sum=0, mx4_sum=0, my4_sum=0; uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; int same=1; + const int stride= s->linesize; + const int uvstride= s->uvlinesize; + const int xmin= s->me.xmin; + const int ymin= s->me.ymin; + const int xmax= s->me.xmax; + const int ymax= s->me.ymax; for(block=0; block<4; block++){ int mx4, my4; @@ -839,23 +829,23 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma static const int off[4]= {2, 1, 1, -1}; const int mot_stride = s->block_wrap[0]; const int mot_xy = s->block_index[block]; -// const int block_x= (block&1); -// const int block_y= (block>>1); -#if 1 // this saves us a bit of cliping work and shouldnt affect compression in a negative way - const int rel_xmin4= xmin; - const int rel_xmax4= xmax; - const int rel_ymin4= ymin; - const int rel_ymax4= ymax; -#else - const int rel_xmin4= xmin - block_x*8; - const int rel_xmax4= xmax - block_x*8 + 8; - const int rel_ymin4= ymin - block_y*8; - const int rel_ymax4= ymax - block_y*8 + 8; -#endif + const int block_x= (block&1); + const int block_y= (block>>1); + uint8_t *src_data[3]= { + s->new_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma? + s->new_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y), + s->new_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y) + }; + uint8_t *ref_data[3]= { + s->last_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma? 
+ s->last_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y), + s->last_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y) + }; + P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0]; P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; - if(P_LEFT[0] > (rel_xmax4< (s->me.xmax<me.xmax<mb_y == 0 && block<2) { @@ -866,10 +856,10 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0]; P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1]; - if(P_TOP[1] > (rel_ymax4< (rel_xmax4< (rel_ymax4< (s->me.ymax<me.ymax<me.xmin<me.xmin< (s->me.xmax<me.xmax< (s->me.ymax<me.ymax<me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, - &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); + dmin4 = s->me.motion_search[1](s, &mx4, &my4, P, pred_x4, pred_y4, + src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); - dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, - pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty); + dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, + pred_x4, pred_y4, src_data, ref_data, stride, uvstride, size, h, mv_penalty); if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ int dxy; - const int offset= ((block&1) + (block>>1)*s->linesize)*8; + const int offset= ((block&1) + (block>>1)*stride)*8; uint8_t *dest_y = s->me.scratchpad + offset; if(s->quarter_sample){ - uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>2)) + (s->mb_y*16 + (my4>>2))*s->linesize + offset; + uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride + offset; dxy = ((my4 & 3) << 2) | (mx4 & 3); if(s->no_rounding) s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize); else - s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , s->linesize); + s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride); }else{ - uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>1)) + (s->mb_y*16 + (my4>>1))*s->linesize + offset; + uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride + offset; dxy = ((my4 & 1) << 1) | (mx4 & 1); if(s->no_rounding) - s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , s->linesize, 8); + s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , stride, h); else - s->dsp.put_pixels_tab [1][dxy](dest_y , ref , s->linesize, 8); + s->dsp.put_pixels_tab [1][dxy](dest_y , ref , stride, h); } dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor; }else @@ -937,7 +927,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma return INT_MAX; if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ - dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*s->linesize, s->me.scratchpad, s->linesize); + dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, s->me.scratchpad, stride, 16); } if(s->avctx->mb_cmp&FF_CMP_CHROMA){ @@ -959,8 +949,8 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8); } - dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize); - 
dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize); + dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize, 8); + dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize, 8); } switch(s->avctx->mb_cmp&0xFF){ @@ -973,13 +963,134 @@ static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xma } } +static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint8_t *frame_ref_data[3], + int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int f_code, int mx, int my) +{ + const int size=0; + const int h=8; + int block; + int P[10][2]; + uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV; + int same=1; + const int stride= 2*s->linesize; + const int uvstride= 2*s->uvlinesize; + int dmin_sum= 0; + const int mot_stride= s->mb_stride; + const int xy= s->mb_x + s->mb_y*mot_stride; + + s->me.ymin>>=1; + s->me.ymax>>=1; + + for(block=0; block<2; block++){ + int field_select; + int best_dmin= INT_MAX; + int best_field= -1; + + uint8_t *src_data[3]= { + frame_src_data[0] + s-> linesize*block, + frame_src_data[1] + s->uvlinesize*block, + frame_src_data[2] + s->uvlinesize*block + }; + + for(field_select=0; field_select<2; field_select++){ + int dmin, mx_i, my_i, pred_x, pred_y; + uint8_t *ref_data[3]= { + frame_ref_data[0] + s-> linesize*field_select, + frame_ref_data[1] + s->uvlinesize*field_select, + frame_ref_data[2] + s->uvlinesize*field_select + }; + int16_t (*mv_table)[2]= mv_tables[block][field_select]; + + P_LEFT[0] = mv_table[xy - 1][0]; + P_LEFT[1] = mv_table[xy - 1][1]; + if(P_LEFT[0] > (s->me.xmax<<1)) P_LEFT[0] = (s->me.xmax<<1); + + pred_x= P_LEFT[0]; + pred_y= P_LEFT[1]; + + if(s->mb_y){ + P_TOP[0] = mv_table[xy - mot_stride][0]; + P_TOP[1] = mv_table[xy - mot_stride][1]; + P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0]; + P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1]; + if(P_TOP[1] > (s->me.ymax<<1)) P_TOP[1] = (s->me.ymax<<1); + if(P_TOPRIGHT[0] < (s->me.xmin<<1)) P_TOPRIGHT[0]= (s->me.xmin<<1); + if(P_TOPRIGHT[0] > (s->me.xmax<<1)) P_TOPRIGHT[0]= (s->me.xmax<<1); + if(P_TOPRIGHT[1] > (s->me.ymax<<1)) P_TOPRIGHT[1]= (s->me.ymax<<1); + + P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); + P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); + } + P_MV1[0]= mx; //FIXME not correct if block != field_select + P_MV1[1]= my / 2; + + dmin = s->me.motion_search[4](s, &mx_i, &my_i, P, pred_x, pred_y, + src_data, ref_data, stride, uvstride, mv_table, (1<<16)>>1, mv_penalty); + + dmin= s->me.sub_motion_search(s, &mx_i, &my_i, dmin, + pred_x, pred_y, src_data, ref_data, stride, uvstride, size, h, mv_penalty); + + mv_table[xy][0]= mx_i; + mv_table[xy][1]= my_i; + + if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ + int dxy; + + //FIXME chroma ME + uint8_t *ref= ref_data[0] + (mx_i>>1) + (my_i>>1)*stride; + dxy = ((my_i & 1) << 1) | (mx_i & 1); + + if(s->no_rounding){ + s->dsp.put_no_rnd_pixels_tab[size][dxy](s->me.scratchpad, ref , stride, h); + }else{ + s->dsp.put_pixels_tab [size][dxy](s->me.scratchpad, ref , stride, h); + } + dmin= s->dsp.mb_cmp[size](s, src_data[0], s->me.scratchpad, stride, h); + dmin+= (mv_penalty[mx_i-pred_x] + mv_penalty[my_i-pred_y] + 1)*s->me.mb_penalty_factor; + }else + dmin+= s->me.mb_penalty_factor; //field_select bits + + dmin += field_select != block; //slightly prefer same 
field + + if(dmin < best_dmin){ + best_dmin= dmin; + best_field= field_select; + } + } + { + int16_t (*mv_table)[2]= mv_tables[block][best_field]; + + if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all + if(mv_table[xy][1]&1) same=0; + if(mv_table[xy][1]*2 != my) same=0; + if(best_field != block) same=0; + } + + field_select_tables[block][xy]= best_field; + dmin_sum += best_dmin; + } + + s->me.ymin<<=1; + s->me.ymax<<=1; + + if(same) + return INT_MAX; + + switch(s->avctx->mb_cmp&0xFF){ + /*case FF_CMP_SSE: + return dmin_sum+ 32*s->qscale*s->qscale;*/ + case FF_CMP_RD: + return dmin_sum; + default: + return dmin_sum+ 11*s->me.mb_penalty_factor; + } +} + void ff_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { uint8_t *pix, *ppix; - int sum, varc, vard, mx, my, range, dmin, xx, yy; - int xmin, ymin, xmax, ymax; - int rel_xmin, rel_ymin, rel_xmax, rel_ymax; + int sum, varc, vard, mx, my, dmin, xx, yy; int pred_x=0, pred_y=0; int P[10][2]; const int shift= 1+s->quarter_sample; @@ -987,18 +1098,26 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, uint8_t *ref_picture= s->last_picture.data[0]; Picture * const pic= &s->current_picture; uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; - + const int stride= s->linesize; + const int uvstride= s->uvlinesize; + uint8_t *src_data[3]= { + s->new_picture.data[0] + 16*(mb_x + stride*mb_y), + s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y), + s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y) + }; + uint8_t *ref_data[3]= { + s->last_picture.data[0] + 16*(mb_x + stride*mb_y), + s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y), + s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y) + }; + assert(s->quarter_sample==0 || s->quarter_sample==1); s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); - get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); - rel_xmin= xmin - mb_x*16; - rel_xmax= xmax - mb_x*16; - rel_ymin= ymin - mb_y*16; - rel_ymax= ymax - mb_y*16; + get_limits(s, 16*mb_x, 16*mb_y); s->me.skip=0; switch(s->me_method) { @@ -1009,21 +1128,23 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, my-= mb_y*16; dmin = 0; break; +#if 0 case ME_FULL: - dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture); + dmin = full_motion_search(s, &mx, &my, range, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; case ME_LOG: - dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; case ME_PHODS: - dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; +#endif case ME_X1: case ME_EPZS: { @@ -1033,16 +1154,16 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0]; P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; - if(P_LEFT[0] > (rel_xmax< (s->me.xmax<me.xmax<current_picture.motion_val[0][mot_xy - mot_stride ][0]; P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0]; P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1]; - if(P_TOP[1] > (rel_ymax< (rel_ymax< 
(s->me.ymax<me.ymax<me.xmin<me.xmin< (s->me.ymax<me.ymax<me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); + dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y, + src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); break; } @@ -1070,14 +1191,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, xx = mb_x * 16; yy = mb_y * 16; - pix = s->new_picture.data[0] + (yy * s->linesize) + xx; + pix = src_data[0]; /* At this point (mx,my) are full-pell and the relative displacement */ - ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx); + ppix = ref_data[0] + (my * s->linesize) + mx; sum = s->dsp.pix_sum(pix, s->linesize); varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; - vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8; + vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8; //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); pic->mb_var [s->mb_stride * mb_y + mb_x] = varc; @@ -1099,47 +1220,59 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, s->scene_change_score+= s->qscale; if (vard*2 + 200 > varc) - mb_type|= MB_TYPE_INTRA; + mb_type|= CANDIDATE_MB_TYPE_INTRA; if (varc*2 + 200 > vard){ - mb_type|= MB_TYPE_INTER; - s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); + mb_type|= CANDIDATE_MB_TYPE_INTER; + s->me.sub_motion_search(s, &mx, &my, dmin, + pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->flags&CODEC_FLAG_MV0) if(mx || my) - mb_type |= MB_TYPE_SKIPED; //FIXME check difference + mb_type |= CANDIDATE_MB_TYPE_SKIPED; //FIXME check difference }else{ mx <<=shift; my <<=shift; } if((s->flags&CODEC_FLAG_4MV) && !s->me.skip && varc>50 && vard>10){ - if(h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift) < INT_MAX) - mb_type|=MB_TYPE_INTER4V; + if(h263_mv4_search(s, mx, my, shift) < INT_MAX) + mb_type|=CANDIDATE_MB_TYPE_INTER4V; set_p_mv_tables(s, mx, my, 0); }else set_p_mv_tables(s, mx, my, 1); + if((s->flags&CODEC_FLAG_INTERLACED_ME) + && !s->me.skip){ //FIXME varc/d checks + if(interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my) < INT_MAX) + mb_type |= CANDIDATE_MB_TYPE_INTER_I; + } }else{ int intra_score, i; - mb_type= MB_TYPE_INTER; + mb_type= CANDIDATE_MB_TYPE_INTER; - dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); - + dmin= s->me.sub_motion_search(s, &mx, &my, dmin, + pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) - dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, &s->last_picture, mv_penalty); + dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty); if((s->flags&CODEC_FLAG_4MV) && !s->me.skip && varc>50 && vard>10){ - int dmin4= h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); + int dmin4= h263_mv4_search(s, mx, my, shift); if(dmin4 < dmin){ - mb_type= MB_TYPE_INTER4V; + mb_type= CANDIDATE_MB_TYPE_INTER4V; dmin=dmin4; } } + if((s->flags&CODEC_FLAG_INTERLACED_ME) + && !s->me.skip){ //FIXME varc/d checks + int dmin_i= interlaced_search(s, src_data, ref_data, 
s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my); + if(dmin_i < dmin){ + mb_type = CANDIDATE_MB_TYPE_INTER_I; + dmin= dmin_i; + } + } // pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; - set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V); + set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V); /* get intra luma score */ if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){ @@ -1155,7 +1288,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean; } - intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize); + intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize, 16); } #if 0 //FIXME /* get chroma score */ @@ -1184,8 +1317,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, intra_score += s->me.mb_penalty_factor*16; if(intra_score < dmin){ - mb_type= MB_TYPE_INTRA; - s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= MB_TYPE_INTRA; //FIXME cleanup + mb_type= CANDIDATE_MB_TYPE_INTRA; + s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup }else s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0; @@ -1202,30 +1335,36 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { - int mx, my, range, dmin; - int xmin, ymin, xmax, ymax; - int rel_xmin, rel_ymin, rel_xmax, rel_ymax; + int mx, my, dmin; int pred_x=0, pred_y=0; int P[10][2]; const int shift= 1+s->quarter_sample; uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; const int xy= mb_x + mb_y*s->mb_stride; + const int stride= s->linesize; + const int uvstride= s->uvlinesize; + uint8_t *src_data[3]= { + s->new_picture.data[0] + 16*(mb_x + stride*mb_y), + s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y), + s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y) + }; + uint8_t *ref_data[3]= { + s->last_picture.data[0] + 16*(mb_x + stride*mb_y), + s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y), + s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y) + }; assert(s->quarter_sample==0 || s->quarter_sample==1); s->me.pre_penalty_factor = get_penalty_factor(s, s->avctx->me_pre_cmp); - get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); - rel_xmin= xmin - mb_x*16; - rel_xmax= xmax - mb_x*16; - rel_ymin= ymin - mb_y*16; - rel_ymax= ymax - mb_y*16; + get_limits(s, 16*mb_x, 16*mb_y); s->me.skip=0; P_LEFT[0] = s->p_mv_table[xy + 1][0]; P_LEFT[1] = s->p_mv_table[xy + 1][1]; - if(P_LEFT[0] < (rel_xmin<me.xmin<me.xmin<mb_height-1) { @@ -1238,9 +1377,9 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s, P_TOP[1] = s->p_mv_table[xy + s->mb_stride ][1]; P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0]; P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1]; - if(P_TOP[1] < (rel_ymin< (rel_xmax<me.ymin<me.ymin< (s->me.xmax<me.xmax<me.ymin<me.ymin<me.pre_motion_search(s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); + dmin = s->me.pre_motion_search(s, &mx, &my, P, pred_x, pred_y, + src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty); s->p_mv_table[xy][0] = mx<p_mv_table[xy][1] = my<quarter_sample; const int mot_stride = s->mb_stride; const int mot_xy = mb_y*mot_stride + mb_x; - uint8_t * const ref_picture= picture->data[0]; + uint8_t * const ref_picture= ref_data[0] - 16*s->mb_x - 16*s->mb_y*s->linesize; //FIXME ugly uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV; int mv_scale; 
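The interlaced_search() added above splits the 16x16 macroblock into its two fields (the stride is doubled to 2*s->linesize and each field starts one line apart) and, for each source field, tries both reference fields, keeping the cheaper one and recording the choice in the field_select table. The standalone sketch below models only that field-splitting and field-selection step; sad_16x8(), field_select_cost() and the zero-vector-only "search" are simplifications for illustration, not code from the patch.

#include <stdint.h>
#include <stdlib.h>
#include <limits.h>

/* 16x8 SAD, a stand-in for the configurable me_cmp/mb_cmp functions (assumption). */
static int sad_16x8(const uint8_t *a, const uint8_t *b, int stride)
{
    int sum = 0;
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 16; x++)
            sum += abs(a[x] - b[x]);
        a += stride;
        b += stride;
    }
    return sum;
}

/*
 * For one source field of a macroblock, compare against both reference
 * fields and return the better cost; *best_field receives 0 (top) or 1 (bottom).
 * A field is addressed by offsetting the frame pointer by one line and
 * doubling the stride, mirroring the 2*s->linesize used in the patch.
 * Only the zero motion vector is scored here; the real code runs a full
 * EPZS plus sub-pel search per field.
 */
static int field_select_cost(const uint8_t *src_frame, const uint8_t *ref_frame,
                             int frame_stride, int src_field, int *best_field)
{
    const int field_stride = 2 * frame_stride;
    const uint8_t *src = src_frame + src_field * frame_stride;
    int best = INT_MAX;

    for (int ref_field = 0; ref_field < 2; ref_field++) {
        const uint8_t *ref = ref_frame + ref_field * frame_stride;
        int cost = sad_16x8(src, ref, field_stride);
        cost += (ref_field != src_field); /* slightly prefer the same field */
        if (cost < best) {
            best        = cost;
            *best_field = ref_field;
        }
    }
    return best;
}

In the patch, such a per-field cost (summed over both source fields, each with its own full search) only yields a CANDIDATE_MB_TYPE_INTER_I candidate when it beats the frame-based score, as in the ff_estimate_p_frame_motion() changes above.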
@@ -1276,11 +1414,7 @@ static int ff_estimate_motion_b(MpegEncContext * s, s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); - get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); - rel_xmin= xmin - mb_x*16; - rel_xmax= xmax - mb_x*16; - rel_ymin= ymin - mb_y*16; - rel_ymax= ymax - mb_y*16; + get_limits(s, 16*mb_x, 16*mb_y); switch(s->me_method) { case ME_ZERO: @@ -1290,28 +1424,30 @@ static int ff_estimate_motion_b(MpegEncContext * s, mx-= mb_x*16; my-= mb_y*16; break; +#if 0 case ME_FULL: - dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture); + dmin = full_motion_search(s, &mx, &my, range, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; case ME_LOG: - dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; case ME_PHODS: - dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture); mx-= mb_x*16; my-= mb_y*16; break; +#endif case ME_X1: case ME_EPZS: { P_LEFT[0] = mv_table[mot_xy - 1][0]; P_LEFT[1] = mv_table[mot_xy - 1][1]; - if(P_LEFT[0] > (rel_xmax< (s->me.xmax<me.xmax< (rel_ymax< (rel_ymax< (s->me.ymax<me.ymax<me.xmin<me.xmin< (s->me.ymax<me.ymax<pb_time - s->pp_time)<<16) / (s->pp_time<me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - picture, s->p_mv_table, mv_scale, mv_penalty); + dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y, + src_data, ref_data, stride, uvstride, s->p_mv_table, mv_scale, mv_penalty); break; } - dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, picture, 0, 0, mv_penalty); + dmin= s->me.sub_motion_search(s, &mx, &my, dmin, + pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) - dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, picture, mv_penalty); + dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty); //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; @@ -1356,16 +1492,18 @@ static int ff_estimate_motion_b(MpegEncContext * s, return dmin; } -static inline int check_bidir_mv(MpegEncContext * s, - int mb_x, int mb_y, +static inline int check_bidir_mv(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], + int stride, int uvstride, int motion_fx, int motion_fy, int motion_bx, int motion_by, int pred_fx, int pred_fy, - int pred_bx, int pred_by) + int pred_bx, int pred_by, + int size, int h) { //FIXME optimize? //FIXME move into template? 
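check_bidir_mv(), whose body follows, scores a bidirectional candidate by building the forward prediction in a scratch buffer, averaging the backward prediction onto it, and comparing the result against the source macroblock plus the vector penalties. A minimal whole-pel model of that scoring is sketched below; sad_16x16() and bidir_cost() are hypothetical helpers, the single mv_penalty argument stands in for the table-driven penalties of both differential vectors, and the half/quarter-pel put/avg pixel tables of the real code are left out.

#include <stdint.h>
#include <stdlib.h>

/* 16x16 SAD with separate strides, a stand-in for s->dsp.mb_cmp[0] (assumption). */
static int sad_16x16(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride)
{
    int sum = 0;
    for (int y = 0; y < 16; y++) {
        for (int x = 0; x < 16; x++)
            sum += abs(a[x] - b[x]);
        a += a_stride;
        b += b_stride;
    }
    return sum;
}

/*
 * Score one bidirectional candidate.  src points at the source macroblock,
 * fwd_ref/bwd_ref at the co-located positions in the forward and backward
 * reference frames (the patch passes such MB-relative pointers in ref_data[0]
 * and ref_data[3]).  Vectors are whole-pel here.
 */
static int bidir_cost(const uint8_t *src, const uint8_t *fwd_ref,
                      const uint8_t *bwd_ref, int stride,
                      int fx, int fy, int bx, int by, int mv_penalty)
{
    uint8_t scratch[16 * 16];
    const uint8_t *f = fwd_ref + fy * stride + fx;
    const uint8_t *b = bwd_ref + by * stride + bx;

    for (int y = 0; y < 16; y++)
        for (int x = 0; x < 16; x++)
            /* average of forward and backward prediction, as avg_pixels_tab does */
            scratch[y * 16 + x] = (f[y * stride + x] + b[y * stride + x] + 1) >> 1;

    return sad_16x16(src, stride, scratch, 16) + mv_penalty;
}

In the patch the penalty term is mv_penalty[] of both differential vectors scaled by mb_penalty_factor, which is folded into the single mv_penalty value here.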
//FIXME better f_code prediction (max mv & distance) + //FIXME pointers uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame uint8_t *dest_y = s->me.scratchpad; uint8_t *ptr; @@ -1375,45 +1513,37 @@ static inline int check_bidir_mv(MpegEncContext * s, if(s->quarter_sample){ dxy = ((motion_fy & 3) << 2) | (motion_fx & 3); - src_x = mb_x * 16 + (motion_fx >> 2); - src_y = mb_y * 16 + (motion_fy >> 2); - assert(src_x >=-16 && src_x<=s->h_edge_pos); - assert(src_y >=-16 && src_y<=s->v_edge_pos); + src_x = motion_fx >> 2; + src_y = motion_fy >> 2; - ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize); + ptr = ref_data[0] + (src_y * stride) + src_x; + s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , stride); dxy = ((motion_by & 3) << 2) | (motion_bx & 3); - src_x = mb_x * 16 + (motion_bx >> 2); - src_y = mb_y * 16 + (motion_by >> 2); - assert(src_x >=-16 && src_x<=s->h_edge_pos); - assert(src_y >=-16 && src_y<=s->v_edge_pos); + src_x = motion_bx >> 2; + src_y = motion_by >> 2; - ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize); + ptr = ref_data[3] + (src_y * stride) + src_x; + s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y , ptr , stride); }else{ dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); - src_x = mb_x * 16 + (motion_fx >> 1); - src_y = mb_y * 16 + (motion_fy >> 1); - assert(src_x >=-16 && src_x<=s->h_edge_pos); - assert(src_y >=-16 && src_y<=s->v_edge_pos); + src_x = motion_fx >> 1; + src_y = motion_fy >> 1; - ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); + ptr = ref_data[0] + (src_y * stride) + src_x; + s->dsp.put_pixels_tab[size][dxy](dest_y , ptr , stride, h); dxy = ((motion_by & 1) << 1) | (motion_bx & 1); - src_x = mb_x * 16 + (motion_bx >> 1); - src_y = mb_y * 16 + (motion_by >> 1); - assert(src_x >=-16 && src_x<=s->h_edge_pos); - assert(src_y >=-16 && src_y<=s->v_edge_pos); + src_x = motion_bx >> 1; + src_y = motion_by >> 1; - ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); + ptr = ref_data[3] + (src_y * stride) + src_x; + s->dsp.avg_pixels_tab[size][dxy](dest_y , ptr , stride, h); } fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor - + s->dsp.mb_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); + + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic if(s->avctx->mb_cmp&FF_CMP_CHROMA){ } @@ -1423,7 +1553,8 @@ static inline int check_bidir_mv(MpegEncContext * s, } /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/ -static inline int bidir_refine(MpegEncContext * s, +static inline int bidir_refine(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], + int stride, int uvstride, int mb_x, int mb_y) { const int mot_stride = s->mb_stride; @@ -1440,16 +1571,18 @@ static inline int bidir_refine(MpegEncContext * s, //FIXME do refinement and add flag - fbmin= check_bidir_mv(s, mb_x, mb_y, + fbmin= check_bidir_mv(s, src_data, ref_data, stride, uvstride, motion_fx, motion_fy, motion_bx, motion_by, pred_fx, pred_fy, - pred_bx, pred_by); + pred_bx, pred_by, + 0, 16); return fbmin; } -static inline int 
direct_search(MpegEncContext * s, +static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6], + int stride, int uvstride, int mb_x, int mb_y) { int P[10][2]; @@ -1508,6 +1641,11 @@ static inline int direct_search(MpegEncContext * s, return 256*256*256*64; } + + s->me.xmin= xmin; + s->me.ymin= ymin; + s->me.xmax= xmax; + s->me.ymax= ymax; P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<flags&CODEC_FLAG_QPEL){ - dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, - &s->last_picture, mv_table, 1<<14, mv_penalty); - dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, - 0, 0, &s->last_picture, 0, 0, mv_penalty); + dmin = simple_direct_qpel_epzs_motion_search(s, &mx, &my, P, 0, 0, + src_data, ref_data, stride, uvstride, mv_table, 1<<14, mv_penalty); + dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, + 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) - dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); + dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty); }else{ - dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, - &s->last_picture, mv_table, 1<<15, mv_penalty); - dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, - 0, 0, &s->last_picture, 0, 0, mv_penalty); + dmin = simple_direct_hpel_epzs_motion_search(s, &mx, &my, P, 0, 0, + src_data, ref_data, stride, uvstride, mv_table, 1<<15, mv_penalty); + dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, + 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty); if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) - dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); + dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty); } + + get_limits(s, 16*mb_x, 16*mb_y); //restore s->me.?min/max, maybe not needed s->b_direct_mv_table[mot_xy][0]= mx; s->b_direct_mv_table[mot_xy][1]= my; @@ -1551,40 +1691,80 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { const int penalty_factor= s->me.mb_penalty_factor; - int fmin, bmin, dmin, fbmin; + int fmin, bmin, dmin, fbmin, bimin, fimin; int type=0; + const int stride= s->linesize; + const int uvstride= s->uvlinesize; + uint8_t *src_data[3]= { + s->new_picture.data[0] + 16*(s->mb_x + stride*s->mb_y), + s->new_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y), + s->new_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y) + }; + uint8_t *ref_data[6]= { + s->last_picture.data[0] + 16*(s->mb_x + stride*s->mb_y), + s->last_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y), + s->last_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y), + s->next_picture.data[0] + 16*(s->mb_x + stride*s->mb_y), + s->next_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y), + s->next_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y) + }; s->me.skip=0; if (s->codec_id == CODEC_ID_MPEG4) - dmin= direct_search(s, mb_x, mb_y); + dmin= direct_search(s, src_data, ref_data, stride, uvstride, mb_x, mb_y); else dmin= INT_MAX; - +//FIXME penalty stuff for non mpeg4 s->me.skip=0; - fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code) + 3*penalty_factor; + fmin= ff_estimate_motion_b(s, mb_x, mb_y, 
s->b_forw_mv_table, src_data, + ref_data, stride, uvstride, s->f_code) + 3*penalty_factor; s->me.skip=0; - bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) + 2*penalty_factor; + bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, src_data, + ref_data+3, stride, uvstride, s->b_code) + 2*penalty_factor; //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); s->me.skip=0; - fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor; + fbmin= bidir_refine(s, src_data, ref_data, stride, uvstride, mb_x, mb_y) + penalty_factor; //printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin); + + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + const int xy = mb_y*s->mb_stride + mb_x; + +//FIXME mb type penalty + s->me.skip=0; + fimin= interlaced_search(s, src_data, ref_data , + s->b_field_mv_table[0], s->b_field_select_table[0], s->f_code, + s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); + bimin= interlaced_search(s, src_data, ref_data+3, + s->b_field_mv_table[1], s->b_field_select_table[1], s->b_code, + s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1]); + }else + fimin= bimin= INT_MAX; + { int score= fmin; - type = MB_TYPE_FORWARD; + type = CANDIDATE_MB_TYPE_FORWARD; if (dmin <= score){ score = dmin; - type = MB_TYPE_DIRECT; + type = CANDIDATE_MB_TYPE_DIRECT; } if(bmin>16; @@ -1593,8 +1773,16 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, } if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){ - type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter - if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB + type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT; + if(fimin < INT_MAX) + type |= CANDIDATE_MB_TYPE_FORWARD_I; + if(bimin < INT_MAX) + type |= CANDIDATE_MB_TYPE_BACKWARD_I; + if(fimin < INT_MAX && bimin < INT_MAX){ + type |= CANDIDATE_MB_TYPE_BIDIR_I; + } + //FIXME something smarter + if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB } s->mb_type[mb_y*s->mb_stride + mb_x]= type; @@ -1661,24 +1849,6 @@ void ff_fix_long_p_mvs(MpegEncContext * s) if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range; - /* clip / convert to intra 16x16 type MVs */ - for(y=0; ymb_height; y++){ - int x; - int xy= y*s->mb_stride; - for(x=0; xmb_width; x++){ - if(s->mb_type[xy]&MB_TYPE_INTER){ - if( s->p_mv_table[xy][0] >=range || s->p_mv_table[xy][0] <-range - || s->p_mv_table[xy][1] >=range || s->p_mv_table[xy][1] <-range){ - s->mb_type[xy] &= ~MB_TYPE_INTER; - s->mb_type[xy] |= MB_TYPE_INTRA; - s->current_picture.mb_type[xy]= MB_TYPE_INTRA; - s->p_mv_table[xy][0] = 0; - s->p_mv_table[xy][1] = 0; - } - } - xy++; - } - } //printf("%d no:%d %d//\n", clip, noclip, f_code); if(s->flags&CODEC_FLAG_4MV){ const int wrap= 2+ s->mb_width*2; @@ -1690,7 +1860,7 @@ void ff_fix_long_p_mvs(MpegEncContext * s) int x; for(x=0; xmb_width; x++){ - if(s->mb_type[i]&MB_TYPE_INTER4V){ + if(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V){ int block; for(block=0; block<4; block++){ int off= (block& 1) + (block>>1)*wrap; @@ -1699,9 +1869,9 @@ void ff_fix_long_p_mvs(MpegEncContext * s) if( mx >=range || mx <-range || my >=range || my <-range){ - s->mb_type[i] &= ~MB_TYPE_INTER4V; - s->mb_type[i] |= MB_TYPE_INTRA; - s->current_picture.mb_type[i]= MB_TYPE_INTRA; + s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V; + s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA; + 
s->current_picture.mb_type[i]= CANDIDATE_MB_TYPE_INTRA; } } } @@ -1712,30 +1882,45 @@ void ff_fix_long_p_mvs(MpegEncContext * s) } } -void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type) +/** + * + * @param truncate 1 for truncation, 0 for using intra + */ +void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select, + int16_t (*mv_table)[2], int f_code, int type, int truncate) { - int y; + int y, h_range, v_range; // RAL: 8 in MPEG-1, 16 in MPEG-4 int range = (((s->out_format == FMT_MPEG1) ? 8 : 16) << f_code); - + + if(s->msmpeg4_version) range= 16; if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range; + h_range= range; + v_range= field_select_table ? range>>1 : range; + /* clip / convert to intra 16x16 type MVs */ for(y=0; ymb_height; y++){ int x; int xy= y*s->mb_stride; for(x=0; xmb_width; x++){ if (s->mb_type[xy] & type){ // RAL: "type" test added... - if( mv_table[xy][0] >=range || mv_table[xy][0] <-range - || mv_table[xy][1] >=range || mv_table[xy][1] <-range){ + if(field_select_table==NULL || field_select_table[xy] == field_select){ + if( mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range + || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){ - if(s->codec_id == CODEC_ID_MPEG1VIDEO && 0){ - }else{ - if (mv_table[xy][0] > range-1) mv_table[xy][0]= range-1; - else if(mv_table[xy][0] < -range ) mv_table[xy][0]= -range; - if (mv_table[xy][1] > range-1) mv_table[xy][1]= range-1; - else if(mv_table[xy][1] < -range ) mv_table[xy][1]= -range; + if(truncate){ + if (mv_table[xy][0] > h_range-1) mv_table[xy][0]= h_range-1; + else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range; + if (mv_table[xy][1] > v_range-1) mv_table[xy][1]= v_range-1; + else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range; + }else{ + s->mb_type[xy] &= ~type; + s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA; + mv_table[xy][0]= + mv_table[xy][1]= 0; + } } } } diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c index db51d676db..0f3a6b4cdd 100644 --- a/libavcodec/motion_est_template.c +++ b/libavcodec/motion_est_template.c @@ -22,29 +22,31 @@ * @file motion_est_template.c * Motion estimation template. */ - +//FIXME ref2_y next_pic? //lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...) 
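ff_fix_long_mvs() above generalizes the old per-table clipping: the allowed range is derived from f_code, the vertical range is halved whenever a field_select_table is passed (field vectors address half as many lines), and an out-of-range vector is either truncated or the macroblock is demoted to intra with its vector zeroed. A reduced standalone version of that decision, with the MpegEncContext bookkeeping stripped away and fix_long_mv()/MB_CANDIDATE_INTRA as stand-in names, might look like this:

#include <stdint.h>

#define MB_CANDIDATE_INTRA 1   /* stand-in for CANDIDATE_MB_TYPE_INTRA */

/*
 * Bring one motion vector back into the range allowed by f_code and return
 * the (possibly changed) macroblock type.  'truncate' selects clipping,
 * otherwise the block falls back to intra and the vector is zeroed -- the
 * same two policies ff_fix_long_mvs() switches on.  The range is 16<<f_code
 * here (8<<f_code would apply for MPEG-1/2, as in the patch).
 */
static int fix_long_mv(int16_t mv[2], int mb_type, int f_code,
                       int is_field_mv, int truncate)
{
    const int range   = 16 << f_code;
    const int h_range = range;
    const int v_range = is_field_mv ? range >> 1 : range;   /* field MVs: half the lines */

    if (mv[0] >= h_range || mv[0] < -h_range ||
        mv[1] >= v_range || mv[1] < -v_range) {
        if (truncate) {
            if      (mv[0] >  h_range - 1) mv[0] =  h_range - 1;
            else if (mv[0] < -h_range    ) mv[0] = -h_range;
            if      (mv[1] >  v_range - 1) mv[1] =  v_range - 1;
            else if (mv[1] < -v_range    ) mv[1] = -v_range;
        } else {
            mv[0] = mv[1] = 0;
            mb_type = MB_CANDIDATE_INTRA;
        }
    }
    return mb_type;
}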
//Note, the last line is there to kill these ugly unused var warnings -#define LOAD_COMMON(x, y)\ +#define LOAD_COMMON\ uint32_t * const score_map= s->me.score_map;\ - const int stride= s->linesize;\ - const int uvstride= s->uvlinesize;\ const int time_pp= s->pp_time;\ const int time_pb= s->pb_time;\ - uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\ - uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\ - uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\ - uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\ - uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\ - uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\ - uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\ + const int xmin= s->me.xmin;\ + const int ymin= s->me.ymin;\ + const int xmax= s->me.xmax;\ + const int ymax= s->me.ymax;\ + uint8_t * const src_y= src_data[0];\ + uint8_t * const src_u= src_data[1];\ + uint8_t * const src_v= src_data[2];\ + uint8_t * const ref_y= ref_data[0];\ + uint8_t * const ref_u= ref_data[1];\ + uint8_t * const ref_v= ref_data[2];\ op_pixels_func (*hpel_put)[4];\ op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\ op_pixels_func (*chroma_hpel_put)[4];\ qpel_mc_func (*qpel_put)[16];\ qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\ const __attribute__((unused)) int unu= time_pp + time_pb + (size_t)src_u + (size_t)src_v + (size_t)ref_u + (size_t)ref_v\ - + (size_t)ref2_y + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map;\ + + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map\ + + xmin + xmax + ymin + ymax;\ if(s->no_rounding /*FIXME b_type*/){\ hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\ chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\ @@ -70,9 +72,8 @@ #if 0 static int RENAME(hpel_motion_search)(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *ref_picture, - int n, int size, uint8_t * const mv_penalty) + int pred_x, int pred_y, uint8_t *ref_data[3], + int size, uint8_t * const mv_penalty) { const int xx = 16 * s->mb_x + 8*(n&1); const int yy = 16 * s->mb_y + 8*(n>>1); @@ -80,7 +81,7 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s, const int my = *my_ptr; const int penalty_factor= s->me.sub_penalty_factor; - LOAD_COMMON(xx, yy); + LOAD_COMMON // INIT; //FIXME factorize @@ -139,19 +140,17 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s, #else static int RENAME(hpel_motion_search)(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *ref_picture, - int n, int size, uint8_t * const mv_penalty) + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty) { - const int xx = 16 * s->mb_x + 8*(n&1); - const int yy = 16 * s->mb_y + 8*(n>>1); const int mx = *mx_ptr; const int my = *my_ptr; const int penalty_factor= s->me.sub_penalty_factor; me_cmp_func cmp_sub, chroma_cmp_sub; int bx=2*mx, by=2*my; - LOAD_COMMON(xx, yy); + LOAD_COMMON //FIXME factorize @@ -247,20 +246,18 @@ static int RENAME(hpel_motion_search)(MpegEncContext * s, } #endif -static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, +static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int 
mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, uint8_t * const mv_penalty) { // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; const int size= 0; - const int xx = 16 * s->mb_x; - const int yy = 16 * s->mb_y; + const int h= 16; const int penalty_factor= s->me.mb_penalty_factor; - const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these - const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit me_cmp_func cmp_sub, chroma_cmp_sub; int d; - LOAD_COMMON(xx, yy); + LOAD_COMMON //FIXME factorize @@ -295,12 +292,10 @@ static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pre static int RENAME(qpel_motion_search)(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *ref_picture, - int n, int size, uint8_t * const mv_penalty) + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty) { - const int xx = 16 * s->mb_x + 8*(n&1); - const int yy = 16 * s->mb_y + 8*(n>>1); const int mx = *mx_ptr; const int my = *my_ptr; const int penalty_factor= s->me.sub_penalty_factor; @@ -310,7 +305,7 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s, me_cmp_func cmp, chroma_cmp; me_cmp_func cmp_sub, chroma_cmp_sub; - LOAD_COMMON(xx, yy); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME @@ -514,19 +509,17 @@ static int RENAME(qpel_motion_search)(MpegEncContext * s, return dmin; } -static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, +static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, uint8_t * const mv_penalty) { const int size= 0; - const int xx = 16 * s->mb_x; - const int yy = 16 * s->mb_y; + const int h= 16; const int penalty_factor= s->me.mb_penalty_factor; - const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these - const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit me_cmp_func cmp_sub, chroma_cmp_sub; int d; - LOAD_COMMON(xx, yy); + LOAD_COMMON //FIXME factorize @@ -597,15 +590,16 @@ if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin, - Picture *ref_picture, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int const pred_x, int const pred_y, int const penalty_factor, - int const xmin, int const ymin, int const xmax, int const ymax, int const shift, - uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty + int const shift, + uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty ) { me_cmp_func cmp, chroma_cmp; int next_dir=-1; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -639,15 +633,16 @@ static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, in } static inline int RENAME(funny_diamond_search)(MpegEncContext * s, int *best, int dmin, - Picture *ref_picture, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int const pred_x, int const pred_y, int 
const penalty_factor, - int const xmin, int const ymin, int const xmax, int const ymax, int const shift, - uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty + int const shift, + uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty ) { me_cmp_func cmp, chroma_cmp; int dia_size; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -730,17 +725,18 @@ if(256*256*256*64 % (stats[0]+1)==0){ #define MAX_SAB_SIZE 16 static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int dmin, - Picture *ref_picture, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int const pred_x, int const pred_y, int const penalty_factor, - int const xmin, int const ymin, int const xmax, int const ymax, int const shift, - uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty + int const shift, + uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty ) { me_cmp_func cmp, chroma_cmp; Minima minima[MAX_SAB_SIZE]; const int minima_count= ABS(s->me.dia_size); int i, j; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -810,15 +806,16 @@ static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int } static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin, - Picture *ref_picture, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int const pred_x, int const pred_y, int const penalty_factor, - int const xmin, int const ymin, int const xmax, int const ymax, int const shift, - uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty + int const shift, + uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty ) { me_cmp_func cmp, chroma_cmp; int dia_size; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -886,10 +883,10 @@ if(256*256*256*64 % (stats[0]+1)==0){ return dmin; } -static int RENAME(epzs_motion_search)(MpegEncContext * s, int block, +static int RENAME(epzs_motion_search)(MpegEncContext * s, int *mx_ptr, int *my_ptr, - int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], + int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], int ref_mv_scale, uint8_t * const mv_penalty) { int best[2]={0, 0}; @@ -899,10 +896,11 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block, int map_generation; const int penalty_factor= s->me.penalty_factor; const int size=0; - const int ref_mv_stride= s->mb_stride; - const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; + const int h=16; + const int ref_mv_stride= s->mb_stride; //pass as arg FIXME + const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME me_cmp_func cmp, chroma_cmp; - LOAD_COMMON(s->mb_x*16, s->mb_y*16); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -973,21 +971,21 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block, //check(best[0],best[1],0, b0) if(s->me.dia_size==-1) - dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, 
ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else if(s->me.dia_size<-1) - dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else if(s->me.dia_size<2) - dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else - dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); //check(best[0],best[1],0, b1) *mx_ptr= best[0]; @@ -998,10 +996,11 @@ static int RENAME(epzs_motion_search)(MpegEncContext * s, int block, } #ifndef CMP_DIRECT /* no 4mv search needed in direct mode */ -static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block, +static int RENAME(epzs_motion_search4)(MpegEncContext * s, int *mx_ptr, int *my_ptr, int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], int ref_mv_scale, uint8_t * const mv_penalty) { int best[2]={0, 0}; @@ -1011,10 +1010,11 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block, int map_generation; const int penalty_factor= s->me.penalty_factor; const int size=1; + const int h=8; const int ref_mv_stride= s->mb_stride; const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; me_cmp_func cmp, chroma_cmp; - LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8); + LOAD_COMMON cmp= s->dsp.me_cmp[size]; chroma_cmp= s->dsp.me_cmp[size+1]; @@ -1024,7 +1024,7 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block, dmin = 1000000; //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); /* first line */ - if (s->mb_y == 0 && block<2) { + if (s->mb_y == 0/* && block<2*/) { CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) @@ -1049,21 +1049,100 @@ static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block, } if(s->me.dia_size==-1) - dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else if(s->me.dia_size<-1) - dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, 
size, h, mv_penalty); else if(s->me.dia_size<2) - dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); else - dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture, - pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, - shift, map, map_generation, size, mv_penalty); + dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + + + *mx_ptr= best[0]; + *my_ptr= best[1]; + +// printf("%d %d %d \n", best[0], best[1], dmin); + return dmin; +} + +//try to merge with above FIXME (needs PSNR test) +static int RENAME(epzs_motion_search2)(MpegEncContext * s, + int *mx_ptr, int *my_ptr, + int P[10][2], int pred_x, int pred_y, + uint8_t *src_data[3], + uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], + int ref_mv_scale, uint8_t * const mv_penalty) +{ + int best[2]={0, 0}; + int d, dmin; + const int shift= 1+s->quarter_sample; + uint32_t *map= s->me.map; + int map_generation; + const int penalty_factor= s->me.penalty_factor; + const int size=0; //FIXME pass as arg + const int h=8; + const int ref_mv_stride= s->mb_stride; + const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; + me_cmp_func cmp, chroma_cmp; + LOAD_COMMON + + cmp= s->dsp.me_cmp[size]; + chroma_cmp= s->dsp.me_cmp[size+1]; + + map_generation= update_map_generation(s); + + dmin = 1000000; +//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); + /* first line */ + if (s->mb_y == 0) { + CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) + CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) + }else{ + CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) + //FIXME try some early stop + if(dmin>64*2){ + CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) + CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift) + CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) + CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) + } + } + if(dmin>64*4){ + CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) + CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, + (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) + } + + if(s->me.dia_size==-1) + dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + else if(s->me.dia_size<-1) + dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + else if(s->me.dia_size<2) + dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + else + dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride, + pred_x, pred_y, penalty_factor, + shift, map, map_generation, size, h, mv_penalty); + *mx_ptr= best[0]; 
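/* Note on epzs_motion_search2() above (summary added for clarity): it is the
 * 16x8 variant of the EPZS search used for field motion estimation.  Like the
 * 16x16 version it first scores the spatial predictors (P_LEFT, P_TOP,
 * P_TOPRIGHT, P_MEDIAN, P_MV1) and the temporally scaled co-located vector
 * ((last_mv * ref_mv_scale + (1<<15)) >> 16), skips the extra candidates when
 * dmin is already small (the dmin > 64*2 / 64*4 thresholds), and then hands
 * the best start point to the configured diamond search (funny/sab/small/var)
 * with size=0 and h=8 so the compare functions operate on a 16x8 field block. */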
*my_ptr= best[1]; diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c index 4cc164de31..81cd9619c1 100644 --- a/libavcodec/mpeg12.c +++ b/libavcodec/mpeg12.c @@ -29,6 +29,9 @@ #include "mpeg12data.h" +//#undef NDEBUG +//#include + /* Start codes. */ #define SEQ_END_CODE 0x000001b7 @@ -476,12 +479,12 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number) } static inline void put_mb_modes(MpegEncContext *s, int n, int bits, - int has_mv) + int has_mv, int field_motion) { put_bits(&s->pb, n, bits); if (!s->frame_pred_frame_dct) { if (has_mv) - put_bits(&s->pb, 2, 2); /* motion_type: frame */ + put_bits(&s->pb, 2, 2 - field_motion); /* motion_type: frame/field */ put_bits(&s->pb, 1, s->interlaced_dct); } } @@ -501,9 +504,9 @@ void mpeg1_encode_mb(MpegEncContext *s, if (s->block_last_index[i] >= 0) cbp |= 1 << (5 - i); } - + if (cbp == 0 && !first_mb && (mb_x != s->mb_width - 1 || (mb_y != s->mb_height - 1 && s->codec_id == CODEC_ID_MPEG1VIDEO)) && - ((s->pict_type == P_TYPE && (motion_x | motion_y) == 0) || + ((s->pict_type == P_TYPE && s->mv_type == MV_TYPE_16X16 && (motion_x | motion_y) == 0) || (s->pict_type == B_TYPE && s->mv_dir == s->last_mv_dir && (((s->mv_dir & MV_DIR_FORWARD) ? ((s->mv[0][0][0] - s->last_mv[0][0][0])|(s->mv[0][0][1] - s->last_mv[0][0][1])) : 0) | ((s->mv_dir & MV_DIR_BACKWARD) ? ((s->mv[1][0][0] - s->last_mv[1][0][0])|(s->mv[1][0][1] - s->last_mv[1][0][1])) : 0)) == 0))) { s->mb_skip_run++; @@ -511,6 +514,10 @@ void mpeg1_encode_mb(MpegEncContext *s, s->skip_count++; s->misc_bits++; s->last_bits++; + if(s->pict_type == P_TYPE){ + s->last_mv[0][1][0]= s->last_mv[0][0][0]= + s->last_mv[0][1][1]= s->last_mv[0][0][1]= 0; + } } else { if(first_mb){ assert(s->mb_skip_run == 0); @@ -521,150 +528,167 @@ void mpeg1_encode_mb(MpegEncContext *s, if (s->pict_type == I_TYPE) { if(s->dquant && cbp){ - put_mb_modes(s, 2, 1, 0); /* macroblock_type : macroblock_quant = 1 */ + put_mb_modes(s, 2, 1, 0, 0); /* macroblock_type : macroblock_quant = 1 */ put_bits(&s->pb, 5, s->qscale); }else{ - put_mb_modes(s, 1, 1, 0); /* macroblock_type : macroblock_quant = 0 */ + put_mb_modes(s, 1, 1, 0, 0); /* macroblock_type : macroblock_quant = 0 */ s->qscale -= s->dquant; } s->misc_bits+= get_bits_diff(s); s->i_count++; } else if (s->mb_intra) { if(s->dquant && cbp){ - put_mb_modes(s, 6, 0x01, 0); + put_mb_modes(s, 6, 0x01, 0, 0); put_bits(&s->pb, 5, s->qscale); }else{ - put_mb_modes(s, 5, 0x03, 0); + put_mb_modes(s, 5, 0x03, 0, 0); s->qscale -= s->dquant; } s->misc_bits+= get_bits_diff(s); s->i_count++; - s->last_mv[0][0][0] = - s->last_mv[0][0][1] = 0; + memset(s->last_mv, 0, sizeof(s->last_mv)); } else if (s->pict_type == P_TYPE) { + if(s->mv_type == MV_TYPE_16X16){ if (cbp != 0) { - if (motion_x == 0 && motion_y == 0) { + if ((motion_x|motion_y) == 0) { if(s->dquant){ - put_mb_modes(s, 5, 1, 0); /* macroblock_pattern & quant */ + put_mb_modes(s, 5, 1, 0, 0); /* macroblock_pattern & quant */ put_bits(&s->pb, 5, s->qscale); }else{ - put_mb_modes(s, 2, 1, 0); /* macroblock_pattern only */ + put_mb_modes(s, 2, 1, 0, 0); /* macroblock_pattern only */ } s->misc_bits+= get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); } else { if(s->dquant){ - put_mb_modes(s, 5, 2, 1); /* motion + cbp */ + put_mb_modes(s, 5, 2, 1, 0); /* motion + cbp */ put_bits(&s->pb, 5, s->qscale); }else{ - put_mb_modes(s, 1, 1, 1); /* motion + cbp */ + put_mb_modes(s, 1, 1, 1, 0); /* motion + cbp */ } s->misc_bits+= get_bits_diff(s); mpeg1_encode_motion(s, motion_x - 
s->last_mv[0][0][0], s->f_code); // RAL: f_code parameter added mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); // RAL: f_code parameter added s->mv_bits+= get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); } } else { put_bits(&s->pb, 3, 1); /* motion only */ if (!s->frame_pred_frame_dct) put_bits(&s->pb, 2, 2); /* motion_type: frame */ + s->misc_bits+= get_bits_diff(s); mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); // RAL: f_code parameter added mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); // RAL: f_code parameter added s->qscale -= s->dquant; s->mv_bits+= get_bits_diff(s); } - s->f_count++; - } else - { // RAL: All the following bloc added for B frames: - if (cbp != 0) - { // With coded bloc pattern - if (s->mv_dir == (MV_DIR_FORWARD | MV_DIR_BACKWARD)) - { // Bi-directional motion - if (s->dquant) { - put_mb_modes(s, 5, 2, 1); - put_bits(&s->pb, 5, s->qscale); - } else { - put_mb_modes(s, 2, 3, 1); - } - s->misc_bits += get_bits_diff(s); - mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); - mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); - mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); - mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); - s->b_count++; - s->f_count++; - s->mv_bits += get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); - } - else if (s->mv_dir == MV_DIR_BACKWARD) - { // Backward motion - if (s->dquant) { - put_mb_modes(s, 6, 2, 1); - put_bits(&s->pb, 5, s->qscale); - } else { - put_mb_modes(s, 3, 3, 1); - } - s->misc_bits += get_bits_diff(s); - mpeg1_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code); - mpeg1_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code); - s->b_count++; - s->mv_bits += get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); - } - else if (s->mv_dir == MV_DIR_FORWARD) - { // Forward motion - if (s->dquant) { - put_mb_modes(s, 6, 3, 1); - put_bits(&s->pb, 5, s->qscale); - } else { - put_mb_modes(s, 4, 3, 1); - } - s->misc_bits += get_bits_diff(s); - mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); - mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); - s->f_count++; - s->mv_bits += get_bits_diff(s); - put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); - } + s->last_mv[0][1][0]= s->last_mv[0][0][0]= motion_x; + s->last_mv[0][1][1]= s->last_mv[0][0][1]= motion_y; + }else{ + assert(!s->frame_pred_frame_dct && s->mv_type == MV_TYPE_FIELD); + + if (cbp) { + if(s->dquant){ + put_mb_modes(s, 5, 2, 1, 1); /* motion + cbp */ + put_bits(&s->pb, 5, s->qscale); + }else{ + put_mb_modes(s, 1, 1, 1, 1); /* motion + cbp */ } - else - { // No coded bloc pattern - if (s->mv_dir == (MV_DIR_FORWARD | MV_DIR_BACKWARD)) - { // Bi-directional motion - put_bits(&s->pb, 2, 2); /* backward & forward motion */ - if (!s->frame_pred_frame_dct) - put_bits(&s->pb, 2, 2); /* motion_type: frame */ - mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); - mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); - mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); - mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); - s->b_count++; - s->f_count++; - } - else if (s->mv_dir == MV_DIR_BACKWARD) - { // Backward motion - put_bits(&s->pb, 3, 2); /* backward motion only */ - if 
(!s->frame_pred_frame_dct) - put_bits(&s->pb, 2, 2); /* motion_type: frame */ - mpeg1_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code); - mpeg1_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code); - s->b_count++; - } - else if (s->mv_dir == MV_DIR_FORWARD) - { // Forward motion - put_bits(&s->pb, 4, 2); /* forward motion only */ - if (!s->frame_pred_frame_dct) - put_bits(&s->pb, 2, 2); /* motion_type: frame */ - mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code); - mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code); - s->f_count++; - } + } else { + put_bits(&s->pb, 3, 1); /* motion only */ + put_bits(&s->pb, 2, 1); /* motion_type: field */ s->qscale -= s->dquant; - s->mv_bits += get_bits_diff(s); - } - // End of bloc from RAL + } + s->misc_bits+= get_bits_diff(s); + for(i=0; i<2; i++){ + put_bits(&s->pb, 1, s->field_select[0][i]); + mpeg1_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code); + mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code); + s->last_mv[0][i][0]= s->mv[0][i][0]; + s->last_mv[0][i][1]= 2*s->mv[0][i][1]; + } + s->mv_bits+= get_bits_diff(s); } + if(cbp) + put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); + s->f_count++; + } else{ + static const int mb_type_len[4]={0,3,4,2}; //bak,for,bi + + if(s->mv_type == MV_TYPE_16X16){ + if (cbp){ // With coded bloc pattern + if (s->dquant) { + if(s->mv_dir == MV_DIR_FORWARD) + put_mb_modes(s, 6, 3, 1, 0); + else + put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 0); + put_bits(&s->pb, 5, s->qscale); + } else { + put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 0); + } + }else{ // No coded bloc pattern + put_bits(&s->pb, mb_type_len[s->mv_dir], 2); + if (!s->frame_pred_frame_dct) + put_bits(&s->pb, 2, 2); /* motion_type: frame */ + s->qscale -= s->dquant; + } + s->misc_bits += get_bits_diff(s); + if (s->mv_dir&MV_DIR_FORWARD){ + mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); + mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); + s->last_mv[0][0][0]=s->last_mv[0][1][0]= s->mv[0][0][0]; + s->last_mv[0][0][1]=s->last_mv[0][1][1]= s->mv[0][0][1]; + s->f_count++; + } + if (s->mv_dir&MV_DIR_BACKWARD){ + mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); + mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); + s->last_mv[1][0][0]=s->last_mv[1][1][0]= s->mv[1][0][0]; + s->last_mv[1][0][1]=s->last_mv[1][1][1]= s->mv[1][0][1]; + s->b_count++; + } + }else{ + assert(s->mv_type == MV_TYPE_FIELD); + assert(!s->frame_pred_frame_dct); + if (cbp){ // With coded bloc pattern + if (s->dquant) { + if(s->mv_dir == MV_DIR_FORWARD) + put_mb_modes(s, 6, 3, 1, 1); + else + put_mb_modes(s, mb_type_len[s->mv_dir]+3, 2, 1, 1); + put_bits(&s->pb, 5, s->qscale); + } else { + put_mb_modes(s, mb_type_len[s->mv_dir], 3, 1, 1); + } + }else{ // No coded bloc pattern + put_bits(&s->pb, mb_type_len[s->mv_dir], 2); + put_bits(&s->pb, 2, 1); /* motion_type: field */ + s->qscale -= s->dquant; + } + s->misc_bits += get_bits_diff(s); + if (s->mv_dir&MV_DIR_FORWARD){ + for(i=0; i<2; i++){ + put_bits(&s->pb, 1, s->field_select[0][i]); + mpeg1_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code); + mpeg1_encode_motion(s, s->mv[0][i][1] - (s->last_mv[0][i][1]>>1), s->f_code); + s->last_mv[0][i][0]= s->mv[0][i][0]; + s->last_mv[0][i][1]= 2*s->mv[0][i][1]; + } + s->f_count++; + } + if (s->mv_dir&MV_DIR_BACKWARD){ + for(i=0; i<2; i++){ + put_bits(&s->pb, 1, s->field_select[1][i]); 
+ mpeg1_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0] , s->b_code); + mpeg1_encode_motion(s, s->mv[1][i][1] - (s->last_mv[1][i][1]>>1), s->b_code); + s->last_mv[1][i][0]= s->mv[1][i][0]; + s->last_mv[1][i][1]= 2*s->mv[1][i][1]; + } + s->b_count++; + } + } + s->mv_bits += get_bits_diff(s); + if(cbp) + put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); + } for(i=0;i<6;i++) { if (cbp & (1 << (5 - i))) { mpeg1_encode_block(s, block[i], i); @@ -676,18 +700,6 @@ void mpeg1_encode_mb(MpegEncContext *s, else s->p_tex_bits+= get_bits_diff(s); } - - // RAL: By this: - if (s->mv_dir & MV_DIR_FORWARD) - { - s->last_mv[0][0][0]= s->mv[0][0][0]; - s->last_mv[0][0][1]= s->mv[0][0][1]; - } - if (s->mv_dir & MV_DIR_BACKWARD) - { - s->last_mv[1][0][0]= s->mv[1][0][0]; - s->last_mv[1][0][1]= s->mv[1][0][1]; - } } // RAL: Parameter added: f_or_b_code @@ -1952,7 +1964,7 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s) s->repeat_first_field = get_bits1(&s->gb); s->chroma_420_type = get_bits1(&s->gb); s->progressive_frame = get_bits1(&s->gb); - + if(s->picture_structure == PICT_FRAME) s->first_field=0; else{ @@ -1963,13 +1975,9 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s) if(s->alternate_scan){ ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan); ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); }else{ ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct); ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); } /* composite display not parsed */ @@ -2103,10 +2111,10 @@ static int mpeg_decode_slice(AVCodecContext *avctx, s->qscale = get_qscale(s); if (s->first_slice && (s->first_field || s->picture_structure==PICT_FRAME)) { if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", + av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1], s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), - s->progressive_sequence ? "pro" :"", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", + s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors, s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? 
"420" :""); } diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 1b3be9dbc2..bbb427b558 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -252,8 +252,13 @@ int DCT_common_init(MpegEncContext *s) /* load & permutate scantables note: only wmv uses differnt ones */ - ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct); - ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct); + if(s->alternate_scan){ + ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan); + }else{ + ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct); + } ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); @@ -394,7 +399,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){ /* init common structure for both encoder and decoder */ int MPV_common_init(MpegEncContext *s) { - int y_size, c_size, yc_size, i, mb_array_size, x, y; + int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y; dsputil_init(&s->dsp, s->avctx); DCT_common_init(s); @@ -407,6 +412,7 @@ int MPV_common_init(MpegEncContext *s) s->b8_stride = s->mb_width*2 + 1; s->b4_stride = s->mb_width*4 + 1; mb_array_size= s->mb_height * s->mb_stride; + mv_table_size= (s->mb_height+2) * s->mb_stride + 1; /* set default edge pos, will be overriden in decode_header if needed */ s->h_edge_pos= s->mb_width*16; @@ -458,8 +464,6 @@ int MPV_common_init(MpegEncContext *s) s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed? 
if (s->encoding) { - int mv_table_size= s->mb_stride * (s->mb_height+2) + 1; - /* Allocate MV tables */ CHECKED_ALLOCZ(s->p_mv_table_base , mv_table_size * 2 * sizeof(int16_t)) CHECKED_ALLOCZ(s->b_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t)) @@ -491,7 +495,7 @@ int MPV_common_init(MpegEncContext *s) CHECKED_ALLOCZ(s->avctx->stats_out, 256); /* Allocate MB type table */ - CHECKED_ALLOCZ(s->mb_type , mb_array_size * sizeof(uint8_t)) //needed for encoding + CHECKED_ALLOCZ(s->mb_type , mb_array_size * sizeof(uint16_t)) //needed for encoding CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int)) @@ -513,10 +517,21 @@ int MPV_common_init(MpegEncContext *s) CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t)) - if(s->codec_id==CODEC_ID_MPEG4){ + if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){ /* interlaced direct mode decoding tables */ - CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t)) - CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t)) + for(i=0; i<2; i++){ + int j, k; + for(j=0; j<2; j++){ + for(k=0; k<2; k++){ + CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k] , mv_table_size * 2 * sizeof(int16_t)) + s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] + s->mb_stride + 1; + } + CHECKED_ALLOCZ(s->b_field_select_table[i][j] , mb_array_size * 2 * sizeof(uint8_t)) + CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j] , mv_table_size * 2 * sizeof(int16_t)) + s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j] + s->mb_stride + 1; + } + CHECKED_ALLOCZ(s->p_field_select_table[i] , mb_array_size * 2 * sizeof(uint8_t)) + } } if (s->out_format == FMT_H263) { /* ac values */ @@ -583,7 +598,7 @@ int MPV_common_init(MpegEncContext *s) /* init common structure for both encoder and decoder */ void MPV_common_end(MpegEncContext *s) { - int i; + int i, j, k; av_freep(&s->parse_context.buffer); s->parse_context.buffer_size=0; @@ -601,6 +616,18 @@ void MPV_common_end(MpegEncContext *s) s->b_bidir_forw_mv_table= NULL; s->b_bidir_back_mv_table= NULL; s->b_direct_mv_table= NULL; + for(i=0; i<2; i++){ + for(j=0; j<2; j++){ + for(k=0; k<2; k++){ + av_freep(&s->b_field_mv_table_base[i][j][k]); + s->b_field_mv_table[i][j][k]=NULL; + } + av_freep(&s->b_field_select_table[i][j]); + av_freep(&s->p_field_mv_table_base[i][j]); + s->p_field_mv_table[i][j]=NULL; + } + av_freep(&s->p_field_select_table[i]); + } av_freep(&s->dc_val[0]); av_freep(&s->ac_val[0]); @@ -618,8 +645,6 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->tex_pb_buffer); av_freep(&s->pb2_buffer); av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL; - av_freep(&s->field_mv_table); - av_freep(&s->field_select_table); av_freep(&s->avctx->stats_out); av_freep(&s->ac_stats); av_freep(&s->error_status_table); @@ -692,7 +717,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->me_method = avctx->me_method; /* Fixed QSCALE */ - s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE); + s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE); s->adaptive_quant= ( s->avctx->lumi_masking || s->avctx->dark_masking @@ -702,8 +727,9 @@ int MPV_encode_init(AVCodecContext *avctx) || (s->flags&CODEC_FLAG_QP_RD)) && !s->fixed_qscale; - s->obmc= (s->flags & CODEC_FLAG_OBMC); - s->loop_filter= (s->flags & CODEC_FLAG_LOOP_FILTER); + s->obmc= !!(s->flags & CODEC_FLAG_OBMC); + s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER); + s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN); if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
&& s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){ @@ -934,7 +960,7 @@ int MPV_encode_init(AVCodecContext *avctx) if(s->modified_quant) s->chroma_qscale_table= ff_h263_chroma_qscale_table; s->progressive_frame= - s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT); + s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME)); ff_init_me(s); @@ -1610,7 +1636,7 @@ static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int st for(y=0; y<h; y+=16){ for(x=0; x<w; x+=16){ int offset= x + y*stride; - int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride); + int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16); int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8; int sae = get_sae(src + offset, mean, stride); @@ -1906,7 +1932,7 @@ int MPV_encode_picture(AVCodecContext *avctx, if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate){ int vbv_delay; - assert(s->repeat_first_field==0 && s->avctx->repeat_pic==0); + assert(s->repeat_first_field==0); vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate); assert(vbv_delay < 0xFFFF); @@ -3300,7 +3326,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) if(s->flags&CODEC_FLAG_INTERLACED_DCT){ int progressive_score, interlaced_score; - + progressive_score= pix_vcmp16x8(ptr, wrap_y ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y ); interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y , wrap_y*2); @@ -3417,12 +3443,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) /* pre quantization */ if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){ //FIXME optimize - if(s->dsp.pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1; - if(s->dsp.pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1; - if(s->dsp.pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1; - if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1; - if(s->dsp.pix_abs8x8(ptr_cb , dest_cb , wrap_c) < 20*s->qscale) skip_dct[4]= 1; - if(s->dsp.pix_abs8x8(ptr_cr , dest_cr , wrap_c) < 20*s->qscale) skip_dct[5]= 1; + if(s->dsp.sad[1](NULL, ptr_y , dest_y , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1; + if(s->dsp.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1; + if(s->dsp.sad[1](NULL, ptr_y +dct_offset , dest_y +dct_offset , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1; + if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1; + if(s->dsp.sad[1](NULL, ptr_cb , dest_cb , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1; + if(s->dsp.sad[1](NULL, ptr_cr , dest_cr , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1; #if 0 { static int stat[7]; @@ -3484,6 +3510,19 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale; } + //FIXME the non-C dct_quantize code returns an incorrect block_last_index + if(s->alternate_scan && s->dct_quantize != dct_quantize_c){ + for(i=0; i<6; i++){ + int j; + if(s->block_last_index[i]>0){ + for(j=63; j>0; j--){ + if(s->block[i][ s->intra_scantable.permutated[j] ]) break; + } + s->block_last_index[i]= j; + } + } + } + /* huffman encode */ switch(s->codec_id){ //FIXME funct ptr could be slightly faster case CODEC_ID_MPEG1VIDEO: @@ -3724,9 +3763,9 @@ static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, in int x,y; if(w==16 && h==16) - return
s->dsp.sse[0](NULL, src1, src2, stride); + return s->dsp.sse[0](NULL, src1, src2, stride, 16); else if (w==8 && h==8) - return s->dsp.sse[1](NULL, src1, src2, stride); + return s->dsp.sse[1](NULL, src1, src2, stride, 8); for(y=0; ymb_y*16 + 16 > s->height) h= s->height- s->mb_y*16; if(w==16 && h==16) - return s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize) - +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize) - +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize); + return s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16) + +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8) + +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8); else return sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize) +sse(s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize) @@ -3759,7 +3798,7 @@ static int sse_mb(MpegEncContext *s){ static void encode_picture(MpegEncContext *s, int picture_number) { int mb_x, mb_y, pdif = 0; - int i; + int i, j; int bits; MpegEncContext best_s, backup_s; uint8_t bit_buf[2][3000]; @@ -3843,7 +3882,8 @@ static void encode_picture(MpegEncContext *s, int picture_number) //FIXME do we need to zero them? memset(s->current_picture.motion_val[0][0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2); memset(s->p_mv_table , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2); - memset(s->mb_type , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height); + for(i=0; i<s->mb_stride*s->mb_height; i++) + s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; if(!s->fixed_qscale){ /* finding spatial complexity for I-frame rate control */ @@ -3868,32 +3908,61 @@ static void encode_picture(MpegEncContext *s, int picture_number) if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){ s->pict_type= I_TYPE; - memset(s->mb_type , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height); + for(i=0; i<s->mb_stride*s->mb_height; i++) + s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA; //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum); } if(!s->umvplus){ if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) { - s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER); - + s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER); + + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + int a,b; + a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select + b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I); + s->f_code= FFMAX(s->f_code, FFMAX(a,b)); + } + ff_fix_long_p_mvs(s); + ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0); + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + for(i=0; i<2; i++){ + for(j=0; j<2; j++) + ff_fix_long_mvs(s, s->p_field_select_table[i], j, + s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0); + } + } } if(s->pict_type==B_TYPE){ int a, b; - a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD); - b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR); + a = ff_get_best_fcode(s, s->b_forw_mv_table,
CANDIDATE_MB_TYPE_FORWARD); + b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR); s->f_code = FFMAX(a, b); - a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD); - b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR); + a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD); + b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR); s->b_code = FFMAX(a, b); - ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD); - ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD); - ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR); - ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR); + ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1); + ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1); + if(s->flags & CODEC_FLAG_INTERLACED_ME){ + int dir; + for(dir=0; dir<2; dir++){ + for(i=0; i<2; i++){ + for(j=0; j<2; j++){ + int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) + : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I); + ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, + s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1); + } + } + } + } } } @@ -3990,10 +4059,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->current_picture_ptr->error[i] = 0; } s->mb_skip_run = 0; - s->last_mv[0][0][0] = 0; - s->last_mv[0][0][1] = 0; - s->last_mv[1][0][0] = 0; - s->last_mv[1][0][1] = 0; + memset(s->last_mv, 0, sizeof(s->last_mv)); s->last_mv_dir = 0; @@ -4027,6 +4093,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) int mb_type= s->mb_type[xy]; // int d; int dmin= INT_MAX; + int dir; s->mb_x = mb_x; ff_update_block_index(s); @@ -4134,25 +4201,37 @@ static void encode_picture(MpegEncContext *s, int picture_number) backup_s.tex_pb= s->tex_pb; } - if(mb_type&MB_TYPE_INTER){ + if(mb_type&CANDIDATE_MB_TYPE_INTER){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; s->mv[0][0][0] = s->p_mv_table[xy][0]; s->mv[0][0][1] = s->p_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]); } - if(mb_type&MB_TYPE_SKIPED){ + if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[0][i] = s->p_field_select_table[i][xy]; + s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0]; + s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1]; + } + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(mb_type&CANDIDATE_MB_TYPE_SKIPED){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; s->mv[0][0][0] = 0; s->mv[0][0][1] = 0; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_SKIPED, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]); } - if(mb_type&MB_TYPE_INTER4V){ + if(mb_type&CANDIDATE_MB_TYPE_INTER4V){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_8X8; s->mb_intra= 
0; @@ -4160,28 +4239,28 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0]; s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1]; } - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); } - if(mb_type&MB_TYPE_FORWARD){ + if(mb_type&CANDIDATE_MB_TYPE_FORWARD){ s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; s->mv[0][0][0] = s->b_forw_mv_table[xy][0]; s->mv[0][0][1] = s->b_forw_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]); } - if(mb_type&MB_TYPE_BACKWARD){ + if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){ s->mv_dir = MV_DIR_BACKWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; s->mv[1][0][0] = s->b_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_back_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]); } - if(mb_type&MB_TYPE_BIDIR){ + if(mb_type&CANDIDATE_MB_TYPE_BIDIR){ s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; s->mv_type = MV_TYPE_16X16; s->mb_intra= 0; @@ -4189,10 +4268,10 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1]; s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1]; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); } - if(mb_type&MB_TYPE_DIRECT){ + if(mb_type&CANDIDATE_MB_TYPE_DIRECT){ int mx= s->b_direct_mv_table[xy][0]; int my= s->b_direct_mv_table[xy][1]; @@ -4201,16 +4280,54 @@ static void encode_picture(MpegEncContext *s, int picture_number) #ifdef CONFIG_RISKY ff_mpeg4_set_direct_mv(s, mx, my); #endif - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, &dmin, &next_block, mx, my); } - if(mb_type&MB_TYPE_INTRA){ + if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[0][i] = s->b_field_select_table[0][i][xy]; + s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0]; + s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1]; + } + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ + s->mv_dir = MV_DIR_BACKWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[1][i] = s->b_field_select_table[1][i][xy]; + s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0]; + s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1]; + } + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(dir=0; dir<2; dir++){ + for(i=0; i<2; i++){ + j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy]; + s->mv[dir][i][0] = 
s->b_field_mv_table[dir][i][j][xy][0]; + s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1]; + } + } + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, + &dmin, &next_block, 0, 0); + } + if(mb_type&CANDIDATE_MB_TYPE_INTRA){ s->mv_dir = 0; s->mv_type = MV_TYPE_16X16; s->mb_intra= 1; s->mv[0][0][0] = 0; s->mv[0][0][1] = 0; - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, &dmin, &next_block, 0, 0); if(s->h263_pred || s->h263_aic){ if(best_s.mb_intra) @@ -4252,7 +4369,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) } } - encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, + encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]); if(best_s.qscale != qp){ if(s->mb_intra){ @@ -4312,19 +4429,30 @@ static void encode_picture(MpegEncContext *s, int picture_number) // only one MB-Type possible switch(mb_type){ - case MB_TYPE_INTRA: + case CANDIDATE_MB_TYPE_INTRA: s->mv_dir = 0; s->mb_intra= 1; motion_x= s->mv[0][0][0] = 0; motion_y= s->mv[0][0][1] = 0; break; - case MB_TYPE_INTER: + case CANDIDATE_MB_TYPE_INTER: s->mv_dir = MV_DIR_FORWARD; s->mb_intra= 0; motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0]; motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1]; break; - case MB_TYPE_INTER4V: + case CANDIDATE_MB_TYPE_INTER_I: + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[0][i] = s->p_field_select_table[i][xy]; + s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0]; + s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1]; + } + motion_x = motion_y = 0; + break; + case CANDIDATE_MB_TYPE_INTER4V: s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_8X8; s->mb_intra= 0; @@ -4334,7 +4462,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) } motion_x= motion_y= 0; break; - case MB_TYPE_DIRECT: + case CANDIDATE_MB_TYPE_DIRECT: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; s->mb_intra= 0; motion_x=s->b_direct_mv_table[xy][0]; @@ -4343,7 +4471,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) ff_mpeg4_set_direct_mv(s, motion_x, motion_y); #endif break; - case MB_TYPE_BIDIR: + case CANDIDATE_MB_TYPE_BIDIR: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; s->mb_intra= 0; motion_x=0; @@ -4353,19 +4481,54 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0]; s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1]; break; - case MB_TYPE_BACKWARD: + case CANDIDATE_MB_TYPE_BACKWARD: s->mv_dir = MV_DIR_BACKWARD; s->mb_intra= 0; motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0]; motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1]; break; - case MB_TYPE_FORWARD: + case CANDIDATE_MB_TYPE_FORWARD: s->mv_dir = MV_DIR_FORWARD; s->mb_intra= 0; motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0]; motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1]; // printf(" %d %d ", motion_x, motion_y); break; + case CANDIDATE_MB_TYPE_FORWARD_I: + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[0][i] = s->b_field_select_table[0][i][xy]; + s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0]; + s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1]; + } + motion_x=motion_y=0; + break; + case 
CANDIDATE_MB_TYPE_BACKWARD_I: + s->mv_dir = MV_DIR_BACKWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(i=0; i<2; i++){ + j= s->field_select[1][i] = s->b_field_select_table[1][i][xy]; + s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0]; + s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1]; + } + motion_x=motion_y=0; + break; + case CANDIDATE_MB_TYPE_BIDIR_I: + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; + s->mv_type = MV_TYPE_FIELD; + s->mb_intra= 0; + for(dir=0; dir<2; dir++){ + for(i=0; i<2; i++){ + j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy]; + s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0]; + s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1]; + } + } + motion_x=motion_y=0; + break; default: motion_x=motion_y=0; //gcc warning fix av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n"); diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index cf2b5cf258..4bd99e8e92 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -137,6 +137,7 @@ typedef struct Picture{ int16_t (*motion_val_base[2])[2]; int8_t *ref_index[2]; uint32_t *mb_type_base; +#define MB_TYPE_INTRA MB_TYPE_INTRA4x4 //default mb_type if theres just one type #define IS_INTRA4x4(a) ((a)&MB_TYPE_INTRA4x4) #define IS_INTRA16x16(a) ((a)&MB_TYPE_INTRA16x16) #define IS_PCM(a) ((a)&MB_TYPE_INTRA_PCM) @@ -206,23 +207,28 @@ typedef struct MotionEstContext{ int mb_penalty_factor; int pre_pass; ///< = 1 for the pre pass int dia_size; + int xmin; + int xmax; + int ymin; + int ymax; uint8_t (*mv_penalty)[MAX_MV*2+1]; ///< amount of bits needed to encode a MV int (*sub_motion_search)(struct MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, Picture *ref_picture, - int n, int size, uint8_t * const mv_penalty); - int (*motion_search[7])(struct MpegEncContext * s, int block, + int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, + int size, int h, uint8_t * const mv_penalty); + int (*motion_search[7])(struct MpegEncContext * s, int *mx_ptr, int *my_ptr, - int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], + int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2], int ref_mv_scale, uint8_t * const mv_penalty); - int (*pre_motion_search)(struct MpegEncContext * s, int block, + int (*pre_motion_search)(struct MpegEncContext * s, int *mx_ptr, int *my_ptr, - int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], + int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, int16_t (*last_mv)[2], int ref_mv_scale, uint8_t * const mv_penalty); - int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, + int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], + uint8_t *ref_data[6], int stride, int uvstride, uint8_t * const mv_penalty); }MotionEstContext; @@ -351,12 +357,18 @@ typedef struct MpegEncContext { int16_t (*b_bidir_forw_mv_table_base)[2]; int16_t (*b_bidir_back_mv_table_base)[2]; int16_t (*b_direct_mv_table_base)[2]; + int16_t (*p_field_mv_table_base[2][2])[2]; + int16_t (*b_field_mv_table_base[2][2][2])[2]; int16_t (*p_mv_table)[2]; ///< MV table (1MV per MB) p-frame encoding int16_t 
(*b_forw_mv_table)[2]; ///< MV table (1MV per MB) forward mode b-frame encoding int16_t (*b_back_mv_table)[2]; ///< MV table (1MV per MB) backward mode b-frame encoding int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding int16_t (*b_direct_mv_table)[2]; ///< MV table (1MV per MB) direct mode b-frame encoding + int16_t (*p_field_mv_table[2][2])[2]; ///< MV table (2MV per MB) interlaced p-frame encoding + int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced b-frame encoding + uint8_t (*p_field_select_table[2]); + uint8_t (*b_field_select_table[2][2]); int me_method; ///< ME algorithm int scene_change_score; int mv_dir; @@ -391,17 +403,22 @@ typedef struct MpegEncContext { int mb_x, mb_y; int mb_skip_run; int mb_intra; - uint8_t *mb_type; ///< Table for MB type FIXME remove and use picture->mb_type -#define MB_TYPE_INTRA 0x01 -#define MB_TYPE_INTER 0x02 -#define MB_TYPE_INTER4V 0x04 -#define MB_TYPE_SKIPED 0x08 + uint16_t *mb_type; ///< Table for candidate MB types for encoding +#define CANDIDATE_MB_TYPE_INTRA 0x01 +#define CANDIDATE_MB_TYPE_INTER 0x02 +#define CANDIDATE_MB_TYPE_INTER4V 0x04 +#define CANDIDATE_MB_TYPE_SKIPED 0x08 //#define MB_TYPE_GMC 0x10 -#define MB_TYPE_DIRECT 0x10 -#define MB_TYPE_FORWARD 0x20 -#define MB_TYPE_BACKWARD 0x40 -#define MB_TYPE_BIDIR 0x80 +#define CANDIDATE_MB_TYPE_DIRECT 0x10 +#define CANDIDATE_MB_TYPE_FORWARD 0x20 +#define CANDIDATE_MB_TYPE_BACKWARD 0x40 +#define CANDIDATE_MB_TYPE_BIDIR 0x80 + +#define CANDIDATE_MB_TYPE_INTER_I 0x100 +#define CANDIDATE_MB_TYPE_FORWARD_I 0x200 +#define CANDIDATE_MB_TYPE_BACKWARD_I 0x400 +#define CANDIDATE_MB_TYPE_BIDIR_I 0x800 int block_index[6]; ///< index to current MB in block based arrays with edges int block_wrap[6]; @@ -551,8 +568,6 @@ typedef struct MpegEncContext { uint8_t *tex_pb_buffer; uint8_t *pb2_buffer; int mpeg_quant; - int16_t (*field_mv_table)[2][2]; ///< used for interlaced b frame decoding - int8_t (*field_select_table)[2]; ///< wtf, no really another table for interlaced b frames int t_frame; ///< time distance of first I -> B, used for interlaced b frames int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG4 @@ -748,7 +763,8 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, int mb_x, int mb_y); int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type); void ff_fix_long_p_mvs(MpegEncContext * s); -void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type); +void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select, + int16_t (*mv_table)[2], int f_code, int type, int truncate); void ff_init_me(MpegEncContext *s); int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y); diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index 6354807844..d1a2943656 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -45,7 +45,7 @@ static void sigill_handler (int sig) } #endif /* CONFIG_DARWIN */ -int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) +int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int i; int s __attribute__((aligned(16))); @@ -57,7 +57,7 @@ int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) s = 0; sad = (vector unsigned int)vec_splat_u32(0); - for(i=0;i<16;i++) { + 
for(i=0;ipix_abs16x16_x2 = pix_abs16x16_x2_altivec; - c->pix_abs16x16_y2 = pix_abs16x16_y2_altivec; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec; - c->pix_abs16x16 = pix_abs16x16_altivec; - c->pix_abs8x8 = pix_abs8x8_altivec; - c->sad[0]= sad16x16_altivec; - c->sad[1]= sad8x8_altivec; + c->pix_abs[0][1] = sad16_x2_altivec; + c->pix_abs[0][2] = sad16_y2_altivec; + c->pix_abs[0][3] = sad16_xy2_altivec; + c->pix_abs[0][0] = sad16_altivec; + c->pix_abs[1][0] = sad8_altivec; + c->sad[0]= sad16_altivec; + c->sad[1]= sad8_altivec; c->pix_norm1 = pix_norm1_altivec; c->sse[1]= sse8_altivec; c->sse[0]= sse16_altivec; diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c index 955290e44b..53d90056c3 100644 --- a/libavcodec/ratecontrol.c +++ b/libavcodec/ratecontrol.c @@ -520,7 +520,7 @@ static void adaptive_quantization(MpegEncContext *s, double q){ if(spat_cplx < 4) spat_cplx= 4; //FIXME finetune if(temp_cplx < 4) temp_cplx= 4; //FIXME finetune - if((s->mb_type[mb_xy]&MB_TYPE_INTRA)){//FIXME hq mode + if((s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTRA)){//FIXME hq mode cplx= spat_cplx; factor= 1.0 + p_masking; }else{ diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref index 08afa61a84..4ec3c8f519 100644 --- a/tests/ffmpeg.regression.ref +++ b/tests/ffmpeg.regression.ref @@ -26,21 +26,21 @@ stddev: 8.18 bytes:7602176 920a0a8a0063655d1f34dcaad7857f98 *./data/a-h263p.avi 0eb167c9dfcbeeecbf3debed8af8f811 *./data/out.yuv stddev: 2.08 bytes:7602176 -a8cc41cd5016bbb821e7c2691f5090ea *./data/a-odivx.mp4 -e48114a50ef4cfb4fe2016fa5b34ae4c *./data/out.yuv -stddev: 8.02 bytes:7602176 +66f8b4b5b4f0655cff7bdbc44969cab3 *./data/a-odivx.mp4 +5bd332c77ef45e58b7017e06a0467dd3 *./data/out.yuv +stddev: 7.94 bytes:7602176 5704a082cc5c5970620123ae20566286 *./data/a-huffyuv.avi 799d3db687f6cdd7a837ec156efc171f *./data/out.yuv stddev: 0.00 bytes:7602176 e9f63126859b97cd23cd1413038f8f7b *./data/a-mpeg4-rc.avi 90a159074b1b109569914ee63f387860 *./data/out.yuv stddev: 10.18 bytes:7145472 -b3f1425e266569d5d726b88eadc13dd4 *./data/a-mpeg4-adv.avi -fb61365b22c947adbaeab74478579020 *./data/out.yuv -stddev: 7.31 bytes:7602176 -25ec5ab399fd4db0c8aaea78cb692611 *./data/a-error-mpeg4-adv.avi -bd441fc1e2fb9a3c0bdc9c5f1ed25ef0 *./data/out.yuv -stddev: 13.57 bytes:7602176 +d7d295f97a1e07b633f973d2325880ce *./data/a-mpeg4-adv.avi +612f79510c8098f1421aa154047e2bf2 *./data/out.yuv +stddev: 7.25 bytes:7602176 +f863f4198521bd76930ea33991b47273 *./data/a-error-mpeg4-adv.avi +ba7fcd126c7c9fead5a5de71aaaf0624 *./data/out.yuv +stddev: 16.80 bytes:7602176 328ebd044362116e274739e23c482ee7 *./data/a-mpeg1b.mpg 788a9d500dc8986231a18076fc80fd73 *./data/out.yuv stddev: 10.07 bytes:7145472 diff --git a/tests/regression.sh b/tests/regression.sh index 0a895f6f45..714613a1a4 100755 --- a/tests/regression.sh +++ b/tests/regression.sh @@ -138,7 +138,7 @@ do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst # mpeg2 encoding interlaced file=${outfile}mpeg2i.mpg -do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1video -interlace $file +do_ffmpeg $file -y -qscale 10 -f pgmyuv -i $raw_src -vcodec mpeg2video -f mpeg1video -ildct $file # mpeg2 decoding do_ffmpeg $raw_dst -y -i $file -f rawvideo $raw_dst diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref index 66bcf4e318..3277747836 100644 --- a/tests/rotozoom.regression.ref +++ b/tests/rotozoom.regression.ref @@ -26,21 +26,21 @@ stddev: 5.41 bytes:7602176 f7828488c31ccb6787367ef4e4a2ad42 *./data/a-h263p.avi 
7d39d1f272205a6a231d0e0baf32ff9d *./data/out.yuv stddev: 1.91 bytes:7602176 -f17dc7346f5d1d4307ecf4507f10fcc6 *./data/a-odivx.mp4 -ff7ddb57d9038b94f08c43bae7e1329f *./data/out.yuv -stddev: 5.28 bytes:7602176 +a831828595e5764e6ee30c2d9e548385 *./data/a-odivx.mp4 +ad75d173bd30d642147f00da21df0012 *./data/out.yuv +stddev: 5.27 bytes:7602176 242a7a18c2793e115007bc163861ef4e *./data/a-huffyuv.avi dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv stddev: 0.00 bytes:7602176 6a469f42ce6946dd4c708f9e51e3da6a *./data/a-mpeg4-rc.avi df9de7134d961119705b4e0cabca1f12 *./data/out.yuv stddev: 4.20 bytes:7145472 -742ffadf3c309d2c4ac888a6a0905bf9 *./data/a-mpeg4-adv.avi -b02f71e91e9368ce94814ab3d74f91ba *./data/out.yuv -stddev: 4.97 bytes:7602176 -f2888ab759ac28aba85a16d3d54b80d0 *./data/a-error-mpeg4-adv.avi -93ab926aad2e658a5bb00c25b7cefdab *./data/out.yuv -stddev: 5.22 bytes:7602176 +483504d060b0bd8ac1acfa3a823c2ad7 *./data/a-mpeg4-adv.avi +08d24bdd7da80cffaf8abaa3e71b1843 *./data/out.yuv +stddev: 4.96 bytes:7602176 +03ff35856faefb4882eaf4d86d95bea7 *./data/a-error-mpeg4-adv.avi +8550acff0851ee915bd5800f1e20f37c *./data/out.yuv +stddev: 9.66 bytes:7602176 671802a2c5078e69f7f422765ea87f2a *./data/a-mpeg1b.mpg d3d5876cef34b728602d5a22eee9249f *./data/out.yuv stddev: 5.93 bytes:7145472
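
For reference, the encoder-side tables introduced above follow one fixed indexing convention: p_field_mv_table[field][select] and b_field_mv_table[dir][field][select] hold one MV per macroblock, and the matching *_field_select_table entries record which reference field ("select") was chosen for each of the two block fields. The standalone toy program below is not part of the patch; the table shapes and CANDIDATE_MB_TYPE_*_I values are copied from it, while MB_COUNT and all data are made-up stand-ins. It is only a minimal sketch of the [dir][field][select][mb_xy] lookup that the interlaced candidates in encode_picture() perform.

/* Standalone illustration of the field-MV table indexing used by the
 * interlaced-ME candidates above.  Sizes and values are invented; only
 * the lookup pattern mirrors the patch. */
#include <stdio.h>
#include <stdint.h>

#define MB_COUNT 4   /* toy number of macroblocks (mb_xy index) */

/* candidate bits copied from the new mpegvideo.h defines */
#define CANDIDATE_MB_TYPE_INTER_I    0x100
#define CANDIDATE_MB_TYPE_FORWARD_I  0x200
#define CANDIDATE_MB_TYPE_BACKWARD_I 0x400
#define CANDIDATE_MB_TYPE_BIDIR_I    0x800

int main(void)
{
    /* b_field_mv_table[dir][field][select][mb_xy][0..1]: one MV per MB,
       per prediction direction, block field and selected reference field */
    static int16_t b_field_mv_table[2][2][2][MB_COUNT][2];
    /* b_field_select_table[dir][field][mb_xy]: chosen reference field */
    static uint8_t b_field_select_table[2][2][MB_COUNT];
    uint16_t mb_type[MB_COUNT] = { CANDIDATE_MB_TYPE_BIDIR_I, 0, 0, 0 };
    int dir, i, xy;

    /* fill every slot with a recognizable dummy MV */
    for (dir = 0; dir < 2; dir++)
        for (i = 0; i < 2; i++)
            for (xy = 0; xy < MB_COUNT; xy++) {
                b_field_select_table[dir][i][xy]   = (uint8_t)((dir + i) & 1);
                b_field_mv_table[dir][i][0][xy][0] = (int16_t)(10*dir + i);
                b_field_mv_table[dir][i][0][xy][1] = (int16_t)-(10*dir + i);
                b_field_mv_table[dir][i][1][xy][0] = (int16_t)(100 + 10*dir + i);
                b_field_mv_table[dir][i][1][xy][1] = (int16_t)-(100 + 10*dir + i);
            }

    /* same lookup as the CANDIDATE_MB_TYPE_BIDIR_I case in encode_picture() */
    for (xy = 0; xy < MB_COUNT; xy++) {
        if (!(mb_type[xy] & CANDIDATE_MB_TYPE_BIDIR_I))
            continue;
        for (dir = 0; dir < 2; dir++)
            for (i = 0; i < 2; i++) {
                int j = b_field_select_table[dir][i][xy];
                printf("mb %d dir %d field %d -> select %d mv (%d,%d)\n",
                       xy, dir, i, j,
                       b_field_mv_table[dir][i][j][xy][0],
                       b_field_mv_table[dir][i][j][xy][1]);
            }
    }
    return 0;
}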