From 9dbcbd92e84b2cd033fa6688935ffaad4b67e64e Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 17 Apr 2002 04:32:12 +0000
Subject: [PATCH] fixed mpeg4 time stuff on encoding mpeg4 b-frame encoding
 support removed old, commented-out ratecontrol reuse motion compensation code
 between encoding & decoding prefix newly added global functions with ff_ to
 reduce namespace pollution b-frame ME (unfinished, but working) added some
 comments to mpegvideo.h do MC on encoding only once if possible bugs? ;)

Originally committed as revision 403 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/avcodec.h          |   9 +-
 libavcodec/common.c           |   4 +
 libavcodec/common.h           |   3 +
 libavcodec/dsputil.c          |  24 +
 libavcodec/dsputil.h          |   2 +
 libavcodec/h263.c             | 353 ++++++++++-----
 libavcodec/i386/dsputil_mmx.c |  30 +-
 libavcodec/motion_est.c       | 394 +++++++++++++----
 libavcodec/mpegvideo.c        | 797 ++++++++++++++++++----------------
 libavcodec/mpegvideo.h        | 109 +++--
 10 files changed, 1088 insertions(+), 637 deletions(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 07d6d5889c..3356dcfb68 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -77,16 +77,20 @@ extern int motion_estimation_method;
 /* ME algos sorted by quality */
 static const int Motion_Est_QTab[] = { 1, 4, 3, 6, 5, 2 };
 
+#define FF_MAX_B_FRAMES 4
+
 /* encoding support */
 /* note not everything is supported yet */
 
 #define CODEC_FLAG_HQ     0x0001 /* high quality (non real time) encoding */
 #define CODEC_FLAG_QSCALE 0x0002 /* use fixed qscale */
 #define CODEC_FLAG_4MV    0x0004 /* 4 MV per MB allowed */
-#define CODEC_FLAG_B      0x0008 /* use B frames */
 #define CODEC_FLAG_QPEL   0x0010 /* use qpel MC */
 #define CODEC_FLAG_GMC    0x0020 /* use GMC */
 #define CODEC_FLAG_TYPE   0x0040 /* fixed I/P frame type, from avctx->key_frame */
+/* parent program guarantees that the input for b-frame containing streams is not written to 
+   for at least s->max_b_frames+1 frames, if this is not set then the input will be copied */
+#define CODEC_FLAG_INPUT_PRESERVED 0x0100 
 
 /* codec capabilities */
 
@@ -141,7 +145,8 @@ typedef struct AVCodecContext {
     int qmin;         /* min qscale */
     int qmax;         /* max qscale */
     int max_qdiff;    /* max qscale difference between frames */
-    
+    int max_b_frames; /* maximum b frames, the output will be delayed by max_b_frames+1 relative to the input */
+
     struct AVCodec *codec;
     void *priv_data;
 
diff --git a/libavcodec/common.c b/libavcodec/common.c
index 47b23822c9..5224299fda 100644
--- a/libavcodec/common.c
+++ b/libavcodec/common.c
@@ -451,3 +451,7 @@ void free_vlc(VLC *vlc)
     free(vlc->table_codes);
 }
 
+int ff_gcd(int a, int b){ /* Euclid's algorithm; ff_gcd(a, 0) returns a */
+    while(b){ const int rem= a%b; a= b; b= rem; }
+    return a;
+}
diff --git a/libavcodec/common.h b/libavcodec/common.h
index 786f83248f..96b545d0dc 100644
--- a/libavcodec/common.h
+++ b/libavcodec/common.h
@@ -884,4 +884,7 @@ static inline int mid_pred(int a, int b, int c)
 /* memory */
 void *av_mallocz(int size);
 
+/* math */
+int ff_gcd(int a, int b);
+
 #endif
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 0d15b2893a..54779bf008 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -27,6 +27,7 @@
 
 void (*ff_idct)(DCTELEM *block);
 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
+void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
@@ -181,6 +182,28 @@ void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
     }
 }
 
+/*
+ * Write the 64 differences s1[x] - s2[x] of two 8x8 pixel blocks to block[],
+ * 8 values per row. s1 and s2 both step by stride bytes from row to row.
+ * Plain C version; dsputil_init() installs it as the diff_pixels default.
+ */
+void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride){
+    int row, col;
+
+    for(row=0; row<8; row++){
+        /* output rows are packed back to back, independent of stride */
+        DCTELEM *dst= block + 8*row;
+
+        for(col=0; col<8; col++)
+            dst[col]= s1[col] - s2[col];
+
+        /* step both inputs down to the next image line */
+        s1+= stride;
+        s2+= stride;
+    }
+}
+
+
 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
 {
     const DCTELEM *p;
@@ -898,6 +921,7 @@ void dsputil_init(void)
     ff_idct = j_rev_dct;
 #endif
     get_pixels = get_pixels_c;
+    diff_pixels = diff_pixels_c;
     put_pixels_clamped = put_pixels_clamped_c;
     add_pixels_clamped = add_pixels_clamped_c;
     gmc1= gmc1_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index ea6a3d84d2..982fab5f54 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -37,6 +37,7 @@ void dsputil_init(void);
 
 extern void (*ff_idct)(DCTELEM *block);
 extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
+extern void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
 extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
@@ -44,6 +45,7 @@ extern void (*clear_blocks)(DCTELEM *blocks);
 
 
 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
+void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 void clear_blocks_c(DCTELEM *blocks);
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index f415e00661..eb922e5b4b 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -36,7 +36,7 @@
 
 static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
 			      int n);
-static void h263_encode_motion(MpegEncContext * s, int val);
+static void h263_encode_motion(MpegEncContext * s, int val, int fcode);
 static void h263p_encode_umotion(MpegEncContext * s, int val);
 static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
 			       int n, int dc, UINT8 *scan_table);
@@ -252,78 +252,167 @@ void mpeg4_encode_mb(MpegEncContext * s,
 		    DCTELEM block[6][64],
 		    int motion_x, int motion_y)
 {
-    int cbpc, cbpy, i, cbp, pred_x, pred_y;
+    int cbpc, cbpy, i, pred_x, pred_y;
     int bits;
     
     //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
         /* compute cbp */
-        cbp = 0;
+        int cbp = 0;
         for (i = 0; i < 6; i++) {
-        if (s->block_last_index[i] >= 0)
-            cbp |= 1 << (5 - i);
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
         }
-        if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) {
-            /* skip macroblock */
-            put_bits(&s->pb, 1, 1);
-            s->misc_bits++;
-            s->last_bits++;
-            s->skip_count++;
-            return;
-        }
-        put_bits(&s->pb, 1, 0);	/* mb coded */
-        if(s->mv_type==MV_TYPE_16X16){
-            cbpc = cbp & 3;
-            put_bits(&s->pb,
-                    inter_MCBPC_bits[cbpc],
-                    inter_MCBPC_code[cbpc]);
-            cbpy = cbp >> 2;
-            cbpy ^= 0xf;
-            put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
-                
-            bits= get_bit_count(&s->pb);
-            s->misc_bits+= bits - s->last_bits;
-            s->last_bits=bits;
 
-            /* motion vectors: 16x16 mode */
-            h263_pred_motion(s, 0, &pred_x, &pred_y);
-        
-            h263_encode_motion(s, motion_x - pred_x);
-            h263_encode_motion(s, motion_y - pred_y);
-        }else{
-            cbpc = (cbp & 3)+16;
-            put_bits(&s->pb,
-                    inter_MCBPC_bits[cbpc],
-                    inter_MCBPC_code[cbpc]);
-            cbpy = cbp >> 2;
-            cbpy ^= 0xf;
-            put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
-
-            bits= get_bit_count(&s->pb);
-            s->misc_bits+= bits - s->last_bits;
-            s->last_bits=bits;
-
-            for(i=0; i<4; i++){
-                /* motion vectors: 8x8 mode*/
-                h263_pred_motion(s, i, &pred_x, &pred_y);
-
-                h263_encode_motion(s, s->motion_val[ s->block_index[i] ][0] - pred_x);
-                h263_encode_motion(s, s->motion_val[ s->block_index[i] ][1] - pred_y);
+        if(s->pict_type==B_TYPE){
+            static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
+            int mb_type=  mb_type_table[s->mv_dir];
+            
+            if(s->mb_x==0){
+                s->last_mv[0][0][0]= 
+                s->last_mv[0][0][1]= 
+                s->last_mv[1][0][0]= 
+                s->last_mv[1][0][1]= 0;
             }
-        }
-        bits= get_bit_count(&s->pb);
-        s->mv_bits+= bits - s->last_bits;
-        s->last_bits=bits;
 
-        /* encode each block */
-        for (i = 0; i < 6; i++) {
-            mpeg4_encode_block(s, block[i], i, 0, zigzag_direct);
+            /* nothing to do if this MB was skipped in the next P Frame */
+            if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){
+                s->skip_count++;
+                s->mv[0][0][0]= 
+                s->mv[0][0][1]= 
+                s->mv[1][0][0]= 
+                s->mv[1][0][1]= 0;
+//                s->mv_dir= MV_DIR_FORWARD; //doesnt matter
+                return;
+            }
+
+            if ((cbp | motion_x | motion_y | mb_type) ==0) {
+                /* direct MB with MV={0,0} */
+                put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
+                s->misc_bits++;
+                s->last_bits++;
+                s->skip_count++;
+                return;
+            }
+            put_bits(&s->pb, 1, 0);	/* mb coded modb1=0 */
+            put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
+            put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we dont need it :)
+            if(cbp) put_bits(&s->pb, 6, cbp);
+            
+            if(cbp && mb_type)
+                put_bits(&s->pb, 1, 0); /* no q-scale change */
+
+            bits= get_bit_count(&s->pb);
+            s->misc_bits+= bits - s->last_bits;
+            s->last_bits=bits;
+
+            switch(mb_type)
+            {
+            case 0: /* direct */
+                h263_encode_motion(s, motion_x, 1);
+                h263_encode_motion(s, motion_y, 1);                
+                break;
+            case 1: /* bidir */
+                h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
+                h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
+                h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
+                h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
+                s->last_mv[0][0][0]= s->mv[0][0][0];
+                s->last_mv[0][0][1]= s->mv[0][0][1];
+                s->last_mv[1][0][0]= s->mv[1][0][0];
+                s->last_mv[1][0][1]= s->mv[1][0][1];
+                break;
+            case 2: /* backward */
+                h263_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code);
+                h263_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code);
+                s->last_mv[1][0][0]= motion_x;
+                s->last_mv[1][0][1]= motion_y;
+                break;
+            case 3: /* forward */
+                h263_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);
+                h263_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);
+                s->last_mv[0][0][0]= motion_x;
+                s->last_mv[0][0][1]= motion_y;
+                break;
+            default: 
+                return;
+            }
+            bits= get_bit_count(&s->pb);
+            s->mv_bits+= bits - s->last_bits;
+            s->last_bits=bits;
+
+            /* encode each block */
+            for (i = 0; i < 6; i++) {
+                mpeg4_encode_block(s, block[i], i, 0, zigzag_direct);
+            }
+            bits= get_bit_count(&s->pb);
+            s->p_tex_bits+= bits - s->last_bits;
+            s->last_bits=bits;
+        }else{ /* s->pict_type!=B_TYPE */
+            if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) {
+                /* skip macroblock */
+                put_bits(&s->pb, 1, 1);
+                s->misc_bits++;
+                s->last_bits++;
+                s->skip_count++;
+                s->mb_skiped=1; // we need that for b-frames
+                return;
+            }
+            put_bits(&s->pb, 1, 0);	/* mb coded */
+            if(s->mv_type==MV_TYPE_16X16){
+                cbpc = cbp & 3;
+                put_bits(&s->pb,
+                        inter_MCBPC_bits[cbpc],
+                        inter_MCBPC_code[cbpc]);
+                cbpy = cbp >> 2;
+                cbpy ^= 0xf;
+                put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+                    
+                bits= get_bit_count(&s->pb);
+                s->misc_bits+= bits - s->last_bits;
+                s->last_bits=bits;
+
+                /* motion vectors: 16x16 mode */
+                h263_pred_motion(s, 0, &pred_x, &pred_y);
+            
+                h263_encode_motion(s, motion_x - pred_x, s->f_code);
+                h263_encode_motion(s, motion_y - pred_y, s->f_code);
+            }else{
+                cbpc = (cbp & 3)+16;
+                put_bits(&s->pb,
+                        inter_MCBPC_bits[cbpc],
+                        inter_MCBPC_code[cbpc]);
+                cbpy = cbp >> 2;
+                cbpy ^= 0xf;
+                put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+                bits= get_bit_count(&s->pb);
+                s->misc_bits+= bits - s->last_bits;
+                s->last_bits=bits;
+
+                for(i=0; i<4; i++){
+                    /* motion vectors: 8x8 mode*/
+                    h263_pred_motion(s, i, &pred_x, &pred_y);
+
+                    h263_encode_motion(s, s->motion_val[ s->block_index[i] ][0] - pred_x, s->f_code);
+                    h263_encode_motion(s, s->motion_val[ s->block_index[i] ][1] - pred_y, s->f_code);
+                }
+            }
+            bits= get_bit_count(&s->pb);
+            s->mv_bits+= bits - s->last_bits;
+            s->last_bits=bits;
+
+            /* encode each block */
+            for (i = 0; i < 6; i++) {
+                mpeg4_encode_block(s, block[i], i, 0, zigzag_direct);
+            }
+            bits= get_bit_count(&s->pb);
+            s->p_tex_bits+= bits - s->last_bits;
+            s->last_bits=bits;
+            s->p_count++;
         }
-        bits= get_bit_count(&s->pb);
-        s->p_tex_bits+= bits - s->last_bits;
-        s->last_bits=bits;
-        s->p_count++;
     } else {
+        int cbp;
         int dc_diff[6];   //dc values with the dc prediction subtracted 
         int dir[6];  //prediction direction
         int zigzag_last_index[6];
@@ -427,41 +516,41 @@ void h263_encode_mb(MpegEncContext * s,
     int cbpc, cbpy, i, cbp, pred_x, pred_y;
    
     //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
-   if (!s->mb_intra) {
+    if (!s->mb_intra) {
 	   /* compute cbp */
-	   cbp = 0;
-	   for (i = 0; i < 6; i++) {
-	      if (s->block_last_index[i] >= 0)
-		   cbp |= 1 << (5 - i);
-	   }
-	   if ((cbp | motion_x | motion_y) == 0) {
-	      /* skip macroblock */
-	      put_bits(&s->pb, 1, 1);
-	      return;
-	   }
-	   put_bits(&s->pb, 1, 0);	/* mb coded */
-	   cbpc = cbp & 3;
-	   put_bits(&s->pb,
-		inter_MCBPC_bits[cbpc],
-		inter_MCBPC_code[cbpc]);
-	   cbpy = cbp >> 2;
-	   cbpy ^= 0xf;
-	   put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        cbp = 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+        if ((cbp | motion_x | motion_y) == 0) {
+            /* skip macroblock */
+            put_bits(&s->pb, 1, 1);
+            return;
+        }
+        put_bits(&s->pb, 1, 0);    /* mb coded */
+        cbpc = cbp & 3;
+        put_bits(&s->pb,
+                 inter_MCBPC_bits[cbpc],
+                 inter_MCBPC_code[cbpc]);
+        cbpy = cbp >> 2;
+        cbpy ^= 0xf;
+        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
 
-	   /* motion vectors: 16x16 mode only now */
-      h263_pred_motion(s, 0, &pred_x, &pred_y);
+       /* motion vectors: 16x16 mode only now */
+        h263_pred_motion(s, 0, &pred_x, &pred_y);
       
-      if (!s->umvplus) {  
-         h263_encode_motion(s, motion_x - pred_x);
-         h263_encode_motion(s, motion_y - pred_y);
-      }
-      else {
-         h263p_encode_umotion(s, motion_x - pred_x);
-         h263p_encode_umotion(s, motion_y - pred_y);
-         if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+        if (!s->umvplus) {  
+            h263_encode_motion(s, motion_x - pred_x, s->f_code);
+            h263_encode_motion(s, motion_y - pred_y, s->f_code);
+        }
+        else {
+            h263p_encode_umotion(s, motion_x - pred_x);
+            h263p_encode_umotion(s, motion_y - pred_y);
+            if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
             /* To prevent Start Code emulation */
-            put_bits(&s->pb,1,1);
-      }
+                put_bits(&s->pb,1,1);
+        }
    } else {
 	/* compute cbp */
 	cbp = 0;
@@ -603,7 +692,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
     return mot_val;
 }
 
-static void h263_encode_motion(MpegEncContext * s, int val)
+static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
 {
     int range, l, m, bit_size, sign, code, bits;
 
@@ -612,7 +701,7 @@ static void h263_encode_motion(MpegEncContext * s, int val)
         code = 0;
         put_bits(&s->pb, mvtab[code][1], mvtab[code][0]);
     } else {
-        bit_size = s->f_code - 1;
+        bit_size = f_code - 1;
         range = 1 << bit_size;
         /* modulo encoding */
         l = range * 32;
@@ -893,7 +982,11 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
     put_bits(&s->pb, 1, 0);		/* vol control parameters= no */
     put_bits(&s->pb, 2, RECT_SHAPE);	/* vol shape= rectangle */
     put_bits(&s->pb, 1, 1);		/* marker bit */
-    put_bits(&s->pb, 16, s->time_increment_resolution=30000);
+    
+    s->time_increment_resolution= s->frame_rate/ff_gcd(s->frame_rate, FRAME_RATE_BASE);
+    if(s->time_increment_resolution>=256*256) s->time_increment_resolution= 256*128;
+
+    put_bits(&s->pb, 16, s->time_increment_resolution);
     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
     if (s->time_increment_bits < 1)
         s->time_increment_bits = 1;
@@ -936,18 +1029,38 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
 /* write mpeg4 VOP header */
 void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
 {
+    int time_incr;
+    int time_div, time_mod;
+    
     if(s->pict_type==I_TYPE) mpeg4_encode_vol_header(s);
-
+    
+    s->time= s->picture_number*(int64_t)FRAME_RATE_BASE*s->time_increment_resolution/s->frame_rate;
+    time_div= s->time/s->time_increment_resolution;
+    time_mod= s->time%s->time_increment_resolution;
+//printf("num:%d rate:%d base:%d\n", s->picture_number, s->frame_rate, FRAME_RATE_BASE);
+    
     if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb);
     put_bits(&s->pb, 16, 0);	        /* vop header */
     put_bits(&s->pb, 16, 0x1B6);	/* vop header */
     put_bits(&s->pb, 2, s->pict_type - 1);	/* pict type: I = 0 , P = 1 */
-    /* XXX: time base + 1 not always correct */
-    put_bits(&s->pb, 1, 1);
+
+    if(s->pict_type==B_TYPE){
+        s->bp_time= s->last_non_b_time - s->time;
+    }else{
+        s->last_time_base= s->time_base;
+        s->time_base= time_div;
+        s->pp_time= s->time - s->last_non_b_time;
+        s->last_non_b_time= s->time;
+    }
+
+    time_incr= time_div - s->last_time_base;
+    while(time_incr--)
+        put_bits(&s->pb, 1, 1);
+        
     put_bits(&s->pb, 1, 0);
 
     put_bits(&s->pb, 1, 1);	/* marker */
-    put_bits(&s->pb, s->time_increment_bits, 1);	/* XXX: correct time increment */
+    put_bits(&s->pb, s->time_increment_bits, time_mod);	/* time increment */
     put_bits(&s->pb, 1, 1);	/* marker */
     put_bits(&s->pb, 1, 1);	/* vop coded */
     if (    s->pict_type == P_TYPE 
@@ -1361,6 +1474,7 @@ static int mpeg4_resync(MpegEncContext *s)
     int mb_num_bits= av_log2(s->mb_num - 1) + 1;
     int header_extension=0, mb_num;
     int c_wrap, c_xy, l_wrap, l_xy;
+    int time_increment;
 //printf("resync at %d %d\n", s->mb_x, s->mb_y);
 //printf("%X\n", show_bits(&s->gb, 24));
 
@@ -1415,14 +1529,16 @@ static int mpeg4_resync(MpegEncContext *s)
             time_incr++;
 
         check_marker(&s->gb, "before time_increment in video packed header");
-        s->time_increment= get_bits(&s->gb, s->time_increment_bits);
+        time_increment= get_bits(&s->gb, s->time_increment_bits);
         if(s->pict_type!=B_TYPE){
+            s->last_time_base= s->time_base;
             s->time_base+= time_incr;
-            s->last_non_b_time[1]= s->last_non_b_time[0];
-            s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment;
+            s->time= s->time_base*s->time_increment_resolution + time_increment;
+            s->pp_time= s->time - s->last_non_b_time;
+            s->last_non_b_time= s->time;
         }else{
-            s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution;
-            s->time+= s->time_increment;
+            s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment;
+            s->bp_time= s->last_non_b_time - s->time;
         }
         check_marker(&s->gb, "before vop_coding_type in video packed header");
         
@@ -1642,8 +1758,8 @@ int h263_decode_mb(MpegEncContext *s,
         int modb1; // first bit of modb
         int modb2; // second bit of modb
         int mb_type;
-        int time_pp;
-        int time_pb;
+        uint16_t time_pp;
+        uint16_t time_pb;
         int xy;
 
         s->mb_intra = 0; //B-frames never contain intra blocks
@@ -1673,7 +1789,6 @@ int h263_decode_mb(MpegEncContext *s,
 //FIXME is this correct?
 /*            s->last_mv[0][0][0]=
             s->last_mv[0][0][1]=0;*/
-            s->mb_skiped = 1;
             return 0;
         }
 
@@ -1701,14 +1816,14 @@ int h263_decode_mb(MpegEncContext *s,
         mx=my=0; //for case 4, we could put this to the mb_type=4 but than gcc compains about uninitalized mx/my
         switch(mb_type)
         {
-        case 0: 
+        case 0: /* direct */
             mx = h263_decode_motion(s, 0, 1);
             my = h263_decode_motion(s, 0, 1);
-        case 4: 
+        case 4: /* direct with mx=my=0 */
             s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
             xy= s->block_index[0];
-            time_pp= s->last_non_b_time[0] - s->last_non_b_time[1];
-            time_pb= s->time - s->last_non_b_time[1];
+            time_pp= s->pp_time;
+            time_pb= time_pp - s->bp_time;
 //if(time_pp>3000 )printf("%d %d  ", time_pp, time_pb);
             //FIXME 4MV
             //FIXME avoid divides
@@ -2397,6 +2512,7 @@ printf("%d %d\n", s->sprite_delta[1][1][1], a<<s->sprite_shift[1][1]);*/
 int mpeg4_decode_picture_header(MpegEncContext * s)
 {
     int time_incr, startcode, state, v;
+    int time_increment;
 
  redo:
     /* search next start code */
@@ -2630,20 +2746,23 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
     }
 
     s->pict_type = get_bits(&s->gb, 2) + 1;	/* pict type: I = 0 , P = 1 */
-//printf("pic: %d, qpel:%d\n", s->pict_type, s->quarter_sample); 
+// printf("pic: %d, qpel:%d\n", s->pict_type, s->quarter_sample); 
     time_incr=0;
     while (get_bits1(&s->gb) != 0) 
         time_incr++;
 
     check_marker(&s->gb, "before time_increment");
-    s->time_increment= get_bits(&s->gb, s->time_increment_bits);
+    time_increment= get_bits(&s->gb, s->time_increment_bits);
+//printf(" type:%d incr:%d increment:%d\n", s->pict_type, time_incr, time_increment);
     if(s->pict_type!=B_TYPE){
+        s->last_time_base= s->time_base;
         s->time_base+= time_incr;
-        s->last_non_b_time[1]= s->last_non_b_time[0];
-        s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment;
+        s->time= s->time_base*s->time_increment_resolution + time_increment;
+        s->pp_time= s->time - s->last_non_b_time;
+        s->last_non_b_time= s->time;
     }else{
-        s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution;
-        s->time+= s->time_increment;
+        s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment;
+        s->bp_time= s->last_non_b_time - s->time;
     }
 
     if(check_marker(&s->gb, "before vop_coded")==0 && s->picture_number==0){
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index bf729d9638..da86eba480 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -44,7 +44,6 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
 
-
 /* external functions, from idct_mmx.c */
 void ff_mmx_idct(DCTELEM *block);
 void ff_mmxext_idct(DCTELEM *block);
@@ -136,6 +135,42 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
     }
 }
 
+/*
+ * Store the 64 differences s1[x] - s2[x] of two 8x8 pixel blocks into block[]
+ * as 16-bit words (the stores assume DCTELEM is 16 bits wide). stride is the
+ * byte distance between consecutive rows of both s1 and s2.
+ */
+static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
+{
+    asm volatile(
+        /* mm7 supplies the zero high bytes for the unpacks below, so it has
+           to be cleared here; nothing else in this function initialises it */
+        "pxor %%mm7, %%mm7	\n\t"
+        "movl $-128, %%eax	\n\t" /* byte offset from block+64, counts up to 0 */
+        ".balign 16		\n\t" /* align the loop entry rather than the setup code */
+        "1:			\n\t"
+        "movq (%0), %%mm0	\n\t"
+        "movq (%1), %%mm2	\n\t"
+        "movq %%mm0, %%mm1	\n\t"
+        "movq %%mm2, %%mm3	\n\t"
+        "punpcklbw %%mm7, %%mm0	\n\t"
+        "punpckhbw %%mm7, %%mm1	\n\t"
+        "punpcklbw %%mm7, %%mm2	\n\t"
+        "punpckhbw %%mm7, %%mm3	\n\t"
+        "psubw %%mm2, %%mm0	\n\t"
+        "psubw %%mm3, %%mm1	\n\t"
+        "movq %%mm0, (%2, %%eax)\n\t"
+        "movq %%mm1, 8(%2, %%eax)\n\t"
+        "addl %3, %0		\n\t"
+        "addl %3, %1		\n\t"
+        "addl $16, %%eax	\n\t"
+        "jnz 1b			\n\t"
+        : "+r" (s1), "+r" (s2)
+        : "r" (block+64), "r" (stride)
+        : "%eax", "memory" /* the movq stores write block[] */
+    );
+}
+
 static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
 {
     const DCTELEM *p;
@@ -1064,6 +1091,7 @@ void dsputil_init_mmx(void)
 
     if (mm_flags & MM_MMX) {
         get_pixels = get_pixels_mmx;
+        diff_pixels = diff_pixels_mmx;
         put_pixels_clamped = put_pixels_clamped_mmx;
         add_pixels_clamped = add_pixels_clamped_mmx;
         clear_blocks= clear_blocks_mmx;
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index c378e3658b..7fc0cec18c 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -32,10 +32,7 @@
 static void halfpel_motion_search(MpegEncContext * s,
 				  int *mx_ptr, int *my_ptr, int dmin,
 				  int xmin, int ymin, int xmax, int ymax,
-                                  int pred_x, int pred_y);
-
-/* config it to test motion vector encoding (send random vectors) */
-//#define CONFIG_TEST_MV_ENCODE
+                                  int pred_x, int pred_y, uint8_t *ref_picture);
 
 static int pix_sum(UINT8 * pix, int line_size)
 {
@@ -138,7 +135,7 @@ static void no_motion_search(MpegEncContext * s,
 
 static int full_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
-                              int xmin, int ymin, int xmax, int ymax)
+                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
 {
     int x1, y1, x2, y2, xx, yy, x, y;
     int mx, my, dmin, d;
@@ -164,7 +161,7 @@ static int full_motion_search(MpegEncContext * s,
     my = 0;
     for (y = y1; y <= y2; y++) {
 	for (x = x1; x <= x2; x++) {
-	    d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x,
+	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
 			     s->linesize);
 	    if (d < dmin ||
 		(d == dmin &&
@@ -192,7 +189,7 @@ static int full_motion_search(MpegEncContext * s,
 
 static int log_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int range,
-                             int xmin, int ymin, int xmax, int ymax)
+                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
 {
     int x1, y1, x2, y2, xx, yy, x, y;
     int mx, my, dmin, d;
@@ -229,7 +226,7 @@ static int log_motion_search(MpegEncContext * s,
     do {
 	for (y = y1; y <= y2; y += range) {
 	    for (x = x1; x <= x2; x += range) {
-		d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
+		d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
 		if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
 		    dmin = d;
 		    mx = x;
@@ -268,7 +265,7 @@ static int log_motion_search(MpegEncContext * s,
 
 static int phods_motion_search(MpegEncContext * s,
                                int *mx_ptr, int *my_ptr, int range,
-                               int xmin, int ymin, int xmax, int ymax)
+                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
 {
     int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
     int mx, my, dminx, dminy;
@@ -309,7 +306,7 @@ static int phods_motion_search(MpegEncContext * s,
 
 	lastx = x;
 	for (x = x1; x <= x2; x += range) {
-	    d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
+	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
 	    if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
 		dminx = d;
 		mx = x;
@@ -318,7 +315,7 @@ static int phods_motion_search(MpegEncContext * s,
 
 	x = lastx;
 	for (y = y1; y <= y2; y += range) {
-	    d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
+	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
 	    if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
 		dminy = d;
 		my = y;
@@ -554,7 +551,7 @@ if(256*256*256*64%point==0)
 static int epzs_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr,
                              int P[5][2], int pred_x, int pred_y,
-                             int xmin, int ymin, int xmax, int ymax)
+                             int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture)
 {
     int best[2]={0, 0};
     int d, dmin; 
@@ -566,7 +563,7 @@ static int epzs_motion_search(MpegEncContext * s,
     const int shift= 1+s->quarter_sample;
 
     new_pic = s->new_picture[0] + pic_xy;
-    old_pic = s->last_picture[0] + pic_xy;
+    old_pic = ref_picture + pic_xy;
    
     dmin = pix_abs16x16(new_pic, old_pic, pic_stride);
     if(dmin<Z_THRESHOLD){
@@ -611,7 +608,7 @@ static int epzs_motion_search(MpegEncContext * s,
 static int epzs_motion_search4(MpegEncContext * s, int block,
                              int *mx_ptr, int *my_ptr,
                              int P[6][2], int pred_x, int pred_y,
-                             int xmin, int ymin, int xmax, int ymax)
+                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
 {
     int best[2]={0, 0};
     int d, dmin; 
@@ -623,7 +620,7 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
     const int shift= 1+s->quarter_sample;
 
     new_pic = s->new_picture[0] + pic_xy;
-    old_pic = s->last_picture[0] + pic_xy;
+    old_pic = ref_picture + pic_xy;
    
     dmin = pix_abs8x8(new_pic, old_pic, pic_stride);
 
@@ -679,7 +676,7 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
 static inline void halfpel_motion_search(MpegEncContext * s,
 				  int *mx_ptr, int *my_ptr, int dmin,
 				  int xmin, int ymin, int xmax, int ymax,
-                                  int pred_x, int pred_y)
+                                  int pred_x, int pred_y, uint8_t *ref_picture)
 {
     UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
     const int quant= s->qscale;
@@ -689,7 +686,7 @@ static inline void halfpel_motion_search(MpegEncContext * s,
 
     mx = *mx_ptr;
     my = *my_ptr;
-    ptr = s->last_picture[0] + (my * s->linesize) + mx;
+    ptr = ref_picture + (my * s->linesize) + mx;
 
     xx = 16 * s->mb_x;
     yy = 16 * s->mb_y;
@@ -735,7 +732,8 @@ static inline void halfpel_motion_search(MpegEncContext * s,
 static inline void halfpel_motion_search4(MpegEncContext * s,
 				  int *mx_ptr, int *my_ptr, int dmin,
 				  int xmin, int ymin, int xmax, int ymax,
-                                  int pred_x, int pred_y, int block_x, int block_y)
+                                  int pred_x, int pred_y, int block_x, int block_y,
+                                  uint8_t *ref_picture)
 {
     UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
     const int quant= s->qscale;
@@ -749,7 +747,7 @@ static inline void halfpel_motion_search4(MpegEncContext * s,
     
     mx = *mx_ptr;
     my = *my_ptr;
-    ptr = s->last_picture[0] + ((yy+my) * s->linesize) + xx + mx;
+    ptr = ref_picture + ((yy+my) * s->linesize) + xx + mx;
 
     dminh = dmin;
 
@@ -788,12 +786,12 @@ static inline void halfpel_motion_search4(MpegEncContext * s,
     *my_ptr = my;
 }
 
-static inline void set_mv_tables(MpegEncContext * s, int mx, int my)
+static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my)
 {
-    const int xy= s->mb_x + s->mb_y*s->mb_width;
+    const int xy= s->mb_x + 1 + (s->mb_y + 1)*(s->mb_width + 2);
     
-    s->mv_table[0][xy] = mx;
-    s->mv_table[1][xy] = my;
+    s->p_mv_table[xy][0] = mx;
+    s->p_mv_table[xy][1] = my;
 
     /* has allready been set to the 4 MV if 4MV is done */
     if(!(s->flags&CODEC_FLAG_4MV)){
@@ -812,10 +810,37 @@ static inline void set_mv_tables(MpegEncContext * s, int mx, int my)
     }
 }
 
-#ifndef CONFIG_TEST_MV_ENCODE
+/* Set the motion search range for f_code and the min/max limits of the search window. */
+static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax, int f_code)
+{
+    *range = 8 * (1 << (f_code - 1));
+    /* XXX: temporary kludge to avoid overflow for msmpeg4 */
+    if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
+        *range *= 2;
 
-void estimate_motion(MpegEncContext * s,
-		    int mb_x, int mb_y)
+    if (!s->unrestricted_mv) {
+        *xmin = 0;
+        *ymin = 0;
+        *xmax = s->mb_width*16 - 16;
+        *ymax = s->mb_height*16 - 16;
+    } else {
+        *xmin = -16;
+        *ymin = -16;
+        if (s->h263_plus)
+            *range *= 2;
+        if (s->avctx != NULL && s->avctx->codec->id == CODEC_ID_MPEG4) {
+            /* XXX: dunno if this is correct but ffmpeg4 decoder wont like it otherwise (drawn edge isnt large enough) */
+            *xmax = s->width;
+            *ymax = s->height;
+        } else {
+            *xmax = s->mb_width*16;
+            *ymax = s->mb_height*16;
+        }
+    }
+}
+
+void ff_estimate_p_frame_motion(MpegEncContext * s,
+                                int mb_x, int mb_y)
 {
     UINT8 *pix, *ppix;
     int sum, varc, vard, mx, my, range, dmin, xx, yy;
@@ -825,32 +850,10 @@ void estimate_motion(MpegEncContext * s,
     int P[6][2];
     const int shift= 1+s->quarter_sample;
     int mb_type=0;
-    //static int skip=0;    
-    range = 8 * (1 << (s->f_code - 1));
-    /* XXX: temporary kludge to avoid overflow for msmpeg4 */
-    if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
-	range = range * 2;
+    uint8_t *ref_picture= s->last_picture[0];
+
+    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
 
-    if (s->unrestricted_mv) {
-        xmin = -16;
-        ymin = -16;
-        if (s->h263_plus)
-            range *= 2;
-        if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
-            xmax = s->mb_width*16;
-            ymax = s->mb_height*16;
-        }else {
-            /* XXX: dunno if this is correct but ffmpeg4 decoder wont like it otherwise 
-	            (cuz the drawn edge isnt large enough))*/
-            xmax = s->width;
-            ymax = s->height;
-        }
-    } else {
-        xmin = 0;
-        ymin = 0;
-        xmax = s->mb_width*16 - 16;
-        ymax = s->mb_height*16 - 16;
-    }
     switch(s->me_method) {
     case ME_ZERO:
     default:
@@ -858,13 +861,13 @@ void estimate_motion(MpegEncContext * s,
         dmin = 0;
         break;
     case ME_FULL:
-	dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax);
+	dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
         break;
     case ME_LOG:
-	dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
+	dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
         break;
     case ME_PHODS:
-	dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
+	dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
         break;
     case ME_X1:
     case ME_EPZS:
@@ -907,7 +910,7 @@ void estimate_motion(MpegEncContext * s,
                 pred_y= P[1][1];
             }
         }
-        dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax);
+        dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
  
         mx+= mb_x*16;
         my+= mb_y*16;
@@ -967,10 +970,10 @@ void estimate_motion(MpegEncContext * s,
             P[5][0]= mx - mb_x*16;
             P[5][1]= my - mb_y*16;
 
-            dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4);
+            dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
 
             halfpel_motion_search4(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
-                                   pred_x4, pred_y4, block_x, block_y);
+                                   pred_x4, pred_y4, block_x, block_y, ref_picture);
      
             s->motion_val[ s->block_index[block] ][0]= mx4;
             s->motion_val[ s->block_index[block] ][1]= my4;
@@ -983,7 +986,7 @@ void estimate_motion(MpegEncContext * s,
 
     pix = s->new_picture[0] + (yy * s->linesize) + xx;
     /* At this point (mx,my) are full-pell and the absolute displacement */
-    ppix = s->last_picture[0] + (my * s->linesize) + mx;
+    ppix = ref_picture + (my * s->linesize) + mx;
     
     sum = pix_sum(pix, s->linesize);
 #if 0
@@ -1009,7 +1012,7 @@ void estimate_motion(MpegEncContext * s,
             mb_type|= MB_TYPE_INTRA;
         if (varc*2 + 200 > vard){
             mb_type|= MB_TYPE_INTER;
-            halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
+            halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture);
         }else{
             mx = mx*2 - mb_x*32;
             my = my*2 - mb_y*32;
@@ -1018,7 +1021,7 @@ void estimate_motion(MpegEncContext * s,
         if (vard <= 64 || vard < varc) {
             mb_type|= MB_TYPE_INTER;
             if (s->me_method != ME_ZERO) {
-                halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
+                halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture);
             } else {
                 mx -= 16 * mb_x;
                 my -= 16 * mb_y;
@@ -1038,46 +1041,249 @@ void estimate_motion(MpegEncContext * s,
     }
 
     s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
-    set_mv_tables(s, mx, my);
+    set_p_mv_tables(s, mx, my);
 }
 
-#else
-
-/* test version which generates valid random vectors */
-int estimate_motion(MpegEncContext * s,
-		    int mb_x, int mb_y,
-		    int *mx_ptr, int *my_ptr)
+void ff_estimate_motion_b(MpegEncContext * s,
+                       int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *ref_picture, int f_code)
 {
-    int xx, yy, x1, y1, x2, y2, range;
+    UINT8 *pix, *ppix;
+    int sum, varc, vard, mx, my, range, dmin, xx, yy;
+    int xmin, ymin, xmax, ymax;
+    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
+    int pred_x=0, pred_y=0;
+    int P[6][2];
+    const int shift= 1+s->quarter_sample;
+    int mb_type=0;
+    const int mot_stride = s->mb_width + 2;
+    const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
+    
+    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
 
-    if ((random() % 10) >= 5) {
-	range = 8 * (1 << (s->f_code - 1));
-	if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
-	    range = range * 2;
+    switch(s->me_method) {
+    case ME_ZERO:
+    default:
+	no_motion_search(s, &mx, &my);
+        dmin = 0;
+        break;
+    case ME_FULL:
+	dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
+        break;
+    case ME_LOG:
+	dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+        break;
+    case ME_PHODS:
+	dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
+        break;
+    case ME_X1:
+    case ME_EPZS:
+       {
 
-	xx = 16 * s->mb_x;
-	yy = 16 * s->mb_y;
-	x1 = xx - range;
-	if (x1 < 0)
-	    x1 = 0;
-	x2 = xx + range - 1;
-	if (x2 > (s->width - 16))
-	    x2 = s->width - 16;
-	y1 = yy - range;
-	if (y1 < 0)
-	    y1 = 0;
-	y2 = yy + range - 1;
-	if (y2 > (s->height - 16))
-	    y2 = s->height - 16;
+            rel_xmin= xmin - mb_x*16;
+            rel_xmax= xmax - mb_x*16;
+            rel_ymin= ymin - mb_y*16;
+            rel_ymax= ymax - mb_y*16;
 
-	*mx_ptr = (random() % (2 * (x2 - x1 + 1))) + 2 * (x1 - xx);
-	*my_ptr = (random() % (2 * (y2 - y1 + 1))) + 2 * (y1 - yy);
-	return 0;
-    } else {
-	*mx_ptr = 0;
-	*my_ptr = 0;
-	return 1;
+            P[0][0] = mv_table[mot_xy    ][0];
+            P[0][1] = mv_table[mot_xy    ][1];
+            P[1][0] = mv_table[mot_xy - 1][0];
+            P[1][1] = mv_table[mot_xy - 1][1];
+            if(P[1][0] > (rel_xmax<<shift)) P[1][0]= (rel_xmax<<shift);
+
+            /* special case for first line */
+            if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
+                P[4][0] = P[1][0];
+                P[4][1] = P[1][1];
+            } else {
+                P[2][0] = mv_table[mot_xy - mot_stride             ][0];
+                P[2][1] = mv_table[mot_xy - mot_stride             ][1];
+                P[3][0] = mv_table[mot_xy - mot_stride + 1         ][0];
+                P[3][1] = mv_table[mot_xy - mot_stride + 1         ][1];
+                if(P[2][1] > (rel_ymax<<shift)) P[2][1]= (rel_ymax<<shift);
+                if(P[3][0] < (rel_xmin<<shift)) P[3][0]= (rel_xmin<<shift);
+                if(P[3][1] > (rel_ymax<<shift)) P[3][1]= (rel_ymax<<shift);
+        
+                P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
+                P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
+            }
+            pred_x= P[1][0];
+            pred_y= P[1][1];
+        }
+        dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
+ 
+        mx+= mb_x*16;
+        my+= mb_y*16;
+        break;
+    }
+    
+    /* intra / predictive decision */
+//    xx = mb_x * 16;
+//    yy = mb_y * 16;
+
+//    pix = s->new_picture[0] + (yy * s->linesize) + xx;
+    /* At this point (mx,my) are full-pell and the absolute displacement */
+//    ppix = ref_picture + (my * s->linesize) + mx;
+    
+    halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture);
+
+//    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
+    mv_table[mot_xy][0]= mx;
+    mv_table[mot_xy][1]= my;
+}
+
+
+/* Unimplemented stub (no decision logic is present yet); always returns 0. */
+int ff_decide_type(MpegEncContext * s, int mb_x, int mb_y)
+{
+    return 0; /* defined result instead of flowing off the end of a non-void function */
+}
+
+/* Estimate forward and backward MVs for one MB and copy them into the bidir MV tables. */
+void ff_estimate_b_frame_motion(MpegEncContext * s, int mb_x, int mb_y)
+{
+    /* MV tables are addressed with a stride of mb_width+2 and a one row / one column offset */
+    const int mv_pos = (mb_y + 1)*(s->mb_width + 2) + mb_x + 1;
+    int c;
+
+    /* forward search runs against last_picture with f_code, backward against next_picture with b_code */
+    ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture[0], s->f_code);
+    ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture[0], s->b_code);
+    for(c=0; c<2; c++){
+        s->b_bidir_forw_mv_table[mv_pos][c]= s->b_forw_mv_table[mv_pos][c];
+        s->b_bidir_back_mv_table[mv_pos][c]= s->b_back_mv_table[mv_pos][c];
+    }
+    s->mb_type[mb_y*s->mb_width + mb_x]= MB_TYPE_FORWARD; //FIXME
+}
+
+/*
+ * find best f_code for ME which do unlimited searches
+ *
+ * mv_table is indexed with a stride of mb_width+2 and an offset of one row
+ * and one column; only MBs whose mb_type has a bit of `type` set are counted.
+ * Returns 1 for me_method below ME_EPZS, otherwise the f_code at which the
+ * downward accumulated histogram first exceeds mb_num/20 (1 if it never does).
+ */
+int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
+{
+    if(s->me_method<ME_EPZS){
+        /* always f_code 1 for these methods */
+        return 1;
+    }else{
+        UINT8 * const tab= s->fcode_tab;
+        const int mv_stride= s->mb_width + 2;
+        int hist[8]= {0}; /* occurrences of each fcode_tab value */
+        int too_long= 0;
+        int mb_x, mb_y, f;
+
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                const int xy= (mb_y+1)*mv_stride + mb_x + 1;
+
+                if(!(s->mb_type[mb_y*s->mb_width + mb_x] & type))
+                    continue;
+                /* one histogram entry per vector component */
+                hist[ tab[mv_table[xy][0] + MAX_MV] ]++;
+                hist[ tab[mv_table[xy][1] + MAX_MV] ]++;
+            }
+        }
+
+        /* sum the counts from MAX_FCODE downwards, stop once they pass mb_num/20 */
+        for(f=MAX_FCODE; f>1; f--){
+            too_long+= hist[f];
+            if(too_long > s->mb_num/20)
+                break; //FIXME this is pretty ineffective
+        }
+
+        return f;
     }
 }
 
-#endif
+/* Convert inter MBs to intra when an MV component has an fcode_tab entry that is
+   0 or above s->f_code (16x16 vectors from p_mv_table, 8x8 from motion_val). */
+void ff_fix_long_p_mvs(MpegEncContext * s)
+{
+    const int f_code= s->f_code;
+    int y;
+    UINT8 * fcode_tab= s->fcode_tab;
+
+    /* clip / convert to intra 16x16 type MVs */
+    for(y=0; y<s->mb_height; y++){
+        int x;
+        int xy= (y+1)* (s->mb_width+2)+1;
+        int i= y*s->mb_width;
+        for(x=0; x<s->mb_width; x++){
+            if(s->mb_type[i]&MB_TYPE_INTER){
+                if(   fcode_tab[s->p_mv_table[xy][0] + MAX_MV] > f_code
+                   || fcode_tab[s->p_mv_table[xy][0] + MAX_MV] == 0
+                   || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] > f_code
+                   || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] == 0 ){
+                    s->mb_type[i] &= ~MB_TYPE_INTER;
+                    s->mb_type[i] |= MB_TYPE_INTRA;
+                    s->p_mv_table[xy][0] = 0;
+                    s->p_mv_table[xy][1] = 0;
+                }
+            }
+            xy++;
+            i++;
+        }
+    }
+
+    if(s->flags&CODEC_FLAG_4MV){
+        const int wrap= 2+ s->mb_width*2;
+        /* clip / convert to intra 8x8 type MVs */
+        for(y=0; y<s->mb_height; y++){
+            int xy= (y*2 + 1)*wrap + 1;
+            int i= y*s->mb_width;
+            int x;
+            for(x=0; x<s->mb_width; x++){
+                if(s->mb_type[i]&MB_TYPE_INTER4V){
+                    int block;
+                    for(block=0; block<4; block++){
+                        int off= (block& 1) + (block>>1)*wrap;
+                        int mx= s->motion_val[ xy + off ][0];
+                        int my= s->motion_val[ xy + off ][1];
+                        if(   fcode_tab[mx + MAX_MV] > f_code
+                           || fcode_tab[mx + MAX_MV] == 0
+                           || fcode_tab[my + MAX_MV] > f_code
+                           || fcode_tab[my + MAX_MV] == 0 ){
+                            s->mb_type[i] &= ~MB_TYPE_INTER4V;
+                            s->mb_type[i] |= MB_TYPE_INTRA;
+                        }
+                    }
+                }
+                /* step to the next MB whether or not this one is INTER4V; otherwise the row scan stalls */
+                xy+=2;
+                i++;
+            }
+        }
+    }
+}
+
+/* Clear `type` on MBs whose mv_table vector has an fcode_tab entry of 0 or above f_code, and zero that vector. */
+void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type)
+{
+    UINT8 * const tab= s->fcode_tab;
+    const int mv_stride= s->mb_width + 2;
+    int mb_x, mb_y;
+
+    /* MV table entries sit one row and one column into the mb_width+2 wide table */
+    for(mb_y=0; mb_y<s->mb_height; mb_y++){
+        for(mb_x=0; mb_x<s->mb_width; mb_x++){
+            const int mb_pos= mb_y*s->mb_width + mb_x;
+            const int mv_pos= (mb_y+1)*mv_stride + mb_x + 1;
+            int mx, my;
+
+            if(!(s->mb_type[mb_pos]&type)) continue;
+            mx= mv_table[mv_pos][0];
+            my= mv_table[mv_pos][1];
+            if(   tab[mx + MAX_MV] <= f_code && tab[mx + MAX_MV] != 0
+               && tab[my + MAX_MV] <= f_code && tab[my + MAX_MV] != 0) continue;
+
+            s->mb_type[mb_pos] &= ~type;
+            if(s->mb_type[mb_pos]==0) s->mb_type[mb_pos]= MB_TYPE_FORWARD; //FIXME
+            mv_table[mv_pos][0] = 0;
+            mv_table[mv_pos][1] = 0;
+            //this is certainly bad FIXME
+        }
+    }
+}
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 1f92ffe8d8..3b3d76cd0e 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -148,7 +148,7 @@ int MPV_common_init(MpegEncContext *s)
             goto fail;
         s->next_picture_base[i] = pict;
         s->next_picture[i] = pict + pict_start;
-
+        
         if (s->has_b_frames) {
             pict = av_mallocz(c_size);
             if (pict == NULL) 
@@ -159,6 +159,9 @@ int MPV_common_init(MpegEncContext *s)
     }
     
     if (s->encoding) {
+        int j;
+        int mv_table_size= (s->mb_width+2)*(s->mb_height+2);
+
         /* Allocate MB type table */
         s->mb_type = av_mallocz(s->mb_num * sizeof(char));
         if (s->mb_type == NULL) {
@@ -171,14 +174,72 @@ int MPV_common_init(MpegEncContext *s)
             perror("malloc");
             goto fail;
         }
-        /* Allocate MV table */
-        /* By now we just have one MV per MB */
-        s->mv_table[0] = av_mallocz(s->mb_num * sizeof(INT16));
-        s->mv_table[1] = av_mallocz(s->mb_num * sizeof(INT16));
-        if (s->mv_table[1] == NULL || s->mv_table[0] == NULL) {
+
+        /* Allocate MV tables */
+        s->p_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->p_mv_table == NULL) {
             perror("malloc");
             goto fail;
         }
+        s->last_p_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->last_p_mv_table == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+        s->b_forw_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->b_forw_mv_table == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+        s->b_back_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->b_back_mv_table == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+        s->b_bidir_forw_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->b_bidir_forw_mv_table == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+        s->b_bidir_back_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->b_bidir_back_mv_table == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+        s->b_direct_forw_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->b_direct_forw_mv_table == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+        s->b_direct_back_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->b_direct_back_mv_table == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+        s->b_direct_mv_table = av_mallocz(mv_table_size * 2 * sizeof(INT16));
+        if (s->b_direct_mv_table == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+
+        if(s->max_b_frames){
+            for(j=0; j<REORDER_BUFFER_SIZE; j++){
+                int i;
+                for(i=0;i<3;i++) {
+                    int w, h, shift;
+
+                    w = s->linesize;
+                    h = s->mb_height * 16;
+                    shift = (i == 0) ? 0 : 1;
+                    c_size = (w >> shift) * (h >> shift);
+
+                    pict = av_mallocz(c_size);
+                    if (pict == NULL)
+                        goto fail;
+                    s->picture_buffer[j][i] = pict;
+                }
+            }
+        }
     }
     
     if (s->out_format == FMT_H263 || s->encoding) {
@@ -229,11 +290,9 @@ int MPV_common_init(MpegEncContext *s)
     s->picture_structure = PICT_FRAME;
 
     /* init macroblock skip table */
-    if (!s->encoding) {
-        s->mbskip_table = av_mallocz(s->mb_num);
-        if (!s->mbskip_table)
-            goto fail;
-    }
+    s->mbskip_table = av_mallocz(s->mb_num);
+    if (!s->mbskip_table)
+        goto fail;
     
     s->block= s->intra_block;
 
@@ -244,39 +303,43 @@ int MPV_common_init(MpegEncContext *s)
     return -1;
 }
 
+/* free p and reset it to NULL; do{}while(0) keeps "CHECK_FREE(x);" a single statement inside if/else */
+#define CHECK_FREE(p) do{\
+    free(p); /* free(NULL) is a no-op, no guard needed */\
+    (p)= NULL;\
+}while(0)
+
 /* init common structure for both encoder and decoder */
 void MPV_common_end(MpegEncContext *s)
 {
     int i;
 
-    if (s->mb_type)
-        free(s->mb_type);
-    if (s->mb_var)
-        free(s->mb_var);
-    if (s->mv_table[0])
-        free(s->mv_table[0]);
-    if (s->mv_table[1])
-        free(s->mv_table[1]);
-    if (s->motion_val)
-        free(s->motion_val);
-    if (s->dc_val[0])
-        free(s->dc_val[0]);
-    if (s->ac_val[0])
-        free(s->ac_val[0]);
-    if (s->coded_block)
-        free(s->coded_block);
-    if (s->mbintra_table)
-        free(s->mbintra_table);
+    CHECK_FREE(s->mb_type);
+    CHECK_FREE(s->mb_var);
+    CHECK_FREE(s->p_mv_table);
+    CHECK_FREE(s->last_p_mv_table);
+    CHECK_FREE(s->b_forw_mv_table);
+    CHECK_FREE(s->b_back_mv_table);
+    CHECK_FREE(s->b_bidir_forw_mv_table);
+    CHECK_FREE(s->b_bidir_back_mv_table);
+    CHECK_FREE(s->b_direct_forw_mv_table);
+    CHECK_FREE(s->b_direct_back_mv_table);
+    CHECK_FREE(s->b_direct_mv_table);
+    CHECK_FREE(s->motion_val);
+    CHECK_FREE(s->dc_val[0]);
+    CHECK_FREE(s->ac_val[0]);
+    CHECK_FREE(s->coded_block);
+    CHECK_FREE(s->mbintra_table);
 
-    if (s->mbskip_table)
-        free(s->mbskip_table);
+    CHECK_FREE(s->mbskip_table);
     for(i=0;i<3;i++) {
-        if (s->last_picture_base[i])
-	    free(s->last_picture_base[i]);
-	if (s->next_picture_base[i])
-	    free(s->next_picture_base[i]);
-        if (s->has_b_frames)
-            free(s->aux_picture_base[i]);
+        int j;
+        CHECK_FREE(s->last_picture_base[i]);
+        CHECK_FREE(s->next_picture_base[i]);
+        CHECK_FREE(s->aux_picture_base[i]);
+        for(j=0; j<REORDER_BUFFER_SIZE; j++){
+            CHECK_FREE(s->picture_buffer[j][i]);
+        }
     }
     s->context_initialized = 0;
 }
@@ -307,6 +370,7 @@ int MPV_encode_init(AVCodecContext *avctx)
     s->avctx = avctx;
     s->aspect_ratio_info= avctx->aspect_ratio_info;
     s->flags= avctx->flags;
+    s->max_b_frames= avctx->max_b_frames;
     
     if (s->gop_size <= 1) {
         s->intra_only = 1;
@@ -368,6 +432,7 @@ int MPV_encode_init(AVCodecContext *avctx)
         s->out_format = FMT_H263;
         s->h263_pred = 1;
         s->unrestricted_mv = 1;
+        s->has_b_frames= s->max_b_frames ? 1 : 0;
         break;
     case CODEC_ID_MSMPEG4V1:
         s->out_format = FMT_H263;
@@ -421,7 +486,7 @@ int MPV_encode_init(AVCodecContext *avctx)
         mpeg1_encode_init(s);
 
     /* dont use mv_penalty table for crap MV as it would be confused */
-    if (s->me_method < 5) s->mv_penalty = default_mv_penalty;
+    if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
 
     s->encoding = 1;
 
@@ -443,6 +508,7 @@ int MPV_encode_init(AVCodecContext *avctx)
     s->fake_picture_number = 0;
     /* motion detector init */
     s->f_code = 1;
+    s->b_code = 1;
 
     return 0;
 }
@@ -531,76 +597,148 @@ void MPV_frame_end(MpegEncContext *s)
     emms_c();
 }
 
-int MPV_encode_picture(AVCodecContext *avctx,
-                       unsigned char *buf, int buf_size, void *data)
+/* reorder input for encoding */
+void reorder_input(MpegEncContext *s, AVPicture *pict)
 {
-    MpegEncContext *s = avctx->priv_data;
-    AVPicture *pict = data;
-    int i, j;
+    int i, j, index;
+            
+    if(s->max_b_frames > FF_MAX_B_FRAMES) s->max_b_frames= FF_MAX_B_FRAMES;
 
-    if (s->fixed_qscale) 
-        s->qscale = avctx->quality;
+//        delay= s->max_b_frames+1; (or 0 if no b frames cuz decoder diff)
 
-    init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
-
-    s->force_type= (avctx->flags&CODEC_FLAG_TYPE) ?
-	(avctx->key_frame ? I_TYPE : P_TYPE) : 0;
-    if (!s->intra_only) {
-        /* first picture of GOP is intra */
-        if (s->picture_in_gop_number % s->gop_size==0 || s->force_type==I_TYPE){
-            s->picture_in_gop_number=0;
-            s->pict_type = I_TYPE;
-        }else
-            s->pict_type = P_TYPE;
-    } else {
-        s->pict_type = I_TYPE;
+    for(j=0; j<REORDER_BUFFER_SIZE-1; j++){
+        s->coded_order[j]= s->coded_order[j+1];
     }
-    
-    MPV_frame_start(s);
-    
-    for(i=0;i<3;i++) {
-        UINT8 *src = pict->data[i];
-        UINT8 *dest = s->current_picture[i];
-        int src_wrap = pict->linesize[i];
-        int dest_wrap = s->linesize;
-        int w = s->width;
-        int h = s->height;
+    s->coded_order[j].picture[0]= s->coded_order[j].picture[1]= s->coded_order[j].picture[2]= NULL; //catch uninitalized buffers
 
-        if (i >= 1) {
-            dest_wrap >>= 1;
-            w >>= 1;
-            h >>= 1;
+    switch(s->input_pict_type){
+    default: 
+    case I_TYPE:
+    case S_TYPE:
+    case P_TYPE:
+        index= s->max_b_frames - s->b_frames_since_non_b;
+        s->b_frames_since_non_b=0;
+        break;            
+    case B_TYPE:
+        index= s->max_b_frames + 1;
+        s->b_frames_since_non_b++;
+        break;          
+    }
+//printf("index:%d type:%d strides: %d %d\n", index, s->input_pict_type, pict->linesize[0], s->linesize);
+    if(   (index==0 || (s->flags&CODEC_FLAG_INPUT_PRESERVED))
+       && pict->linesize[0] == s->linesize
+       && pict->linesize[1] == s->linesize>>1
+       && pict->linesize[2] == s->linesize>>1){
+//printf("ptr\n");
+        for(i=0; i<3; i++){
+            s->coded_order[index].picture[i]= pict->data[i];
         }
+    }else{
+//printf("copy\n");
+        for(i=0; i<3; i++){
+            uint8_t *src = pict->data[i];
+            uint8_t *dest;
+            int src_wrap = pict->linesize[i];
+            int dest_wrap = s->linesize;
+            int w = s->width;
+            int h = s->height;
 
-        if(dest_wrap==src_wrap){
-            s->new_picture[i] = pict->data[i];
-        } else {
+            if(index==0) dest= s->last_picture[i]+16; //is current_picture indeed but the switch hapens after reordering
+            else         dest= s->picture_buffer[s->picture_buffer_index][i];
+
+            if (i >= 1) {
+                dest_wrap >>= 1;
+                w >>= 1;
+                h >>= 1;
+            }
+
+            s->coded_order[index].picture[i]= dest;
             for(j=0;j<h;j++) {
                 memcpy(dest, src, w);
                 dest += dest_wrap;
                 src += src_wrap;
             }
-            s->new_picture[i] = s->current_picture[i];
-	    }
+        }
+        if(index!=0){
+            s->picture_buffer_index++;
+            if(s->picture_buffer_index >= REORDER_BUFFER_SIZE-1) s->picture_buffer_index=0;
+        }
+    }
+    s->coded_order[index].pict_type = s->input_pict_type;
+    s->coded_order[index].qscale    = s->input_qscale;
+    s->coded_order[index].force_type= s->force_input_type;
+    s->coded_order[index].picture_in_gop_number= s->input_picture_in_gop_number;
+    s->coded_order[index].picture_number= s->input_picture_number;
+
+    for(i=0; i<3; i++){
+        s->new_picture[i]= s->coded_order[0].picture[i];
+    }
+}
+
+int MPV_encode_picture(AVCodecContext *avctx,
+                       unsigned char *buf, int buf_size, void *data)
+{
+    MpegEncContext *s = avctx->priv_data;
+    AVPicture *pict = data;
+
+    s->input_qscale = avctx->quality;
+
+    init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
+
+    s->force_input_type= (avctx->flags&CODEC_FLAG_TYPE) ?
+	(avctx->key_frame ? I_TYPE : P_TYPE) : 0;
+    if (!s->intra_only) {
+        /* first picture of GOP is intra */
+        if (s->input_picture_in_gop_number % s->gop_size==0 || s->force_input_type==I_TYPE){
+            s->input_picture_in_gop_number=0;
+            s->input_pict_type = I_TYPE;
+        }else if(s->max_b_frames==0){
+            s->input_pict_type = P_TYPE;
+        }else{
+            if(s->b_frames_since_non_b < s->max_b_frames) //FIXME more IQ
+                s->input_pict_type = B_TYPE;
+            else
+                s->input_pict_type = P_TYPE;
+        }
+    } else {
+        s->input_pict_type = I_TYPE;
     }
 
-    encode_picture(s, s->picture_number);
-    avctx->key_frame = (s->pict_type == I_TYPE);
-    avctx->header_bits = s->header_bits;
-    avctx->mv_bits     = s->mv_bits;
-    avctx->misc_bits   = s->misc_bits;
-    avctx->i_tex_bits  = s->i_tex_bits;
-    avctx->p_tex_bits  = s->p_tex_bits;
-    avctx->i_count     = s->i_count;
-    avctx->p_count     = s->p_count;
-    avctx->skip_count  = s->skip_count;
+    reorder_input(s, pict);
+    
+    /* output? */
+    if(s->coded_order[0].picture[0]){
 
-    MPV_frame_end(s);
-    s->picture_number++;
-    s->picture_in_gop_number++;
+        s->pict_type= s->coded_order[0].pict_type;
+        if (s->fixed_qscale) /* the ratecontrol needs the last qscale so we dont touch it for CBR */
+            s->qscale= s->coded_order[0].qscale;
+        s->force_type= s->coded_order[0].force_type;
+        s->picture_in_gop_number= s->coded_order[0].picture_in_gop_number;
+        s->picture_number= s->coded_order[0].picture_number;
 
-    if (s->out_format == FMT_MJPEG)
-        mjpeg_picture_trailer(s);
+        MPV_frame_start(s);
+
+        encode_picture(s, s->picture_number);
+        avctx->key_frame = (s->pict_type == I_TYPE);
+        avctx->header_bits = s->header_bits;
+        avctx->mv_bits     = s->mv_bits;
+        avctx->misc_bits   = s->misc_bits;
+        avctx->i_tex_bits  = s->i_tex_bits;
+        avctx->p_tex_bits  = s->p_tex_bits;
+        avctx->i_count     = s->i_count;
+        avctx->p_count     = s->p_count;
+        avctx->skip_count  = s->skip_count;
+
+        MPV_frame_end(s);
+
+        if (s->out_format == FMT_MJPEG)
+            mjpeg_picture_trailer(s);
+
+        avctx->quality = s->qscale;
+    }
+
+    s->input_picture_number++;
+    s->input_picture_in_gop_number++;
 
     flush_put_bits(&s->pb);
     s->last_frame_bits= s->frame_bits;
@@ -610,12 +748,12 @@ int MPV_encode_picture(AVCodecContext *avctx,
 //printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n", 
 //s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits);
 
-    avctx->quality = s->qscale;
     if (avctx->get_psnr) {
         /* At this point pict->data should have the original frame   */
         /* an s->current_picture should have the coded/decoded frame */
         get_psnr(pict->data, s->current_picture,
                  pict->linesize, s->linesize, avctx);
+//        printf("%f\n", avctx->psnr_y);
     }
     return pbBufPtr(&s->pb) - s->pb.buf;
 }
@@ -637,7 +775,7 @@ static inline void gmc1_motion(MpegEncContext *s,
                                int h)
 {
     UINT8 *ptr;
-    int dxy, offset, mx, my, src_x, src_y, height, linesize;
+    int offset, src_x, src_y, linesize;
     int motion_x, motion_y;
 
     if(s->real_sprite_warping_points>1) printf("more than 1 warp point isnt supported\n");
@@ -698,6 +836,7 @@ if(s->quarter_sample)
     motion_x>>=1;
     motion_y>>=1;
 }
+
     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
     src_x = s->mb_x * 16 + (motion_x >> 1);
     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
@@ -945,6 +1084,7 @@ static inline void add_dct(MpegEncContext *s,
         if (!s->mpeg2)
             if(s->encoding || (!s->h263_msmpeg4))
                 s->dct_unquantize(s, block, i, s->qscale);
+
         ff_idct (block);
         add_pixels_clamped(block, dest, line_size);
     }
@@ -1020,7 +1160,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
         s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
 
     /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
-    if (s->out_format == FMT_H263) {
+    if (s->out_format == FMT_H263) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
       if(s->pict_type!=B_TYPE){
         int xy, wrap, motion_x, motion_y;
         
@@ -1047,17 +1187,19 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
       }
     }
     
-    if (!s->intra_only) {
+    if (!(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) {
         UINT8 *dest_y, *dest_cb, *dest_cr;
         UINT8 *mbskip_ptr;
 
-        /* avoid copy if macroblock skipped in last frame too */
-        if (!s->encoding && s->pict_type != B_TYPE) {
+        /* avoid copy if macroblock skipped in last frame too;
+           don't touch it for B-frames as they need the skip info from the next P-frame */
+        if (s->pict_type != B_TYPE) {
             mbskip_ptr = &s->mbskip_table[s->mb_y * s->mb_width + s->mb_x];
             if (s->mb_skiped) {
                 s->mb_skiped = 0;
-                /* if previous was skipped too, then nothing to do ! */
-                if (*mbskip_ptr != 0) 
+                /* if previous was skipped too, then nothing to do!
+                   skip only during decoding, as we might trash the buffers a bit during encoding */
+                if (*mbskip_ptr != 0 && !s->encoding) 
                     goto the_end;
                 *mbskip_ptr = 1; /* indicate that this time we skiped it */
             } else {
@@ -1079,23 +1221,25 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 
         if (!s->mb_intra) {
             /* motion handling */
-            if (!s->no_rounding){
-                op_pix = put_pixels_tab;
-                op_qpix= qpel_mc_rnd_tab;
-            }else{
-                op_pix = put_no_rnd_pixels_tab;
-                op_qpix= qpel_mc_no_rnd_tab;
-            }
+            if((s->flags&CODEC_FLAG_HQ) || (!s->encoding)){
+                if (!s->no_rounding){
+                    op_pix = put_pixels_tab;
+                    op_qpix= qpel_mc_rnd_tab;
+                }else{
+                    op_pix = put_no_rnd_pixels_tab;
+                    op_qpix= qpel_mc_no_rnd_tab;
+                }
 
-            if (s->mv_dir & MV_DIR_FORWARD) {
-                MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
-                if (!s->no_rounding) 
-                    op_pix = avg_pixels_tab;
-                else
-                    op_pix = avg_no_rnd_pixels_tab;
-            }
-            if (s->mv_dir & MV_DIR_BACKWARD) {
-                MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
+                if (s->mv_dir & MV_DIR_FORWARD) {
+                    MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
+                    if (!s->no_rounding) 
+                        op_pix = avg_pixels_tab;
+                    else
+                        op_pix = avg_no_rnd_pixels_tab;
+                }
+                if (s->mv_dir & MV_DIR_BACKWARD) {
+                    MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
+                }
             }
 
             /* add dct residue */
@@ -1121,120 +1265,81 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
     emms_c(); //FIXME remove
 }
 
-static void encode_mb(MpegEncContext *s)
+
+static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
 {
-    int wrap;
     const int mb_x= s->mb_x;
     const int mb_y= s->mb_y;
-    UINT8 *ptr;
-    const int motion_x= s->mv[0][0][0];
-    const int motion_y= s->mv[0][0][1];
     int i;
-
-    /* get the pixels */
-    wrap = s->linesize;
-    ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
-    get_pixels(s->block[0], ptr, wrap);
-    get_pixels(s->block[1], ptr + 8, wrap);
-    get_pixels(s->block[2], ptr + 8 * wrap, wrap);
-    get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
-    wrap = s->linesize >> 1;
-    ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
-    get_pixels(s->block[4], ptr, wrap);
-
-    wrap = s->linesize >> 1;
-    ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
-    get_pixels(s->block[5], ptr, wrap);
-
-    /* subtract previous frame if non intra */
-    if (!s->mb_intra) {
-        int dxy, offset, mx, my;
-        
-        if(s->mv_type==MV_TYPE_16X16){
-            dxy = ((motion_y & 1) << 1) | (motion_x & 1);
-            ptr = s->last_picture[0] + 
-                ((mb_y * 16 + (motion_y >> 1)) * s->linesize) + 
-                (mb_x * 16 + (motion_x >> 1));
-
-            sub_pixels_2(s->block[0], ptr, s->linesize, dxy);
-            sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy);
-            sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy);
-            sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy);
-
-            if (s->out_format == FMT_H263) {
-                /* special rounding for h263 */
-                dxy = 0;
-                if ((motion_x & 3) != 0)
-                    dxy |= 1;
-                if ((motion_y & 3) != 0)
-                    dxy |= 2;
-                mx = motion_x >> 2;
-                my = motion_y >> 2;
-            } else {
-                mx = motion_x / 2;
-                my = motion_y / 2;
-                dxy = ((my & 1) << 1) | (mx & 1);
-                mx >>= 1;
-                my >>= 1;
-            }
-            offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx);
-            ptr = s->last_picture[1] + offset;
-            sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
-            ptr = s->last_picture[2] + offset;
-            sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
-        }else{
-            int src_x, src_y;
-
-            for(i=0;i<4;i++) {
-                int motion_x = s->mv[0][i][0];
-                int motion_y = s->mv[0][i][1];
-
-                dxy = ((motion_y & 1) << 1) | (motion_x & 1);
-                src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
-                src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
-                        
-                ptr = s->last_picture[0] + (src_y * s->linesize) + (src_x);
-                sub_pixels_2(s->block[i], ptr, s->linesize, dxy);
-            }
-            /* In case of 8X8, we construct a single chroma motion vector
-               with a special rounding */
-            mx = 0;
-            my = 0;
-            for(i=0;i<4;i++) {
-                mx += s->mv[0][i][0];
-                my += s->mv[0][i][1];
-            }
-            if (mx >= 0)
-                mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
-            else {
-                mx = -mx;
-                mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
-            }
-            if (my >= 0)
-                my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
-            else {
-                my = -my;
-                my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
-            }
-            dxy = ((my & 1) << 1) | (mx & 1);
-            mx >>= 1;
-            my >>= 1;
-
-            src_x = mb_x * 8 + mx;
-            src_y = mb_y * 8 + my;
-            src_x = clip(src_x, -8, s->width/2);
-            if (src_x == s->width/2)
-                dxy &= ~1;
-            src_y = clip(src_y, -8, s->height/2);
-            if (src_y == s->height/2)
-                dxy &= ~2;
-            
-            offset = (src_y * (s->linesize >> 1)) + src_x;
-            ptr = s->last_picture[1] + offset;
-            sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy);
-            ptr = s->last_picture[2] + offset;
-            sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy);
+#if 0
+        if (s->interlaced_dct) {
+            dct_linesize = s->linesize * 2;
+            dct_offset = s->linesize;
+        } else {
+            dct_linesize = s->linesize;
+            dct_offset = s->linesize * 8;
         }
+#endif
+
+    if (s->mb_intra) {
+        UINT8 *ptr;
+        int wrap;
+
+        wrap = s->linesize;
+        ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
+        get_pixels(s->block[0], ptr               , wrap);
+        get_pixels(s->block[1], ptr            + 8, wrap);
+        get_pixels(s->block[2], ptr + 8 * wrap    , wrap);
+        get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
+
+        wrap >>=1;
+        ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
+        get_pixels(s->block[4], ptr, wrap);
+
+        ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
+        get_pixels(s->block[5], ptr, wrap);
+    }else{
+        op_pixels_func *op_pix;
+        qpel_mc_func *op_qpix;
+        UINT8 *dest_y, *dest_cb, *dest_cr;
+        UINT8 *ptr;
+        int wrap;
+
+        dest_y  = s->current_picture[0] + (mb_y * 16 * s->linesize       ) + mb_x * 16;
+        dest_cb = s->current_picture[1] + (mb_y * 8  * (s->linesize >> 1)) + mb_x * 8;
+        dest_cr = s->current_picture[2] + (mb_y * 8  * (s->linesize >> 1)) + mb_x * 8;
+
+        if (!s->no_rounding){
+            op_pix = put_pixels_tab;
+            op_qpix= qpel_mc_rnd_tab;
+        }else{
+            op_pix = put_no_rnd_pixels_tab;
+            op_qpix= qpel_mc_no_rnd_tab;
+        }
+
+        if (s->mv_dir & MV_DIR_FORWARD) {
+            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
+            if (!s->no_rounding) 
+                op_pix = avg_pixels_tab;
+            else
+                op_pix = avg_no_rnd_pixels_tab;
+        }
+        if (s->mv_dir & MV_DIR_BACKWARD) {
+            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
+        }
+        wrap = s->linesize;
+        ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
+        diff_pixels(s->block[0], ptr               , dest_y               , wrap);
+        diff_pixels(s->block[1], ptr            + 8, dest_y            + 8, wrap);
+        diff_pixels(s->block[2], ptr + 8 * wrap    , dest_y + 8 * wrap    , wrap);
+        diff_pixels(s->block[3], ptr + 8 * wrap + 8, dest_y + 8 * wrap + 8, wrap);
+
+        wrap >>=1;
+        ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
+        diff_pixels(s->block[4], ptr, dest_cb, wrap);
+
+        ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
+        diff_pixels(s->block[5], ptr, dest_cr, wrap);
     }
             
 #if 0
@@ -1314,8 +1419,13 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     /* Reset the average MB variance */
     s->avg_mb_var = 0;
     s->mc_mb_var = 0;
+    
     /* Estimate motion for every MB */
-    if(s->pict_type == P_TYPE){
+    if(s->pict_type != I_TYPE){
+//        int16_t (*tmp)[2]= s->p_mv_table;
+//        s->p_mv_table= s->last_mv_table;
+//        s->last_mv_table= s->mv_table;
+    
         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
@@ -1330,111 +1440,43 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                 s->block_index[3]+=2;
 
                 /* compute motion vector & mb_type and store in context */
-                estimate_motion(s, mb_x, mb_y);
+                if(s->pict_type==B_TYPE)
+                    ff_estimate_b_frame_motion(s, mb_x, mb_y);
+                else
+                    ff_estimate_p_frame_motion(s, mb_x, mb_y);
 //                s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
             }
         }
         emms_c();
-    }else{
+    }else if(s->pict_type == I_TYPE){
         /* I-Frame */
         //FIXME do we need to zero them?
         memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
-        memset(s->mv_table[0]  , 0, sizeof(INT16)*s->mb_width*s->mb_height);
-        memset(s->mv_table[1]  , 0, sizeof(INT16)*s->mb_width*s->mb_height);
+        memset(s->p_mv_table   , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
     }
 
-    if(s->avg_mb_var < s->mc_mb_var && s->pict_type != B_TYPE && (!s->force_type)){ //FIXME subtract MV bits
+    if(s->avg_mb_var < s->mc_mb_var && s->pict_type != B_TYPE && (!s->force_type) && s->max_b_frames==0){ //FIXME subtract MV bits
+        // FIXME b-frames & scene change detection
+        s->input_pict_type= I_TYPE;
         s->pict_type= I_TYPE;
-        s->picture_in_gop_number=0;
+        s->input_picture_in_gop_number=0;
         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
 //printf("Scene change detected, encoding as I Frame\n");
     }
-
-    /* find best f_code for ME which do unlimited searches */
-    if(s->pict_type == P_TYPE && s->me_method >= 5){
-        int mv_num[8];
-        int i;
-        int loose=0;
-        UINT8 * fcode_tab= s->fcode_tab;
-
-        for(i=0; i<8; i++) mv_num[i]=0;
-
-        for(i=0; i<s->mb_num; i++){
-            if(s->mb_type[i] & MB_TYPE_INTER){
-                mv_num[ fcode_tab[s->mv_table[0][i] + MAX_MV] ]++;
-                mv_num[ fcode_tab[s->mv_table[1][i] + MAX_MV] ]++;
-//printf("%d %d %d\n", s->mv_table[0][i], fcode_tab[s->mv_table[0][i] + MAX_MV], i);
-            }
-//else printf("I");
-        }
-
-        for(i=MAX_FCODE; i>1; i--){
-            loose+= mv_num[i];
-            if(loose > 10) break; //FIXME this is pretty ineffective
-        }
-        s->f_code= i;
-/*        for(i=0; i<=MAX_FCODE; i++){
-            printf("%d ", mv_num[i]);
-        }
-        printf("\n");*/
-    }else{
-        s->f_code= 1;
+    
+    if(s->pict_type==P_TYPE || s->pict_type==S_TYPE){ /* P/S pictures take their MVs from p_mv_table */
+        s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
+        ff_fix_long_p_mvs(s); /* braces keep this inside the P/S test, as its indentation intended */
+    }else if(s->pict_type==B_TYPE){ /* B pictures: separate forward (f_code) and backward (b_code) tables */
+        s->f_code= ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
+        s->b_code= ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
+        //FIXME if BIDIR != for&back
+        ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD |MB_TYPE_BIDIR);
+        ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD|MB_TYPE_BIDIR);
     }
-
+    
 //printf("f_code %d ///\n", s->f_code);
-    /* convert MBs with too long MVs to I-Blocks */
-    if(s->pict_type==P_TYPE){
-        int i, x, y;
-        const int f_code= s->f_code;
-        UINT8 * fcode_tab= s->fcode_tab;
-//FIXME try to clip instead of intra izing ;)
-        /* clip / convert to intra 16x16 type MVs */
-        for(i=0; i<s->mb_num; i++){
-            if(s->mb_type[i]&MB_TYPE_INTER){
-                if(   fcode_tab[s->mv_table[0][i] + MAX_MV] > f_code
-                   || fcode_tab[s->mv_table[0][i] + MAX_MV] == 0
-                   || fcode_tab[s->mv_table[1][i] + MAX_MV] > f_code
-                   || fcode_tab[s->mv_table[1][i] + MAX_MV] == 0 ){
-                    s->mb_type[i] &= ~MB_TYPE_INTER;
-                    s->mb_type[i] |= MB_TYPE_INTRA;
-                    s->mv_table[0][i] = 0;
-                    s->mv_table[1][i] = 0;
-                }
-            }
-        }
-
-        if(s->flags&CODEC_FLAG_4MV){
-            int wrap= 2+ s->mb_width*2;
-
-            /* clip / convert to intra 8x8 type MVs */
-            for(y=0; y<s->mb_height; y++){
-                int xy= (y*2 + 1)*wrap + 1;
-                i= y*s->mb_width;
-
-                for(x=0; x<s->mb_width; x++){
-                    if(s->mb_type[i]&MB_TYPE_INTER4V){
-                        int block;
-                        for(block=0; block<4; block++){
-                            int off= (block& 1) + (block>>1)*wrap;
-                            int mx= s->motion_val[ xy + off ][0];
-                            int my= s->motion_val[ xy + off ][1];
-
-                            if(   fcode_tab[mx + MAX_MV] > f_code
-                               || fcode_tab[mx + MAX_MV] == 0
-                               || fcode_tab[my + MAX_MV] > f_code
-                               || fcode_tab[my + MAX_MV] == 0 ){
-                                s->mb_type[i] &= ~MB_TYPE_INTER4V;
-                                s->mb_type[i] |= MB_TYPE_INTRA;
-                            }
-                        }
-                        xy+=2;
-                        i++;
-                    }
-                }
-            }
-        }
-    }
 
 //    printf("%d %d\n", s->avg_mb_var, s->mc_mb_var);
 
@@ -1526,7 +1568,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
         s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
-            const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
+            /*const */int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
             PutBitContext pb;
             int d;
             int dmin=10000000;
@@ -1541,18 +1583,19 @@ static void encode_picture(MpegEncContext *s, int picture_number)
             s->block_index[4]++;
             s->block_index[5]++;
 
-            s->mv_dir = MV_DIR_FORWARD;
             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
                 pb= s->pb;
+                s->mv_dir = MV_DIR_FORWARD;
                 if(mb_type&MB_TYPE_INTER){
+                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
                     s->mv_type = MV_TYPE_16X16;
                     s->mb_intra= 0;
-                    s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x];
-                    s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x];
+                    s->mv[0][0][0] = s->p_mv_table[xy][0];
+                    s->mv[0][0][1] = s->p_mv_table[xy][1];
                     init_put_bits(&s->pb, bit_buf[1], 3000, NULL, NULL);
                     s->block= s->inter_block;
 
-                    encode_mb(s);
+                    encode_mb(s, s->mv[0][0][0], s->mv[0][0][1]);
                     d= get_bit_count(&s->pb);
                     if(d<dmin){
                         flush_put_bits(&s->pb);
@@ -1578,9 +1621,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                     init_put_bits(&s->pb, bit_buf[2], 3000, NULL, NULL);
                     s->block= s->inter4v_block;
 
-                    encode_mb(s);
+                    encode_mb(s, 0, 0);
                     d= get_bit_count(&s->pb);
-                    if(d<dmin){
+                    if(d<dmin && 0){
                         flush_put_bits(&s->pb);
                         dmin=d;
                         for(i=0; i<4; i++){
@@ -1604,7 +1647,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                     init_put_bits(&s->pb, bit_buf[0], 3000, NULL, NULL);
                     s->block= s->intra_block;
                    
-                    encode_mb(s);
+                    encode_mb(s, 0, 0);
                     d= get_bit_count(&s->pb);
                     if(d<dmin){
                         flush_put_bits(&s->pb);
@@ -1634,17 +1677,59 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                 s->block= best_s.block;
                 s->pb= pb;
             } else {
+                int motion_x, motion_y;
+                s->mv_type=MV_TYPE_16X16;
                 // only one MB-Type possible
+                //FIXME convert to switch()
                 if(mb_type&MB_TYPE_INTRA){
+                    s->mv_dir = MV_DIR_FORWARD;
                     s->mb_intra= 1;
+                    motion_x= s->mv[0][0][0] = 0;
+                    motion_y= s->mv[0][0][1] = 0;
+                }else if(mb_type&MB_TYPE_INTER){
+                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mb_intra= 0;
+                    motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
+                    motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
+                }else if(mb_type&MB_TYPE_DIRECT){
+                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+                    s->mb_intra= 0;
+                    motion_x=0;
+                    motion_y=0;
                     s->mv[0][0][0] = 0;
                     s->mv[0][0][1] = 0;
-                }else{
+                    s->mv[1][0][0] = 0;
+                    s->mv[1][0][1] = 0;
+                }else if(mb_type&MB_TYPE_BIDIR){
+                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                     s->mb_intra= 0;
-                    s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x];
-                    s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x];
+                    motion_x=0;
+                    motion_y=0;
+                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
+                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
+                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
+                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
+                }else if(mb_type&MB_TYPE_BACKWARD){
+                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    s->mv_dir = MV_DIR_BACKWARD;
+                    s->mb_intra= 0;
+                    motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
+                    motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
+                }else if(mb_type&MB_TYPE_FORWARD){
+                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mb_intra= 0;
+                    motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
+                    motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
+//                    printf(" %d %d ", motion_x, motion_y);
+                }else{
+                    motion_x=motion_y=0; //gcc warning fix
+                    printf("illegal MB type\n");
                 }
-                encode_mb(s);
+                encode_mb(s, motion_x, motion_y);
             }
 
             MPV_decode_mb(s, s->block);
@@ -1904,13 +1989,8 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
 
 /* rate control */
 
-/* an I frame is I_FRAME_SIZE_RATIO bigger than a P frame */
-#define I_FRAME_SIZE_RATIO 3.0
-#define QSCALE_K           20
-
 static void rate_control_init(MpegEncContext *s)
 {
-#if 1
     emms_c();
 
     //initial values, they dont really matter as they will be totally different within a few frames
@@ -1925,23 +2005,6 @@ static void rate_control_init(MpegEncContext *s)
 
     s->short_term_qsum=0.001;
     s->short_term_qcount=0.001;
-#else
-    s->wanted_bits = 0;
-
-    if (s->intra_only) {
-        s->I_frame_bits = ((INT64)s->bit_rate * FRAME_RATE_BASE) / s->frame_rate;
-        s->P_frame_bits = s->I_frame_bits;
-    } else {
-        s->P_frame_bits = (int) ((float)(s->gop_size * s->bit_rate) / 
-                                 (float)((float)s->frame_rate / FRAME_RATE_BASE * (I_FRAME_SIZE_RATIO + s->gop_size - 1)));
-        s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO);
-    }
-
-#if defined(DEBUG)
-    printf("I_frame_size=%d P_frame_size=%d\n",
-           s->I_frame_bits, s->P_frame_bits);
-#endif
-#endif
 }
 
 static double predict(Predictor *p, double q, double var)
@@ -1972,7 +2035,6 @@ if(256*256*256*64%count==0){
 
 static int rate_estimate_qscale(MpegEncContext *s)
 {
-#if 1
     int qmin= s->qmin;
     int qmax= s->qmax;
     int rate_q=5;
@@ -2049,41 +2111,6 @@ static int rate_estimate_qscale(MpegEncContext *s)
 //       rate_q, short_term_q, s->mc_mb_var, s->frame_bits);
 //printf("%d %d\n", s->bit_rate, (int)fps);
     return qscale;
-#else
-    INT64 diff, total_bits = s->total_bits;
-    float q;
-    int qscale;
-    if (s->pict_type == I_TYPE) {
-        s->wanted_bits += s->I_frame_bits;
-    } else {
-        s->wanted_bits += s->P_frame_bits;
-    }
-    diff = s->wanted_bits - total_bits;
-    q = 31.0 - (float)diff / (QSCALE_K * s->mb_height * s->mb_width);
-    /* adjust for I frame */
-    if (s->pict_type == I_TYPE && !s->intra_only) {
-        q /= I_FRAME_SIZE_RATIO;
-    }
-
-    /* using a too small Q scale leeds to problems in mpeg1 and h263
-       because AC coefficients are clamped to 255 or 127 */
-    qmin = 3;
-    if (q < qmin)
-        q = qmin;
-    else if (q > 31)
-        q = 31;
-    qscale = (int)(q + 0.5);
-#if defined(DEBUG)
-    printf("\n%d: total=%0.0f wanted=%0.0f br=%0.1f diff=%d qest=%2.1f\n", 
-           s->picture_number, 
-           (double)total_bits, 
-           (double)s->wanted_bits,
-           (float)s->frame_rate / FRAME_RATE_BASE * 
-           total_bits / s->picture_number, 
-           (int)diff, q);
-#endif
-    return qscale;
-#endif
 }
 
 AVCodec mpeg1video_encoder = {
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 68d5bd7fde..6848ed806e 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -26,7 +26,7 @@
 enum OutputFormat {
     FMT_MPEG1,
     FMT_H263,
-    FMT_MJPEG,
+    FMT_MJPEG, 
 };
 
 #define MPEG_BUF_SIZE (16 * 1024)
@@ -36,6 +36,7 @@ enum OutputFormat {
 
 #define MAX_FCODE 7
 #define MAX_MV 2048
+#define REORDER_BUFFER_SIZE (FF_MAX_B_FRAMES+2)
 
 typedef struct Predictor{
     double coeff;
@@ -43,6 +44,15 @@ typedef struct Predictor{
     double decay;
 } Predictor;
 
+typedef struct ReorderBuffer{ /* element type of MpegEncContext.coded_order[]: one queued picture plus its per-picture settings */
+    UINT8 *picture[3]; /* picture pointers (presumably the Y, Cb, Cr planes - confirm); picture[0] != NULL is the "output?" test in MPV_encode_picture */
+    int pict_type; /* copied into MpegEncContext.pict_type when coded_order[0] is encoded */
+    int qscale; /* copied into MpegEncContext.qscale only when fixed_qscale is set */
+    int force_type; /* copied into MpegEncContext.force_type */
+    int picture_number; /* copied into MpegEncContext.picture_number */
+    int picture_in_gop_number; /* copied into MpegEncContext.picture_in_gop_number */
+} ReorderBuffer;
+
 typedef struct MpegEncContext {
     struct AVCodecContext *avctx;
     /* the following parameters must be initialized before encoding */
@@ -66,7 +76,8 @@ typedef struct MpegEncContext {
     int max_qdiff;    /* max qscale difference between frames */
     int encoding;     /* true if we are encoding (vs decoding) */
     int flags;        /* AVCodecContext.flags (HQ, MV4, ...) */
-    int force_type;   /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
+    int force_input_type;/* 0= no force, otherwise I_TYPE, P_TYPE, ... */
+    int max_b_frames; /* max number of b-frames for encoding */
     /* the following fields are managed internally by the encoder */
 
     /* bit output */
@@ -74,45 +85,62 @@ typedef struct MpegEncContext {
 
     /* sequence parameters */
     int context_initialized;
+    int input_picture_number;
+    int input_picture_in_gop_number; /* 0-> first pic in gop, ... */
     int picture_number;
     int fake_picture_number; /* picture number at the bitstream frame rate */
     int gop_picture_number;  /* index of the first picture of a GOP based on fake_pic_num & mpeg1 specific */
     int picture_in_gop_number; /* 0-> first pic in gop, ... */
-    int mb_width, mb_height;
+    int b_frames_since_non_b;  /* used for encoding, relative to not yet reordered input */
+    int mb_width, mb_height;   /* number of MBs horizontally & vertically */
     int mb_num;                /* number of MBs of a picture */
     int linesize;              /* line size, in bytes, may be different from width */
     UINT8 *new_picture[3];     /* picture to be compressed */
-    UINT8 *last_picture[3];    /* previous picture */
+    UINT8 *picture_buffer[REORDER_BUFFER_SIZE][3]; /* internal buffers used for reordering of input pictures */
+    int picture_buffer_index;
+    ReorderBuffer coded_order[REORDER_BUFFER_SIZE];
+    UINT8 *last_picture[3];      /* previous picture */
     UINT8 *last_picture_base[3]; /* real start of the picture */
-    UINT8 *next_picture[3];    /* previous picture (for bidir pred) */
+    UINT8 *next_picture[3];      /* previous picture (for bidir pred) */
     UINT8 *next_picture_base[3]; /* real start of the picture */
-    UINT8 *aux_picture[3];    /* aux picture (for B frames only) */
-    UINT8 *aux_picture_base[3]; /* real start of the picture */
-    UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */
-    int last_dc[3]; /* last DC values for MPEG1 */
-    INT16 *dc_val[3]; /* used for mpeg4 DC prediction, all 3 arrays must be continuous */
+    UINT8 *aux_picture[3];       /* aux picture (for B frames only) */
+    UINT8 *aux_picture_base[3];  /* real start of the picture */
+    UINT8 *current_picture[3];   /* buffer to store the decompressed current picture */
+    int last_dc[3];              /* last DC values for MPEG1 */
+    INT16 *dc_val[3];            /* used for mpeg4 DC prediction, all 3 arrays must be continuous */
     int y_dc_scale, c_dc_scale;
-    UINT8 *coded_block; /* used for coded block pattern prediction */
-    INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */
+    UINT8 *coded_block;          /* used for coded block pattern prediction (msmpeg4v3, wmv1)*/
+    INT16 (*ac_val[3])[16];      /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */
     int ac_pred;
     int mb_skiped;              /* MUST BE SET only during DECODING */
-    UINT8 *mbskip_table;        /* used to avoid copy if macroblock
-                                   skipped (for black regions for example) */
-    UINT8 *mbintra_table;            /* used to kill a few memsets */
+    UINT8 *mbskip_table;        /* used to avoid copy if macroblock skipped (for black regions for example) 
+                                   and used for b-frame encoding & decoding (contains skip table of next P Frame) */
+    UINT8 *mbintra_table;       /* used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding */
 
-    int qscale;
-    int pict_type;
-    int last_non_b_pict_type; /* used for mpeg4 gmc b-frames */
-    int last_pict_type; /* used for bit rate stuff (needs that to update the right predictor) */
+    int input_qscale;           /* qscale prior to reordering of frames */
+    int input_pict_type;        /* pict_type prior to reordering of frames */
+    int force_type;             /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
+    int qscale;                 /* QP */
+    int pict_type;              /* I_TYPE, P_TYPE, B_TYPE, ... */
+    int last_non_b_pict_type;   /* used for mpeg4 gmc b-frames */
+    int last_pict_type;         /* used for bit rate stuff (needs that to update the right predictor) */
     int frame_rate_index;
     /* motion compensation */
     int unrestricted_mv;
     int h263_long_vectors; /* use horrible h263v1 long vector mode */
 
-    int f_code; /* resolution */
-    int b_code; /* backward resolution for B Frames (mpeg4) */
-    INT16 *mv_table[2];    /* MV table (1MV per MB)*/
-    INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB)*/
+    int f_code; /* forward MV resolution */
+    int b_code; /* backward MV resolution for B Frames (mpeg4) */
+    INT16 (*motion_val)[2];            /* used for MV prediction (4MV per MB) */
+    INT16 (*p_mv_table)[2];            /* MV table (1MV per MB) p-frame encoding */
+    INT16 (*last_p_mv_table)[2];       /* MV table (1MV per MB) p-frame encoding */
+    INT16 (*b_forw_mv_table)[2];       /* MV table (1MV per MB) forward mode b-frame encoding */
+    INT16 (*b_back_mv_table)[2];       /* MV table (1MV per MB) backward mode b-frame encoding */
+    INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
+    INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
+    INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
+    INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
+    INT16 (*b_direct_mv_table)[2];     /* MV table (1MV per MB) direct mode b-frame encoding */
     int me_method;          /* ME algorithm */
     int mv_dir;
 #define MV_DIR_BACKWARD  1
@@ -131,12 +159,12 @@ typedef struct MpegEncContext {
     */
     int mv[2][4][2];
     int field_select[2][2];
-    int last_mv[2][2][2];
+    int last_mv[2][2][2];             /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */
     UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
     UINT8 *fcode_tab; /* smallest fcode needed for each MV */
 
     int has_b_frames;
-    int no_rounding; /* apply no rounding to motion estimation (MPEG4) */
+    int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...) */
 
     /* macroblock layer */
     int mb_x, mb_y;
@@ -150,10 +178,10 @@ typedef struct MpegEncContext {
 #define MB_TYPE_SKIPED   0x08
 #define MB_TYPE_DIRECT   0x10
 #define MB_TYPE_FORWARD  0x20
-#define MB_TYPE_BACKWAD  0x40
+#define MB_TYPE_BACKWARD 0x40
 #define MB_TYPE_BIDIR    0x80
 
-    int block_index[6];
+    int block_index[6]; /* index to current MB in block-based arrays with edges */
     int block_wrap[6];
 
     /* matrix transmitted in the bitstream */
@@ -172,8 +200,7 @@ typedef struct MpegEncContext {
     void *opaque; /* private data for the user */
 
     /* bit rate control */
-    int I_frame_bits;    /* wanted number of bits per I frame */
-    int P_frame_bits;    /* same for P frame */
+    int I_frame_bits; //FIXME used in mpeg12 ...
     int avg_mb_var;        /* average MB variance for current frame */
     int mc_mb_var;     /* motion compensated MB variance for current frame */
     int last_mc_mb_var;     /* motion compensated MB variance for last frame */
@@ -212,11 +239,13 @@ typedef struct MpegEncContext {
     
     /* mpeg4 specific */
     int time_increment_resolution;
-    int time_increment_bits;
-    int time_increment;
-    int time_base;
-    int time;
-    int last_non_b_time[2];
+    int time_increment_bits;        /* number of bits to represent the fractional part of time */
+    int last_time_base;
+    int time_base;                  /* time in seconds of last I,P,S Frame */
+    int64_t time;                   /* time of current frame */ 
+    int64_t last_non_b_time;
+    uint16_t pp_time;               /* time distance between the last two P, S or I frames */
+    uint16_t bp_time;               /* time distance between the last B frame and P, S or I frame */
     int shape;
     int vol_sprite_usage;
     int sprite_width;
@@ -231,7 +260,7 @@ typedef struct MpegEncContext {
     int sprite_shift[2][2];
     int mcsel;
     int quant_precision;
-    int quarter_sample;
+    int quarter_sample;              /* 1 -> qpel, 0 -> half-pel ME/MC */
     int scalability;
     int new_pred;
     int reduced_res_vop;
@@ -327,9 +356,13 @@ void MPV_common_init_mmx(MpegEncContext *s);
 #endif
 
 /* motion_est.c */
-
-void estimate_motion(MpegEncContext *s, 
-                    int mb_x, int mb_y);
+void ff_estimate_p_frame_motion(MpegEncContext * s,
+                             int mb_x, int mb_y);
+void ff_estimate_b_frame_motion(MpegEncContext * s,
+                             int mb_x, int mb_y);
+int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
+void ff_fix_long_p_mvs(MpegEncContext * s);
+void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
 
 /* mpeg12.c */
 extern INT16 default_intra_matrix[64];