h263 OBMC & 4MV support

cleanup Originally committed as revision 2536 to svn://svn.ffmpeg.org/ffmpeg/trunk
2025-08-15 14:13:16 +02:00 · 2003-11-28 21:31:20 +00:00
parent a11d11aaa8
commit f7190f7399
4 changed files with 488 additions and 234 deletions
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2268,7 +2268,6 @@ static void opt_output_file(const char *filename)
                    video_enc->flags |= CODEC_FLAG_H263P_AIC;
                }
                if (use_4mv) {
-                    video_enc->mb_decision = FF_MB_DECISION_BITS; //FIXME remove
                    video_enc->flags |= CODEC_FLAG_4MV;
                }
            
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -222,7 +222,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
        calculation of the current MB to see if we're on the limits */
        put_bits(&s->pb, 1, 0);	/* unrestricted motion vector: off */
        put_bits(&s->pb, 1, 0);	/* SAC: off */
-        put_bits(&s->pb, 1, 0);	/* advanced prediction mode: off */
+        put_bits(&s->pb, 1, s->obmc);	/* advanced prediction mode */
        put_bits(&s->pb, 1, 0);	/* not PB frame */
        put_bits(&s->pb, 5, s->qscale);
        put_bits(&s->pb, 1, 0);	/* Continuous Presence Multipoint mode: off */
@@ -240,7 +240,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
        s->umvplus = s->unrestricted_mv;
        put_bits(&s->pb, 1, s->umvplus); /* Unrestricted Motion Vector */
        put_bits(&s->pb,1,0); /* SAC: off */
-        put_bits(&s->pb,1,0); /* Advanced Prediction Mode: off */
+        put_bits(&s->pb,1,s->obmc); /* Advanced Prediction Mode */
        put_bits(&s->pb,1,s->h263_aic); /* Advanced Intra Coding */
        put_bits(&s->pb,1,0); /* Deblocking Filter: off */
        put_bits(&s->pb,1,0); /* Slice Structured: off */
@@ -602,6 +602,8 @@ void ff_h263_update_motion_val(MpegEncContext * s){
    if(s->encoding){ //FIXME encoding MUST be cleaned up
        if (s->mv_type == MV_TYPE_8X8) 
            s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
+        else if(s->mb_intra)
+            s->current_picture.mb_type[mb_xy]= MB_TYPE_INTRA;
        else
            s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_16x16;
    }
@@ -896,15 +898,15 @@ void mpeg4_encode_mb(MpegEncContext * s,
            }

            put_bits(&s->pb, 1, 0);	/* mb coded */
+            cbpc = cbp & 3;
+            cbpy = cbp >> 2;
+            cbpy ^= 0xf;
            if(s->mv_type==MV_TYPE_16X16){
-                cbpc = cbp & 3;
                if(s->dquant) cbpc+= 8;
                put_bits(&s->pb,
                        inter_MCBPC_bits[cbpc],
                        inter_MCBPC_code[cbpc]);

-                cbpy = cbp >> 2;
-                cbpy ^= 0xf;
                put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
                if(s->dquant)
                    put_bits(pb2, 2, dquant_code[s->dquant+2]);
@@ -925,12 +927,9 @@ void mpeg4_encode_mb(MpegEncContext * s,
                h263_encode_motion(s, motion_x - pred_x, s->f_code);
                h263_encode_motion(s, motion_y - pred_y, s->f_code);
            }else{
-                cbpc = (cbp & 3)+16;
                put_bits(&s->pb,
-                        inter_MCBPC_bits[cbpc],
-                        inter_MCBPC_code[cbpc]);
-                cbpy = cbp >> 2;
-                cbpy ^= 0xf;
+                        inter_MCBPC_bits[cbpc+16],
+                        inter_MCBPC_code[cbpc+16]);
                put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);

                if(!s->progressive_sequence){
@@ -1070,34 +1069,68 @@ void h263_encode_mb(MpegEncContext * s,
            return;
        }
        put_bits(&s->pb, 1, 0);	/* mb coded */
+        
        cbpc = cbp & 3;
        if(s->dquant) cbpc+= 8;
-        put_bits(&s->pb,
-		    inter_MCBPC_bits[cbpc],
-		    inter_MCBPC_code[cbpc]);
        cbpy = cbp >> 2;
        cbpy ^= 0xf;
-        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
-        if(s->dquant)
-            put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+        if(s->mv_type==MV_TYPE_16X16){
+            put_bits(&s->pb,
+                    inter_MCBPC_bits[cbpc],
+                    inter_MCBPC_code[cbpc]);

-        if(interleaved_stats){
-            s->misc_bits+= get_bits_diff(s);
-        }
+            put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+            if(s->dquant)
+                put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+                
+            if(interleaved_stats){
+                s->misc_bits+= get_bits_diff(s);
+            }

-        /* motion vectors: 16x16 mode only now */
-        h263_pred_motion(s, 0, &pred_x, &pred_y);
-      
-        if (!s->umvplus) {  
-            h263_encode_motion(s, motion_x - pred_x, s->f_code);
-            h263_encode_motion(s, motion_y - pred_y, s->f_code);
-        }
-        else {
-            h263p_encode_umotion(s, motion_x - pred_x);
-            h263p_encode_umotion(s, motion_y - pred_y);
-            if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
-                /* To prevent Start Code emulation */
-                put_bits(&s->pb,1,1);
+            /* motion vectors: 16x16 mode */
+            h263_pred_motion(s, 0, &pred_x, &pred_y);
+            
+            if (!s->umvplus) {  
+                h263_encode_motion(s, motion_x - pred_x, s->f_code);
+                h263_encode_motion(s, motion_y - pred_y, s->f_code);
+            }
+            else {
+                h263p_encode_umotion(s, motion_x - pred_x);
+                h263p_encode_umotion(s, motion_y - pred_y);
+                if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+                    /* To prevent Start Code emulation */
+                    put_bits(&s->pb,1,1);
+            }
+        }else{
+            put_bits(&s->pb,
+                    inter_MCBPC_bits[cbpc+16],
+                    inter_MCBPC_code[cbpc+16]);
+            put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+            if(s->dquant)
+                put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+
+            if(interleaved_stats){
+                s->misc_bits+= get_bits_diff(s);
+            }
+
+            for(i=0; i<4; i++){
+                /* motion vectors: 8x8 mode*/
+                h263_pred_motion(s, i, &pred_x, &pred_y);
+
+                motion_x= s->motion_val[ s->block_index[i] ][0];
+                motion_y= s->motion_val[ s->block_index[i] ][1];
+                if (!s->umvplus) {  
+                    h263_encode_motion(s, motion_x - pred_x, s->f_code);
+                    h263_encode_motion(s, motion_y - pred_y, s->f_code);
+                }
+                else {
+                    h263p_encode_umotion(s, motion_x - pred_x);
+                    h263p_encode_umotion(s, motion_y - pred_y);
+                    if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+                        /* To prevent Start Code emulation */
+                        put_bits(&s->pb,1,1);
+                }
+            }
        }

        if(interleaved_stats){
@@ -3301,6 +3334,97 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
    }
 }

+/**
+ * read the next MVs for OBMC. yes this is a ugly hack, feel free to send a patch :)
+ */
+static void preview_obmc(MpegEncContext *s){
+    GetBitContext gb= s->gb;
+    
+    int cbpc, i, pred_x, pred_y, mx, my;
+    int16_t *mot_val;
+    const int xy= s->mb_x + 1 + s->mb_y * s->mb_stride;
+    const int stride= s->block_wrap[0]*2;
+    
+    for(i=0; i<4; i++)
+        s->block_index[i]+= 2;
+    for(i=4; i<6; i++)
+        s->block_index[i]+= 1;
+    s->mb_x++;
+    
+    assert(s->pict_type == P_TYPE);
+
+    do{
+        if (get_bits1(&s->gb)) {
+            /* skip mb */
+            mot_val = s->motion_val[ s->block_index[0] ];
+            mot_val[0       ]= mot_val[2       ]= 
+            mot_val[0+stride]= mot_val[2+stride]= 0;
+            mot_val[1       ]= mot_val[3       ]=
+            mot_val[1+stride]= mot_val[3+stride]= 0;
+            
+            s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+            goto end;
+        }
+        cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
+    }while(cbpc == 20);
+    
+    if(cbpc & 4){
+        s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+    }else{
+        get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+        if (cbpc & 8) {
+            skip_bits(&s->gb, 2);
+        }
+        
+        if ((cbpc & 16) == 0) {
+                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; 
+                /* 16x16 motion prediction */
+                mot_val= h263_pred_motion(s, 0, &pred_x, &pred_y);
+                if (s->umvplus)
+                   mx = h263p_decode_umotion(s, pred_x);
+                else
+                   mx = h263_decode_motion(s, pred_x, s->f_code);
+            
+                if (s->umvplus)
+                   my = h263p_decode_umotion(s, pred_y);
+                else
+                   my = h263_decode_motion(s, pred_y, s->f_code);
+            
+                mot_val[0       ]= mot_val[2       ]= 
+                mot_val[0+stride]= mot_val[2+stride]= mx;
+                mot_val[1       ]= mot_val[3       ]=
+                mot_val[1+stride]= mot_val[3+stride]= my;
+        } else {
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; 
+            for(i=0;i<4;i++) {
+                mot_val = h263_pred_motion(s, i, &pred_x, &pred_y);
+                if (s->umvplus)
+                  mx = h263p_decode_umotion(s, pred_x);
+                else
+                  mx = h263_decode_motion(s, pred_x, s->f_code);
+                
+                if (s->umvplus)
+                  my = h263p_decode_umotion(s, pred_y);
+                else    
+                  my = h263_decode_motion(s, pred_y, s->f_code);
+                if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
+                  skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
+                mot_val[0] = mx;
+                mot_val[1] = my;
+            }
+        }
+    }
+end:
+        
+    for(i=0; i<4; i++)
+        s->block_index[i]-= 2;
+    for(i=4; i<6; i++)
+        s->block_index[i]-= 1;
+    s->mb_x--;
+
+    s->gb= gb;
+}
+
 int ff_h263_decode_mb(MpegEncContext *s,
                      DCTELEM block[6][64])
 {
@@ -3330,7 +3454,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
                s->mcsel=0;
                s->mv[0][0][0] = 0;
                s->mv[0][0][1] = 0;
-                s->mb_skiped = 1;
+                s->mb_skiped = !s->obmc;
            }
            goto end;
        }
@@ -3645,6 +3769,10 @@ intra:
        }
    }
 end:
+    if(s->obmc){
+        if(s->pict_type == P_TYPE && s->mb_x+1<s->mb_width)
+            preview_obmc(s);
+    }

        /* per-MB end of slice check */
    if(s->codec_id==CODEC_ID_MPEG4){
@@ -4231,16 +4359,14 @@ int h263_decode_picture_header(MpegEncContext *s)
        
        s->pict_type = I_TYPE + get_bits1(&s->gb);

-        s->unrestricted_mv = get_bits1(&s->gb); 
-        s->h263_long_vectors = s->unrestricted_mv;
+        s->h263_long_vectors = get_bits1(&s->gb); 

        if (get_bits1(&s->gb) != 0) {
            av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
            return -1;	/* SAC: off */
        }
-        if (get_bits1(&s->gb) != 0) {
-            s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */
-        }   
+        s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
+        s->unrestricted_mv = s->h263_long_vectors || s->obmc;
        
        if (get_bits1(&s->gb) != 0) {
            av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
@@ -4265,14 +4391,10 @@ int h263_decode_picture_header(MpegEncContext *s)
            dprintf("ufep=1, format: %d\n", format);
            skip_bits(&s->gb,1); /* Custom PCF */
            s->umvplus = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */
-            s->unrestricted_mv = s->umvplus;
            skip_bits1(&s->gb); /* Syntax-based Arithmetic Coding (SAC) */
-            if (get_bits1(&s->gb) != 0) {
-                s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */
-            }
-            if (get_bits1(&s->gb) != 0) { /* Advanced Intra Coding (AIC) */
-                s->h263_aic = 1;
-            }
+            s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
+            s->unrestricted_mv = s->umvplus || s->obmc;
+            s->h263_aic = get_bits1(&s->gb); /* Advanced Intra Coding (AIC) */
 	    
            if (get_bits1(&s->gb) != 0) {
                av_log(s->avctx, AV_LOG_ERROR, "Deblocking Filter not supported\n");
@@ -4303,7 +4425,6 @@ int h263_decode_picture_header(MpegEncContext *s)
            
        /* MPPTYPE */
        s->pict_type = get_bits(&s->gb, 3) + I_TYPE;
-        dprintf("pict_type: %d\n", s->pict_type);
 	if (s->pict_type == 8 && s->avctx->codec_tag == ff_get_fourcc("ZYGO"))
 	    s->pict_type = I_TYPE;
        if (s->pict_type != I_TYPE &&
@@ -4311,7 +4432,6 @@ int h263_decode_picture_header(MpegEncContext *s)
            return -1;
        skip_bits(&s->gb, 2);
        s->no_rounding = get_bits1(&s->gb);
-        dprintf("RTYPE: %d\n", s->no_rounding);
        skip_bits(&s->gb, 4);
        
        /* Get the picture dimensions */
@@ -4371,13 +4491,14 @@ int h263_decode_picture_header(MpegEncContext *s)
    }

     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-         av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d %s %s %s %s\n", 
+         av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s\n", 
         s->qscale, av_get_pict_type_char(s->pict_type),
         s->gb.size_in_bits, 1-s->no_rounding,
-         s->mv_type == MV_TYPE_8X8 ? "ADV" : "",
-         s->umvplus ? "UMV" : "",
-         s->h263_long_vectors ? "LONG" : "",
-         s->h263_plus ? "+" : ""
+         s->obmc ? " AP" : "",
+         s->umvplus ? " UMV" : "",
+         s->h263_long_vectors ? " LONG" : "",
+         s->h263_plus ? " +" : "",
+         s->h263_aic ? " AIC" : ""
         ); 
     }
 #if 1
@@ -5246,8 +5367,9 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
        return -1;	/* SAC: off */
    }
    if (get_bits1(&s->gb) != 0) {
+        s->obmc= 1;
        av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
-        return -1;	/* advanced prediction mode: off */
+//        return -1;	/* advanced prediction mode: off */
    }
    if (get_bits1(&s->gb) != 0) {
        av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -1120,6 +1120,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
        }else
            set_p_mv_tables(s, mx, my, 1);
    }else{
+        int intra_score, i;
        mb_type= MB_TYPE_INTER;

        dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
@@ -1136,10 +1137,59 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
                dmin=dmin4;
            }
        }
-        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; 
+                
+//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; 
        set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
+
+        /* get intra luma score */
+        if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
+            intra_score= (varc<<8) - 500; //FIXME dont scale it down so we dont have to fix it
+        }else{
+            int mean= (sum+128)>>8;
+            mean*= 0x01010101;
+            
+            for(i=0; i<16; i++){
+                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
+                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
+                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
+                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
+            }
+
+            intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize);
+        }
+#if 0 //FIXME
+        /* get chroma score */
+        if(s->avctx->mb_cmp&FF_CMP_CHROMA){
+            for(i=1; i<3; i++){
+                uint8_t *dest_c;
+                int mean;
+                
+                if(s->out_format == FMT_H263){
+                    mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
+                }else{
+                    mean= (s->last_dc[i] + 4)>>3;
+                }
+                dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
+                
+                mean*= 0x01010101;
+                for(i=0; i<8; i++){
+                    *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
+                    *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
+                }
+                
+                intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
+            }                
+        }
+#endif
+        intra_score += s->me.mb_penalty_factor*16;
        
-        if (vard <= 64 || vard < varc) {
+        if(intra_score < dmin){
+            mb_type= MB_TYPE_INTRA;
+            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= MB_TYPE_INTRA; //FIXME cleanup
+        }else
+            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;
+        
+        if (vard <= 64 || vard < varc) { //FIXME
            s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        }else{
            s->scene_change_score+= s->qscale;
@@ -1566,7 +1616,7 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
            int x;
            int xy= y*s->mb_stride;
            for(x=0; x<s->mb_width; x++){
-                if(s->mb_type[xy] & type){
+                if((s->mb_type[xy] & type) || (s->mb_type[xy] & MB_TYPE_INTRA)){ //FIXME 
                    int fcode= FFMAX(fcode_tab[mv_table[xy][0] + MAX_MV],
                                     fcode_tab[mv_table[xy][1] + MAX_MV]);
                    int j;
@@ -1621,6 +1671,7 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
                   || s->p_mv_table[xy][1] >=range || s->p_mv_table[xy][1] <-range){
                    s->mb_type[xy] &= ~MB_TYPE_INTER;
                    s->mb_type[xy] |= MB_TYPE_INTRA;
+                    s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
                    s->p_mv_table[xy][0] = 0;
                    s->p_mv_table[xy][1] = 0;
                }
@@ -1650,6 +1701,7 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
                           || my >=range || my <-range){
                            s->mb_type[i] &= ~MB_TYPE_INTER4V;
                            s->mb_type[i] |= MB_TYPE_INTRA;
+                            s->current_picture.mb_type[i]= MB_TYPE_INTRA;
                        }
                    }
                }
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -297,7 +297,6 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
-            CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
@@ -338,7 +337,6 @@ static void free_picture(MpegEncContext *s, Picture *pic){
    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
-    av_freep(&pic->mb_cmp_score);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
@@ -669,12 +667,21 @@ int MPV_encode_init(AVCodecContext *avctx)
                       && !s->fixed_qscale;
    
    s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
+    
+    s->obmc=    (s->codec_id == CODEC_ID_H263 || s->codec_id == CODEC_ID_H263P) 
+             && (s->flags & CODEC_FLAG_4MV);

-    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
+    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
+       && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
        return -1;
    }
    
+    if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
+        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decission\n");
+        return -1;
+    }
+    
    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
        return -1;
@@ -1964,6 +1971,42 @@ void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w,
    }
 }

+static inline int hpel_motion(MpegEncContext *s, 
+                                  uint8_t *dest, uint8_t *src, 
+                                  int src_x, int src_y,
+                                  int width, int height, int stride,
+                                  int h_edge_pos, int v_edge_pos,
+                                  int w, int h, op_pixels_func *pix_op,
+                                  int motion_x, int motion_y)
+{
+    int dxy;
+    int emu=0;
+
+    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+    src_x += motion_x >> 1;
+    src_y += motion_y >> 1;
+                
+    /* WARNING: do no forget half pels */
+    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
+    if (src_x == width)
+        dxy &= ~1;
+    src_y = clip(src_y, -16, height);
+    if (src_y == height)
+        dxy &= ~2;
+    src += src_y * stride + src_x;
+
+    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
+        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
+           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
+            ff_emulated_edge_mc(s->edge_emu_buffer, src, stride, w+1, h+1,
+                             src_x, src_y, h_edge_pos, v_edge_pos);
+            src= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    pix_op[dxy](dest, src, stride, h);
+    return emu;
+}

 /* apply one mpeg motion vector to the three components */
 static inline void mpeg_motion(MpegEncContext *s,
@@ -1974,7 +2017,7 @@ static inline void mpeg_motion(MpegEncContext *s,
                               int motion_x, int motion_y, int h)
 {
    uint8_t *ptr;
-    int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
+    int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, uvlinesize;
    int emu=0;
 #if 0    
 if(s->quarter_sample)
@@ -1983,34 +2026,19 @@ if(s->quarter_sample)
    motion_y>>=1;
 }
 #endif
-    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
-    src_x = s->mb_x * 16 + (motion_x >> 1);
-    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
-                
-    /* WARNING: do no forget half pels */
+
    height = s->height >> field_based;
    v_edge_pos = s->v_edge_pos >> field_based;
-    src_x = clip(src_x, -16, s->width);
-    if (src_x == s->width)
-        dxy &= ~1;
-    src_y = clip(src_y, -16, height);
-    if (src_y == height)
-        dxy &= ~2;
-    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;
-    ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
-    dest_y += dest_offset;

-    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
-        if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
-           || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
-            ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
-                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
-            ptr= s->edge_emu_buffer + src_offset;
-            emu=1;
-        }
-    }
-    pix_op[0][dxy](dest_y, ptr, linesize, h);
+    emu= hpel_motion(s, 
+                dest_y + dest_offset, ref_picture[0] + src_offset,
+                s->mb_x * 16, s->mb_y * (16 >> field_based),
+                s->width, height, s->current_picture.linesize[0] << field_based,
+                s->h_edge_pos, v_edge_pos,
+                16, h, pix_op[0],
+                motion_x, motion_y);
+

    if(s->flags&CODEC_FLAG_GRAY) return;

@@ -2055,6 +2083,87 @@ if(s->quarter_sample)
    }
    pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
 }
+//FIXME move to dsputil, avg variant, 16x16 version
+static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
+    int x;
+    uint8_t * const top   = src[1];
+    uint8_t * const left  = src[2];
+    uint8_t * const mid   = src[0];
+    uint8_t * const right = src[3];
+    uint8_t * const bottom= src[4];
+#define OBMC_FILTER(x, t, l, m, r, b)\
+    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
+#define OBMC_FILTER4(x, t, l, m, r, b)\
+    OBMC_FILTER(x         , t, l, m, r, b);\
+    OBMC_FILTER(x+1       , t, l, m, r, b);\
+    OBMC_FILTER(x  +stride, t, l, m, r, b);\
+    OBMC_FILTER(x+1+stride, t, l, m, r, b);
+    
+    x=0;
+    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
+    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
+    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
+    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
+    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
+    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
+    x+= stride;
+    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
+    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
+    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
+    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
+    x+= stride;
+    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
+    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
+    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
+    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
+    x+= 2*stride;
+    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
+    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
+    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
+    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
+    x+= 2*stride;
+    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
+    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
+    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
+    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
+    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
+    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
+    x+= stride;
+    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
+    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
+    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
+    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
+}
+
+/* obmc for 1 8x8 luma block */
+static inline void obmc_motion(MpegEncContext *s,
+                               uint8_t *dest, uint8_t *src,
+                               int src_x, int src_y,
+                               op_pixels_func *pix_op,
+                               int16_t mv[5][2]/* mid top left right bottom*/)
+#define MID    0
+{
+    int i;
+    uint8_t *ptr[5];
+    
+    assert(s->quarter_sample==0);
+    
+    for(i=0; i<5; i++){
+        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
+            ptr[i]= ptr[MID];
+        }else{
+            ptr[i]= s->edge_emu_buffer + 16 + 8*(i&1) + s->linesize*8*(i>>1);
+            hpel_motion(s, ptr[i], src,
+                        src_x, src_y,
+                        s->width, s->height, s->linesize,
+                        s->h_edge_pos, s->v_edge_pos,
+                        8, 8, pix_op,
+                        mv[i][0], mv[i][1]);
+        }
+    }
+
+    put_obmc(dest, ptr, s->linesize);                
+}

 static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
@@ -2163,6 +2272,55 @@ inline int ff_h263_round_chroma(int x){
    }
 }

+/**
+ * h263 chorma 4mv motion compensation.
+ */
+static inline void chroma_4mv_motion(MpegEncContext *s,
+                                     uint8_t *dest_cb, uint8_t *dest_cr,
+                                     uint8_t **ref_picture,
+                                     op_pixels_func *pix_op,
+                                     int mx, int my){
+    int dxy, emu=0, src_x, src_y, offset;
+    uint8_t *ptr;
+    
+    /* In case of 8X8, we construct a single chroma motion vector
+       with a special rounding */
+    mx= ff_h263_round_chroma(mx);
+    my= ff_h263_round_chroma(my);
+    
+    dxy = ((my & 1) << 1) | (mx & 1);
+    mx >>= 1;
+    my >>= 1;
+
+    src_x = s->mb_x * 8 + mx;
+    src_y = s->mb_y * 8 + my;
+    src_x = clip(src_x, -8, s->width/2);
+    if (src_x == s->width/2)
+        dxy &= ~1;
+    src_y = clip(src_y, -8, s->height/2);
+    if (src_y == s->height/2)
+        dxy &= ~2;
+    
+    offset = (src_y * (s->uvlinesize)) + src_x;
+    ptr = ref_picture[1] + offset;
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
+        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
+           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ptr= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
+
+    ptr = ref_picture[2] + offset;
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+        ptr= s->edge_emu_buffer;
+    }
+    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
+}
+
 /**
 * motion compesation of a single macroblock
 * @param s context
@@ -2180,14 +2338,74 @@ static inline void MPV_motion(MpegEncContext *s,
                              int dir, uint8_t **ref_picture, 
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
 {
-    int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
+    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;
-    int emu=0;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

+    if(s->obmc){
+        int16_t mv_cache[4][4][2];
+        const int xy= s->mb_x + s->mb_y*s->mb_stride;
+        const int mot_stride= s->mb_width*2 + 2;
+        const int mot_xy= 1 + mb_x*2 + (mb_y*2 + 1)*mot_stride;
+
+        assert(!s->mb_skiped);
+                
+        memcpy(mv_cache[1][1], s->motion_val[mot_xy           ], sizeof(int16_t)*4);
+        memcpy(mv_cache[2][1], s->motion_val[mot_xy+mot_stride], sizeof(int16_t)*4);
+        memcpy(mv_cache[3][1], s->motion_val[mot_xy+mot_stride], sizeof(int16_t)*4);
+
+        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
+            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
+        }else{
+            memcpy(mv_cache[0][1], s->motion_val[mot_xy-mot_stride], sizeof(int16_t)*4);
+        }
+
+        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
+            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
+            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
+        }else{
+            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->motion_val[mot_xy-1];
+            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->motion_val[mot_xy-1+mot_stride];
+        }
+
+        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
+            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
+            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
+        }else{
+            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->motion_val[mot_xy+2];
+            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->motion_val[mot_xy+2+mot_stride];
+        }
+        
+        mx = 0;
+        my = 0;
+        for(i=0;i<4;i++) {
+            const int x= (i&1)+1;
+            const int y= (i>>1)+1;
+            int16_t mv[5][2]= {
+                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
+                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
+                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
+                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
+                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
+            //FIXME cleanup
+            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+                        ref_picture[0],
+                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
+                        pix_op[1],
+                        mv);
+
+            mx += mv[0][0];
+            my += mv[0][1];
+        }
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
+
+        return;
+    }
+   
    switch(s->mv_type) {
    case MV_TYPE_16X16:
 #ifdef CONFIG_RISKY
@@ -2253,73 +2471,21 @@ static inline void MPV_motion(MpegEncContext *s,
            }
        }else{
            for(i=0;i<4;i++) {
-                motion_x = s->mv[dir][i][0];
-                motion_y = s->mv[dir][i][1];
-
-                dxy = ((motion_y & 1) << 1) | (motion_x & 1);
-                src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
-                src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
-                    
-                /* WARNING: do no forget half pels */
-                src_x = clip(src_x, -16, s->width);
-                if (src_x == s->width)
-                    dxy &= ~1;
-                src_y = clip(src_y, -16, s->height);
-                if (src_y == s->height)
-                    dxy &= ~2;
-                    
-                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
-                if(s->flags&CODEC_FLAG_EMU_EDGE){
-                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 8
-                       || (unsigned)src_y > s->v_edge_pos - (motion_y&1) - 8){
-                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
-                        ptr= s->edge_emu_buffer;
-                    }
-                }
-                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
-                pix_op[1][dxy](dest, ptr, s->linesize, 8);
+                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+                            ref_picture[0],
+                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
+                            s->width, s->height, s->linesize,
+                            s->h_edge_pos, s->v_edge_pos,
+                            8, 8, pix_op[1],
+                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

-        if(s->flags&CODEC_FLAG_GRAY) break;
-        /* In case of 8X8, we construct a single chroma motion vector
-           with a special rounding */
-        mx= ff_h263_round_chroma(mx);
-        my= ff_h263_round_chroma(my);
-        dxy = ((my & 1) << 1) | (mx & 1);
-        mx >>= 1;
-        my >>= 1;
-
-        src_x = mb_x * 8 + mx;
-        src_y = mb_y * 8 + my;
-        src_x = clip(src_x, -8, s->width/2);
-        if (src_x == s->width/2)
-            dxy &= ~1;
-        src_y = clip(src_y, -8, s->height/2);
-        if (src_y == s->height/2)
-            dxy &= ~2;
-        
-        offset = (src_y * (s->uvlinesize)) + src_x;
-        ptr = ref_picture[1] + offset;
-        if(s->flags&CODEC_FLAG_EMU_EDGE){
-                if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
-                   || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
-                    ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
-                    ptr= s->edge_emu_buffer;
-                    emu=1;
-                }
-            }
-        pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
-
-        ptr = ref_picture[2] + offset;
-        if(emu){
-            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
-            ptr= s->edge_emu_buffer;
-        }
-        pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
@@ -2459,7 +2625,7 @@ static inline void MPV_motion(MpegEncContext *s,
        }
    }
    break;
-
+    default: assert(0);
    }
 }

@@ -3943,90 +4109,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                    MPV_decode_mb(s, s->block);
            } else {
                int motion_x, motion_y;
-                int intra_score;
-                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
-                
-              if(s->avctx->mb_decision==FF_MB_DECISION_SIMPLE && s->pict_type==P_TYPE){ //FIXME check if the mess is usefull at all
-                /* get luma score */
-                if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
-                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
-                }else{
-                    uint8_t *dest_y;
-
-                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
-                    mean*= 0x01010101;
-                    
-                    dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
-                
-                    for(i=0; i<16; i++){
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
-                    }
-
-                    s->mb_intra=1;
-                    intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
-                                        
-/*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
-                        s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
-                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
-                }
-                
-                /* get chroma score */
-                if(s->avctx->mb_cmp&FF_CMP_CHROMA){
-                    int i;
-                    
-                    s->mb_intra=1;
-                    for(i=1; i<3; i++){
-                        uint8_t *dest_c;
-                        int mean;
-                        
-                        if(s->out_format == FMT_H263){
-                            mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
-                        }else{
-                            mean= (s->last_dc[i] + 4)>>3;
-                        }
-                        dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
-                        
-                        mean*= 0x01010101;
-                        for(i=0; i<8; i++){
-                            *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
-                            *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
-                        }
-                        
-                        intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
-                    }                
-                }
-
-                /* bias */
-                switch(s->avctx->mb_cmp&0xFF){
-                default:
-                case FF_CMP_SAD:
-                    intra_score+= 32*s->qscale;
-                    break;
-                case FF_CMP_SSE:
-                    intra_score+= 24*s->qscale*s->qscale;
-                    break;
-                case FF_CMP_SATD:
-                    intra_score+= 96*s->qscale;
-                    break;
-                case FF_CMP_DCT:
-                    intra_score+= 48*s->qscale;
-                    break;
-                case FF_CMP_BIT:
-                    intra_score+= 16;
-                    break;
-                case FF_CMP_PSNR:
-                case FF_CMP_RD:
-                    intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
-                    break;
-                }
-
-                if(intra_score < inter_score)
-                    mb_type= MB_TYPE_INTRA;
-              }  
-                
                s->mv_type=MV_TYPE_16X16;
                // only one MB-Type possible
                
@@ -4941,4 +5023,3 @@ AVCodec mjpeg_encoder = {
 };

 #endif //CONFIG_ENCODERS
-