mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
Originally committed as revision 6412 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
e0769997cb
commit
2833fc4646
@ -4165,6 +4165,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
|
|
||||||
c->prefetch= just_return;
|
c->prefetch= just_return;
|
||||||
|
|
||||||
|
memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
|
||||||
|
memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
|
||||||
|
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
dsputil_init_mmx(c, avctx);
|
dsputil_init_mmx(c, avctx);
|
||||||
#endif
|
#endif
|
||||||
@ -4193,6 +4196,13 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
dsputil_init_bfin(c,avctx);
|
dsputil_init_bfin(c,avctx);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
for(i=0; i<64; i++){ /* 64 = 4*16: visits every slot of the [4][16] tables by flat-indexing through row 0 */
|
||||||
|
if(!c->put_2tap_qpel_pixels_tab[0][i]) /* slot is still zero after the earlier memset of the table */
|
||||||
|
c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i]; /* fall back to the h264 qpel function in the same slot */
|
||||||
|
if(!c->avg_2tap_qpel_pixels_tab[0][i]) /* same check for the averaging table */
|
||||||
|
c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
|
||||||
|
}
|
||||||
|
|
||||||
switch(c->idct_permutation_type){
|
switch(c->idct_permutation_type){
|
||||||
case FF_NO_IDCT_PERM:
|
case FF_NO_IDCT_PERM:
|
||||||
for(i=0; i<64; i++)
|
for(i=0; i<64; i++)
|
||||||
|
@ -277,6 +277,9 @@ typedef struct DSPContext {
|
|||||||
qpel_mc_func put_h264_qpel_pixels_tab[4][16];
|
qpel_mc_func put_h264_qpel_pixels_tab[4][16];
|
||||||
qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
|
qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
|
||||||
|
|
||||||
|
qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
|
||||||
|
qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
|
||||||
|
|
||||||
h264_weight_func weight_h264_pixels_tab[10];
|
h264_weight_func weight_h264_pixels_tab[10];
|
||||||
h264_biweight_func biweight_h264_pixels_tab[10];
|
h264_biweight_func biweight_h264_pixels_tab[10];
|
||||||
|
|
||||||
|
@ -693,6 +693,17 @@ retry:
|
|||||||
s->next_p_frame_damaged=0;
|
s->next_p_frame_damaged=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if((s->avctx->flags2 & CODEC_FLAG2_FAST) && s->pict_type==B_TYPE){ /* fast flag on a B picture: use the approximate 2-tap qpel tables */
|
||||||
|
s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
|
||||||
|
s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
|
||||||
|
}else if((!s->no_rounding) || s->pict_type==B_TYPE){ /* rounding case: same condition MPV_decode_mb_internal used for put_qpel_pixels_tab */
|
||||||
|
s->me.qpel_put= s->dsp.put_qpel_pixels_tab;
|
||||||
|
s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab;
|
||||||
|
}else{ /* no_rounding set on a non-B picture: only the put table has a no_rnd variant here */
|
||||||
|
s->me.qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
|
||||||
|
s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab;
|
||||||
|
}
|
||||||
|
|
||||||
if(MPV_frame_start(s, avctx) < 0)
|
if(MPV_frame_start(s, avctx) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
@ -3782,8 +3782,8 @@ static void hl_decode_mb(H264Context *h){
|
|||||||
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
|
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
|
||||||
}else if(s->codec_id == CODEC_ID_H264){
|
}else if(s->codec_id == CODEC_ID_H264){
|
||||||
hl_motion(h, dest_y, dest_cb, dest_cr,
|
hl_motion(h, dest_y, dest_cb, dest_cr,
|
||||||
s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
|
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
|
||||||
s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
|
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
|
||||||
s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
|
s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4885,6 +4885,14 @@ static int decode_slice_header(H264Context *h){
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){ /* fast flag set and the current picture is not marked as a reference */
|
||||||
|
s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; /* approximate 2-tap qpel tables */
|
||||||
|
s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
|
||||||
|
}else{
|
||||||
|
s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; /* regular h264 qpel tables; hl_decode_mb passes s->me.qpel_put/qpel_avg to hl_motion */
|
||||||
|
s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2400,6 +2400,53 @@ QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2)
|
|||||||
QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)
|
QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)
|
||||||
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
|
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
|
||||||
|
|
||||||
|
/***********************************/
|
||||||
|
/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
|
||||||
|
|
||||||
|
#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)\
|
||||||
|
static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
|
||||||
|
OPNAME ## pixels ## SIZE ## HPEL(dst, src, stride, SIZE);\
|
||||||
|
}
|
||||||
|
#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)\
|
||||||
|
static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
|
||||||
|
OPNAME ## 2tap_qpel ## SIZE ## _l3_ ## MMX(dst, src+S0, stride, SIZE, S1, S2);\
|
||||||
|
}
|
||||||
|
|
||||||
|
#define QPEL_2TAP(OPNAME, SIZE, MMX)\
|
||||||
|
QPEL_2TAP_XY(OPNAME, SIZE, MMX, 20, _x2_ ## MMX)\
|
||||||
|
QPEL_2TAP_XY(OPNAME, SIZE, MMX, 02, _y2_ ## MMX)\
|
||||||
|
QPEL_2TAP_XY(OPNAME, SIZE, MMX, 22, _xy2_mmx)\
|
||||||
|
static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc00_ ## MMX =\
|
||||||
|
OPNAME ## qpel ## SIZE ## _mc00_ ## MMX;\
|
||||||
|
static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc21_ ## MMX =\
|
||||||
|
OPNAME ## 2tap_qpel ## SIZE ## _mc20_ ## MMX;\
|
||||||
|
static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc12_ ## MMX =\
|
||||||
|
OPNAME ## 2tap_qpel ## SIZE ## _mc02_ ## MMX;\
|
||||||
|
static void OPNAME ## 2tap_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
|
||||||
|
OPNAME ## pixels ## SIZE ## _y2_ ## MMX(dst, src+1, stride, SIZE);\
|
||||||
|
}\
|
||||||
|
static void OPNAME ## 2tap_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
|
||||||
|
OPNAME ## pixels ## SIZE ## _x2_ ## MMX(dst, src+stride, stride, SIZE);\
|
||||||
|
}\
|
||||||
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 10, 0, 1, 0)\
|
||||||
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 30, 1, -1, 0)\
|
||||||
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 01, 0, stride, 0)\
|
||||||
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 03, stride, -stride, 0)\
|
||||||
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 11, 0, stride, 1)\
|
||||||
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1)\
|
||||||
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1)\
|
||||||
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride+1, -stride, -1)\
|
||||||
|
|
||||||
|
QPEL_2TAP(put_, 16, mmx2)
|
||||||
|
QPEL_2TAP(avg_, 16, mmx2)
|
||||||
|
QPEL_2TAP(put_, 8, mmx2)
|
||||||
|
QPEL_2TAP(avg_, 8, mmx2)
|
||||||
|
QPEL_2TAP(put_, 16, 3dnow)
|
||||||
|
QPEL_2TAP(avg_, 16, 3dnow)
|
||||||
|
QPEL_2TAP(put_, 8, 3dnow)
|
||||||
|
QPEL_2TAP(avg_, 8, 3dnow)
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
static void just_return() { return; }
|
static void just_return() { return; }
|
||||||
#endif
|
#endif
|
||||||
@ -3276,6 +3323,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
dspfunc(avg_h264_qpel, 0, 16);
|
dspfunc(avg_h264_qpel, 0, 16);
|
||||||
dspfunc(avg_h264_qpel, 1, 8);
|
dspfunc(avg_h264_qpel, 1, 8);
|
||||||
dspfunc(avg_h264_qpel, 2, 4);
|
dspfunc(avg_h264_qpel, 2, 4);
|
||||||
|
|
||||||
|
dspfunc(put_2tap_qpel, 0, 16);
|
||||||
|
dspfunc(put_2tap_qpel, 1, 8);
|
||||||
|
dspfunc(avg_2tap_qpel, 0, 16);
|
||||||
|
dspfunc(avg_2tap_qpel, 1, 8);
|
||||||
#undef dspfunc
|
#undef dspfunc
|
||||||
|
|
||||||
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2;
|
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2;
|
||||||
@ -3399,6 +3451,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
dspfunc(avg_h264_qpel, 1, 8);
|
dspfunc(avg_h264_qpel, 1, 8);
|
||||||
dspfunc(avg_h264_qpel, 2, 4);
|
dspfunc(avg_h264_qpel, 2, 4);
|
||||||
|
|
||||||
|
dspfunc(put_2tap_qpel, 0, 16);
|
||||||
|
dspfunc(put_2tap_qpel, 1, 8);
|
||||||
|
dspfunc(avg_2tap_qpel, 0, 16);
|
||||||
|
dspfunc(avg_2tap_qpel, 1, 8);
|
||||||
|
|
||||||
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
|
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
|
||||||
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
|
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
|
||||||
}
|
}
|
||||||
|
@ -818,3 +818,51 @@ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int lin
|
|||||||
DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
|
DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define QPEL_2TAP_L3(OPNAME) \
|
||||||
|
static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
|
||||||
|
asm volatile(\
|
||||||
|
"1: \n\t"\
|
||||||
|
"movq (%1,%2), %%mm0 \n\t"\
|
||||||
|
"movq 8(%1,%2), %%mm1 \n\t"\
|
||||||
|
PAVGB" (%1,%3), %%mm0 \n\t"\
|
||||||
|
PAVGB" 8(%1,%3), %%mm1 \n\t"\
|
||||||
|
PAVGB" (%1), %%mm0 \n\t"\
|
||||||
|
PAVGB" 8(%1), %%mm1 \n\t"\
|
||||||
|
STORE_OP( (%1,%4),%%mm0)\
|
||||||
|
STORE_OP(8(%1,%4),%%mm1)\
|
||||||
|
"movq %%mm0, (%1,%4) \n\t"\
|
||||||
|
"movq %%mm1, 8(%1,%4) \n\t"\
|
||||||
|
"add %5, %1 \n\t"\
|
||||||
|
"decl %0 \n\t"\
|
||||||
|
"jnz 1b \n\t"\
|
||||||
|
:"+g"(h), "+r"(src)\
|
||||||
|
:"r"((long)off1), "r"((long)off2),\
|
||||||
|
"r"((long)(dst-src)), "r"((long)stride)\
|
||||||
|
:"memory"\
|
||||||
|
);\
|
||||||
|
}\
|
||||||
|
static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
|
||||||
|
asm volatile(\
|
||||||
|
"1: \n\t"\
|
||||||
|
"movq (%1,%2), %%mm0 \n\t"\
|
||||||
|
PAVGB" (%1,%3), %%mm0 \n\t"\
|
||||||
|
PAVGB" (%1), %%mm0 \n\t"\
|
||||||
|
STORE_OP((%1,%4),%%mm0)\
|
||||||
|
"movq %%mm0, (%1,%4) \n\t"\
|
||||||
|
"add %5, %1 \n\t"\
|
||||||
|
"decl %0 \n\t"\
|
||||||
|
"jnz 1b \n\t"\
|
||||||
|
:"+g"(h), "+r"(src)\
|
||||||
|
:"r"((long)off1), "r"((long)off2),\
|
||||||
|
"r"((long)(dst-src)), "r"((long)stride)\
|
||||||
|
:"memory"\
|
||||||
|
);\
|
||||||
|
}
|
||||||
|
|
||||||
|
#define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
|
||||||
|
QPEL_2TAP_L3(avg_)
|
||||||
|
#undef STORE_OP
|
||||||
|
#define STORE_OP(a,b)
|
||||||
|
QPEL_2TAP_L3(put_)
|
||||||
|
#undef STORE_OP
|
||||||
|
#undef QPEL_2TAP_L3
|
||||||
|
@ -3944,17 +3944,16 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc
|
|||||||
MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
|
MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
|
op_qpix= s->me.qpel_put;
|
||||||
if ((!s->no_rounding) || s->pict_type==B_TYPE){
|
if ((!s->no_rounding) || s->pict_type==B_TYPE){
|
||||||
op_pix = s->dsp.put_pixels_tab;
|
op_pix = s->dsp.put_pixels_tab;
|
||||||
op_qpix= s->dsp.put_qpel_pixels_tab;
|
|
||||||
}else{
|
}else{
|
||||||
op_pix = s->dsp.put_no_rnd_pixels_tab;
|
op_pix = s->dsp.put_no_rnd_pixels_tab;
|
||||||
op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
|
|
||||||
}
|
}
|
||||||
if (s->mv_dir & MV_DIR_FORWARD) {
|
if (s->mv_dir & MV_DIR_FORWARD) {
|
||||||
MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
|
MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
|
||||||
op_pix = s->dsp.avg_pixels_tab;
|
op_pix = s->dsp.avg_pixels_tab;
|
||||||
op_qpix= s->dsp.avg_qpel_pixels_tab;
|
op_qpix= s->me.qpel_avg;
|
||||||
}
|
}
|
||||||
if (s->mv_dir & MV_DIR_BACKWARD) {
|
if (s->mv_dir & MV_DIR_BACKWARD) {
|
||||||
MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
|
MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
|
||||||
|
Loading…
Reference in New Issue
Block a user