From 0fa8158d3e70eccda141df1d18cff648530c0cf5 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 27 Sep 2004 19:47:17 +0000 Subject: [PATCH] move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/avcodec.h | 1 + libavcodec/dsputil.c | 11 +++++-- libavcodec/dsputil.h | 7 ++++- libavcodec/h264.c | 46 ++++------------------------ libavcodec/h264idct.c | 70 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 43 deletions(-) create mode 100644 libavcodec/h264idct.c diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 3abb440376..3039d868ae 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -1113,6 +1113,7 @@ typedef struct AVCodecContext { #define FF_IDCT_ALTIVEC 8 #define FF_IDCT_SH4 9 #define FF_IDCT_SIMPLEARM 10 +#define FF_IDCT_H264 11 /** * slice count. diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 3db04c00a7..1fc698a62f 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3434,8 +3434,13 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) #endif //CONFIG_ENCODERS if(avctx->lowres==1){ - c->idct_put= ff_jref_idct4_put; - c->idct_add= ff_jref_idct4_add; + if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){ + c->idct_put= ff_jref_idct4_put; + c->idct_add= ff_jref_idct4_add; + }else{ + c->idct_put= ff_h264_lowres_idct_put_c; + c->idct_add= ff_h264_lowres_idct_add_c; + } c->idct = j_rev_dct4; c->idct_permutation_type= FF_NO_IDCT_PERM; }else if(avctx->lowres==2){ @@ -3462,6 +3467,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) } } + c->h264_idct_add= ff_h264_idct_add_c; + /* VP3 DSP support */ c->vp3_dsp_init = vp3_dsp_init_c; c->vp3_idct = vp3_idct_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index fd02e4e035..43f113e1a0 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h 
@@ -50,6 +50,10 @@ void ff_fdct_mmx(DCTELEM *block); void ff_fdct_mmx2(DCTELEM *block); void ff_fdct_sse2(DCTELEM *block); +void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); +void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); + /* encoding scans */ extern const uint8_t ff_alternate_horizontal_scan[64]; extern const uint8_t ff_alternate_vertical_scan[64]; @@ -330,7 +334,8 @@ typedef struct DSPContext { */ void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix, int coeff_count, DCTELEM *output_samples); - + + void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); } DSPContext; void dsputil_static_init(void); diff --git a/libavcodec/h264.c b/libavcodec/h264.c index c8e87a1585..c36e3843f7 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -1323,40 +1323,6 @@ static inline int get_chroma_qp(H264Context *h, int qscale){ } -/** - * - */ -static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){ - int i; - uint8_t *cm = cropTbl + MAX_NEG_CROP; - - block[0] += 32; - - for(i=0; i<4; i++){ - const int z0= block[0 + 4*i] + block[2 + 4*i]; - const int z1= block[0 + 4*i] - block[2 + 4*i]; - const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i]; - const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1); - - block[0 + 4*i]= z0 + z3; - block[1 + 4*i]= z1 + z2; - block[2 + 4*i]= z1 - z2; - block[3 + 4*i]= z0 - z3; - } - - for(i=0; i<4; i++){ - const int z0= block[i + 4*0] + block[i + 4*2]; - const int z1= block[i + 4*0] - block[i + 4*2]; - const int z2= (block[i + 4*1]>>1) - block[i + 4*3]; - const int z3= block[i + 4*1] + (block[i + 4*3]>>1); - - dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ]; - dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ]; - dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ]; - dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ]; - } -} - #if 0 static void 
h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){ int i; @@ -2440,7 +2406,7 @@ static void hl_decode_mb(H264Context *h){ h->pred4x4[ dir ](ptr, topright, linesize); if(h->non_zero_count_cache[ scan8[i] ]){ if(s->codec_id == CODEC_ID_H264) - h264_add_idct_c(ptr, h->mb + i*16, linesize); + s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize); else svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); } @@ -2467,7 +2433,7 @@ static void hl_decode_mb(H264Context *h){ for(i=0; i<16; i++){ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below uint8_t * const ptr= dest_y + h->block_offset[i]; - h264_add_idct_c(ptr, h->mb + i*16, linesize); + s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize); } } }else{ @@ -2487,13 +2453,13 @@ static void hl_decode_mb(H264Context *h){ for(i=16; i<16+4; i++){ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ uint8_t * const ptr= dest_cb + h->block_offset[i]; - h264_add_idct_c(ptr, h->mb + i*16, uvlinesize); + s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize); } } for(i=20; i<20+4; i++){ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ uint8_t * const ptr= dest_cr + h->block_offset[i]; - h264_add_idct_c(ptr, h->mb + i*16, uvlinesize); + s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize); } } }else{ @@ -3232,7 +3198,7 @@ static inline int get_level_prefix(GetBitContext *gb){ log= 32 - av_log2(buf); #ifdef TRACE print_bin(buf>>(32-log), log); - printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); #endif LAST_SKIP_BITS(re, gb, log); @@ -5975,7 +5941,7 @@ int main(){ } // printf("\n"); - h264_add_idct_c(ref, block, 4); + s->dsp.h264_idct_add(ref, block, 4); /* for(j=0; j<16; j++){ printf("%d ", ref[j]); } diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c new 
file mode 100644 index 0000000000..551197d37e --- /dev/null +++ b/libavcodec/h264idct.c @@ -0,0 +1,70 @@ +/* + * H.264 IDCT + * Copyright (c) 2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file h264idct.c + * H.264 IDCT. + * @author Michael Niedermayer + */ + +#include "dsputil.h" + +static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + block[0] += 1<<(shift-1); + + for(i=0; i<4; i++){ + const int z0= block[0 + block_stride*i] + block[2 + block_stride*i]; + const int z1= block[0 + block_stride*i] - block[2 + block_stride*i]; + const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i]; + const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1); + + block[0 + block_stride*i]= z0 + z3; + block[1 + block_stride*i]= z1 + z2; + block[2 + block_stride*i]= z1 - z2; + block[3 + block_stride*i]= z0 - z3; + } + + for(i=0; i<4; i++){ + const int z0= block[i + block_stride*0] + block[i + block_stride*2]; + const int z1= block[i + block_stride*0] - block[i + block_stride*2]; + const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3]; + const int z3= block[i + block_stride*1] + 
(block[i + block_stride*3]>>1); + + dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ]; + dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ]; + dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ]; + dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ]; + } +} + +void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){ + idct_internal(dst, block, stride, 4, 6, 1); +} + +void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){ + idct_internal(dst, block, stride, 8, 3, 1); +} + +void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){ + idct_internal(dst, block, stride, 8, 3, 0); +}