diff --git a/common.mak b/common.mak index 66bbed4a40..0f8392a035 100644 --- a/common.mak +++ b/common.mak @@ -27,20 +27,26 @@ $(eval INSTALL = @$(call ECHO,INSTALL,$$(^:$(SRC_DIR)/%=%)); $(INSTALL)) endif # NASM requires -I path terminated with / -IFLAGS := -I. -I$(SRC_PATH)/ -CPPFLAGS := $(IFLAGS) $(CPPFLAGS) -CFLAGS += $(ECFLAGS) -YASMFLAGS += $(IFLAGS) -Pconfig.asm - +IFLAGS := -I. -I$(SRC_PATH)/ +CPPFLAGS := $(IFLAGS) $(CPPFLAGS) +CFLAGS += $(ECFLAGS) +CCFLAGS = $(CFLAGS) +YASMFLAGS += $(IFLAGS) -Pconfig.asm HOSTCFLAGS += $(IFLAGS) +define COMPILE + $($(1)DEP) + $($(1)) $(CPPFLAGS) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $< +endef + +COMPILE_C = $(call COMPILE,CC) +COMPILE_S = $(call COMPILE,AS) + %.o: %.c - $(CCDEP) - $(CC) $(CPPFLAGS) $(CFLAGS) $(CC_DEPFLAGS) -c $(CC_O) $< + $(COMPILE_C) %.o: %.S - $(ASDEP) - $(AS) $(CPPFLAGS) $(ASFLAGS) $(AS_DEPFLAGS) -c -o $@ $< + $(COMPILE_S) %.ho: %.h $(CC) $(CPPFLAGS) $(CFLAGS) -Wno-unused -c -o $@ -x c $< diff --git a/configure b/configure index af3c09f7c5..fb197aae55 100755 --- a/configure +++ b/configure @@ -1704,6 +1704,7 @@ SLIBNAME_WITH_VERSION='$(SLIBNAME).$(LIBVERSION)' SLIBNAME_WITH_MAJOR='$(SLIBNAME).$(LIBMAJOR)' LIB_INSTALL_EXTRA_CMD='$$(RANLIB) "$(LIBDIR)/$(LIBNAME)"' +AS_O='-o $@' CC_O='-o $@' host_cflags='-D_ISOC99_SOURCE -O3 -g' @@ -3346,6 +3347,7 @@ STRIP=$strip CPPFLAGS=$CPPFLAGS CFLAGS=$CFLAGS ASFLAGS=$ASFLAGS +AS_O=$CC_O CC_O=$CC_O LDFLAGS=$LDFLAGS FFSERVERLDFLAGS=$FFSERVERLDFLAGS diff --git a/libavcodec/fft-test.c b/libavcodec/fft-test.c index a676627de2..be105fe834 100644 --- a/libavcodec/fft-test.c +++ b/libavcodec/fft-test.c @@ -252,8 +252,9 @@ int main(int argc, char **argv) #if CONFIG_FFT_FLOAT RDFTContext r1, *r = &r1; DCTContext d1, *d = &d1; + int fft_size_2; #endif - int fft_nbits, fft_size, fft_size_2; + int fft_nbits, fft_size; double scale = 1.0; AVLFG prng; av_lfg_init(&prng, 1); @@ -292,7 +293,6 @@ int main(int argc, char **argv) } fft_size = 1 << fft_nbits; - fft_size_2 = fft_size >> 1; tab = av_malloc(fft_size * sizeof(FFTComplex)); tab1 = av_malloc(fft_size * sizeof(FFTComplex)); tab_ref = av_malloc(fft_size * sizeof(FFTComplex)); @@ -372,6 +372,7 @@ int main(int argc, char **argv) break; #if CONFIG_FFT_FLOAT case TRANSFORM_RDFT: + fft_size_2 = fft_size >> 1; if (do_inverse) { tab1[ 0].im = 0; tab1[fft_size_2].im = 0; diff --git a/libavcodec/h264.c b/libavcodec/h264.c index dd9f425c3b..8ecf9b4dbd 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -2488,7 +2488,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ s->dropable= h->nal_ref_idc == 0; - if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){ + /* FIXME: 2tap qpel isn't implemented for high bit depth. */ + if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){ s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; }else{ diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index 46abc54c49..226c2aef28 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -218,10 +218,11 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, int mb_type, left_type; int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); + int chroma444 = CHROMA444; mb_xy = h->mb_xy; - if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || CHROMA444) { + if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); return; } @@ -264,16 +265,46 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); } if(chroma){ - if(left_type){ - filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); - filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); + if(chroma444){ + if(left_type){ + filter_mb_edgev( &img_cb[4*0], linesize, bS4, qpc0, h); + filter_mb_edgev( &img_cr[4*0], linesize, bS4, qpc0, h); + } + if( IS_8x8DCT(mb_type) ) { + filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h); + filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h); + filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h); + filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h); + filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h); + filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h); + } else { + filter_mb_edgev( &img_cb[4*1], linesize, bS3, qpc, h); + filter_mb_edgev( &img_cr[4*1], linesize, bS3, qpc, h); + filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h); + filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h); + filter_mb_edgev( &img_cb[4*3], linesize, bS3, qpc, h); + filter_mb_edgev( &img_cr[4*3], linesize, bS3, qpc, h); + filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h); + filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h); + filter_mb_edgeh( &img_cb[4*1*linesize], linesize, bS3, qpc, h); + filter_mb_edgeh( &img_cr[4*1*linesize], linesize, bS3, qpc, h); + filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h); + filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h); + filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, h); + filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, h); + } + }else{ + if(left_type){ + filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); + filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); + } + filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); + filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); + filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); + filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); + filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); + filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); } - filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); - filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); - filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); - filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); - filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); - filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); } return; } else { @@ -301,9 +332,14 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, #define FILTER(hv,dir,edge)\ if(AV_RN64A(bS[dir][edge])) { \ filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ - if(chroma && !(edge&1)) {\ - filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ - filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ + if(chroma){\ + if(chroma444){\ + filter_mb_edge##hv( &img_cb[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ + filter_mb_edge##hv( &img_cr[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ + } else if(!(edge&1)) {\ + filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ + filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ + }\ }\ } if(left_type) diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c index bdff535a99..2760ca064f 100644 --- a/libavcodec/mpeg4videoenc.c +++ b/libavcodec/mpeg4videoenc.c @@ -296,10 +296,6 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb) { int i, last_non_zero; -#if 0 //variables for the outcommented version - int code, sign, last; -#endif - const RLTable *rl; uint32_t *bits_tab; uint8_t *len_tab; const int last_index = s->block_last_index[n]; @@ -309,20 +305,17 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n mpeg4_encode_dc(dc_pb, intra_dc, n); if(last_index<1) return; i = 1; - rl = &ff_mpeg4_rl_intra; bits_tab= uni_mpeg4_intra_rl_bits; len_tab = uni_mpeg4_intra_rl_len; } else { if(last_index<0) return; i = 0; - rl = &ff_h263_rl_inter; bits_tab= uni_mpeg4_inter_rl_bits; len_tab = uni_mpeg4_inter_rl_len; } /* AC coefs */ last_non_zero = i - 1; -#if 1 for (; i < last_index; i++) { int level = block[ scan_table[i] ]; if (level) { @@ -348,64 +341,6 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(1<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); } } -#else - for (; i <= last_index; i++) { - const int slevel = block[ scan_table[i] ]; - if (slevel) { - int level; - int run = i - last_non_zero - 1; - last = (i == last_index); - sign = 0; - level = slevel; - if (level < 0) { - sign = 1; - level = -level; - } - code = get_rl_index(rl, last, run, level); - put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - if (code == rl->n) { - int level1, run1; - level1 = level - rl->max_level[last][run]; - if (level1 < 1) - goto esc2; - code = get_rl_index(rl, last, run, level1); - if (code == rl->n) { - esc2: - put_bits(ac_pb, 1, 1); - if (level > MAX_LEVEL) - goto esc3; - run1 = run - rl->max_run[last][level] - 1; - if (run1 < 0) - goto esc3; - code = get_rl_index(rl, last, run1, level); - if (code == rl->n) { - esc3: - /* third escape */ - put_bits(ac_pb, 1, 1); - put_bits(ac_pb, 1, last); - put_bits(ac_pb, 6, run); - put_bits(ac_pb, 1, 1); - put_sbits(ac_pb, 12, slevel); - put_bits(ac_pb, 1, 1); - } else { - /* second escape */ - put_bits(ac_pb, 1, 0); - put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - put_bits(ac_pb, 1, sign); - } - } else { - /* first escape */ - put_bits(ac_pb, 1, 0); - put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - put_bits(ac_pb, 1, sign); - } - } else { - put_bits(ac_pb, 1, sign); - } - last_non_zero = i; - } - } -#endif } static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 197000beb9..08f10d2c18 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -222,8 +222,7 @@ yuv2yuvX_altivec_real(SwsContext *c, } static void hScale_altivec_real(int16_t *dst, int dstW, - const uint8_t *src, int srcW, - int xInc, const int16_t *filter, + const uint8_t *src, const int16_t *filter, const int16_t *filterPos, int filterSize) { register int i; diff --git a/libswscale/swscale.c b/libswscale/swscale.c index abbe375685..1f736558df 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1920,10 +1920,8 @@ static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1, } } - // bilinear / bicubic scaling static void hScale_c(int16_t *dst, int dstW, const uint8_t *src, - int srcW, int xInc, const int16_t *filter, const int16_t *filterPos, int filterSize) { @@ -2036,7 +2034,7 @@ static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth, int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift); } else if (!c->hyscale_fast) { - c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); + c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize); } else { // fast bilinear upscale / crap downscale c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); } @@ -2082,8 +2080,8 @@ static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *ds c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); } else if (!c->hcscale_fast) { - c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); - c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); + c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize); + c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize); } else { // fast bilinear upscale / crap downscale c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc); } diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index c0f8e64d70..27de6b24d0 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -312,8 +312,8 @@ typedef struct SwsContext { const uint8_t *src1, const uint8_t *src2, int srcW, int xInc); - void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, - int xInc, const int16_t *filter, const int16_t *filterPos, + void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, + const int16_t *filter, const int16_t *filterPos, int filterSize); void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 25399fadef..fdf82b2d06 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1915,8 +1915,7 @@ static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, #if !COMPILE_TEMPLATE_MMX2 // bilinear / bicubic scaling static void RENAME(hScale)(int16_t *dst, int dstW, - const uint8_t *src, int srcW, - int xInc, const int16_t *filter, + const uint8_t *src, const int16_t *filter, const int16_t *filterPos, int filterSize) { assert(filterSize % 4 == 0 && filterSize>0); diff --git a/subdir.mak b/subdir.mak index 8b3807378f..0cb6030501 100644 --- a/subdir.mak +++ b/subdir.mak @@ -11,16 +11,17 @@ all-$(CONFIG_STATIC): $(SUBDIR)$(LIBNAME) all-$(CONFIG_SHARED): $(SUBDIR)$(SLIBNAME) $(SUBDIR)%-test.o: $(SUBDIR)%-test.c - $(CC) $(CPPFLAGS) $(CFLAGS) -DTEST -c $(CC_O) $^ + $(COMPILE_C) $(SUBDIR)%-test.o: $(SUBDIR)%.c - $(CC) $(CPPFLAGS) $(CFLAGS) -DTEST -c $(CC_O) $^ + $(COMPILE_C) $(SUBDIR)x86/%.o: $(SUBDIR)x86/%.asm $(YASMDEP) $(YASMFLAGS) -I $( $(@:.o=.d) $(YASM) $(YASMFLAGS) -I $(