From 70d54392f5015b9c6594fcae558f59f952501e3b Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sun, 22 Apr 2012 21:37:54 +0200 Subject: [PATCH] lowres2 support. The new lowres support is limited to decoders where lowres decoding is possible in high quality. I was not able to measure any speed difference, but if one is found the 2-3 lines that might affect speed can be made compile time conditional Signed-off-by: Michael Niedermayer --- ffplay.c | 4 +- libavcodec/alpha/dsputil_alpha.c | 2 +- libavcodec/arm/dsputil_init_arm.c | 2 +- libavcodec/arm/dsputil_init_armv5te.c | 2 +- libavcodec/arm/dsputil_init_armv6.c | 2 +- libavcodec/arm/dsputil_init_neon.c | 2 +- libavcodec/avcodec.h | 6 +- libavcodec/dsputil.c | 122 ++++++++++++++- libavcodec/dsputil.h | 3 + libavcodec/dv.c | 8 +- libavcodec/dvdec.c | 3 +- libavcodec/error_resilience.c | 2 +- libavcodec/jrevdct.c | 213 ++++++++++++++++++++++++++ libavcodec/libopenjpegdec.c | 1 + libavcodec/mjpegbdec.c | 1 + libavcodec/mjpegdec.c | 6 +- libavcodec/mpegvideo.c | 38 ++--- libavcodec/mpegvideo.h | 2 +- libavcodec/options_table.h | 1 + libavcodec/ppc/dsputil_ppc.c | 2 +- libavcodec/sp5xdec.c | 1 + libavcodec/utils.c | 7 +- libavcodec/x86/dsputil_mmx.c | 2 +- 23 files changed, 387 insertions(+), 45 deletions(-) diff --git a/ffplay.c b/ffplay.c index 8ee9b0d2cd..4489b34cca 100644 --- a/ffplay.c +++ b/ffplay.c @@ -260,6 +260,7 @@ static int64_t duration = AV_NOPTS_VALUE; static int workaround_bugs = 1; static int fast = 0; static int genpts = 0; +static int lowres = 0; static int idct = FF_IDCT_AUTO; static enum AVDiscard skip_frame = AVDISCARD_DEFAULT; static enum AVDiscard skip_idct = AVDISCARD_DEFAULT; @@ -1324,7 +1325,7 @@ static void alloc_picture(void *opaque) /* SDL allocates a buffer smaller than requested if the video * overlay hardware is unable to support the requested size. */ fprintf(stderr, "Error: the video system does not support an image\n" - "size of %dx%d pixels. Try using -vf \"scale=w:h\"\n" + "size of %dx%d pixels. Try using -lowres or -vf \"scale=w:h\"\n" "to reduce the image size.\n", vp->width, vp->height ); do_exit(is); } @@ -3084,6 +3085,7 @@ static const OptionDef options[] = { { "fast", OPT_BOOL | OPT_EXPERT, { (void*)&fast }, "non spec compliant optimizations", "" }, { "genpts", OPT_BOOL | OPT_EXPERT, { (void*)&genpts }, "generate pts", "" }, { "drp", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&decoder_reorder_pts }, "let decoder reorder pts 0=off 1=on -1=auto", ""}, + { "lowres", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&lowres }, "", "" }, { "skiploop", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&skip_loop_filter }, "", "" }, { "skipframe", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&skip_frame }, "", "" }, { "skipidct", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&skip_idct }, "", "" }, diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c index ed9416ea8c..db2464b02f 100644 --- a/libavcodec/alpha/dsputil_alpha.c +++ b/libavcodec/alpha/dsputil_alpha.c @@ -336,7 +336,7 @@ void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) put_pixels_clamped_axp_p = c->put_pixels_clamped; add_pixels_clamped_axp_p = c->add_pixels_clamped; - if (avctx->bits_per_raw_sample <= 8 && + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) { c->idct_put = ff_simple_idct_put_axp; diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 2bc97bfa4e..03df6b29d0 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -82,7 +82,7 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; - if (avctx->bits_per_raw_sample <= 8) { + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) { if(avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_ARM){ c->idct_put = j_rev_dct_arm_put; diff --git a/libavcodec/arm/dsputil_init_armv5te.c b/libavcodec/arm/dsputil_init_armv5te.c index 6b2300b3ea..9fedb7a123 100644 --- a/libavcodec/arm/dsputil_init_armv5te.c +++ b/libavcodec/arm/dsputil_init_armv5te.c @@ -29,7 +29,7 @@ void ff_prefetch_arm(void *mem, int stride, int h); av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx) { - if (avctx->bits_per_raw_sample <= 8 && + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) { c->idct_put = ff_simple_idct_put_armv5te; diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index 8401aaceaa..cd5744d3b3 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -74,7 +74,7 @@ av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx) { const int high_bit_depth = avctx->bits_per_raw_sample > 8; - if (avctx->bits_per_raw_sample <= 8 && + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { c->idct_put = ff_simple_idct_put_armv6; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 1aa39de7e7..4c73276a95 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -182,7 +182,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) { const int high_bit_depth = avctx->bits_per_raw_sample > 8; - if (avctx->bits_per_raw_sample <= 8) { + if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) { if (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLENEON) { c->idct_put = ff_simple_idct_put_neon; diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 7887f7fdc9..c3a5192621 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -1451,7 +1451,7 @@ typedef struct AVCodecContext { int width, height; /** - * Bitstream width / height, may be different from width/height. + * Bitstream width / height, may be different from width/height if lowres enabled. * - encoding: unused * - decoding: Set by user before init if known. Codec should override / dynamically change if needed. */ @@ -2649,7 +2649,7 @@ typedef struct AVCodecContext { * - encoding: unused * - decoding: Set by user. */ - attribute_deprecated int lowres; + int lowres; /** * the picture in the bitstream @@ -2920,7 +2920,7 @@ typedef struct AVCodec { const int *supported_samplerates; ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0 const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1 const uint64_t *channel_layouts; ///< array of support channel layouts, or NULL if unknown. array is terminated by 0 - attribute_deprecated uint8_t max_lowres; ///< maximum value for lowres supported by the decoder + uint8_t max_lowres; ///< maximum value for lowres supported by the decoder const AVClass *priv_class; ///< AVClass for the private context const AVProfile *profiles; ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN} diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 553fb8c1cd..a13737d77e 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -381,6 +381,38 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, } } +static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + + /* read the pixels */ + for(i=0;i<4;i++) { + pixels[0] = av_clip_uint8(block[0]); + pixels[1] = av_clip_uint8(block[1]); + pixels[2] = av_clip_uint8(block[2]); + pixels[3] = av_clip_uint8(block[3]); + + pixels += line_size; + block += 8; + } +} + +static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + + /* read the pixels */ + for(i=0;i<2;i++) { + pixels[0] = av_clip_uint8(block[0]); + pixels[1] = av_clip_uint8(block[1]); + + pixels += line_size; + block += 8; + } +} + void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) @@ -422,6 +454,36 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, } } +static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + + /* read the pixels */ + for(i=0;i<4;i++) { + pixels[0] = av_clip_uint8(pixels[0] + block[0]); + pixels[1] = av_clip_uint8(pixels[1] + block[1]); + pixels[2] = av_clip_uint8(pixels[2] + block[2]); + pixels[3] = av_clip_uint8(pixels[3] + block[3]); + pixels += line_size; + block += 8; + } +} + +static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + + /* read the pixels */ + for(i=0;i<2;i++) { + pixels[0] = av_clip_uint8(pixels[0] + block[0]); + pixels[1] = av_clip_uint8(pixels[1] + block[1]); + pixels += line_size; + block += 8; + } +} + static int sum_abs_dctelem_c(DCTELEM *block) { int sum=0, i; @@ -2684,6 +2746,37 @@ static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) ff_add_pixels_clamped_c(block, dest, line_size); } +static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_j_rev_dct4 (block); + put_pixels_clamped4_c(block, dest, line_size); +} +static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_j_rev_dct4 (block); + add_pixels_clamped4_c(block, dest, line_size); +} + +static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_j_rev_dct2 (block); + put_pixels_clamped2_c(block, dest, line_size); +} +static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_j_rev_dct2 (block); + add_pixels_clamped2_c(block, dest, line_size); +} + +static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + dest[0] = av_clip_uint8((block[0] + 4)>>3); +} +static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3)); +} + static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } /* init static data */ @@ -2750,12 +2843,28 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) } #endif //CONFIG_ENCODERS - if (avctx->bits_per_raw_sample == 10) { - c->idct_put = ff_simple_idct_put_10; - c->idct_add = ff_simple_idct_add_10; - c->idct = ff_simple_idct_10; - c->idct_permutation_type = FF_NO_IDCT_PERM; - } else { + if(avctx->lowres==1){ + c->idct_put= ff_jref_idct4_put; + c->idct_add= ff_jref_idct4_add; + c->idct = ff_j_rev_dct4; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else if(avctx->lowres==2){ + c->idct_put= ff_jref_idct2_put; + c->idct_add= ff_jref_idct2_add; + c->idct = ff_j_rev_dct2; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else if(avctx->lowres==3){ + c->idct_put= ff_jref_idct1_put; + c->idct_add= ff_jref_idct1_add; + c->idct = ff_j_rev_dct1; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else{ + if (avctx->bits_per_raw_sample == 10) { + c->idct_put = ff_simple_idct_put_10; + c->idct_add = ff_simple_idct_add_10; + c->idct = ff_simple_idct_10; + c->idct_permutation_type = FF_NO_IDCT_PERM; + } else { if(avctx->idct_algo==FF_IDCT_INT){ c->idct_put= ff_jref_idct_put; c->idct_add= ff_jref_idct_add; @@ -2786,6 +2895,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->idct = ff_simple_idct_8; c->idct_permutation_type= FF_NO_IDCT_PERM; } + } } c->diff_pixels = diff_pixels_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 60bc53e5e8..34d659d532 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -46,6 +46,9 @@ void ff_fdct248_islow_8(DCTELEM *data); void ff_fdct248_islow_10(DCTELEM *data); void ff_j_rev_dct (DCTELEM *data); +void ff_j_rev_dct4 (DCTELEM *data); +void ff_j_rev_dct2 (DCTELEM *data); +void ff_j_rev_dct1 (DCTELEM *data); void ff_wmv2_idct_c(DCTELEM *data); void ff_fdct_mmx(DCTELEM *block); diff --git a/libavcodec/dv.c b/libavcodec/dv.c index e13026628d..5bb84793b5 100644 --- a/libavcodec/dv.c +++ b/libavcodec/dv.c @@ -311,7 +311,13 @@ av_cold int ff_dvvideo_init(AVCodecContext *avctx) /* 248DCT setup */ s->fdct[1] = dsp.fdct248; s->idct_put[1] = ff_simple_idct248_put; // FIXME: need to add it to DSP - memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64); + if (avctx->lowres){ + for (i = 0; i < 64; i++){ + int j = ff_zigzag248_direct[i]; + s->dv_zigzag[1][i] = dsp.idct_permutation[(j & 7) + (j & 8) * 4 + (j & 48) / 2]; + } + }else + memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64); avctx->coded_frame = &s->picture; s->avctx = avctx; diff --git a/libavcodec/dvdec.c b/libavcodec/dvdec.c index f0fa5e9023..4e09d0dd9a 100644 --- a/libavcodec/dvdec.c +++ b/libavcodec/dvdec.c @@ -144,7 +144,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg) LOCAL_ALIGNED_16(DCTELEM, sblock, [5*DV_MAX_BPM], [64]); LOCAL_ALIGNED_16(uint8_t, mb_bit_buffer, [ 80 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */ LOCAL_ALIGNED_16(uint8_t, vs_bit_buffer, [5*80 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */ - const int log2_blocksize = 3; + const int log2_blocksize = 3-s->avctx->lowres; int is_field_mode[5]; assert((((int)mb_bit_buffer) & 7) == 0); @@ -382,5 +382,6 @@ AVCodec ff_dvvideo_decoder = { .close = dvvideo_close, .decode = dvvideo_decode_frame, .capabilities = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS, + .max_lowres = 3, .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"), }; diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c index a8ea647af9..2691109e49 100644 --- a/libavcodec/error_resilience.c +++ b/libavcodec/error_resilience.c @@ -907,7 +907,7 @@ void ff_er_frame_end(MpegEncContext *s) /* We do not support ER of field pictures yet, * though it should not crash if enabled. */ - if (!s->err_recognition || s->error_count == 0 || + if (!s->err_recognition || s->error_count == 0 || s->avctx->lowres || s->avctx->hwaccel || s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU || s->picture_structure != PICT_FRAME || diff --git a/libavcodec/jrevdct.c b/libavcodec/jrevdct.c index e33558f825..395eb8c638 100644 --- a/libavcodec/jrevdct.c +++ b/libavcodec/jrevdct.c @@ -940,3 +940,216 @@ void ff_j_rev_dct(DCTBLOCK data) dataptr++; /* advance pointer to next column */ } } + +#undef DCTSIZE +#define DCTSIZE 4 +#define DCTSTRIDE 8 + +void ff_j_rev_dct4(DCTBLOCK data) +{ + int32_t tmp0, tmp1, tmp2, tmp3; + int32_t tmp10, tmp11, tmp12, tmp13; + int32_t z1; + int32_t d0, d2, d4, d6; + register DCTELEM *dataptr; + int rowctr; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + data[0] += 4; + + dataptr = data; + + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any row in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * row DCT calculations can be simplified this way. + */ + + register int *idataptr = (int*)dataptr; + + d0 = dataptr[0]; + d2 = dataptr[1]; + d4 = dataptr[2]; + d6 = dataptr[3]; + + if ((d2 | d4 | d6) == 0) { + /* AC terms all zero */ + if (d0) { + /* Compute a 32 bit value to assign. */ + DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); + register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); + + idataptr[0] = v; + idataptr[1] = v; + } + + dataptr += DCTSTRIDE; /* advance pointer to next row */ + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } + } else { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); + + dataptr += DCTSTRIDE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + dataptr = data; + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Columns of zeroes can be exploited in the same way as we did with rows. + * However, the row calculation has created many nonzero AC terms, so the + * simplification applies less often (typically 5% to 10% of the time). + * On machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. + */ + + d0 = dataptr[DCTSTRIDE*0]; + d2 = dataptr[DCTSTRIDE*1]; + d4 = dataptr[DCTSTRIDE*2]; + d6 = dataptr[DCTSTRIDE*3]; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } + } else { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); + + dataptr++; /* advance pointer to next column */ + } +} + +void ff_j_rev_dct2(DCTBLOCK data){ + int d00, d01, d10, d11; + + data[0] += 4; + d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE]; + d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; + d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; + d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; + + data[0+0*DCTSTRIDE]= (d00 + d10)>>3; + data[1+0*DCTSTRIDE]= (d01 + d11)>>3; + data[0+1*DCTSTRIDE]= (d00 - d10)>>3; + data[1+1*DCTSTRIDE]= (d01 - d11)>>3; +} + +void ff_j_rev_dct1(DCTBLOCK data){ + data[0] = (data[0] + 4)>>3; +} + +#undef FIX +#undef CONST_BITS diff --git a/libavcodec/libopenjpegdec.c b/libavcodec/libopenjpegdec.c index 5b09b34634..abfe7308e7 100644 --- a/libavcodec/libopenjpegdec.c +++ b/libavcodec/libopenjpegdec.c @@ -286,6 +286,7 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx, } ctx->dec_params.cp_limit_decoding = NO_LIMITATION; + ctx->dec_params.cp_reduce = avctx->lowres; // Tie decoder with decoding parameters opj_setup_decoder(dec, &ctx->dec_params); stream = opj_cio_open((opj_common_ptr)dec, buf, buf_size); diff --git a/libavcodec/mjpegbdec.c b/libavcodec/mjpegbdec.c index 540e984af5..c457b98081 100644 --- a/libavcodec/mjpegbdec.c +++ b/libavcodec/mjpegbdec.c @@ -161,5 +161,6 @@ AVCodec ff_mjpegb_decoder = { .close = ff_mjpeg_decode_end, .decode = mjpegb_decode_frame, .capabilities = CODEC_CAP_DR1, + .max_lowres = 3, .long_name = NULL_IF_CONFIG_SMALL("Apple MJPEG-B"), }; diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c index 330f4f5892..a841384af2 100644 --- a/libavcodec/mjpegdec.c +++ b/libavcodec/mjpegdec.c @@ -1003,8 +1003,8 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, x = 0; y = 0; for (j = 0; j < n; j++) { - block_offset = ((linesize[c] * (v * mb_y + y) * 8) + - (h * mb_x + x) * 8); + block_offset = (((linesize[c] * (v * mb_y + y) * 8) + + (h * mb_x + x) * 8) >> s->avctx->lowres); if (s->interlaced && s->bottom_field) block_offset += linesize[c] >> 1; @@ -1814,6 +1814,7 @@ AVCodec ff_mjpeg_decoder = { .close = ff_mjpeg_decode_end, .decode = ff_mjpeg_decode_frame, .capabilities = CODEC_CAP_DR1, + .max_lowres = 3, .long_name = NULL_IF_CONFIG_SMALL("MJPEG (Motion JPEG)"), .priv_class = &mjpegdec_class, }; @@ -1827,5 +1828,6 @@ AVCodec ff_thp_decoder = { .close = ff_mjpeg_decode_end, .decode = ff_mjpeg_decode_frame, .capabilities = CODEC_CAP_DR1, + .max_lowres = 3, .long_name = NULL_IF_CONFIG_SMALL("Nintendo Gamecube THP video"), }; diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index d8c28e35dc..0383992975 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -1938,8 +1938,8 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], qpel_mc_func (*op_qpix)[16]; const int linesize = s->current_picture.f.linesize[0]; //not s->linesize as this would be wrong for field pics const int uvlinesize = s->current_picture.f.linesize[1]; - const int readable= s->pict_type != AV_PICTURE_TYPE_B || s->encoding || s->avctx->draw_horiz_band; - const int block_size = 8; + const int readable= s->pict_type != AV_PICTURE_TYPE_B || s->encoding || s->avctx->draw_horiz_band || s->avctx->lowres; + const int block_size= 8 >> s->avctx->lowres; /* avoid copy if macroblock skipped in last frame too */ /* skip only during decoding as we might trash the buffers during encoding a bit */ @@ -2016,9 +2016,9 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], if(s->encoding || !( s->msmpeg4_version || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){ add_dequant_dct(s, block[0], 0, dest_y , dct_linesize, s->qscale); - add_dequant_dct(s, block[1], 1, dest_y + block_size, dct_linesize, s->qscale); + add_dequant_dct(s, block[1], 1, dest_y + 8 , dct_linesize, s->qscale); add_dequant_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize, s->qscale); - add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale); + add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8 , dct_linesize, s->qscale); if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if (s->chroma_y_shift){ @@ -2035,9 +2035,9 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], } } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){ add_dct(s, block[0], 0, dest_y , dct_linesize); - add_dct(s, block[1], 1, dest_y + block_size, dct_linesize); + add_dct(s, block[1], 1, dest_y + 8 , dct_linesize); add_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize); - add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize); + add_dct(s, block[3], 3, dest_y + dct_offset + 8 , dct_linesize); if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(s->chroma_y_shift){//Chroma420 @@ -2046,17 +2046,17 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], }else{ //chroma422 dct_linesize = uvlinesize << s->interlaced_dct; - dct_offset = s->interlaced_dct ? uvlinesize : uvlinesize*block_size; + dct_offset = s->interlaced_dct ? uvlinesize : uvlinesize*8; add_dct(s, block[4], 4, dest_cb, dct_linesize); add_dct(s, block[5], 5, dest_cr, dct_linesize); add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize); add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize); if(!s->chroma_x_shift){//Chroma444 - add_dct(s, block[8], 8, dest_cb+block_size, dct_linesize); - add_dct(s, block[9], 9, dest_cr+block_size, dct_linesize); - add_dct(s, block[10], 10, dest_cb+block_size+dct_offset, dct_linesize); - add_dct(s, block[11], 11, dest_cr+block_size+dct_offset, dct_linesize); + add_dct(s, block[8], 8, dest_cb+8, dct_linesize); + add_dct(s, block[9], 9, dest_cr+8, dct_linesize); + add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize); + add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize); } } }//fi gray @@ -2087,9 +2087,9 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], } }else{ s->dsp.idct_put(dest_y , dct_linesize, block[0]); - s->dsp.idct_put(dest_y + block_size, dct_linesize, block[1]); + s->dsp.idct_put(dest_y + 8 , dct_linesize, block[1]); s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]); - s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]); + s->dsp.idct_put(dest_y + dct_offset + 8 , dct_linesize, block[3]); if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if(s->chroma_y_shift){ @@ -2098,17 +2098,17 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], }else{ dct_linesize = uvlinesize << s->interlaced_dct; - dct_offset = s->interlaced_dct? uvlinesize : uvlinesize*block_size; + dct_offset = s->interlaced_dct? uvlinesize : uvlinesize*8; s->dsp.idct_put(dest_cb, dct_linesize, block[4]); s->dsp.idct_put(dest_cr, dct_linesize, block[5]); s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]); s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]); if(!s->chroma_x_shift){//Chroma444 - s->dsp.idct_put(dest_cb + block_size, dct_linesize, block[8]); - s->dsp.idct_put(dest_cr + block_size, dct_linesize, block[9]); - s->dsp.idct_put(dest_cb + block_size + dct_offset, dct_linesize, block[10]); - s->dsp.idct_put(dest_cr + block_size + dct_offset, dct_linesize, block[11]); + s->dsp.idct_put(dest_cb + 8, dct_linesize, block[8]); + s->dsp.idct_put(dest_cr + 8, dct_linesize, block[9]); + s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]); + s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]); } } }//gray @@ -2204,7 +2204,7 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename const int linesize = s->current_picture.f.linesize[0]; //not s->linesize as this would be wrong for field pics const int uvlinesize = s->current_picture.f.linesize[1]; - const int mb_size= 4; + const int mb_size= 4 - s->avctx->lowres; s->block_index[0]= s->b8_stride*(s->mb_y*2 ) - 2 + s->mb_x*2; s->block_index[1]= s->b8_stride*(s->mb_y*2 ) - 1 + s->mb_x*2; diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index b1fffda2d0..fc3038eafa 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -793,7 +793,7 @@ extern const enum PixelFormat ff_pixfmt_list_420[]; extern const enum PixelFormat ff_hwaccel_pixfmt_list_420[]; static inline void ff_update_block_index(MpegEncContext *s){ - const int block_size = 8; + const int block_size= 8 >> s->avctx->lowres; s->block_index[0]+=2; s->block_index[1]+=2; diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h index acfbbff2d8..c4d4277b04 100644 --- a/libavcodec/options_table.h +++ b/libavcodec/options_table.h @@ -326,6 +326,7 @@ static const AVOption options[]={ {"dts_hd_ma", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_HD_MA }, INT_MIN, INT_MAX, A|E, "profile"}, {"level", NULL, OFFSET(level), AV_OPT_TYPE_INT, {.dbl = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"}, {"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"}, +{"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), AV_OPT_TYPE_INT, {.dbl = 0 }, 0, INT_MAX, V|A|D}, {"skip_threshold", "frame skip threshold", OFFSET(frame_skip_threshold), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, {"skip_factor", "frame skip factor", OFFSET(frame_skip_factor), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, {"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E}, diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 0b89277f50..195aa20906 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -187,7 +187,7 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) } #endif //CONFIG_ENCODERS - if (avctx->bits_per_raw_sample <= 8) { + if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) { if ((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_ALTIVEC)) { c->idct_put = ff_idct_put_altivec; diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c index 88e1ce6137..4aca0ccbd6 100644 --- a/libavcodec/sp5xdec.c +++ b/libavcodec/sp5xdec.c @@ -102,6 +102,7 @@ AVCodec ff_sp5x_decoder = { .close = ff_mjpeg_decode_end, .decode = sp5x_decode_frame, .capabilities = CODEC_CAP_DR1, + .max_lowres = 3, .long_name = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"), }; diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 5af3515f09..bd97431bb2 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -149,8 +149,8 @@ unsigned avcodec_get_edge_width(void) void avcodec_set_dimensions(AVCodecContext *s, int width, int height){ s->coded_width = width; s->coded_height= height; - s->width = width; - s->height = height; + s->width = -((-width )>>s->lowres); + s->height= -((-height)>>s->lowres); } #define INTERNAL_BUFFER_SIZE (32+1) @@ -239,8 +239,9 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, *width = FFALIGN(*width , w_align); *height= FFALIGN(*height, h_align); - if (s->codec_id == CODEC_ID_H264) + if(s->codec_id == CODEC_ID_H264 || s->lowres) *height+=2; // some of the optimized chroma MC reads one line too much + // which is also done in mpeg decoders with lowres > 0 for (i = 0; i < 4; i++) linesize_align[i] = STRIDE_ALIGN; diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 7d9a15282d..4c96d62525 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -3203,7 +3203,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) if (mm_flags & AV_CPU_FLAG_MMX) { const int idct_algo = avctx->idct_algo; - if (avctx->bits_per_raw_sample <= 8) { + if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) { if (idct_algo == FF_IDCT_AUTO || idct_algo == FF_IDCT_SIMPLEMMX) { c->idct_put = ff_simple_idct_put_mmx; c->idct_add = ff_simple_idct_add_mmx;