From 70d54392f5015b9c6594fcae558f59f952501e3b Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 22 Apr 2012 21:37:54 +0200
Subject: [PATCH] lowres2 support.

The new lowres support is limited to decoders where lowres decoding
is possible in high quality.
I was not able to measure any speed difference, but if one is found
the 2-3 lines that might affect speed can be made compile time conditional

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 ffplay.c                              |   4 +-
 libavcodec/alpha/dsputil_alpha.c      |   2 +-
 libavcodec/arm/dsputil_init_arm.c     |   2 +-
 libavcodec/arm/dsputil_init_armv5te.c |   2 +-
 libavcodec/arm/dsputil_init_armv6.c   |   2 +-
 libavcodec/arm/dsputil_init_neon.c    |   2 +-
 libavcodec/avcodec.h                  |   6 +-
 libavcodec/dsputil.c                  | 122 ++++++++++++++-
 libavcodec/dsputil.h                  |   3 +
 libavcodec/dv.c                       |   8 +-
 libavcodec/dvdec.c                    |   3 +-
 libavcodec/error_resilience.c         |   2 +-
 libavcodec/jrevdct.c                  | 213 ++++++++++++++++++++++++++
 libavcodec/libopenjpegdec.c           |   1 +
 libavcodec/mjpegbdec.c                |   1 +
 libavcodec/mjpegdec.c                 |   6 +-
 libavcodec/mpegvideo.c                |  38 ++---
 libavcodec/mpegvideo.h                |   2 +-
 libavcodec/options_table.h            |   1 +
 libavcodec/ppc/dsputil_ppc.c          |   2 +-
 libavcodec/sp5xdec.c                  |   1 +
 libavcodec/utils.c                    |   7 +-
 libavcodec/x86/dsputil_mmx.c          |   2 +-
 23 files changed, 387 insertions(+), 45 deletions(-)

diff --git a/ffplay.c b/ffplay.c
index 8ee9b0d2cd..4489b34cca 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -260,6 +260,7 @@ static int64_t duration = AV_NOPTS_VALUE;
 static int workaround_bugs = 1;
 static int fast = 0;
 static int genpts = 0;
+static int lowres = 0;
 static int idct = FF_IDCT_AUTO;
 static enum AVDiscard skip_frame       = AVDISCARD_DEFAULT;
 static enum AVDiscard skip_idct        = AVDISCARD_DEFAULT;
@@ -1324,7 +1325,7 @@ static void alloc_picture(void *opaque)
         /* SDL allocates a buffer smaller than requested if the video
          * overlay hardware is unable to support the requested size. */
         fprintf(stderr, "Error: the video system does not support an image\n"
-                        "size of %dx%d pixels. Try using -vf \"scale=w:h\"\n"
+                        "size of %dx%d pixels. Try using -lowres or -vf \"scale=w:h\"\n"
                         "to reduce the image size.\n", vp->width, vp->height );
         do_exit(is);
     }
@@ -3084,6 +3085,7 @@ static const OptionDef options[] = {
     { "fast", OPT_BOOL | OPT_EXPERT, { (void*)&fast }, "non spec compliant optimizations", "" },
     { "genpts", OPT_BOOL | OPT_EXPERT, { (void*)&genpts }, "generate pts", "" },
     { "drp", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&decoder_reorder_pts }, "let decoder reorder pts 0=off 1=on -1=auto", ""},
+    { "lowres", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&lowres }, "", "" },
     { "skiploop", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&skip_loop_filter }, "", "" },
     { "skipframe", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&skip_frame }, "", "" },
     { "skipidct", OPT_INT | HAS_ARG | OPT_EXPERT, { (void*)&skip_idct }, "", "" },
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index ed9416ea8c..db2464b02f 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -336,7 +336,7 @@ void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
     put_pixels_clamped_axp_p = c->put_pixels_clamped;
     add_pixels_clamped_axp_p = c->add_pixels_clamped;
 
-    if (avctx->bits_per_raw_sample <= 8 &&
+    if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
         (avctx->idct_algo == FF_IDCT_AUTO ||
          avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) {
         c->idct_put = ff_simple_idct_put_axp;
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 2bc97bfa4e..03df6b29d0 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -82,7 +82,7 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
     ff_put_pixels_clamped = c->put_pixels_clamped;
     ff_add_pixels_clamped = c->add_pixels_clamped;
 
-    if (avctx->bits_per_raw_sample <= 8) {
+    if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) {
         if(avctx->idct_algo == FF_IDCT_AUTO ||
            avctx->idct_algo == FF_IDCT_ARM){
             c->idct_put              = j_rev_dct_arm_put;
diff --git a/libavcodec/arm/dsputil_init_armv5te.c b/libavcodec/arm/dsputil_init_armv5te.c
index 6b2300b3ea..9fedb7a123 100644
--- a/libavcodec/arm/dsputil_init_armv5te.c
+++ b/libavcodec/arm/dsputil_init_armv5te.c
@@ -29,7 +29,7 @@ void ff_prefetch_arm(void *mem, int stride, int h);
 
 av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx)
 {
-    if (avctx->bits_per_raw_sample <= 8 &&
+    if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
         (avctx->idct_algo == FF_IDCT_AUTO ||
          avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) {
         c->idct_put              = ff_simple_idct_put_armv5te;
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 8401aaceaa..cd5744d3b3 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -74,7 +74,7 @@ av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx)
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
-    if (avctx->bits_per_raw_sample <= 8 &&
+    if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
         (avctx->idct_algo == FF_IDCT_AUTO ||
          avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) {
         c->idct_put              = ff_simple_idct_put_armv6;
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 1aa39de7e7..4c73276a95 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -182,7 +182,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
-    if (avctx->bits_per_raw_sample <= 8) {
+    if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) {
         if (avctx->idct_algo == FF_IDCT_AUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLENEON) {
             c->idct_put              = ff_simple_idct_put_neon;
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 7887f7fdc9..c3a5192621 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1451,7 +1451,7 @@ typedef struct AVCodecContext {
     int width, height;
 
     /**
-     * Bitstream width / height, may be different from width/height.
+     * Bitstream width / height, may be different from width/height if lowres enabled.
      * - encoding: unused
      * - decoding: Set by user before init if known. Codec should override / dynamically change if needed.
      */
@@ -2649,7 +2649,7 @@ typedef struct AVCodecContext {
      * - encoding: unused
      * - decoding: Set by user.
      */
-    attribute_deprecated int lowres;
+     int lowres;
 
     /**
      * the picture in the bitstream
@@ -2920,7 +2920,7 @@ typedef struct AVCodec {
     const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
     const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
     const uint64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
-    attribute_deprecated uint8_t max_lowres; ///< maximum value for lowres supported by the decoder
+    uint8_t max_lowres;                     ///< maximum value for lowres supported by the decoder
     const AVClass *priv_class;              ///< AVClass for the private context
     const AVProfile *profiles;              ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
 
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 553fb8c1cd..a13737d77e 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -381,6 +381,38 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
     }
 }
 
+static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+        pixels[2] = av_clip_uint8(block[2]);
+        pixels[3] = av_clip_uint8(block[3]);
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
                                     uint8_t *restrict pixels,
                                     int line_size)
@@ -422,6 +454,36 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
     }
 }
 
+static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels += line_size;
+        block += 8;
+    }
+}
+
 static int sum_abs_dctelem_c(DCTELEM *block)
 {
     int sum=0, i;
@@ -2684,6 +2746,37 @@ static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
     ff_add_pixels_clamped_c(block, dest, line_size);
 }
 
+static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_j_rev_dct4 (block);
+    put_pixels_clamped4_c(block, dest, line_size);
+}
+static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_j_rev_dct4 (block);
+    add_pixels_clamped4_c(block, dest, line_size);
+}
+
+static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_j_rev_dct2 (block);
+    put_pixels_clamped2_c(block, dest, line_size);
+}
+static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_j_rev_dct2 (block);
+    add_pixels_clamped2_c(block, dest, line_size);
+}
+
+static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    dest[0] = av_clip_uint8((block[0] + 4)>>3);
+}
+static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
+}
+
 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
 
 /* init static data */
@@ -2750,12 +2843,28 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
     }
 #endif //CONFIG_ENCODERS
 
-    if (avctx->bits_per_raw_sample == 10) {
-        c->idct_put              = ff_simple_idct_put_10;
-        c->idct_add              = ff_simple_idct_add_10;
-        c->idct                  = ff_simple_idct_10;
-        c->idct_permutation_type = FF_NO_IDCT_PERM;
-    } else {
+    if(avctx->lowres==1){
+        c->idct_put= ff_jref_idct4_put;
+        c->idct_add= ff_jref_idct4_add;
+        c->idct    = ff_j_rev_dct4;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==2){
+        c->idct_put= ff_jref_idct2_put;
+        c->idct_add= ff_jref_idct2_add;
+        c->idct    = ff_j_rev_dct2;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==3){
+        c->idct_put= ff_jref_idct1_put;
+        c->idct_add= ff_jref_idct1_add;
+        c->idct    = ff_j_rev_dct1;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else{
+        if (avctx->bits_per_raw_sample == 10) {
+            c->idct_put              = ff_simple_idct_put_10;
+            c->idct_add              = ff_simple_idct_add_10;
+            c->idct                  = ff_simple_idct_10;
+            c->idct_permutation_type = FF_NO_IDCT_PERM;
+        } else {
         if(avctx->idct_algo==FF_IDCT_INT){
             c->idct_put= ff_jref_idct_put;
             c->idct_add= ff_jref_idct_add;
@@ -2786,6 +2895,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
             c->idct     = ff_simple_idct_8;
             c->idct_permutation_type= FF_NO_IDCT_PERM;
         }
+        }
     }
 
     c->diff_pixels = diff_pixels_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 60bc53e5e8..34d659d532 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -46,6 +46,9 @@ void ff_fdct248_islow_8(DCTELEM *data);
 void ff_fdct248_islow_10(DCTELEM *data);
 
 void ff_j_rev_dct (DCTELEM *data);
+void ff_j_rev_dct4 (DCTELEM *data);
+void ff_j_rev_dct2 (DCTELEM *data);
+void ff_j_rev_dct1 (DCTELEM *data);
 void ff_wmv2_idct_c(DCTELEM *data);
 
 void ff_fdct_mmx(DCTELEM *block);
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index e13026628d..5bb84793b5 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -311,7 +311,13 @@ av_cold int ff_dvvideo_init(AVCodecContext *avctx)
     /* 248DCT setup */
     s->fdct[1]     = dsp.fdct248;
     s->idct_put[1] = ff_simple_idct248_put;  // FIXME: need to add it to DSP
-    memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
+    if (avctx->lowres){
+        for (i = 0; i < 64; i++){
+            int j = ff_zigzag248_direct[i];
+            s->dv_zigzag[1][i] = dsp.idct_permutation[(j & 7) + (j & 8) * 4 + (j & 48) / 2];
+        }
+    }else
+        memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
 
     avctx->coded_frame = &s->picture;
     s->avctx = avctx;
diff --git a/libavcodec/dvdec.c b/libavcodec/dvdec.c
index f0fa5e9023..4e09d0dd9a 100644
--- a/libavcodec/dvdec.c
+++ b/libavcodec/dvdec.c
@@ -144,7 +144,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
     LOCAL_ALIGNED_16(DCTELEM, sblock, [5*DV_MAX_BPM], [64]);
     LOCAL_ALIGNED_16(uint8_t, mb_bit_buffer, [  80 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
     LOCAL_ALIGNED_16(uint8_t, vs_bit_buffer, [5*80 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
-    const int log2_blocksize = 3;
+    const int log2_blocksize = 3-s->avctx->lowres;
     int is_field_mode[5];
 
     assert((((int)mb_bit_buffer) & 7) == 0);
@@ -382,5 +382,6 @@ AVCodec ff_dvvideo_decoder = {
     .close          = dvvideo_close,
     .decode         = dvvideo_decode_frame,
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+    .max_lowres     = 3,
     .long_name      = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
 };
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index a8ea647af9..2691109e49 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -907,7 +907,7 @@ void ff_er_frame_end(MpegEncContext *s)
 
     /* We do not support ER of field pictures yet,
      * though it should not crash if enabled. */
-    if (!s->err_recognition || s->error_count == 0                     ||
+    if (!s->err_recognition || s->error_count == 0 || s->avctx->lowres ||
         s->avctx->hwaccel                                              ||
         s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU          ||
         s->picture_structure != PICT_FRAME                             ||
diff --git a/libavcodec/jrevdct.c b/libavcodec/jrevdct.c
index e33558f825..395eb8c638 100644
--- a/libavcodec/jrevdct.c
+++ b/libavcodec/jrevdct.c
@@ -940,3 +940,216 @@ void ff_j_rev_dct(DCTBLOCK data)
     dataptr++;                  /* advance pointer to next column */
   }
 }
+
+#undef DCTSIZE
+#define DCTSIZE 4
+#define DCTSTRIDE 8
+
+void ff_j_rev_dct4(DCTBLOCK data)
+{
+  int32_t tmp0, tmp1, tmp2, tmp3;
+  int32_t tmp10, tmp11, tmp12, tmp13;
+  int32_t z1;
+  int32_t d0, d2, d4, d6;
+  register DCTELEM *dataptr;
+  int rowctr;
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  data[0] += 4;
+
+  dataptr = data;
+
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any row in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * row DCT calculations can be simplified this way.
+     */
+
+    register int *idataptr = (int*)dataptr;
+
+    d0 = dataptr[0];
+    d2 = dataptr[1];
+    d4 = dataptr[2];
+    d6 = dataptr[3];
+
+    if ((d2 | d4 | d6) == 0) {
+      /* AC terms all zero */
+      if (d0) {
+          /* Compute a 32 bit value to assign. */
+          DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
+          register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
+
+          idataptr[0] = v;
+          idataptr[1] = v;
+      }
+
+      dataptr += DCTSTRIDE;     /* advance pointer to next row */
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
+      }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSTRIDE;       /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Columns of zeroes can be exploited in the same way as we did with rows.
+     * However, the row calculation has created many nonzero AC terms, so the
+     * simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+    d0 = dataptr[DCTSTRIDE*0];
+    d2 = dataptr[DCTSTRIDE*1];
+    d4 = dataptr[DCTSTRIDE*2];
+    d6 = dataptr[DCTSTRIDE*3];
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
+    }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+void ff_j_rev_dct2(DCTBLOCK data){
+  int d00, d01, d10, d11;
+
+  data[0] += 4;
+  d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE];
+  d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE];
+  d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE];
+  d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE];
+
+  data[0+0*DCTSTRIDE]= (d00 + d10)>>3;
+  data[1+0*DCTSTRIDE]= (d01 + d11)>>3;
+  data[0+1*DCTSTRIDE]= (d00 - d10)>>3;
+  data[1+1*DCTSTRIDE]= (d01 - d11)>>3;
+}
+
+void ff_j_rev_dct1(DCTBLOCK data){
+  data[0] = (data[0] + 4)>>3;
+}
+
+#undef FIX
+#undef CONST_BITS
diff --git a/libavcodec/libopenjpegdec.c b/libavcodec/libopenjpegdec.c
index 5b09b34634..abfe7308e7 100644
--- a/libavcodec/libopenjpegdec.c
+++ b/libavcodec/libopenjpegdec.c
@@ -286,6 +286,7 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     }
 
     ctx->dec_params.cp_limit_decoding = NO_LIMITATION;
+    ctx->dec_params.cp_reduce = avctx->lowres;
     // Tie decoder with decoding parameters
     opj_setup_decoder(dec, &ctx->dec_params);
     stream = opj_cio_open((opj_common_ptr)dec, buf, buf_size);
diff --git a/libavcodec/mjpegbdec.c b/libavcodec/mjpegbdec.c
index 540e984af5..c457b98081 100644
--- a/libavcodec/mjpegbdec.c
+++ b/libavcodec/mjpegbdec.c
@@ -161,5 +161,6 @@ AVCodec ff_mjpegb_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = mjpegb_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .long_name      = NULL_IF_CONFIG_SMALL("Apple MJPEG-B"),
 };
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 330f4f5892..a841384af2 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -1003,8 +1003,8 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
                 x = 0;
                 y = 0;
                 for (j = 0; j < n; j++) {
-                    block_offset = ((linesize[c] * (v * mb_y + y) * 8) +
-                                    (h * mb_x + x) * 8);
+                    block_offset = (((linesize[c] * (v * mb_y + y) * 8) +
+                                     (h * mb_x + x) * 8) >> s->avctx->lowres);
 
                     if (s->interlaced && s->bottom_field)
                         block_offset += linesize[c] >> 1;
@@ -1814,6 +1814,7 @@ AVCodec ff_mjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = ff_mjpeg_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .long_name      = NULL_IF_CONFIG_SMALL("MJPEG (Motion JPEG)"),
     .priv_class     = &mjpegdec_class,
 };
@@ -1827,5 +1828,6 @@ AVCodec ff_thp_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = ff_mjpeg_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .long_name      = NULL_IF_CONFIG_SMALL("Nintendo Gamecube THP video"),
 };
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index d8c28e35dc..0383992975 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1938,8 +1938,8 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
         qpel_mc_func (*op_qpix)[16];
         const int linesize   = s->current_picture.f.linesize[0]; //not s->linesize as this would be wrong for field pics
         const int uvlinesize = s->current_picture.f.linesize[1];
-        const int readable= s->pict_type != AV_PICTURE_TYPE_B || s->encoding || s->avctx->draw_horiz_band;
-        const int block_size = 8;
+        const int readable= s->pict_type != AV_PICTURE_TYPE_B || s->encoding || s->avctx->draw_horiz_band || s->avctx->lowres;
+        const int block_size= 8 >> s->avctx->lowres;
 
         /* avoid copy if macroblock skipped in last frame too */
         /* skip only during decoding as we might trash the buffers during encoding a bit */
@@ -2016,9 +2016,9 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
             if(s->encoding || !(   s->msmpeg4_version || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
-                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
+                add_dequant_dct(s, block[1], 1, dest_y              + 8         , dct_linesize, s->qscale);
                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
-                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
+                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8         , dct_linesize, s->qscale);
 
                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                     if (s->chroma_y_shift){
@@ -2035,9 +2035,9 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                 }
             } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
-                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
+                add_dct(s, block[1], 1, dest_y              + 8         , dct_linesize);
                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
-                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
+                add_dct(s, block[3], 3, dest_y + dct_offset + 8         , dct_linesize);
 
                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                     if(s->chroma_y_shift){//Chroma420
@@ -2046,17 +2046,17 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                     }else{
                         //chroma422
                         dct_linesize = uvlinesize << s->interlaced_dct;
-                        dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize*block_size;
+                        dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize*8;
 
                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                         if(!s->chroma_x_shift){//Chroma444
-                            add_dct(s, block[8], 8, dest_cb+block_size, dct_linesize);
-                            add_dct(s, block[9], 9, dest_cr+block_size, dct_linesize);
-                            add_dct(s, block[10], 10, dest_cb+block_size+dct_offset, dct_linesize);
-                            add_dct(s, block[11], 11, dest_cr+block_size+dct_offset, dct_linesize);
+                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
+                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
+                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
+                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                         }
                     }
                 }//fi gray
@@ -2087,9 +2087,9 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                 }
             }else{
                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
-                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
+                s->dsp.idct_put(dest_y              + 8         , dct_linesize, block[1]);
                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
-                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
+                s->dsp.idct_put(dest_y + dct_offset + 8         , dct_linesize, block[3]);
 
                 if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                     if(s->chroma_y_shift){
@@ -2098,17 +2098,17 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                     }else{
 
                         dct_linesize = uvlinesize << s->interlaced_dct;
-                        dct_offset   = s->interlaced_dct? uvlinesize : uvlinesize*block_size;
+                        dct_offset   = s->interlaced_dct? uvlinesize : uvlinesize*8;
 
                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                         if(!s->chroma_x_shift){//Chroma444
-                            s->dsp.idct_put(dest_cb + block_size,              dct_linesize, block[8]);
-                            s->dsp.idct_put(dest_cr + block_size,              dct_linesize, block[9]);
-                            s->dsp.idct_put(dest_cb + block_size + dct_offset, dct_linesize, block[10]);
-                            s->dsp.idct_put(dest_cr + block_size + dct_offset, dct_linesize, block[11]);
+                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
+                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
+                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
+                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                         }
                     }
                 }//gray
@@ -2204,7 +2204,7 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
     const int linesize   = s->current_picture.f.linesize[0]; //not s->linesize as this would be wrong for field pics
     const int uvlinesize = s->current_picture.f.linesize[1];
-    const int mb_size= 4;
+    const int mb_size= 4 - s->avctx->lowres;
 
     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index b1fffda2d0..fc3038eafa 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -793,7 +793,7 @@ extern const enum PixelFormat ff_pixfmt_list_420[];
 extern const enum PixelFormat ff_hwaccel_pixfmt_list_420[];
 
 static inline void ff_update_block_index(MpegEncContext *s){
-    const int block_size = 8;
+    const int block_size= 8 >> s->avctx->lowres;
 
     s->block_index[0]+=2;
     s->block_index[1]+=2;
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index acfbbff2d8..c4d4277b04 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -326,6 +326,7 @@ static const AVOption options[]={
 {"dts_hd_ma", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_PROFILE_DTS_HD_MA }, INT_MIN, INT_MAX, A|E, "profile"},
 {"level", NULL, OFFSET(level), AV_OPT_TYPE_INT, {.dbl = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
 {"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
+{"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), AV_OPT_TYPE_INT, {.dbl = 0 }, 0, INT_MAX, V|A|D},
 {"skip_threshold", "frame skip threshold", OFFSET(frame_skip_threshold), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skip_factor", "frame skip factor", OFFSET(frame_skip_factor), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, INT_MIN, INT_MAX, V|E},
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 0b89277f50..195aa20906 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -187,7 +187,7 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
         }
 #endif //CONFIG_ENCODERS
 
-        if (avctx->bits_per_raw_sample <= 8) {
+        if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) {
             if ((avctx->idct_algo == FF_IDCT_AUTO) ||
                 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
                 c->idct_put = ff_idct_put_altivec;
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index 88e1ce6137..4aca0ccbd6 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -102,6 +102,7 @@ AVCodec ff_sp5x_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = sp5x_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .long_name      = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"),
 };
 
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 5af3515f09..bd97431bb2 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -149,8 +149,8 @@ unsigned avcodec_get_edge_width(void)
 void avcodec_set_dimensions(AVCodecContext *s, int width, int height){
     s->coded_width = width;
     s->coded_height= height;
-    s->width  = width;
-    s->height = height;
+    s->width = -((-width )>>s->lowres);
+    s->height= -((-height)>>s->lowres);
 }
 
 #define INTERNAL_BUFFER_SIZE (32+1)
@@ -239,8 +239,9 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
 
     *width = FFALIGN(*width , w_align);
     *height= FFALIGN(*height, h_align);
-    if (s->codec_id == CODEC_ID_H264)
+    if(s->codec_id == CODEC_ID_H264 || s->lowres)
         *height+=2; // some of the optimized chroma MC reads one line too much
+                    // which is also done in mpeg decoders with lowres > 0
 
     for (i = 0; i < 4; i++)
         linesize_align[i] = STRIDE_ALIGN;
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 7d9a15282d..4c96d62525 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -3203,7 +3203,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
     if (mm_flags & AV_CPU_FLAG_MMX) {
         const int idct_algo = avctx->idct_algo;
 
-        if (avctx->bits_per_raw_sample <= 8) {
+        if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) {
             if (idct_algo == FF_IDCT_AUTO || idct_algo == FF_IDCT_SIMPLEMMX) {
                 c->idct_put              = ff_simple_idct_put_mmx;
                 c->idct_add              = ff_simple_idct_add_mmx;