Merge remote-tracking branch 'qatar/master' into master

* qatar/master: (27 commits) ac3enc: fix LOCAL_ALIGNED usage in count_mantissa_bits() ac3dsp: do not use the ff_* prefix when referencing ff_ac3_bap_bits. ac3dsp: fix loop condition in ac3_update_bap_counts_c() ARM: unbreak build ac3enc: modify mantissa bit counting to keep bap counts for all values of bap instead of just 0 to 4. ac3enc: split mantissa bit counting into a separate function. ac3enc: store per-block/channel bap pointers by reference block in a 2D array rather than in the AC3Block struct. get_bits: add av_unused tag to cache variable sws: replace all long with int. ARM: aacdec: fix constraints on inline asm ARM: remove unnecessary volatile from inline asm ARM: add "cc" clobbers to inline asm where needed ARM: improve FASTDIV asm ac3enc: use LOCAL_ALIGNED macro APIchanges: fill in git hash for av_get_pix_fmt_name (0420bd7). lavu: add av_get_pix_fmt_name() convenience function cmdutils: remove OPT_FUNC2 swscale: fix crash in bilinear scaling. vpxenc: add VP8E_SET_STATIC_THRESHOLD mapping webm: support stereo videos in matroska/webm muxer ... Conflicts: Changelog cmdutils.c cmdutils.h doc/APIchanges doc/muxers.texi ffmpeg.c ffplay.c libavcodec/ac3enc.c libavcodec/ac3enc_float.c libavcodec/avcodec.h libavcodec/get_bits.h libavcodec/libvpxenc.c libavcodec/version.h libavdevice/libdc1394.c libavformat/matroskaenc.c libavutil/avutil.h libswscale/rgb2rgb.c libswscale/swscale.c libswscale/swscale_template.c libswscale/x86/swscale_template.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
2025-01-29 22:00:58 +02:00 · 2011-05-29 02:55:19 +02:00 · 2011-05-29 02:55:19 +02:00 · b8a43bc1b5
commit b8a43bc1b5
parent 39d607e5bb 90da52f01f
45 changed files with 1337 additions and 915 deletions
--- a/1
+++ b/1
@ -9,6 +9,7 @@ version <next>:
 - mpeg2 aspect ratio dection fixed
 - libxvid aspect pickiness fixed
 - Frame multithreaded decoding
+- E-AC-3 audio encoder
 - Lots of deprecated API cruft removed
 - fft and imdct optimizations for AVX (Sandy Bridge) processors
 - showinfo filter added
--- a/1
+++ b/1
@ -1268,6 +1268,7 @@ dca_decoder_select="mdct"
 dnxhd_encoder_select="aandct"
 dxa_decoder_select="zlib"
 eac3_decoder_select="ac3_decoder"
+eac3_encoder_select="mdct ac3dsp"
 eamad_decoder_select="aandct"
 eatgq_decoder_select="aandct"
 eatqi_decoder_select="aandct"
--- a/doc/APIchanges
+++ b/doc/APIchanges
@ -30,6 +30,10 @@ API changes, most recent first:
 2011-05-XX - XXXXXX - lavfi 2.6.0 - avcodec.h
  Add avfilter_get_video_buffer_ref_from_frame() to libavfilter/avcodec.h.

+2011-05-28 - 0420bd7 - lavu 51.2.0 - pixdesc.h
+  Add av_get_pix_fmt_name() in libavutil/pixdesc.h, and deprecate
+  avcodec_get_pix_fmt_name() in libavcodec/avcodec.h in its favor.
+
 2011-05-25 - 30315a8 - lavf 53.1.0 - avformat.h
  Add fps_probe_size to AVFormatContext.

--- a/doc/general.texi
+++ b/doc/general.texi
@ -617,7 +617,7 @@ following image formats are supported:
    @tab Used in Origin's Wing Commander IV AVI files.
@item DSP Group TrueSpeech   @tab     @tab  X
@item DV audio               @tab     @tab  X
-@item Enhanced AC-3          @tab     @tab  X
+@item Enhanced AC-3          @tab  X  @tab  X
@item FLAC (Free Lossless Audio Codec)  @tab  X  @tab  IX
@item GSM                    @tab  E  @tab  X
    @tab encoding supported through external library libgsm
--- a/ffmpeg.c
+++ b/ffmpeg.c
@ -4395,10 +4395,10 @@ static const OptionDef options[] = {
    { "copyinkf", OPT_BOOL | OPT_EXPERT, {(void*)&copy_initial_nonkeyframes}, "copy initial non-keyframes" },

    /* video options */
-    { "b",  HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
-    { "vb",  HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "b", HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
+    { "vb", HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
    { "vframes", OPT_INT | HAS_ARG | OPT_VIDEO, {(void*)&max_frames[AVMEDIA_TYPE_VIDEO]}, "set the number of video frames to record", "number" },
-    { "r",  HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" },
+    { "r", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" },
    { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" },
    { "aspect", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_aspect_ratio}, "set aspect ratio (4:3, 16:9 or 1.3333, 1.7777)", "aspect" },
    { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set pixel format, 'list' as argument shows all the pixel formats supported", "format" },
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@ -127,6 +127,7 @@ OBJS-$(CONFIG_DVVIDEO_DECODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DVVIDEO_ENCODER)         += dv.o dvdata.o
 OBJS-$(CONFIG_DXA_DECODER)             += dxa.o
 OBJS-$(CONFIG_EAC3_DECODER)            += eac3dec.o eac3dec_data.o
+OBJS-$(CONFIG_EAC3_ENCODER)            += ac3enc_float.o ac3tab.o ac3.o kbdwin.o
 OBJS-$(CONFIG_EACMV_DECODER)           += eacmv.o
 OBJS-$(CONFIG_EAMAD_DECODER)           += eamad.o eaidct.o mpeg12.o \
                                          mpeg12data.o mpegvideo.o  \
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@ -128,24 +128,33 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
    } while (end > ff_ac3_band_start_tab[band++]);
 }

-static int ac3_compute_mantissa_size_c(int mant_cnt[5], uint8_t *bap,
-                                       int nb_coefs)
+static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
+                                    int len)
 {
-    int bits, b, i;
+    while (len-- > 0)
+        mant_cnt[bap[len]]++;
+}

-    bits = 0;
-    for (i = 0; i < nb_coefs; i++) {
-        b = bap[i];
-        if (b <= 4) {
-            // bap=1 to bap=4 will be counted in compute_mantissa_size_final
-            mant_cnt[b]++;
-        } else if (b <= 13) {
-            // bap=5 to bap=13 use (bap-1) bits
-            bits += b - 1;
-        } else {
-            // bap=14 uses 14 bits and bap=15 uses 16 bits
-            bits += (b == 14) ? 14 : 16;
-        }
+DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
+    0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
+};
+
+static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16])
+{
+    int blk, bap;
+    int bits = 0;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        // bap=1 : 3 mantissas in 5 bits
+        bits += (mant_cnt[blk][1] / 3) * 5;
+        // bap=2 : 3 mantissas in 7 bits
+        // bap=4 : 2 mantissas in 7 bits
+        bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7;
+        // bap=3 : 1 mantissa in 3 bits
+        bits += mant_cnt[blk][3] * 3;
+        // bap=5 to 15 : get bits per mantissa from table
+        for (bap = 5; bap < 16; bap++)
+            bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap];
    }
    return bits;
 }
@ -181,6 +190,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
    c->ac3_rshift_int32 = ac3_rshift_int32_c;
    c->float_to_fixed24 = float_to_fixed24_c;
    c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
+    c->update_bap_counts = ac3_update_bap_counts_c;
    c->compute_mantissa_size = ac3_compute_mantissa_size_c;
    c->extract_exponents = ac3_extract_exponents_c;

--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@ -24,6 +24,12 @@

 #include <stdint.h>

+/**
+ * Number of mantissa bits written for each bap value.
+ * bap values with fractional bits are set to 0 and are calculated separately.
+ */
+extern const uint16_t ff_ac3_bap_bits[16];
+
 typedef struct AC3DSPContext {
    /**
     * Set each encoded exponent in a block to the minimum of itself and the
@ -102,9 +108,21 @@ typedef struct AC3DSPContext {
                               const uint8_t *bap_tab, uint8_t *bap);

    /**
-     * Calculate the number of bits needed to encode a set of mantissas.
+     * Update bap counts using the supplied array of bap.
+     *
+     * @param[out] mant_cnt   bap counts for 1 block
+     * @param[in]  bap        array of bap, pointing to start coef bin
+     * @param[in]  len        number of elements to process
     */
-    int (*compute_mantissa_size)(int mant_cnt[5], uint8_t *bap, int nb_coefs);
+    void (*update_bap_counts)(uint16_t mant_cnt[16], uint8_t *bap, int len);
+
+    /**
+     * Calculate the number of bits needed to encode a set of mantissas.
+     *
+     * @param[in] mant_cnt    bap counts for all blocks
+     * @return                mantissa bit count
+     */
+    int (*compute_mantissa_size)(uint16_t mant_cnt[6][16]);

    void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
 } AC3DSPContext;
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
--- a/libavcodec/ac3enc_combined.c
+++ b/libavcodec/ac3enc_combined.c
@ -11,8 +11,18 @@ typedef struct CombineContext{
    AVCodec *codec;
 }CombineContext;

+#define OFFSET(param) offsetof(CombineContext, options.param)
+#define AC3ENC_PARAM (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+
+#define AC3ENC_TYPE_AC3_FIXED   0
+#define AC3ENC_TYPE_AC3         1
+#define AC3ENC_TYPE_EAC3        2
+
+#define AC3ENC_TYPE 12354
+#include "ac3enc_opts_template.c"
+
 static AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name,
-                                ff_ac3_options, LIBAVUTIL_VERSION_INT };
+                                eac3_options, LIBAVUTIL_VERSION_INT };

 static av_cold AVCodec *get_codec(enum AVSampleFormat s){
 #if CONFIG_AC3_FIXED_ENCODER
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@ -100,6 +100,7 @@ static void scale_coefficients(AC3EncodeContext *s)
 }


+#if CONFIG_AC3_ENCODER
 AVCodec ff_ac3_float_encoder = {
    "ac3_float",
    AVMEDIA_TYPE_AUDIO,
@ -114,3 +115,20 @@ AVCodec ff_ac3_float_encoder = {
    .priv_class = &ac3enc_class,
    .channel_layouts = ff_ac3_channel_layouts,
 };
+#endif
+
+#if CONFIG_EAC3_ENCODER
+AVCodec ff_eac3_encoder = {
+    .name            = "eac3",
+    .type            = AVMEDIA_TYPE_AUDIO,
+    .id              = CODEC_ID_EAC3,
+    .priv_data_size  = sizeof(AC3EncodeContext),
+    .init            = ac3_encode_init,
+    .encode          = ac3_encode_frame,
+    .close           = ac3_encode_close,
+    .sample_fmts     = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
+    .long_name       = NULL_IF_CONFIG_SMALL("ATSC A/52 E-AC-3"),
+    .priv_class      = &eac3enc_class,
+    .channel_layouts = ff_ac3_channel_layouts,
+};
+#endif
--- a/libavcodec/ac3enc_opts_template.c
+++ b/libavcodec/ac3enc_opts_template.c
@ -0,0 +1,81 @@
+/*
+ * AC-3 encoder options
+ * Copyright (c) 2010 Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if AC3ENC_TYPE == AC3ENC_TYPE_AC3_FIXED
+static const AVOption ac3fixed_options[] = {
+#elif AC3ENC_TYPE == AC3ENC_TYPE_AC3
+static const AVOption ac3_options[] = {
+#else /* AC3ENC_TYPE_EAC3 */
+static const AVOption eac3_options[] = {
+#endif
+#if AC3ENC_TYPE != AC3ENC_TYPE_EAC3
+/* Metadata Options */
+{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
+/* downmix levels */
+{"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM},
+{"surround_mixlev", "Surround Mix Level", OFFSET(surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_6DB }, 0.0, 1.0, AC3ENC_PARAM},
+/* audio production information */
+{"mixing_level", "Mixing Level", OFFSET(mixing_level), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 111, AC3ENC_PARAM},
+{"room_type", "Room Type", OFFSET(room_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "room_type"},
+    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+    {"large",        "Large Room",              0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+    {"small",        "Small Room",              0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "room_type"},
+/* other metadata options */
+{"copyright", "Copyright Bit", OFFSET(copyright), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 1, AC3ENC_PARAM},
+#endif
+{"dialnorm", "Dialogue Level (dB)", OFFSET(dialogue_level), FF_OPT_TYPE_INT, {.dbl = -31 }, -31, -1, AC3ENC_PARAM},
+#if AC3ENC_TYPE != AC3ENC_TYPE_EAC3
+{"dsur_mode", "Dolby Surround Mode", OFFSET(dolby_surround_mode), FF_OPT_TYPE_INT, {.dbl = 0 }, 0, 2, AC3ENC_PARAM, "dsur_mode"},
+    {"notindicated", "Not Indicated (default)",    0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+    {"on",           "Dolby Surround Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+    {"off",          "Not Dolby Surround Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsur_mode"},
+{"original", "Original Bit Stream", OFFSET(original), FF_OPT_TYPE_INT,   {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
+/* extended bitstream information */
+{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dmix_mode"},
+    {"notindicated", "Not Indicated (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+    {"ltrt", "Lt/Rt Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+    {"loro", "Lo/Ro Downmix Preferred",         0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dmix_mode"},
+{"ltrt_cmixlev", "Lt/Rt Center Mix Level", OFFSET(ltrt_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"loro_cmixlev", "Lo/Ro Center Mix Level", OFFSET(loro_center_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level), FF_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, AC3ENC_PARAM},
+{"dsurex_mode", "Dolby Surround EX Mode", OFFSET(dolby_surround_ex_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dsurex_mode"},
+    {"notindicated", "Not Indicated (default)",       0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+    {"on",           "Dolby Surround EX Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+    {"off",          "Not Dolby Surround EX Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dsurex_mode"},
+{"dheadphone_mode", "Dolby Headphone Mode", OFFSET(dolby_headphone_mode), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 2, AC3ENC_PARAM, "dheadphone_mode"},
+    {"notindicated", "Not Indicated (default)",     0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+    {"on",           "Dolby Headphone Encoded",     0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+    {"off",          "Not Dolby Headphone Encoded", 0, FF_OPT_TYPE_CONST, {.dbl = 2 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "dheadphone_mode"},
+{"ad_conv_type", "A/D Converter Type", OFFSET(ad_converter_type), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 1, AC3ENC_PARAM, "ad_conv_type"},
+    {"standard", "Standard (default)", 0, FF_OPT_TYPE_CONST, {.dbl = 0 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+    {"hdcd",     "HDCD",               0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
+#endif
+/* Other Encoding Options */
+{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
+#if AC3ENC_TYPE != AC3ENC_TYPE_AC3_FIXED
+{"channel_coupling",   "Channel Coupling",   OFFSET(channel_coupling),   FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"},
+    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
+{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"},
+    {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"},
+#endif
+{NULL}
+};
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@ -253,7 +253,7 @@ void avcodec_register_all(void)
    REGISTER_DECODER (COOK, cook);
    REGISTER_ENCDEC  (DCA, dca);
    REGISTER_DECODER (DSICINAUDIO, dsicinaudio);
-    REGISTER_DECODER (EAC3, eac3);
+    REGISTER_ENCDEC  (EAC3, eac3);
    REGISTER_ENCDEC  (FLAC, flac);
    REGISTER_DECODER (GSM, gsm);
    REGISTER_DECODER (GSM_MS, gsm_ms);
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@ -1,5 +1,4 @@
 OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
-                                          arm/ac3dsp_arm.o

 OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \

--- a/libavcodec/arm/aac.h
+++ b/libavcodec/arm/aac.h
@ -30,17 +30,17 @@ static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
 {
    unsigned v0, v1;
-    __asm__ volatile ("ubfx     %0,  %4,  #0, #4      \n\t"
-                      "ubfx     %1,  %4,  #4, #4      \n\t"
-                      "ldr      %0,  [%3, %0, lsl #2] \n\t"
-                      "ldr      %1,  [%3, %1, lsl #2] \n\t"
-                      "vld1.32  {d1[]},   [%5,:32]    \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "vmul.f32 d0,  d0,  d1          \n\t"
-                      "vst1.32  {d0},     [%2,:64]!   \n\t"
-                      : "=&r"(v0), "=&r"(v1), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1");
+    __asm__ ("ubfx     %0,  %6,  #0, #4      \n\t"
+             "ubfx     %1,  %6,  #4, #4      \n\t"
+             "ldr      %0,  [%5, %0, lsl #2] \n\t"
+             "ldr      %1,  [%5, %1, lsl #2] \n\t"
+             "vld1.32  {d1[]},   [%7,:32]    \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "vmul.f32 d0,  d0,  d1          \n\t"
+             "vst1.32  {d0},     [%2,:64]!   \n\t"
+             : "=&r"(v0), "=&r"(v1), "+r"(dst), "=m"(dst[0]), "=m"(dst[1])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "d0", "d1");
    return dst;
 }

@ -49,22 +49,23 @@ static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
 {
    unsigned v0, v1, v2, v3;
-    __asm__ volatile ("ubfx     %0,  %6,  #0, #2      \n\t"
-                      "ubfx     %1,  %6,  #2, #2      \n\t"
-                      "ldr      %0,  [%5, %0, lsl #2] \n\t"
-                      "ubfx     %2,  %6,  #4, #2      \n\t"
-                      "ldr      %1,  [%5, %1, lsl #2] \n\t"
-                      "ubfx     %3,  %6,  #6, #2      \n\t"
-                      "ldr      %2,  [%5, %2, lsl #2] \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "ldr      %3,  [%5, %3, lsl #2] \n\t"
-                      "vld1.32  {d2[],d3[]},[%7,:32]  \n\t"
-                      "vmov     d1,  %2,  %3          \n\t"
-                      "vmul.f32 q0,  q0,  q1          \n\t"
-                      "vst1.32  {q0},     [%4,:128]!  \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1", "d2", "d3");
+    __asm__ ("ubfx     %0,  %10, #0, #2      \n\t"
+             "ubfx     %1,  %10, #2, #2      \n\t"
+             "ldr      %0,  [%9, %0, lsl #2] \n\t"
+             "ubfx     %2,  %10, #4, #2      \n\t"
+             "ldr      %1,  [%9, %1, lsl #2] \n\t"
+             "ubfx     %3,  %10, #6, #2      \n\t"
+             "ldr      %2,  [%9, %2, lsl #2] \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "ldr      %3,  [%9, %3, lsl #2] \n\t"
+             "vld1.32  {d2[],d3[]},[%11,:32] \n\t"
+             "vmov     d1,  %2,  %3          \n\t"
+             "vmul.f32 q0,  q0,  q1          \n\t"
+             "vst1.32  {q0},     [%4,:128]!  \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "=m"(dst[0]), "=m"(dst[1]), "=m"(dst[2]), "=m"(dst[3])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "d0", "d1", "d2", "d3");
    return dst;
 }

@ -73,22 +74,23 @@ static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
 {
    unsigned v0, v1, v2, v3;
-    __asm__ volatile ("ubfx     %0,  %6,  #0, #4      \n\t"
-                      "ubfx     %1,  %6,  #4, #4      \n\t"
-                      "ldr      %0,  [%5, %0, lsl #2] \n\t"
-                      "lsl      %2,  %8,  #30         \n\t"
-                      "ldr      %1,  [%5, %1, lsl #2] \n\t"
-                      "lsl      %3,  %8,  #31         \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "bic      %2,  %2,  #1<<30      \n\t"
-                      "vld1.32  {d1[]},   [%7,:32]    \n\t"
-                      "vmov     d2,  %2,  %3          \n\t"
-                      "veor     d0,  d0,  d2          \n\t"
-                      "vmul.f32 d0,  d0,  d1          \n\t"
-                      "vst1.32  {d0},     [%4,:64]!   \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst)
-                      : "r"(v), "r"(idx), "r"(scale), "r"(sign)
-                      : "d0", "d1", "d2");
+    __asm__ ("ubfx     %0,  %8,  #0, #4      \n\t"
+             "ubfx     %1,  %8,  #4, #4      \n\t"
+             "ldr      %0,  [%7, %0, lsl #2] \n\t"
+             "lsl      %2,  %10, #30         \n\t"
+             "ldr      %1,  [%7, %1, lsl #2] \n\t"
+             "lsl      %3,  %10, #31         \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "bic      %2,  %2,  #1<<30      \n\t"
+             "vld1.32  {d1[]},   [%9,:32]    \n\t"
+             "vmov     d2,  %2,  %3          \n\t"
+             "veor     d0,  d0,  d2          \n\t"
+             "vmul.f32 d0,  d0,  d1          \n\t"
+             "vst1.32  {d0},     [%4,:64]!   \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "=m"(dst[0]), "=m"(dst[1])
+             : "r"(v), "r"(idx), "r"(scale), "r"(sign)
+             : "d0", "d1", "d2");
    return dst;
 }

@ -97,38 +99,39 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
 {
    unsigned v0, v1, v2, v3, nz;
-    __asm__ volatile ("vld1.32  {d2[],d3[]},[%9,:32]  \n\t"
-                      "ubfx     %0,  %8,  #0, #2      \n\t"
-                      "ubfx     %1,  %8,  #2, #2      \n\t"
-                      "ldr      %0,  [%7, %0, lsl #2] \n\t"
-                      "ubfx     %2,  %8,  #4, #2      \n\t"
-                      "ldr      %1,  [%7, %1, lsl #2] \n\t"
-                      "ubfx     %3,  %8,  #6, #2      \n\t"
-                      "ldr      %2,  [%7, %2, lsl #2] \n\t"
-                      "vmov     d0,  %0,  %1          \n\t"
-                      "ldr      %3,  [%7, %3, lsl #2] \n\t"
-                      "lsr      %6,  %8,  #12         \n\t"
-                      "rbit     %6,  %6               \n\t"
-                      "vmov     d1,  %2,  %3          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %0,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %1,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "lsls     %6,  %6,  #1          \n\t"
-                      "and      %2,  %5,  #1<<31      \n\t"
-                      "lslcs    %5,  %5,  #1          \n\t"
-                      "vmov     d4,  %0,  %1          \n\t"
-                      "and      %3,  %5,  #1<<31      \n\t"
-                      "vmov     d5,  %2,  %3          \n\t"
-                      "veor     q0,  q0,  q2          \n\t"
-                      "vmul.f32 q0,  q0,  q1          \n\t"
-                      "vst1.32  {q0},     [%4,:128]!  \n\t"
-                      : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
-                        "+r"(sign), "=r"(nz)
-                      : "r"(v), "r"(idx), "r"(scale)
-                      : "d0", "d1", "d2", "d3", "d4", "d5");
+    __asm__ ("vld1.32  {d2[],d3[]},[%13,:32] \n\t"
+             "ubfx     %0,  %12, #0, #2      \n\t"
+             "ubfx     %1,  %12, #2, #2      \n\t"
+             "ldr      %0,  [%11,%0, lsl #2] \n\t"
+             "ubfx     %2,  %12, #4, #2      \n\t"
+             "ldr      %1,  [%11,%1, lsl #2] \n\t"
+             "ubfx     %3,  %12, #6, #2      \n\t"
+             "ldr      %2,  [%11,%2, lsl #2] \n\t"
+             "vmov     d0,  %0,  %1          \n\t"
+             "ldr      %3,  [%11,%3, lsl #2] \n\t"
+             "lsr      %6,  %12, #12         \n\t"
+             "rbit     %6,  %6               \n\t"
+             "vmov     d1,  %2,  %3          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %0,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %1,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "lsls     %6,  %6,  #1          \n\t"
+             "and      %2,  %5,  #1<<31      \n\t"
+             "lslcs    %5,  %5,  #1          \n\t"
+             "vmov     d4,  %0,  %1          \n\t"
+             "and      %3,  %5,  #1<<31      \n\t"
+             "vmov     d5,  %2,  %3          \n\t"
+             "veor     q0,  q0,  q2          \n\t"
+             "vmul.f32 q0,  q0,  q1          \n\t"
+             "vst1.32  {q0},     [%4,:128]!  \n\t"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2), "=&r"(v3), "+r"(dst),
+               "+r"(sign), "=r"(nz),
+               "=m"(dst[0]), "=m"(dst[1]), "=m"(dst[2]), "=m"(dst[3])
+             : "r"(v), "r"(idx), "r"(scale)
+             : "cc", "d0", "d1", "d2", "d3", "d4", "d5");
    return dst;
 }

--- a/libavcodec/arm/ac3dsp_arm.S
+++ b/libavcodec/arm/ac3dsp_arm.S
@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "asm.S"
-
-function ff_ac3_compute_mantissa_size_arm, export=1
-        push            {r4-r8,lr}
-        ldm             r0,  {r4-r8}
-        mov             r3,  r0
-        mov             r0,  #0
-1:
-        ldrb            lr,  [r1], #1
-        subs            r2,  r2,  #1
-        blt             2f
-        cmp             lr,  #4
-        bgt             3f
-        subs            lr,  lr,  #1
-        addlt           r4,  r4,  #1
-        addeq           r5,  r5,  #1
-        ble             1b
-        subs            lr,  lr,  #2
-        addlt           r6,  r6,  #1
-        addeq           r7,  r7,  #1
-        addgt           r8,  r8,  #1
-        b               1b
-3:
-        cmp             lr,  #14
-        sublt           lr,  lr,  #1
-        addgt           r0,  r0,  #16
-        addle           r0,  r0,  lr
-        b               1b
-2:
-        stm             r3,  {r4-r8}
-        pop             {r4-r8,pc}
-endfunc
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@ -39,8 +39,6 @@ int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs);

 av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
 {
-    c->compute_mantissa_size     = ff_ac3_compute_mantissa_size_arm;
-
    if (HAVE_ARMV6) {
        c->bit_alloc_calc_bap    = ff_ac3_bit_alloc_calc_bap_armv6;
    }
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@ -97,7 +97,7 @@ static inline av_const int MUL16(int ra, int rb)
 static inline av_const int mid_pred(int a, int b, int c)
 {
    int m;
-    __asm__ volatile (
+    __asm__ (
        "mov   %0, %2  \n\t"
        "cmp   %1, %2  \n\t"
        "movgt %0, %1  \n\t"
@ -107,7 +107,8 @@ static inline av_const int mid_pred(int a, int b, int c)
        "cmp   %0, %1  \n\t"
        "movgt %0, %1  \n\t"
        : "=&r"(m), "+r"(a)
-        : "r"(b), "r"(c));
+        : "r"(b), "r"(c)
+        : "cc");
    return m;
 }

--- a/libavcodec/arm/vp56_arith.h
+++ b/libavcodec/arm/vp56_arith.h
@ -31,24 +31,25 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
    unsigned high      = c->high << shift;
    unsigned bit;

-    __asm__ volatile ("adds    %3,  %3,  %0           \n"
-                      "cmpcs   %7,  %4                \n"
-                      "ldrcsh  %2,  [%4], #2          \n"
-                      "rsb     %0,  %6,  #256         \n"
-                      "smlabb  %0,  %5,  %6,  %0      \n"
-                      "rev16cs %2,  %2                \n"
-                      "orrcs   %1,  %1,  %2,  lsl %3  \n"
-                      "subcs   %3,  %3,  #16          \n"
-                      "lsr     %0,  %0,  #8           \n"
-                      "cmp     %1,  %0,  lsl #16      \n"
-                      "subge   %1,  %1,  %0,  lsl #16 \n"
-                      "subge   %0,  %5,  %0           \n"
-                      "movge   %2,  #1                \n"
-                      "movlt   %2,  #0                \n"
-                      : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
-                        "+&r"(c->bits), "+&r"(c->buffer)
-                      : "r"(high), "r"(pr), "r"(c->end - 1),
-                        "0"(shift), "1"(code_word));
+    __asm__ ("adds    %3,  %3,  %0           \n"
+             "cmpcs   %7,  %4                \n"
+             "ldrcsh  %2,  [%4], #2          \n"
+             "rsb     %0,  %6,  #256         \n"
+             "smlabb  %0,  %5,  %6,  %0      \n"
+             "rev16cs %2,  %2                \n"
+             "orrcs   %1,  %1,  %2,  lsl %3  \n"
+             "subcs   %3,  %3,  #16          \n"
+             "lsr     %0,  %0,  #8           \n"
+             "cmp     %1,  %0,  lsl #16      \n"
+             "subge   %1,  %1,  %0,  lsl #16 \n"
+             "subge   %0,  %5,  %0           \n"
+             "movge   %2,  #1                \n"
+             "movlt   %2,  #0                \n"
+             : "=&r"(c->high), "=&r"(c->code_word), "=&r"(bit),
+               "+&r"(c->bits), "+&r"(c->buffer)
+             : "r"(high), "r"(pr), "r"(c->end - 1),
+               "0"(shift), "1"(code_word)
+             : "cc");

    return bit;
 }
@ -62,19 +63,20 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
    unsigned low;
    unsigned tmp;

-    __asm__ volatile ("adds    %3,  %3,  %0           \n"
-                      "cmpcs   %7,  %4                \n"
-                      "ldrcsh  %2,  [%4], #2          \n"
-                      "rsb     %0,  %6,  #256         \n"
-                      "smlabb  %0,  %5,  %6,  %0      \n"
-                      "rev16cs %2,  %2                \n"
-                      "orrcs   %1,  %1,  %2,  lsl %3  \n"
-                      "subcs   %3,  %3,  #16          \n"
-                      "lsr     %0,  %0,  #8           \n"
-                      "lsl     %2,  %0,  #16          \n"
-                      : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
-                        "+&r"(c->bits), "+&r"(c->buffer)
-                      : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift));
+    __asm__ ("adds    %3,  %3,  %0           \n"
+             "cmpcs   %7,  %4                \n"
+             "ldrcsh  %2,  [%4], #2          \n"
+             "rsb     %0,  %6,  #256         \n"
+             "smlabb  %0,  %5,  %6,  %0      \n"
+             "rev16cs %2,  %2                \n"
+             "orrcs   %1,  %1,  %2,  lsl %3  \n"
+             "subcs   %3,  %3,  #16          \n"
+             "lsr     %0,  %0,  #8           \n"
+             "lsl     %2,  %0,  #16          \n"
+             : "=&r"(low), "+&r"(code_word), "=&r"(tmp),
+               "+&r"(c->bits), "+&r"(c->buffer)
+             : "r"(high), "r"(pr), "r"(c->end - 1), "0"(shift)
+             : "cc");

    if (code_word >= tmp) {
        c->high      = high - low;
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@ -27,6 +27,11 @@ SECTION_RODATA
 ; 16777216.0f - used in ff_float_to_fixed24()
 pf_1_24: times 4 dd 0x4B800000

+; used in ff_ac3_compute_mantissa_size()
+cextern ac3_bap_bits
+pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
+pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
+
 SECTION .text

 ;-----------------------------------------------------------------------------
@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
 %endif
    ja .loop
    REP_RET
+
+;------------------------------------------------------------------------------
+; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
+;------------------------------------------------------------------------------
+
+%macro PHADDD4 2 ; xmm src, xmm tmp
+    movhlps  %2, %1
+    paddd    %1, %2
+    pshufd   %2, %1, 0x1
+    paddd    %1, %2
+%endmacro
+
+INIT_XMM
+cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
+    movdqa      m0, [mant_cntq      ]
+    movdqa      m1, [mant_cntq+ 1*16]
+    paddw       m0, [mant_cntq+ 2*16]
+    paddw       m1, [mant_cntq+ 3*16]
+    paddw       m0, [mant_cntq+ 4*16]
+    paddw       m1, [mant_cntq+ 5*16]
+    paddw       m0, [mant_cntq+ 6*16]
+    paddw       m1, [mant_cntq+ 7*16]
+    paddw       m0, [mant_cntq+ 8*16]
+    paddw       m1, [mant_cntq+ 9*16]
+    paddw       m0, [mant_cntq+10*16]
+    paddw       m1, [mant_cntq+11*16]
+    pmaddwd     m0, [ac3_bap_bits   ]
+    pmaddwd     m1, [ac3_bap_bits+16]
+    paddd       m0, m1
+    PHADDD4     m0, m1
+    movd      sumd, m0
+    movdqa      m3, [pw_bap_mul1]
+    movhpd      m0, [mant_cntq     +2]
+    movlpd      m0, [mant_cntq+1*32+2]
+    movhpd      m1, [mant_cntq+2*32+2]
+    movlpd      m1, [mant_cntq+3*32+2]
+    movhpd      m2, [mant_cntq+4*32+2]
+    movlpd      m2, [mant_cntq+5*32+2]
+    pmulhuw     m0, m3
+    pmulhuw     m1, m3
+    pmulhuw     m2, m3
+    paddusw     m0, m1
+    paddusw     m0, m2
+    pmaddwd     m0, [pw_bap_mul2]
+    PHADDD4     m0, m1
+    movd       eax, m0
+    add        eax, sumd
+    RET
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@ -42,6 +42,8 @@ extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned i
 extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
 extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);

+extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
+
 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
 {
    int mm_flags = av_get_cpu_flags();
@ -69,6 +71,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
+        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@ -29,6 +29,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"

 #include <dc1394/dc1394.h>

@ -42,6 +43,7 @@ typedef struct dc1394_data {
    int current_frame;
    int fps;
    char *video_size;       /**< String describing video size, set by a private option. */
+    char *pixel_format;     /**< Set by a private option. */

    AVPacket packet;
 } dc1394_data;
@ -82,6 +84,7 @@ struct dc1394_frame_rate {
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = "qvga"}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "uyvy422"}, 0, 0, DEC },
    { NULL },
 };

@ -134,6 +137,14 @@ static int dc1394_read_header(AVFormatContext *c, AVFormatParameters * ap)
        goto out_camera;
    }

+    if (dc1394->pixel_format) {
+        if ((ap->pix_fmt = av_get_pix_fmt(dc1394->pixel_format)) == PIX_FMT_NONE) {
+            av_log(c, AV_LOG_ERROR, "No such pixel format: %s.\n", dc1394->pixel_format);
+            ret = AVERROR(EINVAL);
+            goto out;
+        }
+    }
+
    if (dc1394->video_size) {
        if ((ret = av_parse_video_size(&ap->width, &ap->height, dc1394->video_size)) < 0) {
            av_log(c, AV_LOG_ERROR, "Couldn't parse video size.\n");
@ -303,6 +314,8 @@ out_camera:
    dc1394_video_set_transmission(dc1394->camera, DC1394_OFF);
    dc1394_camera_free (dc1394->camera);
 out:
+    av_freep(&dc1394->video_size);
+    av_freep(&dc1394->pixel_format);
    dc1394_free(dc1394->d);
    return ret;
 }
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@ -50,6 +50,7 @@
 #include "libavutil/opt.h"
 #include "avdevice.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"

 static const int desired_video_buffers = 256;

@ -74,6 +75,7 @@ struct video_data {
    char *standard;
    int channel;
    char *video_size; /**< String describing video size, set by a private option. */
+    char *pixel_format; /**< Set by a private option. */
 };

 struct buff_data {
@ -534,12 +536,12 @@ static int v4l2_set_parameters(AVFormatContext *s1, AVFormatParameters *ap)
 }

 static uint32_t device_try_init(AVFormatContext *s1,
-                                const AVFormatParameters *ap,
+                                enum PixelFormat pix_fmt,
                                int *width,
                                int *height,
                                enum CodecID *codec_id)
 {
-    uint32_t desired_format = fmt_ff2v4l(ap->pix_fmt, s1->video_codec_id);
+    uint32_t desired_format = fmt_ff2v4l(pix_fmt, s1->video_codec_id);

    if (desired_format == 0 ||
        device_init(s1, width, height, desired_format) < 0) {
@ -572,6 +574,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
    int res = 0;
    uint32_t desired_format, capabilities;
    enum CodecID codec_id;
+    enum PixelFormat pix_fmt = PIX_FMT_NONE;

    st = av_new_stream(s1, 0);
    if (!st) {
@ -584,11 +587,18 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
        av_log(s1, AV_LOG_ERROR, "Couldn't parse video size.\n");
        goto out;
    }
+    if (s->pixel_format && (pix_fmt = av_get_pix_fmt(s->pixel_format)) == PIX_FMT_NONE) {
+        av_log(s1, AV_LOG_ERROR, "No such pixel format: %s.\n", s->pixel_format);
+        res = AVERROR(EINVAL);
+        goto out;
+    }
 #if FF_API_FORMAT_PARAMETERS
    if (ap->width > 0)
        s->width  = ap->width;
    if (ap->height > 0)
        s->height = ap->height;
+    if (ap->pix_fmt)
+        pix_fmt = ap->pix_fmt;
 #endif

    capabilities = 0;
@ -614,7 +624,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
        av_log(s1, AV_LOG_VERBOSE, "Setting frame size to %dx%d\n", s->width, s->height);
    }

-    desired_format = device_try_init(s1, ap, &s->width, &s->height, &codec_id);
+    desired_format = device_try_init(s1, pix_fmt, &s->width, &s->height, &codec_id);
    if (desired_format == 0) {
        av_log(s1, AV_LOG_ERROR, "Cannot find a proper format for "
               "codec_id %d, pix_fmt %d.\n", s1->video_codec_id, ap->pix_fmt);
@ -659,6 +669,7 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)

 out:
    av_freep(&s->video_size);
+    av_freep(&s->pixel_format);
    return res;
 }

@ -709,6 +720,7 @@ static const AVOption options[] = {
    { "standard", "", OFFSET(standard), FF_OPT_TYPE_STRING, {.str = NULL }, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
    { "channel",  "", OFFSET(channel),  FF_OPT_TYPE_INT,    {.dbl = 0 }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
    { NULL },
 };

--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@ -233,9 +233,7 @@ typedef struct AVFormatParameters {
    attribute_deprecated int channels;
    attribute_deprecated int width;
    attribute_deprecated int height;
-#endif
-    enum PixelFormat pix_fmt;
-#if FF_API_FORMAT_PARAMETERS
+    attribute_deprecated enum PixelFormat pix_fmt;
    attribute_deprecated int channel; /**< Used to select DV channel. */
    attribute_deprecated const char *standard; /**< deprecated, use demuxer-specific options instead. */
    attribute_deprecated unsigned int mpeg2ts_raw:1;  /**< deprecated, use mpegtsraw demuxer */
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@ -223,6 +223,24 @@ typedef enum {
  MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP = 3,
 } MatroskaTrackEncodingCompAlgo;

+typedef enum {
+  MATROSKA_VIDEO_STEREOMODE_TYPE_MONO               = 0,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_LEFT_RIGHT         = 1,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTTOM_TOP         = 2,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM         = 3,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_RL    = 4,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_CHECKERBOARD_LR    = 5,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_RL = 6,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_LR = 7,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_RL = 8,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_LR = 9,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_CYAN_RED  = 10,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT         = 11,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_ANAGLYPH_GREEN_MAG = 12,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_LR = 13,
+  MATROSKA_VIDEO_STEREOMODE_TYPE_BOTH_EYES_BLOCK_RL = 14,
+} MatroskaVideoStereoModeType;
+
 /*
 * Matroska Codec IDs, strings
 */
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@ -25,6 +25,7 @@
 #include "rawdec.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"

 /* raw input */
 int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
@ -70,30 +71,37 @@ int ff_raw_read_header(AVFormatContext *s, AVFormatParameters *ap)
        case AVMEDIA_TYPE_VIDEO: {
            FFRawVideoDemuxerContext *s1 = s->priv_data;
            int width = 0, height = 0, ret;
+            enum PixelFormat pix_fmt;
+
            if(ap->time_base.num)
                av_set_pts_info(st, 64, ap->time_base.num, ap->time_base.den);
            else
                av_set_pts_info(st, 64, 1, 25);
-            if (s1->video_size) {
-                ret = av_parse_video_size(&width, &height, s1->video_size);
-                av_freep(&s1->video_size);
-                if (ret < 0) {
-                    av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
-                    return ret;
-                }
+            if (s1->video_size && (ret = av_parse_video_size(&width, &height, s1->video_size)) < 0) {
+                av_log(s, AV_LOG_ERROR, "Couldn't parse video size.\n");
+                goto fail;
+            }
+            if ((pix_fmt = av_get_pix_fmt(s1->pixel_format)) == PIX_FMT_NONE) {
+                av_log(s, AV_LOG_ERROR, "No such pixel format: %s.\n", s1->pixel_format);
+                ret = AVERROR(EINVAL);
+                goto fail;
            }
 #if FF_API_FORMAT_PARAMETERS
            if (ap->width > 0)
                width = ap->width;
            if (ap->height > 0)
                height = ap->height;
+            if (ap->pix_fmt)
+                pix_fmt = ap->pix_fmt;
 #endif
            st->codec->width  = width;
            st->codec->height = height;
-            st->codec->pix_fmt = ap->pix_fmt;
-            if(st->codec->pix_fmt == PIX_FMT_NONE)
-                st->codec->pix_fmt= PIX_FMT_YUV420P;
+            st->codec->pix_fmt = pix_fmt;
            break;
+fail:
+            av_freep(&s1->video_size);
+            av_freep(&s1->pixel_format);
+            return ret;
            }
        default:
            return -1;
@ -188,6 +196,7 @@ const AVClass ff_rawaudio_demuxer_class = {
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption video_options[] = {
    { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(video_size), FF_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
+    { "pixel_format", "", OFFSET(pixel_format), FF_OPT_TYPE_STRING, {.str = "yuv420p"}, 0, 0, DEC },
    { NULL },
 };
 #undef OFFSET
--- a/libavformat/rawdec.h
+++ b/libavformat/rawdec.h
@ -34,6 +34,7 @@ typedef struct RawAudioDemuxerContext {
 typedef struct FFRawVideoDemuxerContext {
    const AVClass *class;     /**< Class for private options. */
    char *video_size;         /**< String describing video size, set by a private option. */
+    char *pixel_format;       /**< Set by a private option. */
 } FFRawVideoDemuxerContext;

 extern const AVClass ff_rawaudio_demuxer_class;
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@ -33,12 +33,12 @@
 #define FASTDIV FASTDIV
 static av_always_inline av_const int FASTDIV(int a, int b)
 {
-    int r, t;
-    __asm__ ("cmp     %3, #2               \n\t"
-             "ldr     %1, [%4, %3, lsl #2] \n\t"
-             "lsrle   %0, %2, #1           \n\t"
-             "smmulgt %0, %1, %2           \n\t"
-             : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
+    int r;
+    __asm__ ("cmp     %2, #2               \n\t"
+             "ldr     %0, [%3, %2, lsl #2] \n\t"
+             "lsrle   %0, %1, #1           \n\t"
+             "smmulgt %0, %0, %1           \n\t"
+             : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc");
    return r;
 }

--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h
@ -24,6 +24,7 @@

 #include "pixfmt.h"
 #include <inttypes.h>
+#include "pixfmt.h"

 typedef struct AVComponentDescriptor{
    uint16_t plane        :2;            ///< which of the 4 planes contains the component
--- a/libswscale/bfin/internal_bfin.S
+++ b/libswscale/bfin/internal_bfin.S
@ -466,8 +466,8 @@ DEFUN_END(yuv2rgb24_line)
 #define ARG_srcStride   40

 DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         long width, long height,
-                         long lumStride, long chromStride, long srcStride)):
+                         int width, int height,
+                         int lumStride, int chromStride, int srcStride)):
        link 0;
        [--sp] = (r7:4,p5:4);

@ -539,8 +539,8 @@ DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8
 DEFUN_END(uyvytoyv12)

 DEFUN(yuyvtoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                         long width, long height,
-                         long lumStride, long chromStride, long srcStride)):
+                         int width, int height,
+                         int lumStride, int chromStride, int srcStride)):
        link 0;
        [--sp] = (r7:4,p5:4);

--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@ -38,12 +38,12 @@
 #endif

 int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                       long width, long height,
-                       long lumStride, long chromStride, long srcStride) L1CODE;
+                       int width, int height,
+                       int lumStride, int chromStride, int srcStride) L1CODE;

 int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                       long width, long height,
-                       long lumStride, long chromStride, long srcStride) L1CODE;
+                       int width, int height,
+                       int lumStride, int chromStride, int srcStride) L1CODE;

 static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dst[], int dstStride[])
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@ -52,7 +52,7 @@ int main(int argc, char **argv)
            int src_bpp;
            int dst_bpp;
            const char *name;
-            void (*func)(const uint8_t *src, uint8_t *dst, long src_size);
+            void (*func)(const uint8_t *src, uint8_t *dst, int src_size);
        } func_info[] = {
            FUNC(2, 2, rgb15to16),
            FUNC(2, 3, rgb15to24),
--- a/libswscale/ppc/swscale_altivec_template.c
+++ b/libswscale/ppc/swscale_altivec_template.c
@ -29,13 +29,13 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
    register int i;
    vector unsigned int altivec_vectorShiftInt19 =
        vec_add(vec_splat_u32(10), vec_splat_u32(9));
-    if ((unsigned long)dest % 16) {
+    if ((unsigned int)dest % 16) {
        /* badly aligned store, we force store alignment */
        /* and will handle load misalignment on val w/ vec_perm */
        vector unsigned char perm1;
        vector signed int v1;
        for (i = 0 ; (i < dstW) &&
-            (((unsigned long)dest + i) % 16) ; i++) {
+            (((unsigned int)dest + i) % 16) ; i++) {
                int t = val[i] >> 19;
                dest[i] = (t < 0) ? 0 : ((t > 255) ? 255 : t);
        }
--- a/libswscale/ppc/swscale_template.c
+++ b/libswscale/ppc/swscale_template.c
@ -30,7 +30,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                    const int16_t **chrVSrc, int chrFilterSize,
                                    const int16_t **alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
    yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@ -45,7 +45,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
                                       const int16_t *chrFilter, const int16_t **chrUSrc,
                                       const int16_t **chrVSrc, int chrFilterSize,
                                       const int16_t **alpSrc, uint8_t *dest,
-                                       long dstW, long dstY)
+                                       int dstW, int dstY)
 {
    /* The following list of supported dstFormat values should
       match what's found in the body of ff_yuv2packedX_altivec() */
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@ -793,7 +793,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,

    vector signed short   RND = vec_splat_s16(1<<3);
    vector unsigned short SCL = vec_splat_u16(4);
-    DECLARE_ALIGNED(16, unsigned long, scratch)[16];
+    DECLARE_ALIGNED(16, unsigned int, scratch)[16];

    vector signed short *YCoeffs, *CCoeffs;

--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@ -29,70 +29,70 @@
 #include "swscale.h"
 #include "swscale_internal.h"

-void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);

 void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                   long width, long height,
-                   long lumStride, long chromStride, long dstStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int dstStride);
 void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                   long width, long height,
-                   long lumStride, long chromStride, long dstStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int dstStride);
 void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                      long width, long height,
-                      long lumStride, long chromStride, long dstStride);
+                      int width, int height,
+                      int lumStride, int chromStride, int dstStride);
 void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                      long width, long height,
-                      long lumStride, long chromStride, long dstStride);
+                      int width, int height,
+                      int lumStride, int chromStride, int dstStride);
 void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                   long width, long height,
-                   long lumStride, long chromStride, long srcStride);
+                   int width, int height,
+                   int lumStride, int chromStride, int srcStride);
 void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                    long width, long height,
-                    long lumStride, long chromStride, long srcStride);
-void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
-                 long srcStride, long dstStride);
+                    int width, int height,
+                    int lumStride, int chromStride, int srcStride);
+void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+                 int srcStride, int dstStride);
 void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-                        long width, long height, long src1Stride,
-                        long src2Stride, long dstStride);
+                        int width, int height, int src1Stride,
+                        int src2Stride, int dstStride);
 void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                    uint8_t *dst1, uint8_t *dst2,
-                    long width, long height,
-                    long srcStride1, long srcStride2,
-                    long dstStride1, long dstStride2);
+                    int width, int height,
+                    int srcStride1, int srcStride2,
+                    int dstStride1, int dstStride2);
 void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                     uint8_t *dst,
-                     long width, long height,
-                     long srcStride1, long srcStride2,
-                     long srcStride3, long dstStride);
+                     int width, int height,
+                     int srcStride1, int srcStride2,
+                     int srcStride3, int dstStride);
 void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);
 void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     long width, long height,
-                     long lumStride, long chromStride, long srcStride);
+                     int width, int height,
+                     int lumStride, int chromStride, int srcStride);

 #define RGB2YUV_SHIFT 8
 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
@ -151,10 +151,10 @@ void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
 }
 #endif

-void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 2;
+    int i;
+    int num_pixels = src_size >> 2;
    for (i=0; i<num_pixels; i++) {
 #if HAVE_BIGENDIAN
        /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
@ -169,9 +169,9 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
+    int i;
    for (i=0; 3*i<src_size; i++) {
 #if HAVE_BIGENDIAN
        /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
@ -188,7 +188,7 @@ void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    uint8_t *d = dst;
@ -211,7 +211,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    uint8_t *d = dst;
@ -226,10 +226,10 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;

    for (i=0; i<num_pixels; i++) {
        unsigned rgb = ((const uint16_t*)src)[i];
@ -237,10 +237,10 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;

    for (i=0; i<num_pixels; i++) {
        unsigned rgb = ((const uint16_t*)src)[i];
@ -248,7 +248,7 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    uint8_t *d = dst;
@ -271,7 +271,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    uint8_t *d = dst;
@ -286,10 +286,10 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;

    for (i=0; i<num_pixels; i++) {
        unsigned rgb = ((const uint16_t*)src)[i];
@ -297,10 +297,10 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size >> 1;
+    int i;
+    int num_pixels = src_size >> 1;

    for (i=0; i<num_pixels; i++) {
        unsigned br;
@ -310,10 +310,10 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
+void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    long i;
-    long num_pixels = src_size;
+    int i;
+    int num_pixels = src_size;
    for (i=0; i<num_pixels; i++) {
        unsigned b,g,r;
        register uint8_t rgb;
@ -326,9 +326,9 @@ void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
 }

 #define DEFINE_SHUFFLE_BYTES(a, b, c, d)                                \
-void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, long src_size) \
+void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, int src_size) \
 {                                                                       \
-    long i;                                                             \
+    int i;                                                             \
                                                                        \
    for (i = 0; i < src_size; i+=4) {                                   \
        dst[i + 0] = src[i + a];                                        \
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@ -32,41 +32,41 @@
 #include "libavutil/avutil.h"

 /* A full collection of RGB to RGB(BGR) converters */
-extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
+extern void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb15to32)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb16to32)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24to16)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb24to15)   (const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);

-void rgb24to32   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb32to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15to24   (const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-void bgr8torgb8  (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb24to32   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb32to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15to24   (const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
+void bgr8torgb8  (const uint8_t *src, uint8_t *dst, int src_size);

-void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, long src_size);
-void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, long src_size);
+void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size);
+void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size);

 #if LIBSWSCALE_VERSION_MAJOR < 1
 /* deprecated, use the public versions in swscale.h */
@ -80,46 +80,46 @@ attribute_deprecated void palette8tobgr16(const uint8_t *src, uint8_t *dst, long


 void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                   uint8_t *vdst, long width, long height, long lumStride,
-                   long chromStride, long srcStride);
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride);

 /**
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
 extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          long width, long height,
-                          long lumStride, long chromStride, long dstStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int dstStride);

 /**
 * Width should be a multiple of 16.
 */
 extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             long width, long height,
-                             long lumStride, long chromStride, long dstStride);
+                             int width, int height,
+                             int lumStride, int chromStride, int dstStride);

 /**
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
 extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                          long width, long height,
-                          long lumStride, long chromStride, long srcStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int srcStride);

 /**
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
 extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                          long width, long height,
-                          long lumStride, long chromStride, long dstStride);
+                          int width, int height,
+                          int lumStride, int chromStride, int dstStride);

 /**
 * Width should be a multiple of 16.
 */
 extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                             long width, long height,
-                             long lumStride, long chromStride, long dstStride);
+                             int width, int height,
+                             int lumStride, int chromStride, int dstStride);

 /**
 * Height should be a multiple of 2 and width should be a multiple of 2.
@ -128,40 +128,40 @@ extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uin
 * FIXME: Write high quality version.
 */
 extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           long width, long height,
-                           long lumStride, long chromStride, long srcStride);
-extern void (*planar2x)(const uint8_t *src, uint8_t *dst, long width, long height,
-                        long srcStride, long dstStride);
+                           int width, int height,
+                           int lumStride, int chromStride, int srcStride);
+extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+                        int srcStride, int dstStride);

 extern void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-                               long width, long height, long src1Stride,
-                               long src2Stride, long dstStride);
+                               int width, int height, int src1Stride,
+                               int src2Stride, int dstStride);

 extern void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                           uint8_t *dst1, uint8_t *dst2,
-                           long width, long height,
-                           long srcStride1, long srcStride2,
-                           long dstStride1, long dstStride2);
+                           int width, int height,
+                           int srcStride1, int srcStride2,
+                           int dstStride1, int dstStride2);

 extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                            uint8_t *dst,
-                            long width, long height,
-                            long srcStride1, long srcStride2,
-                            long srcStride3, long dstStride);
+                            int width, int height,
+                            int srcStride1, int srcStride2,
+                            int srcStride3, int dstStride);


 extern void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);
 extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                            long width, long height,
-                            long lumStride, long chromStride, long srcStride);
+                            int width, int height,
+                            int lumStride, int chromStride, int srcStride);

 void sws_rgb2rgb_init(void);

--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@ -26,7 +26,7 @@

 #include <stddef.h>

-static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    uint8_t *dest = dst;
    const uint8_t *s = src;
@ -50,7 +50,7 @@ static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_siz
    }
 }

-static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    uint8_t *dest = dst;
    const uint8_t *s = src;
@ -81,7 +81,7 @@ static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
 MMX2, 3DNOW optimization by Nick Kurshev
 32-bit C version, and and&add trick by Michael Niedermayer
 */
-static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    register const uint8_t* s=src;
    register uint8_t* d=dst;
@ -101,7 +101,7 @@ static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    register const uint8_t* s=src;
    register uint8_t* d=dst;
@ -122,7 +122,7 @@ static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -135,7 +135,7 @@ static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -147,7 +147,7 @@ static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_siz
    }
 }

-static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -159,7 +159,7 @@ static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -171,7 +171,7 @@ static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_siz
    }
 }

-static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -185,7 +185,7 @@ static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_siz
    }
 }

-static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -199,7 +199,7 @@ static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -213,7 +213,7 @@ static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_siz
    }
 }

-static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -248,7 +248,7 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
       |
   original bits
 */
-static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    uint8_t *d = dst;
@ -263,7 +263,7 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
    }
 }

-static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    uint8_t *d = (uint8_t *)dst;
@ -278,7 +278,7 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
    }
 }

-static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    uint8_t *d = dst;
@ -301,7 +301,7 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    uint8_t *d = dst;
@ -324,7 +324,7 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
    }
 }

-static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    int idx = 15 - src_size;
    const uint8_t *s = src-idx;
@ -336,7 +336,7 @@ static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long s
    }
 }

-static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
    unsigned i;
    for (i=0; i<src_size; i+=3) {
@ -350,11 +350,11 @@ static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz

 static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
-                                     long width, long height,
-                                     long lumStride, long chromStride,
-                                     long dstStride, long vertLumPerChroma)
+                                     int width, int height,
+                                     int lumStride, int chromStride,
+                                     int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
    const int chromWidth = width >> 1;
    for (y=0; y<height; y++) {
 #if HAVE_FAST_64BIT
@ -404,9 +404,9 @@ static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 */
 static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long dstStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int dstStride)
 {
    //FIXME interpolate chroma
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
@ -415,11 +415,11 @@ static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,

 static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
-                                     long width, long height,
-                                     long lumStride, long chromStride,
-                                     long dstStride, long vertLumPerChroma)
+                                     int width, int height,
+                                     int lumStride, int chromStride,
+                                     int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
    const int chromWidth = width >> 1;
    for (y=0; y<height; y++) {
 #if HAVE_FAST_64BIT
@ -469,9 +469,9 @@ static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 */
 static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long dstStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int dstStride)
 {
    //FIXME interpolate chroma
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
@ -483,9 +483,9 @@ static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 */
 static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
-                                   long width, long height,
-                                   long lumStride, long chromStride,
-                                   long dstStride)
+                                   int width, int height,
+                                   int lumStride, int chromStride,
+                                   int dstStride)
 {
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 1);
@ -496,9 +496,9 @@ static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 */
 static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
-                                   long width, long height,
-                                   long lumStride, long chromStride,
-                                   long dstStride)
+                                   int width, int height,
+                                   int lumStride, int chromStride,
+                                   int dstStride)
 {
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                      chromStride, dstStride, 1);
@ -510,14 +510,14 @@ static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 */
 static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long srcStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int srcStride)
 {
-    long y;
+    int y;
    const int chromWidth = width >> 1;
    for (y=0; y<height; y+=2) {
-        long i;
+        int i;
        for (i=0; i<chromWidth; i++) {
            ydst[2*i+0]     = src[4*i+0];
            udst[i]     = src[4*i+1];
@ -538,10 +538,10 @@ static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
    }
 }

-static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
-                              long srcHeight, long srcStride, long dstStride)
+static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
+                              int srcHeight, int srcStride, int dstStride)
 {
-    long x,y;
+    int x,y;

    dst[0]= src[0];

@ -598,14 +598,14 @@ static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
 */
 static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
-                                long width, long height,
-                                long lumStride, long chromStride,
-                                long srcStride)
+                                int width, int height,
+                                int lumStride, int chromStride,
+                                int srcStride)
 {
-    long y;
+    int y;
    const int chromWidth = width >> 1;
    for (y=0; y<height; y+=2) {
-        long i;
+        int i;
        for (i=0; i<chromWidth; i++) {
            udst[i]     = src[4*i+0];
            ydst[2*i+0] = src[4*i+1];
@ -634,14 +634,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
 * FIXME: Write HQ version.
 */
 void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                   uint8_t *vdst, long width, long height, long lumStride,
-                   long chromStride, long srcStride)
+                   uint8_t *vdst, int width, int height, int lumStride,
+                   int chromStride, int srcStride)
 {
-    long y;
+    int y;
    const int chromWidth = width >> 1;
    y=0;
    for (; y<height; y+=2) {
-        long i;
+        int i;
        for (i=0; i<chromWidth; i++) {
            unsigned int b = src[6*i+0];
            unsigned int g = src[6*i+1];
@ -692,14 +692,14 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
 }

 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
-                              uint8_t *dest, long width,
-                              long height, long src1Stride,
-                              long src2Stride, long dstStride)
+                              uint8_t *dest, int width,
+                              int height, int src1Stride,
+                              int src2Stride, int dstStride)
 {
-    long h;
+    int h;

    for (h=0; h < height; h++) {
-        long w;
+        int w;
        for (w=0; w < width; w++) {
            dest[2*w+0] = src1[w];
            dest[2*w+1] = src2[w];
@ -712,12 +712,12 @@ static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,

 static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
-                                 long width, long height,
-                                 long srcStride1, long srcStride2,
-                                 long dstStride1, long dstStride2)
+                                 int width, int height,
+                                 int srcStride1, int srcStride2,
+                                 int dstStride1, int dstStride2)
 {
    int y;
-    long x,w,h;
+    int x,w,h;
    w=width/2; h=height/2;
    for (y=0;y<h;y++) {
        const uint8_t* s1=src1+srcStride1*(y>>1);
@ -735,12 +735,12 @@ static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,

 static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
-                                  long width, long height,
-                                  long srcStride1, long srcStride2,
-                                  long srcStride3, long dstStride)
+                                  int width, int height,
+                                  int srcStride1, int srcStride2,
+                                  int srcStride3, int dstStride)
 {
    int x;
-    long y,w,h;
+    int y,w,h;
    w=width/2; h=height;
    for (y=0;y<h;y++) {
        const uint8_t* yp=src1+srcStride1*y;
@ -749,7 +749,7 @@ static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
        uint8_t* d=dst+dstStride*y;
        x=0;
        for (; x<w; x++) {
-            const long x2 = x<<2;
+            const int x2 = x<<2;
            d[8*x+0] = yp[x2];
            d[8*x+1] = up[x];
            d[8*x+2] = yp[x2+1];
@ -836,11 +836,11 @@ static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
 }

 static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);

    for (y=0; y<height; y++) {
        extract_even_c(src, ydst, width);
@ -856,11 +856,11 @@ static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }

 static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);

    for (y=0; y<height; y++) {
        extract_even_c(src, ydst, width);
@ -874,11 +874,11 @@ static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }

 static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);

    for (y=0; y<height; y++) {
        extract_even_c(src + 1, ydst, width);
@ -894,11 +894,11 @@ static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
 }

 static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                           const uint8_t *src, long width, long height,
-                           long lumStride, long chromStride, long srcStride)
+                           const uint8_t *src, int width, int height,
+                           int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);

    for (y=0; y<height; y++) {
        extract_even_c(src + 1, ydst, width);
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@ -1075,7 +1075,7 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
 }

 #define RGB48(name, R, B, READ)\
-static inline void name ## ToY(int16_t *dst, const uint16_t *src, long width, uint32_t *unused)\
+static inline void name ## ToY(int16_t *dst, const uint16_t *src, int width, uint32_t *unused)\
 {\
    int i;\
    for (i = 0; i < width; i++) {\
@ -1089,7 +1089,7 @@ static inline void name ## ToY(int16_t *dst, const uint16_t *src, long width, ui
 \
 static inline void name ## ToUV(int16_t *dstU, int16_t *dstV,\
                             const uint16_t *src1, const uint16_t *src2,\
-                             long width, uint32_t *unused)\
+                             int width, uint32_t *unused)\
 {\
    int i;\
    assert(src1==src2);\
@ -1105,7 +1105,7 @@ static inline void name ## ToUV(int16_t *dstU, int16_t *dstV,\
 \
 static inline void name ## ToUV_half(int16_t *dstU, int16_t *dstV,\
                                  const uint16_t *src1, const uint16_t *src2,\
-                                  long width, uint32_t *unused)\
+                                  int width, uint32_t *unused)\
 {\
    int i;\
    assert(src1==src2);\
@ -1125,7 +1125,7 @@ RGB48(bgr48LE, 2, 0, AV_RL16)
 RGB48(bgr48BE, 2, 0, AV_RB16)

 #define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
-static inline void name(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)\
+static inline void name(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)\
 {\
    int i;\
    for (i=0; i<width; i++) {\
@ -1146,7 +1146,7 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
 BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
 BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)

-static inline void abgrToA(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void abgrToA(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
    int i;
    for (i=0; i<width; i++) {
@ -1155,7 +1155,7 @@ static inline void abgrToA(int16_t *dst, const uint8_t *src, long width, uint32_
 }

 #define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
-static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
 {\
    int i;\
    for (i=0; i<width; i++) {\
@ -1167,7 +1167,7 @@ static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
    }\
 }\
-static inline void name ## _half(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
+static inline void name ## _half(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\
 {\
    int i;\
    for (i=0; i<width; i++) {\
@ -1194,7 +1194,7 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<
 BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
 BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)

-static inline void palToA(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
+static inline void palToA(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
 {
    int i;
    for (i=0; i<width; i++) {
@ -1216,7 +1216,7 @@ static inline void palToY(int16_t *dst, const uint8_t *src, long width, uint32_t

 static inline void palToUV(uint16_t *dstU, int16_t *dstV,
                           const uint8_t *src1, const uint8_t *src2,
-                           long width, uint32_t *pal)
+                           int width, uint32_t *pal)
 {
    int i;
    assert(src1 == src2);
@ -1228,7 +1228,7 @@ static inline void palToUV(uint16_t *dstU, int16_t *dstV,
    }
 }

-static inline void monowhite2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monowhite2Y(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
    int i, j;
    for (i=0; i<width/8; i++) {
@ -1238,7 +1238,7 @@ static inline void monowhite2Y(int16_t *dst, const uint8_t *src, long width, uin
    }
 }

-static inline void monoblack2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void monoblack2Y(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
    int i, j;
    for (i=0; i<width/8; i++) {
@ -1431,24 +1431,24 @@ static int uyvyToYuv422Wrapper(SwsContext *c, const uint8_t* src[], int srcStrid
    return srcSliceH;
 }

-static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;
    for (i=0; i<num_pixels; i++)
        ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | (src[(i<<1)+1] << 24);
 }

-static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked32_1(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;

    for (i=0; i<num_pixels; i++)
        ((uint32_t *) dst)[i] = ((const uint32_t *)palette)[src[i<<1]] | src[(i<<1)+1];
 }

-static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;

    for (i=0; i<num_pixels; i++) {
        //FIXME slow?
@ -1464,7 +1464,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
 {
    const enum PixelFormat srcFormat= c->srcFormat;
    const enum PixelFormat dstFormat= c->dstFormat;
-    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
+    void (*conv)(const uint8_t *src, uint8_t *dst, int num_pixels,
                 const uint8_t *palette)=NULL;
    int i;
    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
@ -1521,7 +1521,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
    const int dstBpp= (c->dstFormatBpp + 7) >> 3;
    const int srcId= c->srcFormatBpp >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
    const int dstId= c->dstFormatBpp >> 2;
-    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
+    void (*conv)(const uint8_t *src, uint8_t *dst, int src_size)=NULL;

 #define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)

@ -2085,18 +2085,18 @@ int sws_scale_ordered(SwsContext *c, const uint8_t* const src[], int srcStride[]
 #endif

 /* Convert the palette to the same packed 32-bit format as the palette */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;

    for (i=0; i<num_pixels; i++)
        ((uint32_t *) dst)[i] = ((const uint32_t *) palette)[src[i]];
 }

 /* Palette format: ABCD -> dst format: ABC */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
+void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette)
 {
-    long i;
+    int i;

    for (i=0; i<num_pixels; i++) {
        //FIXME slow?
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@ -351,7 +351,7 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
 * @param num_pixels number of pixels to convert
 * @param palette    array with [256] entries, which must match color arrangement (RGB or BGR) of src
 */
-void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);

 /**
 * Converts an 8bit paletted frame into a frame with a color depth of 24 bits.
@ -363,7 +363,7 @@ void sws_convertPalette8ToPacked32(const uint8_t *src, uint8_t *dst, long num_pi
 * @param num_pixels number of pixels to convert
 * @param palette    array with [256] entries, which must match color arrangement (RGB or BGR) of src
 */
-void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void sws_convertPalette8ToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels, const uint8_t *palette);


 #endif /* SWSCALE_SWSCALE_H */
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@ -194,6 +194,7 @@ typedef struct SwsContext {
 #define Y_TEMP                "11*8+4*4*256*2+40"
 #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
 #define UV_OFF                "11*8+4*4*256*3+48"
+#define UV_OFFx2              "11*8+4*4*256*3+56"

    DECLARE_ALIGNED(8, uint64_t, redDither);
    DECLARE_ALIGNED(8, uint64_t, greenDither);
@ -217,6 +218,7 @@ typedef struct SwsContext {
    DECLARE_ALIGNED(8, uint64_t, y_temp);
    int32_t  alpMmxFilter[4*MAX_FILTER_SIZE];
    DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+    DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes

 #if HAVE_ALTIVEC
    vector signed short   CY;
@ -259,7 +261,7 @@ typedef struct SwsContext {
                        const int16_t *chrVSrc, const int16_t *alpSrc,
                        uint8_t *dest,
                        uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        long dstW, long chrDstW);
+                        int dstW, int chrDstW);
    void (*yuv2yuvX   )(struct SwsContext *c,
                        const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int16_t **chrUSrc,
@ -267,7 +269,7 @@ typedef struct SwsContext {
                        const int16_t **alpSrc,
                        uint8_t *dest,
                        uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                        long dstW, long chrDstW);
+                        int dstW, int chrDstW);
    void (*yuv2packed1)(struct SwsContext *c,
                        const uint16_t *buf0,
                        const uint16_t *ubuf0, const uint16_t *ubuf1,
@ -287,26 +289,26 @@ typedef struct SwsContext {
                        const int16_t *chrFilter, const int16_t **chrUSrc,
                        const int16_t **chrVSrc, int chrFilterSize,
                        const int16_t **alpSrc, uint8_t *dest,
-                        long dstW, long dstY);
+                        int dstW, int dstY);

    void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
-                      long width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
    void (*alpToYV12)(uint8_t *dst, const uint8_t *src,
-                      long width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
    void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
-                      long width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+                      int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
    void (*hyscale_fast)(struct SwsContext *c,
-                         int16_t *dst, long dstWidth,
+                         int16_t *dst, int dstWidth,
                         const uint8_t *src, int srcW, int xInc);
    void (*hcscale_fast)(struct SwsContext *c,
-                         int16_t *dst1, int16_t *dst2, long dstWidth,
+                         int16_t *dst1, int16_t *dst2, int dstWidth,
                         const uint8_t *src1, const uint8_t *src2,
                         int srcW, int xInc);

    void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW,
                   int xInc, const int16_t *filter, const int16_t *filterPos,
-                   long filterSize);
+                   int filterSize);

    void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW,
                   int xInc, const int16_t *filter, const int16_t *filterPos,
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@ -24,7 +24,7 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
                              const int16_t **chrVSrc,
                              int chrFilterSize, const int16_t **alpSrc,
                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, long dstW, long chrDstW)
+                              uint8_t *aDest, int dstW, int chrDstW)
 {
    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
                chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@ -47,7 +47,7 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
                              const int16_t *chrUSrc, const int16_t *chrVSrc,
                              const int16_t *alpSrc,
                              uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                              uint8_t *aDest, long dstW, long chrDstW)
+                              uint8_t *aDest, int dstW, int chrDstW)
 {
    int i;
    for (i=0; i<dstW; i++) {
@ -79,7 +79,7 @@ static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
                                 const int16_t *chrFilter, const int16_t **chrUSrc,
                                 const int16_t **chrVSrc,
                                 int chrFilterSize, const int16_t **alpSrc,
-                                 uint8_t *dest, long dstW, long dstY)
+                                 uint8_t *dest, int dstW, int dstY)
 {
        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
                       chrFilter, chrUSrc, chrVSrc, chrFilterSize,
@ -128,7 +128,7 @@ static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,

 //FIXME yuy2* can read up to 7 samples too much

-static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                             uint32_t *unused)
 {
    int i;
@ -137,7 +137,7 @@ static inline void yuy2ToY_c(uint8_t *dst, const uint8_t *src, long width,
 }

 static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, long width, uint32_t *unused)
+                              const uint8_t *src2, int width, uint32_t *unused)
 {
    int i;
    for (i=0; i<width; i++) {
@ -148,7 +148,7 @@ static inline void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }

 static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, long width, uint32_t *unused)
+                            const uint8_t *src2, int width, uint32_t *unused)
 {
    int i;
    // FIXME I don't think this code is right for YUV444/422, since then h is not subsampled so
@ -161,7 +161,7 @@ static inline void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,

 /* This is almost identical to the previous, end exists only because
 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
+static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                             uint32_t *unused)
 {
    int i;
@ -170,7 +170,7 @@ static inline void uyvyToY_c(uint8_t *dst, const uint8_t *src, long width,
 }

 static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                              const uint8_t *src2, long width, uint32_t *unused)
+                              const uint8_t *src2, int width, uint32_t *unused)
 {
    int i;
    for (i=0; i<width; i++) {
@ -181,7 +181,7 @@ static inline void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }

 static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                            const uint8_t *src2, long width, uint32_t *unused)
+                            const uint8_t *src2, int width, uint32_t *unused)
 {
    int i;
    for (i=0; i<width; i++) {
@ -191,7 +191,7 @@ static inline void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
 }

 static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
-                              const uint8_t *src, long width)
+                              const uint8_t *src, int width)
 {
    int i;
    for (i = 0; i < width; i++) {
@ -205,7 +205,7 @@ static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
 #define YUV_NBPS(depth, endianness, rfunc) \
 static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                                          const uint16_t *srcU, const uint16_t *srcV, \
-                                          long width, uint32_t *unused) \
+                                          int width, uint32_t *unused) \
 { \
    int i; \
    for (i = 0; i < width; i++) { \
@ -214,7 +214,7 @@ static inline void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
    } \
 } \
 \
-static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, long width, uint32_t *unused) \
+static inline void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, int width, uint32_t *unused) \
 { \
    int i; \
    for (i = 0; i < width; i++) \
@ -229,20 +229,20 @@ YUV_NBPS(10, BE, AV_RB16)

 static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
    nvXXtoUV_c(dstU, dstV, src1, width);
 }

 static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                              const uint8_t *src1, const uint8_t *src2,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
    nvXXtoUV_c(dstV, dstU, src1, width);
 }

 static inline void bgr24ToY_c(int16_t *dst, const uint8_t *src,
-                              long width, uint32_t *unused)
+                              int width, uint32_t *unused)
 {
    int i;
    for (i=0; i<width; i++) {
@ -255,7 +255,7 @@ static inline void bgr24ToY_c(int16_t *dst, const uint8_t *src,
 }

 static inline void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, long width, uint32_t *unused)
+                               const uint8_t *src2, int width, uint32_t *unused)
 {
    int i;
    for (i=0; i<width; i++) {
@ -270,7 +270,7 @@ static inline void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1
 }

 static inline void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, long width, uint32_t *unused)
+                                    const uint8_t *src2, int width, uint32_t *unused)
 {
    int i;
    for (i=0; i<width; i++) {
@ -284,7 +284,7 @@ static inline void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t
    assert(src1 == src2);
 }

-static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, long width,
+static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
                              uint32_t *unused)
 {
    int i;
@ -298,7 +298,7 @@ static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, long width,
 }

 static inline void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
-                               const uint8_t *src2, long width, uint32_t *unused)
+                               const uint8_t *src2, int width, uint32_t *unused)
 {
    int i;
    assert(src1==src2);
@ -313,7 +313,7 @@ static inline void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1
 }

 static inline void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
-                                    const uint8_t *src2, long width, uint32_t *unused)
+                                    const uint8_t *src2, int width, uint32_t *unused)
 {
    int i;
    assert(src1==src2);
@ -332,7 +332,7 @@ static inline void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t
 static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                            int srcW, int xInc,
                            const int16_t *filter, const int16_t *filterPos,
-                            long filterSize)
+                            int filterSize)
 {
    int i;
    for (i=0; i<dstW; i++) {
@ -408,7 +408,7 @@ static void lumRangeFromJpeg_c(int16_t *dst, int width)
        dst[i] = (dst[i]*14071 + 33561947)>>14;
 }

-static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
+static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                                  const uint8_t *src, int srcW, int xInc)
 {
    int i;
@ -422,14 +422,14 @@ static inline void hyscale_fast_c(SwsContext *c, int16_t *dst, long dstWidth,
 }

      // *** horizontal scale Y line to temp buffer
-static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
+static inline void hyscale_c(SwsContext *c, uint16_t *dst, int dstWidth,
                             const uint8_t *src, int srcW, int xInc,
                             const int16_t *hLumFilter,
                             const int16_t *hLumFilterPos, int hLumFilterSize,
                             uint8_t *formatConvBuffer,
                             uint32_t *pal, int isAlpha)
 {
-    void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
+    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;

    src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
@ -453,7 +453,7 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
 }

 static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                  long dstWidth, const uint8_t *src1,
+                                  int dstWidth, const uint8_t *src1,
                                  const uint8_t *src2, int srcW, int xInc)
 {
    int i;
@ -467,7 +467,7 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
    }
 }

-inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, long dstWidth,
+inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
                             const uint8_t *src1, const uint8_t *src2,
                             int srcW, int xInc, const int16_t *hChrFilter,
                             const int16_t *hChrFilterPos, int hChrFilterSize,
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@ -1009,6 +1009,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
        c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
    }
    c->uv_off = dst_stride_px;
+    c->uv_offx2 = dst_stride;
    for (i=0; i<c->vChrBufSize; i++) {
        FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, fail);
        c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@ -61,7 +61,7 @@

 #if !COMPILE_TEMPLATE_AMD3DNOW

-static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    uint8_t *dest = dst;
    const uint8_t *s = src;
@ -143,7 +143,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s
            MOVNTQ"     %%mm4, 16%0"


-static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    uint8_t *dest = dst;
    const uint8_t *s = src;
@ -186,7 +186,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s
 MMX2, 3DNOW optimization by Nick Kurshev
 32-bit C version, and and&add trick by Michael Niedermayer
 */
-static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    register const uint8_t* s=src;
    register uint8_t* d=dst;
@ -230,7 +230,7 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_
    }
 }

-static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    register const uint8_t* s=src;
    register uint8_t* d=dst;
@ -279,7 +279,7 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_
    }
 }

-static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -371,7 +371,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
    }
 }

-static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -426,7 +426,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s
    }
 }

-static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -518,7 +518,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
    }
 }

-static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -573,7 +573,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s
    }
 }

-static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -630,7 +630,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s
    }
 }

-static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -687,7 +687,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_
    }
 }

-static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -744,7 +744,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
    }
 }

-static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint8_t *s = src;
    const uint8_t *end;
@ -822,7 +822,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_
       |
   original bits
 */
-static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    const uint16_t *mm_end;
@ -925,7 +925,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s
    }
 }

-static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    const uint16_t *mm_end;
@ -1046,7 +1046,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s
    MOVNTQ"     %%mm0,  %0      \n\t"                               \
    MOVNTQ"     %%mm3, 8%0      \n\t"                               \

-static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    const uint16_t *mm_end;
@ -1088,7 +1088,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_
    }
 }

-static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    const uint16_t *end;
    const uint16_t *mm_end;
@ -1130,7 +1130,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
    }
 }

-static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    x86_reg idx = 15 - src_size;
    const uint8_t *s = src-idx;
@ -1192,7 +1192,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst,
    }
 }

-static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
+static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
 {
    unsigned i;
    x86_reg mmx_size= 23 - src_size;
@ -1260,10 +1260,10 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
 }

 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+                                           int width, int height,
+                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y++) {
        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
@ -1317,18 +1317,18 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int dstStride)
 {
    //FIXME interpolate chroma
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
 }

 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                           long width, long height,
-                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
+                                           int width, int height,
+                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
 {
-    long y;
+    int y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y++) {
        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
@ -1382,8 +1382,8 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long dstStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int dstStride)
 {
    //FIXME interpolate chroma
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
@ -1393,8 +1393,8 @@ static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc,
 * Width should be a multiple of 16.
 */
 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
+                                         int width, int height,
+                                         int lumStride, int chromStride, int dstStride)
 {
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
 }
@ -1403,8 +1403,8 @@ static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usr
 * Width should be a multiple of 16.
 */
 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
-                                         long width, long height,
-                                         long lumStride, long chromStride, long dstStride)
+                                         int width, int height,
+                                         int lumStride, int chromStride, int dstStride)
 {
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
 }
@ -1414,10 +1414,10 @@ static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usr
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y+=2) {
        __asm__ volatile(
@ -1513,9 +1513,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */

 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
-static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
+static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
 {
-    long x,y;
+    int x,y;

    dst[0]= src[0];

@ -1612,10 +1612,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
 * FIXME: Write HQ version.
 */
 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                      long width, long height,
-                                      long lumStride, long chromStride, long srcStride)
+                                      int width, int height,
+                                      int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y+=2) {
        __asm__ volatile(
@ -1718,13 +1718,13 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
 * FIXME: Write HQ version.
 */
 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-                                       long width, long height,
-                                       long lumStride, long chromStride, long srcStride)
+                                       int width, int height,
+                                       int lumStride, int chromStride, int srcStride)
 {
-    long y;
+    int y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height-2; y+=2) {
-        long i;
+        int i;
        for (i=0; i<2; i++) {
            __asm__ volatile(
                "mov                        %2, %%"REG_a"   \n\t"
@ -1963,13 +1963,13 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_

 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
-                                    long width, long height, long src1Stride,
-                                    long src2Stride, long dstStride)
+                                    int width, int height, int src1Stride,
+                                    int src2Stride, int dstStride)
 {
-    long h;
+    int h;

    for (h=0; h < height; h++) {
-        long w;
+        int w;

 #if COMPILE_TEMPLATE_SSE2
        __asm__(
@ -2037,12 +2037,12 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
 #if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                       uint8_t *dst1, uint8_t *dst2,
-                                       long width, long height,
-                                       long srcStride1, long srcStride2,
-                                       long dstStride1, long dstStride2)
+                                       int width, int height,
+                                       int srcStride1, int srcStride2,
+                                       int dstStride1, int dstStride2)
 {
    x86_reg y;
-    long x,w,h;
+    int x,w,h;
    w=width/2; h=height/2;
    __asm__ volatile(
        PREFETCH" %0    \n\t"
@ -2131,12 +2131,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,

 static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                        uint8_t *dst,
-                                        long width, long height,
-                                        long srcStride1, long srcStride2,
-                                        long srcStride3, long dstStride)
+                                        int width, int height,
+                                        int srcStride1, int srcStride2,
+                                        int srcStride3, int dstStride)
 {
    x86_reg x;
-    long y,w,h;
+    int y,w,h;
    w=width/2; h=height;
    for (y=0;y<h;y++) {
        const uint8_t* yp=src1+srcStride1*y;
@ -2197,7 +2197,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
                :"memory");
        }
        for (; x<w; x++) {
-            const long x2 = x<<2;
+            const int x2 = x<<2;
            d[8*x+0] = yp[x2];
            d[8*x+1] = up[x];
            d[8*x+2] = yp[x2+1];
@ -2459,11 +2459,11 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui
 }

 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);

    for (y=0; y<height; y++) {
        RENAME(extract_even)(src, ydst, width);
@ -2485,11 +2485,11 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co

 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);

    for (y=0; y<height; y++) {
        RENAME(extract_even)(src, ydst, width);
@ -2509,11 +2509,11 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */

 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);

    for (y=0; y<height; y++) {
        RENAME(extract_even)(src+1, ydst, width);
@ -2535,11 +2535,11 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co

 #if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                                 long width, long height,
-                                 long lumStride, long chromStride, long srcStride)
+                                 int width, int height,
+                                 int lumStride, int chromStride, int srcStride)
 {
-    long y;
-    const long chromWidth= -((-width)>>1);
+    int y;
+    const int chromWidth= -((-width)>>1);

    for (y=0; y<height; y++) {
        RENAME(extract_even)(src+1, ydst, width);
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@ -78,7 +78,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                                    const int16_t **chrVSrc,
                                    int chrFilterSize, const int16_t **alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
    if (uDest) {
        x86_reg uv_off = c->uv_off;
@ -159,7 +159,7 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                       const int16_t **chrVSrc,
                                       int chrFilterSize, const int16_t **alpSrc,
                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, long dstW, long chrDstW)
+                                       uint8_t *aDest, int dstW, int chrDstW)
 {
    if (uDest) {
        x86_reg uv_off = c->uv_off;
@ -190,9 +190,9 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                                    const int16_t *chrUSrc, const int16_t *chrVSrc,
                                    const int16_t *alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                    uint8_t *aDest, long dstW, long chrDstW)
+                                    uint8_t *aDest, int dstW, int chrDstW)
 {
-    long p= 4;
+    int p= 4;
    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
@ -231,9 +231,9 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                       const int16_t *chrUSrc, const int16_t *chrVSrc,
                                       const int16_t *alpSrc,
                                       uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                       uint8_t *aDest, long dstW, long chrDstW)
+                                       uint8_t *aDest, int dstW, int chrDstW)
 {
-    long p= 4;
+    int p= 4;
    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
@ -471,7 +471,7 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte
                                          const int16_t *chrFilter, const int16_t **chrUSrc,
                                          const int16_t **chrVSrc,
                                          int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, long dstW, long dstY)
+                                          uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -504,7 +504,7 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
                                       const int16_t *chrFilter, const int16_t **chrUSrc,
                                       const int16_t **chrVSrc,
                                       int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, long dstW, long dstY)
+                                       uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -561,7 +561,7 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt
                                           const int16_t *chrFilter, const int16_t **chrUSrc,
                                           const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, long dstW, long dstY)
+                                           uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -585,7 +585,7 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrUSrc,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, long dstW, long dstY)
+                                        uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -638,7 +638,7 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt
                                           const int16_t *chrFilter, const int16_t **chrUSrc,
                                           const int16_t **chrVSrc,
                                           int chrFilterSize, const int16_t **alpSrc,
-                                           uint8_t *dest, long dstW, long dstY)
+                                           uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -662,7 +662,7 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
                                        const int16_t *chrFilter, const int16_t **chrUSrc,
                                        const int16_t **chrVSrc,
                                        int chrFilterSize, const int16_t **alpSrc,
-                                        uint8_t *dest, long dstW, long dstY)
+                                        uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -795,7 +795,7 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte
                                          const int16_t *chrFilter, const int16_t **chrUSrc,
                                          const int16_t **chrVSrc,
                                          int chrFilterSize, const int16_t **alpSrc,
-                                          uint8_t *dest, long dstW, long dstY)
+                                          uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -819,7 +819,7 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
                                       const int16_t *chrFilter, const int16_t **chrUSrc,
                                       const int16_t **chrVSrc,
                                       int chrFilterSize, const int16_t **alpSrc,
-                                       uint8_t *dest, long dstW, long dstY)
+                                       uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -860,7 +860,7 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil
                                            const int16_t *chrFilter, const int16_t **chrUSrc,
                                            const int16_t **chrVSrc,
                                            int chrFilterSize, const int16_t **alpSrc,
-                                            uint8_t *dest, long dstW, long dstY)
+                                            uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -881,7 +881,7 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
                                         const int16_t *chrFilter, const int16_t **chrUSrc,
                                         const int16_t **chrVSrc,
                                         int chrFilterSize, const int16_t **alpSrc,
-                                         uint8_t *dest, long dstW, long dstY)
+                                         uint8_t *dest, int dstW, int dstY)
 {
    x86_reg dummy=0;
    x86_reg dstW_reg = dstW;
@ -897,16 +897,16 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
    YSCALEYUV2PACKEDX_END
 }

-#define REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB_UV(index, c) \
    "xor            "#index", "#index"  \n\t"\
    ".p2align              4            \n\t"\
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@ -969,8 +969,8 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter

 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)

-#define YSCALEYUV2RGB(index, c, uv_off) \
-    REAL_YSCALEYUV2RGB_UV(index, c, uv_off) \
+#define YSCALEYUV2RGB(index, c) \
+    REAL_YSCALEYUV2RGB_UV(index, c) \
    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
    REAL_YSCALEYUV2RGB_COEFF(c)

@ -984,12 +984,10 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
                                       const uint16_t *abuf1, uint8_t *dest,
                                       int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
 #if ARCH_X86_64
        __asm__ volatile(
-            YSCALEYUV2RGB(%%r8, %5, %8)
+            YSCALEYUV2RGB(%%r8, %5)
            YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
            "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
            "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
@ -997,7 +995,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
            WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
               "a" (&c->redDither),
-               "r" (abuf0), "r" (abuf1), "m"(uv_off)
+               "r" (abuf0), "r" (abuf1)
            : "%r8"
        );
 #else
@ -1007,7 +1005,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5, %6)
+            YSCALEYUV2RGB(%%REGBP, %5)
            "push                   %0              \n\t"
            "push                   %1              \n\t"
            "mov          "U_TEMP"(%5), %0          \n\t"
@ -1022,7 +1020,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
 #endif
    } else {
@ -1030,13 +1028,13 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB(%%REGBP, %5, %6)
+            YSCALEYUV2RGB(%%REGBP, %5)
            "pcmpeqd %%mm7, %%mm7                   \n\t"
            WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    }
 }
@ -1048,20 +1046,18 @@ static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
                                       const uint16_t *abuf1, uint8_t *dest,
                                       int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
    __asm__ volatile(
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
        "mov        %4, %%"REG_b"               \n\t"
        "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
        "pxor    %%mm7, %%mm7                   \n\t"
        WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
        "pop %%"REG_BP"                         \n\t"
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
    );
 }

@ -1072,14 +1068,12 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
    __asm__ volatile(
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
        "mov        %4, %%"REG_b"               \n\t"
        "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
        "pxor    %%mm7, %%mm7                   \n\t"
        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@ -1091,7 +1085,7 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
        "pop %%"REG_BP"                         \n\t"
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
    );
 }

@ -1102,14 +1096,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
                                        const uint16_t *abuf1, uint8_t *dest,
                                        int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
    __asm__ volatile(
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
        "mov        %4, %%"REG_b"               \n\t"
        "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2RGB(%%REGBP, %5, %6)
+        YSCALEYUV2RGB(%%REGBP, %5)
        "pxor    %%mm7, %%mm7                   \n\t"
        /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@ -1121,11 +1113,11 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
        "pop %%"REG_BP"                         \n\t"
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
    );
 }

-#define REAL_YSCALEYUV2PACKED(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED(index, c) \
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
    "psraw                $3, %%mm0                           \n\t"\
@ -1137,10 +1129,10 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@ -1163,7 +1155,7 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\

-#define YSCALEYUV2PACKED(index, c, uv_off)  REAL_YSCALEYUV2PACKED(index, c, uv_off)
+#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)

 static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
                                         const uint16_t *buf1, const uint16_t *ubuf0,
@ -1172,30 +1164,28 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
                                         const uint16_t *abuf1, uint8_t *dest,
                                         int dstW, int yalpha, int uvalpha, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
-
    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
    __asm__ volatile(
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
        "mov %4, %%"REG_b"                        \n\t"
        "push %%"REG_BP"                        \n\t"
-        YSCALEYUV2PACKED(%%REGBP, %5, %6)
+        YSCALEYUV2PACKED(%%REGBP, %5)
        WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
        "pop %%"REG_BP"                         \n\t"
        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
        :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-           "a" (&c->redDither), "m"(uv_off)
+           "a" (&c->redDither)
    );
 }

-#define REAL_YSCALEYUV2RGB1(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB1(index, c) \
    "xor            "#index", "#index"  \n\t"\
    ".p2align              4            \n\t"\
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
@ -1237,19 +1227,19 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
    "packuswb          %%mm6, %%mm5     \n\t"\
    "packuswb          %%mm3, %%mm4     \n\t"\

-#define YSCALEYUV2RGB1(index, c, uv_off)  REAL_YSCALEYUV2RGB1(index, c, uv_off)
+#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)

 // do vertical chrominance interpolation
-#define REAL_YSCALEYUV2RGB1b(index, c, uv_off) \
+#define REAL_YSCALEYUV2RGB1b(index, c) \
    "xor            "#index", "#index"  \n\t"\
    ".p2align              4            \n\t"\
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
@ -1293,7 +1283,7 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
    "packuswb          %%mm6, %%mm5     \n\t"\
    "packuswb          %%mm3, %%mm4     \n\t"\

-#define YSCALEYUV2RGB1b(index, c, uv_off)  REAL_YSCALEYUV2RGB1b(index, c, uv_off)
+#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)

 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
@ -1313,7 +1303,6 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
                                       int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@ -1322,26 +1311,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                "mov        %4, %%"REG_b"               \n\t"
                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1(%%REGBP, %5)
                YSCALEYUV2RGB1_ALPHA(%%REGBP)
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                "pop %%"REG_BP"                         \n\t"
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
            );
        } else {
            __asm__ volatile(
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                "mov        %4, %%"REG_b"               \n\t"
                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1(%%REGBP, %5)
                "pcmpeqd %%mm7, %%mm7                   \n\t"
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                "pop %%"REG_BP"                         \n\t"
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
            );
        }
    } else {
@ -1350,26 +1339,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                "mov        %4, %%"REG_b"               \n\t"
                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1b(%%REGBP, %5)
                YSCALEYUV2RGB1_ALPHA(%%REGBP)
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                "pop %%"REG_BP"                         \n\t"
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
            );
        } else {
            __asm__ volatile(
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
                "mov        %4, %%"REG_b"               \n\t"
                "push %%"REG_BP"                        \n\t"
-                YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+                YSCALEYUV2RGB1b(%%REGBP, %5)
                "pcmpeqd %%mm7, %%mm7                   \n\t"
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
                "pop %%"REG_BP"                         \n\t"
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
                :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-                   "a" (&c->redDither), "m"(uv_off)
+                   "a" (&c->redDither)
            );
        }
    }
@ -1382,7 +1371,6 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
                                       int dstW, int uvalpha, enum PixelFormat dstFormat,
                                       int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@ -1390,26 +1378,26 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
            "pxor    %%mm7, %%mm7                   \n\t"
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    } else {
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
            "pxor    %%mm7, %%mm7                   \n\t"
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    }
 }
@ -1421,7 +1409,6 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@ -1429,7 +1416,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
            "pxor    %%mm7, %%mm7                   \n\t"
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@ -1441,14 +1428,14 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    } else {
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
            "pxor    %%mm7, %%mm7                   \n\t"
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@ -1460,7 +1447,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    }
 }
@ -1472,7 +1459,6 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
                                        int dstW, int uvalpha, enum PixelFormat dstFormat,
                                        int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@ -1480,7 +1466,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1(%%REGBP, %5)
            "pxor    %%mm7, %%mm7                   \n\t"
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@ -1492,14 +1478,14 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    } else {
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2RGB1b(%%REGBP, %5, %6)
+            YSCALEYUV2RGB1b(%%REGBP, %5)
            "pxor    %%mm7, %%mm7                   \n\t"
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
 #ifdef DITHER1XBPP
@ -1511,19 +1497,19 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    }
 }

-#define REAL_YSCALEYUV2PACKED1(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED1(index, c) \
    "xor            "#index", "#index"  \n\t"\
    ".p2align              4            \n\t"\
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "psraw                $7, %%mm3     \n\t" \
    "psraw                $7, %%mm4     \n\t" \
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
@ -1531,18 +1517,18 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
    "psraw                $7, %%mm1     \n\t" \
    "psraw                $7, %%mm7     \n\t" \

-#define YSCALEYUV2PACKED1(index, c, uv_off)  REAL_YSCALEYUV2PACKED1(index, c, uv_off)
+#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)

-#define REAL_YSCALEYUV2PACKED1b(index, c, uv_off) \
+#define REAL_YSCALEYUV2PACKED1b(index, c) \
    "xor "#index", "#index"             \n\t"\
    ".p2align              4            \n\t"\
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add           "#uv_off", "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
    "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub           "#uv_off", "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
    "psrlw                $8, %%mm3     \n\t" \
@ -1551,7 +1537,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
    "psraw                $7, %%mm1     \n\t" \
    "psraw                $7, %%mm7     \n\t"
-#define YSCALEYUV2PACKED1b(index, c, uv_off)  REAL_YSCALEYUV2PACKED1b(index, c, uv_off)
+#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)

 static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
                                         const uint16_t *ubuf0, const uint16_t *ubuf1,
@ -1560,7 +1546,6 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
                                         int dstW, int uvalpha, enum PixelFormat dstFormat,
                                         int flags, int y)
 {
-    x86_reg uv_off = c->uv_off << 1;
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
@ -1568,24 +1553,24 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1(%%REGBP, %5, %6)
+            YSCALEYUV2PACKED1(%%REGBP, %5)
            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    } else {
        __asm__ volatile(
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
            "mov        %4, %%"REG_b"               \n\t"
            "push %%"REG_BP"                        \n\t"
-            YSCALEYUV2PACKED1b(%%REGBP, %5, %6)
+            YSCALEYUV2PACKED1b(%%REGBP, %5)
            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
            "pop %%"REG_BP"                         \n\t"
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
            :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
-               "a" (&c->redDither), "m"(uv_off)
+               "a" (&c->redDither)
        );
    }
 }
@ -1593,7 +1578,7 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
 #if !COMPILE_TEMPLATE_MMX2
 //FIXME yuy2* can read up to 7 samples too much

-static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm2           \n\t"
@ -1612,7 +1597,7 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width,
    );
 }

-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@ -1638,7 +1623,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
    assert(src1 == src2);
 }

-static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
    __asm__ volatile(
        "mov                    %0, %%"REG_a"       \n\t"
@ -1664,7 +1649,7 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s

 /* This is almost identical to the previous, end exists only because
 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
    __asm__ volatile(
        "mov                  %0, %%"REG_a"         \n\t"
@ -1682,7 +1667,7 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width,
    );
 }

-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@ -1708,7 +1693,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
    assert(src1 == src2);
 }

-static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@ -1734,7 +1719,7 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
 }

 static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
-                                    const uint8_t *src, long width)
+                                    const uint8_t *src, int width)
 {
    __asm__ volatile(
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
@ -1761,20 +1746,20 @@ static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,

 static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
                                    const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
+                                    int width, uint32_t *unused)
 {
    RENAME(nvXXtoUV)(dstU, dstV, src1, width);
 }

 static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
                                    const uint8_t *src1, const uint8_t *src2,
-                                    long width, uint32_t *unused)
+                                    int width, uint32_t *unused)
 {
    RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
 #endif /* !COMPILE_TEMPLATE_MMX2 */

-static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat)
 {

    if(srcFormat == PIX_FMT_BGR24) {
@ -1826,7 +1811,7 @@ static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, long w
    );
 }

-static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
+static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat srcFormat)
 {
    __asm__ volatile(
        "movq                    24(%4), %%mm6       \n\t"
@ -1882,23 +1867,23 @@ static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uin
    );
 }

-static inline void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
 }

-static inline void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
    assert(src1 == src2);
 }

-static inline void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
 {
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
 }

-static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
+static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused)
 {
    assert(src1==src2);
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
@ -1907,7 +1892,7 @@ static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t
 #if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
 static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
-                                  const int16_t *filter, const int16_t *filterPos, long filterSize)
+                                  const int16_t *filter, const int16_t *filterPos, int filterSize)
 {
    assert(filterSize % 4 == 0 && filterSize>0);
    if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
@ -2220,7 +2205,7 @@ static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src,

 #if COMPILE_TEMPLATE_MMX2
 static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
-                                        long dstWidth, const uint8_t *src, int srcW,
+                                        int dstWidth, const uint8_t *src, int srcW,
                                        int xInc)
 {
    int32_t *filterPos = c->hLumFilterPos;
@ -2292,7 +2277,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
 }

 static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
-                                        long dstWidth, const uint8_t *src1,
+                                        int dstWidth, const uint8_t *src1,
                                        const uint8_t *src2, int srcW, int xInc)
 {
    int32_t *filterPos = c->hChrFilterPos;