Merge remote-tracking branch 'qatar/master'

* qatar/master: (21 commits) swscale: Add Doxygen for hyscale_fast/hScale. fate: enable lavfi-pixmt tests on big endian systems PPC: swscale: disable altivec functions for unsupported formats fate: merge identical pixdesc_be/le tests swscale: Add Doxygen for yuv2planar*/yuv2packed* functions. build: call texi2pod.pl with full path instead of symlink build: include sub-makefiles using full path instead of symlinks swscale: update big endian reference values after dff5a835. wavpack: skip blocks with no samples cosmetics: remove outdated comment that is no longer true build: replace some addprefix/addsuffix with substitution refs avutil: Remove unused arbitrary precision integer code. configure: Drop check for availability of ten assembler operands. aacenc: Save channel configuration for later use. aacenc: Fix codebook trellising for zeroed bands. swscale: change prototypes of scaled YUV output functions. swscale: re-add support for non-native endianness. swscale: disentangle yuv2rgbX_c_full() into small functions. swscale: split yuv2packed[12X]_c() remainders into small functions. swscale: split yuv2packedX_altivec in smaller functions. ... Conflicts: Makefile configure libavcodec/x86/dsputil_mmx.c libavfilter/Makefile libavformat/Makefile libavutil/integer.c libavutil/integer.h libswscale/swscale.c libswscale/swscale_internal.h libswscale/x86/swscale_template.c tests/ref/lavfi/pixdesc_le tests/ref/lavfi/pixfmts_scale Merged-by: Michael Niedermayer <michaelni@gmx.at>
2025-03-28 12:32:17 +02:00 · 2011-06-29 04:08:31 +02:00 · 2011-06-29 04:08:31 +02:00 · bb9d5171a7
commit bb9d5171a7
parent dbe5f0172b 4578435f35
39 changed files with 1074 additions and 824 deletions
--- a/Makefile
+++ b/Makefile
@ -40,7 +40,7 @@ DATA_FILES := $(wildcard $(SRC_PATH)/ffpresets/*.ffpreset)
 SKIPHEADERS = cmdutils_common_opts.h

 MAIN_MAKEFILE=1
-include common.mak
+include $(SRC_PATH)/common.mak

 FF_LDFLAGS   := $(FFLDFLAGS)
 FF_EXTRALIBS := $(FFEXTRALIBS)
@ -70,7 +70,7 @@ endef
 define DOSUBDIR
 $(foreach V,$(SUBDIR_VARS),$(eval $(call RESET,$(V))))
 SUBDIR := $(1)/
-include $(1)/Makefile
+include $(SRC_PATH)/$(1)/Makefile
 endef

 $(foreach D,$(FFLIBS),$(eval $(call DOSUBDIR,lib$(D))))
@ -147,8 +147,8 @@ config:

 check: test

-include doc/Makefile
-include tests/Makefile
+include $(SRC_PATH)/doc/Makefile
+include $(SRC_PATH)/tests/Makefile

 .PHONY: all alltools *clean check config examples install*
 .PHONY: testprogs uninstall*
--- a/common.mak
+++ b/common.mak
@ -74,21 +74,21 @@ OBJS      += $(OBJS-yes)
 FFLIBS    := $(FFLIBS-yes) $(FFLIBS)
 TESTPROGS += $(TESTPROGS-yes)

-FFEXTRALIBS := $(addprefix -l,$(addsuffix $(BUILDSUF),$(FFLIBS))) $(EXTRALIBS)
-FFLDFLAGS   := $(addprefix -Llib,$(ALLFFLIBS)) $(LDFLAGS)
+FFEXTRALIBS := $(FFLIBS:%=-l%$(BUILDSUF)) $(EXTRALIBS)
+FFLDFLAGS   := $(ALLFFLIBS:%=-Llib%) $(LDFLAGS)

-EXAMPLES  := $(addprefix $(SUBDIR),$(addsuffix -example$(EXESUF),$(EXAMPLES)))
-OBJS      := $(addprefix $(SUBDIR),$(sort $(OBJS)))
-TESTOBJS  := $(addprefix $(SUBDIR),$(TESTOBJS) $(TESTPROGS:%=%-test.o))
-TESTPROGS := $(addprefix $(SUBDIR),$(addsuffix -test$(EXESUF),$(TESTPROGS)))
-HOSTOBJS  := $(addprefix $(SUBDIR),$(addsuffix .o,$(HOSTPROGS)))
-HOSTPROGS := $(addprefix $(SUBDIR),$(addsuffix $(HOSTEXESUF),$(HOSTPROGS)))
+EXAMPLES  := $(EXAMPLES:%=$(SUBDIR)%-example$(EXESUF))
+OBJS      := $(sort $(OBJS:%=$(SUBDIR)%))
+TESTOBJS  := $(TESTOBJS:%=$(SUBDIR)%) $(TESTPROGS:%=$(SUBDIR)%-test.o)
+TESTPROGS := $(TESTPROGS:%=$(SUBDIR)%-test$(EXESUF))
+HOSTOBJS  := $(HOSTPROGS:%=$(SUBDIR)%.o)
+HOSTPROGS := $(HOSTPROGS:%=$(SUBDIR)%$(HOSTEXESUF))

 DEP_LIBS := $(foreach NAME,$(FFLIBS),lib$(NAME)/$($(CONFIG_SHARED:yes=S)LIBNAME))

 ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR)/$(ARCH)/*.h))
-SKIPHEADERS += $(addprefix $(ARCH)/,$(ARCH_HEADERS))
-SKIPHEADERS := $(addprefix $(SUBDIR),$(SKIPHEADERS-) $(SKIPHEADERS))
+SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-)
+SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%)
 checkheaders: $(filter-out $(SKIPHEADERS:.h=.ho),$(ALLHEADERS:.h=.ho))

 $(HOSTOBJS): %.o: %.c
--- a/configure
+++ b/configure
@ -1125,7 +1125,6 @@ HAVE_LIST="
    sys_select_h
    sys_soundcard_h
    sys_videoio_h
-    ten_operands
    termios_h
    threads
    trunc
@ -1542,11 +1541,6 @@ test_deps(){
    done
 }

-set_ne_test_deps(){
-    eval ${1}_be_test_deps="bigendian"
-    eval ${1}_le_test_deps="!bigendian"
-}
-
 test_deps _encoder _decoder                                             \
    adpcm_g726=g726                                                     \
    adpcm_ima_qt                                                        \
@ -1623,15 +1617,6 @@ test_deps _muxer _demuxer                                               \
 ac3_fixed_test_deps="ac3_fixed_encoder ac3_decoder rm_muxer rm_demuxer"
 mpg_test_deps="mpeg1system_muxer mpegps_demuxer"

-set_ne_test_deps pixdesc
-set_ne_test_deps pixfmts_copy
-set_ne_test_deps pixfmts_crop
-set_ne_test_deps pixfmts_hflip
-set_ne_test_deps pixfmts_null
-set_ne_test_deps pixfmts_pad
-set_ne_test_deps pixfmts_scale
-set_ne_test_deps pixfmts_vflip
-
 # default parameters

 logfile="config.log"
@ -2740,18 +2725,6 @@ EOF
    # check whether xmm clobbers are supported
    check_asm xmm_clobbers '"":::"%xmm0"'

-    # check whether more than 10 operands are supported
-    check_cc <<EOF && enable ten_operands
-int main(void) {
-    int x=0;
-    __asm__ volatile(
-        ""
-        :"+&rm"(x), "+&rm"(x), "+&rm"(x), "+&rm"(x), "+&rm"(x), "+&rm"(x)
-    );
-    return 0;
-}
-EOF
-
    # check whether binutils is new enough to compile SSSE3/MMX2
    enabled ssse3 && check_asm ssse3 '"pabsw %xmm0, %xmm0"'
    enabled mmx2  && check_asm mmx2  '"pmaxub %mm0, %mm1"'
@ -3197,7 +3170,6 @@ if enabled x86; then
    echo "CMOV is fast              ${fast_cmov-no}"
    echo "EBX available             ${ebx_available-no}"
    echo "EBP available             ${ebp_available-no}"
-    echo "10 operands supported     ${ten_operands-no}"
 fi
 if enabled arm; then
    echo "ARMv5TE enabled           ${armv5te-no}"
@ -3303,34 +3275,9 @@ if enabled source_path_used; then
        tests/fate
        tools
    "
-    FILES="
-        Makefile
-        common.mak
-        subdir.mak
-        doc/Makefile
-        doc/texi2pod.pl
-        libavcodec/Makefile
-        libavcodec/${arch}/Makefile
-        libavdevice/Makefile
-        libavfilter/Makefile
-        libavfilter/${arch}/Makefile
-        libavformat/Makefile
-        libavutil/Makefile
-        libpostproc/Makefile
-        libswscale/Makefile
-        tests/Makefile
-        tests/fate.mak
-        tests/fate2.mak
-        tests/fate/aac.mak
-        tests/fate/als.mak
-        tests/fate/fft.mak
-        tests/fate/h264.mak
-        tests/fate/mp3.mak
-        tests/fate/vorbis.mak
-        tests/fate/vp8.mak
-    "
+
    map 'mkdir -p $v' $DIRS;
-    map 'test -f "$source_path/$v" && $ln_s "$source_path/$v" $v' $FILES
+    $ln_s "$source_path/Makefile" .
 fi

 enabled stripping || strip="echo skipping strip"
--- a/doc/Makefile
+++ b/doc/Makefile
@ -18,7 +18,7 @@ doc/%.html: doc/%.texi $(SRC_PATH)/doc/t2h.init
 doc/%.pod: TAG = POD
 doc/%.pod: doc/%.texi
 	$(Q)$(TEXIDEP)
-	$(M)doc/texi2pod.pl $< $@
+	$(M)$(SRC_PATH)/doc/texi2pod.pl $< $@

 doc/%.1: TAG = MAN
 doc/%.1: doc/%.pod
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@ -666,7 +666,7 @@ OBJS-$(CONFIG_MLIB)                    += mlib/dsputil_mlib.o           \
 # well.
 OBJS-$(!CONFIG_SMALL)                  += inverse.o

-include $(SUBDIR)$(ARCH)/Makefile
+-include $(SRC_PATH)/$(SUBDIR)$(ARCH)/Makefile

 SKIPHEADERS                            += %_tablegen.h                  \
                                          %_tables.h                    \
@ -693,7 +693,7 @@ DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86

 CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF)

-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak

 $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o

--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@ -433,10 +433,26 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
    for (swb = 0; swb < max_sfb; swb++) {
        size = sce->ics.swb_sizes[swb];
        if (sce->zeroes[win*16 + swb]) {
-            for (cb = 0; cb < 12; cb++) {
-                path[swb+1][cb].prev_idx = cb;
-                path[swb+1][cb].cost     = path[swb][cb].cost;
-                path[swb+1][cb].run      = path[swb][cb].run + 1;
+            float cost_stay_here = path[swb][0].cost;
+            float cost_get_here  = next_minrd + run_bits + 4;
+            if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
+                != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
+                cost_stay_here += run_bits;
+            if (cost_get_here < cost_stay_here) {
+                path[swb+1][0].prev_idx = next_mincb;
+                path[swb+1][0].cost     = cost_get_here;
+                path[swb+1][0].run      = 1;
+            } else {
+                path[swb+1][0].prev_idx = 0;
+                path[swb+1][0].cost     = cost_stay_here;
+                path[swb+1][0].run      = path[swb][0].run + 1;
+            }
+            next_minrd = path[swb+1][0].cost;
+            next_mincb = 0;
+            for (cb = 1; cb < 12; cb++) {
+                path[swb+1][cb].cost = 61450;
+                path[swb+1][cb].prev_idx = -1;
+                path[swb+1][cb].run = 0;
            }
        } else {
            float minrd = next_minrd;
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@ -208,8 +208,9 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

+    s->chan_map           = aac_chan_configs[avctx->channels-1];
    s->samples            = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
-    s->cpe                = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
+    s->cpe                = av_mallocz(sizeof(ChannelElement) * s->chan_map[0]);
    avctx->extradata      = av_mallocz(5 + FF_INPUT_BUFFER_PADDING_SIZE);
    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);
@ -500,7 +501,6 @@ static int aac_encode_frame(AVCodecContext *avctx,
    int16_t *samples = s->samples, *samples2, *la;
    ChannelElement *cpe;
    int i, ch, w, g, chans, tag, start_ch;
-    const uint8_t *chan_map = aac_chan_configs[avctx->channels-1];
    int chan_el_counter[4];
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];

@ -521,8 +521,8 @@ static int aac_encode_frame(AVCodecContext *avctx,
        } else {
            start_ch = 0;
            samples2 = s->samples + 1024 * avctx->channels;
-            for (i = 0; i < chan_map[0]; i++) {
-                tag = chan_map[i+1];
+            for (i = 0; i < s->chan_map[0]; i++) {
+                tag = s->chan_map[i+1];
                chans = tag == TYPE_CPE ? 2 : 1;
                ff_psy_preprocess(s->psypp,
                                  (uint16_t*)data + channel_maps[avctx->channels-1][start_ch],
@ -538,9 +538,9 @@ static int aac_encode_frame(AVCodecContext *avctx,
    }

    start_ch = 0;
-    for (i = 0; i < chan_map[0]; i++) {
+    for (i = 0; i < s->chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
-        tag      = chan_map[i+1];
+        tag      = s->chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        for (ch = 0; ch < chans; ch++) {
@ -580,9 +580,9 @@ static int aac_encode_frame(AVCodecContext *avctx,
            put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
        start_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
-        for (i = 0; i < chan_map[0]; i++) {
+        for (i = 0; i < s->chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
-            tag      = chan_map[i+1];
+            tag      = s->chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            put_bits(&s->pb, 3, tag);
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@ -61,6 +61,7 @@ typedef struct AACEncContext {
    int16_t *samples;                            ///< saved preprocessed input

    int samplerate_index;                        ///< MPEG-4 samplerate index
+    uint8_t *chan_map;                           ///< channel configuration map

    ChannelElement *cpe;                         ///< channel elements
    FFPsyContext psy;
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@ -306,8 +306,6 @@ static av_cold void exponent_init(AC3EncodeContext *s)

 /**
 * Extract exponents from the MDCT coefficients.
- * This takes into account the normalization that was done to the input samples
- * by adjusting the exponents by the exponent shift values.
 */
 static void extract_exponents(AC3EncodeContext *s)
 {
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@ -26,7 +26,6 @@
 */

 #include "libavutil/avstring.h"
-#include "libavutil/integer.h"
 #include "libavutil/crc.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/audioconvert.h"
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@ -580,7 +580,7 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
        dst[i+0] += src[i+0];
 }

-#if HAVE_7REGS && HAVE_TEN_OPERANDS
+#if HAVE_7REGS
 static void add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) {
    x86_reg w2 = -w;
    x86_reg x;
@ -2577,7 +2577,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)

            c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
 #endif
-#if HAVE_7REGS && HAVE_TEN_OPERANDS
+#if HAVE_7REGS
            if( mm_flags&AV_CPU_FLAG_3DNOW )
                c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
 #endif
--- a/libavcodec/x86/mlpdsp.c
+++ b/libavcodec/x86/mlpdsp.c
@ -23,7 +23,7 @@
 #include "libavcodec/dsputil.h"
 #include "libavcodec/mlp.h"

-#if HAVE_7REGS && HAVE_TEN_OPERANDS
+#if HAVE_7REGS

 extern void ff_mlp_firorder_8;
 extern void ff_mlp_firorder_7;
@ -171,11 +171,11 @@ static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff,
    );
 }

-#endif /* HAVE_7REGS && HAVE_TEN_OPERANDS */
+#endif /* HAVE_7REGS */

 void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx)
 {
-#if HAVE_7REGS && HAVE_TEN_OPERANDS
+#if HAVE_7REGS
    c->mlp_filter_channel = mlp_filter_channel_x86;
 #endif
 }
--- a/libavdevice/Makefile
+++ b/libavdevice/Makefile
@ -36,4 +36,4 @@ OBJS-$(CONFIG_LIBDC1394_INDEV)           += libdc1394.o
 SKIPHEADERS-$(HAVE_ALSA_ASOUNDLIB_H)     += alsa-audio.h
 SKIPHEADERS-$(HAVE_SNDIO_H)              += sndio_common.h

-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@ -137,8 +137,8 @@ OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/vf_yvu9.o
 OBJS-$(CONFIG_MP_FILTER) += libmpcodecs/pullup.o


-include $(SUBDIR)$(ARCH)/Makefile
+-include $(SRC_PATH)/$(SUBDIR)$(ARCH)/Makefile

 DIRS = x86 libmpcodecs

-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@ -342,4 +342,4 @@ OBJS-$(CONFIG_JACK_INDEV)                += timefilter.o

 TESTPROGS = timefilter

-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
--- a/libavformat/wv.c
+++ b/libavformat/wv.c
@ -110,6 +110,9 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
        size = wc->blksize;
    }
    wc->flags = AV_RL32(wc->extra + 4);
+    // blocks with zero samples don't contain actual audio information and should be ignored
+    if (!AV_RN32(wc->extra))
+        return 0;
    //parse flags
    bpp = ((wc->flags & 3) + 1) << 3;
    chan = 1 + !(wc->flags & WV_MONO);
@ -207,8 +210,14 @@ static int wv_read_header(AVFormatContext *s,
    AVStream *st;

    wc->block_parsed = 0;
-    if(wv_read_block_header(s, pb, 0) < 0)
-        return -1;
+    for(;;){
+        if(wv_read_block_header(s, pb, 0) < 0)
+            return -1;
+        if(!AV_RN32(wc->extra))
+            avio_skip(pb, wc->blksize - 24);
+        else
+            break;
+    }

    /* now we are ready: build format streams */
    st = av_new_stream(s, 0);
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@ -84,6 +84,6 @@ DIRS = arm bfin sh4 x86

 ARCH_HEADERS = bswap.h intmath.h intreadwrite.h timer.h

-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak

 $(SUBDIR)lzo-test$(EXESUF): ELIBS = -llzo2
--- a/libavutil/mathematics.c
+++ b/libavutil/mathematics.c
@ -153,32 +153,3 @@ int64_t av_compare_mod(uint64_t a, uint64_t b, uint64_t mod){
        c-= mod;
    return c;
 }
-
-#ifdef TEST
-#include "integer.h"
-#undef printf
-int main(void){
-    int64_t a,b,c,d,e;
-
-    for(a=7; a<(1LL<<62); a+=a/3+1){
-        for(b=3; b<(1LL<<62); b+=b/4+1){
-            for(c=9; c<(1LL<<62); c+=(c*2)/5+3){
-                int64_t r= c/2;
-                AVInteger ai;
-                ai= av_mul_i(av_int2i(a), av_int2i(b));
-                ai= av_add_i(ai, av_int2i(r));
-
-                d= av_i2int(av_div_i(ai, av_int2i(c)));
-
-                e= av_rescale(a,b,c);
-
-                if((double)a * (double)b / (double)c > (1LL<<63))
-                    continue;
-
-                if(d!=e) printf("%"PRId64"*%"PRId64"/%"PRId64"= %"PRId64"=%"PRId64"\n", a, b, c, d, e);
-            }
-        }
-    }
-    return 0;
-}
-#endif
--- a/libpostproc/Makefile
+++ b/libpostproc/Makefile
@ -7,4 +7,4 @@ HEADERS = postprocess.h

 OBJS = postprocess.o

-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@ -24,4 +24,4 @@ TESTPROGS = colorspace swscale

 DIRS = bfin mlib ppc sparc x86

-include $(SUBDIR)../subdir.mak
+include $(SRC_PATH)/subdir.mak
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@ -98,10 +98,9 @@ yuv2yuvX_altivec_real(SwsContext *c,
                      int lumFilterSize, const int16_t *chrFilter,
                      const int16_t **chrUSrc, const int16_t **chrVSrc,
                      int chrFilterSize, const int16_t **alpSrc,
-                      uint8_t *dest, uint8_t *uDest,
-                      uint8_t *vDest, uint8_t *aDest,
-                      int dstW, int chrDstW)
+                      uint8_t *dest[4], int dstW, int chrDstW)
 {
+    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2];
    const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
    register int i, j;
    {
@ -150,7 +149,7 @@ yuv2yuvX_altivec_real(SwsContext *c,
                val[i] += lumSrc[j][i] * lumFilter[j];
            }
        }
-        altivec_packIntArrayToCharArray(val, dest, dstW);
+        altivec_packIntArrayToCharArray(val, yDest, dstW);
    }
    if (uDest != 0) {
        DECLARE_ALIGNED(16, int, u)[chrDstW];
@ -408,16 +407,22 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
        return;

    c->hScale       = hScale_altivec_real;
-    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat)) {
+    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
+        dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 &&
+        !c->alpPixBuf) {
        c->yuv2yuvX     = yuv2yuvX_altivec_real;
    }

    /* The following list of supported dstFormat values should
     * match what's found in the body of ff_yuv2packedX_altivec() */
-    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf &&
-        (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
-         c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
-         c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)) {
-            c->yuv2packedX  = ff_yuv2packedX_altivec;
+    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) {
+        switch (c->dstFormat) {
+        case PIX_FMT_ABGR:  c->yuv2packedX = ff_yuv2abgr_X_altivec;  break;
+        case PIX_FMT_BGRA:  c->yuv2packedX = ff_yuv2bgra_X_altivec;  break;
+        case PIX_FMT_ARGB:  c->yuv2packedX = ff_yuv2argb_X_altivec;  break;
+        case PIX_FMT_RGBA:  c->yuv2packedX = ff_yuv2rgba_X_altivec;  break;
+        case PIX_FMT_BGR24: c->yuv2packedX = ff_yuv2bgr24_X_altivec; break;
+        case PIX_FMT_RGB24: c->yuv2packedX = ff_yuv2rgb24_X_altivec; break;
        }
+    }
 }
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@ -627,13 +627,13 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
 }


-void
+static av_always_inline void
 ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
                       const int16_t **lumSrc, int lumFilterSize,
                       const int16_t *chrFilter, const int16_t **chrUSrc,
                       const int16_t **chrVSrc, int chrFilterSize,
                       const int16_t **alpSrc, uint8_t *dest,
-                       int dstW, int dstY)
+                       int dstW, int dstY, enum PixelFormat target)
 {
    int i,j;
    vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
@ -707,7 +707,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
        G  = vec_packclp (G0,G1);
        B  = vec_packclp (B0,B1);

-        switch(c->dstFormat) {
+        switch(target) {
        case PIX_FMT_ABGR:  out_abgr  (R,G,B,out); break;
        case PIX_FMT_BGRA:  out_bgra  (R,G,B,out); break;
        case PIX_FMT_RGBA:  out_rgba  (R,G,B,out); break;
@ -786,7 +786,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
        B  = vec_packclp (B0,B1);

        nout = (vector unsigned char *)scratch;
-        switch(c->dstFormat) {
+        switch(target) {
        case PIX_FMT_ABGR:  out_abgr  (R,G,B,nout); break;
        case PIX_FMT_BGRA:  out_bgra  (R,G,B,nout); break;
        case PIX_FMT_RGBA:  out_rgba  (R,G,B,nout); break;
@ -804,3 +804,23 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
    }

 }
+
+#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
+void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
+                            const int16_t **lumSrc, int lumFilterSize, \
+                            const int16_t *chrFilter, const int16_t **chrUSrc, \
+                            const int16_t **chrVSrc, int chrFilterSize, \
+                            const int16_t **alpSrc, uint8_t *dest, \
+                            int dstW, int dstY) \
+{ \
+    ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
+                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                           alpSrc, dest, dstW, dstY, pixfmt); \
+}
+
+YUV2PACKEDX_WRAPPER(abgr,  PIX_FMT_ABGR);
+YUV2PACKEDX_WRAPPER(bgra,  PIX_FMT_BGRA);
+YUV2PACKEDX_WRAPPER(argb,  PIX_FMT_ARGB);
+YUV2PACKEDX_WRAPPER(rgba,  PIX_FMT_RGBA);
+YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
+YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);
--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@ -24,11 +24,19 @@
 #ifndef PPC_YUV2RGB_ALTIVEC_H
 #define PPC_YUV2RGB_ALTIVEC_H 1

-void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
-                            const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter, const int16_t **chrUSrc,
-                            const int16_t **chrVSrc, int chrFilterSize,
-                            const int16_t **alpSrc, uint8_t *dest,
+#define YUV2PACKEDX_HEADER(suffix) \
+void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
+                            const int16_t **lumSrc, int lumFilterSize, \
+                            const int16_t *chrFilter, const int16_t **chrUSrc, \
+                            const int16_t **chrVSrc, int chrFilterSize, \
+                            const int16_t **alpSrc, uint8_t *dest, \
                            int dstW, int dstY);

+YUV2PACKEDX_HEADER(abgr);
+YUV2PACKEDX_HEADER(bgra);
+YUV2PACKEDX_HEADER(argb);
+YUV2PACKEDX_HEADER(rgba);
+YUV2PACKEDX_HEADER(rgb24);
+YUV2PACKEDX_HEADER(bgr24);
+
 #endif /* PPC_YUV2RGB_ALTIVEC_H */
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@ -59,40 +59,129 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
                       int srcStride[], int srcSliceY, int srcSliceH,
                       uint8_t* dst[], int dstStride[]);

+/**
+ * Write one line of horizontally scaled Y/U/V/A to planar output
+ * without any additional vertical scaling (or point-scaling).
+ *
+ * @param c       SWS scaling context
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the 4 output planes (Y/U/V/A)
+ * @param dstW    width of dest[0], dest[3], lumSrc and alpSrc in pixels
+ * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc
+ */
 typedef void (*yuv2planar1_fn) (struct SwsContext *c,
                                const int16_t *lumSrc, const int16_t *chrUSrc,
                                const int16_t *chrVSrc, const int16_t *alpSrc,
-                                uint8_t *dest,
-                                uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
-                                int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
-typedef void (*yuv2planarX_fn) (struct SwsContext *c,
-                                const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+                                uint8_t *dest[4], int dstW, int chrDstW,
+                                const uint8_t *lumDither, const uint8_t *chrDither);
+/**
+ * Write one line of horizontally scaled Y/U/V/A to planar output
+ * with multi-point vertical scaling between input pixels.
+ *
+ * @param c             SWS scaling context
+ * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+ * @param lumFilterSize number of vertical luma/alpha input lines to scale
+ * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+ * @param chrFilterSize number of vertical chroma input lines to scale
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest          pointer to the 4 output planes (Y/U/V/A)
+ * @param dstW          width of dest[0], dest[3], lumSrc and alpSrc in pixels
+ * @param chrDstW       width of dest[1], dest[2], chrUSrc and chrVSrc
+ */
+typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
-                                const int16_t **chrVSrc, int chrFilterSize,
-                                const int16_t **alpSrc,
-                                uint8_t *dest,
-                                uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
+                                const int16_t **chrVSrc,  int chrFilterSize,
+                                const int16_t **alpSrc, uint8_t *dest[4],
                                int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
-typedef void (*yuv2packed1_fn) (struct SwsContext *c,
-                                const uint16_t *buf0,
-                                const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                const uint16_t *abuf0,
-                                uint8_t *dest,
-                                int dstW, int uvalpha, int dstFormat, int flags, int y);
-typedef void (*yuv2packed2_fn) (struct SwsContext *c,
-                                const uint16_t *buf0, const uint16_t *buf1,
-                                const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                const uint16_t *abuf0, const uint16_t *abuf1,
-                                uint8_t *dest,
+/**
+ * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+ * output without any additional vertical scaling (or point-scaling). Note
+ * that this function may do chroma scaling, see the "uvalpha" argument.
+ *
+ * @param c       SWS scaling context
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the output plane
+ * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
+ *                to write into dest[]
+ * @param uvalpha chroma scaling coefficient for the second line of chroma
+ *                pixels, either 2048 or 0. If 0, one chroma input is used
+ *                for 2 output pixels (or if the SWS_FULL_CHR_H_INT flag
+ *                is set, it generates 1 output pixel). If 2048, two chroma
+ *                input pixels should be averaged for 2 output pixels (this
+ *                only happens if SWS_FULL_CHR_H_INT is not set)
+ * @param y       vertical line number for this output. This does not need
+ *                to be used to calculate the offset in the destination,
+ *                but can be used to generate comfort noise using dithering
+ *                for some output formats.
+ */
+typedef void (*yuv2packed1_fn) (struct SwsContext *c,  const int16_t *lumSrc,
+                                const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
+                                const int16_t *alpSrc,  uint8_t *dest,
+                                int dstW, int uvalpha, int y);
+/**
+ * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+ * output by doing bilinear scaling between two input lines.
+ *
+ * @param c       SWS scaling context
+ * @param lumSrc  scaled luma (Y) source data, 15bit for 8bit output
+ * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output
+ * @param alpSrc  scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest    pointer to the output plane
+ * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
+ *                to write into dest[]
+ * @param yalpha  luma/alpha scaling coefficients for the second input line.
+ *                The first line's coefficients can be calculated by using
+ *                4096 - yalpha
+ * @param uvalpha chroma scaling coefficient for the second input line. The
+ *                first line's coefficients can be calculated by using
+ *                4096 - uvalpha
+ * @param y       vertical line number for this output. This does not need
+ *                to be used to calculate the offset in the destination,
+ *                but can be used to generate comfort noise using dithering
+ *                for some output formats.
+ */
+typedef void (*yuv2packed2_fn) (struct SwsContext *c,  const int16_t *lumSrc[2],
+                                const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
+                                const int16_t *alpSrc[2], uint8_t *dest,
                                int dstW, int yalpha, int uvalpha, int y);
-typedef void (*yuv2packedX_fn) (struct SwsContext *c,
-                                const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
+/**
+ * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
+ * output by doing multi-point vertical scaling between input pixels.
+ *
+ * @param c             SWS scaling context
+ * @param lumFilter     vertical luma/alpha scaling coefficients, 12bit [0,4096]
+ * @param lumSrc        scaled luma (Y) source data, 15bit for 8bit output
+ * @param lumFilterSize number of vertical luma/alpha input lines to scale
+ * @param chrFilter     vertical chroma scaling coefficients, 12bit [0,4096]
+ * @param chrUSrc       scaled chroma (U) source data, 15bit for 8bit output
+ * @param chrVSrc       scaled chroma (V) source data, 15bit for 8bit output
+ * @param chrFilterSize number of vertical chroma input lines to scale
+ * @param alpSrc        scaled alpha (A) source data, 15bit for 8bit output
+ * @param dest          pointer to the output plane
+ * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
+ *                      to write into dest[]
+ * @param y             vertical line number for this output. This does not need
+ *                      to be used to calculate the offset in the destination,
+ *                      but can be used to generate comfort noise using dithering
+ *                      for some output formats.
+ */
+typedef void (*yuv2packedX_fn) (struct SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
                                const int16_t **chrVSrc, int chrFilterSize,
                                const int16_t **alpSrc, uint8_t *dest,
-                                int dstW, int dstY);
+                                int dstW, int y);

 /* This struct should be aligned on at least a 32-byte boundary. */
 typedef struct SwsContext {
@ -304,6 +393,25 @@ typedef struct SwsContext {
    void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
+    /**
+     * Scale one horizontal line of input data using a bilinear filter
+     * to produce one line of output data. Compared to SwsContext->hScale(),
+     * please take note of the following caveats when using these:
+     * - Scaling is done using only 7bit instead of 14bit coefficients.
+     * - You can use no more than 5 input pixels to produce 4 output
+     *   pixels. Therefore, this filter should not be used for downscaling
+     *   by more than ~20% in width (because that equals more than 5/4th
+     *   downscaling and thus more than 5 pixels input per 4 pixels output).
+     * - In general, bilinear filters create artifacts during downscaling
+     *   (even when <20%), because one output pixel will span more than one
+     *   input pixel, and thus some pixels will need edges of both neighbor
+     *   pixels to interpolate the output pixel. Since you can use at most
+     *   two input pixels per output pixel in bilinear scaling, this is
+     *   impossible and thus downscaling by any size will create artifacts.
+     * To enable this type of scaling, set SWS_FAST_BILINEAR
+     * in SwsContext->flags.
+     */
+    /** @{ */
    void (*hyscale_fast)(struct SwsContext *c,
                         int16_t *dst, int dstWidth,
                         const uint8_t *src, int srcW, int xInc);
@ -311,7 +419,33 @@ typedef struct SwsContext {
                         int16_t *dst1, int16_t *dst2, int dstWidth,
                         const uint8_t *src1, const uint8_t *src2,
                         int srcW, int xInc);
+    /** @} */

+    /**
+     * Scale one horizontal line of input data using a filter over the input
+     * lines, to produce one (differently sized) line of output data.
+     *
+     * @param dst        pointer to destination buffer for horizontally scaled
+     *                   data. If the scaling depth (SwsContext->scalingBpp) is
+     *                   8, data will be 15bpp in 16bits (int16_t) width. If
+     *                   scaling depth is 16, data will be 19bpp in 32bpp
+     *                   (int32_t) width.
+     * @param dstW       width of destination image
+     * @param src        pointer to source data to be scaled. If scaling depth
+     *                   is 8, this is 8bpp in 8bpp (uint8_t) width. If scaling
+     *                   depth is 16, this is 16bpp in 16bpp (uint16_t) width.
+     * @param filter     filter coefficients to be used per output pixel for
+     *                   scaling. This contains 14bpp filtering coefficients.
+     *                   Guaranteed to contain dstW * filterSize entries.
+     * @param filterPos  position of the first input pixel to be used for
+     *                   each output pixel during scaling. Guaranteed to
+     *                   contain dstW entries.
+     * @param filterSize the number of input coefficients to be used (and
+     *                   thus the number of input pixels to be used) for
+     *                   creating a single output pixel. Is aligned to 4
+     *                   (and input coefficients thus padded with zeroes)
+     *                   to simplify creating SIMD code.
+     */
    void (*hScale)(int16_t *dst, int dstW, const uint8_t *src,
                   const int16_t *filter, const int16_t *filterPos,
                   int filterSize);
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@ -149,12 +149,18 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt)
        || (x)==PIX_FMT_YUVJ444P    \
        || isRGBinBytes(x)          \
        || isBGRinBytes(x)          \
-        || (x)==PIX_FMT_RGB565      \
-        || (x)==PIX_FMT_RGB555      \
-        || (x)==PIX_FMT_RGB444      \
-        || (x)==PIX_FMT_BGR565      \
-        || (x)==PIX_FMT_BGR555      \
-        || (x)==PIX_FMT_BGR444      \
+        || (x)==PIX_FMT_RGB565LE    \
+        || (x)==PIX_FMT_RGB565BE    \
+        || (x)==PIX_FMT_RGB555LE    \
+        || (x)==PIX_FMT_RGB555BE    \
+        || (x)==PIX_FMT_RGB444LE    \
+        || (x)==PIX_FMT_RGB444BE    \
+        || (x)==PIX_FMT_BGR565LE    \
+        || (x)==PIX_FMT_BGR565BE    \
+        || (x)==PIX_FMT_BGR555LE    \
+        || (x)==PIX_FMT_BGR555BE    \
+        || (x)==PIX_FMT_BGR444LE    \
+        || (x)==PIX_FMT_BGR444BE    \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB4_BYTE   \
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@ -75,11 +75,13 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
                             const int16_t *chrFilter, const int16_t **chrUSrc,
                             const int16_t **chrVSrc,
                             int chrFilterSize, const int16_t **alpSrc,
-                             uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                             uint8_t *aDest, int dstW, int chrDstW,
+                             uint8_t *dest[4], int dstW, int chrDstW,
                             const uint8_t *lumDither, const uint8_t *chrDither)
 {
    int i;
+    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+            *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
+
    if (uDest) {
        x86_reg uv_off = c->uv_off;
        for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4;
@ -92,7 +94,7 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
        YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
    }

-    YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
+    YSCALEYUV2YV12X(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0)
 }

 #define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
@ -160,11 +162,13 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
                                const int16_t *chrFilter, const int16_t **chrUSrc,
                                const int16_t **chrVSrc,
                                int chrFilterSize, const int16_t **alpSrc,
-                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                uint8_t *aDest, int dstW, int chrDstW,
+                                uint8_t *dest[4], int dstW, int chrDstW,
                                const uint8_t *lumDither, const uint8_t *chrDither)
 {
    int i;
+    uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
+            *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
+
    if (uDest) {
        x86_reg uv_off = c->uv_off;
        for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12;
@ -177,20 +181,21 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
        YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
    }

-    YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0)
+    YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, yDest, dstW, 0)
 }

 static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
                             const int16_t *chrUSrc, const int16_t *chrVSrc,
                             const int16_t *alpSrc,
-                             uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                             uint8_t *aDest, int dstW, int chrDstW,
+                             uint8_t *dst[4], int dstW, int chrDstW,
                             const uint8_t *lumDither, const uint8_t *chrDither)
 {
    int p= 4;
-    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
-    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
-    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+    const int16_t *src[4]= {
+        lumSrc + dstW,     chrUSrc + chrDstW,
+        chrVSrc + chrDstW, alpSrc + dstW
+    };
+    x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };

    while (p--) {
        if (dst[p]) {
@ -217,14 +222,15 @@ static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
 static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
                                const int16_t *chrUSrc, const int16_t *chrVSrc,
                                const int16_t *alpSrc,
-                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
-                                uint8_t *aDest, int dstW, int chrDstW,
+                                uint8_t *dst[4], int dstW, int chrDstW,
                                const uint8_t *lumDither, const uint8_t *chrDither)
 {
    int p= 4;
-    const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
-    uint8_t *dst[4]= { aDest, dest, uDest, vDest };
-    x86_reg counter[4]= { dstW, dstW, chrDstW, chrDstW };
+    const int16_t *src[4]= {
+        lumSrc + dstW,     chrUSrc + chrDstW,
+        chrVSrc + chrDstW, alpSrc + dstW
+    };
+    x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };

    while (p--) {
        if (dst[p]) {
@ -981,14 +987,16 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
 /**
 * vertical bilinear scale YV12 to RGB
 */
-static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
-                                const uint16_t *buf1, const uint16_t *ubuf0,
-                                const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                const uint16_t *vbuf1, const uint16_t *abuf0,
-                                const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
+                                const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                const int16_t *abuf[2], uint8_t *dest,
                                int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+        const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
 #if ARCH_X86_64
        __asm__ volatile(
            YSCALEYUV2RGB(%%r8, %5)
@ -1043,13 +1051,14 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
    }
 }

-static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
-                                const uint16_t *buf1, const uint16_t *ubuf0,
-                                const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                const uint16_t *vbuf1, const uint16_t *abuf0,
-                                const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
+                                const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                const int16_t *abuf[2], uint8_t *dest,
                                int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
    __asm__ volatile(
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@ -1065,13 +1074,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
    );
 }

-static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *buf1, const uint16_t *ubuf0,
-                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                 const uint16_t *vbuf1, const uint16_t *abuf0,
-                                 const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
+                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                 const int16_t *abuf[2], uint8_t *dest,
                                 int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
    __asm__ volatile(
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@ -1093,13 +1103,14 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
    );
 }

-static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *buf1, const uint16_t *ubuf0,
-                                 const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                 const uint16_t *vbuf1, const uint16_t *abuf0,
-                                 const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
+                                 const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                 const int16_t *abuf[2], uint8_t *dest,
                                 int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
    __asm__ volatile(
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@ -1161,13 +1172,14 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,

 #define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)

-static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
-                                  const uint16_t *buf1, const uint16_t *ubuf0,
-                                  const uint16_t *ubuf1, const uint16_t *vbuf0,
-                                  const uint16_t *vbuf1, const uint16_t *abuf0,
-                                  const uint16_t *abuf1, uint8_t *dest,
+static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
+                                  const int16_t *ubuf[2], const int16_t *vbuf[2],
+                                  const int16_t *abuf[2], uint8_t *dest,
                                  int dstW, int yalpha, int uvalpha, int y)
 {
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+
    //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
    __asm__ volatile(
        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
@ -1300,14 +1312,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
 /**
 * YV12 to RGB without scaling or interpolating
 */
-static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
-                                const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                const uint16_t *abuf0, uint8_t *dest,
-                                int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                int flags, int y)
+static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
+                                const int16_t *ubuf[2], const int16_t *bguf[2],
+                                const int16_t *abuf0, uint8_t *dest,
+                                int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
@ -1368,14 +1379,13 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
    }
 }

-static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
-                                const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                const uint16_t *abuf0, uint8_t *dest,
-                                int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                int flags, int y)
+static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
+                                const int16_t *ubuf[2], const int16_t *bguf[2],
+                                const int16_t *abuf0, uint8_t *dest,
+                                int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        __asm__ volatile(
@ -1406,14 +1416,13 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
    }
 }

-static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                 const uint16_t *abuf0, uint8_t *dest,
-                                 int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                 int flags, int y)
+static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
+                                 const int16_t *ubuf[2], const int16_t *bguf[2],
+                                 const int16_t *abuf0, uint8_t *dest,
+                                 int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        __asm__ volatile(
@ -1456,14 +1465,13 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
    }
 }

-static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
-                                 const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                 const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                 const uint16_t *abuf0, uint8_t *dest,
-                                 int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                 int flags, int y)
+static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
+                                 const int16_t *ubuf[2], const int16_t *bguf[2],
+                                 const int16_t *abuf0, uint8_t *dest,
+                                 int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        __asm__ volatile(
@ -1543,14 +1551,13 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
    "psraw                $7, %%mm7     \n\t"
 #define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)

-static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
-                                  const uint16_t *ubuf0, const uint16_t *ubuf1,
-                                  const uint16_t *vbuf0, const uint16_t *vbuf1,
-                                  const uint16_t *abuf0, uint8_t *dest,
-                                  int dstW, int uvalpha, enum PixelFormat dstFormat,
-                                  int flags, int y)
+static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
+                                  const int16_t *ubuf[2], const int16_t *bguf[2],
+                                  const int16_t *abuf0, uint8_t *dest,
+                                  int dstW, int uvalpha, int y)
 {
-    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
+    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
+    const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1

    if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
        __asm__ volatile(
--- a/subdir.mak
+++ b/subdir.mak
@ -1,6 +1,6 @@
 SRC_DIR := $(SRC_PATH)/lib$(NAME)

-include $(SUBDIR)../common.mak
+include $(SRC_PATH)/common.mak

 LIBVERSION := $(lib$(NAME)_VERSION)
 LIBMAJOR   := $(lib$(NAME)_VERSION_MAJOR)
--- a/tests/Makefile
+++ b/tests/Makefile
@ -112,7 +112,7 @@ clean:: testclean

 testclean:
 	$(RM) -r tests/vsynth1 tests/vsynth2 tests/data
-	$(RM) $(addprefix tests/,$(CLEANSUFFIXES))
+	$(RM) $(CLEANSUFFIXES:%=tests/%)
 	$(RM) tests/seek_test$(EXESUF) tests/seek_test.o
 	$(RM) $(TESTTOOLS:%=tests/%$(HOSTEXESUF))

--- a/tests/lavfi-regression.sh
+++ b/tests/lavfi-regression.sh
@ -69,7 +69,7 @@ do_lavfi_pixfmts "pad"     "500:400:20:20"
 do_lavfi_pixfmts "scale"   "200:100"
 do_lavfi_pixfmts "vflip"   ""

-if [ -n "$do_pixdesc_be" ] || [ -n "$do_pixdesc_le" ]; then
+if [ -n "$do_pixdesc" ]; then
    pix_fmts="$($ffmpeg -pix_fmts list 2>/dev/null | sed -ne '9,$p' | grep '^IO' | cut -d' ' -f2 | sort)"
    for pix_fmt in $pix_fmts; do
        do_video_filter $pix_fmt "slicify=random,format=$pix_fmt,pixdesctest" -pix_fmt $pix_fmt
--- a/tests/ref/lavfi/pixdesc_le
+++ b/tests/ref/lavfi/pixdesc_le
@ -4,7 +4,9 @@ bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
 bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
 bgr48le             d022bfdd6a07d5dcc693799322a386b4
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
+bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
+bgr565be            257cf78afa35dc31e9696f139c916715
 bgr565le            1dfdd03995c287e3c754b164bf26a355
 bgr8                24bd566170343d06fec6fccfff5abc54
 bgra                76a18a5151242fa137133f604cd624d2
@ -19,7 +21,9 @@ rgb24               b41eba9651e1b5fe386289b506188105
 rgb48be             460b6de89b156290a12d3941db8bd731
 rgb48le             cd93cb34d15996987367dabda3a10128
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
+rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
+rgb565be            9cadf742e05ddc23a3b5b270f89aad3c
 rgb565le            d39aa298bb525e9be8860351c6f62dab
 rgb8                4a9d8e4f2f154e83a7e1735be6300700
 rgba                93a5b3712e6eb8c5b9a09ffc7b9fbc12
--- a/tests/ref/lavfi/pixdesc_be
+++ b/tests/ref/lavfi/pixdesc_be
@ -1,49 +0,0 @@
-abgr                037bf9df6a765520ad6d490066bf4b89
-argb                c442a8261c2265a07212ef0f72e35f5a
-bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
-bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
-bgr48le             d022bfdd6a07d5dcc693799322a386b4
-bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
-bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
-bgr565be            257cf78afa35dc31e9696f139c916715
-bgr8                24bd566170343d06fec6fccfff5abc54
-bgra                76a18a5151242fa137133f604cd624d2
-gray                db08f7f0751900347e6b8649e4164d21
-gray16be            7becf34ae825a3df3969bf4c6bfeb5e2
-gray16le            10bd87059b5c189f3caef2837f4f2b5c
-monob               668ebe8b8103b9046b251b2fa8a1d88f
-monow               9251497f3b0634f1165d12d5a289d943
-nv12                e0af357888584d36eec5aa0f673793ef
-nv21                9a3297f3b34baa038b1f37cb202b512f
-rgb24               b41eba9651e1b5fe386289b506188105
-rgb48be             460b6de89b156290a12d3941db8bd731
-rgb48le             cd93cb34d15996987367dabda3a10128
-rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
-rgb555be            912a62c5e53bfcbac2a0340e10973cf2
-rgb565be            9cadf742e05ddc23a3b5b270f89aad3c
-rgb8                4a9d8e4f2f154e83a7e1735be6300700
-rgba                93a5b3712e6eb8c5b9a09ffc7b9fbc12
-uyvy422             adcf64516a19fce44df77082bdb16291
-yuv410p             2d9225153c83ee1132397d619d94d1b3
-yuv411p             8b298af3e43348ca1b11eb8a3252ac6c
-yuv420p             eba2f135a08829387e2f698ff72a2939
-yuv420p10be         7605e266c088d0fcf68c7b27c3ceff5f
-yuv420p10le         4228ee628c6deec123a13b9784516cc7
-yuv420p16be         16c009a235cd52b74791a895423152a3
-yuv420p16le         2d59c4f1d0314a5a957a7cfc4b6fabcc
-yuv420p9be          ce880fa07830e5297c22acf6e20555ce
-yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
-yuv422p             c9bba4529821d796a6ab09f6a5fd355a
-yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
-yuv422p16be         5499502e1c29534a158a1fe60e889f60
-yuv422p16le         e3d61fde6978591596bc36b914386623
-yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
-yuv444p             0a98447b78fd476aa39686da6a74fa2e
-yuv444p16be         ea602a24b8e6969679265078bd8607b6
-yuv444p16le         1262a0dc57ee147967fc896d04206313
-yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
-yuvj420p            32eec78ba51857b16ce9b813a49b7189
-yuvj422p            0dfa0ed434f73be51428758c69e082cb
-yuvj440p            657501a28004e27a592757a7509f5189
-yuvj444p            98d3d054f2ec09a75eeed5d328dc75b7
-yuyv422             f2569f2b5069a0ee0cecae33de0455e3
--- a/tests/ref/lavfi/pixfmts_copy_le
+++ b/tests/ref/lavfi/pixfmts_copy_le
@ -4,7 +4,9 @@ bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
 bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
 bgr48le             d022bfdd6a07d5dcc693799322a386b4
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
+bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
+bgr565be            257cf78afa35dc31e9696f139c916715
 bgr565le            1dfdd03995c287e3c754b164bf26a355
 bgr8                24bd566170343d06fec6fccfff5abc54
 bgra                76a18a5151242fa137133f604cd624d2
@ -19,7 +21,9 @@ rgb24               b41eba9651e1b5fe386289b506188105
 rgb48be             460b6de89b156290a12d3941db8bd731
 rgb48le             cd93cb34d15996987367dabda3a10128
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
+rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
+rgb565be            9cadf742e05ddc23a3b5b270f89aad3c
 rgb565le            d39aa298bb525e9be8860351c6f62dab
 rgb8                4a9d8e4f2f154e83a7e1735be6300700
 rgba                93a5b3712e6eb8c5b9a09ffc7b9fbc12
--- a/tests/ref/lavfi/pixfmts_crop_le
+++ b/tests/ref/lavfi/pixfmts_crop_le
@ -4,7 +4,9 @@ bgr24               3450fd00cf1493d1ded75544d82ba3ec
 bgr48be             90cb5d373a1123432d63c6a10c101afa
 bgr48le             9371f54ceda9010f1199e86f4930ac3f
 bgr4_byte           2f6ac3cdd4676ab4e2982bdf0664945b
+bgr555be            d3a7c273604723adeb7e5f5dd1c4272b
 bgr555le            d22442fc13b464f9ba455b08df4e981f
+bgr565be            fadceef4a64ad6873fcb43ddee0deb3c
 bgr565le            891664e5a54ae5968901347da92bc5e9
 bgr8                4b7159e05765bd4703180072d86423c8
 bgra                395c9f706fccda721471acaa5c96c16c
@ -15,7 +17,9 @@ rgb24               3b90ed64b687d3dc186c6ef521dc71a8
 rgb48be             a808128041a1962deaa8620c7448feba
 rgb48le             ce92d02cc322608d5be377cb1940677b
 rgb4_byte           6958029f73c6cdfed4f71020d816f027
+rgb555be            41a7d1836837bc90f2cae19a9c9df3b3
 rgb555le            eeb78f8ce6186fba55c941469e60ba67
+rgb565be            b2d1cb525f3a0cfe27753c0d479b2fa9
 rgb565le            6a49700680be9a0d434411825a769556
 rgb8                88b0398c265d1ed7a837dc084fa0917c
 rgba                fd00b24c7597268c32759a84a1de2de4
--- a/tests/ref/lavfi/pixfmts_hflip_le
+++ b/tests/ref/lavfi/pixfmts_hflip_le
@ -4,7 +4,9 @@ bgr24               cc53d2011d097972db0d22756c3699e3
 bgr48be             11641cf0f4516a9aed98f7872720f801
 bgr48le             b5440734eed128554dd9f83b34ba582f
 bgr4_byte           aac987e7d1a6a96477cfc0b48a4285de
+bgr555be            bc07265898440116772200390d70c092
 bgr555le            ccee08679bac84a1f960c6c9070c5538
+bgr565be            e088789ce46224b87c6e46610ef19add
 bgr565le            3703466e19e1b52e03a34fd244a8e8e4
 bgr8                50b505a889f0428242305acb642da107
 bgra                01ca21e7e6a8d1281b4553bde8e8a404
@ -15,7 +17,9 @@ rgb24               754f1722fc738590cc407ac65749bfe8
 rgb48be             10743e1577dc3198dbbc7c0b3b8f429e
 rgb48le             dd945a44f39119221407bf7a04f1bc49
 rgb4_byte           c8a3f995fcf3e0919239ea2c413ddc29
+rgb555be            045ce8607d3910586f4d97481dda8632
 rgb555le            8778ee0cf58ce9ad1d99a1eca9f95e87
+rgb565be            c8022a1b2470e72f124e4389fad4c372
 rgb565le            2cb690eb3fcb72da3771ad6a48931158
 rgb8                9e462b811b9b6173397b9cfc1f6b2f17
 rgba                d3d0dc1ecef3ed72f26a2986d0efc204
--- a/tests/ref/lavfi/pixfmts_null_le
+++ b/tests/ref/lavfi/pixfmts_null_le
@ -4,7 +4,9 @@ bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
 bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
 bgr48le             d022bfdd6a07d5dcc693799322a386b4
 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
+bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
+bgr565be            257cf78afa35dc31e9696f139c916715
 bgr565le            1dfdd03995c287e3c754b164bf26a355
 bgr8                24bd566170343d06fec6fccfff5abc54
 bgra                76a18a5151242fa137133f604cd624d2
@ -19,7 +21,9 @@ rgb24               b41eba9651e1b5fe386289b506188105
 rgb48be             460b6de89b156290a12d3941db8bd731
 rgb48le             cd93cb34d15996987367dabda3a10128
 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
+rgb555be            912a62c5e53bfcbac2a0340e10973cf2
 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
+rgb565be            9cadf742e05ddc23a3b5b270f89aad3c
 rgb565le            d39aa298bb525e9be8860351c6f62dab
 rgb8                4a9d8e4f2f154e83a7e1735be6300700
 rgba                93a5b3712e6eb8c5b9a09ffc7b9fbc12
--- a/tests/ref/lavfi/pixfmts_pad_le
+++ b/tests/ref/lavfi/pixfmts_pad_le
--- a/tests/ref/lavfi/pixfmts_scale_le
+++ b/tests/ref/lavfi/pixfmts_scale_le
@ -4,7 +4,9 @@ bgr24               e44192347a45586c6c157e3059610cd1
 bgr48be             6d01b6ccd2ccf18c12985bcb2fde2218
 bgr48le             4caa6914091ad03b8f67c02d6b050bc0
 bgr4_byte           ee1d35a7baf8e9016891929a2f565c0b
+bgr555be            6a2d335856db12e3ea72173d71610e21
 bgr555le            41e3e0961478dc634bf68a7bbd670cc9
+bgr565be            21077a3744c889b97032414b11232933
 bgr565le            614897eaeb422bd9a972f8ee51909be5
 bgr8                7f007fa6c153a16e808a9c51605a4016
 bgra                01cfdda1f72fcabb6c46424e27f8c519
@ -19,7 +21,9 @@ rgb24               13ff53ebeab74dc05492836f1cfbd2c1
 rgb48be             f82e99f13d5ede2a53cf3bf7178ca350
 rgb48le             3a09d89e4b27ea1a98f762e662e306a7
 rgb4_byte           d81ffd3add95842a618eec81024f0b5c
+rgb555be            491dc49ff83258ffe415289bdcfb50b2
 rgb555le            bd698d86c03170c4a16607c0fd1f750f
+rgb565be            35682c17c85f307147041f23ac8092aa
 rgb565le            bfa0c639d80c3c03fd0c9e5f34296a5e
 rgb8                091d0170b354ef0e97312b95feb5483f
 rgba                16873e3ac914e76116629a5ff8940ac4
--- a/tests/ref/lavfi/pixfmts_vflip_le
+++ b/tests/ref/lavfi/pixfmts_vflip_le
@ -4,7 +4,9 @@ bgr24               89108a4ba00201f79b75b9305c42352d
 bgr48be             ed82382da09b64a8e04728fcf76e6814
 bgr48le             0f1f135608c2ff24d26d03e939fc2112
 bgr4_byte           407fcf564ed764c38e1d748f700ab921
+bgr555be            f739d2519f7e9d494359bf67a3821537
 bgr555le            bd7b3ec4d684dfad075d89a606cb8b74
+bgr565be            f19e9a4786395e1ddcd51399c98c9f6c
 bgr565le            fdb617533e1e7ff512ea5b6b6233e738
 bgr8                c60f93fd152c6903391d1fe9decd3547
 bgra                7f9b799fb48544e49ce93e91d7f9fca8
@ -19,7 +21,9 @@ rgb24               eaefabc168d0b14576bab45bc1e56e1e
 rgb48be             4e0c384163ebab06a08e74637beb02bc
 rgb48le             a77bfeefcd96750cf0e1917a2e2bf1e7
 rgb4_byte           8c6ff02df0b06dd2d574836c3741b2a2
+rgb555be            40dc33cfb5cf56aac1c5a290ac486c36
 rgb555le            4f8eaad29a17e0f8e9d8ab743e76b999
+rgb565be            b57623ad9df74648339311a0edcebc7b
 rgb565le            73f247a3315dceaea3022ac7c197c5ef
 rgb8                13a8d89ef78d8127297d899005456ff0
 rgba                1fc6e920a42ec812aaa3b2aa02f37987