From 28c1115a915e4e198bfb6bd39909b2d1327c1454 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 2 Aug 2011 15:42:35 -0700 Subject: [PATCH 1/4] swscale: use 15-bit intermediates for 9/10-bit scaling. --- libswscale/ppc/swscale_altivec.c | 2 +- libswscale/swscale.c | 235 ++++++++++++++++++----------- libswscale/swscale_internal.h | 108 +++++++------ libswscale/utils.c | 20 ++- libswscale/x86/swscale_template.c | 2 +- tests/ref/lavfi/pixdesc | 20 +-- tests/ref/lavfi/pixfmts_copy | 20 +-- tests/ref/lavfi/pixfmts_crop | 16 +- tests/ref/lavfi/pixfmts_hflip | 16 +- tests/ref/lavfi/pixfmts_null | 20 +-- tests/ref/lavfi/pixfmts_scale | 28 ++-- tests/ref/lavfi/pixfmts_vflip | 20 +-- tests/ref/vsynth1/dnxhd_720p_10bit | 8 +- tests/ref/vsynth2/dnxhd_720p_10bit | 8 +- 14 files changed, 301 insertions(+), 222 deletions(-) diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 369e93b85a..f988b534ac 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -406,7 +406,7 @@ void ff_sws_init_swScale_altivec(SwsContext *c) if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) return; - if (c->scalingBpp == 8) { + if (c->srcBpc == 8 && c->dstBpc <= 10) { c->hScale = hScale_altivec_real; } if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && diff --git a/libswscale/swscale.c b/libswscale/swscale.c index dd9f4a108f..f5b0ab4986 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -211,17 +211,9 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc, #define output_pixel(pos, val) \ if (big_endian) { \ - if (output_bits == 16) { \ - AV_WB16(pos, av_clip_uint16(val >> shift)); \ - } else { \ - AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ - } \ + AV_WB16(pos, av_clip_uint16(val >> shift)); \ } else { \ - if (output_bits == 16) { \ - AV_WL16(pos, av_clip_uint16(val >> shift)); \ - } else { \ - AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ - } \ + AV_WL16(pos, 
av_clip_uint16(val >> shift)); \ } for (i = 0; i < dstW; i++) { int val = 1 << (30-output_bits - 1); @@ -263,7 +255,67 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc, #undef output_pixel } -#define yuv2NBPS(bits, BE_LE, is_be) \ +static av_always_inline void +yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc, + int lumFilterSize, const int16_t *chrFilter, + const int16_t **chrUSrc, const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint16_t *dest[4], int dstW, int chrDstW, + int big_endian, int output_bits) +{ + //FIXME Optimize (just quickly written not optimized..) + int i; + uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], + *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; + int shift = 11 + 16 - output_bits - 1; + +#define output_pixel(pos, val) \ + if (big_endian) { \ + AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + } else { \ + AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ + } + for (i = 0; i < dstW; i++) { + int val = 1 << (26-output_bits - 1); + int j; + + for (j = 0; j < lumFilterSize; j++) + val += (lumSrc[j][i] * lumFilter[j]) >> 1; + + output_pixel(&yDest[i], val); + } + + if (uDest) { + for (i = 0; i < chrDstW; i++) { + int u = 1 << (26-output_bits - 1); + int v = 1 << (26-output_bits - 1); + int j; + + for (j = 0; j < chrFilterSize; j++) { + u += (chrUSrc[j][i] * chrFilter[j]) >> 1; + v += (chrVSrc[j][i] * chrFilter[j]) >> 1; + } + + output_pixel(&uDest[i], u); + output_pixel(&vDest[i], v); + } + } + + if (CONFIG_SWSCALE_ALPHA && aDest) { + for (i = 0; i < dstW; i++) { + int val = 1 << (26-output_bits - 1); + int j; + + for (j = 0; j < lumFilterSize; j++) + val += (alpSrc[j][i] * lumFilter[j]) >> 1; + + output_pixel(&aDest[i], val); + } + } +#undef output_pixel +} + +#define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \ const int16_t 
**_lumSrc, int lumFilterSize, \ const int16_t *chrFilter, const int16_t **_chrUSrc, \ @@ -271,21 +323,21 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil int chrFilterSize, const int16_t **_alpSrc, \ uint8_t *_dest[4], int dstW, int chrDstW) \ { \ - const int32_t **lumSrc = (const int32_t **) _lumSrc, \ - **chrUSrc = (const int32_t **) _chrUSrc, \ - **chrVSrc = (const int32_t **) _chrVSrc, \ - **alpSrc = (const int32_t **) _alpSrc; \ - yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \ - chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, (uint16_t **) _dest, \ - dstW, chrDstW, is_be, bits); \ + const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \ + **chrUSrc = (const typeX_t **) _chrUSrc, \ + **chrVSrc = (const typeX_t **) _chrVSrc, \ + **alpSrc = (const typeX_t **) _alpSrc; \ + yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ + alpSrc, (uint16_t **) _dest, \ + dstW, chrDstW, is_be, bits); \ } -yuv2NBPS( 9, BE, 1); -yuv2NBPS( 9, LE, 0); -yuv2NBPS(10, BE, 1); -yuv2NBPS(10, LE, 0); -yuv2NBPS(16, BE, 1); -yuv2NBPS(16, LE, 0); +yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t); +yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t); +yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t); +yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t); +yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t); +yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t); static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, @@ -1857,15 +1909,15 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, } } -static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, - const int16_t *filter, - const int16_t *filterPos, int filterSize) +static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, + const int16_t *filter, + const int16_t *filterPos, int filterSize) { 
int i; int32_t *dst = (int32_t *) _dst; const uint16_t *src = (const uint16_t *) _src; int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; - int sh = (bits <= 7) ? 11 : (bits - 4); + int sh = bits - 4; for (i = 0; i < dstW; i++) { int j; @@ -1880,10 +1932,31 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_s } } +static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, + const int16_t *filter, + const int16_t *filterPos, int filterSize) +{ + int i; + const uint16_t *src = (const uint16_t *) _src; + int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + + for (i = 0; i < dstW; i++) { + int j; + int srcPos = filterPos[i]; + int val = 0; + + for (j = 0; j < filterSize; j++) { + val += src[srcPos + j] * filter[filterSize * i + j]; + } + // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit + dst[i] = FFMIN(val >> sh, (1 << 15) - 1); + } +} + // bilinear / bicubic scaling -static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, - int filterSize) +static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, + const int16_t *filter, const int16_t *filterPos, + int filterSize) { int i; for (i=0; i>3, (1<<19)-1); // the cubic equation does overflow ... 
+ //dst[i] = val>>7; + } +} + //FIXME all pal and rgb srcFormats could do this convertion as well //FIXME all scalers more complex than bilinear could do half of this transform static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) @@ -1978,23 +2070,6 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, } } -static void scale8To16Rv_c(uint16_t *_dst, const uint8_t *src, int len) -{ - int i; - uint8_t *dst = (uint8_t *) _dst; - for (i = len - 1; i >= 0; i--) { - dst[i * 2] = dst[i * 2 + 1] = src[i]; - } -} - -static void scale19To15Fw_c(int16_t *dst, const int32_t *src, int len) -{ - int i; - for (i = 0; i < len; i++) { - dst[i] = src[i] >> 4; - } -} - // *** horizontal scale Y line to temp buffer static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc, @@ -2011,11 +2086,6 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, src= formatConvBuffer; } - if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { - c->scale8To16Rv((uint16_t *) formatConvBuffer, src, srcW); - src = formatConvBuffer; - } - if (!c->hyscale_fast) { c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize); } else { // fast bilinear upscale / crap downscale @@ -2024,10 +2094,6 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, if (convertRange) convertRange(dst, dstWidth); - - if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { - c->scale19To15Fw(dst, (int32_t *) dst, dstWidth); - } } static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, @@ -2052,20 +2118,12 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 uint8_t *formatConvBuffer, uint32_t *pal) { if (c->chrToYV12) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW 
* FFALIGN(c->srcBpc, 8) >> 3, 16); c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); src1= formatConvBuffer; src2= buf2; } - if (av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { - uint8_t *buf2 = (formatConvBuffer + FFALIGN(srcW * 2, 16)); - c->scale8To16Rv((uint16_t *) formatConvBuffer, src1, srcW); - c->scale8To16Rv((uint16_t *) buf2, src2, srcW); - src1 = formatConvBuffer; - src2 = buf2; - } - if (!c->hcscale_fast) { c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize); c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize); @@ -2075,11 +2133,6 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 if (c->chrConvertRange) c->chrConvertRange(dst1, dst2, dstWidth); - - if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { - c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth); - c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth); - } } static av_always_inline void @@ -2734,28 +2787,30 @@ static av_cold void sws_init_swScale_c(SwsContext *c) } } - if (c->scalingBpp == 8) { - c->hScale = hScale_c; - if (c->flags & SWS_FAST_BILINEAR) { - c->hyscale_fast = hyscale_fast_c; - c->hcscale_fast = hcscale_fast_c; + if (c->srcBpc == 8) { + if (c->dstBpc <= 10) { + c->hScale = hScale8To15_c; + if (c->flags & SWS_FAST_BILINEAR) { + c->hyscale_fast = hyscale_fast_c; + c->hcscale_fast = hcscale_fast_c; + } + } else { + c->hScale = hScale8To19_c; + } + } else { + c->hScale = c->dstBpc > 10 ? 
hScale16To19_c : hScale16To15_c; } if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { - if (c->srcRange) { - c->lumConvertRange = lumRangeFromJpeg_c; - c->chrConvertRange = chrRangeFromJpeg_c; + if (c->dstBpc <= 10) { + if (c->srcRange) { + c->lumConvertRange = lumRangeFromJpeg_c; + c->chrConvertRange = chrRangeFromJpeg_c; + } else { + c->lumConvertRange = lumRangeToJpeg_c; + c->chrConvertRange = chrRangeToJpeg_c; + } } else { - c->lumConvertRange = lumRangeToJpeg_c; - c->chrConvertRange = chrRangeToJpeg_c; - } - } - } else { - c->hScale = hScale16_c; - c->scale19To15Fw = scale19To15Fw_c; - c->scale8To16Rv = scale8To16Rv_c; - - if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { if (c->srcRange) { c->lumConvertRange = lumRangeFromJpeg16_c; c->chrConvertRange = chrRangeFromJpeg16_c; diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 9492303301..d09477ec4c 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -64,11 +64,16 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], * without any additional vertical scaling (or point-scaling). 
* * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the 4 output planes (Y/U/V/A) + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the 4 output planes (Y/U/V/A). For >8bit + * output, this is in uint16_t * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc */ @@ -82,14 +87,19 @@ typedef void (*yuv2planar1_fn) (struct SwsContext *c, * * @param c SWS scaling context * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) * @param lumFilterSize number of vertical luma/alpha input lines to scale * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) * @param chrFilterSize number of 
vertical chroma input lines to scale - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the 4 output planes (Y/U/V/A) + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the 4 output planes (Y/U/V/A). For >8bit + * output, this is in uint16_t * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc */ @@ -105,11 +115,16 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter, * that this function may do chroma scaling, see the "uvalpha" argument. * * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the output plane + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the output plane. For 16bit output, this is + * uint16_t * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param uvalpha chroma scaling coefficient for the second line of chroma @@ -132,11 +147,16 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, * output by doing bilinear scaling between two input lines. 
* * @param c SWS scaling context - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the output plane + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the output plane. For 16bit output, this is + * uint16_t * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param yalpha luma/alpha scaling coefficients for the second input line. 
@@ -160,14 +180,19 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], * * @param c SWS scaling context * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] - * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output + * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) * @param lumFilterSize number of vertical luma/alpha input lines to scale * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] - * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output - * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output + * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) * @param chrFilterSize number of vertical chroma input lines to scale - * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output - * @param dest pointer to the output plane + * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, + * 19-bit for 16bit output (in int32_t) + * @param dest pointer to the output plane. For 16bit output, this is + * uint16_t * @param dstW width of lumSrc and alpSrc in pixels, number of pixels * to write into dest[] * @param y vertical line number for this output. This does not need @@ -207,7 +232,7 @@ typedef struct SwsContext { enum PixelFormat srcFormat; ///< Source pixel format. int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format. int srcFormatBpp; ///< Number of bits per pixel of the source pixel format. - int scalingBpp; + int dstBpc, srcBpc; int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image. 
int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image. int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image. @@ -431,17 +456,19 @@ typedef struct SwsContext { * lines, to produce one (differently sized) line of output data. * * @param dst pointer to destination buffer for horizontally scaled - * data. If the scaling depth (SwsContext->scalingBpp) is - * 8, data will be 15bpp in 16bits (int16_t) width. If - * scaling depth is 16, data will be 19bpp in 32bpp - * (int32_t) width. + * data. If the number of bits per component of one + * destination pixel (SwsContext->dstBpc) is <= 10, data + * will be 15bpc in 16bits (int16_t) width. Else (i.e. + * SwsContext->dstBpc == 16), data will be 19bpc in + * 32bits (int32_t) width. * @param dstW width of destination image - * @param src pointer to source data to be scaled. If scaling depth - * is 8, this is 8bpp in 8bpp (uint8_t) width. If scaling - * depth is 16, this is native depth in 16bbp (uint16_t) - * width. In other words, for 9-bit YUV input, this is - * 9bpp, for 10-bit YUV input, this is 10bpp, and for - * 16-bit RGB or YUV, this is 16bpp. + * @param src pointer to source data to be scaled. If the number of + * bits per component of a source pixel (SwsContext->srcBpc) + * is 8, this is 8bpc in 8bits (uint8_t) width. Else + * (i.e. SwsContext->srcBpc > 8), this is native depth + * in 16bits (uint16_t) width. In other words, for 9-bit + * YUV input, this is 9bpc, for 10-bit YUV input, this is + * 10bpc, and for 16-bit RGB or YUV, this is 16bpc. * @param filter filter coefficients to be used per output pixel for * scaling. This contains 14bpp filtering coefficients. * Guaranteed to contain dstW * filterSize entries. 
@@ -461,15 +488,6 @@ typedef struct SwsContext { void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed. void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed. - /** - * dst[..] = (src[..] << 8) | src[..]; - */ - void (*scale8To16Rv)(uint16_t *dst, const uint8_t *src, int len); - /** - * dst[..] = src[..] >> 4; - */ - void (*scale19To15Fw)(int16_t *dst, const int32_t *src, int len); - int needs_hcscale; ///< Set if there are chroma planes to be converted. } SwsContext; diff --git a/libswscale/utils.c b/libswscale/utils.c index 525a370e7c..2b52199a35 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -853,12 +853,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) } } - c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1, - av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8; - if (c->scalingBpp == 16) + c->srcBpc = 1 + av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1; + if (c->srcBpc < 8) + c->srcBpc = 8; + c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1; + if (c->dstBpc < 8) + c->dstBpc = 8; + if (c->dstBpc == 16) dst_stride <<= 1; - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2 * c->scalingBpp >> 3, fail); - if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) { + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, + FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3, + fail); + if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->srcBpc == 8 && c->dstBpc <= 10) { c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 
1 : 0; if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { if (flags&SWS_PRINT_INFO) @@ -1011,8 +1017,8 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+16, fail); c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; } - // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) - c->uv_off_px = dst_stride_px + 64 / c->scalingBpp; + // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate) + c->uv_off_px = dst_stride_px + 64 / (c->dstBpc &~ 7); c->uv_off_byte = dst_stride + 16; for (i=0; ivChrBufSize; i++) { FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail); diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 28ec4d2d9c..0a5f5d5f30 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -2316,7 +2316,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } - if (c->scalingBpp == 8) { + if (c->srcBpc == 8 && c->dstBpc <= 10) { #if !COMPILE_TEMPLATE_MMX2 c->hScale = RENAME(hScale ); #endif /* !COMPILE_TEMPLATE_MMX2 */ diff --git a/tests/ref/lavfi/pixdesc b/tests/ref/lavfi/pixdesc index 3730988777..879314f23d 100644 --- a/tests/ref/lavfi/pixdesc +++ b/tests/ref/lavfi/pixdesc @@ -1,8 +1,8 @@ abgr 037bf9df6a765520ad6d490066bf4b89 argb c442a8261c2265a07212ef0f72e35f5a bgr24 0d0cb38ab3fa0b2ec0865c14f78b217b -bgr48be 74dedaaacae8fd1ef46e05f78cf29d62 -bgr48le 0eb7d30801eac6058814bddd330b3c76 +bgr48be 00624e6c7ec7ab19897ba2f0a3257fe8 +bgr48le d02c235ebba7167881ca2d576497ff84 bgr4_byte 50d23cc82d9dcef2fd12adb81fb9b806 bgr555be 49f01b1f1f0c84fd9e776dd34cc3c280 bgr555le 378d6ac4223651a1adcbf94a3d0d807b @@ -18,8 +18,8 @@ monow 9251497f3b0634f1165d12d5a289d943 nv12 e0af357888584d36eec5aa0f673793ef nv21 9a3297f3b34baa038b1f37cb202b512f rgb24 b41eba9651e1b5fe386289b506188105 -rgb48be e3bc84c9af376fb6d0f0293cc7b713a6 
-rgb48le f51c0e71638a822458329abb2f4052c7 +rgb48be cc139ec1dd9451f0e049c0cb3a0c8aa2 +rgb48le 86c5608904f75360d492dbc5c9589969 rgb4_byte c93ba89b74c504e7f5ae9d9ab1546c73 rgb555be 912a62c5e53bfcbac2a0340e10973cf2 rgb555le a937a0fc764fb57dc1b3af87cba0273c @@ -38,14 +38,14 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 -yuv422p10le 3f478be644add24b6cc77e718a6e2afa -yuv422p16be dc9886f2fccf87cc54b27e071a2c251e -yuv422p16le f181c8d8436f1233ba566d9bc88005ec +yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 +yuv422p10le d0607c260a45c973e6639f4e449730ad +yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed +yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv444p 0a98447b78fd476aa39686da6a74fa2e -yuv444p16be af555dbaa401b142a995566864f47545 -yuv444p16le a803e8016997dad95c5b2a72f54c34d6 +yuv444p16be 3ad639fff73e56f3b09dd20c335478d6 +yuv444p16le 8a7e66dc91ab7971fd24a9105ff2699b yuva420p a29884f3f3dfe1e00b961bc17bef3d47 yuvj420p 32eec78ba51857b16ce9b813a49b7189 yuvj422p 0dfa0ed434f73be51428758c69e082cb diff --git a/tests/ref/lavfi/pixfmts_copy b/tests/ref/lavfi/pixfmts_copy index 3730988777..879314f23d 100644 --- a/tests/ref/lavfi/pixfmts_copy +++ b/tests/ref/lavfi/pixfmts_copy @@ -1,8 +1,8 @@ abgr 037bf9df6a765520ad6d490066bf4b89 argb c442a8261c2265a07212ef0f72e35f5a bgr24 0d0cb38ab3fa0b2ec0865c14f78b217b -bgr48be 74dedaaacae8fd1ef46e05f78cf29d62 -bgr48le 0eb7d30801eac6058814bddd330b3c76 +bgr48be 00624e6c7ec7ab19897ba2f0a3257fe8 +bgr48le d02c235ebba7167881ca2d576497ff84 bgr4_byte 50d23cc82d9dcef2fd12adb81fb9b806 bgr555be 49f01b1f1f0c84fd9e776dd34cc3c280 bgr555le 378d6ac4223651a1adcbf94a3d0d807b @@ -18,8 +18,8 @@ monow 9251497f3b0634f1165d12d5a289d943 nv12 e0af357888584d36eec5aa0f673793ef nv21 9a3297f3b34baa038b1f37cb202b512f rgb24 b41eba9651e1b5fe386289b506188105 -rgb48be 
e3bc84c9af376fb6d0f0293cc7b713a6 -rgb48le f51c0e71638a822458329abb2f4052c7 +rgb48be cc139ec1dd9451f0e049c0cb3a0c8aa2 +rgb48le 86c5608904f75360d492dbc5c9589969 rgb4_byte c93ba89b74c504e7f5ae9d9ab1546c73 rgb555be 912a62c5e53bfcbac2a0340e10973cf2 rgb555le a937a0fc764fb57dc1b3af87cba0273c @@ -38,14 +38,14 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 -yuv422p10le 3f478be644add24b6cc77e718a6e2afa -yuv422p16be dc9886f2fccf87cc54b27e071a2c251e -yuv422p16le f181c8d8436f1233ba566d9bc88005ec +yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 +yuv422p10le d0607c260a45c973e6639f4e449730ad +yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed +yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv444p 0a98447b78fd476aa39686da6a74fa2e -yuv444p16be af555dbaa401b142a995566864f47545 -yuv444p16le a803e8016997dad95c5b2a72f54c34d6 +yuv444p16be 3ad639fff73e56f3b09dd20c335478d6 +yuv444p16le 8a7e66dc91ab7971fd24a9105ff2699b yuva420p a29884f3f3dfe1e00b961bc17bef3d47 yuvj420p 32eec78ba51857b16ce9b813a49b7189 yuvj422p 0dfa0ed434f73be51428758c69e082cb diff --git a/tests/ref/lavfi/pixfmts_crop b/tests/ref/lavfi/pixfmts_crop index e3bb88c101..d11720f83c 100644 --- a/tests/ref/lavfi/pixfmts_crop +++ b/tests/ref/lavfi/pixfmts_crop @@ -1,8 +1,8 @@ abgr cd761690872843d1b7ab0c695393c751 argb 2ec6ef18769bcd651c2e8904d5a3ee67 bgr24 3450fd00cf1493d1ded75544d82ba3ec -bgr48be a9a7d177cef0914d3f1d266f00dff676 -bgr48le b475d1b529ed80c728ddbacd22d35281 +bgr48be 18ca4002732f278cc9f525215c2fca41 +bgr48le 395a4c187c4e95217d089bd3df9f3654 bgr4_byte 2f6ac3cdd4676ab4e2982bdf0664945b bgr555be d3a7c273604723adeb7e5f5dd1c4272b bgr555le d22442fc13b464f9ba455b08df4e981f @@ -14,8 +14,8 @@ gray 8c4850e66562a587a292dc728a65ea4a gray16be daa5a6b98fb4a280c57c57bff1a2ab5a gray16le 84f5ea7259073edcb893113b42213c8e rgb24 
3b90ed64b687d3dc186c6ef521dc71a8 -rgb48be b8f9fd6aaa24d75275ee2f8b8a7b9e55 -rgb48le 3e52e831a040f086c3ae983241172cce +rgb48be e6fd353c0eb9bea889423954414bea35 +rgb48le 68a1723da11ce08b502d42e204376503 rgb4_byte 6958029f73c6cdfed4f71020d816f027 rgb555be 41a7d1836837bc90f2cae19a9c9df3b3 rgb555le eeb78f8ce6186fba55c941469e60ba67 @@ -29,12 +29,12 @@ yuv420p bfea0188ddd4889787c403caae119cc7 yuv420p16be 8365eff38b8c329aeb95fc605fa229bb yuv420p16le 5e8dd38d973d5854abe1ad4efad20cc1 yuv422p f2f930a91fe00d4252c4720b5ecd8961 -yuv422p16be 93f9b6f33f9529db6de6a9f0ddd70eb5 -yuv422p16le 2e66dcfec54ca6b57aa4bbd9ac234639 +yuv422p16be 167e4338811a7d272925a4c6417d60da +yuv422p16le 3359395d5875d581fa1e975013d30114 yuv440p 2472417d980e395ad6843cbb8b633b29 yuv444p 1f151980486848c96bc5585ced99003e -yuv444p16be e7d1ecf0c11a41b5db192f761f55bd3c -yuv444p16le 3298a0043d982e7cf1a33a1292fa11f0 +yuv444p16be 5d0c0ea66ab43c0c590d8c2a9256e43f +yuv444p16le 3c0a747c1b64feb0ab8dfba92f92579a yuva420p 7536753dfbc7932560fb50c921369a0e yuvj420p 21f891093006d42d7683b0e1d773a657 yuvj422p 9a43d474c407590ad8f213880586b45e diff --git a/tests/ref/lavfi/pixfmts_hflip b/tests/ref/lavfi/pixfmts_hflip index 2084d581e1..5eb1b31140 100644 --- a/tests/ref/lavfi/pixfmts_hflip +++ b/tests/ref/lavfi/pixfmts_hflip @@ -1,8 +1,8 @@ abgr 49468c6c9ceee5d52b08b1270a909323 argb 50ba9f16c6475530602f2983278b82d0 bgr24 cc53d2011d097972db0d22756c3699e3 -bgr48be 90374bc92471f1bd4931d71ef8b73f50 -bgr48le 696f628d0dd32121e60a0d61ac47d6e6 +bgr48be 815192d3757c66de97b0d51818acbe0f +bgr48le 8e4184ac6eae251b4bace51dba7d790c bgr4_byte aac987e7d1a6a96477cfc0b48a4285de bgr555be bc07265898440116772200390d70c092 bgr555le ccee08679bac84a1f960c6c9070c5538 @@ -14,8 +14,8 @@ gray 03efcb4ab52a24c0af0e03cfd26c9377 gray16be 9bcbca979601ddc4869f846f08f3d1dd gray16le c1b8965adcc7f847ee343149ff507073 rgb24 754f1722fc738590cc407ac65749bfe8 -rgb48be 2397b9d3c296ac15f8a2325a703f81c7 -rgb48le 527043c72546d8b4bb1ce2dea4b294c3 +rgb48be 
d690412ca5fada031b5da47b87096248 +rgb48le c901feb564232f5d0bc0eabd66dae3e7 rgb4_byte c8a3f995fcf3e0919239ea2c413ddc29 rgb555be 045ce8607d3910586f4d97481dda8632 rgb555le 8778ee0cf58ce9ad1d99a1eca9f95e87 @@ -29,12 +29,12 @@ yuv420p 2d5c80f9ba2ddd85b2aeda3564cc7d64 yuv420p16be 758b0c1e2113b15e7afde48da4e4d024 yuv420p16le 480ccd951dcb806bc875d307e02e50a0 yuv422p 6e728f4eb9eae287c224f396d84be6ea -yuv422p16be 8657d2c8d443940300fdb4028d555631 -yuv422p16le 4ab27609981e50de5b1150125718ae76 +yuv422p16be 69cf0605496c321546899a8442ee64fb +yuv422p16le f0b443fea72f4b6f462859a73b159664 yuv440p a99e2b57ed601f39852715c9d675d0d3 yuv444p 947e47f7bb5fdccc659d19b7df2b6fc3 -yuv444p16be a5154ce329db0d2caf0bd43f1347dba3 -yuv444p16le 1f703308b90feb048191b3bccc695671 +yuv444p16be bc7d53923cff1d7e98d24540845fb64b +yuv444p16le 5df206a93f85ef8b77f5bdc81d9b0a0b yuva420p d83ec0c01498189f179ec574918185f1 yuvj420p df3aaaec3bb157c3bde5f0365af30f4f yuvj422p d113871528d510a192797af59df9c05c diff --git a/tests/ref/lavfi/pixfmts_null b/tests/ref/lavfi/pixfmts_null index 3730988777..879314f23d 100644 --- a/tests/ref/lavfi/pixfmts_null +++ b/tests/ref/lavfi/pixfmts_null @@ -1,8 +1,8 @@ abgr 037bf9df6a765520ad6d490066bf4b89 argb c442a8261c2265a07212ef0f72e35f5a bgr24 0d0cb38ab3fa0b2ec0865c14f78b217b -bgr48be 74dedaaacae8fd1ef46e05f78cf29d62 -bgr48le 0eb7d30801eac6058814bddd330b3c76 +bgr48be 00624e6c7ec7ab19897ba2f0a3257fe8 +bgr48le d02c235ebba7167881ca2d576497ff84 bgr4_byte 50d23cc82d9dcef2fd12adb81fb9b806 bgr555be 49f01b1f1f0c84fd9e776dd34cc3c280 bgr555le 378d6ac4223651a1adcbf94a3d0d807b @@ -18,8 +18,8 @@ monow 9251497f3b0634f1165d12d5a289d943 nv12 e0af357888584d36eec5aa0f673793ef nv21 9a3297f3b34baa038b1f37cb202b512f rgb24 b41eba9651e1b5fe386289b506188105 -rgb48be e3bc84c9af376fb6d0f0293cc7b713a6 -rgb48le f51c0e71638a822458329abb2f4052c7 +rgb48be cc139ec1dd9451f0e049c0cb3a0c8aa2 +rgb48le 86c5608904f75360d492dbc5c9589969 rgb4_byte c93ba89b74c504e7f5ae9d9ab1546c73 rgb555be 912a62c5e53bfcbac2a0340e10973cf2 
rgb555le a937a0fc764fb57dc1b3af87cba0273c @@ -38,14 +38,14 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc yuv420p9be ce880fa07830e5297c22acf6e20555ce yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a yuv422p c9bba4529821d796a6ab09f6a5fd355a -yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 -yuv422p10le 3f478be644add24b6cc77e718a6e2afa -yuv422p16be dc9886f2fccf87cc54b27e071a2c251e -yuv422p16le f181c8d8436f1233ba566d9bc88005ec +yuv422p10be bdc13b630fd668b34c6fe1aae28dfc71 +yuv422p10le d0607c260a45c973e6639f4e449730ad +yuv422p16be 4e9b3b3467aeebb6a528cee5966800ed +yuv422p16le f87c81bf16916b64d201359be0b4b6f4 yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf yuv444p 0a98447b78fd476aa39686da6a74fa2e -yuv444p16be af555dbaa401b142a995566864f47545 -yuv444p16le a803e8016997dad95c5b2a72f54c34d6 +yuv444p16be 3ad639fff73e56f3b09dd20c335478d6 +yuv444p16le 8a7e66dc91ab7971fd24a9105ff2699b yuva420p a29884f3f3dfe1e00b961bc17bef3d47 yuvj420p 32eec78ba51857b16ce9b813a49b7189 yuvj422p 0dfa0ed434f73be51428758c69e082cb diff --git a/tests/ref/lavfi/pixfmts_scale b/tests/ref/lavfi/pixfmts_scale index 392b9ce601..4787614bd2 100644 --- a/tests/ref/lavfi/pixfmts_scale +++ b/tests/ref/lavfi/pixfmts_scale @@ -1,8 +1,8 @@ abgr d894cb97f6c80eb21bdbe8a4eea62d86 argb 54346f2b2eef10919e0f247241df3b24 bgr24 570f8d6b51a838aed022ef67535f6bdc -bgr48be 07f7a0cc34feb3646434d47c0cec8cee -bgr48le 9abd2c3a66088e6c9078232064eba61e +bgr48be 390d3058a12a99c2b153ed7922508bea +bgr48le 39fe06feb4ec1d9730dccc04a0cfac4c bgr4_byte ee1d35a7baf8e9016891929a2f565c0b bgr555be de8901c1358834fddea060fcb3a67beb bgr555le 36b745067197f9ca8c1731cac51329c9 @@ -18,8 +18,8 @@ monow d31772ebaa877fc2a78565937f7f9673 nv12 4676d59db43d657dc12841f6bc3ab452 nv21 69c699510ff1fb777b118ebee1002f14 rgb24 514692e28e8ff6860e415ce4fcf6eb8c -rgb48be f18841c19fc6d9c817a3095f557b9bc5 -rgb48le 819e7b8acd8965ba57ba46198a5cc9bf +rgb48be 8fac63787a711886030f8e056872b488 +rgb48le ab92f2763a2eb264c3870cc758f97149 rgb4_byte d81ffd3add95842a618eec81024f0b5c rgb555be 
4607309f9f217d51cbb53d13b84b4537 rgb555le a350ef1dc2c9688ed49e7ba018843795 @@ -31,21 +31,21 @@ uyvy422 314bd486277111a95d9369b944fa0400 yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5 yuv411p 1143e7c5cc28fe0922b051b17733bc4c yuv420p fdad2d8df8985e3d17e73c71f713cb14 -yuv420p10be af5429f27b9f95bf955e795921c65cdc -yuv420p10le d0b47e6a8a44e6b5ca0fe4349a4e393b +yuv420p10be d7695b9117d5b52819c569459e42669b +yuv420p10le 0ac6d448db2df5f3d1346aa81f2b5f50 yuv420p16be 9688e33e03b8c8275ab2fb1df0f06bee yuv420p16le cba8b390ad5e7b8678e419b8ce79c008 -yuv420p9be a073b2d93b2a7dce2069ba252bc43175 -yuv420p9le b67233c3c7d93763d07d88f697c145e1 +yuv420p9be 8fa6e007b1a40f34eaa3e2beb73ea8af +yuv420p9le a7b131a7dd06906a5aef2e36d117b972 yuv422p 918e37701ee7377d16a8a6c119c56a40 -yuv422p10be 533fd21e7943c20a1026b19069b3b867 -yuv422p10le 59b20a4a8609f5da2dc54c78aea11e6c -yuv422p16be 2cf502d7d386db1f1b3b946679d897b1 -yuv422p16le 3002a4e47520731dcee5929aff49eb74 +yuv422p10be 35206fcd7e00ee582a8c366b37d57d1d +yuv422p10le 396f930e2da02f149ab9dd5b781cbe8d +yuv422p16be 285993ee0c0f4f8e511ee46f93c5f38c +yuv422p16le 61bfcee8e54465f760164f5a75d40b5e yuv440p 461503fdb9b90451020aa3b25ddf041c yuv444p 81b2eba962d12e8d64f003ac56f6faf2 -yuv444p16be b9f051ce7335923fe33efd162e48da1d -yuv444p16le fa47e317efac988b4a7fa55141c89126 +yuv444p16be 2677f3074d255f9dab625e9e2e092ca5 +yuv444p16le 65fa92521ef97088599ea83f9508cd5b yuva420p 8673a9131fb47de69788863f93a50eb7 yuvj420p 30427bd6caf5bda93a173dbebe759e09 yuvj422p fc8288f64fd149573f73cf8da05d8e6d diff --git a/tests/ref/lavfi/pixfmts_vflip b/tests/ref/lavfi/pixfmts_vflip index 2b62518a28..6f51dc7dd7 100644 --- a/tests/ref/lavfi/pixfmts_vflip +++ b/tests/ref/lavfi/pixfmts_vflip @@ -1,8 +1,8 @@ abgr 25e72e9dbd01ab00727c976d577f7be5 argb 19869bf1a5ac0b6af4d8bbe2c104533c bgr24 89108a4ba00201f79b75b9305c42352d -bgr48be 908b4edb525fd154a95a3744c4ab5420 -bgr48le 796c2072d6fa13a091f5c5b175417ed5 +bgr48be 2f23931844f57641f3737348182d118c +bgr48le 4242a026012b6c135a6aa138a6d67031 
bgr4_byte 407fcf564ed764c38e1d748f700ab921 bgr555be f739d2519f7e9d494359bf67a3821537 bgr555le bd7b3ec4d684dfad075d89a606cb8b74 @@ -18,8 +18,8 @@ monow ff9869d067ecb94eb9d90c9750c31fea nv12 046f00f598ce14d9854a3534a5c99114 nv21 01ea369dd2d0d3ed7451dc5c8d61497f rgb24 eaefabc168d0b14576bab45bc1e56e1e -rgb48be 8e347deca2902e7dc1ece261322577d8 -rgb48le 2034e485f946e4064b5fb9be09865e55 +rgb48be 62dd185862ed142283bd300eb6dbd216 +rgb48le dcb76353268bc5862194d131762220da rgb4_byte 8c6ff02df0b06dd2d574836c3741b2a2 rgb555be 40dc33cfb5cf56aac1c5a290ac486c36 rgb555le 4f8eaad29a17e0f8e9d8ab743e76b999 @@ -38,14 +38,14 @@ yuv420p16le 0f609e588e5a258644ef85170d70e030 yuv420p9be be40ec975fb2873891643cbbbddbc3b0 yuv420p9le 7e606310d3f5ff12badf911e8f333471 yuv422p d7f5cb44d9b0210d66d6a8762640ab34 -yuv422p10be a28b051168af49435c04af5f58dce47b -yuv422p10le 35936ffff30df2697f47b9b8d2cb7dea -yuv422p16be 51d9aa4e78d121c226d919ce97976fe4 -yuv422p16le 12965c54bda8932ca72da194419a9908 +yuv422p10be 588fe319b96513c32e21d3e32b45447f +yuv422p10le 11b57f2bd9661024153f3973b9090cdb +yuv422p16be c092d083548c2a144c372a98c46875c7 +yuv422p16le c071b9397a416d51cbe339345cbcba84 yuv440p 876385e96165acf51271b20e5d85a416 yuv444p 9c3c667d1613b72d15bc6d851c5eb8f7 -yuv444p16be 6502abd75030d462c58d99a8673ec517 -yuv444p16le cd7e88b6d08425450a57555bc86ab210 +yuv444p16be 6a954614fd2a8ae0df53e4fd76937af8 +yuv444p16le 65613965fb58cc4c3cd480a68b6540ea yuva420p c705d1cf061d8c6580ac690b55f92276 yuvj420p 41fd02b204da0ab62452cd14b595e2e4 yuvj422p 7f6ca9bc1812cde02036d7d29a7cce43 diff --git a/tests/ref/vsynth1/dnxhd_720p_10bit b/tests/ref/vsynth1/dnxhd_720p_10bit index 4a815a973c..cdab77c16a 100644 --- a/tests/ref/vsynth1/dnxhd_720p_10bit +++ b/tests/ref/vsynth1/dnxhd_720p_10bit @@ -1,4 +1,4 @@ -3ed972af47641d39a19916b0cd119120 *./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd -2293760 ./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd -b64efb0b4eb934bb66f4530c12d5d7fa *./tests/data/dnxhd_720p_10bit.vsynth1.out.yuv -stddev: 6.27 PSNR: 
32.18 MAXDIFF: 65 bytes: 760320/ 7603200 +cb29b6ae4e1562d95f9311991fef98df *./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd + 2293760 ./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd +2f45bb1af7da5dd3dca870ac87237b7d *./tests/data/dnxhd_720p_10bit.vsynth1.out.yuv +stddev: 6.27 PSNR: 32.18 MAXDIFF: 64 bytes: 760320/ 7603200 diff --git a/tests/ref/vsynth2/dnxhd_720p_10bit b/tests/ref/vsynth2/dnxhd_720p_10bit index 734df4ffe2..81f53d8353 100644 --- a/tests/ref/vsynth2/dnxhd_720p_10bit +++ b/tests/ref/vsynth2/dnxhd_720p_10bit @@ -1,4 +1,4 @@ -0b8389955cce583bd2db7d2e727a6f15 *./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd -2293760 ./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd -bde04e992df2473e89aef4460265332d *./tests/data/dnxhd_720p_10bit.vsynth2.out.yuv -stddev: 1.45 PSNR: 44.89 MAXDIFF: 22 bytes: 760320/ 7603200 +8648511257afb816b5b911706ca391db *./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd + 2293760 ./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd +391b6f5aa7c7b488b479cb43d420b860 *./tests/data/dnxhd_720p_10bit.vsynth2.out.yuv +stddev: 1.35 PSNR: 45.46 MAXDIFF: 23 bytes: 760320/ 7603200 From 28ca701e0b57dfaf03ab1835ce62faa1de8c4712 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 12 Aug 2011 12:32:31 -0700 Subject: [PATCH 2/4] h264: add missing brackets. This caused failure of a few fate tests. 
--- libavcodec/h264_refs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c index 50925ce3cc..b7e43e7dca 100644 --- a/libavcodec/h264_refs.c +++ b/libavcodec/h264_refs.c @@ -515,9 +515,10 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ pic = find_short(h, frame_num, &j); if(!pic){ if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg] - || h->long_ref[mmco[i].long_arg]->frame_num != frame_num) - av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n"); - err = AVERROR_INVALIDDATA; + || h->long_ref[mmco[i].long_arg]->frame_num != frame_num) { + av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n"); + err = AVERROR_INVALIDDATA; + } continue; } } From 78622ef362c7c8d4606fe1a38f612f039413c7c3 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Fri, 12 Aug 2011 21:50:28 +0200 Subject: [PATCH 3/4] rv34: free+allocate buffer instead of reallocating it to preserve alignment Signed-off-by: Ronald S. 
Bultje --- libavcodec/rv34.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index cdc559fa07..253c3952d2 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -1311,7 +1311,8 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int if (!r->tmp_b_block_base || s->width != r->si.width || s->height != r->si.height) { int i; - r->tmp_b_block_base = av_realloc(r->tmp_b_block_base, s->linesize * 48); + av_free(r->tmp_b_block_base); //realloc() doesn't guarantee alignment + r->tmp_b_block_base = av_malloc(s->linesize * 48); for (i = 0; i < 2; i++) r->tmp_b_block_y[i] = r->tmp_b_block_base + i * 16 * s->linesize; for (i = 0; i < 4; i++) From 3304a1e69a8a050eb66d2304acd2d01354fa1aac Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Wed, 10 Aug 2011 02:16:26 +0200 Subject: [PATCH 4/4] swscale: add dithering to yuv2yuvX_altivec_real It just does that part in scalar form, I doubt using a vector store over 2 array would speed it up particularly. The function should be written to not use a scratch buffer. --- libswscale/ppc/swscale_altivec.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index f988b534ac..7fdca39acb 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -92,6 +92,7 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) } } +//FIXME remove the usage of scratch buffers. 
static void yuv2yuvX_altivec_real(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, @@ -101,17 +102,13 @@ yuv2yuvX_altivec_real(SwsContext *c, uint8_t *dest[4], int dstW, int chrDstW) { uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2]; - const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)}; + const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8; register int i, j; { DECLARE_ALIGNED(16, int, val)[dstW]; - for (i = 0; i < (dstW -7); i+=4) { - vec_st(vini, i << 2, val); - } - for (; i < dstW; i++) { - val[i] = (1 << 18); - } + for (i=0; i