From e29d996149692f2ec80c6f20f6a427c7287ab9a4 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer
Date: Wed, 17 Jun 2015 00:01:47 +0200
Subject: [PATCH] swscale/output: Add rgba64/rgb48/bgra64/bgr48 output functions with full chroma interpolation

Signed-off-by: Michael Niedermayer
---
Review notes (placed after the "---" marker, so not part of the commit message):
- Line breaks and indentation were rebuilt from a whitespace-collapsed copy;
  context-line whitespace may not match the tree byte for byte.
- Versus the original posting: "-128 << N" is written "-(128 << N)" or
  "- (128 << N)" and "abuf0[i] << 11" is written "abuf0[i] * (1 << 11)"
  (left-shifting a negative value is undefined in C); the stray ';' after '{'
  and the shadowing "int A" in the else branch are gone; the three templates
  have header comments. Hunk counts and the diffstat were updated to match;
  the commit id and index ids still name the original posting.

 libswscale/output.c                 | 302 +++++++++++++++++++++++++++-
 libswscale/utils.c                  |   8 +
 tests/ref/fate/filter-pixfmts-scale |  20 +-
 3 files changed, 319 insertions(+), 11 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 56ed5f62eb..f63af3b210 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -925,6 +925,216 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
     }
 }
 
+/**
+ * Vertically filter lumFilterSize luma rows and chrFilterSize chroma rows and
+ * write dstW pixels of packed 16-bit-per-component RGB(A) to dest. Chroma is
+ * read once per output pixel (chrUSrc[j][i], chrVSrc[j][i]).
+ *
+ * With eightbytes set, four components are stored per pixel: the fourth is
+ * filtered from alpSrc when hasAlpha is set, otherwise A keeps its initial
+ * value 0xffff << 14. Without eightbytes three components are stored.
+ * Component and byte order come from output_pixel/R_B/B_R, which are defined
+ * outside this hunk (presumably selected by target); y is not read here.
+ */
+static av_always_inline void
+yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                       const int32_t **lumSrc, int lumFilterSize,
+                       const int16_t *chrFilter, const int32_t **chrUSrc,
+                       const int32_t **chrVSrc, int chrFilterSize,
+                       const int32_t **alpSrc, uint16_t *dest, int dstW,
+                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+{
+    int i;
+    int A = 0xffff<<14;
+
+    for (i = 0; i < dstW; i++) {
+        int j;
+        int Y  = -0x40000000;
+        int U  = -(128 << 23); // 19
+        int V  = -(128 << 23);
+        int R, G, B;
+
+        for (j = 0; j < lumFilterSize; j++) {
+            Y += lumSrc[j][i]  * (unsigned)lumFilter[j];
+        }
+        for (j = 0; j < chrFilterSize; j++) {
+            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+        }
+
+        if (hasAlpha) {
+            A = -0x40000000;
+            for (j = 0; j < lumFilterSize; j++) {
+                A += alpSrc[j][i] * (unsigned)lumFilter[j];
+            }
+            A >>= 1;
+            A += 0x20002000;
+        }
+
+        // 8bit: 12+15=27; 16-bit: 12+19=31
+        Y >>= 14; // 10
+        Y += 0x10000;
+        U >>= 14;
+        V >>= 14;
+
+        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
+        Y -= c->yuv2rgb_y_offset;
+        Y *= c->yuv2rgb_y_coeff;
+        Y += 1 << 13; // 21
+        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
+
+        R =                            V * c->yuv2rgb_v2r_coeff;
+        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+        B =                            U * c->yuv2rgb_u2b_coeff;
+
+        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
+        output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
+        output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
+        output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
+        if (eightbytes) {
+            output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
+            dest += 4;
+        } else {
+            dest += 3;
+        }
+    }
+}
+
+/**
+ * Two-row variant: blend buf[0]/buf[1] with weights (4096 - yalpha, yalpha)
+ * and the chroma rows with (4096 - uvalpha, uvalpha); the conversion and store
+ * steps are the same as in yuv2rgba64_full_X_c_template().
+ */
+static av_always_inline void
+yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
+                       const int32_t *ubuf[2], const int32_t *vbuf[2],
+                       const int32_t *abuf[2], uint16_t *dest, int dstW,
+                       int yalpha, int uvalpha, int y,
+                       enum AVPixelFormat target, int hasAlpha, int eightbytes)
+{
+    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
+                  *abuf0 = hasAlpha ? abuf[0] : NULL,
+                  *abuf1 = hasAlpha ? abuf[1] : NULL;
+    int  yalpha1 = 4096 - yalpha;
+    int uvalpha1 = 4096 - uvalpha;
+    int i;
+    int A = 0xffff<<14;
+
+    for (i = 0; i < dstW; i++) {
+        int Y  = (buf0[i]  * yalpha1  + buf1[i]  * yalpha) >> 14;
+        int U  = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha - (128 << 23)) >> 14;
+        int V  = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha - (128 << 23)) >> 14;
+        int R, G, B;
+
+        Y -= c->yuv2rgb_y_offset;
+        Y *= c->yuv2rgb_y_coeff;
+        Y += 1 << 13;
+
+        R =                            V * c->yuv2rgb_v2r_coeff;
+        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+        B =                            U * c->yuv2rgb_u2b_coeff;
+
+        if (hasAlpha) {
+            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 1;
+
+            A += 1 << 13;
+        }
+
+        output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
+        output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
+        output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
+        if (eightbytes) {
+            output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
+            dest += 4;
+        } else {
+            dest += 3;
+        }
+    }
+}
+
+/**
+ * Single-luma-row variant: for uvalpha < 2048 only ubuf[0]/vbuf[0] are read,
+ * otherwise the two chroma rows are averaged; the conversion and store steps
+ * are the same as in yuv2rgba64_full_X_c_template().
+ */
+static av_always_inline void
+yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
+                       const int32_t *ubuf[2], const int32_t *vbuf[2],
+                       const int32_t *abuf0, uint16_t *dest, int dstW,
+                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
+{
+    const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+    int i;
+    int A = 0xffff<<14;
+
+    if (uvalpha < 2048) {
+        for (i = 0; i < dstW; i++) {
+            int Y  = (buf0[i]) >> 2;
+            int U  = (ubuf0[i] - (128 << 11)) >> 2;
+            int V  = (vbuf0[i] - (128 << 11)) >> 2;
+            int R, G, B;
+
+            Y -= c->yuv2rgb_y_offset;
+            Y *= c->yuv2rgb_y_coeff;
+            Y += 1 << 13;
+
+            if (hasAlpha) {
+                A = abuf0[i] * (1 << 11);
+
+                A += 1 << 13;
+            }
+
+            R =                            V * c->yuv2rgb_v2r_coeff;
+            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+            B =                            U * c->yuv2rgb_u2b_coeff;
+
+            output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
+            output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
+            output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
+            if (eightbytes) {
+                output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
+                dest += 4;
+            } else {
+                dest += 3;
+            }
+        }
+    } else {
+        const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
+        for (i = 0; i < dstW; i++) {
+            int Y  = (buf0[i]    ) >> 2;
+            int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
+            int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
+            int R, G, B;
+
+            Y -= c->yuv2rgb_y_offset;
+            Y *= c->yuv2rgb_y_coeff;
+            Y += 1 << 13;
+
+            if (hasAlpha) {
+                A = abuf0[i] * (1 << 11);
+
+                A += 1 << 13;
+            }
+
+            R =                            V * c->yuv2rgb_v2r_coeff;
+            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+            B =                            U * c->yuv2rgb_u2b_coeff;
+
+            output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
+            output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
+            output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
+            if (eightbytes) {
+                output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
+                dest += 4;
+            } else {
+                dest += 3;
+            }
+        }
+    }
+}
+
 #undef output_pixel
 #undef r_b
 #undef b_r
@@ -988,6 +1198,19 @@ YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
 YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
 YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
 
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
+
 /*
  * Write out 2 RGB pixels in the target pixel format. This function takes a
  * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
@@ -1833,7 +2056,64 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
             }
 #endif /* !CONFIG_SMALL */
             break;
-        case AV_PIX_FMT_RGB24:
+        case AV_PIX_FMT_RGBA64LE:
+#if CONFIG_SWSCALE_ALPHA
+            if (c->alpPixBuf) {
+                *yuv2packedX = yuv2rgba64le_full_X_c;
+                *yuv2packed2 = yuv2rgba64le_full_2_c;
+                *yuv2packed1 = yuv2rgba64le_full_1_c;
+            } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+            {
+                *yuv2packedX = yuv2rgbx64le_full_X_c;
+                *yuv2packed2 = yuv2rgbx64le_full_2_c;
+                *yuv2packed1 = yuv2rgbx64le_full_1_c;
+            }
+            break;
+        case AV_PIX_FMT_RGBA64BE:
+#if CONFIG_SWSCALE_ALPHA
+            if (c->alpPixBuf) {
+                *yuv2packedX = yuv2rgba64be_full_X_c;
+                *yuv2packed2 = yuv2rgba64be_full_2_c;
+                *yuv2packed1 = yuv2rgba64be_full_1_c;
+            } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+            {
+                *yuv2packedX = yuv2rgbx64be_full_X_c;
+                *yuv2packed2 = yuv2rgbx64be_full_2_c;
+                *yuv2packed1 = yuv2rgbx64be_full_1_c;
+            }
+            break;
+        case AV_PIX_FMT_BGRA64LE:
+#if CONFIG_SWSCALE_ALPHA
+            if (c->alpPixBuf) {
+                *yuv2packedX = yuv2bgra64le_full_X_c;
+                *yuv2packed2 = yuv2bgra64le_full_2_c;
+                *yuv2packed1 = yuv2bgra64le_full_1_c;
+            } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+            {
+                *yuv2packedX = yuv2bgrx64le_full_X_c;
+                *yuv2packed2 = yuv2bgrx64le_full_2_c;
+                *yuv2packed1 = yuv2bgrx64le_full_1_c;
+            }
+            break;
+        case AV_PIX_FMT_BGRA64BE:
+#if CONFIG_SWSCALE_ALPHA
+            if (c->alpPixBuf) {
+                *yuv2packedX = yuv2bgra64be_full_X_c;
+                *yuv2packed2 = yuv2bgra64be_full_2_c;
+                *yuv2packed1 = yuv2bgra64be_full_1_c;
+            } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+            {
+                *yuv2packedX = yuv2bgrx64be_full_X_c;
+                *yuv2packed2 = yuv2bgrx64be_full_2_c;
+                *yuv2packed1 = yuv2bgrx64be_full_1_c;
+            }
+            break;
+
+        case AV_PIX_FMT_RGB24:
             *yuv2packedX = yuv2rgb24_full_X_c;
             *yuv2packed2 = yuv2rgb24_full_2_c;
             *yuv2packed1 = yuv2rgb24_full_1_c;
@@ -1843,6 +2123,26 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
             *yuv2packed2 = yuv2bgr24_full_2_c;
             *yuv2packed1 = yuv2bgr24_full_1_c;
             break;
+        case AV_PIX_FMT_RGB48LE:
+            *yuv2packedX = yuv2rgb48le_full_X_c;
+            *yuv2packed2 = yuv2rgb48le_full_2_c;
+            *yuv2packed1 = yuv2rgb48le_full_1_c;
+            break;
+        case AV_PIX_FMT_BGR48LE:
+            *yuv2packedX = yuv2bgr48le_full_X_c;
+            *yuv2packed2 = yuv2bgr48le_full_2_c;
+            *yuv2packed1 = yuv2bgr48le_full_1_c;
+            break;
+        case AV_PIX_FMT_RGB48BE:
+            *yuv2packedX = yuv2rgb48be_full_X_c;
+            *yuv2packed2 = yuv2rgb48be_full_2_c;
+            *yuv2packed1 = yuv2rgb48be_full_1_c;
+            break;
+        case AV_PIX_FMT_BGR48BE:
+            *yuv2packedX = yuv2bgr48be_full_X_c;
+            *yuv2packed2 = yuv2bgr48be_full_2_c;
+            *yuv2packed1 = yuv2bgr48be_full_1_c;
+            break;
         case AV_PIX_FMT_BGR4_BYTE:
             *yuv2packedX = yuv2bgr4_byte_full_X_c;
             *yuv2packed2 = yuv2bgr4_byte_full_2_c;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index de0c7f968a..074f8c0d13 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1158,6 +1158,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     if (flags & SWS_FULL_CHR_H_INT &&
         isAnyRGB(dstFormat)        &&
         !isPlanarRGB(dstFormat)    &&
+        dstFormat != AV_PIX_FMT_RGBA64LE &&
+        dstFormat != AV_PIX_FMT_RGBA64BE &&
+        dstFormat != AV_PIX_FMT_BGRA64LE &&
+        dstFormat != AV_PIX_FMT_BGRA64BE &&
+        dstFormat != AV_PIX_FMT_RGB48LE &&
+        dstFormat != AV_PIX_FMT_RGB48BE &&
+        dstFormat != AV_PIX_FMT_BGR48LE &&
+        dstFormat != AV_PIX_FMT_BGR48BE &&
         dstFormat != AV_PIX_FMT_RGBA  &&
         dstFormat != AV_PIX_FMT_ARGB  &&
         dstFormat != AV_PIX_FMT_BGRA  &&
diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale
index 4d1cb7d812..553a4c5568 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -6,8 +6,8 @@ bgr0                243d58ca64f97b2f415b4c63cb79f0e1
 bgr24               18744aaab4b8bce065a7144dc0ccf921
 bgr444be            920760bee08c4fa161bf060e21ebba92
 bgr444le            01be36a28ebca1a11eb4d192986cd4e9
-bgr48be             a6fee4ac9f70d0da6a4b3a0e6353ca7f
-bgr48le             9c5d30b3b31ceaf3009fc7f1cf1cf7b6
+bgr48be             3ae02769c69d2512eaa26fff65763acb
+bgr48le             a6ce2344f07b77438258b6787fe5c24c
 bgr4_byte           01efea74088e5e3343c19ee053b95f31
 bgr555be            ab353278d103d379e1ec86e5cabb645f
 bgr555le            16ccbf59297e4b9ab25fd8af5a84a95d
@@ -15,8 +15,8 @@ bgr565be            3477e19fc11f95285836f30fdff26c1d
 bgr565le            82a81e7c9d4e0431fa22f4df9694afdc
 bgr8                2c57e76ccf04d51de6acafcf35d6fa70
 bgra                d8316272bc3a360ef9dff3ecc84520a3
-bgra64be            688499004461a2ce9debadb36dbcde5b
-bgra64le            c80dda435633c301e14d5b46a7edcf8d
+bgra64be            4e6a1b9f9c18b881c27d76611d45f737
+bgra64le            efeee0abcc658ebcff049d5e74d74943
 gbrap               e97ea4a104467c482173b7eaa57c14e3
 gbrp                dc3387f925f972c61aae7eb23cdc19f0
 gbrp10be            3a6d59192b6bb89ab42252b2b4818519
@@ -39,8 +39,8 @@ rgb0                fbd27e98154efb7535826afed41e9bb0
 rgb24               e022e741451e81f2ecce1c7240b93e87
 rgb444be            db52b9ecdf98479b693e3f4bd9e77bac
 rgb444le            63288425c05f146cde5c82b85bb126e0
-rgb48be             c2e456838a71237cb1398ab5a7c35a6e
-rgb48le             6ef772549307349c599f419313c75b7a
+rgb48be             45b25016f10d54cf36eef3479afd8249
+rgb48le             40577b147620ecfb115717473d000697
 rgb4_byte           9e540a2e7193ebcbf1c7f85d192a0c4e
 rgb555be            cb5407a0d40f3d0120155daeaaa9a222
 rgb555le            c15540d1fc887882c35860634009c439
@@ -48,11 +48,11 @@ rgb565be            c69fa7d6e458509de65e911d147629a8
 rgb565le            a4a6ef89cdc10282b428cb1392f2a353
 rgb8                bcdc033b4ef0979d060dbc8893d4db58
 rgba                85bb5d03cea1c6e8002ced3373904336
-rgba64be            21611863fbbe149416a11e95877824ac
-rgba64le            35c195a441e5f8ca8e7e4ed098ecf0c1
+rgba64be            ee73e57923af984b31cc7795d13929da
+rgba64le            783d2779adfafe3548bdb671ec0de69e
 uyvy422             aeb4ba4f9f003ae21f6d18089198244f
-xyz12be             f6350b9a2f5add20d3d67f59c100166f
-xyz12le             982935a6ea6a297fd7be8aee0fda9870
+xyz12be             c7ba8345998c0141ddc079cdd29b1a40
+xyz12le             95f5d3a0de834cc495c9032a14987cde
 yuv410p             e8f49b5fb9335b62c074f7f8bb0234fc
 yuv411p             5af32557c93beb482e26e7af693104c6
 yuv420p             5d3ac239c3712143560b1dfbd48a7ddd