mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-04-14 00:58:38 +02:00
swscale: for >8bit scaling, read in native bit-depth.
For 9- and 10-bit input, this means we no longer have to upscale to 16 bits before the actual scaling or pixel format conversion, which yields a performance gain.
This commit is contained in:
parent
cdc2c1c576
commit
8a8d0ce208
@ -220,7 +220,7 @@ yuv2yuvX_altivec_real(SwsContext *c,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hScale_altivec_real(int16_t *dst, int dstW,
|
static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
|
||||||
const uint8_t *src, const int16_t *filter,
|
const uint8_t *src, const int16_t *filter,
|
||||||
const int16_t *filterPos, int filterSize)
|
const int16_t *filterPos, int filterSize)
|
||||||
{
|
{
|
||||||
|
@ -1766,59 +1766,6 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
|
|||||||
|
|
||||||
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
|
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
|
||||||
|
|
||||||
// FIXME Maybe dither instead.
|
|
||||||
static av_always_inline void
|
|
||||||
yuv9_OR_10ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
|
|
||||||
const uint16_t *srcU, const uint16_t *srcV,
|
|
||||||
int width, enum PixelFormat origin, int depth)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < width; i++) {
|
|
||||||
int upx = input_pixel(&srcU[i]);
|
|
||||||
int vpx = input_pixel(&srcV[i]);
|
|
||||||
dstU[i] = (upx << (16 - depth)) | (upx >> (2 * depth - 16));
|
|
||||||
dstV[i] = (vpx << (16 - depth)) | (vpx >> (2 * depth - 16));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static av_always_inline void
|
|
||||||
yuv9_or_10ToY_c_template(uint16_t *dstY, const uint16_t *srcY,
|
|
||||||
int width, enum PixelFormat origin, int depth)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < width; i++) {
|
|
||||||
int px = input_pixel(&srcY[i]);
|
|
||||||
dstY[i] = (px << (16 - depth)) | (px >> (2 * depth - 16));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef input_pixel
|
|
||||||
|
|
||||||
#define YUV_NBPS(depth, BE_LE, origin) \
|
|
||||||
static void BE_LE ## depth ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
|
|
||||||
const uint8_t *_srcU, const uint8_t *_srcV, \
|
|
||||||
int width, uint32_t *unused) \
|
|
||||||
{ \
|
|
||||||
uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
|
|
||||||
const uint16_t *srcU = (const uint16_t *) _srcU, \
|
|
||||||
*srcV = (const uint16_t *) _srcV; \
|
|
||||||
yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
|
|
||||||
} \
|
|
||||||
static void BE_LE ## depth ## ToY_c(uint8_t *_dstY, const uint8_t *_srcY, \
|
|
||||||
int width, uint32_t *unused) \
|
|
||||||
{ \
|
|
||||||
uint16_t *dstY = (uint16_t *) _dstY; \
|
|
||||||
const uint16_t *srcY = (const uint16_t *) _srcY; \
|
|
||||||
yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
|
|
||||||
}
|
|
||||||
|
|
||||||
YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
|
|
||||||
YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
|
|
||||||
YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
|
|
||||||
YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
|
|
||||||
|
|
||||||
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
|
static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
|
||||||
int width, uint32_t *unused)
|
int width, uint32_t *unused)
|
||||||
{
|
{
|
||||||
@ -1905,13 +1852,15 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src,
|
static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
|
||||||
const int16_t *filter,
|
const int16_t *filter,
|
||||||
const int16_t *filterPos, int filterSize)
|
const int16_t *filterPos, int filterSize)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int32_t *dst = (int32_t *) _dst;
|
int32_t *dst = (int32_t *) _dst;
|
||||||
const uint16_t *src = (const uint16_t *) _src;
|
const uint16_t *src = (const uint16_t *) _src;
|
||||||
|
int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
|
||||||
|
int sh = (bits <= 7) ? 11 : (bits - 4);
|
||||||
|
|
||||||
for (i = 0; i < dstW; i++) {
|
for (i = 0; i < dstW; i++) {
|
||||||
int j;
|
int j;
|
||||||
@ -1922,12 +1871,12 @@ static void hScale16_c(int16_t *_dst, int dstW, const uint8_t *_src,
|
|||||||
val += src[srcPos + j] * filter[filterSize * i + j];
|
val += src[srcPos + j] * filter[filterSize * i + j];
|
||||||
}
|
}
|
||||||
// filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
|
// filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
|
||||||
dst[i] = FFMIN(val >> 11, (1 << 19) - 1);
|
dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// bilinear / bicubic scaling
|
// bilinear / bicubic scaling
|
||||||
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
|
static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
|
||||||
const int16_t *filter, const int16_t *filterPos,
|
const int16_t *filter, const int16_t *filterPos,
|
||||||
int filterSize)
|
int filterSize)
|
||||||
{
|
{
|
||||||
@ -2063,7 +2012,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!c->hyscale_fast) {
|
if (!c->hyscale_fast) {
|
||||||
c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
|
c->hScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
|
||||||
} else { // fast bilinear upscale / crap downscale
|
} else { // fast bilinear upscale / crap downscale
|
||||||
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
|
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
|
||||||
}
|
}
|
||||||
@ -2113,8 +2062,8 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!c->hcscale_fast) {
|
if (!c->hcscale_fast) {
|
||||||
c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
|
c->hScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
|
||||||
c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
|
c->hScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
|
||||||
} else { // fast bilinear upscale / crap downscale
|
} else { // fast bilinear upscale / crap downscale
|
||||||
c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
|
c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
|
||||||
}
|
}
|
||||||
@ -2645,21 +2594,21 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
|
|||||||
case PIX_FMT_PAL8 :
|
case PIX_FMT_PAL8 :
|
||||||
case PIX_FMT_BGR4_BYTE:
|
case PIX_FMT_BGR4_BYTE:
|
||||||
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
|
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
|
||||||
|
#if HAVE_BIGENDIAN
|
||||||
case PIX_FMT_YUV444P9LE:
|
case PIX_FMT_YUV444P9LE:
|
||||||
case PIX_FMT_YUV420P9LE: c->chrToYV12 = LE9ToUV_c; break;
|
case PIX_FMT_YUV420P9LE:
|
||||||
case PIX_FMT_YUV422P10LE:
|
case PIX_FMT_YUV422P10LE:
|
||||||
case PIX_FMT_YUV444P10LE:
|
case PIX_FMT_YUV444P10LE:
|
||||||
case PIX_FMT_YUV420P10LE: c->chrToYV12 = LE10ToUV_c; break;
|
case PIX_FMT_YUV420P10LE:
|
||||||
case PIX_FMT_YUV444P9BE:
|
|
||||||
case PIX_FMT_YUV420P9BE: c->chrToYV12 = BE9ToUV_c; break;
|
|
||||||
case PIX_FMT_YUV444P10BE:
|
|
||||||
case PIX_FMT_YUV422P10BE:
|
|
||||||
case PIX_FMT_YUV420P10BE: c->chrToYV12 = BE10ToUV_c; break;
|
|
||||||
#if HAVE_BIGENDIAN
|
|
||||||
case PIX_FMT_YUV420P16LE:
|
case PIX_FMT_YUV420P16LE:
|
||||||
case PIX_FMT_YUV422P16LE:
|
case PIX_FMT_YUV422P16LE:
|
||||||
case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
|
case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
|
||||||
#else
|
#else
|
||||||
|
case PIX_FMT_YUV444P9BE:
|
||||||
|
case PIX_FMT_YUV420P9BE:
|
||||||
|
case PIX_FMT_YUV444P10BE:
|
||||||
|
case PIX_FMT_YUV422P10BE:
|
||||||
|
case PIX_FMT_YUV420P10BE:
|
||||||
case PIX_FMT_YUV420P16BE:
|
case PIX_FMT_YUV420P16BE:
|
||||||
case PIX_FMT_YUV422P16BE:
|
case PIX_FMT_YUV422P16BE:
|
||||||
case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
|
case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
|
||||||
@ -2712,22 +2661,22 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
|
|||||||
c->lumToYV12 = NULL;
|
c->lumToYV12 = NULL;
|
||||||
c->alpToYV12 = NULL;
|
c->alpToYV12 = NULL;
|
||||||
switch (srcFormat) {
|
switch (srcFormat) {
|
||||||
|
#if HAVE_BIGENDIAN
|
||||||
case PIX_FMT_YUV444P9LE:
|
case PIX_FMT_YUV444P9LE:
|
||||||
case PIX_FMT_YUV420P9LE: c->lumToYV12 = LE9ToY_c; break;
|
case PIX_FMT_YUV420P9LE:
|
||||||
case PIX_FMT_YUV444P10LE:
|
case PIX_FMT_YUV444P10LE:
|
||||||
case PIX_FMT_YUV422P10LE:
|
case PIX_FMT_YUV422P10LE:
|
||||||
case PIX_FMT_YUV420P10LE: c->lumToYV12 = LE10ToY_c; break;
|
case PIX_FMT_YUV420P10LE:
|
||||||
case PIX_FMT_YUV444P9BE:
|
|
||||||
case PIX_FMT_YUV420P9BE: c->lumToYV12 = BE9ToY_c; break;
|
|
||||||
case PIX_FMT_YUV444P10BE:
|
|
||||||
case PIX_FMT_YUV422P10BE:
|
|
||||||
case PIX_FMT_YUV420P10BE: c->lumToYV12 = BE10ToY_c; break;
|
|
||||||
#if HAVE_BIGENDIAN
|
|
||||||
case PIX_FMT_YUV420P16LE:
|
case PIX_FMT_YUV420P16LE:
|
||||||
case PIX_FMT_YUV422P16LE:
|
case PIX_FMT_YUV422P16LE:
|
||||||
case PIX_FMT_YUV444P16LE:
|
case PIX_FMT_YUV444P16LE:
|
||||||
case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
|
case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
|
||||||
#else
|
#else
|
||||||
|
case PIX_FMT_YUV444P9BE:
|
||||||
|
case PIX_FMT_YUV420P9BE:
|
||||||
|
case PIX_FMT_YUV444P10BE:
|
||||||
|
case PIX_FMT_YUV422P10BE:
|
||||||
|
case PIX_FMT_YUV420P10BE:
|
||||||
case PIX_FMT_YUV420P16BE:
|
case PIX_FMT_YUV420P16BE:
|
||||||
case PIX_FMT_YUV422P16BE:
|
case PIX_FMT_YUV422P16BE:
|
||||||
case PIX_FMT_YUV444P16BE:
|
case PIX_FMT_YUV444P16BE:
|
||||||
|
@ -440,7 +440,7 @@ typedef struct SwsContext {
|
|||||||
* (and input coefficients thus padded with zeroes)
|
* (and input coefficients thus padded with zeroes)
|
||||||
* to simplify creating SIMD code.
|
* to simplify creating SIMD code.
|
||||||
*/
|
*/
|
||||||
void (*hScale)(int16_t *dst, int dstW, const uint8_t *src,
|
void (*hScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
|
||||||
const int16_t *filter, const int16_t *filterPos,
|
const int16_t *filter, const int16_t *filterPos,
|
||||||
int filterSize);
|
int filterSize);
|
||||||
|
|
||||||
|
@ -877,7 +877,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME it's even nicer if bpp isn't 16, but max({src,dst}formatbpp)
|
|
||||||
c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
|
c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
|
||||||
av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8;
|
av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8;
|
||||||
if (c->scalingBpp == 16)
|
if (c->scalingBpp == 16)
|
||||||
|
@ -1859,7 +1859,7 @@ static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV,
|
|||||||
|
|
||||||
#if !COMPILE_TEMPLATE_MMX2
|
#if !COMPILE_TEMPLATE_MMX2
|
||||||
// bilinear / bicubic scaling
|
// bilinear / bicubic scaling
|
||||||
static void RENAME(hScale)(int16_t *dst, int dstW,
|
static void RENAME(hScale)(SwsContext *c, int16_t *dst, int dstW,
|
||||||
const uint8_t *src, const int16_t *filter,
|
const uint8_t *src, const int16_t *filter,
|
||||||
const int16_t *filterPos, int filterSize)
|
const int16_t *filterPos, int filterSize)
|
||||||
{
|
{
|
||||||
|
@ -31,12 +31,12 @@ uyvy422 314bd486277111a95d9369b944fa0400
|
|||||||
yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5
|
yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5
|
||||||
yuv411p 1143e7c5cc28fe0922b051b17733bc4c
|
yuv411p 1143e7c5cc28fe0922b051b17733bc4c
|
||||||
yuv420p fdad2d8df8985e3d17e73c71f713cb14
|
yuv420p fdad2d8df8985e3d17e73c71f713cb14
|
||||||
yuv420p10be 8b5ad855229840a8fa87786cab83f856
|
yuv420p10be 2343beaf83fccc2ab23a590b2049d38b
|
||||||
yuv420p10le 5264e87921d47b17411578d9c92672b1
|
yuv420p10le 94d511d783d175f573e7be5cce75ba4d
|
||||||
yuv420p16be f6ef3ba90f238b467c7e72ade927083d
|
yuv420p16be f6ef3ba90f238b467c7e72ade927083d
|
||||||
yuv420p16le faf6aab3b1c16e8afbe160686dd360e0
|
yuv420p16le faf6aab3b1c16e8afbe160686dd360e0
|
||||||
yuv420p9be c14bf746d161face61e1f39b491bf7ef
|
yuv420p9be fdafb9ad473a559246c4cb0a1f416cd8
|
||||||
yuv420p9le 59457f9a51768bf1d4342238d50a9be3
|
yuv420p9le fccfd3c3941da635b13739f579819b5a
|
||||||
yuv422p 918e37701ee7377d16a8a6c119c56a40
|
yuv422p 918e37701ee7377d16a8a6c119c56a40
|
||||||
yuv422p16be 837945d3a771366a5a72a4ed095a4f53
|
yuv422p16be 837945d3a771366a5a72a4ed095a4f53
|
||||||
yuv422p16le b8292ae9b52eb7afc3d8b93e8fd895b4
|
yuv422p16le b8292ae9b52eb7afc3d8b93e8fd895b4
|
||||||
|
Loading…
x
Reference in New Issue
Block a user