diff --git a/libavcodec/intelh263dec.c b/libavcodec/intelh263dec.c index d5a644ee7c..a011a9f597 100644 --- a/libavcodec/intelh263dec.c +++ b/libavcodec/intelh263dec.c @@ -67,9 +67,9 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s) if (format < 6) { s->width = h263_format[format][0]; s->height = h263_format[format][1]; - s->avctx->sample_aspect_ratio.num=12;s->avctx->sample_aspect_ratio.den=11; - } - else { + s->avctx->sample_aspect_ratio.num = 12; + s->avctx->sample_aspect_ratio.den = 11; + } else { format = get_bits(&s->gb, 3); if(format == 0 || format == 7){ av_log(s->avctx, AV_LOG_ERROR, "Wrong Intel H263 format\n"); @@ -95,12 +95,11 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s) if(ar == 15){ s->avctx->sample_aspect_ratio.num = get_bits(&s->gb, 8); // aspect ratio - width s->avctx->sample_aspect_ratio.den = get_bits(&s->gb, 8); // aspect ratio - height - } - else { + } else { s->avctx->sample_aspect_ratio = ff_h263_pixel_aspect[ar]; } - if(s->avctx->sample_aspect_ratio.num == 0) - av_log(s->avctx, AV_LOG_ERROR, "Invalid aspect ratio\n"); + if (s->avctx->sample_aspect_ratio.num == 0) + av_log(s->avctx, AV_LOG_ERROR, "Invalid aspect ratio.\n"); } s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); diff --git a/libavformat/mov.c b/libavformat/mov.c index b987f0bc20..00330a6f27 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -513,6 +513,19 @@ static int mov_read_dac3(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; } +static int mov_read_wfex(MOVContext *c, AVIOContext *pb, MOVAtom atom) +{ + AVStream *st; + + if (c->fc->nb_streams < 1) + return 0; + st = c->fc->streams[c->fc->nb_streams-1]; + + ff_get_wav_header(pb, st->codec, atom.size); + + return 0; +} + static int mov_read_pasp(MOVContext *c, AVIOContext *pb, MOVAtom atom) { const int num = avio_rb32(pb); @@ -2261,6 +2274,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = { { MKTAG('e','s','d','s'), mov_read_esds }, { MKTAG('d','a','c','3'), mov_read_dac3 }, /* AC-3 info */ { MKTAG('w','i','d','e'), mov_read_wide }, /* place holder */ +{ MKTAG('w','f','e','x'), mov_read_wfex }, { MKTAG('c','m','o','v'), mov_read_cmov }, { MKTAG('c','h','a','n'), mov_read_chan }, { 0, NULL } diff --git a/libavformat/riff.c b/libavformat/riff.c index 4b3ab60bb7..544c29f116 100644 --- a/libavformat/riff.c +++ b/libavformat/riff.c @@ -537,6 +537,7 @@ int ff_get_wav_header(AVIOContext *pb, AVCodecContext *codec, int size) } codec->extradata_size = cbSize; if (cbSize > 0) { + av_free(codec->extradata); codec->extradata = av_mallocz(codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE); if (!codec->extradata) return AVERROR(ENOMEM); diff --git a/libswscale/swscale.c b/libswscale/swscale.c index f20eff0cd1..8b166c2db8 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -380,14 +380,14 @@ yuv2NBPS(10, LE, 0); yuv2NBPS(16, BE, 1); yuv2NBPS(16, LE, 0); -static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW, - const uint8_t *lumDither, const uint8_t *chrDither) +static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { //FIXME Optimize (just quickly written not optimized..) int i; @@ -423,16 +423,44 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, aDest[i]= av_clip_uint8(val>>19); } - } -static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, - uint8_t *vDest, uint8_t *aDest, - int dstW, int chrDstW, - const uint8_t *lumDither, const uint8_t *chrDither) +static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) +{ + int i; + + for (i=0; i>7; + dest[i]= av_clip_uint8(val); + } + + if (uDest) + for (i=0; i>7; + int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7; + uDest[i]= av_clip_uint8(u); + vDest[i]= av_clip_uint8(v); + } + + if (CONFIG_SWSCALE_ALPHA && aDest) + for (i=0; i>7; + aDest[i]= av_clip_uint8(val); + } +} + +static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, + uint8_t *vDest, uint8_t *aDest, + int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { enum PixelFormat dstFormat = c->dstFormat; @@ -478,7 +506,225 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in } } -#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \ +static av_always_inline void +yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, + int y, enum PixelFormat target) +{ + int i; + +#define output_pixel(pos, val) \ + if (target == PIX_FMT_GRAY16BE) { \ + AV_WB16(pos, val); \ + } else { \ + AV_WL16(pos, val); \ + } + for (i = 0; i < (dstW >> 1); i++) { + int j; + int Y1 = 1 << 18; + int Y2 = 1 << 18; + const int i2 = 2 * i; + + for (j = 0; j < lumFilterSize; j++) { + Y1 += lumSrc[j][i2] * lumFilter[j]; + Y2 += lumSrc[j][i2+1] * lumFilter[j]; + } + Y1 >>= 11; + Y2 >>= 11; + if ((Y1 | Y2) & 0x10000) { + Y1 = av_clip_uint16(Y1); + Y2 = av_clip_uint16(Y2); + } + output_pixel(&dest[2 * i2 + 0], Y1); + output_pixel(&dest[2 * i2 + 2], Y2); + } +} + +static av_always_inline void +yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y, + enum PixelFormat target) +{ + int yalpha1 = 4095 - yalpha; \ + int i; + + for (i = 0; i < (dstW >> 1); i++) { + const int i2 = 2 * i; + int Y1 = (buf0[i2 ] * yalpha1 + buf1[i2 ] * yalpha) >> 11; + int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11; + + output_pixel(&dest[2 * i2 + 0], Y1); + output_pixel(&dest[2 * i2 + 2], Y2); + } +} + +static av_always_inline void +yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y, enum PixelFormat target) +{ + int i; + + for (i = 0; i < (dstW >> 1); i++) { + const int i2 = 2 * i; + int Y1 = buf0[i2 ] << 1; + int Y2 = buf0[i2+1] << 1; + + output_pixel(&dest[2 * i2 + 0], Y1); + output_pixel(&dest[2 * i2 + 2], Y2); + } +#undef output_pixel +} + +#define YUV2PACKEDWRAPPER(name, ext, fmt) \ +static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ + const int16_t **lumSrc, int lumFilterSize, \ + const int16_t *chrFilter, const int16_t **chrUSrc, \ + const int16_t **chrVSrc, int chrFilterSize, \ + const int16_t **alpSrc, uint8_t *dest, int dstW, \ + int y) \ +{ \ + name ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ + chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ + alpSrc, dest, dstW, y, fmt); \ +} \ + \ +static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \ + const uint16_t *buf1, const uint16_t *ubuf0, \ + const uint16_t *ubuf1, const uint16_t *vbuf0, \ + const uint16_t *vbuf1, const uint16_t *abuf0, \ + const uint16_t *abuf1, uint8_t *dest, int dstW, \ + int yalpha, int uvalpha, int y) \ +{ \ + name ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \ + vbuf0, vbuf1, abuf0, abuf1, \ + dest, dstW, yalpha, uvalpha, y, fmt); \ +} \ + \ +static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \ + const uint16_t *ubuf0, const uint16_t *ubuf1, \ + const uint16_t *vbuf0, const uint16_t *vbuf1, \ + const uint16_t *abuf0, uint8_t *dest, int dstW, \ + int uvalpha, enum PixelFormat dstFormat, \ + int flags, int y) \ +{ \ + name ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \ + vbuf1, abuf0, dest, dstW, uvalpha, \ + dstFormat, flags, y, fmt); \ +} + +YUV2PACKEDWRAPPER(yuv2gray16, LE, PIX_FMT_GRAY16LE); +YUV2PACKEDWRAPPER(yuv2gray16, BE, PIX_FMT_GRAY16BE); + +static av_always_inline void +yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, + int y, enum PixelFormat target) +{ + const uint8_t * const d128=dither_8x8_220[y&7]; + uint8_t *g = c->table_gU[128] + c->table_gV[128]; + int i; + int acc = 0; + +#define output_pixel(pos, acc) \ + if (target == PIX_FMT_MONOBLACK) { \ + pos = acc; \ + } else { \ + pos = ~acc; \ + } + for (i = 0; i < dstW - 1; i += 2) { + int j; + int Y1 = 1 << 18; + int Y2 = 1 << 18; + + for (j = 0; j < lumFilterSize; j++) { + Y1 += lumSrc[j][i] * lumFilter[j]; + Y2 += lumSrc[j][i+1] * lumFilter[j]; + } + Y1 >>= 19; + Y2 >>= 19; + if ((Y1 | Y2) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + } + acc += acc + g[Y1 + d128[(i + 0) & 7]]; + acc += acc + g[Y2 + d128[(i + 1) & 7]]; + if ((i & 7) == 6) { + output_pixel(*dest++, acc); + } + } +} + +static av_always_inline void +yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y, + enum PixelFormat target) +{ + const uint8_t * const d128 = dither_8x8_220[y & 7]; + uint8_t *g = c->table_gU[128] + c->table_gV[128]; + int yalpha1 = 4095 - yalpha; + int i; + + for (i = 0; i < dstW - 7; i += 8) { + int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]]; + acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]]; + acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]]; + acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]]; + acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]]; + acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]]; + acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]]; + acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]]; + output_pixel(*dest++, acc); + } +} + +static av_always_inline void +yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y, enum PixelFormat target) +{ + const uint8_t * const d128 = dither_8x8_220[y & 7]; + uint8_t *g = c->table_gU[128] + c->table_gV[128]; + int i; + + for (i = 0; i < dstW - 7; i += 8) { + int acc = g[(buf0[i ] >> 7) + d128[0]]; + acc += acc + g[(buf0[i + 1] >> 7) + d128[1]]; + acc += acc + g[(buf0[i + 2] >> 7) + d128[2]]; + acc += acc + g[(buf0[i + 3] >> 7) + d128[3]]; + acc += acc + g[(buf0[i + 4] >> 7) + d128[4]]; + acc += acc + g[(buf0[i + 5] >> 7) + d128[5]]; + acc += acc + g[(buf0[i + 6] >> 7) + d128[6]]; + acc += acc + g[(buf0[i + 7] >> 7) + d128[7]]; + output_pixel(*dest++, acc); + } +#undef output_pixel +} + +YUV2PACKEDWRAPPER(yuv2mono, white, PIX_FMT_MONOWHITE); +YUV2PACKEDWRAPPER(yuv2mono, black, PIX_FMT_MONOBLACK); + +#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \ for (i=0; i<(dstW>>1); i++) {\ int j;\ int Y1 = 1<<18;\ @@ -501,6 +747,12 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in Y2>>=19;\ U >>=19;\ V >>=19;\ + if ((Y1|Y2|U|V)&0x100) {\ + Y1 = av_clip_uint8(Y1); \ + Y2 = av_clip_uint8(Y2); \ + U = av_clip_uint8(U); \ + V = av_clip_uint8(V); \ + }\ if (alpha) {\ A1 = 1<<18;\ A2 = 1<<18;\ @@ -510,26 +762,13 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in }\ A1>>=19;\ A2>>=19;\ + if ((A1|A2)&0x100) {\ + A1 = av_clip_uint8(A1); \ + A2 = av_clip_uint8(A2); \ + }\ } -#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \ - YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\ - if ((Y1|Y2|U|V)&256) {\ - if (Y1>255) Y1=255; \ - else if (Y1<0)Y1=0; \ - if (Y2>255) Y2=255; \ - else if (Y2<0)Y2=0; \ - if (U>255) U=255; \ - else if (U<0) U=0; \ - if (V>255) V=255; \ - else if (V<0) V=0; \ - }\ - if (alpha && ((A1|A2)&256)) {\ - A1=av_clip_uint8(A1);\ - A2=av_clip_uint8(A2);\ - } - -#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \ +#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \ for (i=0; i>=10;\ V >>=10;\ if (alpha) {\ - A = rnd;\ + A = rnd>>3;\ for (j=0; j>=19;\ - if (A&256)\ + if (A&0x100)\ A = av_clip_uint8(A);\ - } - -#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \ - YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\ + }\ Y-= c->yuv2rgb_y_offset;\ Y*= c->yuv2rgb_y_coeff;\ Y+= rnd;\ @@ -566,35 +802,9 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\ B= Y + U*c->yuv2rgb_u2b_coeff;\ if ((R|G|B)&(0xC0000000)) {\ - if (R>=(256<<22)) R=(256<<22)-1; \ - else if (R<0)R=0; \ - if (G>=(256<<22)) G=(256<<22)-1; \ - else if (G<0)G=0; \ - if (B>=(256<<22)) B=(256<<22)-1; \ - else if (B<0)B=0; \ - } - -#define YSCALE_YUV_2_GRAY16_C \ - for (i=0; i<(dstW>>1); i++) {\ - int j;\ - int Y1 = 1<<18;\ - int Y2 = 1<<18;\ - int U = 1<<18;\ - int V = 1<<18;\ - \ - const int i2= 2*i;\ - \ - for (j=0; j>=11;\ - Y2>>=11;\ - if ((Y1|Y2|U|V)&65536) {\ - if (Y1>65535) Y1=65535; \ - else if (Y1<0)Y1=0; \ - if (Y2>65535) Y2=65535; \ - else if (Y2<0)Y2=0; \ + R = av_clip_uintp2(R, 30); \ + G = av_clip_uintp2(G, 30); \ + B = av_clip_uintp2(B, 30); \ } #define YSCALE_YUV_2_RGBX_C(type,alpha) \ @@ -617,12 +827,6 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19; \ } -#define YSCALE_YUV_2_GRAY16_2_C \ - for (i=0; i<(dstW>>1); i++) { \ - const int i2= 2*i; \ - int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \ - int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11; - #define YSCALE_YUV_2_RGB2_C(type,alpha) \ YSCALE_YUV_2_PACKED2_C(type,alpha)\ r = (type *)c->table_rV[V];\ @@ -643,12 +847,6 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in A2= abuf0[i2+1]>>7;\ } -#define YSCALE_YUV_2_GRAY16_1_C \ - for (i=0; i<(dstW>>1); i++) {\ - const int i2= 2*i;\ - int Y1= buf0[i2 ]<<1;\ - int Y2= buf0[i2+1]<<1; - #define YSCALE_YUV_2_RGB1_C(type,alpha) \ YSCALE_YUV_2_PACKED1_C(type,alpha)\ r = (type *)c->table_rV[V];\ @@ -675,53 +873,7 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in g = (type *)(c->table_gU[U] + c->table_gV[V]);\ b = (type *)c->table_bU[U]; -#define YSCALE_YUV_2_MONO2_C \ - const uint8_t * const d128=dither_8x8_220[y&7];\ - uint8_t *g= c->table_gU[128] + c->table_gV[128];\ - for (i=0; i>19) + d128[0]];\ - acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ - acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\ - acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\ - acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\ - acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\ - acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\ - acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\ - ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\ - dest++;\ - } - -#define YSCALE_YUV_2_MONOX_C \ - const uint8_t * const d128=dither_8x8_220[y&7];\ - uint8_t *g= c->table_gU[128] + c->table_gV[128];\ - int acc=0;\ - for (i=0; i>=19;\ - Y2>>=19;\ - if ((Y1|Y2)&256) {\ - if (Y1>255) Y1=255;\ - else if (Y1<0)Y1=0;\ - if (Y2>255) Y2=255;\ - else if (Y2<0)Y2=0;\ - }\ - acc+= acc + g[Y1+d128[(i+0)&7]];\ - acc+= acc + g[Y2+d128[(i+1)&7]];\ - if ((i&7)==6) {\ - ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\ - dest++;\ - }\ - } - -#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\ +#define YSCALE_YUV_2_ANYRGB_C(func, func2)\ switch(c->dstFormat) {\ case PIX_FMT_RGB48BE:\ case PIX_FMT_RGB48LE:\ @@ -819,10 +971,8 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in dest+=6;\ }\ break;\ - case PIX_FMT_RGB565BE:\ - case PIX_FMT_RGB565LE:\ - case PIX_FMT_BGR565BE:\ - case PIX_FMT_BGR565LE:\ + case PIX_FMT_RGB565:\ + case PIX_FMT_BGR565:\ {\ const int dr1= dither_2x2_8[y&1 ][0];\ const int dg1= dither_2x2_4[y&1 ][0];\ @@ -836,10 +986,8 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in }\ }\ break;\ - case PIX_FMT_RGB555BE:\ - case PIX_FMT_RGB555LE:\ - case PIX_FMT_BGR555BE:\ - case PIX_FMT_BGR555LE:\ + case PIX_FMT_RGB555:\ + case PIX_FMT_BGR555:\ {\ const int dr1= dither_2x2_8[y&1 ][0];\ const int dg1= dither_2x2_8[y&1 ][1];\ @@ -853,10 +1001,8 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in }\ }\ break;\ - case PIX_FMT_RGB444BE:\ - case PIX_FMT_RGB444LE:\ - case PIX_FMT_BGR444BE:\ - case PIX_FMT_BGR444LE:\ + case PIX_FMT_RGB444:\ + case PIX_FMT_BGR444:\ {\ const int dr1= dither_4x4_16[y&3 ][0];\ const int dg1= dither_4x4_16[y&3 ][1];\ @@ -903,12 +1049,6 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in }\ }\ break;\ - case PIX_FMT_MONOBLACK:\ - case PIX_FMT_MONOWHITE:\ - {\ - func_monoblack\ - }\ - break;\ case PIX_FMT_YUYV422:\ func2\ ((uint8_t*)dest)[2*i2+0]= Y1;\ @@ -925,22 +1065,6 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const in ((uint8_t*)dest)[2*i2+3]= Y2;\ } \ break;\ - case PIX_FMT_GRAY16BE:\ - func_g16\ - ((uint8_t*)dest)[2*i2+0]= Y1>>8;\ - ((uint8_t*)dest)[2*i2+1]= Y1;\ - ((uint8_t*)dest)[2*i2+2]= Y2>>8;\ - ((uint8_t*)dest)[2*i2+3]= Y2;\ - } \ - break;\ - case PIX_FMT_GRAY16LE:\ - func_g16\ - ((uint8_t*)dest)[2*i2+0]= Y1;\ - ((uint8_t*)dest)[2*i2+1]= Y1>>8;\ - ((uint8_t*)dest)[2*i2+2]= Y2;\ - ((uint8_t*)dest)[2*i2+3]= Y2>>8;\ - } \ - break;\ } static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter, @@ -950,14 +1074,14 @@ static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter, const int16_t **alpSrc, uint8_t *dest, int dstW, int y) { int i; - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C) + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0)) } -static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, int dstW, int y) +static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, int chrFilterSize, + const int16_t **alpSrc, uint8_t *dest, int dstW, int y) { int i; int step= c->dstFormatBpp/8; @@ -1039,7 +1163,45 @@ static inline void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter, } } -static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val) +/** + * vertical bilinear scale YV12 to RGB + */ +static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, int dstW, + int yalpha, int uvalpha, int y) +{ + int yalpha1=4095- yalpha; + int uvalpha1=4095-uvalpha; + int i; + + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0)) +} + +/** + * YV12 to RGB without scaling or interpolating + */ +static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, int dstW, + int uvalpha, enum PixelFormat dstFormat, + int flags, int y) +{ + int i; + + if (uvalpha < 2048) { + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0)) + } else { + YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0)) + } +} + +static av_always_inline void fillPlane(uint8_t* plane, int stride, + int width, int height, + int y, uint8_t val) { int i; uint8_t *ptr = plane + stride*y; @@ -1049,77 +1211,206 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, } } -#define RGB48(name, R, B, READ)\ -static inline void name ## ToY_c(int16_t *dst, const uint16_t *src, int width, uint32_t *unused)\ -{\ - int i;\ - for (i = 0; i < width; i++) {\ - int r = READ(&src[i*3+R]);\ - int g = READ(&src[i*3+1]);\ - int b = READ(&src[i*3+B]);\ -\ - dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\ - }\ -}\ -\ -static inline void name ## ToUV_c(int16_t *dstU, int16_t *dstV,\ - const uint16_t *src1, const uint16_t *src2,\ - int width, uint32_t *unused)\ -{\ - int i;\ - assert(src1==src2);\ - for (i = 0; i < width; i++) {\ - int r = READ(&src1[3*i + R]);\ - int g = READ(&src1[3*i + 1]);\ - int b = READ(&src1[3*i + B]);\ -\ - dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\ - dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);\ - }\ -}\ -\ -static inline void name ## ToUV_half_c(int16_t *dstU, int16_t *dstV,\ - const uint16_t *src1, const uint16_t *src2,\ - int width, uint32_t *unused)\ -{\ - int i;\ - assert(src1==src2);\ - for (i = 0; i < width; i++) {\ - int r= READ(&src1[6*i + R]) + READ(&src1[6*i + 3+R]);\ - int g= READ(&src1[6*i + 1]) + READ(&src1[6*i + 4]);\ - int b= READ(&src1[6*i + B]) + READ(&src1[6*i + 3+B]);\ -\ - dstU[i]= (RU*r + GU*g + BU*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);\ - dstV[i]= (RV*r + GV*g + BV*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);\ - }\ +static av_always_inline void +rgb48ToY_c_template(int16_t *dst, const uint16_t *src, int width, + enum PixelFormat origin) +{ + int i; + for (i = 0; i < width; i++) { +#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) + int a = input_pixel(&src[i*3+0]); + int g = input_pixel(&src[i*3+1]); + int c = input_pixel(&src[i*3+2]); + +#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? c : a) +#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? a : c) + dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); + } } -RGB48(rgb48LE, 0, 2, AV_RL16) -RGB48(rgb48BE, 0, 2, AV_RB16) -RGB48(bgr48LE, 2, 0, AV_RL16) -RGB48(bgr48BE, 2, 0, AV_RB16) +static av_always_inline void +rgb48ToUV_c_template(int16_t *dstU, int16_t *dstV, + const uint16_t *src1, const uint16_t *src2, + int width, enum PixelFormat origin) +{ + int i; + assert(src1==src2); + for (i = 0; i < width; i++) { + int a = input_pixel(&src1[3*i + 0]); + int g = input_pixel(&src1[3*i + 1]); + int c = input_pixel(&src1[3*i + 2]); -#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\ -static void name ## _c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)\ -{\ - int i;\ - for (i=0; i>shb)&maskb;\ - int g= (((const type*)src)[i]>>shg)&maskg;\ - int r= (((const type*)src)[i]>>shr)&maskr;\ -\ - dst[i]= (((RY)*r + (GY)*g + (BY)*b + (32<<((S)-1)) + (1<<(S-7)))>>((S)-6));\ - }\ + dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); + dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8); + } } -BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8) -BGR2Y(uint32_t,bgr321ToY,16,16, 0, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8) -BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY , BY<< 8, RGB2YUV_SHIFT+8) -BGR2Y(uint32_t,rgb321ToY, 0,16,16, 0xFF00, 0x00FF, 0xFF00, RY , GY<<8, BY , RGB2YUV_SHIFT+8) -BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY , RGB2YUV_SHIFT+8) -BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY , RGB2YUV_SHIFT+7) -BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8) -BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7) +static av_always_inline void +rgb48ToUV_half_c_template(int16_t *dstU, int16_t *dstV, + const uint16_t *src1, const uint16_t *src2, + int width, enum PixelFormat origin) +{ + int i; + assert(src1==src2); + for (i = 0; i < width; i++) { + int a = (input_pixel(&src1[6*i + 0])) + (input_pixel(&src1[6*i + 3])); + int g = (input_pixel(&src1[6*i + 1])) + (input_pixel(&src1[6*i + 4])); + int c = (input_pixel(&src1[6*i + 2])) + (input_pixel(&src1[6*i + 5])); + + dstU[i]= (RU*r + GU*g + BU*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8); + dstV[i]= (RV*r + GV*g + BV*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8); + } +#undef r +#undef b +#undef input_pixel +} + +#define rgb48funcs(pattern, BE_LE, origin) \ +static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \ + int width, uint32_t *unused) \ +{ \ + rgb48ToY_c_template(dst, src, width, origin); \ +} \ + \ +static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *unused) \ +{ \ + rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \ +} \ + \ +static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src1, const uint8_t *src2, \ + int width, uint32_t *unused) \ +{ \ + rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \ +} + +rgb48funcs(rgb, LE, PIX_FMT_RGB48LE); +rgb48funcs(rgb, BE, PIX_FMT_RGB48BE); +rgb48funcs(bgr, LE, PIX_FMT_BGR48LE); +rgb48funcs(bgr, BE, PIX_FMT_BGR48BE); + +static av_always_inline void +rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src, + int width, enum PixelFormat origin, + int shr, int shg, int shb, int shp, + int maskr, int maskg, int maskb, + int rsh, int gsh, int bsh, int S) +{ + const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh, + rnd = (32<<((S)-1)) + (1<<(S-7)); + int i; + + for (i = 0; i < width; i++) { +#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \ + origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \ + (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2]))) + int px = input_pixel(i) >> shp; + int b = (px & maskb) >> shb; + int g = (px & maskg) >> shg; + int r = (px & maskr) >> shr; + + dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6); + } +} + +static av_always_inline void +rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV, + const uint8_t *src, int width, + enum PixelFormat origin, + int shr, int shg, int shb, int shp, + int maskr, int maskg, int maskb, + int rsh, int gsh, int bsh, int S) +{ + const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, + rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, + rnd = (256<<((S)-1)) + (1<<(S-7)); + int i; + + for (i = 0; i < width; i++) { + int px = input_pixel(i) >> shp; + int b = (px & maskb) >> shb; + int g = (px & maskg) >> shg; + int r = (px & maskr) >> shr; + + dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6); + dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6); + } +} + +static av_always_inline void +rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV, + const uint8_t *src, int width, + enum PixelFormat origin, + int shr, int shg, int shb, int shp, + int maskr, int maskg, int maskb, + int rsh, int gsh, int bsh, int S) +{ + const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, + rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, + rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb); + int i; + + maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1; + for (i = 0; i < width; i++) { + int px0 = input_pixel(2 * i + 0) >> shp; + int px1 = input_pixel(2 * i + 1) >> shp; + int b, r, g = (px0 & maskgx) + (px1 & maskgx); + int rb = px0 + px1 - g; + + b = (rb & maskb) >> shb; + if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE || + origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) { + g >>= shg; + } else { + g = (g & maskg) >> shg; + } + r = (rb & maskr) >> shr; + + dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1); + dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1); + } +#undef input_pixel +} + +#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ + maskg, maskb, rsh, gsh, bsh, S) \ +static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \ + int width, uint32_t *unused) \ +{ \ + rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \ + maskr, maskg, maskb, rsh, gsh, bsh, S); \ +} \ + \ +static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src, const uint8_t *dummy, \ + int width, uint32_t *unused) \ +{ \ + rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ + maskr, maskg, maskb, rsh, gsh, bsh, S); \ +} \ + \ +static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src, const uint8_t *dummy, \ + int width, uint32_t *unused) \ +{ \ + rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ + maskr, maskg, maskb, rsh, gsh, bsh, S); \ +} + +rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7); +rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7); +rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7); +rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8); +rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7); static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) { @@ -1137,46 +1428,6 @@ static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unu } } -#define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \ -static void name ## _c(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\ -{\ - int i;\ - for (i=0; i>shp)&maskb)>>shb;\ - int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\ - int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\ -\ - dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\ - dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\ - }\ -}\ -static void name ## _half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, int width, uint32_t *unused)\ -{\ - int i;\ - for (i=0; i>shp;\ - int pix1= ((const type*)src)[2*i+1]>>shp;\ - int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\ - int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\ - int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\ - g&= maskg|(2*maskg);\ -\ - g>>=shg;\ -\ - dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\ - dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\ - }\ -} - -BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) -BGR2UV(uint32_t,bgr321ToUV,16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) -BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) -BGR2UV(uint32_t,rgb321ToUV, 0, 0,16, 8, 0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU , BU<< 8, RV<< 8, GV , BV<< 8, RGB2YUV_SHIFT+8) -BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RU<<11, GU<<5, BU , RV<<11, GV<<5, BV , RGB2YUV_SHIFT+8) -BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<5, BU , RV<<10, GV<<5, BV , RGB2YUV_SHIFT+7) -BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8) -BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7) - static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal) { int i; @@ -1231,74 +1482,6 @@ static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t } } -static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, - const int16_t *chrUSrc, const int16_t *chrVSrc, - const int16_t *alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) -{ - int i; - - for (i=0; i>7; - dest[i]= av_clip_uint8(val); - } - - if (uDest) - for (i=0; i>7; - int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7; - uDest[i]= av_clip_uint8(u); - vDest[i]= av_clip_uint8(v); - } - - if (CONFIG_SWSCALE_ALPHA && aDest) - for (i=0; i>7; - aDest[i]= av_clip_uint8(val); - } -} - -/** - * vertical bilinear scale YV12 to RGB - */ -static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, int dstW, - int yalpha, int uvalpha, int y) -{ - int yalpha1=4095- yalpha; - int uvalpha1=4095-uvalpha; - int i; - - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C) -} - -/** - * YV12 to RGB without scaling or interpolating - */ -static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, int dstW, - int uvalpha, enum PixelFormat dstFormat, - int flags, int y) -{ - const int yalpha1=0; - int i; - - const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 - const int yalpha= 4096; //FIXME ... - - if (uvalpha < 2048) { - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) - } else { - YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) - } -} - //FIXME yuy2* can read up to 7 samples too much static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, @@ -1372,31 +1555,51 @@ static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, } // FIXME Maybe dither instead. -#ifndef YUV_NBPS -#define YUV_NBPS(depth, endianness, rfunc) \ -static void endianness ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ - const uint16_t *srcU, const uint16_t *srcV, \ - int width, uint32_t *unused) \ -{ \ - int i; \ - for (i = 0; i < width; i++) { \ - dstU[i] = rfunc(&srcU[i])>>(depth-8); \ - dstV[i] = rfunc(&srcV[i])>>(depth-8); \ - } \ -} \ -\ -static void endianness ## depth ## ToY_c(uint8_t *dstY, const uint16_t *srcY, int width, uint32_t *unused) \ -{ \ - int i; \ - for (i = 0; i < width; i++) \ - dstY[i] = rfunc(&srcY[i])>>(depth-8); \ -} \ +static av_always_inline void +yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV, + const uint8_t *_srcU, const uint8_t *_srcV, + int width, enum PixelFormat origin, int depth) +{ + int i; + const uint16_t *srcU = (const uint16_t *) _srcU; + const uint16_t *srcV = (const uint16_t *) _srcV; -YUV_NBPS( 9, LE, AV_RL16) -YUV_NBPS( 9, BE, AV_RB16) -YUV_NBPS(10, LE, AV_RL16) -YUV_NBPS(10, BE, AV_RB16) -#endif // YUV_NBPS +#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) + for (i = 0; i < width; i++) { + dstU[i] = input_pixel(&srcU[i]) >> (depth - 8); + dstV[i] = input_pixel(&srcV[i]) >> (depth - 8); + } +} + +static av_always_inline void +yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY, + int width, enum PixelFormat origin, int depth) +{ + int i; + const uint16_t *srcY = (const uint16_t*)_srcY; + + for (i = 0; i < width; i++) + dstY[i] = input_pixel(&srcY[i]) >> (depth - 8); +#undef input_pixel +} + +#define YUV_NBPS(depth, BE_LE, origin) \ +static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *srcU, const uint8_t *srcV, \ + int width, uint32_t *unused) \ +{ \ + yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \ +} \ +static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \ + int width, uint32_t *unused) \ +{ \ + yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \ +} + +YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE); +YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE); +YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE); +YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE); static inline void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, @@ -1595,12 +1798,12 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, } // *** horizontal scale Y line to temp buffer -static inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth, - const uint8_t *src, int srcW, int xInc, - const int16_t *hLumFilter, - const int16_t *hLumFilterPos, int hLumFilterSize, - uint8_t *formatConvBuffer, - uint32_t *pal, int isAlpha) +static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth, + const uint8_t *src, int srcW, int xInc, + const int16_t *hLumFilter, + const int16_t *hLumFilterPos, int hLumFilterSize, + uint8_t *formatConvBuffer, + uint32_t *pal, int isAlpha) { void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; @@ -1642,11 +1845,11 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, } } -static inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth, - const uint8_t *src1, const uint8_t *src2, - int srcW, int xInc, const int16_t *hChrFilter, - const int16_t *hChrFilterPos, int hChrFilterSize, - uint8_t *formatConvBuffer, uint32_t *pal) +static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth, + const uint8_t *src1, const uint8_t *src2, + int srcW, int xInc, const int16_t *hChrFilter, + const int16_t *hChrFilterPos, int hChrFilterSize, + uint8_t *formatConvBuffer, uint32_t *pal) { if (c->chrToYV12) { uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); @@ -1695,9 +1898,33 @@ find_c_packed_planar_out_funcs(SwsContext *c, if(c->flags & SWS_FULL_CHR_H_INT) { *yuv2packedX = yuv2rgbX_c_full; } else { - *yuv2packed1 = yuv2packed1_c; - *yuv2packed2 = yuv2packed2_c; - *yuv2packedX = yuv2packedX_c; + switch (dstFormat) { + case PIX_FMT_GRAY16BE: + *yuv2packed1 = yuv2gray16BE_1_c; + *yuv2packed2 = yuv2gray16BE_2_c; + *yuv2packedX = yuv2gray16BE_X_c; + break; + case PIX_FMT_GRAY16LE: + *yuv2packed1 = yuv2gray16LE_1_c; + *yuv2packed2 = yuv2gray16LE_2_c; + *yuv2packedX = yuv2gray16LE_X_c; + break; + case PIX_FMT_MONOWHITE: + *yuv2packed1 = yuv2monowhite_1_c; + *yuv2packed2 = yuv2monowhite_2_c; + *yuv2packedX = yuv2monowhite_X_c; + break; + case PIX_FMT_MONOBLACK: + *yuv2packed1 = yuv2monoblack_1_c; + *yuv2packed2 = yuv2monoblack_2_c; + *yuv2packedX = yuv2monoblack_X_c; + break; + default: + *yuv2packed1 = yuv2packed1_c; + *yuv2packed2 = yuv2packed2_c; + *yuv2packedX = yuv2packedX_c; + break; + } } } @@ -1967,7 +2194,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], return dstY - lastDstY; } -static void sws_init_swScale_c(SwsContext *c) +static av_cold void sws_init_swScale_c(SwsContext *c) { enum PixelFormat srcFormat = c->srcFormat; @@ -2010,37 +2237,45 @@ static void sws_init_swScale_c(SwsContext *c) } if (c->chrSrcHSubSample) { switch(srcFormat) { - case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break; - case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_half_c; break; - case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_half_c; break; - case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_half_c; break; - case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break; - case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half_c; break; - case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break; - case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half_c; break; - case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half_c; break; - case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break; - case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half_c; break; - case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break; - case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half_c; break; - case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half_c; break; + case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break; + case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break; + case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break; + case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break; + case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break; + case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break; + case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break; + case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break; + case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break; + case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break; + case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break; + case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break; + case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break; + case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break; + case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break; + case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break; + case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break; + case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break; } } else { switch(srcFormat) { - case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break; - case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48LEToUV_c; break; - case PIX_FMT_BGR48BE: c->chrToYV12 = bgr48BEToUV_c; break; - case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48LEToUV_c; break; - case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break; - case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_c; break; - case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break; - case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_c; break; - case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_c; break; - case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break; - case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_c; break; - case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break; - case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_c; break; - case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_c; break; + case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break; + case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break; + case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break; + case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break; + case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break; + case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break; + case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break; + case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break; + case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break; + case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break; + case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break; + case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break; + case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break; + case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break; + case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break; + case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break; + case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break; + case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break; } } @@ -2051,13 +2286,17 @@ static void sws_init_swScale_c(SwsContext *c) case PIX_FMT_GRAY8A : c->lumToYV12 = yuy2ToY_c; break; case PIX_FMT_UYVY422 : - c->lumToYV12 = uyvyToY_c; break; - case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break; - case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY_c; break; - case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY_c; break; - case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break; - case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY_c; break; - case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY_c; break; + c->lumToYV12 = uyvyToY_c; break; + case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break; + case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break; + case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break; + case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break; + case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break; + case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break; + case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break; + case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break; + case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break; + case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break; case PIX_FMT_RGB8 : case PIX_FMT_BGR8 : case PIX_FMT_PAL8 : diff --git a/libswscale/utils.c b/libswscale/utils.c index d8c2c0818c..69fbcc8ac1 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -77,13 +77,17 @@ const char *swscale_license(void) || (x)==PIX_FMT_BGR48BE \ || (x)==PIX_FMT_BGR48LE \ || (x)==PIX_FMT_BGR24 \ - || (x)==PIX_FMT_BGR565 \ - || (x)==PIX_FMT_BGR555 \ + || (x)==PIX_FMT_BGR565LE \ + || (x)==PIX_FMT_BGR565BE \ + || (x)==PIX_FMT_BGR555LE \ + || (x)==PIX_FMT_BGR555BE \ || (x)==PIX_FMT_BGR32 \ || (x)==PIX_FMT_BGR32_1 \ || (x)==PIX_FMT_RGB24 \ - || (x)==PIX_FMT_RGB565 \ - || (x)==PIX_FMT_RGB555 \ + || (x)==PIX_FMT_RGB565LE \ + || (x)==PIX_FMT_RGB565BE \ + || (x)==PIX_FMT_RGB555LE \ + || (x)==PIX_FMT_RGB555BE \ || (x)==PIX_FMT_GRAY8 \ || (x)==PIX_FMT_GRAY8A \ || (x)==PIX_FMT_YUV410P \ @@ -137,7 +141,22 @@ int sws_isSupportedInput(enum PixelFormat pix_fmt) || (x)==PIX_FMT_YUVJ422P \ || (x)==PIX_FMT_YUVJ440P \ || (x)==PIX_FMT_YUVJ444P \ - || isAnyRGB(x) \ + || isRGBinBytes(x) \ + || isBGRinBytes(x) \ + || (x)==PIX_FMT_RGB565 \ + || (x)==PIX_FMT_RGB555 \ + || (x)==PIX_FMT_RGB444 \ + || (x)==PIX_FMT_BGR565 \ + || (x)==PIX_FMT_BGR555 \ + || (x)==PIX_FMT_BGR444 \ + || (x)==PIX_FMT_RGB8 \ + || (x)==PIX_FMT_BGR8 \ + || (x)==PIX_FMT_RGB4_BYTE \ + || (x)==PIX_FMT_BGR4_BYTE \ + || (x)==PIX_FMT_RGB4 \ + || (x)==PIX_FMT_BGR4 \ + || (x)==PIX_FMT_MONOBLACK \ + || (x)==PIX_FMT_MONOWHITE \ || (x)==PIX_FMT_NV12 \ || (x)==PIX_FMT_NV21 \ || (x)==PIX_FMT_GRAY16BE \ diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index eaad94599f..25399fadef 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -70,14 +70,14 @@ : "%"REG_d, "%"REG_S\ ); -static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW, - const uint8_t *lumDither, const uint8_t *chrDither) +static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { int i; if (uDest) { @@ -155,14 +155,14 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, : "%"REG_a, "%"REG_d, "%"REG_S\ ); -static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW, - const uint8_t *lumDither, const uint8_t *chrDither) +static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { int i; if (uDest) { @@ -180,12 +180,12 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, YSCALEYUV2YV12X_ACCURATE(LUM_MMX_FILTER_OFFSET, dest, dstW, 0) } -static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, - const int16_t *chrUSrc, const int16_t *chrVSrc, - const int16_t *alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW, - const uint8_t *lumDither, const uint8_t *chrDither) +static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { int p= 4; const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; @@ -214,12 +214,12 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, } } -static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, - const int16_t *chrUSrc, const int16_t *chrVSrc, - const int16_t *alpSrc, - uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW, - const uint8_t *lumDither, const uint8_t *chrDither) +static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, + const int16_t *chrUSrc, const int16_t *chrVSrc, + const int16_t *alpSrc, + uint8_t *dest, uint8_t *uDest, uint8_t *vDest, + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { int p= 4; const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; @@ -470,12 +470,12 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, " jb 1b \n\t" #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) -static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -503,12 +503,12 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte } } -static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -560,12 +560,12 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, " jb 1b \n\t" #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index) -static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -584,12 +584,12 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt YSCALEYUV2PACKEDX_END } -static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -637,12 +637,12 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, " jb 1b \n\t" #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index) -static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -661,12 +661,12 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt YSCALEYUV2PACKEDX_END } -static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -794,12 +794,12 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) #endif -static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -818,12 +818,12 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte ); } -static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -859,12 +859,12 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, " jb 1b \n\t" #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) -static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -880,12 +880,12 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil YSCALEYUV2PACKEDX_END } -static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, - const int16_t **lumSrc, int lumFilterSize, - const int16_t *chrFilter, const int16_t **chrUSrc, - const int16_t **chrVSrc, - int chrFilterSize, const int16_t **alpSrc, - uint8_t *dest, int dstW, int dstY) +static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrc, + const int16_t **chrVSrc, + int chrFilterSize, const int16_t **alpSrc, + uint8_t *dest, int dstW, int dstY) { x86_reg dummy=0; x86_reg dstW_reg = dstW; @@ -981,12 +981,12 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter /** * vertical bilinear scale YV12 to RGB */ -static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) +static void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) { if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { #if ARCH_X86_64 @@ -1043,12 +1043,12 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0, } } -static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) +static void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) { //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( @@ -1065,12 +1065,12 @@ static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0, ); } -static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) +static void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) { //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( @@ -1093,12 +1093,12 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0, ); } -static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) +static void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) { //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( @@ -1161,12 +1161,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0, #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) -static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, - const uint16_t *buf1, const uint16_t *ubuf0, - const uint16_t *ubuf1, const uint16_t *vbuf0, - const uint16_t *vbuf1, const uint16_t *abuf0, - const uint16_t *abuf1, uint8_t *dest, - int dstW, int yalpha, int uvalpha, int y) +static void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, + const uint16_t *buf1, const uint16_t *ubuf0, + const uint16_t *ubuf1, const uint16_t *vbuf0, + const uint16_t *vbuf1, const uint16_t *abuf0, + const uint16_t *abuf1, uint8_t *dest, + int dstW, int yalpha, int uvalpha, int y) { //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( __asm__ volatile( @@ -1300,12 +1300,12 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0, /** * YV12 to RGB without scaling or interpolating */ -static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) { const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 @@ -1368,12 +1368,12 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0, } } -static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) { const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 @@ -1406,12 +1406,12 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0, } } -static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) { const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 @@ -1456,12 +1456,12 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0, } } -static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) { const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 @@ -1543,12 +1543,12 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0, "psraw $7, %%mm7 \n\t" #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) -static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, - const uint16_t *ubuf0, const uint16_t *ubuf1, - const uint16_t *vbuf0, const uint16_t *vbuf1, - const uint16_t *abuf0, uint8_t *dest, - int dstW, int uvalpha, enum PixelFormat dstFormat, - int flags, int y) +static void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, + const uint16_t *ubuf0, const uint16_t *ubuf1, + const uint16_t *vbuf0, const uint16_t *vbuf1, + const uint16_t *abuf0, uint8_t *dest, + int dstW, int uvalpha, enum PixelFormat dstFormat, + int flags, int y) { const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 @@ -1582,7 +1582,8 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0, #if !COMPILE_TEMPLATE_MMX2 //FIXME yuy2* can read up to 7 samples too much -static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) +static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, + int width, uint32_t *unused) { __asm__ volatile( "movq "MANGLE(bm01010101)", %%mm2 \n\t" @@ -1601,7 +1602,9 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, int width, ); } -static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) +static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { __asm__ volatile( "movq "MANGLE(bm01010101)", %%mm4 \n\t" @@ -1627,7 +1630,9 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t assert(src1 == src2); } -static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) +static void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { __asm__ volatile( "mov %0, %%"REG_a" \n\t" @@ -1653,7 +1658,8 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ -static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) +static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, + int width, uint32_t *unused) { __asm__ volatile( "mov %0, %%"REG_a" \n\t" @@ -1671,7 +1677,9 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, int width, ); } -static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) +static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { __asm__ volatile( "movq "MANGLE(bm01010101)", %%mm4 \n\t" @@ -1697,7 +1705,9 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t assert(src1 == src2); } -static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) +static void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { __asm__ volatile( "movq "MANGLE(bm01010101)", %%mm4 \n\t" @@ -1722,8 +1732,8 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s ); } -static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, - const uint8_t *src, int width) +static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, + const uint8_t *src, int width) { __asm__ volatile( "movq "MANGLE(bm01010101)", %%mm4 \n\t" @@ -1748,22 +1758,23 @@ static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, ); } -static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) +static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { RENAME(nvXXtoUV)(dstU, dstV, src1, width); } -static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *unused) +static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { RENAME(nvXXtoUV)(dstV, dstU, src1, width); } #endif /* !COMPILE_TEMPLATE_MMX2 */ -static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat) +static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, + int width, enum PixelFormat srcFormat) { if(srcFormat == PIX_FMT_BGR24) { @@ -1815,7 +1826,21 @@ static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, int wi ); } -static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat srcFormat) +static void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, + int width, uint32_t *unused) +{ + RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); +} + +static void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, + int width, uint32_t *unused) +{ + RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); +} + +static av_always_inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, + const uint8_t *src, int width, + enum PixelFormat srcFormat) { __asm__ volatile( "movq 24(%4), %%mm6 \n\t" @@ -1871,23 +1896,17 @@ static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uin ); } -static inline void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) -{ - RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); -} - -static inline void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) +static void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24); assert(src1 == src2); } -static inline void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, int width, uint32_t *unused) -{ - RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); -} - -static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) +static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, + const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *unused) { assert(src1==src2); RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24); @@ -1895,8 +1914,10 @@ static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t #if !COMPILE_TEMPLATE_MMX2 // bilinear / bicubic scaling -static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc, - const int16_t *filter, const int16_t *filterPos, int filterSize) +static void RENAME(hScale)(int16_t *dst, int dstW, + const uint8_t *src, int srcW, + int xInc, const int16_t *filter, + const int16_t *filterPos, int filterSize) { assert(filterSize % 4 == 0 && filterSize>0); if (filterSize==4) { // Always true for upscaling, sometimes for down, too. @@ -2208,9 +2229,9 @@ static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, #if COMPILE_TEMPLATE_MMX2 -static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, - int dstWidth, const uint8_t *src, int srcW, - int xInc) +static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, + int dstWidth, const uint8_t *src, + int srcW, int xInc) { int16_t *filterPos = c->hLumFilterPos; int16_t *filter = c->hLumFilter; @@ -2280,9 +2301,9 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, dst[i] = src[srcW-1]*128; } -static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, - int dstWidth, const uint8_t *src1, - const uint8_t *src2, int srcW, int xInc) +static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, + int dstWidth, const uint8_t *src1, + const uint8_t *src2, int srcW, int xInc) { int16_t *filterPos = c->hChrFilterPos; int16_t *filter = c->hChrFilter; @@ -2343,7 +2364,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d } #endif /* COMPILE_TEMPLATE_MMX2 */ -static void RENAME(sws_init_swScale)(SwsContext *c) +static av_cold void RENAME(sws_init_swScale)(SwsContext *c) { enum PixelFormat srcFormat = c->srcFormat, dstFormat = c->dstFormat;