From 372de27df78b85976991bfe321a43b62b8505276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 30 Aug 2012 23:08:06 +0300 Subject: [PATCH 1/8] pktdumper: Use sizeof(variable) instead of the direct buffer length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also change the snprintf size to use the full buffer, since snprintf always null-terminates the buffer. Signed-off-by: Martin Storsjö --- tools/pktdumper.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/pktdumper.c b/tools/pktdumper.c index 9243c93373..fffeeeb70a 100644 --- a/tools/pktdumper.c +++ b/tools/pktdumper.c @@ -70,16 +70,16 @@ int main(int argc, char **argv) return usage(1); if (argc > 2) maxpkts = atoi(argv[2]); - strncpy(fntemplate, argv[1], PATH_MAX - 1); + strncpy(fntemplate, argv[1], sizeof(fntemplate) - 1); if (strrchr(argv[1], '/')) - strncpy(fntemplate, strrchr(argv[1], '/') + 1, PATH_MAX - 1); + strncpy(fntemplate, strrchr(argv[1], '/') + 1, sizeof(fntemplate) - 1); if (strrchr(fntemplate, '.')) *strrchr(fntemplate, '.') = '\0'; if (strchr(fntemplate, '%')) { fprintf(stderr, "can't use filenames containing '%%'\n"); return usage(1); } - if (strlen(fntemplate) + sizeof(PKTFILESUFF) >= PATH_MAX - 1) { + if (strlen(fntemplate) + sizeof(PKTFILESUFF) >= sizeof(fntemplate) - 1) { fprintf(stderr, "filename too long\n"); return usage(1); } @@ -105,7 +105,7 @@ int main(int argc, char **argv) while ((err = av_read_frame(fctx, &pkt)) >= 0) { int fd; - snprintf(pktfilename, PATH_MAX - 1, fntemplate, pktnum, + snprintf(pktfilename, sizeof(pktfilename), fntemplate, pktnum, pkt.stream_index, pkt.pts, pkt.size, (pkt.flags & AV_PKT_FLAG_KEY) ? 
'K' : '_'); printf(PKTFILESUFF "\n", pktnum, pkt.stream_index, pkt.pts, pkt.size, From bcc44873d949bd817fa19fad368604cf73ccee78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 30 Aug 2012 23:10:07 +0300 Subject: [PATCH 2/8] pktdumper: Use av_strlcpy instead of strncpy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This takes care of null-terminating the buffer if it is too small, which wasn't handled properly before. Signed-off-by: Martin Storsjö --- tools/pktdumper.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/pktdumper.c b/tools/pktdumper.c index fffeeeb70a..f21a8e01e7 100644 --- a/tools/pktdumper.c +++ b/tools/pktdumper.c @@ -31,6 +31,7 @@ #include #endif +#include "libavutil/avstring.h" #include "libavutil/time.h" #include "libavformat/avformat.h" @@ -70,9 +71,9 @@ int main(int argc, char **argv) return usage(1); if (argc > 2) maxpkts = atoi(argv[2]); - strncpy(fntemplate, argv[1], sizeof(fntemplate) - 1); + av_strlcpy(fntemplate, argv[1], sizeof(fntemplate)); if (strrchr(argv[1], '/')) - strncpy(fntemplate, strrchr(argv[1], '/') + 1, sizeof(fntemplate) - 1); + av_strlcpy(fntemplate, strrchr(argv[1], '/') + 1, sizeof(fntemplate)); if (strrchr(fntemplate, '.')) *strrchr(fntemplate, '.') = '\0'; if (strchr(fntemplate, '%')) { From 21411a4102757cb5ee9b4b16550a55055f54af8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 28 Aug 2012 22:56:03 +0300 Subject: [PATCH 3/8] pktdumper: Use a custom define instead of PATH_MAX for buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PATH_MAX is not necessarily available on all systems, e.g. it's normally not available on MSVC, and is not guaranteed to be defined on a POSIX system either.
Signed-off-by: Martin Storsjö --- tools/pktdumper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/pktdumper.c b/tools/pktdumper.c index f21a8e01e7..087ac5b2dc 100644 --- a/tools/pktdumper.c +++ b/tools/pktdumper.c @@ -31,6 +31,8 @@ #include #endif +#define FILENAME_BUF_SIZE 4096 + #include "libavutil/avstring.h" #include "libavutil/time.h" #include "libavformat/avformat.h" @@ -49,8 +51,8 @@ static int usage(int ret) int main(int argc, char **argv) { - char fntemplate[PATH_MAX]; - char pktfilename[PATH_MAX]; + char fntemplate[FILENAME_BUF_SIZE]; + char pktfilename[FILENAME_BUF_SIZE]; AVFormatContext *fctx = NULL; AVPacket pkt; int64_t pktnum = 0; From 43b73d59a8b6cba60b0401b39f0f577bc6c1df9e Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 27 Aug 2012 03:07:11 +0200 Subject: [PATCH 4/8] configure: x86: Separate inline from standalone assembler capabilities --- configure | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/configure b/configure index f6a7d1f527..efc01adfc5 100755 --- a/configure +++ b/configure @@ -373,6 +373,12 @@ map(){ for v; do eval $m; done } +add_suffix(){ + suffix=$1 + shift + for v; do echo ${v}${suffix}; done +} + set_all(){ value=$1 shift @@ -1101,6 +1107,8 @@ HAVE_LIST_PUB=' HAVE_LIST=" $ARCH_EXT_LIST + $(add_suffix _external $ARCH_EXT_LIST) + $(add_suffix _inline $ARCH_EXT_LIST) $HAVE_LIST_PUB $THREADS_LIST aligned_malloc @@ -1351,6 +1359,17 @@ sse42_deps="sse4" avx_deps="sse42" fma4_deps="avx" +mmx_external_deps="yasm" +mmx_inline_deps="inline_asm" +mmx_suggest="mmx_external mmx_inline" + +for ext in $(filter_out mmx $ARCH_EXT_LIST_X86); do + eval dep=\$${ext}_deps + eval ${ext}_external_deps='"${dep}_external"' + eval ${ext}_inline_deps='"${dep}_inline"' + eval ${ext}_suggest='"${ext}_external ${ext}_inline"' +done + aligned_stack_if_any="ppc x86" fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64" fast_clz_if_any="alpha armv5te avr32 mips 
ppc x86" @@ -2964,8 +2983,8 @@ EOF check_inline_asm xmm_clobbers '"":::"%xmm0"' # check whether binutils is new enough to compile SSSE3/MMXEXT - enabled ssse3 && check_inline_asm ssse3 '"pabsw %xmm0, %xmm0"' - enabled mmxext && check_inline_asm mmxext '"pmaxub %mm0, %mm1"' + enabled ssse3 && check_inline_asm ssse3_inline '"pabsw %xmm0, %xmm0"' + enabled mmxext && check_inline_asm mmxext_inline '"pmaxub %mm0, %mm1"' if ! disabled_any asm mmx yasm; then if check_cmd $yasmexe --version; then @@ -2986,8 +3005,8 @@ EOF check_yasm "pextrd [eax], xmm0, 1" && enable yasm || die "yasm not found, use --disable-yasm for a crippled build" - check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx - check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4 + check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx_external + check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4_external check_yasm "CPU amdnop" && enable cpunop fi From 17337f54c057accf12b0e87d12f576194ad085a8 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Tue, 28 Aug 2012 14:53:33 +0200 Subject: [PATCH 5/8] x86: Split inline and external assembly #ifdefs --- libavcodec/dct-test.c | 4 +-- libavcodec/imgconvert.c | 8 +++--- libavcodec/x86/ac3dsp.asm | 8 +++--- libavcodec/x86/dct32.asm | 2 +- libavcodec/x86/dsputil.asm | 6 ++--- libavcodec/x86/dsputil_mmx.c | 21 ++++++++------- libavcodec/x86/dsputilenc_mmx.c | 8 +++--- libavcodec/x86/fft.asm | 8 +++--- libavcodec/x86/h264_chromamc_10bit.asm | 4 +-- libavcodec/x86/h264_idct_10bit.asm | 16 ++++++------ libavcodec/x86/h264_intrapred_10bit.asm | 26 +++++++++---------- libavcodec/x86/h264_qpel.c | 4 +-- libavcodec/x86/h264dsp_init.c | 10 ++++---- libavcodec/x86/mpegvideoenc.c | 30 +++++++++------------- libavfilter/x86/gradfun.c | 18 ++++++------- libavfilter/x86/yadif.c | 14 +++++----- libavresample/x86/audio_convert.asm | 34 ++++++++++++------------- libavresample/x86/audio_mix.asm | 10 ++++---- libavutil/internal.h | 6 ++--- libavutil/x86/float_dsp.asm | 4 
+-- libswscale/swscale.c | 2 +- libswscale/utils.c | 12 ++++----- libswscale/x86/swscale.c | 6 ++--- libswscale/x86/yuv2rgb.c | 10 ++++---- 24 files changed, 130 insertions(+), 141 deletions(-) diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c index 21a3397e4d..72d2d80af7 100644 --- a/libavcodec/dct-test.c +++ b/libavcodec/dct-test.c @@ -85,7 +85,7 @@ static const struct algo fdct_tab[] = { { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM }, { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM }, -#if HAVE_MMX && HAVE_INLINE_ASM +#if HAVE_MMX_INLINE { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX }, { "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT }, { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 }, @@ -108,7 +108,7 @@ static const struct algo idct_tab[] = { { "INT", ff_j_rev_dct, MMX_PERM }, { "SIMPLE-C", ff_simple_idct_8, NO_PERM }, -#if HAVE_MMX && HAVE_INLINE_ASM +#if HAVE_MMX_INLINE { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX }, { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 }, { "XVID-MMXEXT", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c index 62ec35a47e..f6c5e3c476 100644 --- a/libavcodec/imgconvert.c +++ b/libavcodec/imgconvert.c @@ -39,7 +39,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/imgutils.h" -#if HAVE_MMX && HAVE_YASM +#if HAVE_MMX_EXTERNAL #include "x86/dsputil_mmx.h" #endif @@ -52,7 +52,7 @@ #define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */ #define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */ -#if HAVE_MMX && HAVE_YASM +#if HAVE_MMX_EXTERNAL #define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx #define deinterlace_line ff_deinterlace_line_mmx #else @@ -877,7 +877,7 @@ int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, return 0; } -#if !(HAVE_MMX && HAVE_YASM) +#if !HAVE_MMX_EXTERNAL /* filter parameters: [-1 4 2 4 -1] // 8 */ 
static void deinterlace_line_c(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, @@ -926,7 +926,7 @@ static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3, lum++; } } -#endif +#endif /* !HAVE_MMX_EXTERNAL */ /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The top field is copied as is, but the bottom field is deinterlaced diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index ef828bb0d5..176fd3dbba 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -73,7 +73,7 @@ AC3_EXPONENT_MIN mmx %define LOOP_ALIGN ALIGN 16 AC3_EXPONENT_MIN mmxext %endif -%if HAVE_SSE +%if HAVE_SSE2_EXTERNAL INIT_XMM AC3_EXPONENT_MIN sse2 %endif @@ -385,7 +385,7 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum pabsd %1, %1 %endmacro -%if HAVE_AMD3DNOW +%if HAVE_AMD3DNOW_EXTERNAL INIT_MMX cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len add expq, lenq @@ -453,11 +453,11 @@ cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len REP_RET %endmacro -%if HAVE_SSE +%if HAVE_SSE2_EXTERNAL INIT_XMM %define PABSD PABSD_MMX AC3_EXTRACT_EXPONENTS sse2 -%if HAVE_SSSE3 +%if HAVE_SSSE3_EXTERNAL %define PABSD PABSD_SSSE3 AC3_EXTRACT_EXPONENTS ssse3 %endif diff --git a/libavcodec/x86/dct32.asm b/libavcodec/x86/dct32.asm index 9d6169ca66..58ee8d343b 100644 --- a/libavcodec/x86/dct32.asm +++ b/libavcodec/x86/dct32.asm @@ -193,7 +193,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 INIT_YMM avx SECTION_TEXT -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in) cglobal dct32_float, 2,3,8, out, in, tmp ; pass 1 diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm index d6cf824ecc..fcb1b6d53c 100644 --- a/libavcodec/x86/dsputil.asm +++ b/libavcodec/x86/dsputil.asm @@ -1169,7 +1169,7 @@ ALIGN 16 INIT_XMM sse VECTOR_FMUL_REVERSE -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx VECTOR_FMUL_REVERSE %endif @@ -1199,7 
+1199,7 @@ ALIGN 16 INIT_XMM sse VECTOR_FMUL_ADD -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx VECTOR_FMUL_ADD %endif @@ -1245,7 +1245,7 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len INIT_XMM sse BUTTERFLIES_FLOAT_INTERLEAVE -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx BUTTERFLIES_FLOAT_INTERLEAVE %endif diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 4fbb146ccc..d9505063b1 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2812,7 +2812,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx, int mm_flags) { -#if HAVE_6REGS && HAVE_INLINE_ASM +#if HAVE_AMD3DNOWEXT_INLINE && HAVE_6REGS c->vector_fmul_window = vector_fmul_window_3dnowext; #endif } @@ -2926,11 +2926,10 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, int mm_flags) { -#if HAVE_SSSE3 const int high_bit_depth = avctx->bits_per_raw_sample > 8; const int bit_depth = avctx->bits_per_raw_sample; -#if HAVE_INLINE_ASM +#if HAVE_SSSE3_INLINE if (!high_bit_depth && CONFIG_H264QPEL) { H264_QPEL_FUNCS(1, 0, ssse3); H264_QPEL_FUNCS(1, 1, ssse3); @@ -2945,8 +2944,9 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, H264_QPEL_FUNCS(3, 2, ssse3); H264_QPEL_FUNCS(3, 3, ssse3); } -#endif /* HAVE_INLINE_ASM */ -#if HAVE_YASM +#endif /* HAVE_SSSE3_INLINE */ + +#if HAVE_SSSE3_EXTERNAL if (bit_depth == 10 && CONFIG_H264QPEL) { H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); @@ -2969,21 +2969,20 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) // cachesplit c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; c->bswap_buf = ff_bswap32_buf_ssse3; -#endif -#endif +#endif /* HAVE_SSSE3_EXTERNAL */ } static 
void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, int mm_flags) { -#if HAVE_YASM +#if HAVE_SSE4_EXTERNAL c->vector_clip_int32 = ff_vector_clip_int32_sse4; -#endif +#endif /* HAVE_SSE4_EXTERNAL */ } static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) { -#if HAVE_AVX && HAVE_YASM +#if HAVE_AVX_EXTERNAL const int bit_depth = avctx->bits_per_raw_sample; if (bit_depth == 10) { @@ -3003,7 +3002,7 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) c->butterflies_float_interleave = ff_butterflies_float_interleave_avx; c->vector_fmul_reverse = ff_vector_fmul_reverse_avx; c->vector_fmul_add = ff_vector_fmul_add_avx; -#endif +#endif /* HAVE_AVX_EXTERNAL */ } void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index c0ef0bac3e..10331327bf 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -982,7 +982,7 @@ DCT_SAD_FUNC(mmx2) DCT_SAD_FUNC(sse2) #undef MMABS -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE #define MMABS(a,z) MMABS_SSSE3(a,z) DCT_SAD_FUNC(ssse3) #undef MMABS @@ -1062,7 +1062,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si #undef SCALE_OFFSET #undef PMULHRW -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE #undef PHADDD #define DEF(x) x ## _ssse3 #define SET_RND(x) @@ -1081,7 +1081,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si #undef SCALE_OFFSET #undef PMULHRW #undef PHADDD -#endif //HAVE_SSSE3 +#endif /* HAVE_SSSE3_INLINE */ #endif /* HAVE_INLINE_ASM */ @@ -1161,7 +1161,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) c->sum_abs_dctelem= sum_abs_dctelem_sse2; } -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE if(mm_flags & AV_CPU_FLAG_SSSE3){ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ c->try_8x8basis= try_8x8basis_ssse3; diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm index 
645253cbd3..f05429820b 100644 --- a/libavcodec/x86/fft.asm +++ b/libavcodec/x86/fft.asm @@ -305,7 +305,7 @@ IF%1 mova Z(1), m5 INIT_YMM avx -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL align 16 fft8_avx: mova m0, Z(0) @@ -552,7 +552,7 @@ DEFINE_ARGS zc, w, n, o1, o3 INIT_YMM avx -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL %macro INTERL_AVX 5 vunpckhps %3, %2, %1 vunpcklps %2, %2, %1 @@ -793,7 +793,7 @@ align 8 dispatch_tab %+ fullsuffix: pointer list_of_fft %endmacro ; DECL_FFT -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx DECL_FFT 6 DECL_FFT 6, _interleave @@ -1100,6 +1100,6 @@ DECL_IMDCT POSROTATESHUF_3DNOW INIT_YMM avx -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL DECL_IMDCT POSROTATESHUF_AVX %endif diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm index f8a2cff68f..bcdb27c176 100644 --- a/libavcodec/x86/h264_chromamc_10bit.asm +++ b/libavcodec/x86/h264_chromamc_10bit.asm @@ -252,7 +252,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7 %define CHROMAMC_AVG NOTHING INIT_XMM sse2 CHROMA_MC8 put -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CHROMA_MC8 put %endif @@ -264,7 +264,7 @@ CHROMA_MC2 put %define PAVG pavgw INIT_XMM sse2 CHROMA_MC8 avg -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CHROMA_MC8 avg %endif diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm index 4b80669203..6afcee2840 100644 --- a/libavcodec/x86/h264_idct_10bit.asm +++ b/libavcodec/x86/h264_idct_10bit.asm @@ -80,7 +80,7 @@ cglobal h264_idct_add_10, 3,3 INIT_XMM sse2 IDCT_ADD_10 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT_ADD_10 %endif @@ -110,7 +110,7 @@ add4x4_idct %+ SUFFIX: INIT_XMM sse2 ALIGN 16 ADD4x4IDCT -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx ALIGN 16 ADD4x4IDCT @@ -150,7 +150,7 @@ cglobal h264_idct_add16_10, 5,6 INIT_XMM sse2 IDCT_ADD16_10 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT_ADD16_10 %endif @@ -216,7 +216,7 @@ cglobal h264_idct8_dc_add_10,3,3,7 INIT_XMM sse2 IDCT8_DC_ADD -%if HAVE_AVX +%if 
HAVE_AVX_EXTERNAL INIT_XMM avx IDCT8_DC_ADD %endif @@ -287,7 +287,7 @@ cglobal h264_idct_add16intra_10,5,7,8 INIT_XMM sse2 IDCT_ADD16INTRA_10 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT_ADD16INTRA_10 %endif @@ -324,7 +324,7 @@ cglobal h264_idct_add8_10,5,8,7 INIT_XMM sse2 IDCT_ADD8 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT_ADD8 %endif @@ -501,7 +501,7 @@ h264_idct8_add1_10 %+ SUFFIX: INIT_XMM sse2 IDCT8_ADD -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT8_ADD %endif @@ -541,7 +541,7 @@ cglobal h264_idct8_add4_10, 0,7,16 INIT_XMM sse2 IDCT8_ADD4 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT8_ADD4 %endif diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm index 1423b561ac..529134e1c8 100644 --- a/libavcodec/x86/h264_intrapred_10bit.asm +++ b/libavcodec/x86/h264_intrapred_10bit.asm @@ -84,7 +84,7 @@ INIT_XMM PRED4x4_DR sse2 %define PALIGNR PALIGNR_SSSE3 PRED4x4_DR ssse3 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED4x4_DR avx %endif @@ -124,7 +124,7 @@ INIT_XMM PRED4x4_VR sse2 %define PALIGNR PALIGNR_SSSE3 PRED4x4_VR ssse3 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED4x4_VR avx %endif @@ -167,7 +167,7 @@ INIT_XMM PRED4x4_HD sse2 %define PALIGNR PALIGNR_SSSE3 PRED4x4_HD ssse3 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED4x4_HD avx %endif @@ -238,7 +238,7 @@ cglobal pred4x4_down_left_10_%1, 3,3 INIT_XMM PRED4x4_DL sse2 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED4x4_DL avx %endif @@ -267,7 +267,7 @@ cglobal pred4x4_vertical_left_10_%1, 3,3 INIT_XMM PRED4x4_VL sse2 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED4x4_VL avx %endif @@ -577,7 +577,7 @@ cglobal pred8x8l_top_dc_10_%1, 4,4,6 INIT_XMM PRED8x8L_TOP_DC sse2 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED8x8L_TOP_DC avx %endif @@ -636,7 +636,7 @@ cglobal pred8x8l_dc_10_%1, 4,6,6 INIT_XMM PRED8x8L_DC sse2 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED8x8L_DC avx %endif @@ -671,7 +671,7 @@ cglobal 
pred8x8l_vertical_10_%1, 4,4,6 INIT_XMM PRED8x8L_VERTICAL sse2 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED8x8L_VERTICAL avx %endif @@ -728,7 +728,7 @@ INIT_XMM PRED8x8L_HORIZONTAL sse2 %define PALIGNR PALIGNR_SSSE3 PRED8x8L_HORIZONTAL ssse3 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED8x8L_HORIZONTAL avx %endif @@ -797,7 +797,7 @@ INIT_XMM PRED8x8L_DOWN_LEFT sse2 %define PALIGNR PALIGNR_SSSE3 PRED8x8L_DOWN_LEFT ssse3 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED8x8L_DOWN_LEFT avx %endif @@ -872,7 +872,7 @@ INIT_XMM PRED8x8L_DOWN_RIGHT sse2 %define PALIGNR PALIGNR_SSSE3 PRED8x8L_DOWN_RIGHT ssse3 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED8x8L_DOWN_RIGHT avx %endif @@ -943,7 +943,7 @@ INIT_XMM PRED8x8L_VERTICAL_RIGHT sse2 %define PALIGNR PALIGNR_SSSE3 PRED8x8L_VERTICAL_RIGHT ssse3 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED8x8L_VERTICAL_RIGHT avx %endif @@ -1005,7 +1005,7 @@ INIT_XMM PRED8x8L_HORIZONTAL_UP sse2 %define PALIGNR PALIGNR_SSSE3 PRED8x8L_HORIZONTAL_UP ssse3 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_AVX PRED8x8L_HORIZONTAL_UP avx %endif diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index fc1635de8b..284c85a99b 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -1174,7 +1174,7 @@ QPEL_H264_V_XMM(put_, PUT_OP, sse2) QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2) QPEL_H264_HV_XMM(put_, PUT_OP, sse2) QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2) -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE QPEL_H264_H_XMM(put_, PUT_OP, ssse3) QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3) QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3) @@ -1188,7 +1188,7 @@ H264_MC_4816(3dnow) H264_MC_4816(mmx2) H264_MC_816(H264_MC_V, sse2) H264_MC_816(H264_MC_HV, sse2) -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE H264_MC_816(H264_MC_H, ssse3) H264_MC_816(H264_MC_HV, ssse3) #endif diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index f24f751fb3..7be78a8207 100644 --- a/libavcodec/x86/h264dsp_init.c +++ 
b/libavcodec/x86/h264dsp_init.c @@ -39,7 +39,7 @@ IDCT_ADD_FUNC(8_dc, 10, sse2) IDCT_ADD_FUNC(8, 8, mmx) IDCT_ADD_FUNC(8, 8, sse2) IDCT_ADD_FUNC(8, 10, sse2) -#if HAVE_AVX +#if HAVE_AVX_EXTERNAL IDCT_ADD_FUNC(, 10, avx) IDCT_ADD_FUNC(8_dc, 10, avx) IDCT_ADD_FUNC(8, 10, avx) @@ -64,7 +64,7 @@ IDCT_ADD_REP_FUNC(, 16intra, 8, mmx) IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2) IDCT_ADD_REP_FUNC(, 16intra, 8, sse2) IDCT_ADD_REP_FUNC(, 16intra, 10, sse2) -#if HAVE_AVX +#if HAVE_AVX_EXTERNAL IDCT_ADD_REP_FUNC(, 16, 10, avx) IDCT_ADD_REP_FUNC(, 16intra, 10, avx) #endif @@ -79,7 +79,7 @@ IDCT_ADD_REP_FUNC2(, 8, 8, mmx) IDCT_ADD_REP_FUNC2(, 8, 8, mmx2) IDCT_ADD_REP_FUNC2(, 8, 8, sse2) IDCT_ADD_REP_FUNC2(, 8, 10, sse2) -#if HAVE_AVX +#if HAVE_AVX_EXTERNAL IDCT_ADD_REP_FUNC2(, 8, 10, avx) #endif @@ -353,7 +353,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; } -#if HAVE_AVX +#if HAVE_AVX_EXTERNAL if (mm_flags & AV_CPU_FLAG_AVX) { c->h264_idct_dc_add = c->h264_idct_add = ff_h264_idct_add_10_avx; @@ -377,7 +377,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; #endif /* HAVE_ALIGNED_STACK */ } -#endif /* HAVE_AVX */ +#endif /* HAVE_AVX_EXTERNAL */ } } } diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c index c3d8b835e5..31a6790328 100644 --- a/libavcodec/x86/mpegvideoenc.c +++ b/libavcodec/x86/mpegvideoenc.c @@ -26,20 +26,18 @@ #include "libavcodec/mpegvideo.h" #include "dsputil_mmx.h" -#if HAVE_INLINE_ASM - extern uint16_t ff_inv_zigzag_direct16[64]; -#if HAVE_MMX +#if HAVE_MMX_INLINE #define COMPILE_TEMPLATE_MMXEXT 0 #define COMPILE_TEMPLATE_SSE2 0 #define COMPILE_TEMPLATE_SSSE3 0 #define RENAME(a) a ## _MMX #define RENAMEl(a) a ## _mmx #include "mpegvideoenc_template.c" -#endif /* HAVE_MMX */ +#endif /* HAVE_MMX_INLINE */ -#if 
HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE #undef COMPILE_TEMPLATE_SSSE3 #undef COMPILE_TEMPLATE_SSE2 #undef COMPILE_TEMPLATE_MMXEXT @@ -51,9 +49,9 @@ extern uint16_t ff_inv_zigzag_direct16[64]; #define RENAME(a) a ## _MMX2 #define RENAMEl(a) a ## _mmx2 #include "mpegvideoenc_template.c" -#endif /* HAVE_MMXEXT */ +#endif /* HAVE_MMXEXT_INLINE */ -#if HAVE_SSE2 +#if HAVE_SSE2_INLINE #undef COMPILE_TEMPLATE_MMXEXT #undef COMPILE_TEMPLATE_SSE2 #undef COMPILE_TEMPLATE_SSSE3 @@ -65,9 +63,9 @@ extern uint16_t ff_inv_zigzag_direct16[64]; #define RENAME(a) a ## _SSE2 #define RENAMEl(a) a ## _sse2 #include "mpegvideoenc_template.c" -#endif /* HAVE_SSE2 */ +#endif /* HAVE_SSE2_INLINE */ -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE #undef COMPILE_TEMPLATE_MMXEXT #undef COMPILE_TEMPLATE_SSE2 #undef COMPILE_TEMPLATE_SSSE3 @@ -79,33 +77,29 @@ extern uint16_t ff_inv_zigzag_direct16[64]; #define RENAME(a) a ## _SSSE3 #define RENAMEl(a) a ## _sse2 #include "mpegvideoenc_template.c" -#endif /* HAVE_SSSE3 */ - -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_SSSE3_INLINE */ void ff_MPV_encode_init_x86(MpegEncContext *s) { -#if HAVE_INLINE_ASM int mm_flags = av_get_cpu_flags(); const int dct_algo = s->avctx->dct_algo; if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) { -#if HAVE_MMX +#if HAVE_MMX_INLINE if (mm_flags & AV_CPU_FLAG_MMX && HAVE_MMX) s->dct_quantize = dct_quantize_MMX; #endif -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) s->dct_quantize = dct_quantize_MMX2; #endif -#if HAVE_SSE2 +#if HAVE_SSE2_INLINE if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE2) s->dct_quantize = dct_quantize_SSE2; #endif -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE if (mm_flags & AV_CPU_FLAG_SSSE3) s->dct_quantize = dct_quantize_SSSE3; #endif } -#endif /* HAVE_INLINE_ASM */ } diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c index 140f5e87cc..424a03138b 100644 --- a/libavfilter/x86/gradfun.c +++ b/libavfilter/x86/gradfun.c @@ -29,7 +29,7 @@ DECLARE_ALIGNED(16, 
static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F}; DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) { intptr_t x; @@ -77,7 +77,7 @@ static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, i } #endif -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) { intptr_t x; @@ -122,9 +122,9 @@ static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, :"memory" ); } -#endif // HAVE_SSSE3 +#endif /* HAVE_SSSE3_INLINE */ -#if HAVE_SSE +#if HAVE_SSE2_INLINE static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) { #define BLURV(load)\ @@ -165,7 +165,7 @@ static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, BLURV("movdqa"); } } -#endif // HAVE_SSE +#endif /* HAVE_SSE2_INLINE */ #endif /* HAVE_INLINE_ASM */ @@ -173,18 +173,16 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf) { int cpu_flags = av_get_cpu_flags(); -#if HAVE_INLINE_ASM -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) gf->filter_line = gradfun_filter_line_mmx2; #endif -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE if (cpu_flags & AV_CPU_FLAG_SSSE3) gf->filter_line = gradfun_filter_line_ssse3; #endif -#if HAVE_SSE +#if HAVE_SSE2_INLINE if (cpu_flags & AV_CPU_FLAG_SSE2) gf->blur_line = gradfun_blur_line_sse2; #endif -#endif /* HAVE_INLINE_ASM */ } diff --git a/libavfilter/x86/yadif.c b/libavfilter/x86/yadif.c index cdf13d3e56..f178b32cbe 100644 --- a/libavfilter/x86/yadif.c +++ b/libavfilter/x86/yadif.c @@ -31,7 +31,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pb_1) = {0x0101010101010101ULL, 
0x0101010101010101ULL}; DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x0001000100010001ULL}; -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE #define COMPILE_TEMPLATE_SSE2 1 #define COMPILE_TEMPLATE_SSSE3 1 #undef RENAME @@ -40,14 +40,14 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010 #undef COMPILE_TEMPLATE_SSSE3 #endif -#if HAVE_SSE +#if HAVE_SSE2_INLINE #undef RENAME #define RENAME(a) a ## _sse2 #include "yadif_template.c" #undef COMPILE_TEMPLATE_SSE2 #endif -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE #undef RENAME #define RENAME(a) a ## _mmx2 #include "yadif_template.c" @@ -59,18 +59,16 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif) { int cpu_flags = av_get_cpu_flags(); -#if HAVE_INLINE_ASM -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) yadif->filter_line = yadif_filter_line_mmx2; #endif -#if HAVE_SSE +#if HAVE_SSE2_INLINE if (cpu_flags & AV_CPU_FLAG_SSE2) yadif->filter_line = yadif_filter_line_sse2; #endif -#if HAVE_SSSE3 +#if HAVE_SSSE3_INLINE if (cpu_flags & AV_CPU_FLAG_SSSE3) yadif->filter_line = yadif_filter_line_ssse3; #endif -#endif /* HAVE_INLINE_ASM */ } diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm index 3db64d2f9b..2ebdbc1ec0 100644 --- a/libavresample/x86/audio_convert.asm +++ b/libavresample/x86/audio_convert.asm @@ -155,7 +155,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len INIT_XMM sse2 CONV_S32_TO_FLT -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx CONV_S32_TO_FLT %endif @@ -223,7 +223,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len INIT_XMM sse2 CONV_FLT_TO_S32 -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx CONV_FLT_TO_S32 %endif @@ -260,7 +260,7 @@ cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1 INIT_XMM sse2 CONV_S16P_TO_S16_2CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_S16P_TO_S16_2CH %endif @@ -383,7 +383,7 @@ INIT_XMM sse2 CONV_S16P_TO_S16_6CH INIT_XMM sse2slow CONV_S16P_TO_S16_6CH 
-%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_S16P_TO_S16_6CH %endif @@ -432,7 +432,7 @@ cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1 INIT_XMM sse2 CONV_S16P_TO_FLT_2CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_S16P_TO_FLT_2CH %endif @@ -536,7 +536,7 @@ INIT_XMM sse2 CONV_S16P_TO_FLT_6CH INIT_XMM ssse3 CONV_S16P_TO_FLT_6CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_S16P_TO_FLT_6CH %endif @@ -692,7 +692,7 @@ INIT_MMX sse CONV_FLTP_TO_S16_6CH INIT_XMM sse2 CONV_FLTP_TO_S16_6CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_FLTP_TO_S16_6CH %endif @@ -729,7 +729,7 @@ cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1 INIT_XMM sse CONV_FLTP_TO_FLT_2CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_FLTP_TO_FLT_2CH %endif @@ -810,7 +810,7 @@ INIT_MMX mmx CONV_FLTP_TO_FLT_6CH INIT_XMM sse4 CONV_FLTP_TO_FLT_6CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_FLTP_TO_FLT_6CH %endif @@ -859,7 +859,7 @@ INIT_XMM sse2 CONV_S16_TO_S16P_2CH INIT_XMM ssse3 CONV_S16_TO_S16P_2CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_S16_TO_S16P_2CH %endif @@ -920,7 +920,7 @@ CONV_S16_TO_S16P_6CH %define PALIGNR PALIGNR_SSSE3 INIT_XMM ssse3 CONV_S16_TO_S16P_6CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_S16_TO_S16P_6CH %endif @@ -958,7 +958,7 @@ cglobal conv_s16_to_fltp_2ch, 3,4,5, dst0, src, len, dst1 INIT_XMM sse2 CONV_S16_TO_FLTP_2CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_S16_TO_FLTP_2CH %endif @@ -1041,7 +1041,7 @@ INIT_XMM ssse3 CONV_S16_TO_FLTP_6CH INIT_XMM sse4 CONV_S16_TO_FLTP_6CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_S16_TO_FLTP_6CH %endif @@ -1087,7 +1087,7 @@ cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1 INIT_XMM sse2 CONV_FLT_TO_S16P_2CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_FLT_TO_S16P_2CH %endif @@ -1161,7 +1161,7 @@ CONV_FLT_TO_S16P_6CH %define PALIGNR PALIGNR_SSSE3 INIT_XMM ssse3 CONV_FLT_TO_S16P_6CH -%if HAVE_AVX +%if 
HAVE_AVX_EXTERNAL INIT_XMM avx CONV_FLT_TO_S16P_6CH %endif @@ -1193,7 +1193,7 @@ cglobal conv_flt_to_fltp_2ch, 3,4,3, dst0, src, len, dst1 INIT_XMM sse CONV_FLT_TO_FLTP_2CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_FLT_TO_FLTP_2CH %endif @@ -1256,7 +1256,7 @@ cglobal conv_flt_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5 INIT_XMM sse2 CONV_FLT_TO_FLTP_6CH -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx CONV_FLT_TO_FLTP_6CH %endif diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm index bab4292e13..0c4a9bd3ad 100644 --- a/libavresample/x86/audio_mix.asm +++ b/libavresample/x86/audio_mix.asm @@ -56,7 +56,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1 INIT_XMM sse MIX_2_TO_1_FLTP_FLT -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx MIX_2_TO_1_FLTP_FLT %endif @@ -175,7 +175,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1 INIT_XMM sse MIX_1_TO_2_FLTP_FLT -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx MIX_1_TO_2_FLTP_FLT %endif @@ -222,7 +222,7 @@ INIT_XMM sse2 MIX_1_TO_2_S16P_FLT INIT_XMM sse4 MIX_1_TO_2_S16P_FLT -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_XMM avx MIX_1_TO_2_S16P_FLT %endif @@ -490,7 +490,7 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, s MIX_3_8_TO_1_2_FLT %%i, 1, s16p MIX_3_8_TO_1_2_FLT %%i, 2, s16p ; do not use ymm AVX or FMA4 in x86-32 for 6 or more channels due to stack alignment issues - %if HAVE_AVX + %if HAVE_AVX_EXTERNAL %if ARCH_X86_64 || %%i < 6 INIT_YMM avx %else @@ -502,7 +502,7 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, s MIX_3_8_TO_1_2_FLT %%i, 1, s16p MIX_3_8_TO_1_2_FLT %%i, 2, s16p %endif - %if HAVE_FMA4 + %if HAVE_FMA4_EXTERNAL %if ARCH_X86_64 || %%i < 6 INIT_YMM fma4 %else diff --git a/libavutil/internal.h b/libavutil/internal.h index 12b71086e6..6862000722 100644 --- a/libavutil/internal.h +++ b/libavutil/internal.h @@ -158,7 +158,7 @@ # define 
ONLY_IF_THREADS_ENABLED(x) NULL #endif -#if HAVE_MMX && HAVE_INLINE_ASM +#if HAVE_MMX_INLINE /** * Empty mmx state. * this must be called between any dsp function and float/double code. @@ -171,8 +171,8 @@ static av_always_inline void emms_c(void) #elif HAVE_MMX && HAVE_MM_EMPTY # include # define emms_c _mm_empty -#else /* HAVE_MMX */ +#else # define emms_c() -#endif /* HAVE_MMX */ +#endif /* HAVE_MMX_INLINE */ #endif /* AVUTIL_INTERNAL_H */ diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index 8c0ebc133c..934dac08d9 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -45,7 +45,7 @@ ALIGN 16 INIT_XMM sse VECTOR_FMUL -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx VECTOR_FMUL %endif @@ -86,7 +86,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len INIT_XMM sse VECTOR_FMAC_SCALAR -%if HAVE_AVX +%if HAVE_AVX_EXTERNAL INIT_YMM avx VECTOR_FMAC_SCALAR %endif diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 9da250e1d1..94f51cf5cb 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -518,7 +518,7 @@ static int swScale(SwsContext *c, const uint8_t *src[], if (!enough_lines) break; // we can't output a dstY line so let's try with the next slice -#if HAVE_MMX && HAVE_INLINE_ASM +#if HAVE_MMX_INLINE updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf); #endif diff --git a/libswscale/utils.c b/libswscale/utils.c index f890b5cee1..200346cab5 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -577,7 +577,7 @@ fail: return ret; } -#if HAVE_MMXEXT && HAVE_INLINE_ASM +#if HAVE_MMXEXT_INLINE static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits) { @@ -740,7 +740,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, return fragmentPos + 1; } -#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */ +#endif /* HAVE_MMXEXT_INLINE */ static void getSubSampleFactors(int *h, int *v, enum 
PixelFormat format) { @@ -1012,7 +1012,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, /* precalculate horizontal scaler filter coefficients */ { -#if HAVE_MMXEXT && HAVE_INLINE_ASM +#if HAVE_MMXEXT_INLINE // can't downscale !!! if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) { c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL, @@ -1048,7 +1048,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ); #endif } else -#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */ +#endif /* HAVE_MMXEXT_INLINE */ { const int filterAlign = (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 : @@ -1621,7 +1621,7 @@ void sws_freeContext(SwsContext *c) av_freep(&c->hLumFilterPos); av_freep(&c->hChrFilterPos); -#if HAVE_MMX +#if HAVE_MMX_INLINE #ifdef MAP_ANONYMOUS if (c->lumMmx2FilterCode) munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize); @@ -1638,7 +1638,7 @@ void sws_freeContext(SwsContext *c) #endif c->lumMmx2FilterCode = NULL; c->chrMmx2FilterCode = NULL; -#endif /* HAVE_MMX */ +#endif /* HAVE_MMX_INLINE */ av_freep(&c->yuvTable); av_free(c->formatConvBuffer); diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index db9fb732b9..581d6f79f7 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -70,7 +70,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; //MMX versions -#if HAVE_MMX +#if HAVE_MMX_INLINE #undef RENAME #define COMPILE_TEMPLATE_MMXEXT 0 #define RENAME(a) a ## _MMX @@ -78,7 +78,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; #endif //MMX2 versions -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 @@ -308,7 +308,7 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) #if HAVE_INLINE_ASM if (cpu_flags & 
AV_CPU_FLAG_MMX) sws_init_swScale_MMX(c); -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) sws_init_swScale_MMX2(c); #endif diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 93755493ab..af30ca96ee 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -50,22 +50,22 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions -#if HAVE_MMX +#if HAVE_MMX_INLINE #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 0 #define RENAME(a) a ## _MMX #include "yuv2rgb_template.c" -#endif /* HAVE_MMX */ +#endif /* HAVE_MMX_INLINE */ //MMX2 versions -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _MMX2 #include "yuv2rgb_template.c" -#endif /* HAVE_MMXEXT */ +#endif /* HAVE_MMXEXT_INLINE */ #endif /* HAVE_INLINE_ASM */ @@ -78,7 +78,7 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) c->srcFormat != PIX_FMT_YUVA420P) return NULL; -#if HAVE_MMXEXT +#if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) { switch (c->dstFormat) { case PIX_FMT_RGB24: return yuv420_rgb24_MMX2; From d96d6ba61888c6a97d9426ca80bf36f3812cac76 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 31 Aug 2012 01:56:03 +0100 Subject: [PATCH 6/8] configure: handle --disable-asm before check_deps This is necessary to avoid spuriously enabling _external or _inline variants of arch extensions when they should be disabled. 
Signed-off-by: Mans Rullgard --- configure | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index efc01adfc5..b12df4e8e5 100755 --- a/configure +++ b/configure @@ -3403,14 +3403,14 @@ fi enabled_any $THREADS_LIST && enable threads +enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; } + check_deps $CONFIG_LIST \ $CONFIG_EXTRA \ $HAVE_LIST \ $ALL_COMPONENTS \ $ALL_TESTS \ -enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; } - ! enabled_any memalign posix_memalign aligned_malloc && enabled_any $need_memalign && enable memalign_hack From ee769c6a7c1d4ec6560f5e5a6f457b770b10fb33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Delm=C3=A1s?= Date: Fri, 24 Aug 2012 17:45:57 +0200 Subject: [PATCH 7/8] MSS2 decoder Signed-off-by: Kostya Shishkov --- Changelog | 1 + configure | 1 + doc/general.texi | 2 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/avcodec.h | 1 + libavcodec/codec_desc.c | 7 + libavcodec/h263dec.c | 6 +- libavcodec/mpegvideo.c | 13 +- libavcodec/mss1.c | 106 ++--- libavcodec/mss12.c | 406 ++++++++++++------- libavcodec/mss12.h | 70 +++- libavcodec/mss2.c | 860 ++++++++++++++++++++++++++++++++++++++++ libavcodec/mss2dsp.c | 153 +++++++ libavcodec/mss2dsp.h | 50 +++ libavcodec/vc1.c | 7 +- libavcodec/vc1.h | 2 + libavcodec/vc1dec.c | 10 +- libavcodec/version.h | 4 +- libavformat/riff.c | 1 + 20 files changed, 1476 insertions(+), 226 deletions(-) create mode 100644 libavcodec/mss2.c create mode 100644 libavcodec/mss2dsp.c create mode 100644 libavcodec/mss2dsp.h diff --git a/Changelog b/Changelog index 4fcce0e630..fd67addbb2 100644 --- a/Changelog +++ b/Changelog @@ -45,6 +45,7 @@ version : - avconv -shortest option is now per-output file, -pass and -passlogfile are now per-output stream - Ut Video encoder +- Microsoft Screen 2 decoder version 0.8: diff --git a/configure b/configure index b12df4e8e5..ff051f537a 100755 --- a/configure +++ b/configure @@ -1479,6 +1479,7 @@ 
msmpeg4v2_decoder_select="h263_decoder" msmpeg4v2_encoder_select="h263_encoder" msmpeg4v3_decoder_select="h263_decoder" msmpeg4v3_encoder_select="h263_encoder" +mss2_decoder_select="vc1_decoder" nellymoser_decoder_select="mdct sinewin" nellymoser_encoder_select="mdct sinewin" png_decoder_select="zlib" diff --git a/doc/general.texi b/doc/general.texi index 7f22847579..8415ca3de6 100644 --- a/doc/general.texi +++ b/doc/general.texi @@ -547,6 +547,8 @@ following image formats are supported: @item Microsoft RLE @tab @tab X @item Microsoft Screen 1 @tab @tab X @tab Also known as Windows Media Video V7 Screen. +@item Microsoft Screen 2 @tab @tab X + @tab Also known as Windows Media Video V9 Screen. @item Microsoft Video 1 @tab @tab X @item Mimic @tab @tab X @tab Used in MSN Messenger Webcam streams. diff --git a/libavcodec/Makefile b/libavcodec/Makefile index adbe1a3e55..7b3dc22d7c 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -271,6 +271,7 @@ OBJS-$(CONFIG_MSMPEG4V3_ENCODER) += msmpeg4.o msmpeg4enc.o msmpeg4data.o \ OBJS-$(CONFIG_MSRLE_DECODER) += msrle.o msrledec.o OBJS-$(CONFIG_MSA1_DECODER) += mss3.o mss34dsp.o OBJS-$(CONFIG_MSS1_DECODER) += mss1.o mss12.o +OBJS-$(CONFIG_MSS2_DECODER) += mss2.o mss12.o mss2dsp.o OBJS-$(CONFIG_MSVIDEO1_DECODER) += msvideo1.o OBJS-$(CONFIG_MSZH_DECODER) += lcldec.o OBJS-$(CONFIG_MTS2_DECODER) += mss4.o mss34dsp.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index f7187d1cf7..c6df818477 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -160,6 +160,7 @@ void avcodec_register_all(void) REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3); REGISTER_DECODER (MSRLE, msrle); REGISTER_DECODER (MSS1, mss1); + REGISTER_DECODER (MSS2, mss2); REGISTER_DECODER (MSVIDEO1, msvideo1); REGISTER_DECODER (MSZH, mszh); REGISTER_DECODER (MTS2, mts2); diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 742a2a39cb..8a091bd524 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -264,6 +264,7 @@ enum 
AVCodecID { AV_CODEC_ID_TSCC2, AV_CODEC_ID_MTS2, AV_CODEC_ID_CLLC, + AV_CODEC_ID_MSS2, /* various PCM "codecs" */ AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c index 513a15c090..51fc1714d1 100644 --- a/libavcodec/codec_desc.c +++ b/libavcodec/codec_desc.c @@ -1200,6 +1200,13 @@ static const AVCodecDescriptor codec_descriptors[] = { .long_name = NULL_IF_CONFIG_SMALL("Canopus Lossless Codec"), .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, }, + { + .id = AV_CODEC_ID_MSS2, + .type = AVMEDIA_TYPE_VIDEO, + .name = "mss2", + .long_name = NULL_IF_CONFIG_SMALL("MS Windows Media Video V9 Screen"), + .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY, + }, /* various PCM "codecs" */ { diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c index 6bb6204d97..8e6085beeb 100644 --- a/libavcodec/h263dec.c +++ b/libavcodec/h263dec.c @@ -58,7 +58,10 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx) s->quant_precision=5; s->decode_mb= ff_h263_decode_mb; s->low_delay= 1; - avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts); + if (avctx->codec->id == AV_CODEC_ID_MSS2) + avctx->pix_fmt = PIX_FMT_YUV420P; + else + avctx->pix_fmt = avctx->get_format(avctx, avctx->codec->pix_fmts); s->unrestricted_mv= 1; /* select sub codec */ @@ -93,6 +96,7 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx) case AV_CODEC_ID_WMV3: case AV_CODEC_ID_VC1IMAGE: case AV_CODEC_ID_WMV3IMAGE: + case AV_CODEC_ID_MSS2: s->h263_pred = 1; s->msmpeg4_version=6; avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 13e2accfbd..e9aff3b316 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -226,10 +226,11 @@ void ff_copy_picture(Picture *dst, Picture *src) */ static void free_frame_buffer(MpegEncContext *s, Picture *pic) { - /* Windows Media Image codecs allocate internal buffers with 
different - * dimensions; ignore user defined callbacks for these - */ - if (s->codec_id != AV_CODEC_ID_WMV3IMAGE && s->codec_id != AV_CODEC_ID_VC1IMAGE) + /* WM Image / Screen codecs allocate internal buffers with different + * dimensions / colorspaces; ignore user-defined callbacks for these. */ + if (s->codec_id != AV_CODEC_ID_WMV3IMAGE && + s->codec_id != AV_CODEC_ID_VC1IMAGE && + s->codec_id != AV_CODEC_ID_MSS2) ff_thread_release_buffer(s->avctx, &pic->f); else avcodec_default_release_buffer(s->avctx, &pic->f); @@ -254,7 +255,9 @@ static int alloc_frame_buffer(MpegEncContext *s, Picture *pic) } } - if (s->codec_id != AV_CODEC_ID_WMV3IMAGE && s->codec_id != AV_CODEC_ID_VC1IMAGE) + if (s->codec_id != AV_CODEC_ID_WMV3IMAGE && + s->codec_id != AV_CODEC_ID_VC1IMAGE && + s->codec_id != AV_CODEC_ID_MSS2) r = ff_thread_get_buffer(s->avctx, &pic->f); else r = avcodec_default_get_buffer(s->avctx, &pic->f); diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c index fe8ee10840..ada479eee7 100644 --- a/libavcodec/mss1.c +++ b/libavcodec/mss1.c @@ -24,14 +24,13 @@ * Microsoft Screen 1 (aka Windows Media Video V7 Screen) decoder */ -#include "libavutil/intfloat.h" -#include "libavutil/intreadwrite.h" #include "avcodec.h" #include "mss12.h" typedef struct MSS1Context { MSS12Context ctx; AVFrame pic; + SliceContext sc[2]; } MSS1Context; static void arith_normalise(ArithCoder *c) @@ -56,24 +55,11 @@ static void arith_normalise(ArithCoder *c) c->low <<= 1; c->high <<= 1; c->high |= 1; - c->value |= get_bits1(c->gb); + c->value |= get_bits1(c->gbc.gb); } } -static int arith_get_bit(ArithCoder *c) -{ - int range = c->high - c->low + 1; - int bit = (((c->value - c->low) << 1) + 1) / range; - - if (bit) - c->low += range >> 1; - else - c->high = c->low + (range >> 1) - 1; - - arith_normalise(c); - - return bit; -} +ARITH_GET_BIT() static int arith_get_bits(ArithCoder *c, int bits) { @@ -118,40 +104,27 @@ static int arith_get_prob(ArithCoder *c, int *probs) return sym; } -static int 
arith_get_model_sym(ArithCoder *c, Model *m) -{ - int idx, val; - - idx = arith_get_prob(c, m->cum_prob); - - val = m->idx2sym[idx]; - ff_mss12_model_update(m, idx); - - arith_normalise(c); - - return val; -} +ARITH_GET_MODEL_SYM() static void arith_init(ArithCoder *c, GetBitContext *gb) { - c->low = 0; - c->high = 0xFFFF; - c->value = get_bits(gb, 16); - c->gb = gb; - + c->low = 0; + c->high = 0xFFFF; + c->value = get_bits(gb, 16); + c->gbc.gb = gb; c->get_model_sym = arith_get_model_sym; c->get_number = arith_get_number; } -static int decode_pal(MSS1Context *ctx, ArithCoder *acoder) +static int decode_pal(MSS12Context *ctx, ArithCoder *acoder) { int i, ncol, r, g, b; - uint32_t *pal = ctx->ctx.pal + 256 - ctx->ctx.free_colours; + uint32_t *pal = ctx->pal + 256 - ctx->free_colours; - if (!ctx->ctx.free_colours) + if (!ctx->free_colours) return 0; - ncol = arith_get_number(acoder, ctx->ctx.free_colours + 1); + ncol = arith_get_number(acoder, ctx->free_colours + 1); for (i = 0; i < ncol; i++) { r = arith_get_bits(acoder, 8); g = arith_get_bits(acoder, 8); @@ -167,7 +140,8 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size, { const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; - MSS1Context *c = avctx->priv_data; + MSS1Context *ctx = avctx->priv_data; + MSS12Context *c = &ctx->ctx; GetBitContext gb; ArithCoder acoder; int pal_changed = 0; @@ -176,37 +150,37 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size, init_get_bits(&gb, buf, buf_size * 8); arith_init(&acoder, &gb); - c->pic.reference = 3; - c->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | - FF_BUFFER_HINTS_REUSABLE; - if ((ret = avctx->reget_buffer(avctx, &c->pic)) < 0) { + ctx->pic.reference = 3; + ctx->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_READABLE | + FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE; + if ((ret = avctx->reget_buffer(avctx, &ctx->pic)) < 0) { av_log(avctx, AV_LOG_ERROR, 
"reget_buffer() failed\n"); return ret; } - c->ctx.pic_start = c->pic.data[0] + c->pic.linesize[0] * (avctx->height - 1); - c->ctx.pic_stride = -c->pic.linesize[0]; - c->ctx.keyframe = !arith_get_bit(&acoder); - if (c->ctx.keyframe) { - ff_mss12_codec_reset(&c->ctx); - pal_changed = decode_pal(c, &acoder); - c->pic.key_frame = 1; - c->pic.pict_type = AV_PICTURE_TYPE_I; + c->pal_pic = ctx->pic.data[0] + ctx->pic.linesize[0] * (avctx->height - 1); + c->pal_stride = -ctx->pic.linesize[0]; + c->keyframe = !arith_get_bit(&acoder); + if (c->keyframe) { + ff_mss12_codec_reset(c); + pal_changed = decode_pal(c, &acoder); + ctx->pic.key_frame = 1; + ctx->pic.pict_type = AV_PICTURE_TYPE_I; } else { - if (c->ctx.corrupted) + if (c->corrupted) return AVERROR_INVALIDDATA; - c->pic.key_frame = 0; - c->pic.pict_type = AV_PICTURE_TYPE_P; + ctx->pic.key_frame = 0; + ctx->pic.pict_type = AV_PICTURE_TYPE_P; } - c->ctx.corrupted = ff_mss12_decode_rect(&c->ctx, &acoder, 0, 0, - avctx->width, avctx->height); - if (c->ctx.corrupted) + c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0, + avctx->width, avctx->height); + if (c->corrupted) return AVERROR_INVALIDDATA; - memcpy(c->pic.data[1], c->ctx.pal, AVPALETTE_SIZE); - c->pic.palette_has_changed = pal_changed; + memcpy(ctx->pic.data[1], c->pal, AVPALETTE_SIZE); + ctx->pic.palette_has_changed = pal_changed; *data_size = sizeof(AVFrame); - *(AVFrame*)data = c->pic; + *(AVFrame*)data = ctx->pic; /* always report that the buffer was completely consumed */ return buf_size; @@ -219,16 +193,16 @@ static av_cold int mss1_decode_init(AVCodecContext *avctx) c->ctx.avctx = avctx; avctx->coded_frame = &c->pic; - return ff_mss12_decode_init(avctx, 0); + return ff_mss12_decode_init(&c->ctx, 0); } static av_cold int mss1_decode_end(AVCodecContext *avctx) { - MSS1Context * const c = avctx->priv_data; + MSS1Context * const ctx = avctx->priv_data; - if (c->pic.data[0]) - avctx->release_buffer(avctx, &c->pic); - ff_mss12_decode_end(avctx); + if 
(ctx->pic.data[0]) + avctx->release_buffer(avctx, &ctx->pic); + ff_mss12_decode_end(&ctx->ctx); return 0; } diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c index 8b4f52bf5f..f0f23c0f8e 100644 --- a/libavcodec/mss12.c +++ b/libavcodec/mss12.c @@ -47,12 +47,8 @@ static int model_calc_threshold(Model *m) { int thr; - if (m->thr_weight == -1) { - thr = 2 * m->weights[m->num_syms] - 1; - thr = ((thr >> 1) + 4 * m->cum_prob[0]) / thr; - } else { - thr = m->num_syms * m->thr_weight; - } + thr = 2 * m->weights[m->num_syms] - 1; + thr = ((thr >> 1) + 4 * m->cum_prob[0]) / thr; return FFMIN(thr, 0x3FFF); } @@ -78,7 +74,7 @@ static av_cold void model_init(Model *m, int num_syms, int thr_weight) { m->num_syms = num_syms; m->thr_weight = thr_weight; - m->threshold = model_calc_threshold(m); + m->threshold = num_syms * thr_weight; model_reset(m); } @@ -87,7 +83,7 @@ static void model_rescale_weights(Model *m) int i; int cum_prob; - if (m->thr_weight == -1) + if (m->thr_weight == THRESH_ADAPTIVE) m->threshold = model_calc_threshold(m); while (m->cum_prob[0] > m->threshold) { cum_prob = 0; @@ -129,8 +125,14 @@ static void pixctx_reset(PixContext *ctx) { int i, j, k; - for (i = 0; i < ctx->cache_size; i++) - ctx->cache[i] = i; + if (!ctx->special_initial_cache) + for (i = 0; i < ctx->cache_size; i++) + ctx->cache[i] = i; + else { + ctx->cache[0] = 1; + ctx->cache[1] = 2; + ctx->cache[2] = 4; + } model_reset(&ctx->cache_model); model_reset(&ctx->full_model); @@ -141,27 +143,23 @@ static void pixctx_reset(PixContext *ctx) model_reset(&ctx->sec_models[i][j][k]); } -static av_cold void pixctx_init(PixContext *ctx, int cache_size) +static av_cold void pixctx_init(PixContext *ctx, int cache_size, + int full_model_syms, int special_initial_cache) { int i, j, k; - ctx->cache_size = cache_size + 4; - ctx->num_syms = cache_size; - - for (i = 0; i < ctx->cache_size; i++) - ctx->cache[i] = i; + ctx->cache_size = cache_size + 4; + ctx->num_syms = cache_size; + ctx->special_initial_cache = 
special_initial_cache; model_init(&ctx->cache_model, ctx->num_syms + 1, THRESH_LOW); - model_init(&ctx->full_model, 256, THRESH_HIGH); + model_init(&ctx->full_model, full_model_syms, THRESH_HIGH); - for (i = 0; i < 4; i++) { - for (j = 0; j < sec_order_sizes[i]; j++) { - for (k = 0; k < 4; k++) { + for (i = 0; i < 4; i++) + for (j = 0; j < sec_order_sizes[i]; j++) + for (k = 0; k < 4; k++) model_init(&ctx->sec_models[i][j][k], 2 + i, i ? THRESH_LOW : THRESH_ADAPTIVE); - } - } - } } static int decode_top_left_pixel(ArithCoder *acoder, PixContext *pctx) @@ -196,7 +194,6 @@ static int decode_pixel(ArithCoder *acoder, PixContext *pctx, if (val < pctx->num_syms) { int idx, j; - idx = 0; for (i = 0; i < pctx->cache_size; i++) { for (j = 0; j < num_ngb; j++) @@ -309,195 +306,288 @@ static int decode_pixel_in_context(ArithCoder *acoder, PixContext *pctx, break; } - pix = acoder->get_model_sym(acoder, &pctx->sec_models[nlen - 1][layer][sub]); + pix = acoder->get_model_sym(acoder, + &pctx->sec_models[nlen - 1][layer][sub]); if (pix < nlen) return ref_pix[pix]; else return decode_pixel(acoder, pctx, ref_pix, nlen); } -static int decode_region(MSS12Context *ctx, ArithCoder *acoder, uint8_t *dst, +static int decode_region(ArithCoder *acoder, uint8_t *dst, uint8_t *rgb_pic, int x, int y, int width, int height, int stride, - PixContext *pctx) + int rgb_stride, PixContext *pctx, const uint32_t *pal) { - int i, j; + int i, j, p; + uint8_t *rgb_dst = rgb_pic + x * 3 + y * rgb_stride; dst += x + y * stride; - dst[0] = decode_top_left_pixel(acoder, pctx); for (j = 0; j < height; j++) { for (i = 0; i < width; i++) { if (!i && !j) - continue; + p = decode_top_left_pixel(acoder, pctx); + else + p = decode_pixel_in_context(acoder, pctx, dst + i, stride, + i, j, width - i - 1); + dst[i] = p; - dst[i] = decode_pixel_in_context(acoder, pctx, dst + i, stride, - i, j, width - i - 1); + if (rgb_pic) + AV_WB24(rgb_dst + i * 3, pal[p]); } - dst += stride; + dst += stride; + rgb_dst += rgb_stride; 
} return 0; } -static int decode_region_masked(MSS12Context *ctx, ArithCoder *acoder, +static void copy_rectangles(MSS12Context const *c, + int x, int y, int width, int height) +{ + int j; + + if (c->last_rgb_pic) + for (j = y; j < y + height; j++) { + memcpy(c->rgb_pic + j * c->rgb_stride + x * 3, + c->last_rgb_pic + j * c->rgb_stride + x * 3, + width * 3); + memcpy(c->pal_pic + j * c->pal_stride + x, + c->last_pal_pic + j * c->pal_stride + x, + width); + } +} + +static int motion_compensation(MSS12Context const *c, + int x, int y, int width, int height) +{ + if (x + c->mvX < 0 || x + c->mvX + width > c->avctx->width || + y + c->mvY < 0 || y + c->mvY + height > c->avctx->height || + !c->rgb_pic) + return -1; + else { + uint8_t *dst = c->pal_pic + x + y * c->pal_stride; + uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * c->rgb_stride; + uint8_t *src; + uint8_t *rgb_src; + int j; + x += c->mvX; + y += c->mvY; + if (c->last_rgb_pic) { + src = c->last_pal_pic + x + y * c->pal_stride; + rgb_src = c->last_rgb_pic + x * 3 + y * c->rgb_stride; + } else { + src = c->pal_pic + x + y * c->pal_stride; + rgb_src = c->rgb_pic + x * 3 + y * c->rgb_stride; + } + for (j = 0; j < height; j++) { + memmove(dst, src, width); + memmove(rgb_dst, rgb_src, width * 3); + dst += c->pal_stride; + src += c->pal_stride; + rgb_dst += c->rgb_stride; + rgb_src += c->rgb_stride; + } + } + return 0; +} + +static int decode_region_masked(MSS12Context const *c, ArithCoder *acoder, uint8_t *dst, int stride, uint8_t *mask, int mask_stride, int x, int y, int width, int height, PixContext *pctx) { - int i, j; + int i, j, p; + uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * c->rgb_stride; dst += x + y * stride; mask += x + y * mask_stride; - if (mask[0] == 0xFF) - dst[0] = decode_top_left_pixel(acoder, pctx); for (j = 0; j < height; j++) { for (i = 0; i < width; i++) { - if (!i && !j || mask[i] != 0xFF) - continue; + if (c->avctx->err_recognition & AV_EF_EXPLODE && + ( c->rgb_pic && mask[i] != 0x01 && mask[i] != 
0x02 && mask[i] != 0x04 || + !c->rgb_pic && mask[i] != 0x80 && mask[i] != 0xFF)) + return -1; - dst[i] = decode_pixel_in_context(acoder, pctx, dst + i, stride, - i, j, width - i - 1); + if (mask[i] == 0x02) { + copy_rectangles(c, x + i, y + j, 1, 1); + } else if (mask[i] == 0x04) { + if (motion_compensation(c, x + i, y + j, 1, 1)) + return -1; + } else if (mask[i] != 0x80) { + if (!i && !j) + p = decode_top_left_pixel(acoder, pctx); + else + p = decode_pixel_in_context(acoder, pctx, dst + i, stride, + i, j, width - i - 1); + dst[i] = p; + if (c->rgb_pic) + AV_WB24(rgb_dst + i * 3, c->pal[p]); + } } - dst += stride; - mask += mask_stride; + dst += stride; + mask += mask_stride; + rgb_dst += c->rgb_stride; } return 0; } -static av_cold void codec_init(MSS12Context *ctx) +static av_cold void codec_init(MSS12Context *c, int version) { - model_init(&ctx->intra_region, 2, THRESH_ADAPTIVE); - model_init(&ctx->inter_region, 2, THRESH_ADAPTIVE); - model_init(&ctx->split_mode, 3, THRESH_HIGH); - model_init(&ctx->edge_mode, 2, THRESH_HIGH); - model_init(&ctx->pivot, 3, THRESH_LOW); - pixctx_init(&ctx->intra_pix_ctx, 8); - pixctx_init(&ctx->inter_pix_ctx, 2); - ctx->corrupted = 1; + int i; + for (i = 0; i < (c->slice_split ? 2 : 1); i++) { + c->sc[i].c = c; + model_init(&c->sc[i].intra_region, 2, THRESH_ADAPTIVE); + model_init(&c->sc[i].inter_region, 2, THRESH_ADAPTIVE); + model_init(&c->sc[i].split_mode, 3, THRESH_HIGH); + model_init(&c->sc[i].edge_mode, 2, THRESH_HIGH); + model_init(&c->sc[i].pivot, 3, THRESH_LOW); + + pixctx_init(&c->sc[i].intra_pix_ctx, 8, c->full_model_syms, 0); + + pixctx_init(&c->sc[i].inter_pix_ctx, version ? 3 : 2, + c->full_model_syms, version ? 
1 : 0); + } + c->corrupted = 1; } -void ff_mss12_codec_reset(MSS12Context *ctx) +void ff_mss12_codec_reset(MSS12Context *c) { - model_reset(&ctx->intra_region); - model_reset(&ctx->inter_region); - model_reset(&ctx->split_mode); - model_reset(&ctx->edge_mode); - model_reset(&ctx->pivot); - pixctx_reset(&ctx->intra_pix_ctx); - pixctx_reset(&ctx->inter_pix_ctx); + int i; + for (i = 0; i < (c->slice_split ? 2 : 1); i++) { + model_reset(&c->sc[i].intra_region); + model_reset(&c->sc[i].inter_region); + model_reset(&c->sc[i].split_mode); + model_reset(&c->sc[i].edge_mode); + model_reset(&c->sc[i].pivot); + pixctx_reset(&c->sc[i].intra_pix_ctx); + pixctx_reset(&c->sc[i].inter_pix_ctx); + } - ctx->corrupted = 0; + c->corrupted = 0; } -static int decode_pivot(MSS12Context *ctx, ArithCoder *acoder, int base) +static int decode_pivot(SliceContext *sc, ArithCoder *acoder, int base) { int val, inv; - inv = acoder->get_model_sym(acoder, &ctx->edge_mode); - val = acoder->get_model_sym(acoder, &ctx->pivot) + 1; + inv = acoder->get_model_sym(acoder, &sc->edge_mode); + val = acoder->get_model_sym(acoder, &sc->pivot) + 1; if (val > 2) { - if ((base + 1) / 2 - 2 <= 0) { - ctx->corrupted = 1; - return 0; - } + if ((base + 1) / 2 - 2 <= 0) + return -1; + val = acoder->get_number(acoder, (base + 1) / 2 - 2) + 3; } - if (val == base) { - ctx->corrupted = 1; - return 0; - } + if (val >= base) + return -1; return inv ? 
base - val : val; } -static int decode_region_intra(MSS12Context *ctx, ArithCoder *acoder, +static int decode_region_intra(SliceContext *sc, ArithCoder *acoder, int x, int y, int width, int height) { + MSS12Context const *c = sc->c; int mode; - mode = acoder->get_model_sym(acoder, &ctx->intra_region); + mode = acoder->get_model_sym(acoder, &sc->intra_region); if (!mode) { - int i, pix; - int stride = ctx->pic_stride; - uint8_t *dst = ctx->pic_start + x + y * stride; + int i, j, pix, rgb_pix; + int stride = c->pal_stride; + int rgb_stride = c->rgb_stride; + uint8_t *dst = c->pal_pic + x + y * stride; + uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * rgb_stride; - pix = decode_top_left_pixel(acoder, &ctx->intra_pix_ctx); - for (i = 0; i < height; i++, dst += stride) + pix = decode_top_left_pixel(acoder, &sc->intra_pix_ctx); + rgb_pix = c->pal[pix]; + for (i = 0; i < height; i++, dst += stride, rgb_dst += rgb_stride) { memset(dst, pix, width); - } else { - return decode_region(ctx, acoder, ctx->pic_start, - x, y, width, height, ctx->pic_stride, - &ctx->intra_pix_ctx); - } - - return 0; -} - -static int decode_region_inter(MSS12Context *ctx, ArithCoder *acoder, - int x, int y, int width, int height) -{ - int mode; - - mode = acoder->get_model_sym(acoder, &ctx->inter_region); - - if (!mode) { - mode = decode_top_left_pixel(acoder, &ctx->inter_pix_ctx); - if (mode != 0xFF) { - return 0; - } else { - return decode_region_intra(ctx, acoder, x, y, width, height); + if (c->rgb_pic) + for (j = 0; j < width * 3; j += 3) + AV_WB24(rgb_dst + j, rgb_pix); } } else { - if (decode_region(ctx, acoder, ctx->mask, - x, y, width, height, ctx->mask_linesize, - &ctx->inter_pix_ctx) < 0) - return -1; - return decode_region_masked(ctx, acoder, ctx->pic_start, - ctx->pic_stride, ctx->mask, - ctx->mask_linesize, - x, y, width, height, - &ctx->intra_pix_ctx); + return decode_region(acoder, c->pal_pic, c->rgb_pic, + x, y, width, height, c->pal_stride, c->rgb_stride, + &sc->intra_pix_ctx, 
&c->pal[0]); } return 0; } -int ff_mss12_decode_rect(MSS12Context *ctx, ArithCoder *acoder, +static int decode_region_inter(SliceContext *sc, ArithCoder *acoder, + int x, int y, int width, int height) +{ + MSS12Context const *c = sc->c; + int mode; + + mode = acoder->get_model_sym(acoder, &sc->inter_region); + + if (!mode) { + mode = decode_top_left_pixel(acoder, &sc->inter_pix_ctx); + + if (c->avctx->err_recognition & AV_EF_EXPLODE && + ( c->rgb_pic && mode != 0x01 && mode != 0x02 && mode != 0x04 || + !c->rgb_pic && mode != 0x80 && mode != 0xFF)) + return -1; + + if (mode == 0x02) + copy_rectangles(c, x, y, width, height); + else if (mode == 0x04) + return motion_compensation(c, x, y, width, height); + else if (mode != 0x80) + return decode_region_intra(sc, acoder, x, y, width, height); + } else { + if (decode_region(acoder, c->mask, NULL, + x, y, width, height, c->mask_stride, 0, + &sc->inter_pix_ctx, &c->pal[0]) < 0) + return -1; + return decode_region_masked(c, acoder, c->pal_pic, + c->pal_stride, c->mask, + c->mask_stride, + x, y, width, height, + &sc->intra_pix_ctx); + } + + return 0; +} + +int ff_mss12_decode_rect(SliceContext *sc, ArithCoder *acoder, int x, int y, int width, int height) { int mode, pivot; - if (ctx->corrupted) - return -1; - - mode = acoder->get_model_sym(acoder, &ctx->split_mode); + mode = acoder->get_model_sym(acoder, &sc->split_mode); switch (mode) { case SPLIT_VERT: - pivot = decode_pivot(ctx, acoder, height); - if (ff_mss12_decode_rect(ctx, acoder, x, y, width, pivot)) + if ((pivot = decode_pivot(sc, acoder, height)) < 1) return -1; - if (ff_mss12_decode_rect(ctx, acoder, x, y + pivot, width, height - pivot)) + if (ff_mss12_decode_rect(sc, acoder, x, y, width, pivot)) + return -1; + if (ff_mss12_decode_rect(sc, acoder, x, y + pivot, width, height - pivot)) return -1; break; case SPLIT_HOR: - pivot = decode_pivot(ctx, acoder, width); - if (ff_mss12_decode_rect(ctx, acoder, x, y, pivot, height)) + if ((pivot = decode_pivot(sc, acoder, 
width)) < 1) return -1; - if (ff_mss12_decode_rect(ctx, acoder, x + pivot, y, width - pivot, height)) + if (ff_mss12_decode_rect(sc, acoder, x, y, pivot, height)) + return -1; + if (ff_mss12_decode_rect(sc, acoder, x + pivot, y, width - pivot, height)) return -1; break; case SPLIT_NONE: - if (ctx->keyframe) - return decode_region_intra(ctx, acoder, x, y, width, height); + if (sc->c->keyframe) + return decode_region_intra(sc, acoder, x, y, width, height); else - return decode_region_inter(ctx, acoder, x, y, width, height); + return decode_region_inter(sc, acoder, x, y, width, height); default: return -1; } @@ -505,13 +595,11 @@ int ff_mss12_decode_rect(MSS12Context *ctx, ArithCoder *acoder, return 0; } -av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version) +av_cold int ff_mss12_decode_init(MSS12Context *c, int version) { - MSS12Context * const c = avctx->priv_data; + AVCodecContext *avctx = c->avctx; int i; - c->avctx = avctx; - if (avctx->extradata_size < 52 + 256 * 3) { av_log(avctx, AV_LOG_ERROR, "Insufficient extradata size %d\n", avctx->extradata_size); @@ -526,9 +614,23 @@ av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version) return AVERROR_INVALIDDATA; } + avctx->coded_width = AV_RB32(avctx->extradata + 20); + avctx->coded_height = AV_RB32(avctx->extradata + 24); + if (avctx->coded_width > 4096 || avctx->coded_height > 4096) { + av_log(avctx, AV_LOG_ERROR, "Frame dimensions %dx%d too large", + avctx->coded_width, avctx->coded_height); + return AVERROR_INVALIDDATA; + } + av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d\n", AV_RB32(avctx->extradata + 4), AV_RB32(avctx->extradata + 8)); - c->free_colours = AV_RB32(avctx->extradata + 48); + if (version != AV_RB32(avctx->extradata + 4) > 1) { + av_log(avctx, AV_LOG_ERROR, + "Header version doesn't match codec tag\n"); + return -1; + } + + c->free_colours = AV_RB32(avctx->extradata + 48); if ((unsigned)c->free_colours > 256) { av_log(avctx, AV_LOG_ERROR, "Incorrect number of 
changeable palette entries: %d\n", @@ -536,8 +638,6 @@ av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version) return AVERROR_INVALIDDATA; } av_log(avctx, AV_LOG_DEBUG, "%d free colour(s)\n", c->free_colours); - avctx->coded_width = AV_RB32(avctx->extradata + 20); - avctx->coded_height = AV_RB32(avctx->extradata + 24); av_log(avctx, AV_LOG_DEBUG, "Display dimensions %dx%d\n", AV_RB32(avctx->extradata + 12), AV_RB32(avctx->extradata + 16)); @@ -554,27 +654,53 @@ av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version) av_log(avctx, AV_LOG_DEBUG, "Max. seek time %g ms\n", av_int2float(AV_RB32(avctx->extradata + 44))); + if (version) { + if (avctx->extradata_size < 60 + 256 * 3) { + av_log(avctx, AV_LOG_ERROR, + "Insufficient extradata size %d for v2\n", + avctx->extradata_size); + return AVERROR_INVALIDDATA; + } + + c->slice_split = AV_RB32(avctx->extradata + 52); + av_log(avctx, AV_LOG_DEBUG, "Slice split %d\n", c->slice_split); + + c->full_model_syms = AV_RB32(avctx->extradata + 56); + if (c->full_model_syms < 2 || c->full_model_syms > 256) { + av_log(avctx, AV_LOG_ERROR, + "Incorrect number of used colours %d\n", + c->full_model_syms); + return AVERROR_INVALIDDATA; + } + av_log(avctx, AV_LOG_DEBUG, "Used colours %d\n", + c->full_model_syms); + } else { + c->slice_split = 0; + c->full_model_syms = 256; + } + for (i = 0; i < 256; i++) - c->pal[i] = AV_RB24(avctx->extradata + 52 + i * 3); + c->pal[i] = AV_RB24(avctx->extradata + 52 + + (version ? 8 : 0) + i * 3); - avctx->pix_fmt = PIX_FMT_PAL8; - - c->mask_linesize = FFALIGN(avctx->width, 16); - c->mask = av_malloc(c->mask_linesize * avctx->height); + c->mask_stride = FFALIGN(avctx->width, 16); + c->mask = av_malloc(c->mask_stride * avctx->height); if (!c->mask) { av_log(avctx, AV_LOG_ERROR, "Cannot allocate mask plane\n"); return AVERROR(ENOMEM); } - codec_init(c); + avctx->pix_fmt = version ? c->free_colours == 127 ? 
PIX_FMT_RGB555 + : PIX_FMT_RGB24 + : PIX_FMT_PAL8; + + codec_init(c, version); return 0; } -av_cold int ff_mss12_decode_end(AVCodecContext *avctx) +av_cold int ff_mss12_decode_end(MSS12Context *c) { - MSS12Context * const c = avctx->priv_data; - av_freep(&c->mask); return 0; diff --git a/libavcodec/mss12.h b/libavcodec/mss12.h index 383d86c904..93d1f6146f 100644 --- a/libavcodec/mss12.h +++ b/libavcodec/mss12.h @@ -26,8 +26,10 @@ #ifndef AVCODEC_MSS12_H #define AVCODEC_MSS12_H +#include "libavutil/intreadwrite.h" #include "avcodec.h" #include "get_bits.h" +#include "bytestream.h" #define MODEL_MIN_SYMS 2 #define MODEL_MAX_SYMS 256 @@ -46,7 +48,10 @@ typedef struct Model { typedef struct ArithCoder { int low, high, value; - GetBitContext *gb; + union { + GetBitContext *gb; + GetByteContext *gB; + } gbc; int (*get_model_sym)(struct ArithCoder *c, Model *m); int (*get_number) (struct ArithCoder *c, int n); } ArithCoder; @@ -56,28 +61,77 @@ typedef struct PixContext { uint8_t cache[12]; Model cache_model, full_model; Model sec_models[4][8][4]; + int special_initial_cache; } PixContext; +struct MSS12Context; + +typedef struct SliceContext { + struct MSS12Context *c; + Model intra_region, inter_region; + Model pivot, edge_mode, split_mode; + PixContext intra_pix_ctx, inter_pix_ctx; +} SliceContext; + typedef struct MSS12Context { AVCodecContext *avctx; - uint8_t *pic_start; - int pic_stride; - uint8_t *mask; - int mask_linesize; uint32_t pal[256]; + uint8_t *pal_pic; + uint8_t *last_pal_pic; + int pal_stride; + uint8_t *mask; + int mask_stride; + uint8_t *rgb_pic; + uint8_t *last_rgb_pic; + int rgb_stride; int free_colours; int keyframe; Model intra_region, inter_region; Model pivot, edge_mode, split_mode; PixContext intra_pix_ctx, inter_pix_ctx; + int mvX, mvY; int corrupted; + int slice_split; + int full_model_syms; + SliceContext sc[2]; } MSS12Context; -int ff_mss12_decode_rect(MSS12Context *ctx, ArithCoder *acoder, +int ff_mss12_decode_rect(SliceContext *ctx, 
ArithCoder *acoder, int x, int y, int width, int height); void ff_mss12_model_update(Model *m, int val); void ff_mss12_codec_reset(MSS12Context *ctx); -av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version); -av_cold int ff_mss12_decode_end(AVCodecContext *avctx); +av_cold int ff_mss12_decode_init(MSS12Context *ctx, int version); +av_cold int ff_mss12_decode_end(MSS12Context *ctx); + +#define ARITH_GET_BIT(VERSION) \ +static int arith ## VERSION ## _get_bit(ArithCoder *c) \ +{ \ + int range = c->high - c->low + 1; \ + int bit = (((c->value - c->low) << 1) + 1) / range; \ + \ + if (bit) \ + c->low += range >> 1; \ + else \ + c->high = c->low + (range >> 1) - 1; \ + \ + arith ## VERSION ## _normalise(c); \ + \ + return bit; \ +} + +#define ARITH_GET_MODEL_SYM(VERSION) \ +static int arith ## VERSION ## _get_model_sym(ArithCoder *c, Model *m) \ +{ \ + int idx, val; \ + \ + idx = arith ## VERSION ## _get_prob(c, m->cum_prob); \ + \ + val = m->idx2sym[idx]; \ + ff_mss12_model_update(m, idx); \ + \ + arith ## VERSION ## _normalise(c); \ + \ + return val; \ +} #endif /* AVCODEC_MSS12_H */ diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c new file mode 100644 index 0000000000..c0c47dc8d3 --- /dev/null +++ b/libavcodec/mss2.c @@ -0,0 +1,860 @@ +/* + * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder + */ + +#include "libavutil/avassert.h" +#include "msmpeg4data.h" +#include "vc1.h" +#include "mss12.h" +#include "mss2dsp.h" + +typedef struct MSS2Context { + VC1Context v; + int split_position; + AVFrame pic; + AVFrame last_pic; + MSS12Context c; + MSS2DSPContext dsp; + SliceContext sc[2]; +} MSS2Context; + +static void arith2_normalise(ArithCoder *c) +{ + while ((c->high >> 15) - (c->low >> 15) < 2) { + if ((c->low ^ c->high) & 0x10000) { + c->high ^= 0x8000; + c->value ^= 0x8000; + c->low ^= 0x8000; + } + c->high = c->high << 8 & 0xFFFFFF | 0xFF; + c->value = c->value << 8 & 0xFFFFFF | bytestream2_get_byte(c->gbc.gB); + c->low = c->low << 8 & 0xFFFFFF; + } +} + +ARITH_GET_BIT(2) + +/* L. Stuiver and A. Moffat: "Piecewise Integer Mapping for Arithmetic Coding." + * In Proc. 8th Data Compression Conference (DCC '98), pp. 3-12, Mar. 
1998 */ + +static int arith2_get_scaled_value(int value, int n, int range) +{ + int split = (n << 1) - range; + + if (value > split) + return split + (value - split >> 1); + else + return value; +} + +static void arith2_rescale_interval(ArithCoder *c, int range, + int low, int high, int n) +{ + int split = (n << 1) - range; + + if (high > split) + c->high = split + (high - split << 1); + else + c->high = high; + + c->high += c->low - 1; + + if (low > split) + c->low += split + (low - split << 1); + else + c->low += low; +} + +static int arith2_get_number(ArithCoder *c, int n) +{ + int range = c->high - c->low + 1; + int scale = av_log2(range) - av_log2(n); + int val; + + if (n << scale > range) + scale--; + + n <<= scale; + + val = arith2_get_scaled_value(c->value - c->low, n, range) >> scale; + + arith2_rescale_interval(c, range, val << scale, (val + 1) << scale, n); + + arith2_normalise(c); + + return val; +} + +static int arith2_get_prob(ArithCoder *c, int *probs) +{ + int range = c->high - c->low + 1, n = *probs; + int scale = av_log2(range) - av_log2(n); + int i = 0, val; + + if (n << scale > range) + scale--; + + n <<= scale; + + val = arith2_get_scaled_value(c->value - c->low, n, range) >> scale; + while (probs[++i] > val) ; + + arith2_rescale_interval(c, range, + probs[i] << scale, probs[i - 1] << scale, n); + + return i; +} + +ARITH_GET_MODEL_SYM(2) + +static int arith2_get_consumed_bytes(ArithCoder *c) +{ + int diff = (c->high >> 16) - (c->low >> 16); + int bp = bytestream2_tell(c->gbc.gB) - 3 << 3; + int bits = 1; + + while (!(diff & 0x80)) { + bits++; + diff <<= 1; + } + + return (bits + bp + 7 >> 3) + ((c->low >> 16) + 1 == c->high >> 16); +} + +static void arith2_init(ArithCoder *c, GetByteContext *gB) +{ + c->low = 0; + c->high = 0xFFFFFF; + c->value = bytestream2_get_be24(gB); + c->gbc.gB = gB; + c->get_model_sym = arith2_get_model_sym; + c->get_number = arith2_get_number; +} + +static int decode_pal_v2(MSS12Context *ctx, const uint8_t *buf, int 
buf_size) +{ + int i, ncol; + uint32_t *pal = ctx->pal + 256 - ctx->free_colours; + + if (!ctx->free_colours) + return 0; + + ncol = *buf++; + if (buf_size < 2 + ncol * 3) + return -1; + for (i = 0; i < ncol; i++) + *pal++ = AV_RB24(buf + 3 * i); + + return 1 + ncol * 3; +} + +static int decode_555(GetByteContext *gB, uint16_t *dst, int stride, + int keyframe, int w, int h) +{ + int last_symbol = 0, repeat = 0, prev_avail = 0; + + if (!keyframe) { + int x, y, endx, endy, t; + +#define READ_PAIR(a, b) \ + a = bytestream2_get_byte(gB) << 4; \ + t = bytestream2_get_byte(gB); \ + a |= t >> 4; \ + b = (t & 0xF) << 8; \ + b |= bytestream2_get_byte(gB); \ + + READ_PAIR(x, endx) + READ_PAIR(y, endy) + + if (endx >= w || endy >= h || x > endx || y > endy) + return -1; + dst += x + stride * y; + w = endx - x + 1; + h = endy - y + 1; + if (y) + prev_avail = 1; + } + + do { + uint16_t *p = dst; + do { + if (repeat-- < 1) { + int b = bytestream2_get_byte(gB); + if (b < 128) + last_symbol = b << 8 | bytestream2_get_byte(gB); + else if (b > 129) { + repeat = 0; + while (b-- > 130) + repeat = (repeat << 8) + bytestream2_get_byte(gB) + 1; + if (last_symbol == -2) { + int skip = FFMIN((unsigned)repeat, dst + w - p); + repeat -= skip; + p += skip; + } + } else + last_symbol = 127 - b; + } + if (last_symbol >= 0) + *p = last_symbol; + else if (last_symbol == -1 && prev_avail) + *p = *(p - stride); + } while (++p < dst + w); + dst += stride; + prev_avail = 1; + } while (--h); + + return 0; +} + +static int decode_rle(GetBitContext *gb, uint8_t *pal_dst, int pal_stride, + uint8_t *rgb_dst, int rgb_stride, uint32_t *pal, + int keyframe, int kf_slipt, int slice, int w, int h) +{ + uint8_t bits[270] = { 0 }; + uint32_t codes[270]; + VLC vlc; + + int current_length = 0, read_codes = 0, next_code = 0, current_codes = 0; + int remaining_codes, surplus_codes, i; + + const int alphabet_size = 270 - keyframe; + + int last_symbol = 0, repeat = 0, prev_avail = 0; + + if (!keyframe) { + int x, y, 
clipw, cliph; + + x = get_bits(gb, 12); + y = get_bits(gb, 12); + clipw = get_bits(gb, 12) + 1; + cliph = get_bits(gb, 12) + 1; + + if (x + clipw > w || y + cliph > h) + return AVERROR_INVALIDDATA; + pal_dst += pal_stride * y + x; + rgb_dst += rgb_stride * y + x * 3; + w = clipw; + h = cliph; + if (y) + prev_avail = 1; + } else { + if (slice > 0) { + pal_dst += pal_stride * kf_slipt; + rgb_dst += rgb_stride * kf_slipt; + prev_avail = 1; + h -= kf_slipt; + } else + h = kf_slipt; + } + + /* read explicit codes */ + do { + while (current_codes--) { + int symbol = get_bits(gb, 8); + if (symbol >= 204 - keyframe) + symbol += 14 - keyframe; + else if (symbol > 189) + symbol = get_bits1(gb) + (symbol << 1) - 190; + if (bits[symbol]) + return AVERROR_INVALIDDATA; + bits[symbol] = current_length; + codes[symbol] = next_code++; + read_codes++; + } + current_length++; + next_code <<= 1; + remaining_codes = (1 << current_length) - next_code; + current_codes = get_bits(gb, av_ceil_log2(remaining_codes + 1)); + if (current_length > 22 || current_codes > remaining_codes) + return AVERROR_INVALIDDATA; + } while (current_codes != remaining_codes); + + remaining_codes = alphabet_size - read_codes; + + /* determine the minimum length to fit the rest of the alphabet */ + while ((surplus_codes = (2 << current_length) - + (next_code << 1) - remaining_codes) < 0) { + current_length++; + next_code <<= 1; + } + + /* add the rest of the symbols lexicographically */ + for (i = 0; i < alphabet_size; i++) + if (!bits[i]) { + if (surplus_codes-- == 0) { + current_length++; + next_code <<= 1; + } + bits[i] = current_length; + codes[i] = next_code++; + } + + if (next_code != 1 << current_length) + return AVERROR_INVALIDDATA; + + if (i = init_vlc(&vlc, 9, alphabet_size, bits, 1, 1, codes, 4, 4, 0)) + return i; + + /* frame decode */ + do { + uint8_t *pp = pal_dst; + uint8_t *rp = rgb_dst; + do { + if (repeat-- < 1) { + int b = get_vlc2(gb, vlc.table, 9, 3); + if (b < 256) + last_symbol = b; + else 
if (b < 268) { + b -= 256; + if (b == 11) + b = get_bits(gb, 4) + 10; + + if (!b) + repeat = 0; + else + repeat = get_bits(gb, b); + + while (b--) + repeat += 1 << b; + + if (last_symbol == -2) { + int skip = FFMIN(repeat, pal_dst + w - pp); + repeat -= skip; + pp += skip; + rp += skip * 3; + } + } else + last_symbol = 267 - b; + } + if (last_symbol >= 0) { + *pp = last_symbol; + AV_WB24(rp, pal[last_symbol]); + } else if (last_symbol == -1 && prev_avail) { + *pp = *(pp - pal_stride); + memcpy(rp, rp - rgb_stride, 3); + } + rp += 3; + } while (++pp < pal_dst + w); + pal_dst += pal_stride; + rgb_dst += rgb_stride; + prev_avail = 1; + } while (--h); + + ff_free_vlc(&vlc); + return 0; +} + +static int decode_wmv9(AVCodecContext *avctx, const uint8_t *buf, int buf_size, + int x, int y, int w, int h, int wmv9_mask) +{ + MSS2Context *ctx = avctx->priv_data; + MSS12Context *c = &ctx->c; + VC1Context *v = avctx->priv_data; + MpegEncContext *s = &v->s; + AVFrame *f; + + ff_mpeg_flush(avctx); + + if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) { + int i = ff_find_unused_picture(s, 0); + if (i < 0) + return -1; + s->current_picture_ptr = &s->picture[i]; + } + + init_get_bits(&s->gb, buf, buf_size * 8); + + s->loop_filter = avctx->skip_loop_filter < AVDISCARD_ALL; + + if (ff_vc1_parse_frame_header(v, &s->gb) == -1) { + av_log(v->s.avctx, AV_LOG_ERROR, "header error\n"); + return AVERROR_INVALIDDATA; + } + + if (s->pict_type != AV_PICTURE_TYPE_I) { + av_log(v->s.avctx, AV_LOG_ERROR, "expected I-frame\n"); + return AVERROR_INVALIDDATA; + } + + avctx->pix_fmt = PIX_FMT_YUV420P; + + if (ff_MPV_frame_start(s, avctx) < 0) { + av_log(v->s.avctx, AV_LOG_ERROR, "ff_MPV_frame_start error\n"); + avctx->pix_fmt = PIX_FMT_RGB24; + return -1; + } + + ff_er_frame_start(s); + + v->bits = buf_size * 8; + + v->end_mb_x = (w + 15) >> 4; + s->end_mb_y = (h + 15) >> 4; + if (v->respic & 1) + v->end_mb_x = v->end_mb_x + 1 >> 1; + if (v->respic & 2) + s->end_mb_y = 
s->end_mb_y + 1 >> 1; + + ff_vc1_decode_blocks(v); + + ff_er_frame_end(s); + + ff_MPV_frame_end(s); + + f = &s->current_picture.f; + + if (v->respic == 3) { + ctx->dsp.upsample_plane(f->data[0], f->linesize[0], w, h); + ctx->dsp.upsample_plane(f->data[1], f->linesize[1], w >> 1, h >> 1); + ctx->dsp.upsample_plane(f->data[2], f->linesize[2], w >> 1, h >> 1); + } else if (v->respic) + av_log_ask_for_sample(v->s.avctx, + "Asymmetric WMV9 rectangle subsampling\n"); + + av_assert0(f->linesize[1] == f->linesize[2]); + + if (wmv9_mask != -1) + ctx->dsp.mss2_blit_wmv9_masked(c->rgb_pic + y * c->rgb_stride + x * 3, + c->rgb_stride, wmv9_mask, + c->pal_pic + y * c->pal_stride + x, + c->pal_stride, + f->data[0], f->linesize[0], + f->data[1], f->data[2], f->linesize[1], + w, h); + else + ctx->dsp.mss2_blit_wmv9(c->rgb_pic + y * c->rgb_stride + x * 3, + c->rgb_stride, + f->data[0], f->linesize[0], + f->data[1], f->data[2], f->linesize[1], + w, h); + + avctx->pix_fmt = PIX_FMT_RGB24; + + return 0; +} + +typedef struct Rectangle { + int coded, x, y, w, h; +} Rectangle; + +#define MAX_WMV9_RECTANGLES 20 +#define ARITH2_PADDING 2 + +static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *data_size, + AVPacket *avpkt) +{ + const uint8_t *buf = avpkt->data; + int buf_size = avpkt->size; + MSS2Context *ctx = avctx->priv_data; + MSS12Context *c = &ctx->c; + GetBitContext gb; + GetByteContext gB; + ArithCoder acoder; + + int keyframe, has_wmv9, has_mv, is_rle, is_555, ret; + + Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r; + int used_rects = 0, i, implicit_rect, av_uninit(wmv9_mask); + + av_assert0(FF_INPUT_BUFFER_PADDING_SIZE >= + ARITH2_PADDING + (MIN_CACHE_BITS + 7) / 8); + + init_get_bits(&gb, buf, buf_size * 8); + + if (keyframe = get_bits1(&gb)) + skip_bits(&gb, 7); + has_wmv9 = get_bits1(&gb); + has_mv = keyframe ? 
0 : get_bits1(&gb); + is_rle = get_bits1(&gb); + is_555 = is_rle && get_bits1(&gb); + if (c->slice_split > 0) + ctx->split_position = c->slice_split; + else if (c->slice_split < 0) { + if (get_bits1(&gb)) { + if (get_bits1(&gb)) { + if (get_bits1(&gb)) + ctx->split_position = get_bits(&gb, 16); + else + ctx->split_position = get_bits(&gb, 12); + } else + ctx->split_position = get_bits(&gb, 8) << 4; + } else { + if (keyframe) + ctx->split_position = avctx->height / 2; + } + } else + ctx->split_position = avctx->height; + + if (c->slice_split && (ctx->split_position < 1 - is_555 || + ctx->split_position > avctx->height - 1)) + return AVERROR_INVALIDDATA; + + align_get_bits(&gb); + buf += get_bits_count(&gb) >> 3; + buf_size -= get_bits_count(&gb) >> 3; + + if (buf_size < 1) + return AVERROR_INVALIDDATA; + + if (is_555 && (has_wmv9 || has_mv || c->slice_split && ctx->split_position)) + return AVERROR_INVALIDDATA; + + avctx->pix_fmt = is_555 ? PIX_FMT_RGB555 : PIX_FMT_RGB24; + if (ctx->pic.data[0] && ctx->pic.format != avctx->pix_fmt) + avctx->release_buffer(avctx, &ctx->pic); + + if (has_wmv9) { + bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING); + arith2_init(&acoder, &gB); + + implicit_rect = !arith2_get_bit(&acoder); + + while (arith2_get_bit(&acoder)) { + if (used_rects == MAX_WMV9_RECTANGLES) + return AVERROR_INVALIDDATA; + r = &wmv9rects[used_rects]; + if (!used_rects) + r->x = arith2_get_number(&acoder, avctx->width); + else + r->x = arith2_get_number(&acoder, avctx->width - + wmv9rects[used_rects - 1].x) + + wmv9rects[used_rects - 1].x; + r->y = arith2_get_number(&acoder, avctx->height); + r->w = arith2_get_number(&acoder, avctx->width - r->x) + 1; + r->h = arith2_get_number(&acoder, avctx->height - r->y) + 1; + used_rects++; + } + + if (implicit_rect && used_rects) { + av_log(avctx, AV_LOG_ERROR, "implicit_rect && used_rects > 0\n"); + return AVERROR_INVALIDDATA; + } + + if (implicit_rect) { + wmv9rects[0].x = 0; + wmv9rects[0].y = 0; + wmv9rects[0].w = 
avctx->width; + wmv9rects[0].h = avctx->height; + + used_rects = 1; + } + for (i = 0; i < used_rects; i++) { + if (!implicit_rect && arith2_get_bit(&acoder)) { + av_log(avctx, AV_LOG_ERROR, "Unexpected grandchildren\n"); + return AVERROR_INVALIDDATA; + } + if (!i) { + wmv9_mask = arith2_get_bit(&acoder) - 1; + if (!wmv9_mask) + wmv9_mask = arith2_get_number(&acoder, 256); + } + wmv9rects[i].coded = arith2_get_number(&acoder, 2); + } + + buf += arith2_get_consumed_bytes(&acoder); + buf_size -= arith2_get_consumed_bytes(&acoder); + if (buf_size < 1) + return AVERROR_INVALIDDATA; + } + + c->mvX = c->mvY = 0; + if (keyframe && !is_555) { + if ((i = decode_pal_v2(c, buf, buf_size)) < 0) + return AVERROR_INVALIDDATA; + buf += i; + buf_size -= i; + } else if (has_mv) { + buf += 4; + buf_size -= 4; + if (buf_size < 1) + return AVERROR_INVALIDDATA; + c->mvX = AV_RB16(buf - 4) - avctx->width; + c->mvY = AV_RB16(buf - 2) - avctx->height; + } + + if (c->mvX < 0 || c->mvY < 0) { + FFSWAP(AVFrame, ctx->pic, ctx->last_pic); + FFSWAP(uint8_t *, c->pal_pic, c->last_pal_pic); + + if (ctx->pic.data[0]) + avctx->release_buffer(avctx, &ctx->pic); + + ctx->pic.reference = 3; + ctx->pic.buffer_hints = FF_BUFFER_HINTS_VALID | + FF_BUFFER_HINTS_READABLE | + FF_BUFFER_HINTS_PRESERVE | + FF_BUFFER_HINTS_REUSABLE; + + if ((ret = avctx->get_buffer(avctx, &ctx->pic)) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return ret; + } + + if (ctx->last_pic.data[0]) { + av_assert0(ctx->pic.linesize[0] == ctx->last_pic.linesize[0]); + c->last_rgb_pic = ctx->last_pic.data[0] + + ctx->last_pic.linesize[0] * (avctx->height - 1); + } else { + av_log(avctx, AV_LOG_ERROR, "Missing keyframe\n"); + return -1; + } + } else { + if (ctx->last_pic.data[0]) + avctx->release_buffer(avctx, &ctx->last_pic); + + ctx->pic.reference = 3; + ctx->pic.buffer_hints = FF_BUFFER_HINTS_VALID | + FF_BUFFER_HINTS_READABLE | + FF_BUFFER_HINTS_PRESERVE | + FF_BUFFER_HINTS_REUSABLE; + + if ((ret = 
avctx->reget_buffer(avctx, &ctx->pic)) < 0) { + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return ret; + } + + c->last_rgb_pic = NULL; + } + c->rgb_pic = ctx->pic.data[0] + + ctx->pic.linesize[0] * (avctx->height - 1); + c->rgb_stride = -ctx->pic.linesize[0]; + + ctx->pic.key_frame = keyframe; + ctx->pic.pict_type = keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; + + if (is_555) { + bytestream2_init(&gB, buf, buf_size); + + if (decode_555(&gB, (uint16_t *)c->rgb_pic, c->rgb_stride >> 1, + keyframe, avctx->width, avctx->height)) + return AVERROR_INVALIDDATA; + + buf_size -= bytestream2_tell(&gB); + } else if (is_rle) { + init_get_bits(&gb, buf, buf_size * 8); + if (ret = decode_rle(&gb, c->pal_pic, c->pal_stride, + c->rgb_pic, c->rgb_stride, c->pal, keyframe, + ctx->split_position, 0, + avctx->width, avctx->height)) + return ret; + align_get_bits(&gb); + + if (c->slice_split) + if (ret = decode_rle(&gb, c->pal_pic, c->pal_stride, + c->rgb_pic, c->rgb_stride, c->pal, keyframe, + ctx->split_position, 1, + avctx->width, avctx->height)) + return ret; + + align_get_bits(&gb); + buf += get_bits_count(&gb) >> 3; + buf_size -= get_bits_count(&gb) >> 3; + } else { + if (keyframe) + ff_mss12_codec_reset(c); + else if (c->corrupted) + return AVERROR_INVALIDDATA; + bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING); + arith2_init(&acoder, &gB); + c->keyframe = keyframe; + if (c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0, + avctx->width, + ctx->split_position)) + return AVERROR_INVALIDDATA; + + buf += arith2_get_consumed_bytes(&acoder); + buf_size -= arith2_get_consumed_bytes(&acoder); + if (c->slice_split) { + if (buf_size < 1) + return AVERROR_INVALIDDATA; + bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING); + arith2_init(&acoder, &gB); + if (c->corrupted = ff_mss12_decode_rect(&c->sc[1], &acoder, 0, + ctx->split_position, + avctx->width, + avctx->height - ctx->split_position)) + return AVERROR_INVALIDDATA; + + buf += 
arith2_get_consumed_bytes(&acoder); + buf_size -= arith2_get_consumed_bytes(&acoder); + } + } + + if (has_wmv9) { + for (i = 0; i < used_rects; i++) { + int x = wmv9rects[i].x; + int y = wmv9rects[i].y; + int w = wmv9rects[i].w; + int h = wmv9rects[i].h; + if (wmv9rects[i].coded) { + int WMV9codedFrameSize; + if (buf_size < 4 || !(WMV9codedFrameSize = AV_RL24(buf))) + return AVERROR_INVALIDDATA; + if (ret = decode_wmv9(avctx, buf + 3, buf_size - 3, + x, y, w, h, wmv9_mask)) + return ret; + buf += WMV9codedFrameSize + 3; + buf_size -= WMV9codedFrameSize + 3; + } else { + uint8_t *dst = c->rgb_pic + y * c->rgb_stride + x * 3; + if (wmv9_mask != -1) { + ctx->dsp.mss2_gray_fill_masked(dst, c->rgb_stride, + wmv9_mask, + c->pal_pic + y * c->pal_stride + x, + c->pal_stride, + w, h); + } else { + do { + memset(dst, 0x80, w * 3); + dst += c->rgb_stride; + } while (--h); + } + } + } + } + + if (buf_size) + av_log(avctx, AV_LOG_WARNING, "buffer not fully consumed\n"); + + *data_size = sizeof(AVFrame); + *(AVFrame *)data = ctx->pic; + + return avpkt->size; +} + +static av_cold int wmv9_init(AVCodecContext *avctx) +{ + VC1Context *v = avctx->priv_data; + + v->s.avctx = avctx; + avctx->flags |= CODEC_FLAG_EMU_EDGE; + v->s.flags |= CODEC_FLAG_EMU_EDGE; + + if (avctx->idct_algo == FF_IDCT_AUTO) + avctx->idct_algo = FF_IDCT_WMV2; + + if (ff_vc1_init_common(v) < 0) + return -1; + ff_vc1dsp_init(&v->vc1dsp); + + v->profile = PROFILE_MAIN; + + v->zz_8x4 = ff_wmv2_scantableA; + v->zz_4x8 = ff_wmv2_scantableB; + v->res_y411 = 0; + v->res_sprite = 0; + + v->frmrtq_postproc = 7; + v->bitrtq_postproc = 31; + + v->res_x8 = 0; + v->multires = 0; + v->res_fasttx = 1; + + v->fastuvmc = 0; + + v->extended_mv = 0; + + v->dquant = 1; + v->vstransform = 1; + + v->res_transtab = 0; + + v->overlap = 0; + + v->s.resync_marker = 0; + v->rangered = 0; + + v->s.max_b_frames = avctx->max_b_frames = 0; + v->quantizer_mode = 0; + + v->finterpflag = 0; + + v->res_rtm_flag = 1; + + 
ff_vc1_init_transposed_scantables(v); + + if (ff_msmpeg4_decode_init(avctx) < 0 || + ff_vc1_decode_init_alloc_tables(v) < 0) + return -1; + + /* error concealment */ + v->s.me.qpel_put = v->s.dsp.put_qpel_pixels_tab; + v->s.me.qpel_avg = v->s.dsp.avg_qpel_pixels_tab; + + return 0; +} + +static av_cold int mss2_decode_end(AVCodecContext *avctx) +{ + MSS2Context *const ctx = avctx->priv_data; + + if (ctx->pic.data[0]) + avctx->release_buffer(avctx, &ctx->pic); + if (ctx->last_pic.data[0]) + avctx->release_buffer(avctx, &ctx->last_pic); + + ff_mss12_decode_end(&ctx->c); + av_freep(&ctx->c.pal_pic); + av_freep(&ctx->c.last_pal_pic); + ff_vc1_decode_end(avctx); + + return 0; +} + +static av_cold int mss2_decode_init(AVCodecContext *avctx) +{ + MSS2Context * const ctx = avctx->priv_data; + MSS12Context *c = &ctx->c; + int ret; + c->avctx = avctx; + avctx->coded_frame = &ctx->pic; + if (ret = ff_mss12_decode_init(c, 1)) + return ret; + c->pal_stride = c->mask_stride; + c->pal_pic = av_malloc(c->pal_stride * avctx->height); + c->last_pal_pic = av_malloc(c->pal_stride * avctx->height); + if (!c->pal_pic || !c->last_pal_pic) { + mss2_decode_end(avctx); + return AVERROR(ENOMEM); + } + if (ret = wmv9_init(avctx)) { + mss2_decode_end(avctx); + return ret; + } + ff_mss2dsp_init(&ctx->dsp); + return 0; +} + +AVCodec ff_mss2_decoder = { + .name = "mss2", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_MSS2, + .priv_data_size = sizeof(MSS2Context), + .init = mss2_decode_init, + .close = mss2_decode_end, + .decode = mss2_decode_frame, + .capabilities = CODEC_CAP_DR1, + .long_name = NULL_IF_CONFIG_SMALL("MS Windows Media Video V9 Screen"), +}; diff --git a/libavcodec/mss2dsp.c b/libavcodec/mss2dsp.c new file mode 100644 index 0000000000..aa13577d48 --- /dev/null +++ b/libavcodec/mss2dsp.c @@ -0,0 +1,153 @@ +/* + * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder DSP routines + */ + +#include "mss2dsp.h" +#include "libavutil/common.h" + +static av_always_inline void mss2_blit_wmv9_template(uint8_t *dst, + int dst_stride, + int gray, + int use_mask, + int maskcolor, + const uint8_t *mask, + int mask_stride, + const uint8_t *srcy, + int srcy_stride, + const uint8_t *srcu, + const uint8_t *srcv, + int srcuv_stride, + int w, int h) +{ + int i, j, k, r = -1; + while (++r < h) { + for (i = 0, j = 0, k = 0; i < w; j += (i & 1), i++, k += 3) { + if (!use_mask || mask[i] == maskcolor) { + if (gray) { + dst[k] = dst[k + 1] = dst[k + 2] = 0x80; + } else { + int y = srcy[i]; + int u = srcu[j] - 128; + int v = srcv[j] - 128; + dst[k] = av_clip_uint8(y + ( 91881 * v + 32768 >> 16)); + dst[k + 1] = av_clip_uint8(y + (-22554 * u - 46802 * v + 32768 >> 16)); + dst[k + 2] = av_clip_uint8(y + (116130 * u + 32768 >> 16)); + } + } + } + mask += mask_stride; + dst += dst_stride; + srcy += srcy_stride; + srcu += srcuv_stride * (r & 1); + srcv += srcuv_stride * (r & 1); + } +} + +static void mss2_blit_wmv9_c(uint8_t *dst, int dst_stride, + const uint8_t *srcy, int srcy_stride, + const uint8_t *srcu, const uint8_t *srcv, + int srcuv_stride, 
int w, int h) +{ + mss2_blit_wmv9_template(dst, dst_stride, 0, 0, + 0, NULL, 0, + srcy, srcy_stride, + srcu, srcv, srcuv_stride, + w, h); +} + +static void mss2_blit_wmv9_masked_c(uint8_t *dst, int dst_stride, + int maskcolor, const uint8_t *mask, + int mask_stride, + const uint8_t *srcy, int srcy_stride, + const uint8_t *srcu, const uint8_t *srcv, + int srcuv_stride, int w, int h) +{ + mss2_blit_wmv9_template(dst, dst_stride, 0, 1, + maskcolor, mask, mask_stride, + srcy, srcy_stride, + srcu, srcv, srcuv_stride, + w, h); +} + +static void mss2_gray_fill_masked_c(uint8_t *dst, int dst_stride, + int maskcolor, const uint8_t *mask, + int mask_stride, int w, int h) +{ + mss2_blit_wmv9_template(dst, dst_stride, 1, 1, + maskcolor, mask, mask_stride, + NULL, 0, + NULL, NULL, 0, + w, h); +} + +static void upsample_plane_c(uint8_t *plane, int plane_stride, int w, int h) +{ + uint8_t *src1, *src2, *dst1, *dst2, *p, a, b; + int i, j; + + w += (w & 1); + h += (h & 1); + + j = h - 1; + + memcpy(plane + plane_stride * j, + plane + plane_stride * (j >> 1), + w); + + while ((j -= 2) > 0) { + dst1 = plane + plane_stride * (j + 1); + dst2 = plane + plane_stride * j; + src1 = plane + plane_stride * ((j + 1) >> 1); + src2 = plane + plane_stride * ( j >> 1); + + for (i = (w - 1) >> 1; i >= 0; i--) { + a = src1[i]; + b = src2[i]; + dst1[i] = (3 * a + b + 2) >> 2; + dst2[i] = (a + 3 * b + 2) >> 2; + } + } + + for (j = h - 1; j >= 0; j--) { + p = plane + plane_stride * j; + i = w - 1; + + p[i] = p[i >> 1]; + + while ((i -= 2) > 0) { + a = p[ i >> 1]; + b = p[(i + 1) >> 1]; + p[i] = (3 * a + b + 1) >> 2; + p[i + 1] = (a + 3 * b + 1) >> 2; + } + } +} + +av_cold void ff_mss2dsp_init(MSS2DSPContext* dsp) +{ + dsp->mss2_blit_wmv9 = mss2_blit_wmv9_c; + dsp->mss2_blit_wmv9_masked = mss2_blit_wmv9_masked_c; + dsp->mss2_gray_fill_masked = mss2_gray_fill_masked_c; + dsp->upsample_plane = upsample_plane_c; +} diff --git a/libavcodec/mss2dsp.h b/libavcodec/mss2dsp.h new file mode 100644 index 
0000000000..b3d67a1e57 --- /dev/null +++ b/libavcodec/mss2dsp.h @@ -0,0 +1,50 @@ +/* + * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder DSP routines + */ + +#ifndef AVCODEC_MSS2DSP_H +#define AVCODEC_MSS2DSP_H + +#include "dsputil.h" + +typedef struct MSS2DSPContext { + void (*mss2_blit_wmv9)(uint8_t *dst, int dst_stride, + const uint8_t *srcy, int srcy_stride, + const uint8_t *srcu, const uint8_t *srcv, + int srcuv_stride, int w, int h); + void (*mss2_blit_wmv9_masked)(uint8_t *dst, int dst_stride, + int maskcolor, const uint8_t *mask, + int mask_stride, + const uint8_t *srcy, int srcy_stride, + const uint8_t *srcu, const uint8_t *srcv, + int srcuv_stride, int w, int h); + void (*mss2_gray_fill_masked)(uint8_t *dst, int dst_stride, + int maskcolor, const uint8_t *mask, + int mask_stride, int w, int h); + void (*upsample_plane)(uint8_t *plane, int plane_stride, int w, int h); +} MSS2DSPContext; + +av_cold void ff_mss2dsp_init(MSS2DSPContext* dsp); + +#endif /* AVCODEC_MSS2DSP_H */ diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c index 0e218af759..de75aac5e1 100644 --- a/libavcodec/vc1.c +++ 
b/libavcodec/vc1.c @@ -578,7 +578,12 @@ int ff_vc1_parse_frame_header(VC1Context *v, GetBitContext* gb) if (v->finterpflag) v->interpfrm = get_bits1(gb); - skip_bits(gb, 2); //framecnt unused + if (v->s.avctx->codec->id == AV_CODEC_ID_MSS2) + v->respic = + v->rangered = + v->multires = get_bits(gb, 2) == 1; + else + skip_bits(gb, 2); //framecnt unused v->rangeredfrm = 0; if (v->rangered) v->rangeredfrm = get_bits1(gb); diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h index fe21f2f6b7..13011ae038 100644 --- a/libavcodec/vc1.h +++ b/libavcodec/vc1.h @@ -394,6 +394,8 @@ typedef struct VC1Context{ uint8_t broken_link; ///< Broken link flag (BROKEN_LINK syntax element) uint8_t closed_entry; ///< Closed entry point flag (CLOSED_ENTRY syntax element) + int end_mb_x; ///< Horizontal macroblock limit (used only by mss2) + int parse_only; ///< Context is used within parser int warn_interlaced; diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index 63c0949ccd..6b2662e8f4 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -4348,10 +4348,10 @@ static void vc1_decode_i_blocks(VC1Context *v) s->mb_x = s->mb_y = 0; s->mb_intra = 1; s->first_slice_line = 1; - for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { + for (s->mb_y = 0; s->mb_y < s->end_mb_y; s->mb_y++) { s->mb_x = 0; ff_init_block_index(s); - for (; s->mb_x < s->mb_width; s->mb_x++) { + for (; s->mb_x < v->end_mb_x; s->mb_x++) { uint8_t *dst[6]; ff_update_block_index(s); dst[0] = s->dest[0]; @@ -4438,7 +4438,10 @@ static void vc1_decode_i_blocks(VC1Context *v) s->first_slice_line = 0; } if (v->s.loop_filter) - ff_draw_horiz_band(s, (s->mb_height - 1) * 16, 16); + ff_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16); + + /* This is intentionally mb_height and not end_mb_y - unlike in advanced + * profile, these only differ when decoding MSS2 rectangles. 
*/ ff_er_add_slice(s, 0, 0, s->mb_width - 1, s->mb_height - 1, ER_MB_END); } @@ -5549,6 +5552,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ff_er_frame_start(s); v->bits = buf_size * 8; + v->end_mb_x = s->mb_width; if (v->field_mode) { uint8_t *tmp[2]; s->current_picture.f.linesize[0] <<= 1; diff --git a/libavcodec/version.h b/libavcodec/version.h index 78f4ccf17f..b08f00d274 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -27,8 +27,8 @@ */ #define LIBAVCODEC_VERSION_MAJOR 54 -#define LIBAVCODEC_VERSION_MINOR 26 -#define LIBAVCODEC_VERSION_MICRO 1 +#define LIBAVCODEC_VERSION_MINOR 27 +#define LIBAVCODEC_VERSION_MICRO 0 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ diff --git a/libavformat/riff.c b/libavformat/riff.c index 6920115518..f96389099c 100644 --- a/libavformat/riff.c +++ b/libavformat/riff.c @@ -289,6 +289,7 @@ const AVCodecTag ff_codec_bmp_tags[] = { { AV_CODEC_ID_TSCC2, MKTAG('T', 'S', 'C', '2') }, { AV_CODEC_ID_MTS2, MKTAG('M', 'T', 'S', '2') }, { AV_CODEC_ID_CLLC, MKTAG('C', 'L', 'L', 'C') }, + { AV_CODEC_ID_MSS2, MKTAG('M', 'S', 'S', '2') }, { AV_CODEC_ID_NONE, 0 } }; From ede3d6400d7c06863e6eb4bcff5f676480ae6b5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Delm=C3=A1s?= Date: Fri, 31 Aug 2012 07:59:36 +0200 Subject: [PATCH 8/8] MSS1 and MSS2: set final pixel format after common stuff has been initialised This way it won't interfere with WMV9 initialisation inside MSS2 decoder and avplay will play it fine. 
Signed-off-by: Kostya Shishkov --- libavcodec/mss1.c | 7 ++++++- libavcodec/mss12.c | 4 ---- libavcodec/mss2.c | 4 ++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c index ada479eee7..caf0328ad0 100644 --- a/libavcodec/mss1.c +++ b/libavcodec/mss1.c @@ -189,11 +189,16 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size, static av_cold int mss1_decode_init(AVCodecContext *avctx) { MSS1Context * const c = avctx->priv_data; + int ret; c->ctx.avctx = avctx; avctx->coded_frame = &c->pic; - return ff_mss12_decode_init(&c->ctx, 0); + ret = ff_mss12_decode_init(&c->ctx, 0); + + avctx->pix_fmt = PIX_FMT_PAL8; + + return ret; } static av_cold int mss1_decode_end(AVCodecContext *avctx) diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c index f0f23c0f8e..18f2f2808f 100644 --- a/libavcodec/mss12.c +++ b/libavcodec/mss12.c @@ -690,10 +690,6 @@ av_cold int ff_mss12_decode_init(MSS12Context *c, int version) return AVERROR(ENOMEM); } - avctx->pix_fmt = version ? c->free_colours == 127 ? PIX_FMT_RGB555 - : PIX_FMT_RGB24 - : PIX_FMT_PAL8; - codec_init(c, version); return 0; diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c index c0c47dc8d3..2a0bf47cde 100644 --- a/libavcodec/mss2.c +++ b/libavcodec/mss2.c @@ -844,6 +844,10 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx) return ret; } ff_mss2dsp_init(&ctx->dsp); + + avctx->pix_fmt = c->free_colours == 127 ? PIX_FMT_RGB555 + : PIX_FMT_RGB24; + return 0; }