From 1a670973a756e6e1a7a170d58f3589fd5ad4c088 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 1 Feb 2012 16:23:19 -0500 Subject: [PATCH 01/19] ff_alloc_packet: modify the size of the packet to match the requested size This will simplify encoders which use this function to request the exact packet size rather than the maximum size. --- libavcodec/internal.h | 1 + libavcodec/utils.c | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/libavcodec/internal.h b/libavcodec/internal.h index 441430e41c..b435a359fb 100644 --- a/libavcodec/internal.h +++ b/libavcodec/internal.h @@ -120,6 +120,7 @@ int avpriv_unlock_avformat(void); * If avpkt->data is already set, avpkt->size is checked * to ensure it is large enough. * If avpkt->data is NULL, a new buffer is allocated. + * avpkt->size is set to the specified size. * All other AVPacket fields will be reset with av_init_packet(). * @param size the minimum required packet size * @return 0 on success, negative error code on failure diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 34eff0031a..606537b29a 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -839,16 +839,14 @@ int ff_alloc_packet(AVPacket *avpkt, int size) if (avpkt->data) { uint8_t *pkt_data; - int pkt_size; if (avpkt->size < size) return AVERROR(EINVAL); pkt_data = avpkt->data; - pkt_size = avpkt->size; av_init_packet(avpkt); avpkt->data = pkt_data; - avpkt->size = pkt_size; + avpkt->size = size; return 0; } else { return av_new_packet(avpkt, size); From 3c432631e9da0933f3309ab0b0448a97c7aebf7b Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 1 Feb 2012 16:26:37 -0500 Subject: [PATCH 02/19] pcmenc: Do not set avpkt->size. It is already the correct size as set by ff_alloc_packet(). 
--- libavcodec/pcm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c index 1adaf70318..594bd444fe 100644 --- a/libavcodec/pcm.c +++ b/libavcodec/pcm.c @@ -193,7 +193,6 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, return -1; } - avpkt->size = frame->nb_samples * avctx->channels * sample_size; *got_packet_ptr = 1; return 0; } From 236a550c3f9bd3e559eff81d1ac4d2087eaa796f Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 1 Feb 2012 19:02:32 -0500 Subject: [PATCH 03/19] Fix a typo in the x86 asm version of ff_vector_clip_int32() Specifies the correct number of xmm registers used so that they can be saved and restored on Win64 if necessary. --- libavcodec/x86/dsputil_yasm.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index 611f5c8a72..09940d147d 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -1063,7 +1063,7 @@ emu_edge mmx ; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) ; %5 = suffix %macro VECTOR_CLIP_INT32 4-5 -cglobal vector_clip_int32%5, 5,5,%2, dst, src, min, max, len +cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len %if %4 cvtsi2ss m4, minm cvtsi2ss m5, maxm From 41dd77bdbcff75f91f41651baaa71d15b3f1dc1d Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Wed, 1 Feb 2012 20:30:33 +0000 Subject: [PATCH 04/19] doc: decoding Forward Uncompressed is supported Signed-off-by: Paul B Mahol Signed-off-by: Justin Ruggles --- doc/general.texi | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/general.texi b/doc/general.texi index cda79c07b4..50ae764541 100644 --- a/doc/general.texi +++ b/doc/general.texi @@ -473,6 +473,7 @@ following image formats are supported: @item Flash Screen Video v2 @tab @tab X @item Flash Video (FLV) @tab X @tab X @tab Sorenson H.263 used in Flash +@item Forward Uncompressed @tab @tab X @item Fraps @tab @tab X @item 
H.261 @tab X @tab X @item H.263 / H.263-1996 @tab X @tab X From b5b825c3818c2792b06f0e97d190e3c0962f1a02 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 1 Feb 2012 15:19:50 -0500 Subject: [PATCH 05/19] mpc7: simplify handling of packet sizes that are not a multiple of 4 bytes --- libavcodec/mpc7.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c index 6b6bffe357..ab2f543db4 100644 --- a/libavcodec/mpc7.c +++ b/libavcodec/mpc7.c @@ -200,35 +200,47 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr, AVPacket *avpkt) { const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; + int buf_size; MPCContext *c = avctx->priv_data; GetBitContext gb; uint8_t *bits; int i, ch; int mb = -1; Band *bands = c->bands; - int off, ret; + int off, ret, last_frame, skip; int bits_used, bits_avail; memset(bands, 0, sizeof(*bands) * (c->maxbands + 1)); - if(buf_size <= 4){ - av_log(avctx, AV_LOG_ERROR, "Too small buffer passed (%i bytes)\n", buf_size); - return AVERROR(EINVAL); + + buf_size = avpkt->size & ~3; + if (buf_size <= 0) { + av_log(avctx, AV_LOG_ERROR, "packet size is too small (%i bytes)\n", + avpkt->size); + return AVERROR_INVALIDDATA; + } + if (buf_size != avpkt->size) { + av_log(avctx, AV_LOG_WARNING, "packet size is not a multiple of 4. " + "extra bytes at the end will be skipped.\n"); } + skip = buf[0]; + last_frame = buf[1]; + buf += 4; + buf_size -= 4; + /* get output buffer */ - c->frame.nb_samples = buf[1] ? c->lastframelen : MPC_FRAME_SIZE; + c->frame.nb_samples = last_frame ? 
c->lastframelen : MPC_FRAME_SIZE; if ((ret = avctx->get_buffer(avctx, &c->frame)) < 0) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return ret; } - bits = av_malloc(((buf_size - 1) & ~3) + FF_INPUT_BUFFER_PADDING_SIZE); + bits = av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE); if (!bits) return AVERROR(ENOMEM); - c->dsp.bswap_buf((uint32_t*)bits, (const uint32_t*)(buf + 4), (buf_size - 4) >> 2); - init_get_bits(&gb, bits, (buf_size - 4)* 8); - skip_bits_long(&gb, buf[0]); + c->dsp.bswap_buf((uint32_t *)bits, (const uint32_t *)buf, buf_size >> 2); + init_get_bits(&gb, bits, buf_size * 8); + skip_bits_long(&gb, skip); /* read subband indexes */ for(i = 0; i <= c->maxbands; i++){ @@ -287,21 +299,21 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data, av_free(bits); bits_used = get_bits_count(&gb); - bits_avail = (buf_size - 4) * 8; - if(!buf[1] && ((bits_avail < bits_used) || (bits_used + 32 <= bits_avail))){ + bits_avail = buf_size * 8; + if (!last_frame && ((bits_avail < bits_used) || (bits_used + 32 <= bits_avail))) { av_log(NULL,0, "Error decoding frame: used %i of %i bits\n", bits_used, bits_avail); return -1; } if(c->frames_to_skip){ c->frames_to_skip--; *got_frame_ptr = 0; - return buf_size; + return avpkt->size; } *got_frame_ptr = 1; *(AVFrame *)data = c->frame; - return buf_size; + return avpkt->size; } static void mpc7_decode_flush(AVCodecContext *avctx) From 83ce51cc7d10c1589f07fda1b9f10fbc5aa93e77 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Tue, 31 Jan 2012 11:17:04 -0500 Subject: [PATCH 06/19] mpc7: use av_fast_padded_malloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoids doing malloc/free for each frame. Also fixes valgrind errors due to use of uninitialized padding bytes. 
Based on a patch by Reimar Döffinger --- libavcodec/mpc7.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c index ab2f543db4..8b3a1b9a20 100644 --- a/libavcodec/mpc7.c +++ b/libavcodec/mpc7.c @@ -203,7 +203,6 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data, int buf_size; MPCContext *c = avctx->priv_data; GetBitContext gb; - uint8_t *bits; int i, ch; int mb = -1; Band *bands = c->bands; @@ -235,11 +234,11 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data, return ret; } - bits = av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE); - if (!bits) + av_fast_padded_malloc(&c->bits, &c->buf_size, buf_size); + if (!c->bits) return AVERROR(ENOMEM); - c->dsp.bswap_buf((uint32_t *)bits, (const uint32_t *)buf, buf_size >> 2); - init_get_bits(&gb, bits, buf_size * 8); + c->dsp.bswap_buf((uint32_t *)c->bits, (const uint32_t *)buf, buf_size >> 2); + init_get_bits(&gb, c->bits, buf_size * 8); skip_bits_long(&gb, skip); /* read subband indexes */ @@ -296,8 +295,6 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data, ff_mpc_dequantize_and_synth(c, mb, c->frame.data[0], 2); - av_free(bits); - bits_used = get_bits_count(&gb); bits_avail = buf_size * 8; if (!last_frame && ((bits_avail < bits_used) || (bits_used + 32 <= bits_avail))) { @@ -324,12 +321,21 @@ static void mpc7_decode_flush(AVCodecContext *avctx) c->frames_to_skip = 32; } +static av_cold int mpc7_decode_close(AVCodecContext *avctx) +{ + MPCContext *c = avctx->priv_data; + av_freep(&c->bits); + c->buf_size = 0; + return 0; +} + AVCodec ff_mpc7_decoder = { .name = "mpc7", .type = AVMEDIA_TYPE_AUDIO, .id = CODEC_ID_MUSEPACK7, .priv_data_size = sizeof(MPCContext), .init = mpc7_decode_init, + .close = mpc7_decode_close, .decode = mpc7_decode_frame, .flush = mpc7_decode_flush, .capabilities = CODEC_CAP_DR1, From c3a06615bdf00fcf64747f12a0ba1a2c7fb2e576 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: 
Wed, 18 Jan 2012 13:09:43 -0500 Subject: [PATCH 07/19] bethsoftvideo: fix palette reading. Return the correct number of consumed bytes and set *data_size = 0. Returned size is 1 too small, leading to that 1 byte being read as the next frame, which results in an extra blank frame at the beginning of the stream. --- libavcodec/bethsoftvideo.c | 10 ++- tests/ref/fate/bethsoft-vid | 141 ++++++++++++++++++------------------ 2 files changed, 78 insertions(+), 73 deletions(-) diff --git a/libavcodec/bethsoftvideo.c b/libavcodec/bethsoftvideo.c index fa0457cc66..743e387cbe 100644 --- a/libavcodec/bethsoftvideo.c +++ b/libavcodec/bethsoftvideo.c @@ -59,7 +59,7 @@ static int set_palette(BethsoftvidContext *ctx) palette[a] = bytestream2_get_be24u(&ctx->g) * 4; } ctx->frame.palette_has_changed = 1; - return 256*3; + return 0; } static int bethsoftvid_decode_frame(AVCodecContext *avctx, @@ -86,7 +86,13 @@ static int bethsoftvid_decode_frame(AVCodecContext *avctx, switch(block_type = bytestream2_get_byte(&vid->g)){ case PALETTE_BLOCK: { - return set_palette(vid); + int ret; + *data_size = 0; + if ((ret = set_palette(vid)) < 0) { + av_log(avctx, AV_LOG_ERROR, "error reading palette\n"); + return ret; + } + return bytestream2_tell(&vid->g); } case VIDEO_YOFF_P_FRAME: yoffset = bytestream2_get_le16(&vid->g); diff --git a/tests/ref/fate/bethsoft-vid b/tests/ref/fate/bethsoft-vid index 0886bfcf48..92c694b19e 100644 --- a/tests/ref/fate/bethsoft-vid +++ b/tests/ref/fate/bethsoft-vid @@ -1,91 +1,90 @@ -0, 0, 192000, 0xdecc683b +0, 0, 192000, 0x00000000 1, 0, 1480, 0x00000000 -0, 1500, 192000, 0x00000000 -0, 3000, 192000, 0x01a6cf45 -0, 4500, 192000, 0xd07d57e9 +0, 1500, 192000, 0x01a6cf45 +0, 3000, 192000, 0xd07d57e9 +0, 4500, 192000, 0x3cb1dff5 1, 5994, 1480, 0x20a92bd4 -0, 6000, 192000, 0x3cb1dff5 -0, 7500, 192000, 0xd1aaa8fb -0, 9000, 192000, 0x75f526cd -0, 10500, 192000, 0x0f673577 +0, 6000, 192000, 0xd1aaa8fb +0, 7500, 192000, 0x75f526cd +0, 9000, 192000, 0x0f673577 +0, 10500, 
192000, 0x897b6781 1, 11988, 1850, 0xa9e48a74 -0, 12000, 192000, 0x897b6781 -0, 13500, 192000, 0x81e6b7f7 -0, 15000, 192000, 0x1f45ce61 -0, 16500, 192000, 0x5a0772a6 -0, 18000, 192000, 0xf78732b3 +0, 12000, 192000, 0x81e6b7f7 +0, 13500, 192000, 0x1f45ce61 +0, 15000, 192000, 0x5a0772a6 +0, 16500, 192000, 0xf78732b3 +0, 18000, 192000, 0x8427f9e5 1, 19481, 1480, 0x23ecd018 -0, 19500, 192000, 0x8427f9e5 -0, 21000, 192000, 0x40473f11 -0, 22500, 192000, 0x173ceebe -0, 24000, 192000, 0x136b9516 +0, 19500, 192000, 0x40473f11 +0, 21000, 192000, 0x173ceebe +0, 22500, 192000, 0x136b9516 +0, 24000, 192000, 0x138d11ae 1, 25475, 1480, 0x206bb915 -0, 25500, 192000, 0x138d11ae -0, 27000, 192000, 0x063dbff3 -0, 28500, 192000, 0x5280852f -0, 30000, 192000, 0x99943a8f +0, 25500, 192000, 0x063dbff3 +0, 27000, 192000, 0x5280852f +0, 28500, 192000, 0x99943a8f +0, 30000, 192000, 0x0330a728 1, 31469, 1850, 0xb0e10e75 -0, 31500, 192000, 0x0330a728 -0, 33000, 192000, 0x5d35467d -0, 34500, 192000, 0xfd436343 -0, 36000, 192000, 0xc323fcfe -0, 37500, 192000, 0x2a1530a0 +0, 31500, 192000, 0x5d35467d +0, 33000, 192000, 0xfd436343 +0, 34500, 192000, 0xc323fcfe +0, 36000, 192000, 0x2a1530a0 +0, 37500, 192000, 0xbd43bb60 1, 38961, 1480, 0x8d9baedd -0, 39000, 192000, 0xbd43bb60 -0, 40500, 192000, 0xa47f5eab -0, 42000, 192000, 0xff17f5f7 -0, 43500, 192000, 0xb4140b55 +0, 39000, 192000, 0xa47f5eab +0, 40500, 192000, 0xff17f5f7 +0, 42000, 192000, 0xb4140b55 +0, 43500, 192000, 0xb8782cc4 1, 44955, 1480, 0xb802aae1 -0, 45000, 192000, 0xb8782cc4 -0, 46500, 192000, 0x92975b8b -0, 48000, 192000, 0xf42a64d6 -0, 49500, 192000, 0x2cc7077d +0, 45000, 192000, 0x92975b8b +0, 46500, 192000, 0xf42a64d6 +0, 48000, 192000, 0x2cc7077d +0, 49500, 192000, 0x00080cc8 1, 50950, 1480, 0xecd7b5cc -0, 51000, 192000, 0x00080cc8 -0, 52500, 192000, 0x584b48f3 -0, 54000, 192000, 0xd68f57da -0, 55500, 192000, 0x60158422 +0, 51000, 192000, 0x584b48f3 +0, 52500, 192000, 0xd68f57da +0, 54000, 192000, 0x60158422 +0, 55500, 192000, 
0xd7fb89e6 1, 56944, 1850, 0x16861355 -0, 57000, 192000, 0xd7fb89e6 -0, 58500, 192000, 0x97f1c76a -0, 60000, 192000, 0x46c4bb9e -0, 61500, 192000, 0xd32f9b66 -0, 63000, 192000, 0x74f43886 +0, 57000, 192000, 0x97f1c76a +0, 58500, 192000, 0x46c4bb9e +0, 60000, 192000, 0xd32f9b66 +0, 61500, 192000, 0x74f43886 +0, 63000, 192000, 0x3c4e47df 1, 64436, 1480, 0xa51690bd -0, 64500, 192000, 0x3c4e47df -0, 66000, 192000, 0xb5ac0a58 -0, 67500, 192000, 0xcc572b31 -0, 69000, 192000, 0xb1739d26 +0, 64500, 192000, 0xb5ac0a58 +0, 66000, 192000, 0xcc572b31 +0, 67500, 192000, 0xb1739d26 +0, 69000, 192000, 0x73da5473 1, 70430, 1480, 0xdd0b90d1 -0, 70500, 192000, 0x73da5473 -0, 72000, 192000, 0x5f79f5bc -0, 73500, 192000, 0x0affc0a0 -0, 75000, 192000, 0x2b4d5c1c +0, 70500, 192000, 0x5f79f5bc +0, 72000, 192000, 0x0affc0a0 +0, 73500, 192000, 0x2b4d5c1c +0, 75000, 192000, 0x309b41bc 1, 76424, 1850, 0x3ce6e333 -0, 76500, 192000, 0x309b41bc -0, 78000, 192000, 0xd42b6424 -0, 79500, 192000, 0x4795c948 -0, 81000, 192000, 0xbc1a3a8b -0, 82500, 192000, 0x16529c5b +0, 76500, 192000, 0xd42b6424 +0, 78000, 192000, 0x4795c948 +0, 79500, 192000, 0xbc1a3a8b +0, 81000, 192000, 0x16529c5b +0, 82500, 192000, 0x6b1b31ba 1, 83917, 1480, 0xf8ce8ea3 -0, 84000, 192000, 0x6b1b31ba -0, 85500, 192000, 0x569182ce -0, 87000, 192000, 0xe6ea9866 -0, 88500, 192000, 0x102c6076 +0, 84000, 192000, 0x569182ce +0, 85500, 192000, 0xe6ea9866 +0, 87000, 192000, 0x102c6076 +0, 88500, 192000, 0xb29f527a 1, 89911, 1480, 0xda4597af -0, 90000, 192000, 0xb29f527a -0, 91500, 192000, 0x040b4eee -0, 93000, 192000, 0x92574f4a -0, 94500, 192000, 0x1e8acdce +0, 90000, 192000, 0x040b4eee +0, 91500, 192000, 0x92574f4a +0, 93000, 192000, 0x1e8acdce +0, 94500, 192000, 0x1becf516 1, 95905, 1480, 0x918f7cb3 -0, 96000, 192000, 0x1becf516 -0, 97500, 192000, 0xb62e9776 -0, 99000, 192000, 0xed37a08e -0, 100500, 192000, 0xc0719912 +0, 96000, 192000, 0xb62e9776 +0, 97500, 192000, 0xed37a08e +0, 99000, 192000, 0xc0719912 +0, 100500, 192000, 
0x24cf7a7e 1, 101899, 1850, 0xca6edb15 -0, 102000, 192000, 0x24cf7a7e -0, 103500, 192000, 0x0307f62f -0, 105000, 192000, 0x79b7417b +0, 102000, 192000, 0x0307f62f +0, 103500, 192000, 0x79b7417b 1, 109392, 1480, 0xba279597 1, 115386, 1480, 0xc5a38a9e 1, 121380, 1850, 0x8147eef5 From 38c6bbc118661c16b31122691b3d3d5bda13b82f Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Thu, 2 Feb 2012 01:10:19 +0000 Subject: [PATCH 08/19] ra144enc: drop pointless "encoder" from .long_name Signed-off-by: Paul B Mahol Signed-off-by: Justin Ruggles --- libavcodec/ra144enc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c index c970a26465..c8a09a2ed2 100644 --- a/libavcodec/ra144enc.c +++ b/libavcodec/ra144enc.c @@ -521,5 +521,5 @@ AVCodec ff_ra_144_encoder = { .close = ra144_encode_close, .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, - .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K) encoder"), + .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"), }; From de53b9068ad5e05333ddaa27223e5230a042583a Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Wed, 1 Feb 2012 21:55:32 -0800 Subject: [PATCH 09/19] swscale: implement MMX, SSE2 and AVX functions for RGB32 input. 
--- libswscale/x86/input.asm | 159 +++++++++++++++++++++++++++++++++++ libswscale/x86/swscale_mmx.c | 16 ++++ 2 files changed, 175 insertions(+) diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm index d52a35a106..66d88458bb 100644 --- a/libswscale/x86/input.asm +++ b/libswscale/x86/input.asm @@ -51,6 +51,19 @@ bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV +rgba_Ycoeff_rb: times 4 dw RY, BY +rgba_Ycoeff_br: times 4 dw BY, RY +rgba_Ycoeff_ga: times 4 dw GY, 0 +rgba_Ycoeff_ag: times 4 dw 0, GY +rgba_Ucoeff_rb: times 4 dw RU, BU +rgba_Ucoeff_br: times 4 dw BU, RU +rgba_Ucoeff_ga: times 4 dw GU, 0 +rgba_Ucoeff_ag: times 4 dw 0, GU +rgba_Vcoeff_rb: times 4 dw RV, BV +rgba_Vcoeff_br: times 4 dw BV, RV +rgba_Vcoeff_ga: times 4 dw GV, 0 +rgba_Vcoeff_ag: times 4 dw 0, GV + shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \ 6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80 shuf_rgb_3x56: db 2, 0x80, 3, 0x80, 4, 0x80, 5, 0x80, \ @@ -296,6 +309,152 @@ RGB24_FUNCS 11, 13 INIT_XMM avx RGB24_FUNCS 11, 13 +; %1 = nr. 
of XMM registers +; %2-5 = rgba, bgra, argb or abgr (in individual characters) +%macro RGB32_TO_Y_FN 5-6 +cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w + mova m5, [rgba_Ycoeff_%2%4] + mova m6, [rgba_Ycoeff_%3%5] +%if %0 == 6 + jmp mangle(program_name %+ _ %+ %6 %+ ToY %+ SUFFIX).body +%else ; %0 == 6 +.body: +%if ARCH_X86_64 + movsxd wq, wd +%endif + lea srcq, [srcq+wq*4] + add dstq, wq + neg wq + mova m4, [rgb_Yrnd] + pcmpeqb m7, m7 + psrlw m7, 8 ; (word) { 0x00ff } x4 +.loop: + ; FIXME check alignment and use mova + movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] + movu m2, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] + DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7] + pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3] + pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3] + pmaddwd m3, m5 ; (dword) { Bx*BY + Rx*RY }[4-7] + pmaddwd m2, m6 ; (dword) { Gx*GY }[4-7] + paddd m0, m4 ; += rgb_Yrnd + paddd m2, m4 ; += rgb_Yrnd + paddd m0, m1 ; (dword) { Y[0-3] } + paddd m2, m3 ; (dword) { Y[4-7] } + psrad m0, 15 + psrad m2, 15 + packssdw m0, m2 ; (word) { Y[0-7] } + packuswb m0, m0 ; (byte) { Y[0-7] } + movh [dstq+wq], m0 + add wq, mmsize / 2 + jl .loop + REP_RET +%endif ; %0 == 6 +%endmacro + +; %1 = nr.
of XMM registers +; %2-5 = rgba, bgra, argb or abgr (in individual characters) +%macro RGB32_TO_UV_FN 5-6 +cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w +%if ARCH_X86_64 + mova m8, [rgba_Ucoeff_%2%4] + mova m9, [rgba_Ucoeff_%3%5] + mova m10, [rgba_Vcoeff_%2%4] + mova m11, [rgba_Vcoeff_%3%5] +%define coeffU1 m8 +%define coeffU2 m9 +%define coeffV1 m10 +%define coeffV2 m11 +%else ; x86-32 +%define coeffU1 [rgba_Ucoeff_%2%4] +%define coeffU2 [rgba_Ucoeff_%3%5] +%define coeffV1 [rgba_Vcoeff_%2%4] +%define coeffV2 [rgba_Vcoeff_%3%5] +%endif ; x86-64/32 +%if ARCH_X86_64 && %0 == 6 + jmp mangle(program_name %+ _ %+ %6 %+ ToUV %+ SUFFIX).body +%else ; ARCH_X86_64 && %0 == 6 +.body: +%if ARCH_X86_64 + movsxd wq, dword r4m +%else ; x86-32 + mov wq, r4m +%endif + add dstUq, wq + add dstVq, wq + lea srcq, [srcq+wq*4] + neg wq + pcmpeqb m7, m7 + psrlw m7, 8 ; (word) { 0x00ff } x4 + mova m6, [rgb_UVrnd] +.loop: + ; FIXME check alignment and use mova + movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] + movu m4, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] + DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7] + pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3] + pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3] + pmaddwd m1, coeffU1 ; (dword) { Bx*BU + Rx*RU }[0-3] + pmaddwd m0, coeffU2 ; (dword) { Gx*GU }[0-3] + paddd m3, m6 ; += rgb_UVrnd + paddd m1, m6 ; += rgb_UVrnd + paddd m2, m3 ; (dword) { V[0-3] } + paddd m0, m1 ; (dword) { U[0-3] } + pmaddwd m3, m5, coeffV1 ; (dword) { Bx*BV + Rx*RV }[4-7] + pmaddwd m1, m4, coeffV2 ; (dword) { Gx*GV }[4-7] + pmaddwd m5, coeffU1 ; (dword) { Bx*BU + Rx*RU }[4-7] + pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7] + paddd m3, m6 ; += rgb_UVrnd + paddd m5, m6 ; += rgb_UVrnd + psrad m0, 15 + paddd m1, m3 ; (dword) { V[4-7] } + paddd m4, m5 ; (dword) { U[4-7] } + psrad m2, 15 + psrad m4, 15 + psrad m1, 15 + packssdw m0, m4 ; (word) { U[0-7] } + packssdw m2, m1 ; (word) { V[0-7] } +%if mmsize == 
8 + packuswb m0, m0 ; (byte) { U[0-7] } + packuswb m2, m2 ; (byte) { V[0-7] } + movh [dstUq+wq], m0 + movh [dstVq+wq], m2 +%else ; mmsize == 16 + packuswb m0, m2 ; (byte) { U[0-7], V[0-7] } + movh [dstUq+wq], m0 + movhps [dstVq+wq], m0 +%endif ; mmsize == 8/16 + add wq, mmsize / 2 + jl .loop + REP_RET +%endif ; ARCH_X86_64 && %0 == 6 +%endmacro + +; %1 = nr. of XMM registers for rgb-to-Y func +; %2 = nr. of XMM registers for rgb-to-UV func +%macro RGB32_FUNCS 2 +RGB32_TO_Y_FN %1, r, g, b, a +RGB32_TO_Y_FN %1, b, g, r, a, rgba +RGB32_TO_Y_FN %1, a, r, g, b, rgba +RGB32_TO_Y_FN %1, a, b, g, r, rgba + +RGB32_TO_UV_FN %2, r, g, b, a +RGB32_TO_UV_FN %2, b, g, r, a, rgba +RGB32_TO_UV_FN %2, a, r, g, b, rgba +RGB32_TO_UV_FN %2, a, b, g, r, rgba +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +RGB32_FUNCS 0, 0 +%endif + +INIT_XMM sse2 +RGB32_FUNCS 8, 12 + +INIT_XMM avx +RGB32_FUNCS 8, 12 + ;----------------------------------------------------------------------------- ; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
; diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index 29f31c329b..764472e95e 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -244,6 +244,10 @@ extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ INPUT_FUNC(yuyv, opt); \ INPUT_UV_FUNC(nv12, opt); \ INPUT_UV_FUNC(nv21, opt); \ + INPUT_FUNC(rgba, opt); \ + INPUT_FUNC(bgra, opt); \ + INPUT_FUNC(argb, opt); \ + INPUT_FUNC(abgr, opt); \ INPUT_FUNC(rgb24, opt); \ INPUT_FUNC(bgr24, opt) @@ -335,6 +339,10 @@ switch(c->dstBpc){ \ break; case_rgb(rgb24, RGB24, mmx); case_rgb(bgr24, BGR24, mmx); + case_rgb(bgra, BGRA, mmx); + case_rgb(rgba, RGBA, mmx); + case_rgb(abgr, ABGR, mmx); + case_rgb(argb, ARGB, mmx); default: break; } @@ -379,6 +387,10 @@ switch(c->dstBpc){ \ break; case_rgb(rgb24, RGB24, sse2); case_rgb(bgr24, BGR24, sse2); + case_rgb(bgra, BGRA, sse2); + case_rgb(rgba, RGBA, sse2); + case_rgb(abgr, ABGR, sse2); + case_rgb(argb, ARGB, sse2); default: break; } @@ -422,6 +434,10 @@ switch(c->dstBpc){ \ break; case_rgb(rgb24, RGB24, avx); case_rgb(bgr24, BGR24, avx); + case_rgb(bgra, BGRA, avx); + case_rgb(rgba, RGBA, avx); + case_rgb(abgr, ABGR, avx); + case_rgb(argb, ARGB, avx); default: break; } From fcc518a024010d652fb8d216c1004afaf45b7346 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Wed, 1 Feb 2012 19:22:54 +0100 Subject: [PATCH 10/19] APIchanges: add missing commit hashes --- doc/APIchanges | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index df55c0a732..bf0f28af0c 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,18 +13,18 @@ libavutil: 2011-04-18 API changes, most recent first: -2012-02-01 - xxxxxxx - lavc 54.01.0 +2012-02-01 - 316fc74 - lavc 54.01.0 Add av_fast_padded_malloc() as alternative for av_realloc() when aligned memory is required. The buffer will always have FF_INPUT_BUFFER_PADDING_SIZE zero-padded bytes at the end. 
-2012-01-31 - xxxxxxx - lavf 54.01.0 +2012-01-31 - dd6d3b0 - lavf 54.01.0 Add avformat_get_riff_video_tags() and avformat_get_riff_audio_tags(). -2012-01-31 - xxxxxxx - lavc 54.01.0 +2012-01-31 - af08d9a - lavc 54.01.0 Add avcodec_is_open() function. -2012-01-30 - xxxxxxx - lavu 51.22.0 - intfloat.h +2012-01-30 - 8b93312 - lavu 51.22.0 - intfloat.h Add a new installed header libavutil/intfloat.h with int/float punning functions. From 2c98f407c8803da3002747f3a8d43696e8744dc7 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 2 Feb 2012 01:46:05 +0000 Subject: [PATCH 11/19] fate: make acodec-ac3_fixed test output raw AC3 There is no point in this test using the RM format. Signed-off-by: Mans Rullgard --- configure | 2 +- tests/codec-regression.sh | 2 +- tests/ref/acodec/ac3_fixed | 4 ++-- tests/ref/seek/ac3_ac3 | 49 ++++++++++++++++++++++++++++++++++++++ tests/ref/seek/ac3_rm | 44 ---------------------------------- 5 files changed, 53 insertions(+), 48 deletions(-) create mode 100644 tests/ref/seek/ac3_ac3 delete mode 100644 tests/ref/seek/ac3_rm diff --git a/configure b/configure index 49f9af2e73..38c8457a46 100755 --- a/configure +++ b/configure @@ -1630,7 +1630,7 @@ test_deps _muxer _demuxer \ wav \ yuv4mpegpipe=yuv4mpeg \ -ac3_fixed_test_deps="ac3_fixed_encoder ac3_decoder rm_muxer rm_demuxer" +ac3_fixed_test_deps="ac3_fixed_encoder ac3_decoder" mpg_test_deps="mpeg1system_muxer mpegps_demuxer" # default parameters diff --git a/tests/codec-regression.sh b/tests/codec-regression.sh index faa61e65fb..21b53b7a63 100755 --- a/tests/codec-regression.sh +++ b/tests/codec-regression.sh @@ -295,7 +295,7 @@ $tiny_psnr $pcm_dst $pcm_ref 2 1924 fi if [ -n "$do_ac3_fixed" ] ; then -do_audio_encoding ac3.rm "-vn -acodec ac3_fixed" +do_audio_encoding ac3.ac3 "-vn -acodec ac3_fixed" # binaries configured with --disable-sse decode ac3 differently #do_audio_decoding #$tiny_psnr $pcm_dst $pcm_ref 2 1024 diff --git a/tests/ref/acodec/ac3_fixed b/tests/ref/acodec/ac3_fixed index 
dba2dfc5e7..0c2f9b7214 100644 --- a/tests/ref/acodec/ac3_fixed +++ b/tests/ref/acodec/ac3_fixed @@ -1,2 +1,2 @@ -e7fa185030a56d9db8663ad9e38c6c94 *./tests/data/acodec/ac3.rm -98751 ./tests/data/acodec/ac3.rm +a1d1fc116463b771abf5aef7ed37d7b1 *./tests/data/acodec/ac3.ac3 +96408 ./tests/data/acodec/ac3.ac3 diff --git a/tests/ref/seek/ac3_ac3 b/tests/ref/seek/ac3_ac3 new file mode 100644 index 0000000000..167dc8d716 --- /dev/null +++ b/tests/ref/seek/ac3_ac3 @@ -0,0 +1,49 @@ +ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 +ret: 0 st:-1 flags:0 ts:-1.000000 +ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 +ret: 0 st:-1 flags:1 ts: 1.894167 +ret: 0 st: 0 flags:1 dts: 1.880400 pts: 1.880400 pos: 30092 size: 558 +ret: 0 st: 0 flags:0 ts: 0.788333 +ret: 0 st: 0 flags:1 dts: 0.800911 pts: 0.800911 pos: 12818 size: 556 +ret:-1 st: 0 flags:1 ts:-0.317500 +ret: 0 st:-1 flags:0 ts: 2.576668 +ret: 0 st: 0 flags:1 dts: 2.576844 pts: 2.576844 pos: 41238 size: 558 +ret: 0 st:-1 flags:1 ts: 1.470835 +ret: 0 st: 0 flags:1 dts: 1.462533 pts: 1.462533 pos: 23406 size: 556 +ret: 0 st: 0 flags:0 ts: 0.365000 +ret: 0 st: 0 flags:1 dts: 0.383044 pts: 0.383044 pos: 6130 size: 558 +ret:-1 st: 0 flags:1 ts:-0.740833 +ret: 0 st:-1 flags:0 ts: 2.153336 +ret: 0 st: 0 flags:1 dts: 2.158978 pts: 2.158978 pos: 34552 size: 556 +ret: 0 st:-1 flags:1 ts: 1.047503 +ret: 0 st: 0 flags:1 dts: 1.044667 pts: 1.044667 pos: 16718 size: 558 +ret: 0 st: 0 flags:0 ts:-0.058333 +ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 +ret: 0 st: 0 flags:1 ts: 2.835833 +ret: 0 st: 0 flags:1 dts: 2.820600 pts: 2.820600 pos: 45140 size: 556 +ret: 0 st:-1 flags:0 ts: 1.730004 +ret: 0 st: 0 flags:1 dts: 1.741111 pts: 1.741111 pos: 27864 size: 556 +ret: 0 st:-1 flags:1 ts: 0.624171 +ret: 0 st: 0 flags:1 dts: 0.591978 pts: 0.591978 pos: 9474 size: 556 +ret: 0 st: 0 flags:0 ts:-0.481667 +ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 +ret: 0 st: 0 flags:1 
ts: 2.412500 +ret: 0 st: 0 flags:1 dts: 2.402733 pts: 2.402733 pos: 38452 size: 558 +ret: 0 st:-1 flags:0 ts: 1.306672 +ret: 0 st: 0 flags:1 dts: 1.323244 pts: 1.323244 pos: 21176 size: 558 +ret: 0 st:-1 flags:1 ts: 0.200839 +ret: 0 st: 0 flags:1 dts: 0.174111 pts: 0.174111 pos: 2786 size: 558 +ret: 0 st: 0 flags:0 ts:-0.904989 +ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 +ret: 0 st: 0 flags:1 ts: 1.989178 +ret: 0 st: 0 flags:1 dts: 1.984867 pts: 1.984867 pos: 31764 size: 558 +ret: 0 st:-1 flags:0 ts: 0.883340 +ret: 0 st: 0 flags:1 dts: 0.905378 pts: 0.905378 pos: 14488 size: 558 +ret:-1 st:-1 flags:1 ts:-0.222493 +ret: 0 st: 0 flags:0 ts: 2.671678 +ret: 0 st: 0 flags:1 dts: 2.681311 pts: 2.681311 pos: 42910 size: 558 +ret: 0 st: 0 flags:1 ts: 1.565844 +ret: 0 st: 0 flags:1 dts: 1.532178 pts: 1.532178 pos: 24520 size: 558 +ret: 0 st:-1 flags:0 ts: 0.460008 +ret: 0 st: 0 flags:1 dts: 0.487511 pts: 0.487511 pos: 7802 size: 556 +ret:-1 st:-1 flags:1 ts:-0.645825 diff --git a/tests/ref/seek/ac3_rm b/tests/ref/seek/ac3_rm deleted file mode 100644 index b38758bb2c..0000000000 --- a/tests/ref/seek/ac3_rm +++ /dev/null @@ -1,44 +0,0 @@ -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 -ret: 0 st:-1 flags:0 ts:-1.000000 -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 -ret:-1 st:-1 flags:1 ts: 1.894167 -ret:-1 st: 0 flags:0 ts: 0.788000 -ret: 0 st: 0 flags:1 ts:-0.317000 -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 -ret:-1 st:-1 flags:0 ts: 2.576668 -ret:-1 st:-1 flags:1 ts: 1.470835 -ret:-1 st: 0 flags:0 ts: 0.365000 -ret: 0 st: 0 flags:1 ts:-0.741000 -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 -ret:-1 st:-1 flags:0 ts: 2.153336 -ret:-1 st:-1 flags:1 ts: 1.047503 -ret: 0 st: 0 flags:0 ts:-0.058000 -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 -ret:-1 st: 0 flags:1 ts: 2.836000 -ret: 0 st:-1 flags:0 ts: 1.730004 -ret: 0 st: 0 flags:1 
dts:8589.800000 pts:8589.800000 pos: 65950 size: 32801 -ret: 0 st:-1 flags:1 ts: 0.624171 -ret: 0 st: 0 flags:1 dts: 0.256000 pts: 0.256000 pos: 65337 size: 400 -ret: 0 st: 0 flags:0 ts:-0.482000 -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 -ret:-1 st: 0 flags:1 ts: 2.413000 -ret: 0 st:-1 flags:0 ts: 1.306672 -ret: 0 st: 0 flags:1 dts:8589.800000 pts:8589.800000 pos: 65950 size: 32801 -ret: 0 st:-1 flags:1 ts: 0.200839 -ret: 0 st: 0 flags:1 dts: 0.034000 pts: 0.034000 pos: 839 size: 558 -ret: 0 st: 0 flags:0 ts:-0.905000 -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 -ret: 0 st: 0 flags:1 ts: 1.989000 -ret: 0 st: 0 flags:1 dts: 0.256000 pts: 0.256000 pos: 65337 size: 400 -ret: 0 st:-1 flags:0 ts: 0.883340 -ret: 0 st: 0 flags:1 dts: 3.378000 pts: 3.378000 pos: 55491 size: 558 -ret: 0 st:-1 flags:1 ts:-0.222493 -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 -ret: 0 st: 0 flags:0 ts: 2.672000 -ret: 0 st: 0 flags:1 dts: 3.378000 pts: 3.378000 pos: 55491 size: 558 -ret: 0 st: 0 flags:1 ts: 1.566000 -ret: 0 st: 0 flags:1 dts: 0.256000 pts: 0.256000 pos: 65337 size: 400 -ret: 0 st:-1 flags:0 ts: 0.460008 -ret: 0 st: 0 flags:1 dts: 3.378000 pts: 3.378000 pos: 55491 size: 558 -ret: 0 st:-1 flags:1 ts:-0.645825 -ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 From 7f19bdc2a29e0f9e3fff0da8c5fc1d2f7f972efc Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Thu, 2 Feb 2012 16:30:27 +0100 Subject: [PATCH 12/19] movdec: fix dts generation in fragmented files Do not use AVStream's duration for dts generation since it contains in some cases the duration of the whole file instead of duration of the samples in the moov. This happens if the mdhd holds the duration of the whole file but has no entries or a zero duration in its stts. 
--- libavformat/isom.h | 1 + libavformat/mov.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libavformat/isom.h b/libavformat/isom.h index 16d777651a..91fbf759f8 100644 --- a/libavformat/isom.h +++ b/libavformat/isom.h @@ -126,6 +126,7 @@ typedef struct MOVStreamContext { uint32_t palette[256]; int has_palette; int64_t data_size; + int64_t track_end; ///< used for dts generation in fragmented movie files } MOVStreamContext; typedef struct MOVContext { diff --git a/libavformat/mov.c b/libavformat/mov.c index 0ce32e0f8e..3c1927ff33 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -1639,6 +1639,7 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom) st->nb_frames= total_sample_count; if (duration) st->duration= duration; + sc->track_end = duration; return 0; } @@ -2233,7 +2234,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) if (flags & 0x001) data_offset = avio_rb32(pb); if (flags & 0x004) first_sample_flags = avio_rb32(pb); - dts = st->duration - sc->time_offset; + dts = sc->track_end - sc->time_offset; offset = frag->base_data_offset + data_offset; distance = 0; av_dlog(c->fc, "first sample flags 0x%x\n", first_sample_flags); @@ -2263,7 +2264,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) sc->data_size += sample_size; } frag->moof_offset = offset; - st->duration = dts + sc->time_offset; + st->duration = sc->track_end = dts + sc->time_offset; return 0; } From 3f57bde1f2a2538d46e1f26a1b0a773a89beaa67 Mon Sep 17 00:00:00 2001 From: Piotr Bandurski Date: Wed, 1 Feb 2012 20:23:33 +0000 Subject: [PATCH 13/19] frwu: lowercase the FRWU codec name This is more consistent with all the other codec names. 
Signed-off-by: Paul B Mahol Signed-off-by: Diego Biurrun --- libavcodec/frwu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/frwu.c b/libavcodec/frwu.c index 1dbbc32514..c21c19a2ee 100644 --- a/libavcodec/frwu.c +++ b/libavcodec/frwu.c @@ -27,7 +27,7 @@ static av_cold int decode_init(AVCodecContext *avctx) { if (avctx->width & 1) { - av_log(avctx, AV_LOG_ERROR, "FRWU needs even width\n"); + av_log(avctx, AV_LOG_ERROR, "frwu needs even width\n"); return AVERROR(EINVAL); } avctx->pix_fmt = PIX_FMT_UYVY422; @@ -114,7 +114,7 @@ static av_cold int decode_close(AVCodecContext *avctx) } AVCodec ff_frwu_decoder = { - .name = "FRWU", + .name = "frwu", .type = AVMEDIA_TYPE_VIDEO, .id = CODEC_ID_FRWU, .init = decode_init, From 148bc235b0dbf5e3dcd12fc4480475008a719576 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 18 Jan 2012 23:56:49 +0100 Subject: [PATCH 14/19] swscale: K&R formatting cosmetics for Blackfin code Also prettyprint some comments in Assembly code. --- libswscale/bfin/internal_bfin.S | 12 ++--- libswscale/bfin/swscale_bfin.c | 48 +++++++++-------- libswscale/bfin/yuv2rgb_bfin.c | 91 +++++++++++++++++---------------- 3 files changed, 78 insertions(+), 73 deletions(-) diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S index 9f985e7824..b007f07f53 100644 --- a/libswscale/bfin/internal_bfin.S +++ b/libswscale/bfin/internal_bfin.S @@ -30,11 +30,11 @@ and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts. The following calculation is used for the conversion: - r = clipz((y-oy)*cy + crv*(v-128)) - g = clipz((y-oy)*cy + cgv*(v-128) + cgu*(u-128)) - b = clipz((y-oy)*cy + cbu*(u-128)) + r = clipz((y - oy) * cy + crv * (v - 128)) + g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128)) + b = clipz((y - oy) * cy + cbu * (u - 128)) -y,u,v are prescaled by a factor of 4 i.e. left-shifted to gain precision. +y, u, v are prescaled by a factor of 4 i.e. 
left-shifted to gain precision. New factorization to eliminate the truncation error which was @@ -47,7 +47,7 @@ occurring due to the byteop3p. 2) Scale operands up by a factor of 4 not 8 because Blackfin multiplies include a shift. -3) Compute into the accumulators cy*yx0, cy*yx1. +3) Compute into the accumulators cy * yx0, cy * yx1. 4) Compute each of the linear equations: r = clipz((y - oy) * cy + crv * (v - 128)) @@ -73,7 +73,7 @@ occurring due to the byteop3p. Where coeffs have the following layout in memory. -uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv; +uint32_t oy, oc, zero, cy, crv, rmask, cbu, bmask, cgu, cgv; coeffs is a pointer to oy. diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c index 0c5f004930..f9eba1e41b 100644 --- a/libswscale/bfin/swscale_bfin.c +++ b/libswscale/bfin/swscale_bfin.c @@ -27,32 +27,34 @@ #include #include "config.h" #include + #include "libswscale/rgb2rgb.h" #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" #if defined (__FDPIC__) && CONFIG_SRAM -#define L1CODE __attribute__ ((l1_text)) +#define L1CODE __attribute__((l1_text)) #else #define L1CODE #endif -int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, +int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride) L1CODE; -int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, +int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride) L1CODE; -static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) +static int uyvytoyv12_unscaled(SwsContext *c, uint8_t *src[], int srcStride[], + int srcSliceY, int srcSliceH, uint8_t 
*dst[], + int dstStride[]) { - uint8_t *dsty = dst[0] + dstStride[0]*srcSliceY; - uint8_t *dstu = dst[1] + dstStride[1]*srcSliceY/2; - uint8_t *dstv = dst[2] + dstStride[2]*srcSliceY/2; - uint8_t *ip = src[0] + srcStride[0]*srcSliceY; - int w = dstStride[0]; + uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY; + uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2; + uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2; + uint8_t *ip = src[0] + srcStride[0] * srcSliceY; + int w = dstStride[0]; ff_bfin_uyvytoyv12(ip, dsty, dstu, dstv, w, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); @@ -60,14 +62,15 @@ static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i return srcSliceH; } -static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]) +static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t *src[], int srcStride[], + int srcSliceY, int srcSliceH, uint8_t *dst[], + int dstStride[]) { - uint8_t *dsty = dst[0] + dstStride[0]*srcSliceY; - uint8_t *dstu = dst[1] + dstStride[1]*srcSliceY/2; - uint8_t *dstv = dst[2] + dstStride[2]*srcSliceY/2; - uint8_t *ip = src[0] + srcStride[0]*srcSliceY; - int w = dstStride[0]; + uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY; + uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2; + uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2; + uint8_t *ip = src[0] + srcStride[0] * srcSliceY; + int w = dstStride[0]; ff_bfin_yuyvtoyv12(ip, dsty, dstu, dstv, w, srcSliceH, dstStride[0], dstStride[1], srcStride[0]); @@ -75,15 +78,16 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i return srcSliceH; } - void ff_bfin_get_unscaled_swscale(SwsContext *c) { if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) { - av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n"); + av_log(NULL, AV_LOG_VERBOSE, + "selecting Blackfin optimized 
uyvytoyv12_unscaled\n"); c->swScale = uyvytoyv12_unscaled; } if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) { - av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); + av_log(NULL, AV_LOG_VERBOSE, + "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); c->swScale = yuyvtoyv12_unscaled; } } diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c index 68af522642..91a7aeea79 100644 --- a/libswscale/bfin/yuv2rgb_bfin.c +++ b/libswscale/bfin/yuv2rgb_bfin.c @@ -26,14 +26,15 @@ #include #include #include -#include "config.h" #include + +#include "config.h" #include "libswscale/rgb2rgb.h" #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" #if defined(__FDPIC__) && CONFIG_SRAM -#define L1CODE __attribute__ ((l1_text)) +#define L1CODE __attribute__((l1_text)) #else #define L1CODE #endif @@ -47,21 +48,20 @@ void ff_bfin_yuv2rgb565_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, void ff_bfin_yuv2rgb24_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, int w, uint32_t *coeffs) L1CODE; -typedef void (* ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs); - +typedef void (*ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, + int w, uint32_t *coeffs); static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) { int oy; - oy = c->yOffset&0xffff; - oy = oy >> 3; // keep everything U8.0 for offset calculation + oy = c->yOffset & 0xffff; + oy = oy >> 3; // keep everything U8.0 for offset calculation - c->oc = 128*0x01010101U; - c->oy = oy*0x01010101U; + c->oc = 128 * 0x01010101U; + c->oy = oy * 0x01010101U; /* copy 64bit vector coeffs down to 32bit vector coeffs */ - c->cy = c->yCoeff; + c->cy = c->yCoeff; c->zero = 0; if (rgb) { @@ -76,7 +76,6 @@ static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) c->cgv = c->ugCoeff; } - if (masks == 555) { c->rmask = 0x001f * 0x00010001U; c->gmask = 0x03e0 
* 0x00010001U; @@ -88,27 +87,25 @@ static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) } } -static int core_yuv420_rgb(SwsContext *c, - uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides, - ltransform lcscf, int rgb, int masks) +static int core_yuv420_rgb(SwsContext *c, uint8_t **in, int *instrides, + int srcSliceY, int srcSliceH, uint8_t **oplanes, + int *outstrides, ltransform lcscf, + int rgb, int masks) { - uint8_t *py,*pu,*pv,*op; + uint8_t *py, *pu, *pv, *op; int w = instrides[0]; - int h2 = srcSliceH>>1; + int h2 = srcSliceH >> 1; int i; bfin_prepare_coefficients(c, rgb, masks); py = in[0]; - pu = in[1+(1^rgb)]; - pv = in[1+(0^rgb)]; + pu = in[1 + (1 ^ rgb)]; + pv = in[1 + (0 ^ rgb)]; - op = oplanes[0] + srcSliceY*outstrides[0]; - - for (i=0;ioy); py += instrides[0]; @@ -125,9 +122,7 @@ static int core_yuv420_rgb(SwsContext *c, return srcSliceH; } - -static int bfin_yuv420_rgb555(SwsContext *c, - uint8_t **in, int *instrides, +static int bfin_yuv420_rgb555(SwsContext *c, uint8_t **in, int *instrides, int srcSliceY, int srcSliceH, uint8_t **oplanes, int *outstrides) { @@ -135,8 +130,7 @@ static int bfin_yuv420_rgb555(SwsContext *c, outstrides, ff_bfin_yuv2rgb555_line, 1, 555); } -static int bfin_yuv420_bgr555(SwsContext *c, - uint8_t **in, int *instrides, +static int bfin_yuv420_bgr555(SwsContext *c, uint8_t **in, int *instrides, int srcSliceY, int srcSliceH, uint8_t **oplanes, int *outstrides) { @@ -144,8 +138,7 @@ static int bfin_yuv420_bgr555(SwsContext *c, outstrides, ff_bfin_yuv2rgb555_line, 0, 555); } -static int bfin_yuv420_rgb24(SwsContext *c, - uint8_t **in, int *instrides, +static int bfin_yuv420_rgb24(SwsContext *c, uint8_t **in, int *instrides, int srcSliceY, int srcSliceH, uint8_t **oplanes, int *outstrides) { @@ -153,8 +146,7 @@ static int bfin_yuv420_rgb24(SwsContext *c, outstrides, ff_bfin_yuv2rgb24_line, 1, 888); } -static int bfin_yuv420_bgr24(SwsContext *c, - uint8_t **in, 
int *instrides, +static int bfin_yuv420_bgr24(SwsContext *c, uint8_t **in, int *instrides, int srcSliceY, int srcSliceH, uint8_t **oplanes, int *outstrides) { @@ -162,8 +154,7 @@ static int bfin_yuv420_bgr24(SwsContext *c, outstrides, ff_bfin_yuv2rgb24_line, 0, 888); } -static int bfin_yuv420_rgb565(SwsContext *c, - uint8_t **in, int *instrides, +static int bfin_yuv420_rgb565(SwsContext *c, uint8_t **in, int *instrides, int srcSliceY, int srcSliceH, uint8_t **oplanes, int *outstrides) { @@ -171,8 +162,7 @@ static int bfin_yuv420_rgb565(SwsContext *c, outstrides, ff_bfin_yuv2rgb565_line, 1, 565); } -static int bfin_yuv420_bgr565(SwsContext *c, - uint8_t **in, int *instrides, +static int bfin_yuv420_bgr565(SwsContext *c, uint8_t **in, int *instrides, int srcSliceY, int srcSliceH, uint8_t **oplanes, int *outstrides) { @@ -180,24 +170,35 @@ static int bfin_yuv420_bgr565(SwsContext *c, outstrides, ff_bfin_yuv2rgb565_line, 0, 565); } - SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) { SwsFunc f; - switch(c->dstFormat) { - case PIX_FMT_RGB555: f = bfin_yuv420_rgb555; break; - case PIX_FMT_BGR555: f = bfin_yuv420_bgr555; break; - case PIX_FMT_RGB565: f = bfin_yuv420_rgb565; break; - case PIX_FMT_BGR565: f = bfin_yuv420_bgr565; break; - case PIX_FMT_RGB24: f = bfin_yuv420_rgb24; break; - case PIX_FMT_BGR24: f = bfin_yuv420_bgr24; break; + switch (c->dstFormat) { + case PIX_FMT_RGB555: + f = bfin_yuv420_rgb555; + break; + case PIX_FMT_BGR555: + f = bfin_yuv420_bgr555; + break; + case PIX_FMT_RGB565: + f = bfin_yuv420_rgb565; + break; + case PIX_FMT_BGR565: + f = bfin_yuv420_bgr565; + break; + case PIX_FMT_RGB24: + f = bfin_yuv420_rgb24; + break; + case PIX_FMT_BGR24: + f = bfin_yuv420_bgr24; + break; default: return 0; } av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", - sws_format_name (c->dstFormat)); + sws_format_name(c->dstFormat)); return f; } From cd2f98f365dfd83f0debac030413e57a73c7ecd5 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: 
Wed, 1 Feb 2012 22:25:10 +0000 Subject: [PATCH 15/19] ARM: ac3: fix ac3_bit_alloc_calc_bap_armv6 This function was broken when the start bin was not at the start of a band. Signed-off-by: Mans Rullgard --- libavcodec/arm/ac3dsp_armv6.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/libavcodec/arm/ac3dsp_armv6.S b/libavcodec/arm/ac3dsp_armv6.S index b6aee867b3..df8bfbaa03 100644 --- a/libavcodec/arm/ac3dsp_armv6.S +++ b/libavcodec/arm/ac3dsp_armv6.S @@ -34,24 +34,23 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1 add r0, r0, r4, lsl #1 @ mask + band add r4, lr, r4 add r7, r7, r2 @ bap + start - ldrb r10, [r4], #1 1: ldrsh r9, [r0], #2 @ mask[band] mov r8, #0xff0 sub r9, r9, r12 @ - snr_offset - mov r11, r10 - ldrb r10, [r4], #1 @ band_start_tab[band++] + ldrb r10, [r4, #1]! @ band_start_tab[++band] subs r9, r9, r5 @ - floor it lt movlt r9, #0 cmp r10, r3 @ - end and r9, r9, r8, lsl #1 @ & 0x1fe0 ite gt - subgt r8, r3, r11 - suble r8, r10, r11 + subgt r8, r3, r2 + suble r8, r10, r2 + mov r2, r10 add r9, r9, r5 @ + floor => m tst r8, #1 - add r2, r7, r8 + add r11, r7, r8 bne 3f b 5f 2: @@ -65,9 +64,9 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1 ldrb lr, [r6, lr] strb r8, [r7], #1 @ bap[bin] strb lr, [r7], #1 -5: cmp r7, r2 +5: cmp r7, r11 blo 2b - cmp r3, r11 + cmp r3, r10 bgt 1b pop {r4-r11,pc} 3: From 89415b8e3fb83d67fdc518323cc364aa74ec2af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sun, 22 Jan 2012 01:32:16 +0200 Subject: [PATCH 16/19] movdec: Parse the dvc1 atom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally, the actual payload data contains sequence headers, too, and the parser can extract this and set it as extradata. However, the data in the dvc1 atom is the "official" extradata for the file. This is required for proper stream copy of vc1 from ismv to ismv. 
Signed-off-by: Martin Storsjö --- libavformat/mov.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/libavformat/mov.c b/libavformat/mov.c index 3c1927ff33..d0e01f8e57 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -971,6 +971,32 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; } +static int mov_read_dvc1(MOVContext *c, AVIOContext *pb, MOVAtom atom) +{ + AVStream *st; + uint8_t profile_level; + + if (c->fc->nb_streams < 1) + return 0; + st = c->fc->streams[c->fc->nb_streams-1]; + + if (atom.size >= (1<<28) || atom.size < 7) + return AVERROR_INVALIDDATA; + + profile_level = avio_r8(pb); + if ((profile_level & 0xf0) != 0xc0) /* parentheses required: != binds tighter than & */ + return 0; + + av_free(st->codec->extradata); + st->codec->extradata = av_mallocz(atom.size - 7 + FF_INPUT_BUFFER_PADDING_SIZE); + if (!st->codec->extradata) + return AVERROR(ENOMEM); + st->codec->extradata_size = atom.size - 7; + avio_seek(pb, 6, SEEK_CUR); + avio_read(pb, st->codec->extradata, st->codec->extradata_size); + return 0; +} + /** * An strf atom is a BITMAPINFOHEADER struct. This struct is 40 bytes itself, * but can have extradata appended at the end after the 40 bytes belonging @@ -2435,6 +2461,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = { { MKTAG('w','f','e','x'), mov_read_wfex }, { MKTAG('c','m','o','v'), mov_read_cmov }, { MKTAG('c','h','a','n'), mov_read_chan }, /* channel layout */ +{ MKTAG('d','v','c','1'), mov_read_dvc1 }, { 0, NULL } }; From 7e4d9d5d456916f51cb40ef646fad5b0a467f4b1 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 2 Feb 2012 11:48:13 -0800 Subject: [PATCH 17/19] win64: add a XMM clobber test configure option. This will be useful to test more aggressively for failures to mark XMM registers as clobbered in Win64 builds, and prevent regressions thereof.
Based on a patch by Ramiro Polla --- configure | 14 +++++++ libavcodec/x86/Makefile | 1 + libavcodec/x86/w64xmmtest.c | 80 +++++++++++++++++++++++++++++++++++++ libavutil/x86/w64xmmtest.h | 71 ++++++++++++++++++++++++++++++++ libswscale/Makefile | 2 + libswscale/x86/w64xmmtest.c | 31 ++++++++++++++ 6 files changed, 199 insertions(+) create mode 100644 libavcodec/x86/w64xmmtest.c create mode 100644 libavutil/x86/w64xmmtest.h create mode 100644 libswscale/x86/w64xmmtest.c diff --git a/configure b/configure index 38c8457a46..b1da509737 100755 --- a/configure +++ b/configure @@ -254,6 +254,8 @@ Developer options (useful when working on Libav itself): --enable-extra-warnings enable more compiler warnings --samples=PATH location of test samples for FATE, if not set use \$FATE_SAMPLES at make invocation time. + --enable-xmm-clobber-test check XMM registers for clobbering (Win64-only; + should be used only for debugging purposes) NOTE: Object files are built at the place where configure is launched. 
EOF @@ -991,6 +993,7 @@ CONFIG_LIST=" vda vdpau version3 + xmm_clobber_test x11grab zlib " @@ -3065,6 +3068,17 @@ check_ldflags -Wl,--warn-common check_ldflags -Wl,-rpath-link=libpostproc:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic +enabled xmm_clobber_test && \ + check_ldflags -Wl,--wrap,avcodec_open2 \ + -Wl,--wrap,avcodec_decode_audio4 \ + -Wl,--wrap,avcodec_decode_video2 \ + -Wl,--wrap,avcodec_decode_subtitle2 \ + -Wl,--wrap,avcodec_encode_audio2 \ + -Wl,--wrap,avcodec_encode_video \ + -Wl,--wrap,avcodec_encode_subtitle \ + -Wl,--wrap,sws_scale || \ + disable xmm_clobber_test + echo "X{};" > $TMPV if test_ldflags -Wl,--version-script,$TMPV; then append SHFLAGS '-Wl,--version-script,\$(SUBDIR)lib\$(NAME).ver' diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 930ace78c4..fc88433783 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -74,3 +74,4 @@ OBJS-$(HAVE_MMX) += x86/dsputil_mmx.o \ x86/mpegvideo_mmx.o \ x86/simple_idct_mmx.o \ +OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o diff --git a/libavcodec/x86/w64xmmtest.c b/libavcodec/x86/w64xmmtest.c new file mode 100644 index 0000000000..f6e3de9496 --- /dev/null +++ b/libavcodec/x86/w64xmmtest.c @@ -0,0 +1,80 @@ +/* + * check XMM registers for clobbers on Win64 + * Copyright (c) 2012 Ronald S. Bultje + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/avcodec.h" +#include "libavutil/x86/w64xmmtest.h" + +wrap(avcodec_open2(AVCodecContext *avctx, + AVCodec *codec, + AVDictionary **options)) +{ + testxmmclobbers(avcodec_open2, avctx, codec, options); +} + +wrap(avcodec_decode_audio4(AVCodecContext *avctx, + AVFrame *frame, + int *got_frame_ptr, + AVPacket *avpkt)) +{ + testxmmclobbers(avcodec_decode_audio4, avctx, frame, + got_frame_ptr, avpkt); +} + +wrap(avcodec_decode_video2(AVCodecContext *avctx, + AVFrame *picture, + int *got_picture_ptr, + AVPacket *avpkt)) +{ + testxmmclobbers(avcodec_decode_video2, avctx, picture, + got_picture_ptr, avpkt); +} + +wrap(avcodec_decode_subtitle2(AVCodecContext *avctx, + AVSubtitle *sub, + int *got_sub_ptr, + AVPacket *avpkt)) +{ + testxmmclobbers(avcodec_decode_subtitle2, avctx, sub, + got_sub_ptr, avpkt); +} + +wrap(avcodec_encode_audio2(AVCodecContext *avctx, + AVPacket *avpkt, + const AVFrame *frame, + int *got_packet_ptr)) +{ + testxmmclobbers(avcodec_encode_audio2, avctx, avpkt, frame, + got_packet_ptr); +} + +wrap(avcodec_encode_video(AVCodecContext *avctx, + uint8_t *buf, int buf_size, + const AVFrame *pict)) +{ + testxmmclobbers(avcodec_encode_video, avctx, buf, buf_size, pict); +} + +wrap(avcodec_encode_subtitle(AVCodecContext *avctx, + uint8_t *buf, int buf_size, + const AVSubtitle *sub)) +{ + testxmmclobbers(avcodec_encode_subtitle, avctx, buf, buf_size, sub); +} diff --git a/libavutil/x86/w64xmmtest.h b/libavutil/x86/w64xmmtest.h new file mode 100644 index 0000000000..1c1ded86ec --- /dev/null +++ b/libavutil/x86/w64xmmtest.h @@ -0,0 +1,71 @@ +/* + * check XMM registers for clobbers on Win64 + * Copyright (c) 2008 Ramiro Polla + * + * This file is part of Libav. 
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#include "libavutil/bswap.h" + +#define storexmmregs(mem) \ + __asm__ volatile( \ + "movups %%xmm6 , 0x00(%0)\n\t" \ + "movups %%xmm7 , 0x10(%0)\n\t" \ + "movups %%xmm8 , 0x20(%0)\n\t" \ + "movups %%xmm9 , 0x30(%0)\n\t" \ + "movups %%xmm10, 0x40(%0)\n\t" \ + "movups %%xmm11, 0x50(%0)\n\t" \ + "movups %%xmm12, 0x60(%0)\n\t" \ + "movups %%xmm13, 0x70(%0)\n\t" \ + "movups %%xmm14, 0x80(%0)\n\t" \ + "movups %%xmm15, 0x90(%0)\n\t" \ + :: "r"(mem) : "memory") + +#define testxmmclobbers(func, ctx, ...) 
\ + uint64_t xmm[2][10][2]; \ + int ret; \ + storexmmregs(xmm[0]); \ + ret = __real_ ## func(ctx, __VA_ARGS__); \ + storexmmregs(xmm[1]); \ + if (memcmp(xmm[0], xmm[1], sizeof(xmm[0]))) { \ + int i; \ + av_log(ctx, AV_LOG_ERROR, \ + "XMM REGS CLOBBERED IN %s!\n", #func); \ + for (i = 0; i < 10; i ++) \ + if (xmm[0][i][0] != xmm[1][i][0] || \ + xmm[0][i][1] != xmm[1][i][1]) { \ + av_log(ctx, AV_LOG_ERROR, \ + "xmm%-2d = %016"PRIx64"%016"PRIx64"\n", \ + 6 + i, av_bswap64(xmm[0][i][0]), \ + av_bswap64(xmm[0][i][1])); \ + av_log(ctx, AV_LOG_ERROR, \ + " -> %016"PRIx64"%016"PRIx64"\n", \ + av_bswap64(xmm[1][i][0]), \ + av_bswap64(xmm[1][i][1])); \ + } \ + abort(); \ + } \ + return ret + +#define wrap(func) \ +int __real_ ## func; \ +int __wrap_ ## func; \ +int __wrap_ ## func diff --git a/libswscale/Makefile b/libswscale/Makefile index bef4200c59..0aee7e497b 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -21,6 +21,8 @@ MMX-OBJS-$(HAVE_YASM) += x86/input.o \ x86/output.o \ x86/scale.o +OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o + TESTPROGS = colorspace swscale DIRS = bfin mlib ppc sparc x86 diff --git a/libswscale/x86/w64xmmtest.c b/libswscale/x86/w64xmmtest.c new file mode 100644 index 0000000000..dd9a2a4378 --- /dev/null +++ b/libswscale/x86/w64xmmtest.c @@ -0,0 +1,31 @@ +/* + * check XMM registers for clobbers on Win64 + * Copyright (c) 2012 Ronald S. Bultje + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/x86/w64xmmtest.h" +#include "libswscale/swscale.h" + +wrap(sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], + const int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *const dst[], const int dstStride[])) +{ + testxmmclobbers(sws_scale, c, srcSlice, srcStride, srcSliceY, + srcSliceH, dst, dstStride); +} From 24947d4988012f1f0fd467c83418615adc11c3e8 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 10 Jan 2012 17:01:26 -0800 Subject: [PATCH 18/19] vorbis: fix overflows in floor1[] vector and inverse db table index. --- libavcodec/vorbis.c | 19 +++++++++---------- libavcodec/vorbisdec.c | 10 +++++----- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c index 0b26870421..52ded8b0a8 100644 --- a/libavcodec/vorbis.c +++ b/libavcodec/vorbis.c @@ -152,7 +152,7 @@ void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values) } } -static inline void render_line_unrolled(intptr_t x, uint8_t y, int x1, +static inline void render_line_unrolled(intptr_t x, int y, int x1, intptr_t sy, int ady, int adx, float *buf) { @@ -164,30 +164,30 @@ static inline void render_line_unrolled(intptr_t x, uint8_t y, int x1, if (err >= 0) { err += ady - adx; y += sy; - buf[x++] = ff_vorbis_floor1_inverse_db_table[y]; + buf[x++] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)]; } - buf[x] = ff_vorbis_floor1_inverse_db_table[y]; + buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)]; } if (x <= 0) { if (err + ady >= 0) y += sy; - buf[x] = ff_vorbis_floor1_inverse_db_table[y]; + buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)]; } } -static void render_line(int x0, uint8_t y0, int x1, int y1, float *buf) +static void render_line(int x0, 
int y0, int x1, int y1, float *buf) { int dy = y1 - y0; int adx = x1 - x0; int ady = FFABS(dy); int sy = dy < 0 ? -1 : 1; - buf[x0] = ff_vorbis_floor1_inverse_db_table[y0]; + buf[x0] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y0)]; if (ady*2 <= adx) { // optimized common case render_line_unrolled(x0, y0, x1, sy, ady, adx, buf); } else { int base = dy / adx; int x = x0; - uint8_t y = y0; + int y = y0; int err = -adx; ady -= FFABS(base) * adx; while (++x < x1) { @@ -197,7 +197,7 @@ static void render_line(int x0, uint8_t y0, int x1, int y1, float *buf) err -= adx; y += sy; } - buf[x] = ff_vorbis_floor1_inverse_db_table[y]; + buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)]; } } } @@ -206,8 +206,7 @@ void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, uint16_t *y_list, int *flag, int multiplier, float *out, int samples) { - int lx, i; - uint8_t ly; + int lx, ly, i; lx = 0; ly = y_list[0] * multiplier; for (i = 1; i < values; i++) { diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c index 2f1477ffb0..aa4bdff93e 100644 --- a/libavcodec/vorbisdec.c +++ b/libavcodec/vorbisdec.c @@ -1244,20 +1244,20 @@ static int vorbis_floor1_decode(vorbis_context *vc, floor1_flag[i] = 1; if (val >= room) { if (highroom > lowroom) { - floor1_Y_final[i] = val - lowroom + predicted; + floor1_Y_final[i] = av_clip_uint16(val - lowroom + predicted); } else { - floor1_Y_final[i] = predicted - val + highroom - 1; + floor1_Y_final[i] = av_clip_uint16(predicted - val + highroom - 1); } } else { if (val & 1) { - floor1_Y_final[i] = predicted - (val + 1) / 2; + floor1_Y_final[i] = av_clip_uint16(predicted - (val + 1) / 2); } else { - floor1_Y_final[i] = predicted + val / 2; + floor1_Y_final[i] = av_clip_uint16(predicted + val / 2); } } } else { floor1_flag[i] = 0; - floor1_Y_final[i] = predicted; + floor1_Y_final[i] = av_clip_uint16(predicted); } av_dlog(NULL, " Decoded floor(%d) = %u / val %u\n", From e15e2a6d2a886aa9944ac9798687104c829d1541 Mon 
Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 1 Feb 2012 10:56:14 +0100 Subject: [PATCH 19/19] libx264: fix indentation. --- libavcodec/libx264.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index 12882808bd..2183e2020f 100644 --- a/libavcodec/libx264.c +++ b/libavcodec/libx264.c @@ -147,12 +147,12 @@ static int X264_frame(AVCodecContext *ctx, uint8_t *buf, } do { - if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0) - return -1; + if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0) + return -1; - bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0); - if (bufsize < 0) - return -1; + bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0); + if (bufsize < 0) + return -1; } while (!bufsize && !frame && x264_encoder_delayed_frames(x4->enc)); /* FIXME: libx264 now provides DTS, but AVFrame doesn't have a field for it. */