From b2ffecbd0ccccfd1e379096bb62c15b06bb6ab63 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Sun, 9 Sep 2018 01:05:56 +0200 Subject: [PATCH] avcodec/lagarith: switch to planar rgb Speed goes from 363 fps to 428 fps for 640x480 video. --- libavcodec/lagarith.c | 108 +++++++----------------- tests/ref/fate/lagarith-red | 50 +++++------ tests/ref/fate/lagarith-rgb24 | 8 +- tests/ref/fate/lagarith-ticket4119 | 4 +- tests/ref/fate/lagarith-ticket4119-cfr | 100 +++++++++++----------- tests/ref/fate/lagarith-ticket4119-drop | 4 +- tests/ref/fate/lagarith-ticket4119-pass | 4 +- tests/ref/fate/lagarith-ticket4119-vfr | 4 +- 8 files changed, 118 insertions(+), 164 deletions(-) diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c index 3d3b4d41f4..d88c5f5ae7 100644 --- a/libavcodec/lagarith.c +++ b/libavcodec/lagarith.c @@ -53,9 +53,6 @@ typedef struct LagarithContext { LLVidDSPContext llviddsp; int zeros; /**< number of consecutive zero bytes encountered */ int zeros_rem; /**< number of zero bytes remaining to output */ - uint8_t *rgb_planes; - int rgb_planes_allocated; - int rgb_stride; } LagarithContext; /** @@ -544,7 +541,7 @@ static int lag_decode_frame(AVCodecContext *avctx, uint8_t frametype; uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9; uint32_t offs[4]; - uint8_t *srcs[4], *dst; + uint8_t *srcs[4]; int i, j, planes = 3; int ret; @@ -557,70 +554,60 @@ static int lag_decode_frame(AVCodecContext *avctx, switch (frametype) { case FRAME_SOLID_RGBA: - avctx->pix_fmt = AV_PIX_FMT_RGB32; + avctx->pix_fmt = AV_PIX_FMT_GBRAP; case FRAME_SOLID_GRAY: if (frametype == FRAME_SOLID_GRAY) if (avctx->bits_per_coded_sample == 24) { - avctx->pix_fmt = AV_PIX_FMT_RGB24; + avctx->pix_fmt = AV_PIX_FMT_GBRP; } else { - avctx->pix_fmt = AV_PIX_FMT_0RGB32; + avctx->pix_fmt = AV_PIX_FMT_GBRAP; planes = 4; } if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) return ret; - dst = p->data[0]; if (frametype == FRAME_SOLID_RGBA) { - int qwidth = avctx->width>>2; - uint64_t c = ((uint64_t)offset_gu << 32) | offset_gu; - for (j = 0; j < avctx->height; j++) { - for (i = 0; i < qwidth; i++) { - AV_WN64(dst + i * 16 , c); - AV_WN64(dst + i * 16 + 8, c); + for (i = 0; i < avctx->height; i++) { + memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width); + memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width); + memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width); + memset(p->data[3] + i * p->linesize[3], buf[4], avctx->width); } - for (i = 4*qwidth; i < avctx->width; i++) - AV_WN32(dst + i * 4, offset_gu); - dst += p->linesize[0]; - } } else { - for (j = 0; j < avctx->height; j++) { - memset(dst, buf[1], avctx->width * planes); - dst += p->linesize[0]; + for (i = 0; i < avctx->height; i++) { + for (j = 0; j < planes; j++) + memset(p->data[j] + i * p->linesize[j], buf[1], avctx->width); } } break; case FRAME_SOLID_COLOR: if (avctx->bits_per_coded_sample == 24) { - avctx->pix_fmt = AV_PIX_FMT_RGB24; + avctx->pix_fmt = AV_PIX_FMT_GBRP; } else { - avctx->pix_fmt = AV_PIX_FMT_RGB32; - offset_gu |= 0xFFU << 24; + avctx->pix_fmt = AV_PIX_FMT_GBRAP; } if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0) return ret; - dst = p->data[0]; - for (j = 0; j < avctx->height; j++) { - for (i = 0; i < avctx->width; i++) - if (avctx->bits_per_coded_sample == 24) { - AV_WB24(dst + i * 3, offset_gu); - } else { - AV_WN32(dst + i * 4, offset_gu); - } - dst += p->linesize[0]; + for (i = 0; i < avctx->height; i++) { + memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width); + memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width); + memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width); + if (avctx->pix_fmt == AV_PIX_FMT_GBRAP) + memset(p->data[3] + i * p->linesize[3], 0xFFu, avctx->width); } break; case FRAME_ARITH_RGBA: - avctx->pix_fmt = AV_PIX_FMT_RGB32; + avctx->pix_fmt = AV_PIX_FMT_GBRAP; planes = 4; offset_ry += 4; offs[3] = AV_RL32(buf + 9); case FRAME_ARITH_RGB24: case FRAME_U_RGB24: if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24) - avctx->pix_fmt = AV_PIX_FMT_RGB24; + avctx->pix_fmt = AV_PIX_FMT_GBRP; if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) return ret; @@ -629,15 +616,8 @@ static int lag_decode_frame(AVCodecContext *avctx, offs[1] = offset_gu; offs[2] = offset_ry; - l->rgb_stride = FFALIGN(avctx->width, 16); - av_fast_malloc(&l->rgb_planes, &l->rgb_planes_allocated, - l->rgb_stride * avctx->height * planes + 1); - if (!l->rgb_planes) { - av_log(avctx, AV_LOG_ERROR, "cannot allocate temporary buffer\n"); - return AVERROR(ENOMEM); - } for (i = 0; i < planes; i++) - srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride; + srcs[i] = p->data[i] + (avctx->height - 1) * p->linesize[i]; for (i = 0; i < planes; i++) if (buf_size <= offs[i]) { av_log(avctx, AV_LOG_ERROR, @@ -648,32 +628,16 @@ static int lag_decode_frame(AVCodecContext *avctx, for (i = 0; i < planes; i++) lag_decode_arith_plane(l, srcs[i], avctx->width, avctx->height, - -l->rgb_stride, buf + offs[i], + -p->linesize[i], buf + offs[i], buf_size - offs[i]); - dst = p->data[0]; - for (i = 0; i < planes; i++) - srcs[i] = l->rgb_planes + i * l->rgb_stride * avctx->height; - for (j = 0; j < avctx->height; j++) { - for (i = 0; i < avctx->width; i++) { - uint8_t r, g, b, a; - r = srcs[0][i]; - g = srcs[1][i]; - b = srcs[2][i]; - r += g; - b += g; - if (frametype == FRAME_ARITH_RGBA) { - a = srcs[3][i]; - AV_WN32(dst + i * 4, MKBETAG(a, r, g, b)); - } else { - dst[i * 3 + 0] = r; - dst[i * 3 + 1] = g; - dst[i * 3 + 2] = b; - } - } - dst += p->linesize[0]; - for (i = 0; i < planes; i++) - srcs[i] += l->rgb_stride; + for (i = 0; i < avctx->height; i++) { + l->llviddsp.add_bytes(p->data[0] + i * p->linesize[0], p->data[1] + i * p->linesize[1], avctx->width); + l->llviddsp.add_bytes(p->data[2] + i * p->linesize[2], p->data[1] + i * p->linesize[1], avctx->width); } + FFSWAP(uint8_t*, p->data[0], p->data[1]); + FFSWAP(int, p->linesize[0], p->linesize[1]); + FFSWAP(uint8_t*, p->data[2], p->data[1]); + FFSWAP(int, p->linesize[2], p->linesize[1]); break; case FRAME_ARITH_YUY2: avctx->pix_fmt = AV_PIX_FMT_YUV422P; @@ -757,15 +721,6 @@ static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx) } #endif -static av_cold int lag_decode_end(AVCodecContext *avctx) -{ - LagarithContext *l = avctx->priv_data; - - av_freep(&l->rgb_planes); - - return 0; -} - AVCodec ff_lagarith_decoder = { .name = "lagarith", .long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"), @@ -774,7 +729,6 @@ AVCodec ff_lagarith_decoder = { .priv_data_size = sizeof(LagarithContext), .init = lag_decode_init, .init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy), - .close = lag_decode_end, .decode = lag_decode_frame, .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS, }; diff --git a/tests/ref/fate/lagarith-red b/tests/ref/fate/lagarith-red index 0e065d60d5..7cfb792d43 100644 --- a/tests/ref/fate/lagarith-red +++ b/tests/ref/fate/lagarith-red @@ -3,28 +3,28 @@ #codec_id 0: rawvideo #dimensions 0: 320x240 #sar 0: 0/1 -0, 0, 0, 1, 230400, 0x67dfe576 -0, 1, 1, 1, 230400, 0x67dfe576 -0, 2, 2, 1, 230400, 0x67dfe576 -0, 3, 3, 1, 230400, 0x67dfe576 -0, 4, 4, 1, 230400, 0x67dfe576 -0, 5, 5, 1, 230400, 0x67dfe576 -0, 6, 6, 1, 230400, 0x67dfe576 -0, 7, 7, 1, 230400, 0x67dfe576 -0, 8, 8, 1, 230400, 0x67dfe576 -0, 9, 9, 1, 230400, 0x67dfe576 -0, 10, 10, 1, 230400, 0x67dfe576 -0, 11, 11, 1, 230400, 0x67dfe576 -0, 12, 12, 1, 230400, 0x67dfe576 -0, 13, 13, 1, 230400, 0x67dfe576 -0, 14, 14, 1, 230400, 0x67dfe576 -0, 15, 15, 1, 230400, 0x67dfe576 -0, 16, 16, 1, 230400, 0x67dfe576 -0, 17, 17, 1, 230400, 0x67dfe576 -0, 18, 18, 1, 230400, 0x67dfe576 -0, 19, 19, 1, 230400, 0x67dfe576 -0, 20, 20, 1, 230400, 0x67dfe576 -0, 21, 21, 1, 230400, 0x67dfe576 -0, 22, 22, 1, 230400, 0x67dfe576 -0, 23, 23, 1, 230400, 0x67dfe576 -0, 24, 24, 1, 230400, 0x67dfe576 +0, 0, 0, 1, 230400, 0x77f0e576 +0, 1, 1, 1, 230400, 0x77f0e576 +0, 2, 2, 1, 230400, 0x77f0e576 +0, 3, 3, 1, 230400, 0x77f0e576 +0, 4, 4, 1, 230400, 0x77f0e576 +0, 5, 5, 1, 230400, 0x77f0e576 +0, 6, 6, 1, 230400, 0x77f0e576 +0, 7, 7, 1, 230400, 0x77f0e576 +0, 8, 8, 1, 230400, 0x77f0e576 +0, 9, 9, 1, 230400, 0x77f0e576 +0, 10, 10, 1, 230400, 0x77f0e576 +0, 11, 11, 1, 230400, 0x77f0e576 +0, 12, 12, 1, 230400, 0x77f0e576 +0, 13, 13, 1, 230400, 0x77f0e576 +0, 14, 14, 1, 230400, 0x77f0e576 +0, 15, 15, 1, 230400, 0x77f0e576 +0, 16, 16, 1, 230400, 0x77f0e576 +0, 17, 17, 1, 230400, 0x77f0e576 +0, 18, 18, 1, 230400, 0x77f0e576 +0, 19, 19, 1, 230400, 0x77f0e576 +0, 20, 20, 1, 230400, 0x77f0e576 +0, 21, 21, 1, 230400, 0x77f0e576 +0, 22, 22, 1, 230400, 0x77f0e576 +0, 23, 23, 1, 230400, 0x77f0e576 +0, 24, 24, 1, 230400, 0x77f0e576 diff --git a/tests/ref/fate/lagarith-rgb24 b/tests/ref/fate/lagarith-rgb24 index 63250c6bdc..dea49e91bf 100644 --- a/tests/ref/fate/lagarith-rgb24 +++ b/tests/ref/fate/lagarith-rgb24 @@ -3,7 +3,7 @@ #codec_id 0: rawvideo #dimensions 0: 480x256 #sar 0: 0/1 -0, 0, 0, 1, 368640, 0x26f74db2 -0, 1, 1, 1, 368640, 0x63b29ea4 -0, 2, 2, 1, 368640, 0x19467f03 -0, 3, 3, 1, 368640, 0x5fdc3575 +0, 0, 0, 1, 368640, 0x18364db2 +0, 1, 1, 1, 368640, 0x60e79ea4 +0, 2, 2, 1, 368640, 0xb28a7f03 +0, 3, 3, 1, 368640, 0x2ed83575 diff --git a/tests/ref/fate/lagarith-ticket4119 b/tests/ref/fate/lagarith-ticket4119 index c46ef041e4..c1de9dce0a 100644 --- a/tests/ref/fate/lagarith-ticket4119 +++ b/tests/ref/fate/lagarith-ticket4119 @@ -4,5 +4,5 @@ #dimensions 0: 640x360 #sar 0: 0/1 0, 0, 0, 1, 691200, 0x00000000 -0, 25, 25, 1, 691200, 0xc88a6f24 -0, 50, 50, 1, 691200, 0x906d474c +0, 25, 25, 1, 691200, 0x1c4a6f24 +0, 50, 50, 1, 691200, 0x1fa0474c diff --git a/tests/ref/fate/lagarith-ticket4119-cfr b/tests/ref/fate/lagarith-ticket4119-cfr index 324fe4483f..1b689011b4 100644 --- a/tests/ref/fate/lagarith-ticket4119-cfr +++ b/tests/ref/fate/lagarith-ticket4119-cfr @@ -27,53 +27,53 @@ 0, 21, 21, 1, 691200, 0x00000000 0, 22, 22, 1, 691200, 0x00000000 0, 23, 23, 1, 691200, 0x00000000 -0, 24, 24, 1, 691200, 0xc88a6f24 -0, 25, 25, 1, 691200, 0xc88a6f24 -0, 26, 26, 1, 691200, 0xc88a6f24 -0, 27, 27, 1, 691200, 0xc88a6f24 -0, 28, 28, 1, 691200, 0xc88a6f24 -0, 29, 29, 1, 691200, 0xc88a6f24 -0, 30, 30, 1, 691200, 0xc88a6f24 -0, 31, 31, 1, 691200, 0xc88a6f24 -0, 32, 32, 1, 691200, 0xc88a6f24 -0, 33, 33, 1, 691200, 0xc88a6f24 -0, 34, 34, 1, 691200, 0xc88a6f24 -0, 35, 35, 1, 691200, 0xc88a6f24 -0, 36, 36, 1, 691200, 0xc88a6f24 -0, 37, 37, 1, 691200, 0xc88a6f24 -0, 38, 38, 1, 691200, 0xc88a6f24 -0, 39, 39, 1, 691200, 0xc88a6f24 -0, 40, 40, 1, 691200, 0xc88a6f24 -0, 41, 41, 1, 691200, 0xc88a6f24 -0, 42, 42, 1, 691200, 0xc88a6f24 -0, 43, 43, 1, 691200, 0xc88a6f24 -0, 44, 44, 1, 691200, 0xc88a6f24 -0, 45, 45, 1, 691200, 0xc88a6f24 -0, 46, 46, 1, 691200, 0xc88a6f24 -0, 47, 47, 1, 691200, 0xc88a6f24 -0, 48, 48, 1, 691200, 0xc88a6f24 -0, 49, 49, 1, 691200, 0x906d474c -0, 50, 50, 1, 691200, 0x906d474c -0, 51, 51, 1, 691200, 0x906d474c -0, 52, 52, 1, 691200, 0x906d474c -0, 53, 53, 1, 691200, 0x906d474c -0, 54, 54, 1, 691200, 0x906d474c -0, 55, 55, 1, 691200, 0x906d474c -0, 56, 56, 1, 691200, 0x906d474c -0, 57, 57, 1, 691200, 0x906d474c -0, 58, 58, 1, 691200, 0x906d474c -0, 59, 59, 1, 691200, 0x906d474c -0, 60, 60, 1, 691200, 0x906d474c -0, 61, 61, 1, 691200, 0x906d474c -0, 62, 62, 1, 691200, 0x906d474c -0, 63, 63, 1, 691200, 0x906d474c -0, 64, 64, 1, 691200, 0x906d474c -0, 65, 65, 1, 691200, 0x906d474c -0, 66, 66, 1, 691200, 0x906d474c -0, 67, 67, 1, 691200, 0x906d474c -0, 68, 68, 1, 691200, 0x906d474c -0, 69, 69, 1, 691200, 0x906d474c -0, 70, 70, 1, 691200, 0x906d474c -0, 71, 71, 1, 691200, 0x906d474c -0, 72, 72, 1, 691200, 0x906d474c -0, 73, 73, 1, 691200, 0x906d474c +0, 24, 24, 1, 691200, 0x1c4a6f24 +0, 25, 25, 1, 691200, 0x1c4a6f24 +0, 26, 26, 1, 691200, 0x1c4a6f24 +0, 27, 27, 1, 691200, 0x1c4a6f24 +0, 28, 28, 1, 691200, 0x1c4a6f24 +0, 29, 29, 1, 691200, 0x1c4a6f24 +0, 30, 30, 1, 691200, 0x1c4a6f24 +0, 31, 31, 1, 691200, 0x1c4a6f24 +0, 32, 32, 1, 691200, 0x1c4a6f24 +0, 33, 33, 1, 691200, 0x1c4a6f24 +0, 34, 34, 1, 691200, 0x1c4a6f24 +0, 35, 35, 1, 691200, 0x1c4a6f24 +0, 36, 36, 1, 691200, 0x1c4a6f24 +0, 37, 37, 1, 691200, 0x1c4a6f24 +0, 38, 38, 1, 691200, 0x1c4a6f24 +0, 39, 39, 1, 691200, 0x1c4a6f24 +0, 40, 40, 1, 691200, 0x1c4a6f24 +0, 41, 41, 1, 691200, 0x1c4a6f24 +0, 42, 42, 1, 691200, 0x1c4a6f24 +0, 43, 43, 1, 691200, 0x1c4a6f24 +0, 44, 44, 1, 691200, 0x1c4a6f24 +0, 45, 45, 1, 691200, 0x1c4a6f24 +0, 46, 46, 1, 691200, 0x1c4a6f24 +0, 47, 47, 1, 691200, 0x1c4a6f24 +0, 48, 48, 1, 691200, 0x1c4a6f24 +0, 49, 49, 1, 691200, 0x1fa0474c +0, 50, 50, 1, 691200, 0x1fa0474c +0, 51, 51, 1, 691200, 0x1fa0474c +0, 52, 52, 1, 691200, 0x1fa0474c +0, 53, 53, 1, 691200, 0x1fa0474c +0, 54, 54, 1, 691200, 0x1fa0474c +0, 55, 55, 1, 691200, 0x1fa0474c +0, 56, 56, 1, 691200, 0x1fa0474c +0, 57, 57, 1, 691200, 0x1fa0474c +0, 58, 58, 1, 691200, 0x1fa0474c +0, 59, 59, 1, 691200, 0x1fa0474c +0, 60, 60, 1, 691200, 0x1fa0474c +0, 61, 61, 1, 691200, 0x1fa0474c +0, 62, 62, 1, 691200, 0x1fa0474c +0, 63, 63, 1, 691200, 0x1fa0474c +0, 64, 64, 1, 691200, 0x1fa0474c +0, 65, 65, 1, 691200, 0x1fa0474c +0, 66, 66, 1, 691200, 0x1fa0474c +0, 67, 67, 1, 691200, 0x1fa0474c +0, 68, 68, 1, 691200, 0x1fa0474c +0, 69, 69, 1, 691200, 0x1fa0474c +0, 70, 70, 1, 691200, 0x1fa0474c +0, 71, 71, 1, 691200, 0x1fa0474c +0, 72, 72, 1, 691200, 0x1fa0474c +0, 73, 73, 1, 691200, 0x1fa0474c diff --git a/tests/ref/fate/lagarith-ticket4119-drop b/tests/ref/fate/lagarith-ticket4119-drop index abc58d5cb5..c7738aeca8 100644 --- a/tests/ref/fate/lagarith-ticket4119-drop +++ b/tests/ref/fate/lagarith-ticket4119-drop @@ -4,5 +4,5 @@ #dimensions 0: 640x360 #sar 0: 0/1 0, 0, 0, 1, 691200, 0x00000000 -0, 1, 1, 1, 691200, 0xc88a6f24 -0, 2, 2, 1, 691200, 0x906d474c +0, 1, 1, 1, 691200, 0x1c4a6f24 +0, 2, 2, 1, 691200, 0x1fa0474c diff --git a/tests/ref/fate/lagarith-ticket4119-pass b/tests/ref/fate/lagarith-ticket4119-pass index c46ef041e4..c1de9dce0a 100644 --- a/tests/ref/fate/lagarith-ticket4119-pass +++ b/tests/ref/fate/lagarith-ticket4119-pass @@ -4,5 +4,5 @@ #dimensions 0: 640x360 #sar 0: 0/1 0, 0, 0, 1, 691200, 0x00000000 -0, 25, 25, 1, 691200, 0xc88a6f24 -0, 50, 50, 1, 691200, 0x906d474c +0, 25, 25, 1, 691200, 0x1c4a6f24 +0, 50, 50, 1, 691200, 0x1fa0474c diff --git a/tests/ref/fate/lagarith-ticket4119-vfr b/tests/ref/fate/lagarith-ticket4119-vfr index c46ef041e4..c1de9dce0a 100644 --- a/tests/ref/fate/lagarith-ticket4119-vfr +++ b/tests/ref/fate/lagarith-ticket4119-vfr @@ -4,5 +4,5 @@ #dimensions 0: 640x360 #sar 0: 0/1 0, 0, 0, 1, 691200, 0x00000000 -0, 25, 25, 1, 691200, 0xc88a6f24 -0, 50, 50, 1, 691200, 0x906d474c +0, 25, 25, 1, 691200, 0x1c4a6f24 +0, 50, 50, 1, 691200, 0x1fa0474c