From 4ed7c2bbc3d04d5410433fd7038f076538e4a944 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Mon, 26 Jun 2017 21:31:12 +0200 Subject: [PATCH] avcodec/utvideodec: add SIMD for restore_rgb_planes Signed-off-by: Paul B Mahol --- libavcodec/Makefile | 2 +- libavcodec/utvideo.h | 2 + libavcodec/utvideodec.c | 53 +++------------- libavcodec/utvideodsp.c | 82 ++++++++++++++++++++++++ libavcodec/utvideodsp.h | 39 ++++++++++++ libavcodec/x86/Makefile | 2 + libavcodec/x86/utvideodsp.asm | 103 +++++++++++++++++++++++++++++++ libavcodec/x86/utvideodsp_init.c | 43 +++++++++++++ 8 files changed, 279 insertions(+), 47 deletions(-) create mode 100644 libavcodec/utvideodsp.c create mode 100644 libavcodec/utvideodsp.h create mode 100644 libavcodec/x86/utvideodsp.asm create mode 100644 libavcodec/x86/utvideodsp_init.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index f0cba8843d..b440a00746 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -583,7 +583,7 @@ OBJS-$(CONFIG_TTA_ENCODER) += ttaenc.o ttaencdsp.o ttadata.o OBJS-$(CONFIG_TWINVQ_DECODER) += twinvqdec.o twinvq.o OBJS-$(CONFIG_TXD_DECODER) += txd.o OBJS-$(CONFIG_ULTI_DECODER) += ulti.o -OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o +OBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodec.o utvideo.o utvideodsp.o OBJS-$(CONFIG_UTVIDEO_ENCODER) += utvideoenc.o utvideo.o OBJS-$(CONFIG_V210_DECODER) += v210dec.o OBJS-$(CONFIG_V210_ENCODER) += v210enc.o diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h index 9559c831fe..a8117851a7 100644 --- a/libavcodec/utvideo.h +++ b/libavcodec/utvideo.h @@ -30,6 +30,7 @@ #include "libavutil/common.h" #include "avcodec.h" #include "bswapdsp.h" +#include "utvideodsp.h" #include "lossless_videodsp.h" #include "lossless_videoencdsp.h" @@ -66,6 +67,7 @@ extern const int ff_ut_pred_order[5]; typedef struct UtvideoContext { const AVClass *class; AVCodecContext *avctx; + UTVideoDSPContext utdsp; BswapDSPContext bdsp; LLVidDSPContext llviddsp; LLVidEncDSPContext 
llvidencdsp; diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c index 0c6f89e83a..44841aaa65 100644 --- a/libavcodec/utvideodec.c +++ b/libavcodec/utvideodec.c @@ -333,50 +333,6 @@ fail: return AVERROR_INVALIDDATA; } -static void restore_rgb_planes(AVFrame *frame, int width, int height) -{ - uint8_t *src_r = (uint8_t *)frame->data[2]; - uint8_t *src_g = (uint8_t *)frame->data[0]; - uint8_t *src_b = (uint8_t *)frame->data[1]; - uint8_t r, g, b; - int i, j; - - for (j = 0; j < height; j++) { - for (i = 0; i < width; i++) { - r = src_r[i]; - g = src_g[i]; - b = src_b[i]; - src_r[i] = r + g - 0x80; - src_b[i] = b + g - 0x80; - } - src_r += frame->linesize[2]; - src_g += frame->linesize[0]; - src_b += frame->linesize[1]; - } -} - -static void restore_rgb_planes10(AVFrame *frame, int width, int height) -{ - uint16_t *src_r = (uint16_t *)frame->data[2]; - uint16_t *src_g = (uint16_t *)frame->data[0]; - uint16_t *src_b = (uint16_t *)frame->data[1]; - int r, g, b; - int i, j; - - for (j = 0; j < height; j++) { - for (i = 0; i < width; i++) { - r = src_r[i]; - g = src_g[i]; - b = src_b[i]; - src_r[i] = (r + g - 0x200) & 0x3FF; - src_b[i] = (b + g - 0x200) & 0x3FF; - } - src_r += frame->linesize[2] / 2; - src_g += frame->linesize[0] / 2; - src_b += frame->linesize[1] / 2; - } -} - #undef A #undef B #undef C @@ -696,7 +652,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, } } } - restore_rgb_planes(frame.f, avctx->width, avctx->height); + c->utdsp.restore_rgb_planes(frame.f->data[2], frame.f->data[0], frame.f->data[1], + frame.f->linesize[2], frame.f->linesize[0], frame.f->linesize[1], + avctx->width, avctx->height); break; case AV_PIX_FMT_GBRAP10: case AV_PIX_FMT_GBRP10: @@ -709,7 +667,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, if (ret) return ret; } - restore_rgb_planes10(frame.f, avctx->width, avctx->height); + c->utdsp.restore_rgb_planes10((uint16_t *)frame.f->data[2], (uint16_t 
*)frame.f->data[0], (uint16_t *)frame.f->data[1], + frame.f->linesize[2] / 2, frame.f->linesize[0] / 2, frame.f->linesize[1] / 2, + avctx->width, avctx->height); break; case AV_PIX_FMT_YUV420P: for (i = 0; i < 3; i++) { @@ -830,6 +790,7 @@ static av_cold int decode_init(AVCodecContext *avctx) c->avctx = avctx; + ff_utvideodsp_init(&c->utdsp); ff_bswapdsp_init(&c->bdsp); ff_llviddsp_init(&c->llviddsp); diff --git a/libavcodec/utvideodsp.c b/libavcodec/utvideodsp.c new file mode 100644 index 0000000000..0831a6b97b --- /dev/null +++ b/libavcodec/utvideodsp.c @@ -0,0 +1,82 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "utvideodsp.h"
+
+static void restore_rgb_planes_c(uint8_t *src_r,        /* R plane, rewritten in place */
+                                 uint8_t *src_g,        /* G plane, only read */
+                                 uint8_t *src_b,        /* B plane, rewritten in place */
+                                 ptrdiff_t linesize_r,  /* per-row advance of src_r, in bytes */
+                                 ptrdiff_t linesize_g,  /* per-row advance of src_g, in bytes */
+                                 ptrdiff_t linesize_b,  /* per-row advance of src_b, in bytes */
+                                 int width, int height) /* samples per row, number of rows */
+{
+    uint8_t r, g, b;
+    int i, j;
+    /* For every sample: R += G - 0x80 and B += G - 0x80, in place. */
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            r = src_r[i];
+            g = src_g[i];
+            b = src_b[i];
+            src_r[i] = r + g - 0x80; /* the uint8_t store keeps only the low 8 bits */
+            src_b[i] = b + g - 0x80;
+        }
+        src_r += linesize_r;
+        src_g += linesize_g;
+        src_b += linesize_b;
+    }
+}
+
+static void restore_rgb_planes10_c(uint16_t *src_r,       /* R plane, rewritten in place */
+                                   uint16_t *src_g,       /* G plane, only read */
+                                   uint16_t *src_b,       /* B plane, rewritten in place */
+                                   ptrdiff_t linesize_r,  /* per-row advance of src_r, in uint16_t elements */
+                                   ptrdiff_t linesize_g,  /* per-row advance of src_g, in uint16_t elements */
+                                   ptrdiff_t linesize_b,  /* per-row advance of src_b, in uint16_t elements */
+                                   int width, int height) /* samples per row, number of rows */
+{
+    int r, g, b;
+    int i, j;
+    /* For every sample: R and B become (x + G - 0x200) & 0x3FF, in place. */
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            r = src_r[i];
+            g = src_g[i];
+            b = src_b[i];
+            src_r[i] = (r + g - 0x200) & 0x3FF; /* mask keeps the low 10 bits */
+            src_b[i] = (b + g - 0x200) & 0x3FF;
+        }
+        src_r += linesize_r;
+        src_g += linesize_g;
+        src_b += linesize_b;
+    }
+}
+
+av_cold void ff_utvideodsp_init(UTVideoDSPContext *c)
+{
+    c->restore_rgb_planes   = restore_rgb_planes_c;   /* portable C defaults */
+    c->restore_rgb_planes10 = restore_rgb_planes10_c;
+    /* On x86 builds the arch-specific init may replace the pointers set above. */
+    if (ARCH_X86)
+        ff_utvideodsp_init_x86(c);
+}
diff --git a/libavcodec/utvideodsp.h b/libavcodec/utvideodsp.h
new file mode 100644
index 0000000000..a3d2550dce
--- /dev/null
+++ b/libavcodec/utvideodsp.h
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_UTVIDEODSP_H
+#define AVCODEC_UTVIDEODSP_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include "libavutil/pixfmt.h"
+#include "config.h"
+
+typedef struct UTVideoDSPContext { /* function table filled in by ff_utvideodsp_init() */
+    void (*restore_rgb_planes)(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+                               ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                               ptrdiff_t linesize_b, int width, int height); /* uint8_t samples */
+    void (*restore_rgb_planes10)(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+                                 ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                 ptrdiff_t linesize_b, int width, int height); /* uint16_t samples */
+} UTVideoDSPContext;
+
+void ff_utvideodsp_init(UTVideoDSPContext *c);     /* sets both function pointers in c */
+void ff_utvideodsp_init_x86(UTVideoDSPContext *c); /* x86 hook: may override both pointers */
+
+#endif /* AVCODEC_UTVIDEODSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index b86700b675..0dbc46504e 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -65,6 +65,7 @@ OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
 OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
 OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp_init.o
+OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp_init.o
 OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
 OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
 OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
@@ -171,6 +172,7 @@ X86ASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
 X86ASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
 X86ASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
 X86ASM-OBJS-$(CONFIG_TTA_ENCODER) += x86/ttaencdsp.o
+X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp.o X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm new file mode 100644 index 0000000000..a74d6e9ec1 --- /dev/null +++ b/libavcodec/x86/utvideodsp.asm @@ -0,0 +1,103 @@ +;****************************************************************************** +;* SIMD-optimized UTVideo functions +;* Copyright (c) 2017 Paul B Mahol +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+%if ARCH_X86_64
+
+SECTION_RODATA
+
+pb_128: times 16 db 128      ; 16 bytes of 0x80: bias subtracted from G in the 8-bit kernel
+pw_512: times 8 dw 512       ; 8 words of 0x200: bias subtracted from G in the 10-bit kernel
+pw_1023: times 8 dw 1023     ; 8 words of 0x3FF: mask applied to the 10-bit results
+
+SECTION .text
+
+INIT_XMM sse2
+
+; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+; ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
+; int width, int height)
+cglobal restore_rgb_planes, 8,9,4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
+    movsxdifnidn wq, wd                ; sign-extend the int width into the full 64-bit register
+    add      src_rq, wq                ; point every plane at the end of its first row ...
+    add      src_gq, wq
+    add      src_bq, wq
+    neg      wq                        ; ... so a negative index counting up to 0 walks the row
+    mova     m3, [pb_128]
+.nextrow:
+    mov      xq, wq                    ; x = -width
+
+    .loop:
+        mova     m0, [src_rq + xq]     ; NOTE(review): mova is the aligned move; assumes row starts are mmsize-aligned - confirm
+        mova     m1, [src_gq + xq]
+        mova     m2, [src_bq + xq]
+        psubb    m1, m3                ; g - 0x80, per byte with wraparound
+        paddb    m0, m1                ; r + g - 0x80
+        paddb    m2, m1                ; b + g - 0x80
+        mova     [src_rq+xq], m0
+        mova     [src_bq+xq], m2
+        add      xq, mmsize            ; NOTE(review): whole mmsize steps; a width that is not a multiple runs past the row end - presumably covered by row padding, confirm
+    jl .loop                           ; repeat while x is still negative
+
+    add      src_rq, linesize_rq       ; advance every plane by its own stride
+    add      src_gq, linesize_gq
+    add      src_bq, linesize_bq
+    sub      hd, 1
+    jg .nextrow                        ; repeat while rows remain; the row body runs before this test
+    REP_RET
+
+cglobal restore_rgb_planes10, 8,9,5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
+    shl          wd, 1                 ; width: 16-bit samples -> bytes
+    shl linesize_rq, 1                 ; strides are doubled the same way (the caller passes linesize / 2)
+    shl linesize_gq, 1
+    shl linesize_bq, 1
+    add      src_rq, wq                ; same end-of-row / negative-index scheme as the 8-bit kernel
+    add      src_gq, wq
+    add      src_bq, wq
+    mova     m3, [pw_512]
+    mova     m4, [pw_1023]
+    neg      wq
+.nextrow:
+    mov      xq, wq                    ; x = -(width in bytes)
+
+    .loop:
+        mova     m0, [src_rq + xq]     ; NOTE(review): aligned access, same assumption as above - confirm
+        mova     m1, [src_gq + xq]
+        mova     m2, [src_bq + xq]
+        psubw    m1, m3                ; g - 0x200, per 16-bit word
+        paddw    m0, m1                ; r + g - 0x200
+        paddw    m2, m1                ; b + g - 0x200
+        pand     m0, m4                ; keep the low 10 bits
+        pand     m2, m4
+        mova     [src_rq+xq], m0
+        mova     [src_bq+xq], m2
+        add      xq, mmsize            ; NOTE(review): same whole-vector stepping past the row end as above - confirm padding
+    jl .loop
+
+    add      src_rq, linesize_rq
+    add      src_gq, linesize_gq
+    add      src_bq, linesize_bq
+    sub      hd, 1
+    jg .nextrow
+    REP_RET
+
+%endif
diff --git a/libavcodec/x86/utvideodsp_init.c b/libavcodec/x86/utvideodsp_init.c
new file mode 100644
index
0000000000..d4156926bd
--- /dev/null
+++ b/libavcodec/x86/utvideodsp_init.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/utvideodsp.h"
+
+void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+                                ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                ptrdiff_t linesize_b, int width, int height); /* 8-bit SSE2 kernel */
+void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+                                  ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                  ptrdiff_t linesize_b, int width, int height); /* 10-bit SSE2 kernel */
+
+av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
+{
+    int cpu_flags = av_get_cpu_flags();
+    /* Leave c untouched unless this is an x86-64 build with external SSE2 available. */
+    if (!ARCH_X86_64 || !EXTERNAL_SSE2(cpu_flags))
+        return;
+    c->restore_rgb_planes   = ff_restore_rgb_planes_sse2;
+    c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
+}