From 8f9c38b19629838066def1207703cfcdc19fcbc9 Mon Sep 17 00:00:00 2001
From: Martin Vignali
Date: Sun, 14 Jan 2018 14:23:05 +0100
Subject: [PATCH] avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction

asm code by Henrik Gramner
---
NOTE(review): the line breaks and indentation of this patch were reconstructed
from a copy in which every whitespace run had been collapsed to one space.
Token order and per-hunk line counts agree with the @@ headers and with the
diffstat below, but exact column alignment and the "<stdint.h>" operand of the
context #include in lossless_videoencdsp.h are inferred, and the author e-mail
address is absent. Verify against the upstream commit, or apply with
whitespace-tolerant options, before relying on it.

 libavcodec/lossless_videoencdsp.c          | 15 ++++++++
 libavcodec/lossless_videoencdsp.h          |  5 +++
 libavcodec/utvideoenc.c                    | 20 +---------
 libavcodec/x86/lossless_videoencdsp.asm    | 43 ++++++++++++++++++++++
 libavcodec/x86/lossless_videoencdsp_init.c |  7 ++++
 5 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/libavcodec/lossless_videoencdsp.c b/libavcodec/lossless_videoencdsp.c
index 5cc4934c0e..ed70329628 100644
--- a/libavcodec/lossless_videoencdsp.c
+++ b/libavcodec/lossless_videoencdsp.c
@@ -74,10 +74,25 @@ static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1,
     *left_top = lt;
 }
 
+static void sub_left_predict_c(uint8_t *dst, uint8_t *src,
+                          ptrdiff_t stride, ptrdiff_t width, int height)
+{
+    int i, j;
+    uint8_t prev = 0x80; /* Set the initial value */
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            *dst++ = src[i] - prev;
+            prev   = src[i];
+        }
+        src += stride;
+    }
+}
+
 av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c)
 {
     c->diff_bytes      = diff_bytes_c;
     c->sub_median_pred = sub_median_pred_c;
+    c->sub_left_predict = sub_left_predict_c;
 
     if (ARCH_X86)
         ff_llvidencdsp_init_x86(c);
diff --git a/libavcodec/lossless_videoencdsp.h b/libavcodec/lossless_videoencdsp.h
index 3d645b159a..faa6c32551 100644
--- a/libavcodec/lossless_videoencdsp.h
+++ b/libavcodec/lossless_videoencdsp.h
@@ -21,6 +21,8 @@
 
 #include <stdint.h>
 
+#include "avcodec.h"
+
 typedef struct LLVidEncDSPContext {
     void (*diff_bytes)(uint8_t *dst /* align 16 */,
                        const uint8_t *src1 /* align 16 */,
@@ -33,6 +35,9 @@ typedef struct LLVidEncDSPContext {
     void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1,
                             const uint8_t *src2, intptr_t w,
                             int *left, int *left_top);
+
+    void (*sub_left_predict)(uint8_t *dst, uint8_t *src,
+                          ptrdiff_t stride, ptrdiff_t width, int height);
 } LLVidEncDSPContext;
 
 void ff_llvidencdsp_init(LLVidEncDSPContext *c);
diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
index a829b7aaac..db00e1eff5 100644
--- a/libavcodec/utvideoenc.c
+++ b/libavcodec/utvideoenc.c
@@ -283,23 +283,6 @@ static void mangle_rgb_planes(uint8_t *dst[4], ptrdiff_t dst_stride,
     }
 }
 
-/* Write data to a plane with left prediction */
-static void left_predict(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
-                         int width, int height)
-{
-    int i, j;
-    uint8_t prev;
-
-    prev = 0x80; /* Set the initial value */
-    for (j = 0; j < height; j++) {
-        for (i = 0; i < width; i++) {
-            *dst++ = src[i] - prev;
-            prev   = src[i];
-        }
-        src += stride;
-    }
-}
-
 #undef A
 #undef B
 
@@ -436,8 +419,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
         for (i = 0; i < c->slices; i++) {
             sstart = send;
             send   = height * (i + 1) / c->slices & cmask;
-            left_predict(src + sstart * stride, dst + sstart * width,
-                         stride, width, send - sstart);
+            c->llvidencdsp.sub_left_predict(dst + sstart * width, src + sstart * stride, stride, width, send - sstart);
         }
         break;
     case PRED_MEDIAN:
diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index 4d79eee36b..fb1204f0f1 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -25,6 +25,8 @@
 
 %include "libavutil/x86/x86util.asm"
 
+cextern pb_80
+
 SECTION .text
 
 ; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
@@ -149,3 +151,44 @@ DIFF_BYTES_PROLOGUE
     DIFF_BYTES_BODY    u, u
 %undef i
 %endif
+
+
+;--------------------------------------------------------------------------------------------------
+;void sub_left_predict(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
+;--------------------------------------------------------------------------------------------------
+
+INIT_XMM avx
+cglobal sub_left_predict, 5,6,5, dst, src, stride, width, height, x
+    mova             m1, [pb_80] ; prev initial
+    add            dstq, widthq
+    add            srcq, widthq
+    lea              xd, [widthq-1]
+    neg          widthq
+    and              xd, 15
+    pinsrb           m4, m1, xd, 15
+    mov              xq, widthq
+
+    .loop:
+        movu            m0, [srcq + widthq]
+        palignr         m2, m0, m1, 15
+        movu            m1, [srcq + widthq + 16]
+        palignr         m3, m1, m0, 15
+        psubb           m2, m0, m2
+        psubb           m3, m1, m3
+        movu            [dstq + widthq], m2
+        movu            [dstq + widthq + 16], m3
+        add             widthq, 2 * 16
+        jl .loop
+
+    add   srcq, strideq
+    sub   dstq, xq ; dst + width
+    test    xd, 16
+    jz .mod32
+    mova    m1, m0
+
+.mod32:
+    pshufb  m1, m4
+    mov     widthq, xq
+    dec     heightd
+    jg .loop
+    RET
diff --git a/libavcodec/x86/lossless_videoencdsp_init.c b/libavcodec/x86/lossless_videoencdsp_init.c
index fc728c9fd1..40407add52 100644
--- a/libavcodec/x86/lossless_videoencdsp_init.c
+++ b/libavcodec/x86/lossless_videoencdsp_init.c
@@ -36,6 +36,9 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                         intptr_t w);
 
+void ff_sub_left_predict_avx(uint8_t *dst, uint8_t *src,
+                            ptrdiff_t stride, ptrdiff_t width, int height);
+
 #if HAVE_INLINE_ASM
 
 static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
@@ -98,6 +101,10 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
         c->diff_bytes = ff_diff_bytes_sse2;
     }
 
+    if (EXTERNAL_AVX(cpu_flags)) {
+        c->sub_left_predict = ff_sub_left_predict_avx;
+    }
+
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         c->diff_bytes = ff_diff_bytes_avx2;
     }