1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction

asm code by Henrik Gramner
This commit is contained in:
Martin Vignali 2018-01-14 14:23:05 +01:00
parent 3a230ce5fa
commit 8f9c38b196
5 changed files with 71 additions and 19 deletions

View File

@ -74,10 +74,25 @@ static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1,
*left_top = lt;
}
/**
 * Reference C implementation of left prediction.
 *
 * For every byte of a width x height region of src, stores the difference
 * (modulo 256) between that byte and the previously visited byte.
 * The predictor starts at 0x80 and is NOT reset between rows: the first byte
 * of a row is predicted from the last byte of the row before it.
 *
 * @param dst    output residuals, written contiguously (width bytes per row,
 *               no padding between rows)
 * @param src    first byte of the input region
 * @param stride distance in bytes between the starts of consecutive src rows
 * @param width  number of bytes processed per row
 * @param height number of rows
 */
static void sub_left_predict_c(uint8_t *dst, uint8_t *src,
                               ptrdiff_t stride, ptrdiff_t width, int height)
{
    uint8_t left = 0x80; /* predictor for the very first byte */
    uint8_t *out  = dst;
    uint8_t *line = src;
    int x, y;

    for (y = height; y > 0; y--) {
        x = 0;
        while (x < width) {
            *out = line[x] - left;
            out++;
            left = line[x];
            x++;
        }
        line += stride;
    }
}
av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c)
{
c->diff_bytes = diff_bytes_c;
c->sub_median_pred = sub_median_pred_c;
c->sub_left_predict = sub_left_predict_c;
if (ARCH_X86)
ff_llvidencdsp_init_x86(c);

View File

@ -21,6 +21,8 @@
#include <stdint.h>
#include "avcodec.h"
typedef struct LLVidEncDSPContext {
void (*diff_bytes)(uint8_t *dst /* align 16 */,
const uint8_t *src1 /* align 16 */,
@ -33,6 +35,9 @@ typedef struct LLVidEncDSPContext {
void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, intptr_t w,
int *left, int *left_top);
void (*sub_left_predict)(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, ptrdiff_t width, int height);
} LLVidEncDSPContext;
void ff_llvidencdsp_init(LLVidEncDSPContext *c);

View File

@ -283,23 +283,6 @@ static void mangle_rgb_planes(uint8_t *dst[4], ptrdiff_t dst_stride,
}
}
/*
 * Write a plane to dst using left prediction.
 *
 * Each output byte is the source byte minus the previously visited source
 * byte (modulo 256). The predictor is seeded with 0x80 and carries over from
 * the end of one row to the start of the next. dst is filled contiguously,
 * width bytes per row; src rows are stride bytes apart.
 */
static void left_predict(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                         int width, int height)
{
    uint8_t last = 0x80; /* initial predictor value */
    int row = 0;

    while (row < height) {
        int col = 0;

        while (col < width) {
            dst[0] = src[col] - last;
            dst++;
            last = src[col];
            col++;
        }
        src += stride;
        row++;
    }
}
#undef A
#undef B
@ -436,8 +419,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
for (i = 0; i < c->slices; i++) {
sstart = send;
send = height * (i + 1) / c->slices & cmask;
left_predict(src + sstart * stride, dst + sstart * width,
stride, width, send - sstart);
c->llvidencdsp.sub_left_predict(dst + sstart * width, src + sstart * stride, stride, width, send - sstart);
}
break;
case PRED_MEDIAN:

View File

@ -25,6 +25,8 @@
%include "libavutil/x86/x86util.asm"
cextern pb_80
SECTION .text
; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
@ -149,3 +151,44 @@ DIFF_BYTES_PROLOGUE
DIFF_BYTES_BODY u, u
%undef i
%endif
;--------------------------------------------------------------------------------------------------
;void sub_left_predict(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
;--------------------------------------------------------------------------------------------------
; Left prediction: every output byte is the source byte minus the byte to its
; left. The predictor starts at 0x80 and carries over from the last pixel of
; one row into the first pixel of the next row.
;
; Each row is handled in 32-byte steps (two XMM registers per iteration), so
; when width is not a multiple of 32 the final iteration of a row loads from
; src and stores to dst beyond the row's width bytes.
; NOTE(review): this presumably relies on the caller padding both buffers - confirm.
;
; Register roles:
;   m0, m1 - the two 16-byte source vectors of the current iteration; on entry
;            to a row, byte 15 of m1 holds the predictor for the first pixel
;   m2, m3 - "left neighbour" vectors, then the residuals that get stored
;   m4     - pshufb mask that moves the row's last pixel into byte 15
;   x      - scratch first, then the saved value of -width
INIT_XMM avx
cglobal sub_left_predict, 5,6,5, dst, src, stride, width, height, x
mova m1, [pb_80] ; prev initial
; Point dst/src at the end of the row and index them with a negative offset
; (widthq = -width) that counts up towards zero.
add dstq, widthq
add srcq, widthq
lea xd, [widthq-1]
neg widthq
; xd = (width - 1) & 15: position of the row's last pixel inside its vector.
and xd, 15
; m4 = 0x80 in bytes 0..14 (pshufb writes zero for those) and the last-pixel
; position in byte 15.
pinsrb m4, m1, xd, 15
mov xq, widthq ; keep -width to reset the index and advance dst per row
.loop:
movu m0, [srcq + widthq]
; m2 = { m1[15], m0[0..14] }: left neighbours of the 16 bytes in m0
palignr m2, m0, m1, 15
movu m1, [srcq + widthq + 16]
; m3 = { m0[15], m1[0..14] }: left neighbours of the 16 bytes in m1
palignr m3, m1, m0, 15
psubb m2, m0, m2 ; residual = pixel - left neighbour
psubb m3, m1, m3
movu [dstq + widthq], m2
movu [dstq + widthq + 16], m3
add widthq, 2 * 16
jl .loop ; index still negative: more bytes left in this row
; End of row: step src by stride; dst rows are packed, so step dst by width.
add srcq, strideq
sub dstq, xq ; dst + width
; Bit 4 of -width is set when the row's last pixel sits in the first vector
; (m0) of the final iteration; otherwise it is already in m1.
test xd, 16
jz .mod32
mova m1, m0
.mod32:
; Move the last pixel into byte 15 of m1 (other bytes become zero) so it acts
; as the predictor for the first pixel of the next row.
pshufb m1, m4
mov widthq, xq ; restart the negative index for the next row
dec heightd
jg .loop ; rows remaining
RET

View File

@ -36,6 +36,9 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w);
/* AVX version of LLVidEncDSPContext.sub_left_predict, implemented in external
 * assembly; installed by ff_llvidencdsp_init_x86() when EXTERNAL_AVX() holds. */
void ff_sub_left_predict_avx(uint8_t *dst, uint8_t *src,
ptrdiff_t stride, ptrdiff_t width, int height);
#if HAVE_INLINE_ASM
static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
@ -98,6 +101,10 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
c->diff_bytes = ff_diff_bytes_sse2;
}
if (EXTERNAL_AVX(cpu_flags)) {
c->sub_left_predict = ff_sub_left_predict_avx;
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_avx2;
}