1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-26 19:01:44 +02:00

libavcodec/exr : add x86 SIMD for predictor

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
Martin Vignali 2017-10-01 21:37:15 +02:00 committed by James Almer
parent 59924d5eb1
commit ac5908b13f
6 changed files with 109 additions and 15 deletions

View File

@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v)
return (v + (1 << 16)) >> (exp + 1);
}
/**
 * Running-sum byte predictor, applied in place.
 *
 * Every byte after the first becomes the sum of the (already updated)
 * preceding byte and its own value, minus 128, truncated to 8 bits.
 * The first byte is left untouched.
 *
 * @param src  buffer that is read and rewritten in place
 * @param size number of bytes to process; values <= 1 leave src unchanged
 */
static void predictor(uint8_t *src, int size)
{
    int pos;

    for (pos = 1; pos < size; pos++) {
        const int prev = src[pos - 1];
        const int cur  = src[pos];

        /* the store truncates the sum back to 8 bits */
        src[pos] = prev + cur - 128;
    }
}
static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
int uncompressed_size, EXRThreadData *td)
{
@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size
av_assert1(uncompressed_size % 2 == 0);
predictor(td->tmp, uncompressed_size);
s->dsp.predictor(td->tmp, uncompressed_size);
s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
return 0;
@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si
av_assert1(uncompressed_size % 2 == 0);
predictor(td->tmp, uncompressed_size);
ctx->dsp.predictor(td->tmp, uncompressed_size);
ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
return 0;

View File

@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
}
}
/**
 * Plain C implementation of the predictor DSP function.
 *
 * Walks the buffer front to back; each byte after the first has the
 * preceding (already updated) byte added to it and 128 subtracted, with
 * the result wrapped to 8 bits. The first byte is never modified.
 *
 * @param src  buffer that is read and rewritten in place
 * @param size number of bytes to process; values <= 1 leave src unchanged
 */
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
{
    ptrdiff_t pos = 1;

    while (pos < size) {
        const uint8_t prev = src[pos - 1];

        src[pos] = (uint8_t)(src[pos] + prev - 128);
        pos++;
    }
}
av_cold void ff_exrdsp_init(ExrDSPContext *c)
{
c->reorder_pixels = reorder_pixels_scalar;
c->predictor = predictor_scalar;
if (ARCH_X86)
ff_exrdsp_init_x86(c);

View File

@ -24,6 +24,7 @@
/**
 * Table of function pointers for the EXR DSP routines.
 * The members are filled in by ff_exrdsp_init().
 */
typedef struct ExrDSPContext {
/* Reads size-related data from the read-only src buffer and writes to dst.
 * NOTE(review): the exact reordering is defined by the implementations,
 * which are not visible from this header. */
void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
/* Works in place on src; size is passed as a ptrdiff_t byte count.
 * NOTE(review): SIMD implementations may impose alignment/padding
 * requirements on src - confirm against the callers. */
void (*predictor)(uint8_t *src, ptrdiff_t size);
} ExrDSPContext;
void ff_exrdsp_init(ExrDSPContext *c);

View File

@ -2,9 +2,11 @@
;* X86 Optimized functions for Open Exr Decoder
;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
;*
;* reorder_pixels based on patch by John Loy
;* reorder_pixels, predictor based on patch by John Loy
;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
;*
;* predictor AVX/AVX2 by Henrik Gramner
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
@ -24,6 +26,9 @@
%include "libavutil/x86/x86util.asm"
cextern pb_15
cextern pb_80
SECTION .text
;------------------------------------------------------------------------------
@ -60,3 +65,58 @@ REORDER_PIXELS
INIT_YMM avx2
REORDER_PIXELS
%endif
;------------------------------------------------------------------------------
; void ff_predictor(uint8_t *src, ptrdiff_t size);
;------------------------------------------------------------------------------
; PREDICTOR: emits one "predictor" function for the currently selected
; instruction set (see the INIT_XMM / INIT_YMM lines after the macro).
; The function takes two arguments (src, size), uses two general-purpose
; registers and five vector registers, and rewrites the buffer in place.
; Each loop iteration handles mmsize bytes: it builds a byte-wise running sum
; inside each 16-byte lane with shift-and-add steps, then adds a carry taken
; from the last byte written by the previous 16-byte group.
; NOTE(review): the loop body always runs at least once and advances by
; mmsize, so size is presumably expected to be > 0 and the buffer padded to a
; multiple of mmsize - confirm against the callers.
; NOTE(review): mova is presumably the aligned move, so src is assumed to be
; suitably aligned - confirm.
%macro PREDICTOR 0
cglobal predictor, 2,2,5, src, size
; m0 = constant from pb_80 (presumably 0x80 in every byte); the 32-byte
; variant replicates the 16-byte constant into both halves of the register.
%if mmsize == 32
vbroadcasti128 m0, [pb_80]
%else
mova xm0, [pb_80]
%endif
; xm1 = pshufb control loaded from pb_15 (presumably index 15 in every byte,
; i.e. "replicate the last byte of the register").
mova xm1, [pb_15]
; xm2 = running carry, seeded with the same constant as m0.
mova xm2, xm0
; Point srcq at the end of the buffer and count a negative offset up towards
; zero, so a single add both advances the position and sets the loop flags.
add srcq, sizeq
neg sizeq
.loop:
; m3 = input bytes XORed with the constant in m0.
pxor m3, m0, [srcq + sizeq]
; Running sum by doubling: add the vector shifted by 1, 2 and 4 bytes.
pslldq m4, m3, 1
paddb m3, m4
pslldq m4, m3, 2
paddb m3, m4
pslldq m4, m3, 4
paddb m3, m4
; m4 = last (8-byte) step of the running sum; it is added below.
pslldq m4, m3, 8
%if mmsize == 32
; Finish the per-lane running sum.
paddb m3, m4
; Low 16 bytes: add to the carry and store.
paddb xm2, xm3
vextracti128 xm4, m3, 1
mova [srcq + sizeq], xm2
; Replicate via the xm1 control to form the carry for the upper 16 bytes,
; then add the upper half's running sum and store it.
pshufb xm2, xm1
paddb xm2, xm4
mova [srcq + sizeq + 16], xm2
%else
; 16-byte variants: carry + partial sum + final 8-byte step, then store.
paddb m2, m3
paddb m2, m4
mova [srcq + sizeq], m2
%endif
; Turn the bytes just written into the carry for the next iteration.
pshufb xm2, xm1
; Advance by one vector; keep looping while the offset is still negative.
add sizeq, mmsize
jl .loop
RET
%endmacro
; Instantiate the macro once per instruction set; the AVX2 version is only
; assembled when HAVE_AVX2_EXTERNAL is set.
INIT_XMM ssse3
PREDICTOR
INIT_XMM avx
PREDICTOR
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
PREDICTOR
%endif

View File

@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);
void ff_predictor_avx(uint8_t *src, ptrdiff_t size);
void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);
av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
{
int cpu_flags = av_get_cpu_flags();
@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
if (EXTERNAL_SSE2(cpu_flags)) {
dsp->reorder_pixels = ff_reorder_pixels_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {
dsp->predictor = ff_predictor_ssse3;
}
if (EXTERNAL_AVX(cpu_flags)) {
dsp->predictor = ff_predictor_avx;
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
dsp->reorder_pixels = ff_reorder_pixels_avx2;
dsp->predictor = ff_predictor_avx2;
}
}

View File

@ -55,6 +55,24 @@ static void check_reorder_pixels(void) {
bench_new(dst_new, src, BUF_SIZE);
}
/*
 * Compare the reference and the tested predictor implementation on the same
 * input and benchmark the tested one.
 */
static void check_predictor(void) {
/* Three 32-byte-aligned buffers of PADDED_BUF_SIZE bytes each. */
LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
/* Signature of the function under test, used by call_ref/call_new. */
declare_func(void, uint8_t *src, ptrdiff_t size);
/* Zero the whole padded buffer first.
 * NOTE(review): randomize_buffers() takes no arguments; it presumably fills
 * the local named src with random data - confirm against its definition. */
memset(src, 0, PADDED_BUF_SIZE);
randomize_buffers();
/* The predictor works in place, so each implementation gets its own copy. */
memcpy(dst_ref, src, PADDED_BUF_SIZE);
memcpy(dst_new, src, PADDED_BUF_SIZE);
call_ref(dst_ref, BUF_SIZE);
call_new(dst_new, BUF_SIZE);
/* Only the first BUF_SIZE bytes are compared; the padding is not checked. */
if (memcmp(dst_ref, dst_new, BUF_SIZE))
fail();
bench_new(dst_new, BUF_SIZE);
}
void checkasm_check_exrdsp(void)
{
ExrDSPContext h;
@ -65,4 +83,9 @@ void checkasm_check_exrdsp(void)
check_reorder_pixels();
report("reorder_pixels");
if (check_func(h.predictor, "predictor"))
check_predictor();
report("predictor");
}