mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
libavcodec/exr : add x86 SIMD for predictor
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
59924d5eb1
commit
ac5908b13f
@ -265,18 +265,6 @@ static inline uint16_t exr_halflt2uint(uint16_t v)
|
||||
return (v + (1 << 16)) >> (exp + 1);
|
||||
}
|
||||
|
||||
/**
 * Undo the byte-wise delta prediction in place.
 *
 * Every byte after the first is replaced by
 * (previous output byte + current byte - 128), truncated to 8 bits.
 * src[0] is left untouched.
 *
 * @param src  buffer to decode in place
 * @param size number of bytes in src; values <= 1 leave the buffer unchanged
 */
static void predictor(uint8_t *src, int size)
{
    int i;

    /* Index-based walk: no pointer is formed outside [src, src + size). */
    for (i = 1; i < size; i++) {
        /* The sum is computed as int; storing to uint8_t keeps the low 8 bits. */
        src[i] = (uint8_t)((int) src[i - 1] + (int) src[i] - 128);
    }
}
|
||||
|
||||
static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size,
|
||||
int uncompressed_size, EXRThreadData *td)
|
||||
{
|
||||
@ -288,7 +276,7 @@ static int zip_uncompress(EXRContext *s, const uint8_t *src, int compressed_size
|
||||
|
||||
av_assert1(uncompressed_size % 2 == 0);
|
||||
|
||||
predictor(td->tmp, uncompressed_size);
|
||||
s->dsp.predictor(td->tmp, uncompressed_size);
|
||||
s->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
|
||||
|
||||
return 0;
|
||||
@ -335,7 +323,7 @@ static int rle_uncompress(EXRContext *ctx, const uint8_t *src, int compressed_si
|
||||
|
||||
av_assert1(uncompressed_size % 2 == 0);
|
||||
|
||||
predictor(td->tmp, uncompressed_size);
|
||||
ctx->dsp.predictor(td->tmp, uncompressed_size);
|
||||
ctx->dsp.reorder_pixels(td->uncompressed_data, td->tmp, uncompressed_size);
|
||||
|
||||
return 0;
|
||||
|
@ -38,9 +38,18 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Scalar reference implementation of the predictor DSP function.
 *
 * Decodes the buffer in place: for i >= 1, src[i] becomes
 * (src[i] + src[i - 1] - 128) truncated to 8 bits, where src[i - 1] is the
 * already decoded previous byte. src[0] is left untouched.
 *
 * @param src  buffer to decode in place
 * @param size number of bytes in src; values <= 1 leave the buffer unchanged
 */
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
{
    ptrdiff_t i;

    for (i = 1; i < size; i++) {
        src[i] += src[i - 1] - 128;
    }
}
|
||||
|
||||
av_cold void ff_exrdsp_init(ExrDSPContext *c)
|
||||
{
|
||||
c->reorder_pixels = reorder_pixels_scalar;
|
||||
c->predictor = predictor_scalar;
|
||||
|
||||
if (ARCH_X86)
|
||||
ff_exrdsp_init_x86(c);
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
/**
 * Table of DSP routines used by the EXR decoder.
 *
 * The pointers are filled in by ff_exrdsp_init().
 */
typedef struct ExrDSPContext {
    /**
     * Reorder size bytes from src into dst.
     * NOTE(review): the exact byte layout is defined by the implementation,
     * which is not visible here - confirm before relying on it.
     */
    void (*reorder_pixels)(uint8_t *dst, const uint8_t *src, ptrdiff_t size);

    /**
     * Undo the byte-wise delta prediction of size bytes in place:
     * each byte after the first becomes (byte + previous byte - 128).
     */
    void (*predictor)(uint8_t *src, ptrdiff_t size);
} ExrDSPContext;
|
||||
|
||||
void ff_exrdsp_init(ExrDSPContext *c);
|
||||
|
@ -2,9 +2,11 @@
|
||||
;* X86 Optimized functions for Open Exr Decoder
|
||||
;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC
|
||||
;*
|
||||
;* reorder_pixels based on patch by John Loy
|
||||
;* reorder_pixels, predictor based on patch by John Loy
|
||||
;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema
|
||||
;*
|
||||
;* predictor AVX/AVX2 by Henrik Gramner
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
@ -24,6 +26,9 @@
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
cextern pb_15
|
||||
cextern pb_80
|
||||
|
||||
SECTION .text
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
@ -60,3 +65,58 @@ REORDER_PIXELS
|
||||
INIT_YMM avx2
|
||||
REORDER_PIXELS
|
||||
%endif
|
||||
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_predictor(uint8_t *src, ptrdiff_t size);
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
; PREDICTOR: emits predictor(src, size) for the current INIT_XMM/INIT_YMM setup.
;
; Works in place on mmsize bytes per iteration:
;   1. XOR the chunk with the pb_80 constant.
;   2. Build a running byte sum inside each 16-byte lane with shift-and-add
;      steps of 1, 2, 4 and 8 bytes.
;   3. Add the carry kept in xm2 and store the result.
;   4. pshufb xm2 with pb_15 to form the carry for the next 16 bytes.
;
; NOTE(review): pb_80 and pb_15 are external constants (cextern); presumably
; 16 bytes of 0x80 and 16 bytes of 15, which would make step 4 a broadcast of
; the last output byte - confirm against their definition.
; NOTE(review): the loop always handles whole mmsize chunks and uses mova, so
; callers presumably pass a padded, suitably aligned buffer - confirm.
%macro PREDICTOR 0
cglobal predictor, 2,2,5, src, size
%if mmsize == 32
    vbroadcasti128 m0, [pb_80]
%else
    mova xm0, [pb_80]
%endif
    mova xm1, [pb_15]
    ; initial carry is the pb_80 constant itself
    mova xm2, xm0
    ; point srcq at the end of the buffer and count a negative offset up to 0
    add srcq, sizeq
    neg sizeq
.loop:
    pxor m3, m0, [srcq + sizeq]
    ; running sum within each 16-byte lane: shift by 1, 2, 4, 8 bytes and add
    pslldq m4, m3, 1
    paddb m3, m4
    pslldq m4, m3, 2
    paddb m3, m4
    pslldq m4, m3, 4
    paddb m3, m4
    pslldq m4, m3, 8
%if mmsize == 32
    paddb m3, m4
    ; low 16 bytes: add carry and store
    paddb xm2, xm3
    vextracti128 xm4, m3, 1
    mova [srcq + sizeq], xm2
    ; high 16 bytes: carry comes from the low half that was just stored
    pshufb xm2, xm1
    paddb xm2, xm4
    mova [srcq + sizeq + 16], xm2
%else
    ; carry + partial sums + last (8-byte) step, then store
    paddb m2, m3
    paddb m2, m4
    mova [srcq + sizeq], m2
%endif
    ; carry for the next iteration
    pshufb xm2, xm1
    add sizeq, mmsize
    jl .loop
    RET
%endmacro
|
||||
|
||||
; Instantiate PREDICTOR once per instruction set: SSSE3 and AVX with XMM
; registers, and AVX2 with YMM registers when HAVE_AVX2_EXTERNAL is set.
INIT_XMM ssse3
PREDICTOR

INIT_XMM avx
PREDICTOR

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
PREDICTOR
%endif
|
||||
|
@ -26,6 +26,12 @@ void ff_reorder_pixels_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
|
||||
|
||||
void ff_reorder_pixels_avx2(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
|
||||
|
||||
void ff_predictor_ssse3(uint8_t *src, ptrdiff_t size);
|
||||
|
||||
void ff_predictor_avx(uint8_t *src, ptrdiff_t size);
|
||||
|
||||
void ff_predictor_avx2(uint8_t *src, ptrdiff_t size);
|
||||
|
||||
av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
@ -33,7 +39,14 @@ av_cold void ff_exrdsp_init_x86(ExrDSPContext *dsp)
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
dsp->reorder_pixels = ff_reorder_pixels_sse2;
|
||||
}
|
||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||
dsp->predictor = ff_predictor_ssse3;
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags)) {
|
||||
dsp->predictor = ff_predictor_avx;
|
||||
}
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
dsp->reorder_pixels = ff_reorder_pixels_avx2;
|
||||
dsp->predictor = ff_predictor_avx2;
|
||||
}
|
||||
}
|
||||
|
@ -55,6 +55,24 @@ static void check_reorder_pixels(void) {
|
||||
bench_new(dst_new, src, BUF_SIZE);
|
||||
}
|
||||
|
||||
static void check_predictor(void) {
|
||||
LOCAL_ALIGNED_32(uint8_t, src, [PADDED_BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_ref, [PADDED_BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst_new, [PADDED_BUF_SIZE]);
|
||||
|
||||
declare_func(void, uint8_t *src, ptrdiff_t size);
|
||||
|
||||
memset(src, 0, PADDED_BUF_SIZE);
|
||||
randomize_buffers();
|
||||
memcpy(dst_ref, src, PADDED_BUF_SIZE);
|
||||
memcpy(dst_new, src, PADDED_BUF_SIZE);
|
||||
call_ref(dst_ref, BUF_SIZE);
|
||||
call_new(dst_new, BUF_SIZE);
|
||||
if (memcmp(dst_ref, dst_new, BUF_SIZE))
|
||||
fail();
|
||||
bench_new(dst_new, BUF_SIZE);
|
||||
}
|
||||
|
||||
void checkasm_check_exrdsp(void)
|
||||
{
|
||||
ExrDSPContext h;
|
||||
@ -65,4 +83,9 @@ void checkasm_check_exrdsp(void)
|
||||
check_reorder_pixels();
|
||||
|
||||
report("reorder_pixels");
|
||||
|
||||
if (check_func(h.predictor, "predictor"))
|
||||
check_predictor();
|
||||
|
||||
report("predictor");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user