mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avcodec/utvideoenc: add SIMD (AVX) for sub_left_prediction
asm code by Henrik Gramner
This commit is contained in:
parent
3a230ce5fa
commit
8f9c38b196
@ -74,10 +74,25 @@ static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1,
|
||||
*left_top = lt;
|
||||
}
|
||||
|
||||
/**
 * Left-predict a plane: each output byte is the difference (modulo 256)
 * between an input sample and the sample that precedes it in scan order.
 *
 * The predictor starts at 0x80 and is carried over from the last sample of
 * one row to the first sample of the next row; it is not reset per row.
 *
 * @param dst    output residuals, written back to back (width bytes per row,
 *               no gap between rows)
 * @param src    first sample of the first input row (only read)
 * @param stride offset in bytes between the starts of consecutive input rows
 * @param width  number of samples per row
 * @param height number of rows; nothing is written when it is <= 0
 */
static void sub_left_predict_c(uint8_t *dst, uint8_t *src,
                               ptrdiff_t stride, ptrdiff_t width, int height)
{
    /* The column index shares the type of width so that the loop condition
     * cannot overflow an int when a row is wider than INT_MAX. */
    ptrdiff_t i;
    int j;
    uint8_t prev = 0x80; /* Set the initial value */

    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            const uint8_t cur = src[i];

            *dst++ = cur - prev; /* truncated to 8 bits on store */
            prev   = cur;
        }
        src += stride;
    }
}
|
||||
|
||||
av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c)
|
||||
{
|
||||
c->diff_bytes = diff_bytes_c;
|
||||
c->sub_median_pred = sub_median_pred_c;
|
||||
c->sub_left_predict = sub_left_predict_c;
|
||||
|
||||
if (ARCH_X86)
|
||||
ff_llvidencdsp_init_x86(c);
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "avcodec.h"
|
||||
|
||||
typedef struct LLVidEncDSPContext {
|
||||
void (*diff_bytes)(uint8_t *dst /* align 16 */,
|
||||
const uint8_t *src1 /* align 16 */,
|
||||
@ -33,6 +35,9 @@ typedef struct LLVidEncDSPContext {
|
||||
void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1,
|
||||
const uint8_t *src2, intptr_t w,
|
||||
int *left, int *left_top);
|
||||
|
||||
void (*sub_left_predict)(uint8_t *dst, uint8_t *src,
|
||||
ptrdiff_t stride, ptrdiff_t width, int height);
|
||||
} LLVidEncDSPContext;
|
||||
|
||||
void ff_llvidencdsp_init(LLVidEncDSPContext *c);
|
||||
|
@ -283,23 +283,6 @@ static void mangle_rgb_planes(uint8_t *dst[4], ptrdiff_t dst_stride,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Write data to a plane with left prediction.
 *
 * Each output byte is the difference (modulo 256) between an input sample
 * and the sample that precedes it in scan order. The predictor starts at
 * 0x80 and is carried across row boundaries rather than reset per row.
 *
 * @param src    first sample of the first input row (only read)
 * @param dst    output residuals, written back to back (width bytes per row)
 * @param stride offset in bytes between the starts of consecutive input rows
 * @param width  number of samples per row
 * @param height number of rows; nothing is written when it is <= 0
 */
static void left_predict(const uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                         int width, int height)
{
    int i, j;
    uint8_t prev = 0x80; /* Set the initial value */

    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            const uint8_t cur = src[i];

            *dst++ = cur - prev; /* truncated to 8 bits on store */
            prev   = cur;
        }
        src += stride;
    }
}
|
||||
|
||||
#undef A
|
||||
#undef B
|
||||
|
||||
@ -436,8 +419,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
|
||||
for (i = 0; i < c->slices; i++) {
|
||||
sstart = send;
|
||||
send = height * (i + 1) / c->slices & cmask;
|
||||
left_predict(src + sstart * stride, dst + sstart * width,
|
||||
stride, width, send - sstart);
|
||||
c->llvidencdsp.sub_left_predict(dst + sstart * width, src + sstart * stride, stride, width, send - sstart);
|
||||
}
|
||||
break;
|
||||
case PRED_MEDIAN:
|
||||
|
@ -25,6 +25,8 @@
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
cextern pb_80
|
||||
|
||||
SECTION .text
|
||||
|
||||
; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
@ -149,3 +151,44 @@ DIFF_BYTES_PROLOGUE
|
||||
DIFF_BYTES_BODY u, u
|
||||
%undef i
|
||||
%endif
|
||||
|
||||
|
||||
;--------------------------------------------------------------------------------------------------
|
||||
;void sub_left_predict(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ptrdiff_t width, int height)
|
||||
;--------------------------------------------------------------------------------------------------
|
||||
|
||||
INIT_XMM avx
|
||||
cglobal sub_left_predict, 5,6,5, dst, src, stride, width, height, x ; x86inc: 5 args, 6 GPRs (x is scratch), 5 XMM regs
|
||||
mova m1, [pb_80] ; prev initial (pb_80 is external; presumably 16 bytes of 0x80, matching the C version's 0x80 start value)
|
||||
add dstq, widthq ; point dst at the end of the first row; the row is then indexed with a negative offset
|
||||
add srcq, widthq ; same for src
|
||||
lea xd, [widthq-1] ; x = width - 1, index of the last sample in a row
|
||||
neg widthq ; width = -width, the negative offset that counts up towards 0
|
||||
and xd, 15 ; position of the last sample inside its 16-byte vector
|
||||
pinsrb m4, m1, xd, 15 ; m4 = m1 with byte 15 replaced by that position; used below as a pshufb control
|
||||
mov xq, widthq ; x now keeps -width so it can be restored for each row
|
||||
|
||||
.loop: ; handles 32 bytes per iteration; entered with m1 byte 15 = previous sample
|
||||
movu m0, [srcq + widthq] ; samples 0..15 of this step
|
||||
palignr m2, m0, m1, 15 ; m2 = m0 shifted up one byte with m1[15] in byte 0, i.e. each sample's left neighbour
|
||||
movu m1, [srcq + widthq + 16] ; samples 16..31 of this step
|
||||
palignr m3, m1, m0, 15 ; left neighbours of samples 16..31 (m0[15] in byte 0)
|
||||
psubb m2, m0, m2 ; residual = sample - left neighbour, per byte
|
||||
psubb m3, m1, m3
|
||||
movu [dstq + widthq], m2 ; always stores a full 32 bytes: when width is not a multiple of 32 the
|
||||
movu [dstq + widthq + 16], m3 ; loads/stores run up to 31 bytes past the row end (NOTE(review): relies on buffer padding - confirm with callers)
|
||||
add widthq, 2 * 16
|
||||
jl .loop ; continue while the offset is still negative (row not finished)
|
||||
|
||||
add srcq, strideq ; next input row
|
||||
sub dstq, xq ; dst + width (x holds -width): output rows are stored back to back
|
||||
test xd, 16 ; bit 4 of -width is set when the row's last sample was in m0, clear when it was in m1
|
||||
jz .mod32 ; last sample already in m1
|
||||
mova m1, m0 ; otherwise take it from m0
|
||||
|
||||
.mod32:
|
||||
pshufb m1, m4 ; move the last sample into byte 15 of m1; bytes 0..14 become 0 if pb_80 has bit 7 set in every byte (presumed)
|
||||
mov widthq, xq ; restore the negative offset for the next row
|
||||
dec heightd
|
||||
jg .loop ; height is tested after the row, so one row is processed even if height <= 0 on entry
|
||||
RET
|
||||
|
@ -36,6 +36,9 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
intptr_t w);
|
||||
|
||||
void ff_sub_left_predict_avx(uint8_t *dst, uint8_t *src,
|
||||
ptrdiff_t stride, ptrdiff_t width, int height);
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
|
||||
@ -98,6 +101,10 @@ av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c)
|
||||
c->diff_bytes = ff_diff_bytes_sse2;
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX(cpu_flags)) {
|
||||
c->sub_left_predict = ff_sub_left_predict_avx;
|
||||
}
|
||||
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
c->diff_bytes = ff_diff_bytes_avx2;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user