mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Add x86 full-pixel MC (motion compensation) functions for VP9.
Decoding time of ped1080p.webm goes from 11.3 sec to 11.1 sec.
This commit is contained in:
parent
c07ac8d467
commit
f1548c008f
@ -219,3 +219,60 @@ filter_v_fn avg
|
||||
; Instantiate filter_v_fn twice under "INIT_XMM ssse3": once with the argument
; "put" and once with "avg". The filter_v_fn macro itself is defined outside
; this view.
INIT_XMM ssse3
|
||||
filter_v_fn put
|
||||
filter_v_fn avg
|
||||
|
||||
; fpel_fn: emit one full-pixel function that either copies src to dst ("put")
; or averages src into dst ("avg").
;
;   %1    - operation name; when it is "avg", each loaded vector is combined
;           with the bytes already at the matching dst offset using pavgb
;           before it is stored
;   %2    - block width; pasted after %1 to form the cglobal name (e.g. put4),
;           and used to pick the load/store instructions and to decide whether
;           the stride*3 registers are set up
;   %3-%5 - offset expressions for the 2nd, 3rd and 4th load/store of one loop
;           iteration; each is pasted after "s" (source side) or "d"
;           (destination side)
;   %6    - multiplier applied to sstride/dstride when advancing src/dst, and
;           the amount subtracted from h on every iteration
;
; The loop exits only when h reaches exactly zero, so h is presumably always a
; multiple of %6 -- TODO confirm against callers.
%macro fpel_fn 6
|
||||
; Width 4 uses movh for both loads and stores; every other width loads with
; movu and stores with mova.
; NOTE(review): mova presumably requires dst to be aligned -- confirm.
%if %2 == 4
|
||||
%define %%srcfn movh
|
||||
%define %%dstfn movh
|
||||
%else
|
||||
%define %%srcfn movu
|
||||
%define %%dstfn mova
|
||||
%endif
|
||||
|
||||
; Widths up to 16 declare two extra named registers (dstride3, sstride3) and
; fill them with 3*stride; wider blocks declare only the five arguments.
%if %2 <= 16
|
||||
cglobal %1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
|
||||
lea sstride3q, [sstrideq*3]
|
||||
lea dstride3q, [dstrideq*3]
|
||||
%else
|
||||
cglobal %1%2, 5, 5, 4, dst, dstride, src, sstride, h
|
||||
%endif
|
||||
.loop:
|
||||
; Load four vectors from src at offsets 0, s%3, s%4 and s%5.
%%srcfn m0, [srcq]
|
||||
%%srcfn m1, [srcq+s%3]
|
||||
%%srcfn m2, [srcq+s%4]
|
||||
%%srcfn m3, [srcq+s%5]
|
||||
; Advance src by %6 source strides.
lea srcq, [srcq+sstrideq*%6]
|
||||
; "avg" only: average each loaded vector with the current dst contents.
%ifidn %1, avg
|
||||
pavgb m0, [dstq]
|
||||
pavgb m1, [dstq+d%3]
|
||||
pavgb m2, [dstq+d%4]
|
||||
pavgb m3, [dstq+d%5]
|
||||
%endif
|
||||
; Store the four vectors to dst at offsets 0, d%3, d%4 and d%5.
%%dstfn [dstq], m0
|
||||
%%dstfn [dstq+d%3], m1
|
||||
%%dstfn [dstq+d%4], m2
|
||||
%%dstfn [dstq+d%5], m3
|
||||
; Advance dst by %6 destination strides.
lea dstq, [dstq+dstrideq*%6]
|
||||
; h -= %6; keep looping until h is zero.
sub hd, %6
|
||||
jnz .loop
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
; Instantiate fpel_fn for every block width.
;
; d16 and s16 are defined as 16 only for the duration of these instantiations
; and are undefined again at the end.
; NOTE(review): presumably they exist so that the "d"/"s" prefix pasted onto an
; mmsize-based offset (mmsize being 16 under INIT_XMM) resolves to a plain
; byte offset -- confirm the preprocessor expansion order.
;
; Argument patterns used below:
;   widths 4, 8, 16: offsets stride, stride*2, stride3; advance 4 strides
;   width 32:        offsets mmsize, stride, stride+mmsize; advance 2 strides
;   width 64:        offsets mmsize, mmsize*2, mmsize*3; advance 1 stride
%define d16 16
|
||||
%define s16 16
|
||||
; put, widths 4 and 8: INIT_MMX mmx.
INIT_MMX mmx
|
||||
fpel_fn put, 4, strideq, strideq*2, stride3q, 4
|
||||
fpel_fn put, 8, strideq, strideq*2, stride3q, 4
|
||||
; avg, widths 4 and 8: INIT_MMX sse.
INIT_MMX sse
|
||||
fpel_fn avg, 4, strideq, strideq*2, stride3q, 4
|
||||
fpel_fn avg, 8, strideq, strideq*2, stride3q, 4
|
||||
; put, widths 16, 32 and 64: INIT_XMM sse.
INIT_XMM sse
|
||||
fpel_fn put, 16, strideq, strideq*2, stride3q, 4
|
||||
fpel_fn put, 32, mmsize, strideq, strideq+mmsize, 2
|
||||
fpel_fn put, 64, mmsize, mmsize*2, mmsize*3, 1
|
||||
; avg, widths 16, 32 and 64: INIT_XMM sse2.
INIT_XMM sse2
|
||||
fpel_fn avg, 16, strideq, strideq*2, stride3q, 4
|
||||
fpel_fn avg, 32, mmsize, strideq, strideq+mmsize, 2
|
||||
fpel_fn avg, 64, mmsize, mmsize*2, mmsize*3, 1
|
||||
%undef s16
|
||||
%undef d16
|
||||
|
@ -27,6 +27,22 @@
|
||||
|
||||
#if HAVE_YASM
|
||||
|
||||
/*
 * Declare the prototype of one full-pixel function named ff_<avg><sz>_<opt>.
 *   avg - operation part of the name ("put" or "avg" in the uses below)
 *   sz  - size part of the name (4, 8, 16, 32 or 64 in the uses below)
 *   opt - instruction-set suffix of the name (mmx, sse or sse2 below)
 * Every declared function takes (dst, dst_stride, src, src_stride, h, mx, my)
 * and returns void. The macro is undefined again after the ten declarations.
 * NOTE(review): the definitions are presumably the assembly fpel_fn
 * instantiations -- confirm the symbol naming produced by cglobal.
 */
#define fpel_func(avg, sz, opt) \
|
||||
void ff_##avg##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||
const uint8_t *src, ptrdiff_t src_stride, \
|
||||
int h, int mx, int my)
|
||||
fpel_func(put, 4, mmx);
|
||||
fpel_func(put, 8, mmx);
|
||||
fpel_func(put, 16, sse);
|
||||
fpel_func(put, 32, sse);
|
||||
fpel_func(put, 64, sse);
|
||||
fpel_func(avg, 4, sse);
|
||||
fpel_func(avg, 8, sse);
|
||||
fpel_func(avg, 16, sse2);
|
||||
fpel_func(avg, 32, sse2);
|
||||
fpel_func(avg, 64, sse2);
|
||||
#undef fpel_func
|
||||
|
||||
#define mc_func(avg, sz, dir, opt) \
|
||||
void ff_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||
const uint8_t *src, ptrdiff_t src_stride, \
|
||||
@ -141,6 +157,13 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
|
||||
#if HAVE_YASM
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
/*
 * Store ff_<type><sz>_<opt> into the [0][0] slot of dsp->mc[idx1][filter][idx2]
 * for all four filter kinds (8-tap smooth, 8-tap regular, 8-tap sharp and
 * bilinear), so the same function is used whatever the filter.
 * NOTE(review): the trailing [0][0] presumably means "no horizontal and no
 * vertical subpel offset", by analogy with the idxh/idxv indices used by
 * init_subpel1 -- confirm against the VP9DSPContext definition.
 */
#define init_fpel(idx1, idx2, sz, type, opt) \
|
||||
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
|
||||
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
|
||||
dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
|
||||
dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_##type##sz##_##opt
|
||||
|
||||
|
||||
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
|
||||
dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
|
||||
dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
|
||||
@ -158,11 +181,31 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
|
||||
init_subpel2(idx, 0, 1, v, type, opt); \
|
||||
init_subpel2(idx, 1, 0, h, type, opt)
|
||||
|
||||
/*
 * Install the full-pixel functions according to the detected CPU flags.
 * In the calls below idx1 = 4, 3, 2, 1, 0 is paired with sz = 4, 8, 16, 32, 64,
 * and idx2 = 0 / 1 is paired with put / avg.
 */
/* MMX: put for sizes 4 and 8. */
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
||||
init_fpel(4, 0, 4, put, mmx);
|
||||
init_fpel(3, 0, 8, put, mmx);
|
||||
}
|
||||
|
||||
/* SSE: put for sizes 16, 32 and 64, plus avg for sizes 4 and 8. */
if (cpu_flags & AV_CPU_FLAG_SSE) {
|
||||
init_fpel(2, 0, 16, put, sse);
|
||||
init_fpel(1, 0, 32, put, sse);
|
||||
init_fpel(0, 0, 64, put, sse);
|
||||
init_fpel(4, 1, 4, avg, sse);
|
||||
init_fpel(3, 1, 8, avg, sse);
|
||||
}
|
||||
|
||||
/* SSE2: avg for sizes 16, 32 and 64. */
if (cpu_flags & AV_CPU_FLAG_SSE2) {
|
||||
init_fpel(2, 1, 16, avg, sse2);
|
||||
init_fpel(1, 1, 32, avg, sse2);
|
||||
init_fpel(0, 1, 64, avg, sse2);
|
||||
}
|
||||
|
||||
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
|
||||
init_subpel3(0, put, ssse3);
|
||||
init_subpel3(1, avg, ssse3);
|
||||
}
|
||||
|
||||
#undef init_fpel
|
||||
#undef init_subpel1
|
||||
#undef init_subpel2
|
||||
#undef init_subpel3
|
||||
|
Loading…
Reference in New Issue
Block a user