You've already forked FFmpeg
mirror of https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512()
Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com>
Co-authored-by: Jin Jun <jun.i.jin@intel.com>
Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
This commit is contained in:
@@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
|
|||||||
VBROADCASTSS m1, minm
|
VBROADCASTSS m1, minm
|
||||||
VBROADCASTSS m2, maxm
|
VBROADCASTSS m2, maxm
|
||||||
%elif WIN64
|
%elif WIN64
|
||||||
SWAP 0, 2
|
VBROADCASTSS m0, xmm2
|
||||||
SWAP 1, 3
|
VBROADCASTSS m1, xmm3
|
||||||
VBROADCASTSS m0, xm0
|
|
||||||
VBROADCASTSS m1, xm1
|
|
||||||
VBROADCASTSS m2, maxm
|
VBROADCASTSS m2, maxm
|
||||||
%else ; UNIX64
|
%else ; UNIX
|
||||||
VBROADCASTSS m0, xm0
|
VBROADCASTSS m0, xmm0
|
||||||
VBROADCASTSS m1, xm1
|
VBROADCASTSS m1, xmm1
|
||||||
VBROADCASTSS m2, xm2
|
VBROADCASTSS m2, xmm2
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
%if cpuflag(avx2)
|
%if cpuflag(avx2) || cpuflag(avx512)
|
||||||
mulps m3, m0, [ptrq + lengthq]
|
mulps m3, m0, [ptrq + lengthq]
|
||||||
%else
|
%else
|
||||||
movu m3, [ptrq + lengthq]
|
movu m3, [ptrq + lengthq]
|
||||||
@@ -229,3 +227,8 @@ POSTSCALE_SLICE
|
|||||||
INIT_YMM avx2
|
INIT_YMM avx2
|
||||||
POSTSCALE_SLICE
|
POSTSCALE_SLICE
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%if HAVE_AVX512_EXTERNAL
|
||||||
|
INIT_ZMM avx512
|
||||||
|
POSTSCALE_SLICE
|
||||||
|
%endif
|
||||||
|
@@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu,
|
|||||||
|
|
||||||
void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
|
void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
|
||||||
void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
|
void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
|
||||||
|
void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max);
|
||||||
|
|
||||||
av_cold void ff_gblur_init_x86(GBlurContext *s)
|
av_cold void ff_gblur_init_x86(GBlurContext *s)
|
||||||
{
|
{
|
||||||
@@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s)
|
|||||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||||
s->horiz_slice = ff_horiz_slice_avx2;
|
s->horiz_slice = ff_horiz_slice_avx2;
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_AVX512(cpu_flags)) {
|
||||||
|
s->postscale_slice = ff_postscale_slice_avx512;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user