mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)
This commit is contained in:
parent
713f9c5b5d
commit
869efbf971
@ -25,16 +25,18 @@
|
||||
SECTION_RODATA
|
||||
|
||||
pb_128: times 16 db 128
|
||||
pb_128_0 : times 8 db 0, 128
|
||||
|
||||
SECTION .text
|
||||
|
||||
%macro THRESHOLD_8 0
|
||||
;%1 depth (8 or 16) ; %2 b or w ; %3 constant
|
||||
%macro THRESHOLD 3
|
||||
%if ARCH_X86_64
|
||||
cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
|
||||
cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
|
||||
mov wd, dword wm
|
||||
mov hd, dword hm
|
||||
%else
|
||||
cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
|
||||
cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
|
||||
mov wd, r10m
|
||||
%define ilinesizeq r5mp
|
||||
%define tlinesizeq r6mp
|
||||
@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
|
||||
%define olinesizeq r9mp
|
||||
%define hd r11mp
|
||||
%endif
|
||||
VBROADCASTI128 m4, [pb_128]
|
||||
VBROADCASTI128 m4, [%3]
|
||||
%if %1 == 16
|
||||
add wq, wq ; w *= 2 (16 bits instead of 8)
|
||||
%endif
|
||||
add inq, wq
|
||||
add thresholdq, wq
|
||||
add minq, wq
|
||||
@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
|
||||
movu m3, [maxq + xq]
|
||||
pxor m0, m4
|
||||
pxor m1, m4
|
||||
pcmpgtb m0, m1
|
||||
pcmpgt%2 m0, m1
|
||||
PBLENDVB m3, m2, m0
|
||||
movu [outq + xq], m3
|
||||
add xq, mmsize
|
||||
@ -77,9 +82,11 @@ RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse4
|
||||
THRESHOLD_8
|
||||
THRESHOLD 8, b, pb_128
|
||||
THRESHOLD 16, w, pb_128_0
|
||||
|
||||
%if HAVE_AVX2_EXTERNAL
|
||||
INIT_YMM avx2
|
||||
THRESHOLD_8
|
||||
THRESHOLD 8, b, pb_128
|
||||
THRESHOLD 16, w, pb_128_0
|
||||
%endif
|
||||
|
@ -23,20 +23,19 @@
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavfilter/threshold.h"
|
||||
|
||||
void ff_threshold8_sse4(const uint8_t *in, const uint8_t *threshold,
|
||||
const uint8_t *min, const uint8_t *max,
|
||||
uint8_t *out,
|
||||
ptrdiff_t ilinesize, ptrdiff_t tlinesize,
|
||||
ptrdiff_t flinesize, ptrdiff_t slinesize,
|
||||
ptrdiff_t olinesize,
|
||||
int w, int h);
|
||||
void ff_threshold8_avx2(const uint8_t *in, const uint8_t *threshold,
|
||||
const uint8_t *min, const uint8_t *max,
|
||||
uint8_t *out,
|
||||
ptrdiff_t ilinesize, ptrdiff_t tlinesize,
|
||||
ptrdiff_t flinesize, ptrdiff_t slinesize,
|
||||
ptrdiff_t olinesize,
|
||||
int w, int h);
|
||||
#define THRESHOLD_FUNC(depth, opt) \
|
||||
void ff_threshold##depth##_##opt(const uint8_t *in, const uint8_t *threshold,\
|
||||
const uint8_t *min, const uint8_t *max, \
|
||||
uint8_t *out, \
|
||||
ptrdiff_t ilinesize, ptrdiff_t tlinesize, \
|
||||
ptrdiff_t flinesize, ptrdiff_t slinesize, \
|
||||
ptrdiff_t olinesize, \
|
||||
int w, int h);
|
||||
|
||||
THRESHOLD_FUNC(8, sse4)
|
||||
THRESHOLD_FUNC(8, avx2)
|
||||
THRESHOLD_FUNC(16, sse4)
|
||||
THRESHOLD_FUNC(16, avx2)
|
||||
|
||||
av_cold void ff_threshold_init_x86(ThresholdContext *s)
|
||||
{
|
||||
@ -49,5 +48,12 @@ av_cold void ff_threshold_init_x86(ThresholdContext *s)
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
s->threshold = ff_threshold8_avx2;
|
||||
}
|
||||
} else if (s->depth == 16) {
|
||||
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||
s->threshold = ff_threshold16_sse4;
|
||||
}
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
s->threshold = ff_threshold16_avx2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user