mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-28 20:53:54 +02:00
x86/hevc_deblock: add ff_hevc_[hv]_loop_filter_luma_{8, 10, 12}_avx
~5% faster than SSSE3. Signed-off-by: James Almer <jamrial@gmail.com>. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>.
This commit is contained in:
parent
88ba821f23
commit
73c4f63ba5
@ -663,11 +663,11 @@ ALIGN 16
|
||||
MASKED_COPY m4, m8
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
|
||||
; uint8_t *_no_p, uint8_t *_no_q);
|
||||
;-----------------------------------------------------------------------------
|
||||
%macro LOOP_FILTER_CHROMA 0
|
||||
cglobal hevc_v_loop_filter_chroma_8, 3, 5, 7, pix, stride, tc, pix0, r3stride
|
||||
sub pixq, 2
|
||||
lea r3strideq, [3*strideq]
|
||||
@ -752,6 +752,12 @@ cglobal hevc_h_loop_filter_chroma_12, 3, 4, 7, pix, stride, tc, pix0
|
||||
movu [pix0q+strideq], m1
|
||||
movu [pixq], m2
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
LOOP_FILTER_CHROMA
|
||||
INIT_XMM avx
|
||||
LOOP_FILTER_CHROMA
|
||||
|
||||
%if ARCH_X86_64
|
||||
%macro LOOP_FILTER_LUMA 0
|
||||
@ -903,4 +909,6 @@ INIT_XMM sse2
|
||||
LOOP_FILTER_LUMA
|
||||
INIT_XMM ssse3
|
||||
LOOP_FILTER_LUMA
|
||||
INIT_XMM avx
|
||||
LOOP_FILTER_LUMA
|
||||
%endif
|
||||
|
@ -45,12 +45,18 @@ void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, pt
|
||||
LFC_FUNCS(uint8_t, 8, sse2)
|
||||
LFC_FUNCS(uint8_t, 10, sse2)
|
||||
LFC_FUNCS(uint8_t, 12, sse2)
|
||||
LFC_FUNCS(uint8_t, 8, avx)
|
||||
LFC_FUNCS(uint8_t, 10, avx)
|
||||
LFC_FUNCS(uint8_t, 12, avx)
|
||||
LFL_FUNCS(uint8_t, 8, sse2)
|
||||
LFL_FUNCS(uint8_t, 10, sse2)
|
||||
LFL_FUNCS(uint8_t, 12, sse2)
|
||||
LFL_FUNCS(uint8_t, 8, ssse3)
|
||||
LFL_FUNCS(uint8_t, 10, ssse3)
|
||||
LFL_FUNCS(uint8_t, 12, ssse3)
|
||||
LFL_FUNCS(uint8_t, 8, avx)
|
||||
LFL_FUNCS(uint8_t, 10, avx)
|
||||
LFL_FUNCS(uint8_t, 12, avx)
|
||||
|
||||
#define IDCT_FUNCS(W, opt) \
|
||||
void ff_hevc_idct##W##_dc_8_##opt(int16_t *coeffs); \
|
||||
@ -492,6 +498,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags)) {
|
||||
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
|
||||
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
|
||||
if (ARCH_X86_64) {
|
||||
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
|
||||
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
|
||||
}
|
||||
}
|
||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
|
||||
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
|
||||
@ -528,6 +542,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags)) {
|
||||
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
|
||||
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
|
||||
if (ARCH_X86_64) {
|
||||
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
|
||||
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
|
||||
}
|
||||
}
|
||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
|
||||
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
|
||||
@ -565,6 +587,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags)) {
|
||||
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
|
||||
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
|
||||
if (ARCH_X86_64) {
|
||||
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
|
||||
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
|
||||
}
|
||||
}
|
||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||
c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
|
||||
c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
|
||||
|
Loading…
Reference in New Issue
Block a user