You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
x86/vvcdec: sao, add avx2 support
This is a part of Google Summer of Code 2023.
Co-authored-by: Nuo Mi <nuomi2021@gmail.com>
This commit is contained in:
@ -147,7 +147,7 @@ align 16
|
|||||||
%assign i i+mmsize
|
%assign i i+mmsize
|
||||||
%endrep
|
%endrep
|
||||||
|
|
||||||
%if %2 == 48
|
%if %2 == 48 || %2 == 80 || %2 == 112
|
||||||
INIT_XMM cpuname
|
INIT_XMM cpuname
|
||||||
|
|
||||||
mova m13, [srcq + i]
|
mova m13, [srcq + i]
|
||||||
@ -160,7 +160,7 @@ INIT_XMM cpuname
|
|||||||
%if cpuflag(avx2)
|
%if cpuflag(avx2)
|
||||||
INIT_YMM cpuname
|
INIT_YMM cpuname
|
||||||
%endif
|
%endif
|
||||||
%endif ; %2 == 48
|
%endif ; %2 == 48 || %2 == 80 || %2 == 112
|
||||||
|
|
||||||
add dstq, dststrideq ; dst += dststride
|
add dstq, dststrideq ; dst += dststride
|
||||||
add srcq, srcstrideq ; src += srcstride
|
add srcq, srcstrideq ; src += srcstride
|
||||||
@ -280,7 +280,7 @@ align 16
|
|||||||
%assign i i+mmsize
|
%assign i i+mmsize
|
||||||
%endrep
|
%endrep
|
||||||
|
|
||||||
%if %2 == 48
|
%if %2 == 48 || %2 == 80 || %2 == 112
|
||||||
INIT_XMM cpuname
|
INIT_XMM cpuname
|
||||||
|
|
||||||
mova m1, [srcq + i]
|
mova m1, [srcq + i]
|
||||||
@ -291,7 +291,7 @@ INIT_XMM cpuname
|
|||||||
%if cpuflag(avx2)
|
%if cpuflag(avx2)
|
||||||
INIT_YMM cpuname
|
INIT_YMM cpuname
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif ; %2 == 48 || %2 == 80 || %2 == 112
|
||||||
|
|
||||||
add dstq, dststrideq
|
add dstq, dststrideq
|
||||||
add srcq, EDGE_SRCSTRIDE
|
add srcq, EDGE_SRCSTRIDE
|
||||||
|
@ -8,4 +8,6 @@ X86ASM-OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/alf.o \
|
|||||||
x86/vvc/mc.o \
|
x86/vvc/mc.o \
|
||||||
x86/vvc/of.o \
|
x86/vvc/of.o \
|
||||||
x86/vvc/sad.o \
|
x86/vvc/sad.o \
|
||||||
|
x86/vvc/sao.o \
|
||||||
|
x86/vvc/sao_10bit.o \
|
||||||
x86/h26x/h2656_inter.o
|
x86/h26x/h2656_inter.o
|
||||||
|
@ -215,6 +215,44 @@ ALF_FUNCS(16, 12, avx2)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
 * Declares the prototypes of the two SAO assembly entry points for one
 * (width, bit depth, instruction set) combination:
 *   ff_vvc_sao_band_filter_<wd>_<bitd>_<opt>
 *   ff_vvc_sao_edge_filter_<wd>_<bitd>_<opt>
 * Only the band filter takes a source stride; the edge filter prototype
 * carries a destination stride only.
 * The last declaration still ends in a line continuation, so the line that
 * follows the macro body is absorbed into it and has to stay empty.
 */
#define SAO_FILTER_FUNC(wd, bitd, opt) \
|
||||||
|
void ff_vvc_sao_band_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
|
||||||
|
const int16_t *sao_offset_val, int sao_left_class, int width, int height); \
|
||||||
|
void ff_vvc_sao_edge_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, \
|
||||||
|
const int16_t *sao_offset_val, int eo, int width, int height); \
|
||||||
|
|
||||||
|
/*
 * Expands SAO_FILTER_FUNC once per supported block width -- 8, then 16..128
 * in steps of 16 -- for bit depth `bitd` and instruction set `opt`.
 * The final entry also ends in a continuation backslash, so the line after
 * the macro body has to stay empty.
 */
#define SAO_FILTER_FUNCS(bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(8, bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(16, bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(32, bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(48, bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(64, bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(80, bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(96, bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(112, bitd, opt) \
|
||||||
|
SAO_FILTER_FUNC(128, bitd, opt) \
|
||||||
|
|
||||||
|
/* Prototypes of the AVX2 band/edge SAO filters for bit depths 8, 10 and 12. */
SAO_FILTER_FUNCS(8, avx2)
|
||||||
|
SAO_FILTER_FUNCS(10, avx2)
|
||||||
|
SAO_FILTER_FUNCS(12, avx2)
|
||||||
|
|
||||||
|
/*
 * Fills the nine entries of c->sao.<type>_filter (`type` is `band` or `edge`
 * at the visible call sites) with the assembly functions for bit depth `bitd`
 * and instruction set `opt`.
 * Index 0 is the width-8 function; indices 1..8 are widths 16..128 in steps
 * of 16. Expects a variable named `c` to be in scope at the expansion site.
 */
#define SAO_FILTER_INIT(type, bitd, opt) do { \
|
||||||
|
c->sao.type##_filter[0] = ff_vvc_sao_##type##_filter_8_##bitd##_##opt; \
|
||||||
|
c->sao.type##_filter[1] = ff_vvc_sao_##type##_filter_16_##bitd##_##opt; \
|
||||||
|
c->sao.type##_filter[2] = ff_vvc_sao_##type##_filter_32_##bitd##_##opt; \
|
||||||
|
c->sao.type##_filter[3] = ff_vvc_sao_##type##_filter_48_##bitd##_##opt; \
|
||||||
|
c->sao.type##_filter[4] = ff_vvc_sao_##type##_filter_64_##bitd##_##opt; \
|
||||||
|
c->sao.type##_filter[5] = ff_vvc_sao_##type##_filter_80_##bitd##_##opt; \
|
||||||
|
c->sao.type##_filter[6] = ff_vvc_sao_##type##_filter_96_##bitd##_##opt; \
|
||||||
|
c->sao.type##_filter[7] = ff_vvc_sao_##type##_filter_112_##bitd##_##opt; \
|
||||||
|
c->sao.type##_filter[8] = ff_vvc_sao_##type##_filter_128_##bitd##_##opt; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/* Installs both the band and the edge SAO filter tables for `bitd` / `opt`. */
#define SAO_INIT(bitd, opt) do { \
|
||||||
|
SAO_FILTER_INIT(band, bitd, opt); \
|
||||||
|
SAO_FILTER_INIT(edge, bitd, opt); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#define AVG_INIT(bd, opt) do { \
|
#define AVG_INIT(bd, opt) do { \
|
||||||
c->inter.avg = bf(vvc_avg, bd, opt); \
|
c->inter.avg = bf(vvc_avg, bd, opt); \
|
||||||
c->inter.w_avg = bf(vvc_w_avg, bd, opt); \
|
c->inter.w_avg = bf(vvc_w_avg, bd, opt); \
|
||||||
@ -329,6 +367,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
|
|||||||
|
|
||||||
// filter
|
// filter
|
||||||
ALF_INIT(8);
|
ALF_INIT(8);
|
||||||
|
SAO_INIT(8, avx2);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
@ -350,6 +389,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
|
|||||||
|
|
||||||
// filter
|
// filter
|
||||||
ALF_INIT(10);
|
ALF_INIT(10);
|
||||||
|
SAO_INIT(10, avx2);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
@ -371,6 +411,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
|
|||||||
|
|
||||||
// filter
|
// filter
|
||||||
ALF_INIT(12);
|
ALF_INIT(12);
|
||||||
|
SAO_INIT(12, avx2);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
73
libavcodec/x86/vvc/sao.asm
Normal file
73
libavcodec/x86/vvc/sao.asm
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
;******************************************************************************
|
||||||
|
;* SIMD optimized SAO functions for VVC 8bit decoding
|
||||||
|
;*
|
||||||
|
;* Copyright (c) 2024 Shaun Loo
|
||||||
|
;* Copyright (c) 2024 Nuo Mi
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%define MAX_PB_SIZE 128
|
||||||
|
%include "libavcodec/x86/h26x/h2656_sao.asm"
|
||||||
|
|
||||||
|
; Thin VVC wrapper: expands the shared H2656_SAO_BAND_FILTER template with
; "vvc" as its first argument (presumably the symbol-name prefix).
;   %1 - block width
;   %2 - forwarded unchanged; looks like the number of full-register steps
;        per row -- TODO confirm against h2656_sao.asm
%macro VVC_SAO_BAND_FILTER 2
|
||||||
|
H2656_SAO_BAND_FILTER vvc, %1, %2
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Band-filter instantiation list covering all nine widths (8, 16..128).
; NOTE(review): nothing in the visible part of this file invokes this macro;
; the AVX2 section spells out its own list with different second arguments.
; Confirm whether this macro is still needed.
%macro VVC_SAO_BAND_FILTER_FUNCS 0
|
||||||
|
VVC_SAO_BAND_FILTER 8, 0
|
||||||
|
VVC_SAO_BAND_FILTER 16, 1
|
||||||
|
VVC_SAO_BAND_FILTER 32, 2
|
||||||
|
VVC_SAO_BAND_FILTER 48, 2
|
||||||
|
VVC_SAO_BAND_FILTER 64, 4
|
||||||
|
VVC_SAO_BAND_FILTER 80, 4
|
||||||
|
VVC_SAO_BAND_FILTER 96, 6
|
||||||
|
VVC_SAO_BAND_FILTER 112, 6
|
||||||
|
VVC_SAO_BAND_FILTER 128, 8
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; AVX2 8-bit band filters, assembled only when HAVE_AVX2_EXTERNAL is set.
; Widths 8 and 16 are built under INIT_XMM, widths 32..128 under INIT_YMM.
; NOTE(review): widths 48/80/112 pass the same second argument as 32/64/96;
; presumably the remaining 16 bytes are covered by the
; "%2 == 48 || %2 == 80 || %2 == 112" tail of the shared template -- confirm.
%if HAVE_AVX2_EXTERNAL
|
||||||
|
INIT_XMM avx2
|
||||||
|
VVC_SAO_BAND_FILTER 8, 0
|
||||||
|
VVC_SAO_BAND_FILTER 16, 1
|
||||||
|
INIT_YMM avx2
|
||||||
|
VVC_SAO_BAND_FILTER 32, 1
|
||||||
|
VVC_SAO_BAND_FILTER 48, 1
|
||||||
|
VVC_SAO_BAND_FILTER 64, 2
|
||||||
|
VVC_SAO_BAND_FILTER 80, 2
|
||||||
|
VVC_SAO_BAND_FILTER 96, 3
|
||||||
|
VVC_SAO_BAND_FILTER 112, 3
|
||||||
|
VVC_SAO_BAND_FILTER 128, 4
|
||||||
|
%endif
|
||||||
|
|
||||||
|
; Thin VVC wrapper around the shared H2656_SAO_EDGE_FILTER template.
; Accepts two or three arguments and forwards all of them after "vvc":
; %{1:-1} is the parameter range from the first to the last argument.
%macro VVC_SAO_EDGE_FILTER 2-3
|
||||||
|
H2656_SAO_EDGE_FILTER vvc, %{1:-1}
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; AVX2 8-bit edge filters, assembled only when HAVE_AVX2_EXTERNAL is set.
; Widths 8 and 16 are built under INIT_XMM, widths 32..128 under INIT_YMM.
; Width 8 passes no third argument.
; NOTE(review): the third argument is `a` for widths 16/32/64/96/128 and `u`
; for 48/80/112 -- presumably aligned vs. unaligned access; confirm in
; h2656_sao.asm.
%if HAVE_AVX2_EXTERNAL
|
||||||
|
INIT_XMM avx2
|
||||||
|
VVC_SAO_EDGE_FILTER 8, 0
|
||||||
|
VVC_SAO_EDGE_FILTER 16, 1, a
|
||||||
|
INIT_YMM avx2
|
||||||
|
VVC_SAO_EDGE_FILTER 32, 1, a
|
||||||
|
VVC_SAO_EDGE_FILTER 48, 1, u
|
||||||
|
VVC_SAO_EDGE_FILTER 64, 2, a
|
||||||
|
VVC_SAO_EDGE_FILTER 80, 2, u
|
||||||
|
VVC_SAO_EDGE_FILTER 96, 3, a
|
||||||
|
VVC_SAO_EDGE_FILTER 112, 3, u
|
||||||
|
VVC_SAO_EDGE_FILTER 128, 4, a
|
||||||
|
%endif
|
113
libavcodec/x86/vvc/sao_10bit.asm
Normal file
113
libavcodec/x86/vvc/sao_10bit.asm
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
;******************************************************************************
|
||||||
|
;* SIMD optimized SAO functions for VVC 10/12bit decoding
|
||||||
|
;*
|
||||||
|
;* Copyright (c) 2024 Shaun Loo
|
||||||
|
;* Copyright (c) 2024 Nuo Mi
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%define MAX_PB_SIZE 128
|
||||||
|
%include "libavcodec/x86/h26x/h2656_sao_10bit.asm"
|
||||||
|
|
||||||
|
; Thin VVC wrapper: expands the shared H2656_SAO_BAND_FILTER template with
; "vvc" as its first argument (presumably the symbol-name prefix).
;   %1 - bit depth (10 or 12 at the visible call sites)
;   %2 - block width
;   %3 - forwarded unchanged; looks like a per-row iteration count --
;        TODO confirm against h2656_sao_10bit.asm
%macro VVC_SAO_BAND_FILTER 3
|
||||||
|
H2656_SAO_BAND_FILTER vvc, %1, %2, %3
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Instantiates the band filter for bit depth %1 at all nine widths
; (8, 16..128). The third argument equals width / 8 for every entry.
%macro VVC_SAO_BAND_FILTER_FUNCS 1
|
||||||
|
VVC_SAO_BAND_FILTER %1, 8, 1
|
||||||
|
VVC_SAO_BAND_FILTER %1, 16, 2
|
||||||
|
VVC_SAO_BAND_FILTER %1, 32, 4
|
||||||
|
VVC_SAO_BAND_FILTER %1, 48, 6
|
||||||
|
VVC_SAO_BAND_FILTER %1, 64, 8
|
||||||
|
VVC_SAO_BAND_FILTER %1, 80, 10
|
||||||
|
VVC_SAO_BAND_FILTER %1, 96, 12
|
||||||
|
VVC_SAO_BAND_FILTER %1, 112, 14
|
||||||
|
VVC_SAO_BAND_FILTER %1, 128, 16
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Zero-argument overload of VVC_SAO_BAND_FILTER_FUNCS (NASM selects between
; same-named multi-line macros by argument count): instantiates the band
; filters for both bit depth 10 and bit depth 12.
%macro VVC_SAO_BAND_FILTER_FUNCS 0
|
||||||
|
VVC_SAO_BAND_FILTER_FUNCS 10
|
||||||
|
VVC_SAO_BAND_FILTER_FUNCS 12
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Emits the 10/12-bit band filters twice, once for sse2 and once for avx,
; both under INIT_XMM.
; NOTE(review): the C glue visible alongside this change only declares and
; installs the avx2 variants -- confirm the sse2/avx symbols are referenced.
INIT_XMM sse2
|
||||||
|
VVC_SAO_BAND_FILTER_FUNCS
|
||||||
|
INIT_XMM avx
|
||||||
|
VVC_SAO_BAND_FILTER_FUNCS
|
||||||
|
|
||||||
|
; AVX2 band filters for bit depths 10 and 12, assembled only when
; HAVE_AVX2_EXTERNAL is set.
; VVC_SAO_BAND_FILTER_FUNCS_AVX2 takes the bit depth as %1. Width 8 is built
; under INIT_XMM; widths 16..128 are built under INIT_YMM with a third
; argument equal to width / 16.
%if HAVE_AVX2_EXTERNAL
|
||||||
|
|
||||||
|
%macro VVC_SAO_BAND_FILTER_FUNCS_AVX2 1
|
||||||
|
INIT_XMM avx2
|
||||||
|
VVC_SAO_BAND_FILTER %1, 8, 1
|
||||||
|
INIT_YMM avx2
|
||||||
|
VVC_SAO_BAND_FILTER %1, 16, 1
|
||||||
|
VVC_SAO_BAND_FILTER %1, 32, 2
|
||||||
|
VVC_SAO_BAND_FILTER %1, 48, 3
|
||||||
|
VVC_SAO_BAND_FILTER %1, 64, 4
|
||||||
|
VVC_SAO_BAND_FILTER %1, 80, 5
|
||||||
|
VVC_SAO_BAND_FILTER %1, 96, 6
|
||||||
|
VVC_SAO_BAND_FILTER %1, 112, 7
|
||||||
|
VVC_SAO_BAND_FILTER %1, 128, 8
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
VVC_SAO_BAND_FILTER_FUNCS_AVX2 10
|
||||||
|
VVC_SAO_BAND_FILTER_FUNCS_AVX2 12
|
||||||
|
|
||||||
|
%endif ; HAVE_AVX2_EXTERNAL
|
||||||
|
|
||||||
|
; Thin VVC wrapper: expands the shared H2656_SAO_EDGE_FILTER template with
; "vvc" as its first argument (presumably the symbol-name prefix).
;   %1 - bit depth (10 or 12 at the visible call sites)
;   %2 - block width
;   %3 - forwarded unchanged; looks like a per-row iteration count --
;        TODO confirm against h2656_sao_10bit.asm
%macro VVC_SAO_EDGE_FILTER 3
|
||||||
|
H2656_SAO_EDGE_FILTER vvc, %1, %2, %3
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Instantiates the edge filter for bit depth %1 at all nine widths
; (8, 16..128). The third argument equals width / 8 for every entry.
%macro VVC_SAO_EDGE_FILTER_FUNCS 1
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 8, 1
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 16, 2
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 32, 4
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 48, 6
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 64, 8
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 80, 10
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 96, 12
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 112, 14
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 128, 16
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Emits the sse2 edge filters for bit depths 10 and 12 under INIT_XMM.
; NOTE(review): the C glue visible alongside this change only declares and
; installs the avx2 variants -- confirm the sse2 symbols are referenced.
INIT_XMM sse2
|
||||||
|
VVC_SAO_EDGE_FILTER_FUNCS 10
|
||||||
|
VVC_SAO_EDGE_FILTER_FUNCS 12
|
||||||
|
|
||||||
|
; AVX2 edge filters for bit depths 10 and 12, assembled only when
; HAVE_AVX2_EXTERNAL is set.
; VVC_SAO_EDGE_FILTER_FUNCS_AVX2 takes the bit depth as %1. Width 8 is built
; under INIT_XMM; widths 16..128 are built under INIT_YMM with a third
; argument equal to width / 16.
%if HAVE_AVX2_EXTERNAL
|
||||||
|
|
||||||
|
%macro VVC_SAO_EDGE_FILTER_FUNCS_AVX2 1
|
||||||
|
INIT_XMM avx2
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 8, 1
|
||||||
|
INIT_YMM avx2
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 16, 1
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 32, 2
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 48, 3
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 64, 4
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 80, 5
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 96, 6
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 112, 7
|
||||||
|
VVC_SAO_EDGE_FILTER %1, 128, 8
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
VVC_SAO_EDGE_FILTER_FUNCS_AVX2 10
|
||||||
|
VVC_SAO_EDGE_FILTER_FUNCS_AVX2 12
|
||||||
|
|
||||||
|
%endif ; HAVE_AVX2_EXTERNAL
|
Reference in New Issue
Block a user