1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

x86/vvcdec: sao, add avx2 support

This is a part of Google Summer of Code 2023

Co-authored-by: Nuo Mi <nuomi2021@gmail.com>
This commit is contained in:
Shaun Loo
2024-12-14 22:06:31 +08:00
committed by Nuo Mi
parent 69f63c6210
commit bed1769957
5 changed files with 233 additions and 4 deletions

View File

@ -147,7 +147,7 @@ align 16
%assign i i+mmsize %assign i i+mmsize
%endrep %endrep
%if %2 == 48 %if %2 == 48 || %2 == 80 || %2 == 112
INIT_XMM cpuname INIT_XMM cpuname
mova m13, [srcq + i] mova m13, [srcq + i]
@ -160,7 +160,7 @@ INIT_XMM cpuname
%if cpuflag(avx2) %if cpuflag(avx2)
INIT_YMM cpuname INIT_YMM cpuname
%endif %endif
%endif ; %2 == 48 %endif ; %2 == 48 || %2 == 80 || %2 == 112
add dstq, dststrideq ; dst += dststride add dstq, dststrideq ; dst += dststride
add srcq, srcstrideq ; src += srcstride add srcq, srcstrideq ; src += srcstride
@ -280,7 +280,7 @@ align 16
%assign i i+mmsize %assign i i+mmsize
%endrep %endrep
%if %2 == 48 %if %2 == 48 || %2 == 80 || %2 == 112
INIT_XMM cpuname INIT_XMM cpuname
mova m1, [srcq + i] mova m1, [srcq + i]
@ -291,7 +291,7 @@ INIT_XMM cpuname
%if cpuflag(avx2) %if cpuflag(avx2)
INIT_YMM cpuname INIT_YMM cpuname
%endif %endif
%endif %endif ; %2 == 48 || %2 == 80 || %2 == 112
add dstq, dststrideq add dstq, dststrideq
add srcq, EDGE_SRCSTRIDE add srcq, EDGE_SRCSTRIDE

View File

@ -8,4 +8,6 @@ X86ASM-OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/alf.o \
x86/vvc/mc.o \ x86/vvc/mc.o \
x86/vvc/of.o \ x86/vvc/of.o \
x86/vvc/sad.o \ x86/vvc/sad.o \
x86/vvc/sao.o \
x86/vvc/sao_10bit.o \
x86/h26x/h2656_inter.o x86/h26x/h2656_inter.o

View File

@ -215,6 +215,44 @@ ALF_FUNCS(16, 12, avx2)
#endif #endif
/*
 * Prototypes for the SAO filters that are implemented in assembly.
 *
 * SAO_FILTER_FUNC(wd, bitd, opt) declares the band filter and the edge filter
 * for one block width `wd`, one bit depth `bitd` and one instruction-set
 * suffix `opt`; the resulting symbols are named
 * ff_vvc_sao_band_filter_<wd>_<bitd>_<opt> and
 * ff_vvc_sao_edge_filter_<wd>_<bitd>_<opt>.
 *
 * The macro bodies deliberately do not end in a line continuation: a trailing
 * continuation splices whatever line comes next (here another #define) into
 * the macro body and breaks the build.
 */
#define SAO_FILTER_FUNC(wd, bitd, opt)                                            \
void ff_vvc_sao_band_filter_##wd##_##bitd##_##opt(uint8_t *_dst,                  \
    const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,            \
    const int16_t *sao_offset_val, int sao_left_class, int width, int height);    \
void ff_vvc_sao_edge_filter_##wd##_##bitd##_##opt(uint8_t *_dst,                  \
    const uint8_t *_src, ptrdiff_t _stride_dst,                                   \
    const int16_t *sao_offset_val, int eo, int width, int height);

/*
 * Declares both filters for every width that is instantiated:
 * 8, then 16 to 128 in steps of 16.
 */
#define SAO_FILTER_FUNCS(bitd, opt)                                               \
    SAO_FILTER_FUNC(8,   bitd, opt)                                               \
    SAO_FILTER_FUNC(16,  bitd, opt)                                               \
    SAO_FILTER_FUNC(32,  bitd, opt)                                               \
    SAO_FILTER_FUNC(48,  bitd, opt)                                               \
    SAO_FILTER_FUNC(64,  bitd, opt)                                               \
    SAO_FILTER_FUNC(80,  bitd, opt)                                               \
    SAO_FILTER_FUNC(96,  bitd, opt)                                               \
    SAO_FILTER_FUNC(112, bitd, opt)                                               \
    SAO_FILTER_FUNC(128, bitd, opt)

/* AVX2 prototypes for the three bit depths handled by the init code. */
SAO_FILTER_FUNCS(8,  avx2)
SAO_FILTER_FUNCS(10, avx2)
SAO_FILTER_FUNCS(12, avx2)
/*
 * Fills the nine-entry table c->sao.<type>_filter[] with the assembly
 * implementations for bit depth `bitd` and instruction-set suffix `opt`.
 * `type` is pasted into both the table name and the function name
 * (SAO_INIT passes `band` and `edge`).
 *
 * Table index to block width: [0] = 8, and [i] = 16 * i for i = 1..8.
 *
 * Expects a pointer named `c` to be in scope where the macro is expanded.
 * The do { } while (0) wrapper makes the expansion a single statement.
 */
#define SAO_FILTER_INIT(type, bitd, opt) do { \
c->sao.type##_filter[0] = ff_vvc_sao_##type##_filter_8_##bitd##_##opt; \
c->sao.type##_filter[1] = ff_vvc_sao_##type##_filter_16_##bitd##_##opt; \
c->sao.type##_filter[2] = ff_vvc_sao_##type##_filter_32_##bitd##_##opt; \
c->sao.type##_filter[3] = ff_vvc_sao_##type##_filter_48_##bitd##_##opt; \
c->sao.type##_filter[4] = ff_vvc_sao_##type##_filter_64_##bitd##_##opt; \
c->sao.type##_filter[5] = ff_vvc_sao_##type##_filter_80_##bitd##_##opt; \
c->sao.type##_filter[6] = ff_vvc_sao_##type##_filter_96_##bitd##_##opt; \
c->sao.type##_filter[7] = ff_vvc_sao_##type##_filter_112_##bitd##_##opt; \
c->sao.type##_filter[8] = ff_vvc_sao_##type##_filter_128_##bitd##_##opt; \
} while (0)
/*
 * Installs both SAO filter tables (band, then edge) for one bit depth `bitd`
 * and one instruction-set suffix `opt` by expanding SAO_FILTER_INIT twice.
 */
#define SAO_INIT(bitd, opt) do { \
SAO_FILTER_INIT(band, bitd, opt); \
SAO_FILTER_INIT(edge, bitd, opt); \
} while (0)
#define AVG_INIT(bd, opt) do { \ #define AVG_INIT(bd, opt) do { \
c->inter.avg = bf(vvc_avg, bd, opt); \ c->inter.avg = bf(vvc_avg, bd, opt); \
c->inter.w_avg = bf(vvc_w_avg, bd, opt); \ c->inter.w_avg = bf(vvc_w_avg, bd, opt); \
@ -329,6 +367,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
// filter // filter
ALF_INIT(8); ALF_INIT(8);
SAO_INIT(8, avx2);
} }
#endif #endif
break; break;
@ -350,6 +389,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
// filter // filter
ALF_INIT(10); ALF_INIT(10);
SAO_INIT(10, avx2);
} }
#endif #endif
break; break;
@ -371,6 +411,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
// filter // filter
ALF_INIT(12); ALF_INIT(12);
SAO_INIT(12, avx2);
} }
#endif #endif
break; break;

View File

@ -0,0 +1,73 @@
;******************************************************************************
;* SIMD optimized SAO functions for VVC 8bit decoding
;*
;* Copyright (c) 2024 Shaun Loo
;* Copyright (c) 2024 Nuo Mi
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%define MAX_PB_SIZE 128
%include "libavcodec/x86/h26x/h2656_sao.asm"
; VVC_SAO_BAND_FILTER: two-argument wrapper that forwards its arguments to
; H2656_SAO_BAND_FILTER with the codec prefix "vvc" placed in front.
; NOTE(review): the first argument looks like the block width in pixels and
; the second like a per-row vector iteration count; confirm against the
; definition of H2656_SAO_BAND_FILTER in the included file.
%macro VVC_SAO_BAND_FILTER 2
H2656_SAO_BAND_FILTER vvc, %1, %2
%endmacro
; VVC_SAO_BAND_FILTER_FUNCS: zero-argument helper that expands
; VVC_SAO_BAND_FILTER once for each of the nine widths 8, 16, 32, ..., 128.
; NOTE(review): the second arguments (0, 1, 2, 2, 4, 4, 6, 6, 8) differ from
; the table used in the AVX2 section; presumably they are sized for 16-byte
; registers - confirm.
; NOTE(review): this macro is not invoked anywhere in the visible part of this
; file; check whether it is still needed.
%macro VVC_SAO_BAND_FILTER_FUNCS 0
VVC_SAO_BAND_FILTER 8, 0
VVC_SAO_BAND_FILTER 16, 1
VVC_SAO_BAND_FILTER 32, 2
VVC_SAO_BAND_FILTER 48, 2
VVC_SAO_BAND_FILTER 64, 4
VVC_SAO_BAND_FILTER 80, 4
VVC_SAO_BAND_FILTER 96, 6
VVC_SAO_BAND_FILTER 112, 6
VVC_SAO_BAND_FILTER 128, 8
%endmacro
; Instantiate the 8-bit band filters; the whole section is assembled only
; when HAVE_AVX2_EXTERNAL is non-zero.
; Widths 8 and 16 are emitted after INIT_XMM avx2, widths 32..128 after
; INIT_YMM avx2. For the YMM entries the second argument is width / 32,
; rounded down.
; NOTE(review): 48, 80 and 112 are not multiples of 32; presumably the shared
; macro processes the remaining 16 pixels separately - confirm.
%if HAVE_AVX2_EXTERNAL
INIT_XMM avx2
VVC_SAO_BAND_FILTER 8, 0
VVC_SAO_BAND_FILTER 16, 1
INIT_YMM avx2
VVC_SAO_BAND_FILTER 32, 1
VVC_SAO_BAND_FILTER 48, 1
VVC_SAO_BAND_FILTER 64, 2
VVC_SAO_BAND_FILTER 80, 2
VVC_SAO_BAND_FILTER 96, 3
VVC_SAO_BAND_FILTER 112, 3
VVC_SAO_BAND_FILTER 128, 4
%endif
; VVC_SAO_EDGE_FILTER: wrapper taking two or three arguments. The parameter
; range expression in the body expands to every argument that was supplied,
; so all of them are forwarded to H2656_SAO_EDGE_FILTER after the codec
; prefix "vvc".
%macro VVC_SAO_EDGE_FILTER 2-3
H2656_SAO_EDGE_FILTER vvc, %{1:-1}
%endmacro
; Instantiate the 8-bit edge filters; the whole section is assembled only
; when HAVE_AVX2_EXTERNAL is non-zero.
; Widths 8 and 16 are emitted after INIT_XMM avx2, widths 32..128 after
; INIT_YMM avx2, with the same second-argument table as the band filters.
; The optional third argument is "a" for widths 16, 32, 64, 96 and 128,
; "u" for widths 48, 80 and 112, and omitted for width 8.
; NOTE(review): "a" / "u" presumably select aligned versus unaligned memory
; access inside the shared macro - confirm against H2656_SAO_EDGE_FILTER.
%if HAVE_AVX2_EXTERNAL
INIT_XMM avx2
VVC_SAO_EDGE_FILTER 8, 0
VVC_SAO_EDGE_FILTER 16, 1, a
INIT_YMM avx2
VVC_SAO_EDGE_FILTER 32, 1, a
VVC_SAO_EDGE_FILTER 48, 1, u
VVC_SAO_EDGE_FILTER 64, 2, a
VVC_SAO_EDGE_FILTER 80, 2, u
VVC_SAO_EDGE_FILTER 96, 3, a
VVC_SAO_EDGE_FILTER 112, 3, u
VVC_SAO_EDGE_FILTER 128, 4, a
%endif

View File

@ -0,0 +1,113 @@
;******************************************************************************
;* SIMD optimized SAO functions for VVC 10/12bit decoding
;*
;* Copyright (c) 2024 Shaun Loo
;* Copyright (c) 2024 Nuo Mi
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%define MAX_PB_SIZE 128
%include "libavcodec/x86/h26x/h2656_sao_10bit.asm"
; VVC_SAO_BAND_FILTER: three-argument wrapper that forwards its arguments to
; H2656_SAO_BAND_FILTER with the codec prefix "vvc" placed in front.
; The callers in this file pass, in order: 10 or 12, a width from 8 to 128,
; and a small count.
; NOTE(review): the first argument is presumably the bit depth and the third
; a per-row vector iteration count - confirm against the included file.
%macro VVC_SAO_BAND_FILTER 3
H2656_SAO_BAND_FILTER vvc, %1, %2, %3
%endmacro
; VVC_SAO_BAND_FILTER_FUNCS (one-argument form): expands VVC_SAO_BAND_FILTER
; for the nine widths 8, 16, 32, ..., 128, passing its own argument through as
; the first parameter. The third parameter is always width / 8.
; NOTE(review): width / 8 presumably corresponds to eight 16-bit samples per
; 16-byte register - confirm.
%macro VVC_SAO_BAND_FILTER_FUNCS 1
VVC_SAO_BAND_FILTER %1, 8, 1
VVC_SAO_BAND_FILTER %1, 16, 2
VVC_SAO_BAND_FILTER %1, 32, 4
VVC_SAO_BAND_FILTER %1, 48, 6
VVC_SAO_BAND_FILTER %1, 64, 8
VVC_SAO_BAND_FILTER %1, 80, 10
VVC_SAO_BAND_FILTER %1, 96, 12
VVC_SAO_BAND_FILTER %1, 112, 14
VVC_SAO_BAND_FILTER %1, 128, 16
%endmacro
; VVC_SAO_BAND_FILTER_FUNCS (zero-argument form): an overload, distinguished
; from the one-argument form by its parameter count, that expands the
; one-argument form for 10 and for 12.
%macro VVC_SAO_BAND_FILTER_FUNCS 0
VVC_SAO_BAND_FILTER_FUNCS 10
VVC_SAO_BAND_FILTER_FUNCS 12
%endmacro
; Emit the full band-filter set twice: once after INIT_XMM sse2 and once
; after INIT_XMM avx.
; NOTE(review): confirm that the C init code actually references the sse2 and
; avx symbols generated here.
INIT_XMM sse2
VVC_SAO_BAND_FILTER_FUNCS
INIT_XMM avx
VVC_SAO_BAND_FILTER_FUNCS
; AVX2 band filters, assembled only when HAVE_AVX2_EXTERNAL is non-zero.
; VVC_SAO_BAND_FILTER_FUNCS_AVX2 takes one argument (invoked below with 10
; and 12) and passes it through as the first parameter of every expansion.
; Width 8 is emitted after INIT_XMM avx2 with a count of 1; widths 16..128 are
; emitted after INIT_YMM avx2 with a count of width / 16.
%if HAVE_AVX2_EXTERNAL
%macro VVC_SAO_BAND_FILTER_FUNCS_AVX2 1
INIT_XMM avx2
VVC_SAO_BAND_FILTER %1, 8, 1
INIT_YMM avx2
VVC_SAO_BAND_FILTER %1, 16, 1
VVC_SAO_BAND_FILTER %1, 32, 2
VVC_SAO_BAND_FILTER %1, 48, 3
VVC_SAO_BAND_FILTER %1, 64, 4
VVC_SAO_BAND_FILTER %1, 80, 5
VVC_SAO_BAND_FILTER %1, 96, 6
VVC_SAO_BAND_FILTER %1, 112, 7
VVC_SAO_BAND_FILTER %1, 128, 8
%endmacro
VVC_SAO_BAND_FILTER_FUNCS_AVX2 10
VVC_SAO_BAND_FILTER_FUNCS_AVX2 12
%endif ; HAVE_AVX2_EXTERNAL
; VVC_SAO_EDGE_FILTER: three-argument wrapper that forwards its arguments to
; H2656_SAO_EDGE_FILTER with the codec prefix "vvc" placed in front.
; The callers in this file pass, in order: 10 or 12, a width from 8 to 128,
; and a small count.
; NOTE(review): the first argument is presumably the bit depth and the third
; a per-row vector iteration count - confirm against the included file.
%macro VVC_SAO_EDGE_FILTER 3
H2656_SAO_EDGE_FILTER vvc, %1, %2, %3
%endmacro
; VVC_SAO_EDGE_FILTER_FUNCS: expands VVC_SAO_EDGE_FILTER for the nine widths
; 8, 16, 32, ..., 128, passing its own argument through as the first
; parameter. The third parameter is always width / 8.
%macro VVC_SAO_EDGE_FILTER_FUNCS 1
VVC_SAO_EDGE_FILTER %1, 8, 1
VVC_SAO_EDGE_FILTER %1, 16, 2
VVC_SAO_EDGE_FILTER %1, 32, 4
VVC_SAO_EDGE_FILTER %1, 48, 6
VVC_SAO_EDGE_FILTER %1, 64, 8
VVC_SAO_EDGE_FILTER %1, 80, 10
VVC_SAO_EDGE_FILTER %1, 96, 12
VVC_SAO_EDGE_FILTER %1, 112, 14
VVC_SAO_EDGE_FILTER %1, 128, 16
%endmacro
; Emit the full edge-filter set after INIT_XMM sse2, once with argument 10
; and once with argument 12.
; NOTE(review): unlike the band filters there is no INIT_XMM avx pass here;
; confirm that this is intentional.
INIT_XMM sse2
VVC_SAO_EDGE_FILTER_FUNCS 10
VVC_SAO_EDGE_FILTER_FUNCS 12
; AVX2 edge filters, assembled only when HAVE_AVX2_EXTERNAL is non-zero.
; VVC_SAO_EDGE_FILTER_FUNCS_AVX2 takes one argument (invoked below with 10
; and 12) and passes it through as the first parameter of every expansion.
; Width 8 is emitted after INIT_XMM avx2 with a count of 1; widths 16..128 are
; emitted after INIT_YMM avx2 with a count of width / 16.
%if HAVE_AVX2_EXTERNAL
%macro VVC_SAO_EDGE_FILTER_FUNCS_AVX2 1
INIT_XMM avx2
VVC_SAO_EDGE_FILTER %1, 8, 1
INIT_YMM avx2
VVC_SAO_EDGE_FILTER %1, 16, 1
VVC_SAO_EDGE_FILTER %1, 32, 2
VVC_SAO_EDGE_FILTER %1, 48, 3
VVC_SAO_EDGE_FILTER %1, 64, 4
VVC_SAO_EDGE_FILTER %1, 80, 5
VVC_SAO_EDGE_FILTER %1, 96, 6
VVC_SAO_EDGE_FILTER %1, 112, 7
VVC_SAO_EDGE_FILTER %1, 128, 8
%endmacro
VVC_SAO_EDGE_FILTER_FUNCS_AVX2 10
VVC_SAO_EDGE_FILTER_FUNCS_AVX2 12
%endif ; HAVE_AVX2_EXTERNAL