diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c index f8057dee9f..dc4157b071 100644 --- a/libavcodec/x86/hevc/dsp_init.c +++ b/libavcodec/x86/hevc/dsp_init.c @@ -217,7 +217,7 @@ static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dst } \ } #define mc_rep_bi_func(name, bitd, step, W, opt) \ -void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \ +static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \ ptrdiff_t _srcstride, const int16_t *_src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ @@ -257,7 +257,7 @@ static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dsts height, mx, my, width); \ } #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \ -void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ +static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ ptrdiff_t _srcstride, const int16_t *src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ @@ -284,7 +284,7 @@ static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src } #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ -void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ +static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ ptrdiff_t _srcstride, const int16_t *src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ @@ -320,7 +320,7 @@ static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, } #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ -void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ +static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ ptrdiff_t _srcstride, const int16_t *src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h index b89a67f2e9..b18d9449d7 100644 --- a/libavcodec/x86/hevcdsp.h +++ b/libavcodec/x86/hevcdsp.h @@ -37,35 +37,17 @@ dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; dst ## _bi_w[idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt -#define PEL_PROTOTYPE(name, D, opt) \ -void ff_hevc_put_bi_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); \ +typedef void bi_pel_func(uint8_t *_dst, ptrdiff_t _dststride, + const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, + int height, intptr_t mx, intptr_t my, int width); +#define BI_PEL_PROTOTYPE(name, W, D, opt) \ +bi_pel_func ff_hevc_put_bi_ ## name ## W ## _ ## D ## _##opt /////////////////////////////////////////////////////////////////////////////// // MC functions /////////////////////////////////////////////////////////////////////////////// -#define EPEL_PROTOTYPES(fname, bitd, opt) \ - PEL_PROTOTYPE(fname##4, bitd, opt); \ - PEL_PROTOTYPE(fname##6, bitd, opt); \ - PEL_PROTOTYPE(fname##8, bitd, opt); \ - PEL_PROTOTYPE(fname##12, bitd, opt); \ - PEL_PROTOTYPE(fname##16, bitd, opt); \ - PEL_PROTOTYPE(fname##24, bitd, opt); \ - PEL_PROTOTYPE(fname##32, bitd, opt); \ - PEL_PROTOTYPE(fname##48, bitd, opt); \ - PEL_PROTOTYPE(fname##64, bitd, opt) - -#define QPEL_PROTOTYPES(fname, bitd, opt) \ - PEL_PROTOTYPE(fname##4, bitd, opt); \ - PEL_PROTOTYPE(fname##8, bitd, opt); \ - PEL_PROTOTYPE(fname##12, bitd, opt); \ - PEL_PROTOTYPE(fname##16, bitd, opt); \ - PEL_PROTOTYPE(fname##24, bitd, opt); \ - PEL_PROTOTYPE(fname##32, bitd, opt); \ - PEL_PROTOTYPE(fname##48, bitd, opt); \ - PEL_PROTOTYPE(fname##64, bitd, opt) - #define WEIGHTING_PROTOTYPE(width, bitd, opt) \ void ff_hevc_put_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, int height, int denom, int _wx, int _ox); \ void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1) @@ -83,125 +65,101 @@ void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, /////////////////////////////////////////////////////////////////////////////// -// QPEL_PIXELS EPEL_PIXELS +// EPEL_PIXELS /////////////////////////////////////////////////////////////////////////////// -EPEL_PROTOTYPES(pel_pixels , 8, sse4); -EPEL_PROTOTYPES(pel_pixels , 10, sse4); -EPEL_PROTOTYPES(pel_pixels , 12, sse4); -void ff_hevc_put_bi_pel_pixels16_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels24_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); - -void ff_hevc_put_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +BI_PEL_PROTOTYPE(pel_pixels, 4, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 4, 10, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 4, 12, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 6, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 6, 10, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 6, 12, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 8, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 8, 10, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 8, 12, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 12, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 16, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 16, 10, avx2); +BI_PEL_PROTOTYPE(pel_pixels, 32, 8, avx2); /////////////////////////////////////////////////////////////////////////////// // EPEL /////////////////////////////////////////////////////////////////////////////// -EPEL_PROTOTYPES(epel_h , 8, sse4); -EPEL_PROTOTYPES(epel_h , 10, sse4); -EPEL_PROTOTYPES(epel_h , 12, sse4); -EPEL_PROTOTYPES(epel_v , 8, sse4); -EPEL_PROTOTYPES(epel_v , 10, sse4); -EPEL_PROTOTYPES(epel_v , 12, sse4); +BI_PEL_PROTOTYPE(epel_h, 4, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 4, 10, sse4); +BI_PEL_PROTOTYPE(epel_h, 4, 12, sse4); +BI_PEL_PROTOTYPE(epel_h, 6, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 6, 10, sse4); +BI_PEL_PROTOTYPE(epel_h, 6, 12, sse4); +BI_PEL_PROTOTYPE(epel_h, 8, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 8, 10, sse4); +BI_PEL_PROTOTYPE(epel_h, 8, 12, sse4); +BI_PEL_PROTOTYPE(epel_h, 12, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 16, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 16, 10, avx2); +BI_PEL_PROTOTYPE(epel_h, 32, 8, avx2); -EPEL_PROTOTYPES(epel_hv , 8, sse4); -EPEL_PROTOTYPES(epel_hv , 10, sse4); -EPEL_PROTOTYPES(epel_hv , 12, sse4); +BI_PEL_PROTOTYPE(epel_hv, 4, 8, sse4); +BI_PEL_PROTOTYPE(epel_hv, 4, 10, sse4); +BI_PEL_PROTOTYPE(epel_hv, 4, 12, sse4); +BI_PEL_PROTOTYPE(epel_hv, 6, 8, sse4); +BI_PEL_PROTOTYPE(epel_hv, 6, 10, sse4); +BI_PEL_PROTOTYPE(epel_hv, 6, 12, sse4); +BI_PEL_PROTOTYPE(epel_hv, 8, 8, sse4); +BI_PEL_PROTOTYPE(epel_hv, 8, 10, sse4); +BI_PEL_PROTOTYPE(epel_hv, 8, 12, sse4); +BI_PEL_PROTOTYPE(epel_hv, 16, 8, sse4); +BI_PEL_PROTOTYPE(epel_hv, 16, 10, avx2); +BI_PEL_PROTOTYPE(epel_hv, 32, 8, avx2); -PEL_PROTOTYPE(epel_h16, 8, avx2); -PEL_PROTOTYPE(epel_h24, 8, avx2); -PEL_PROTOTYPE(epel_h32, 8, avx2); -PEL_PROTOTYPE(epel_h48, 8, avx2); -PEL_PROTOTYPE(epel_h64, 8, avx2); - -PEL_PROTOTYPE(epel_h16,10, avx2); -PEL_PROTOTYPE(epel_h24,10, avx2); -PEL_PROTOTYPE(epel_h32,10, avx2); -PEL_PROTOTYPE(epel_h48,10, avx2); -PEL_PROTOTYPE(epel_h64,10, avx2); - -PEL_PROTOTYPE(epel_v16, 8, avx2); -PEL_PROTOTYPE(epel_v24, 8, avx2); -PEL_PROTOTYPE(epel_v32, 8, avx2); -PEL_PROTOTYPE(epel_v48, 8, avx2); -PEL_PROTOTYPE(epel_v64, 8, avx2); - -PEL_PROTOTYPE(epel_v16,10, avx2); -PEL_PROTOTYPE(epel_v24,10, avx2); -PEL_PROTOTYPE(epel_v32,10, avx2); -PEL_PROTOTYPE(epel_v48,10, avx2); -PEL_PROTOTYPE(epel_v64,10, avx2); - -PEL_PROTOTYPE(epel_hv16, 8, avx2); -PEL_PROTOTYPE(epel_hv24, 8, avx2); -PEL_PROTOTYPE(epel_hv32, 8, avx2); -PEL_PROTOTYPE(epel_hv48, 8, avx2); -PEL_PROTOTYPE(epel_hv64, 8, avx2); - -PEL_PROTOTYPE(epel_hv16,10, avx2); -PEL_PROTOTYPE(epel_hv24,10, avx2); -PEL_PROTOTYPE(epel_hv32,10, avx2); -PEL_PROTOTYPE(epel_hv48,10, avx2); -PEL_PROTOTYPE(epel_hv64,10, avx2); +BI_PEL_PROTOTYPE(epel_v, 4, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 4, 10, sse4); +BI_PEL_PROTOTYPE(epel_v, 4, 12, sse4); +BI_PEL_PROTOTYPE(epel_v, 6, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 6, 10, sse4); +BI_PEL_PROTOTYPE(epel_v, 6, 12, sse4); +BI_PEL_PROTOTYPE(epel_v, 8, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 8, 10, sse4); +BI_PEL_PROTOTYPE(epel_v, 8, 12, sse4); +BI_PEL_PROTOTYPE(epel_v, 12, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 16, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 16, 10, avx2); +BI_PEL_PROTOTYPE(epel_v, 32, 8, avx2); /////////////////////////////////////////////////////////////////////////////// // QPEL /////////////////////////////////////////////////////////////////////////////// -QPEL_PROTOTYPES(qpel_h , 8, sse4); -QPEL_PROTOTYPES(qpel_h , 10, sse4); -QPEL_PROTOTYPES(qpel_h , 12, sse4); -QPEL_PROTOTYPES(qpel_v, 8, sse4); -QPEL_PROTOTYPES(qpel_v, 10, sse4); -QPEL_PROTOTYPES(qpel_v, 12, sse4); +BI_PEL_PROTOTYPE(qpel_h, 4, 8, sse4); +BI_PEL_PROTOTYPE(qpel_h, 4, 10, sse4); +BI_PEL_PROTOTYPE(qpel_h, 4, 12, sse4); +BI_PEL_PROTOTYPE(qpel_h, 8, 8, sse4); +BI_PEL_PROTOTYPE(qpel_h, 8, 10, sse4); +BI_PEL_PROTOTYPE(qpel_h, 8, 12, sse4); +BI_PEL_PROTOTYPE(qpel_h, 12, 8, sse4); +BI_PEL_PROTOTYPE(qpel_h, 16, 8, sse4); +BI_PEL_PROTOTYPE(qpel_h, 16, 10, avx2); +BI_PEL_PROTOTYPE(qpel_h, 32, 8, avx2); -QPEL_PROTOTYPES(qpel_hv, 8, sse4); -QPEL_PROTOTYPES(qpel_hv, 10, sse4); -QPEL_PROTOTYPES(qpel_hv, 12, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 4, 8, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 4, 10, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 4, 12, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 8, 8, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 8, 10, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 8, 12, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 16, 10, avx2); -PEL_PROTOTYPE(qpel_h16, 8, avx2); -PEL_PROTOTYPE(qpel_h24, 8, avx2); -PEL_PROTOTYPE(qpel_h32, 8, avx2); -PEL_PROTOTYPE(qpel_h48, 8, avx2); -PEL_PROTOTYPE(qpel_h64, 8, avx2); - -PEL_PROTOTYPE(qpel_h16,10, avx2); -PEL_PROTOTYPE(qpel_h24,10, avx2); -PEL_PROTOTYPE(qpel_h32,10, avx2); -PEL_PROTOTYPE(qpel_h48,10, avx2); -PEL_PROTOTYPE(qpel_h64,10, avx2); - -PEL_PROTOTYPE(qpel_v16, 8, avx2); -PEL_PROTOTYPE(qpel_v24, 8, avx2); -PEL_PROTOTYPE(qpel_v32, 8, avx2); -PEL_PROTOTYPE(qpel_v48, 8, avx2); -PEL_PROTOTYPE(qpel_v64, 8, avx2); - -PEL_PROTOTYPE(qpel_v16,10, avx2); -PEL_PROTOTYPE(qpel_v24,10, avx2); -PEL_PROTOTYPE(qpel_v32,10, avx2); -PEL_PROTOTYPE(qpel_v48,10, avx2); -PEL_PROTOTYPE(qpel_v64,10, avx2); - -PEL_PROTOTYPE(qpel_hv16, 8, avx2); -PEL_PROTOTYPE(qpel_hv24, 8, avx2); -PEL_PROTOTYPE(qpel_hv32, 8, avx2); -PEL_PROTOTYPE(qpel_hv48, 8, avx2); -PEL_PROTOTYPE(qpel_hv64, 8, avx2); - -PEL_PROTOTYPE(qpel_hv16,10, avx2); -PEL_PROTOTYPE(qpel_hv24,10, avx2); -PEL_PROTOTYPE(qpel_hv32,10, avx2); -PEL_PROTOTYPE(qpel_hv48,10, avx2); -PEL_PROTOTYPE(qpel_hv64,10, avx2); +BI_PEL_PROTOTYPE(qpel_v, 4, 8, sse4); +BI_PEL_PROTOTYPE(qpel_v, 4, 10, sse4); +BI_PEL_PROTOTYPE(qpel_v, 4, 12, sse4); +BI_PEL_PROTOTYPE(qpel_v, 8, 8, sse4); +BI_PEL_PROTOTYPE(qpel_v, 8, 10, sse4); +BI_PEL_PROTOTYPE(qpel_v, 8, 12, sse4); +BI_PEL_PROTOTYPE(qpel_v, 12, 8, sse4); +BI_PEL_PROTOTYPE(qpel_v, 16, 8, sse4); +BI_PEL_PROTOTYPE(qpel_v, 16, 10, avx2); +BI_PEL_PROTOTYPE(qpel_v, 32, 8, avx2); WEIGHTING_PROTOTYPES(8, sse4); WEIGHTING_PROTOTYPES(10, sse4);