You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
x86/hevc: add 12bits support for MC
(cherry picked from commit 3fcb7a4595a6f40100a22110a5805e3b7510c0fd)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
committed by
Michael Niedermayer
parent
7df98d8c4d
commit
bd0f2d316f
@@ -21,11 +21,14 @@
|
|||||||
%include "libavutil/x86/x86util.asm"
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
SECTION_RODATA
|
SECTION_RODATA
|
||||||
pw_8: times 8 dw 512
|
pw_8: times 8 dw (1 << 9)
|
||||||
pw_10: times 8 dw 2048
|
pw_10: times 8 dw (1 << 11)
|
||||||
pw_bi_8: times 8 dw 256
|
pw_12: times 8 dw (1 << 13)
|
||||||
pw_bi_10: times 8 dw 1024
|
pw_bi_8: times 8 dw (1 << 8)
|
||||||
max_pixels_10: times 8 dw 1023
|
pw_bi_10: times 8 dw (1 << 10)
|
||||||
|
pw_bi_12: times 8 dw (1 << 12)
|
||||||
|
max_pixels_10: times 8 dw ((1 << 10)-1)
|
||||||
|
max_pixels_12: times 8 dw ((1 << 12)-1)
|
||||||
zero: times 4 dd 0
|
zero: times 4 dd 0
|
||||||
one_per_32: times 4 dd 1
|
one_per_32: times 4 dd 1
|
||||||
|
|
||||||
@@ -51,6 +54,7 @@ hevc_epel_filters_%4_%1 times %2 d%3 -2, 58
|
|||||||
|
|
||||||
EPEL_TABLE 8, 8, b, sse4
|
EPEL_TABLE 8, 8, b, sse4
|
||||||
EPEL_TABLE 10, 4, w, sse4
|
EPEL_TABLE 10, 4, w, sse4
|
||||||
|
EPEL_TABLE 12, 4, w, sse4
|
||||||
|
|
||||||
%macro QPEL_TABLE 4
|
%macro QPEL_TABLE 4
|
||||||
hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4
|
hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4
|
||||||
@@ -69,6 +73,7 @@ hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4
|
|||||||
|
|
||||||
QPEL_TABLE 8, 8, b, sse4
|
QPEL_TABLE 8, 8, b, sse4
|
||||||
QPEL_TABLE 10, 4, w, sse4
|
QPEL_TABLE 10, 4, w, sse4
|
||||||
|
QPEL_TABLE 12, 4, w, sse4
|
||||||
|
|
||||||
%define hevc_qpel_filters_sse4_14 hevc_qpel_filters_sse4_10
|
%define hevc_qpel_filters_sse4_14 hevc_qpel_filters_sse4_10
|
||||||
|
|
||||||
@@ -295,6 +300,29 @@ QPEL_TABLE 10, 4, w, sse4
|
|||||||
%endif
|
%endif
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
; PEL_12STORE2 dst, reg0, reg1
;   %1 = destination address expression (used as [%1])
;   %2 = source register; its low 32 bits are written to [%1]
;   %3 = accepted but never referenced by this macro
; NOTE(review): presumably 32 bits = two 16-bit samples at 12-bit depth,
; judging by the macro name only -- confirm against the callers.
%macro PEL_12STORE2 3
|
||||||
|
movd [%1], %2
|
||||||
|
%endmacro
|
||||||
|
; PEL_12STORE4 dst, reg0, reg1
;   %1 = destination address expression (used as [%1])
;   %2 = source register; its low 64 bits are written to [%1]
;   %3 = accepted but never referenced by this macro
; NOTE(review): presumably 64 bits = four 16-bit samples at 12-bit depth,
; judging by the macro name only -- confirm against the callers.
%macro PEL_12STORE4 3
|
||||||
|
movq [%1], %2
|
||||||
|
%endmacro
|
||||||
|
; PEL_12STORE6 dst, reg0, reg1
;   %1 = destination address expression
;   %2 = source register; 12 bytes of it are written to [%1 .. %1+11]
;   %3 = accepted but never referenced by this macro
; Side effect: %2 is shifted right by 8 bytes, so its original contents
; are not preserved after the macro.
; NOTE(review): presumably 12 bytes = six 16-bit samples at 12-bit depth,
; judging by the macro name only -- confirm against the callers.
%macro PEL_12STORE6 3
|
||||||
|
movq [%1], %2 ; bytes 0-7 of %2
|
||||||
|
psrldq %2, 8 ; move the upper 8 bytes of %2 down to the low half
|
||||||
|
movd [%1+8], %2 ; bytes 8-11 of the original %2
|
||||||
|
%endmacro
|
||||||
|
; PEL_12STORE8 dst, reg0, reg1
;   %1 = destination address expression (used as [%1])
;   %2 = source register, written in full to [%1] with an aligned store
;   %3 = accepted but never referenced by this macro
; movdqa is the aligned-move form, so [%1] is expected to be suitably
; aligned for the register width (16 bytes for an xmm register).
; NOTE(review): presumably one xmm register = eight 16-bit samples at
; 12-bit depth, judging by the macro name only -- confirm against callers.
%macro PEL_12STORE8 3
|
||||||
|
movdqa [%1], %2
|
||||||
|
%endmacro
|
||||||
|
; PEL_12STORE12 dst, reg0, reg1
;   %1 = destination address expression
;   %2 = first source register, written in full to [%1] (aligned store)
;   %3 = second source register; its low 64 bits are written to [%1+16]
; movdqa is the aligned-move form, so [%1] is expected to be suitably
; aligned for the register width (16 bytes for an xmm register).
; NOTE(review): presumably 24 bytes = twelve 16-bit samples at 12-bit
; depth, judging by the macro name only -- confirm against the callers.
%macro PEL_12STORE12 3
|
||||||
|
movdqa [%1], %2 ; first 16 bytes
|
||||||
|
movq [%1+16], %3 ; following 8 bytes
|
||||||
|
%endmacro
|
||||||
|
; PEL_12STORE16 dst, reg0, reg1
;   %1 = destination address expression
;   %2 = first source register, stored to [%1] via PEL_12STORE8
;   %3 = second source register, stored in full to [%1+16]
; Both stores use movdqa (the aligned-move form), so [%1] and [%1+16] are
; expected to be suitably aligned (16 bytes for xmm registers).
; NOTE(review): presumably 32 bytes = sixteen 16-bit samples at 12-bit
; depth, judging by the macro name only -- confirm against the callers.
%macro PEL_12STORE16 3
|
||||||
|
PEL_12STORE8 %1, %2, %3 ; first half: %2 -> [%1]
|
||||||
|
movdqa [%1+16], %3 ; second half: %3 -> [%1+16]
|
||||||
|
%endmacro
|
||||||
|
|
||||||
%macro PEL_10STORE2 3
|
%macro PEL_10STORE2 3
|
||||||
movd [%1], %2
|
movd [%1], %2
|
||||||
%endmacro
|
%endmacro
|
||||||
@@ -1229,6 +1257,11 @@ WEIGHTING_FUNCS 4, 10
|
|||||||
WEIGHTING_FUNCS 6, 10
|
WEIGHTING_FUNCS 6, 10
|
||||||
WEIGHTING_FUNCS 8, 10
|
WEIGHTING_FUNCS 8, 10
|
||||||
|
|
||||||
|
WEIGHTING_FUNCS 2, 12
|
||||||
|
WEIGHTING_FUNCS 4, 12
|
||||||
|
WEIGHTING_FUNCS 6, 12
|
||||||
|
WEIGHTING_FUNCS 8, 12
|
||||||
|
|
||||||
HEVC_PUT_HEVC_PEL_PIXELS 2, 8
|
HEVC_PUT_HEVC_PEL_PIXELS 2, 8
|
||||||
HEVC_PUT_HEVC_PEL_PIXELS 4, 8
|
HEVC_PUT_HEVC_PEL_PIXELS 4, 8
|
||||||
HEVC_PUT_HEVC_PEL_PIXELS 6, 8
|
HEVC_PUT_HEVC_PEL_PIXELS 6, 8
|
||||||
@@ -1241,6 +1274,10 @@ HEVC_PUT_HEVC_PEL_PIXELS 4, 10
|
|||||||
HEVC_PUT_HEVC_PEL_PIXELS 6, 10
|
HEVC_PUT_HEVC_PEL_PIXELS 6, 10
|
||||||
HEVC_PUT_HEVC_PEL_PIXELS 8, 10
|
HEVC_PUT_HEVC_PEL_PIXELS 8, 10
|
||||||
|
|
||||||
|
HEVC_PUT_HEVC_PEL_PIXELS 2, 12
|
||||||
|
HEVC_PUT_HEVC_PEL_PIXELS 4, 12
|
||||||
|
HEVC_PUT_HEVC_PEL_PIXELS 6, 12
|
||||||
|
HEVC_PUT_HEVC_PEL_PIXELS 8, 12
|
||||||
|
|
||||||
HEVC_PUT_HEVC_EPEL 2, 8
|
HEVC_PUT_HEVC_EPEL 2, 8
|
||||||
HEVC_PUT_HEVC_EPEL 4, 8
|
HEVC_PUT_HEVC_EPEL 4, 8
|
||||||
@@ -1255,6 +1292,10 @@ HEVC_PUT_HEVC_EPEL 4, 10
|
|||||||
HEVC_PUT_HEVC_EPEL 6, 10
|
HEVC_PUT_HEVC_EPEL 6, 10
|
||||||
HEVC_PUT_HEVC_EPEL 8, 10
|
HEVC_PUT_HEVC_EPEL 8, 10
|
||||||
|
|
||||||
|
HEVC_PUT_HEVC_EPEL 2, 12
|
||||||
|
HEVC_PUT_HEVC_EPEL 4, 12
|
||||||
|
HEVC_PUT_HEVC_EPEL 6, 12
|
||||||
|
HEVC_PUT_HEVC_EPEL 8, 12
|
||||||
|
|
||||||
HEVC_PUT_HEVC_EPEL_HV 2, 8
|
HEVC_PUT_HEVC_EPEL_HV 2, 8
|
||||||
HEVC_PUT_HEVC_EPEL_HV 4, 8
|
HEVC_PUT_HEVC_EPEL_HV 4, 8
|
||||||
@@ -1266,6 +1307,10 @@ HEVC_PUT_HEVC_EPEL_HV 4, 10
|
|||||||
HEVC_PUT_HEVC_EPEL_HV 6, 10
|
HEVC_PUT_HEVC_EPEL_HV 6, 10
|
||||||
HEVC_PUT_HEVC_EPEL_HV 8, 10
|
HEVC_PUT_HEVC_EPEL_HV 8, 10
|
||||||
|
|
||||||
|
HEVC_PUT_HEVC_EPEL_HV 2, 12
|
||||||
|
HEVC_PUT_HEVC_EPEL_HV 4, 12
|
||||||
|
HEVC_PUT_HEVC_EPEL_HV 6, 12
|
||||||
|
HEVC_PUT_HEVC_EPEL_HV 8, 12
|
||||||
|
|
||||||
HEVC_PUT_HEVC_QPEL 4, 8
|
HEVC_PUT_HEVC_QPEL 4, 8
|
||||||
HEVC_PUT_HEVC_QPEL 8, 8
|
HEVC_PUT_HEVC_QPEL 8, 8
|
||||||
@@ -1275,6 +1320,9 @@ HEVC_PUT_HEVC_QPEL 16, 8
|
|||||||
HEVC_PUT_HEVC_QPEL 4, 10
|
HEVC_PUT_HEVC_QPEL 4, 10
|
||||||
HEVC_PUT_HEVC_QPEL 8, 10
|
HEVC_PUT_HEVC_QPEL 8, 10
|
||||||
|
|
||||||
|
HEVC_PUT_HEVC_QPEL 4, 12
|
||||||
|
HEVC_PUT_HEVC_QPEL 8, 12
|
||||||
|
|
||||||
HEVC_PUT_HEVC_QPEL_HV 2, 8
|
HEVC_PUT_HEVC_QPEL_HV 2, 8
|
||||||
HEVC_PUT_HEVC_QPEL_HV 4, 8
|
HEVC_PUT_HEVC_QPEL_HV 4, 8
|
||||||
HEVC_PUT_HEVC_QPEL_HV 6, 8
|
HEVC_PUT_HEVC_QPEL_HV 6, 8
|
||||||
@@ -1285,4 +1333,9 @@ HEVC_PUT_HEVC_QPEL_HV 4, 10
|
|||||||
HEVC_PUT_HEVC_QPEL_HV 6, 10
|
HEVC_PUT_HEVC_QPEL_HV 6, 10
|
||||||
HEVC_PUT_HEVC_QPEL_HV 8, 10
|
HEVC_PUT_HEVC_QPEL_HV 8, 10
|
||||||
|
|
||||||
|
HEVC_PUT_HEVC_QPEL_HV 2, 12
|
||||||
|
HEVC_PUT_HEVC_QPEL_HV 4, 12
|
||||||
|
HEVC_PUT_HEVC_QPEL_HV 6, 12
|
||||||
|
HEVC_PUT_HEVC_QPEL_HV 8, 12
|
||||||
|
|
||||||
%endif ; ARCH_X86_64
|
%endif ; ARCH_X86_64
|
||||||
|
@@ -95,33 +95,41 @@ void ff_hevc_put_hevc_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dstst
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
EPEL_PROTOTYPES(pel_pixels , 8, sse4);
|
EPEL_PROTOTYPES(pel_pixels , 8, sse4);
|
||||||
EPEL_PROTOTYPES(pel_pixels , 10, sse4);
|
EPEL_PROTOTYPES(pel_pixels , 10, sse4);
|
||||||
|
EPEL_PROTOTYPES(pel_pixels , 12, sse4);
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// EPEL
|
// EPEL
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
EPEL_PROTOTYPES(epel_h , 8, sse4);
|
EPEL_PROTOTYPES(epel_h , 8, sse4);
|
||||||
EPEL_PROTOTYPES(epel_h , 10, sse4);
|
EPEL_PROTOTYPES(epel_h , 10, sse4);
|
||||||
|
EPEL_PROTOTYPES(epel_h , 12, sse4);
|
||||||
|
|
||||||
EPEL_PROTOTYPES(epel_v , 8, sse4);
|
EPEL_PROTOTYPES(epel_v , 8, sse4);
|
||||||
EPEL_PROTOTYPES(epel_v , 10, sse4);
|
EPEL_PROTOTYPES(epel_v , 10, sse4);
|
||||||
|
EPEL_PROTOTYPES(epel_v , 12, sse4);
|
||||||
|
|
||||||
EPEL_PROTOTYPES(epel_hv , 8, sse4);
|
EPEL_PROTOTYPES(epel_hv , 8, sse4);
|
||||||
EPEL_PROTOTYPES(epel_hv , 10, sse4);
|
EPEL_PROTOTYPES(epel_hv , 10, sse4);
|
||||||
|
EPEL_PROTOTYPES(epel_hv , 12, sse4);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// QPEL
|
// QPEL
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
QPEL_PROTOTYPES(qpel_h , 8, sse4);
|
QPEL_PROTOTYPES(qpel_h , 8, sse4);
|
||||||
QPEL_PROTOTYPES(qpel_h , 10, sse4);
|
QPEL_PROTOTYPES(qpel_h , 10, sse4);
|
||||||
|
QPEL_PROTOTYPES(qpel_h , 12, sse4);
|
||||||
|
|
||||||
QPEL_PROTOTYPES(qpel_v, 8, sse4);
|
QPEL_PROTOTYPES(qpel_v, 8, sse4);
|
||||||
QPEL_PROTOTYPES(qpel_v, 10, sse4);
|
QPEL_PROTOTYPES(qpel_v, 10, sse4);
|
||||||
|
QPEL_PROTOTYPES(qpel_v, 12, sse4);
|
||||||
|
|
||||||
QPEL_PROTOTYPES(qpel_hv, 8, sse4);
|
QPEL_PROTOTYPES(qpel_hv, 8, sse4);
|
||||||
QPEL_PROTOTYPES(qpel_hv, 10, sse4);
|
QPEL_PROTOTYPES(qpel_hv, 10, sse4);
|
||||||
|
QPEL_PROTOTYPES(qpel_hv, 12, sse4);
|
||||||
|
|
||||||
|
|
||||||
WEIGHTING_PROTOTYPES(8, sse4);
|
WEIGHTING_PROTOTYPES(8, sse4);
|
||||||
WEIGHTING_PROTOTYPES(10, sse4);
|
WEIGHTING_PROTOTYPES(10, sse4);
|
||||||
|
WEIGHTING_PROTOTYPES(12, sse4);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// IDCT
|
// IDCT
|
||||||
|
@@ -167,13 +167,18 @@ mc_rep_funcs(pel_pixels, 8, 16, 64, sse4);
|
|||||||
mc_rep_funcs(pel_pixels, 8, 16, 48, sse4);
|
mc_rep_funcs(pel_pixels, 8, 16, 48, sse4);
|
||||||
mc_rep_funcs(pel_pixels, 8, 16, 32, sse4);
|
mc_rep_funcs(pel_pixels, 8, 16, 32, sse4);
|
||||||
mc_rep_funcs(pel_pixels, 8, 8, 24, sse4);
|
mc_rep_funcs(pel_pixels, 8, 8, 24, sse4);
|
||||||
|
|
||||||
mc_rep_funcs(pel_pixels,10, 8, 64, sse4);
|
mc_rep_funcs(pel_pixels,10, 8, 64, sse4);
|
||||||
mc_rep_funcs(pel_pixels,10, 8, 48, sse4);
|
mc_rep_funcs(pel_pixels,10, 8, 48, sse4);
|
||||||
mc_rep_funcs(pel_pixels,10, 8, 32, sse4);
|
mc_rep_funcs(pel_pixels,10, 8, 32, sse4);
|
||||||
mc_rep_funcs(pel_pixels,10, 8, 24, sse4);
|
mc_rep_funcs(pel_pixels,10, 8, 24, sse4);
|
||||||
mc_rep_funcs(pel_pixels,10, 8, 16, sse4);
|
mc_rep_funcs(pel_pixels,10, 8, 16, sse4);
|
||||||
mc_rep_funcs(pel_pixels,10, 4, 12, sse4);
|
mc_rep_funcs(pel_pixels,10, 4, 12, sse4);
|
||||||
|
mc_rep_funcs(pel_pixels,12, 8, 64, sse4);
|
||||||
|
mc_rep_funcs(pel_pixels,12, 8, 48, sse4);
|
||||||
|
mc_rep_funcs(pel_pixels,12, 8, 32, sse4);
|
||||||
|
mc_rep_funcs(pel_pixels,12, 8, 24, sse4);
|
||||||
|
mc_rep_funcs(pel_pixels,12, 8, 16, sse4);
|
||||||
|
mc_rep_funcs(pel_pixels,12, 4, 12, sse4);
|
||||||
|
|
||||||
mc_rep_funcs(epel_h, 8, 16, 64, sse4);
|
mc_rep_funcs(epel_h, 8, 16, 64, sse4);
|
||||||
mc_rep_funcs(epel_h, 8, 16, 48, sse4);
|
mc_rep_funcs(epel_h, 8, 16, 48, sse4);
|
||||||
@@ -185,6 +190,12 @@ mc_rep_funcs(epel_h,10, 8, 32, sse4);
|
|||||||
mc_rep_funcs(epel_h,10, 8, 24, sse4);
|
mc_rep_funcs(epel_h,10, 8, 24, sse4);
|
||||||
mc_rep_funcs(epel_h,10, 8, 16, sse4);
|
mc_rep_funcs(epel_h,10, 8, 16, sse4);
|
||||||
mc_rep_funcs(epel_h,10, 4, 12, sse4);
|
mc_rep_funcs(epel_h,10, 4, 12, sse4);
|
||||||
|
mc_rep_funcs(epel_h,12, 8, 64, sse4);
|
||||||
|
mc_rep_funcs(epel_h,12, 8, 48, sse4);
|
||||||
|
mc_rep_funcs(epel_h,12, 8, 32, sse4);
|
||||||
|
mc_rep_funcs(epel_h,12, 8, 24, sse4);
|
||||||
|
mc_rep_funcs(epel_h,12, 8, 16, sse4);
|
||||||
|
mc_rep_funcs(epel_h,12, 4, 12, sse4);
|
||||||
mc_rep_funcs(epel_v, 8, 16, 64, sse4);
|
mc_rep_funcs(epel_v, 8, 16, 64, sse4);
|
||||||
mc_rep_funcs(epel_v, 8, 16, 48, sse4);
|
mc_rep_funcs(epel_v, 8, 16, 48, sse4);
|
||||||
mc_rep_funcs(epel_v, 8, 16, 32, sse4);
|
mc_rep_funcs(epel_v, 8, 16, 32, sse4);
|
||||||
@@ -195,6 +206,12 @@ mc_rep_funcs(epel_v,10, 8, 32, sse4);
|
|||||||
mc_rep_funcs(epel_v,10, 8, 24, sse4);
|
mc_rep_funcs(epel_v,10, 8, 24, sse4);
|
||||||
mc_rep_funcs(epel_v,10, 8, 16, sse4);
|
mc_rep_funcs(epel_v,10, 8, 16, sse4);
|
||||||
mc_rep_funcs(epel_v,10, 4, 12, sse4);
|
mc_rep_funcs(epel_v,10, 4, 12, sse4);
|
||||||
|
mc_rep_funcs(epel_v,12, 8, 64, sse4);
|
||||||
|
mc_rep_funcs(epel_v,12, 8, 48, sse4);
|
||||||
|
mc_rep_funcs(epel_v,12, 8, 32, sse4);
|
||||||
|
mc_rep_funcs(epel_v,12, 8, 24, sse4);
|
||||||
|
mc_rep_funcs(epel_v,12, 8, 16, sse4);
|
||||||
|
mc_rep_funcs(epel_v,12, 4, 12, sse4);
|
||||||
mc_rep_funcs(epel_hv, 8, 8, 64, sse4);
|
mc_rep_funcs(epel_hv, 8, 8, 64, sse4);
|
||||||
mc_rep_funcs(epel_hv, 8, 8, 48, sse4);
|
mc_rep_funcs(epel_hv, 8, 8, 48, sse4);
|
||||||
mc_rep_funcs(epel_hv, 8, 8, 32, sse4);
|
mc_rep_funcs(epel_hv, 8, 8, 32, sse4);
|
||||||
@@ -207,6 +224,12 @@ mc_rep_funcs(epel_hv,10, 8, 32, sse4);
|
|||||||
mc_rep_funcs(epel_hv,10, 8, 24, sse4);
|
mc_rep_funcs(epel_hv,10, 8, 24, sse4);
|
||||||
mc_rep_funcs(epel_hv,10, 8, 16, sse4);
|
mc_rep_funcs(epel_hv,10, 8, 16, sse4);
|
||||||
mc_rep_funcs(epel_hv,10, 4, 12, sse4);
|
mc_rep_funcs(epel_hv,10, 4, 12, sse4);
|
||||||
|
mc_rep_funcs(epel_hv,12, 8, 64, sse4);
|
||||||
|
mc_rep_funcs(epel_hv,12, 8, 48, sse4);
|
||||||
|
mc_rep_funcs(epel_hv,12, 8, 32, sse4);
|
||||||
|
mc_rep_funcs(epel_hv,12, 8, 24, sse4);
|
||||||
|
mc_rep_funcs(epel_hv,12, 8, 16, sse4);
|
||||||
|
mc_rep_funcs(epel_hv,12, 4, 12, sse4);
|
||||||
|
|
||||||
mc_rep_funcs(qpel_h, 8, 16, 64, sse4);
|
mc_rep_funcs(qpel_h, 8, 16, 64, sse4);
|
||||||
mc_rep_funcs(qpel_h, 8, 16, 48, sse4);
|
mc_rep_funcs(qpel_h, 8, 16, 48, sse4);
|
||||||
@@ -218,6 +241,12 @@ mc_rep_funcs(qpel_h,10, 8, 32, sse4);
|
|||||||
mc_rep_funcs(qpel_h,10, 8, 24, sse4);
|
mc_rep_funcs(qpel_h,10, 8, 24, sse4);
|
||||||
mc_rep_funcs(qpel_h,10, 8, 16, sse4);
|
mc_rep_funcs(qpel_h,10, 8, 16, sse4);
|
||||||
mc_rep_funcs(qpel_h,10, 4, 12, sse4);
|
mc_rep_funcs(qpel_h,10, 4, 12, sse4);
|
||||||
|
mc_rep_funcs(qpel_h,12, 8, 64, sse4);
|
||||||
|
mc_rep_funcs(qpel_h,12, 8, 48, sse4);
|
||||||
|
mc_rep_funcs(qpel_h,12, 8, 32, sse4);
|
||||||
|
mc_rep_funcs(qpel_h,12, 8, 24, sse4);
|
||||||
|
mc_rep_funcs(qpel_h,12, 8, 16, sse4);
|
||||||
|
mc_rep_funcs(qpel_h,12, 4, 12, sse4);
|
||||||
mc_rep_funcs(qpel_v, 8, 16, 64, sse4);
|
mc_rep_funcs(qpel_v, 8, 16, 64, sse4);
|
||||||
mc_rep_funcs(qpel_v, 8, 16, 48, sse4);
|
mc_rep_funcs(qpel_v, 8, 16, 48, sse4);
|
||||||
mc_rep_funcs(qpel_v, 8, 16, 32, sse4);
|
mc_rep_funcs(qpel_v, 8, 16, 32, sse4);
|
||||||
@@ -228,6 +257,12 @@ mc_rep_funcs(qpel_v,10, 8, 32, sse4);
|
|||||||
mc_rep_funcs(qpel_v,10, 8, 24, sse4);
|
mc_rep_funcs(qpel_v,10, 8, 24, sse4);
|
||||||
mc_rep_funcs(qpel_v,10, 8, 16, sse4);
|
mc_rep_funcs(qpel_v,10, 8, 16, sse4);
|
||||||
mc_rep_funcs(qpel_v,10, 4, 12, sse4);
|
mc_rep_funcs(qpel_v,10, 4, 12, sse4);
|
||||||
|
mc_rep_funcs(qpel_v,12, 8, 64, sse4);
|
||||||
|
mc_rep_funcs(qpel_v,12, 8, 48, sse4);
|
||||||
|
mc_rep_funcs(qpel_v,12, 8, 32, sse4);
|
||||||
|
mc_rep_funcs(qpel_v,12, 8, 24, sse4);
|
||||||
|
mc_rep_funcs(qpel_v,12, 8, 16, sse4);
|
||||||
|
mc_rep_funcs(qpel_v,12, 4, 12, sse4);
|
||||||
mc_rep_funcs(qpel_hv, 8, 8, 64, sse4);
|
mc_rep_funcs(qpel_hv, 8, 8, 64, sse4);
|
||||||
mc_rep_funcs(qpel_hv, 8, 8, 48, sse4);
|
mc_rep_funcs(qpel_hv, 8, 8, 48, sse4);
|
||||||
mc_rep_funcs(qpel_hv, 8, 8, 32, sse4);
|
mc_rep_funcs(qpel_hv, 8, 8, 32, sse4);
|
||||||
@@ -240,6 +275,12 @@ mc_rep_funcs(qpel_hv,10, 8, 32, sse4);
|
|||||||
mc_rep_funcs(qpel_hv,10, 8, 24, sse4);
|
mc_rep_funcs(qpel_hv,10, 8, 24, sse4);
|
||||||
mc_rep_funcs(qpel_hv,10, 8, 16, sse4);
|
mc_rep_funcs(qpel_hv,10, 8, 16, sse4);
|
||||||
mc_rep_funcs(qpel_hv,10, 4, 12, sse4);
|
mc_rep_funcs(qpel_hv,10, 4, 12, sse4);
|
||||||
|
mc_rep_funcs(qpel_hv,12, 8, 64, sse4);
|
||||||
|
mc_rep_funcs(qpel_hv,12, 8, 48, sse4);
|
||||||
|
mc_rep_funcs(qpel_hv,12, 8, 32, sse4);
|
||||||
|
mc_rep_funcs(qpel_hv,12, 8, 24, sse4);
|
||||||
|
mc_rep_funcs(qpel_hv,12, 8, 16, sse4);
|
||||||
|
mc_rep_funcs(qpel_hv,12, 4, 12, sse4);
|
||||||
|
|
||||||
#define mc_rep_uni_w(bitd, step, W, opt) \
|
#define mc_rep_uni_w(bitd, step, W, opt) \
|
||||||
void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride,\
|
void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride,\
|
||||||
@@ -270,6 +311,13 @@ mc_rep_uni_w(10, 8, 32, sse4);
|
|||||||
mc_rep_uni_w(10, 8, 48, sse4);
|
mc_rep_uni_w(10, 8, 48, sse4);
|
||||||
mc_rep_uni_w(10, 8, 64, sse4);
|
mc_rep_uni_w(10, 8, 64, sse4);
|
||||||
|
|
||||||
|
mc_rep_uni_w(12, 6, 12, sse4);
|
||||||
|
mc_rep_uni_w(12, 8, 16, sse4);
|
||||||
|
mc_rep_uni_w(12, 8, 24, sse4);
|
||||||
|
mc_rep_uni_w(12, 8, 32, sse4);
|
||||||
|
mc_rep_uni_w(12, 8, 48, sse4);
|
||||||
|
mc_rep_uni_w(12, 8, 64, sse4);
|
||||||
|
|
||||||
#define mc_rep_bi_w(bitd, step, W, opt) \
|
#define mc_rep_bi_w(bitd, step, W, opt) \
|
||||||
void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \
|
void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \
|
||||||
int16_t *_src2, ptrdiff_t _src2stride, int height, \
|
int16_t *_src2, ptrdiff_t _src2stride, int height, \
|
||||||
@@ -302,6 +350,13 @@ mc_rep_bi_w(10, 8, 32, sse4);
|
|||||||
mc_rep_bi_w(10, 8, 48, sse4);
|
mc_rep_bi_w(10, 8, 48, sse4);
|
||||||
mc_rep_bi_w(10, 8, 64, sse4);
|
mc_rep_bi_w(10, 8, 64, sse4);
|
||||||
|
|
||||||
|
mc_rep_bi_w(12, 6, 12, sse4);
|
||||||
|
mc_rep_bi_w(12, 8, 16, sse4);
|
||||||
|
mc_rep_bi_w(12, 8, 24, sse4);
|
||||||
|
mc_rep_bi_w(12, 8, 32, sse4);
|
||||||
|
mc_rep_bi_w(12, 8, 48, sse4);
|
||||||
|
mc_rep_bi_w(12, 8, 64, sse4);
|
||||||
|
|
||||||
#define mc_uni_w_func(name, bitd, W, opt) \
|
#define mc_uni_w_func(name, bitd, W, opt) \
|
||||||
void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
|
void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
|
||||||
uint8_t *_src, ptrdiff_t _srcstride, \
|
uint8_t *_src, ptrdiff_t _srcstride, \
|
||||||
@@ -348,6 +403,17 @@ mc_uni_w_funcs(qpel_h, 10, sse4);
|
|||||||
mc_uni_w_funcs(qpel_v, 10, sse4);
|
mc_uni_w_funcs(qpel_v, 10, sse4);
|
||||||
mc_uni_w_funcs(qpel_hv, 10, sse4);
|
mc_uni_w_funcs(qpel_hv, 10, sse4);
|
||||||
|
|
||||||
|
mc_uni_w_funcs(pel_pixels, 12, sse4);
|
||||||
|
mc_uni_w_func(pel_pixels, 12, 6, sse4);
|
||||||
|
mc_uni_w_funcs(epel_h, 12, sse4);
|
||||||
|
mc_uni_w_func(epel_h, 12, 6, sse4);
|
||||||
|
mc_uni_w_funcs(epel_v, 12, sse4);
|
||||||
|
mc_uni_w_func(epel_v, 12, 6, sse4);
|
||||||
|
mc_uni_w_funcs(epel_hv, 12, sse4);
|
||||||
|
mc_uni_w_func(epel_hv, 12, 6, sse4);
|
||||||
|
mc_uni_w_funcs(qpel_h, 12, sse4);
|
||||||
|
mc_uni_w_funcs(qpel_v, 12, sse4);
|
||||||
|
mc_uni_w_funcs(qpel_hv, 12, sse4);
|
||||||
|
|
||||||
#define mc_bi_w_func(name, bitd, W, opt) \
|
#define mc_bi_w_func(name, bitd, W, opt) \
|
||||||
void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
|
void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
|
||||||
@@ -397,6 +463,17 @@ mc_bi_w_funcs(qpel_h, 10, sse4);
|
|||||||
mc_bi_w_funcs(qpel_v, 10, sse4);
|
mc_bi_w_funcs(qpel_v, 10, sse4);
|
||||||
mc_bi_w_funcs(qpel_hv, 10, sse4);
|
mc_bi_w_funcs(qpel_hv, 10, sse4);
|
||||||
|
|
||||||
|
mc_bi_w_funcs(pel_pixels, 12, sse4);
|
||||||
|
mc_bi_w_func(pel_pixels, 12, 6, sse4);
|
||||||
|
mc_bi_w_funcs(epel_h, 12, sse4);
|
||||||
|
mc_bi_w_func(epel_h, 12, 6, sse4);
|
||||||
|
mc_bi_w_funcs(epel_v, 12, sse4);
|
||||||
|
mc_bi_w_func(epel_v, 12, 6, sse4);
|
||||||
|
mc_bi_w_funcs(epel_hv, 12, sse4);
|
||||||
|
mc_bi_w_func(epel_hv, 12, 6, sse4);
|
||||||
|
mc_bi_w_funcs(qpel_h, 12, sse4);
|
||||||
|
mc_bi_w_funcs(qpel_v, 12, sse4);
|
||||||
|
mc_bi_w_funcs(qpel_hv, 12, sse4);
|
||||||
#endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
|
#endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
|
||||||
|
|
||||||
|
|
||||||
@@ -515,5 +592,16 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
|||||||
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
|
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
|
||||||
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
|
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
|
||||||
|
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
|
||||||
|
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
|
||||||
|
EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
|
||||||
|
EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
|
||||||
|
|
||||||
|
QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
|
||||||
|
QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
|
||||||
|
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
|
||||||
|
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user