mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Merge commit '1a880b2fb8456ce68eefe5902bac95fea1e6a72d'
* commit '1a880b2fb8456ce68eefe5902bac95fea1e6a72d':
hevc: SSE2 and SSSE3 loop filters
Conflicts:
libavcodec/hevcdsp.c
libavcodec/hevcdsp.h
libavcodec/x86/Makefile
libavcodec/x86/hevc_deblock.asm
libavcodec/x86/hevcdsp_init.c
See: de7b89fd43
and several others
Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
706f81a2c2
@ -247,5 +247,7 @@ int i = 0;
|
||||
HEVC_DSP(8);
|
||||
break;
|
||||
}
|
||||
if (ARCH_X86) ff_hevcdsp_init_x86(hevcdsp, bit_depth);
|
||||
|
||||
if (ARCH_X86)
|
||||
ff_hevc_dsp_init_x86(hevcdsp, bit_depth);
|
||||
}
|
||||
|
@ -123,6 +123,6 @@ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
|
||||
extern const int8_t ff_hevc_epel_filters[7][4];
|
||||
extern const int8_t ff_hevc_qpel_filters[3][16];
|
||||
|
||||
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth);
|
||||
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
|
||||
|
||||
#endif /* AVCODEC_HEVCDSP_H */
|
||||
|
@ -660,7 +660,8 @@ ALIGN 16
|
||||
|
||||
INIT_XMM sse2
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q)
|
||||
; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
|
||||
; uint8_t *_no_p, uint8_t *_no_q);
|
||||
;-----------------------------------------------------------------------------
|
||||
cglobal hevc_v_loop_filter_chroma_8, 3, 5, 7, pix, stride, tc, pix0, r3stride
|
||||
sub pixq, 2
|
||||
@ -693,7 +694,8 @@ cglobal hevc_v_loop_filter_chroma_12, 3, 5, 7, pix, stride, tc, pix0, r3stride
|
||||
RET
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_hevc_h_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q
|
||||
; void ff_hevc_h_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
|
||||
; uint8_t *_no_p, uint8_t *_no_q);
|
||||
;-----------------------------------------------------------------------------
|
||||
cglobal hevc_h_loop_filter_chroma_8, 3, 4, 7, pix, stride, tc, pix0
|
||||
mov pix0q, pixq
|
||||
@ -749,7 +751,8 @@ cglobal hevc_h_loop_filter_chroma_12, 3, 4, 7, pix, stride, tc, pix0
|
||||
%if ARCH_X86_64
|
||||
%macro LOOP_FILTER_LUMA 0
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
|
||||
; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta,
|
||||
; int *_tc, uint8_t *_no_p, uint8_t *_no_q);
|
||||
;-----------------------------------------------------------------------------
|
||||
cglobal hevc_v_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc
|
||||
sub r0, 4
|
||||
@ -788,7 +791,8 @@ cglobal hevc_v_loop_filter_luma_12, 4, 15, 16, pix, stride, beta, tc
|
||||
RET
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_hevc_h_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
|
||||
; void ff_hevc_h_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta,
|
||||
; int *_tc, uint8_t *_no_p, uint8_t *_no_q);
|
||||
;-----------------------------------------------------------------------------
|
||||
cglobal hevc_h_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc, count, pix0, src3stride
|
||||
lea src3strideq, [3 * strideq]
|
||||
|
@ -28,21 +28,19 @@
|
||||
#include "libavcodec/hevcdsp.h"
|
||||
#include "libavcodec/x86/hevcdsp.h"
|
||||
|
||||
|
||||
#define LFC_FUNC(DIR, DEPTH, OPT) \
|
||||
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
|
||||
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q);
|
||||
|
||||
#define LFL_FUNC(DIR, DEPTH, OPT) \
|
||||
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int _beta, int *_tc, \
|
||||
uint8_t *_no_p, uint8_t *_no_q);
|
||||
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q);
|
||||
|
||||
#define LFC_FUNCS(type, depth, opt) \
|
||||
LFC_FUNC(h, depth, opt) \
|
||||
LFC_FUNC(v, depth, opt)
|
||||
LFC_FUNC(h, depth, opt) \
|
||||
LFC_FUNC(v, depth, opt)
|
||||
|
||||
#define LFL_FUNCS(type, depth, opt) \
|
||||
LFL_FUNC(h, depth, opt) \
|
||||
LFL_FUNC(v, depth, opt)
|
||||
LFL_FUNC(h, depth, opt) \
|
||||
LFL_FUNC(v, depth, opt)
|
||||
|
||||
LFC_FUNCS(uint8_t, 8, sse2)
|
||||
LFC_FUNCS(uint8_t, 10, sse2)
|
||||
@ -456,16 +454,16 @@ mc_bi_w_funcs(qpel_hv, 12, sse4);
|
||||
PEL_LINK(pointer, 9, my , mx , fname##64, bitd, opt )
|
||||
|
||||
|
||||
void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
{
|
||||
int mm_flags = av_get_cpu_flags();
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (bit_depth == 8) {
|
||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
|
||||
c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
|
||||
}
|
||||
if (EXTERNAL_SSE2(mm_flags)) {
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
|
||||
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
|
||||
if (ARCH_X86_64) {
|
||||
@ -477,11 +475,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
|
||||
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
|
||||
}
|
||||
if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
|
||||
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
|
||||
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
|
||||
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
|
||||
}
|
||||
if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
|
||||
if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
|
||||
|
||||
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
|
||||
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
|
||||
@ -493,16 +491,16 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
|
||||
}
|
||||
if (EXTERNAL_AVX2(mm_flags)) {
|
||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||
c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
|
||||
c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
|
||||
}
|
||||
} else if (bit_depth == 10) {
|
||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
|
||||
c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
|
||||
}
|
||||
if (EXTERNAL_SSE2(mm_flags)) {
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
|
||||
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
|
||||
if (ARCH_X86_64) {
|
||||
@ -514,11 +512,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
|
||||
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;
|
||||
}
|
||||
if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
|
||||
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
|
||||
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
|
||||
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
|
||||
}
|
||||
if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
|
||||
if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
|
||||
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
|
||||
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
|
||||
EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
|
||||
@ -529,13 +527,13 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
|
||||
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
|
||||
}
|
||||
if (EXTERNAL_AVX2(mm_flags)) {
|
||||
if (EXTERNAL_AVX2(cpu_flags)) {
|
||||
c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
|
||||
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
|
||||
|
||||
}
|
||||
} else if (bit_depth == 12) {
|
||||
if (EXTERNAL_SSE2(mm_flags)) {
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
|
||||
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
|
||||
if (ARCH_X86_64) {
|
||||
@ -543,11 +541,11 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
|
||||
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
|
||||
}
|
||||
}
|
||||
if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
|
||||
if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
|
||||
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
|
||||
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
|
||||
}
|
||||
if (EXTERNAL_SSE4(mm_flags) && ARCH_X86_64) {
|
||||
if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
|
||||
EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
|
||||
EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
|
||||
EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
|
||||
|
Loading…
Reference in New Issue
Block a user