1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-19 05:49:09 +02:00

avcodec/h264: add avx 8-bit chroma v deblock/loop filter

~1.24x faster than the mmxext function (81 cycles for avx vs. 101 cycles for mmxext)
This commit is contained in:
James Darnley 2017-02-15 14:36:20 +01:00
parent 1e298e7724
commit 5c56758843
2 changed files with 40 additions and 0 deletions

View File

@ -1059,6 +1059,44 @@ ff_chroma_intra_body_mmxext:
paddb m2, m6
ret
;-----------------------------------------------------------------------------
; CHROMA_INTER_BODY_XMM passes
;
; Shared filter body for the XMM chroma deblock functions.
;   %1 = number of "punpcklbw m6, m6" passes applied to the loaded tc0 bytes.
;        Each pass interleaves the low bytes of m6 with themselves, i.e. every
;        low byte ends up duplicated into two adjacent bytes.
;
; Uses the named arguments alpha_d, beta_d and tc0_q of the enclosing cglobal
; function and overwrites m6 and m7.
; NOTE(review): LOAD_MASK and DEBLOCK_P0_Q0 are defined outside this view.
; Presumably LOAD_MASK leaves the filter mask in m7 and DEBLOCK_P0_Q0 filters
; the rows held in m1/m2 (the visible caller stores m1 and m2 afterwards) --
; confirm against their definitions.
;-----------------------------------------------------------------------------
%macro CHROMA_INTER_BODY_XMM 1
LOAD_MASK alpha_d, beta_d
; Load 32 bits (four tc0 bytes) into the low dword of m6.
movd m6, [tc0_q]
; Widen the tc0 bytes: every pass doubles how many bytes each value covers.
%rep %1
punpcklbw m6, m6
%endrep
; Combine the expanded tc0 bytes with m7 (bitwise AND, result in m7).
pand m7, m6
DEBLOCK_P0_Q0
%endmacro
;-----------------------------------------------------------------------------
; CHROMA_V_START_XMM reg
;
; Common prologue for the vertical chroma deblock functions.
;   %1 = general-purpose register that receives pix_q - 2 * stride_q.
;
; Side effects: alpha_d and beta_d are each decremented by one; stride_q is
; prepared by movsxdifnidn; flags are left as set by the final sub.
; NOTE(review): the decrements presumably match what LOAD_MASK expects
; (alpha - 1 / beta - 1) -- confirm against LOAD_MASK, which is not visible here.
;-----------------------------------------------------------------------------
%macro CHROMA_V_START_XMM 1
; x86inc helper (defined elsewhere); presumably sign-extends the 32-bit stride
; into stride_q where the two names are distinct registers -- TODO confirm.
movsxdifnidn stride_q, stride_d
dec alpha_d
dec beta_d
; %1 = pix - 2 * stride: address of the row two lines above pix.
mov %1, pix_q
sub %1, stride_q
sub %1, stride_q
%endmacro
;-----------------------------------------------------------------------------
; DEBLOCK_CHROMA_XMM cpuname
;
; Emits deblock_v_chroma_8 for the instruction set named by %1 (passed straight
; to INIT_XMM). The cglobal line names five arguments: pix, stride, alpha,
; beta and tc0. The C init code in this commit references the avx instance as
; ff_deblock_v_chroma_8_avx.
;
; The function reads 8 bytes from each of four consecutive rows around pix
; (pix - 2*stride, pix - stride, pix, pix + stride), runs the shared filter
; body, and writes 8 bytes back to the two middle rows only.
; NOTE(review): in H.264 deblocking terms these rows are presumably p1, p0, q0
; and q1 -- confirm against DEBLOCK_P0_Q0.
;-----------------------------------------------------------------------------
%macro DEBLOCK_CHROMA_XMM 1
INIT_XMM %1
; cglobal arguments "5, 6, 8": by x86inc convention, 5 arguments, 6
; general-purpose registers and 8 vector registers (r5 is the extra scratch).
cglobal deblock_v_chroma_8, 5, 6, 8, pix_, stride_, alpha_, beta_, tc0_
; r5 = pix - 2*stride; also decrements alpha and beta.
CHROMA_V_START_XMM r5
; Load 8 bytes per row into the low halves of m0..m3.
movq m0, [r5]
movq m1, [r5 + stride_q]
movq m2, [pix_q]
movq m3, [pix_q + stride_q]
; One punpcklbw pass: the four tc0 bytes each get duplicated, covering 8 bytes.
CHROMA_INTER_BODY_XMM 1
; Write back only the rows at pix - stride (m1) and pix (m2).
movq [r5 + stride_q], m1
movq [pix_q], m2
RET
%endmacro ; DEBLOCK_CHROMA_XMM
; Instantiate the AVX version of the function.
DEBLOCK_CHROMA_XMM avx
;-----------------------------------------------------------------------------
; void ff_h264_loop_filter_strength(int16_t bs[2][4][4], uint8_t nnz[40],
; int8_t ref[2][40], int16_t mv[2][40][2],

View File

@ -317,6 +317,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
#if ARCH_X86_64
c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
#endif
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_avx;
}
} else if (bit_depth == 10) {
if (EXTERNAL_MMXEXT(cpu_flags)) {