mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-28 20:53:54 +02:00
avcodec/h264: add avx 8-bit chroma v deblock/loop filter
~1.24x faster than the mmxext function (81 vs. 101 cycles)
This commit is contained in:
parent
1e298e7724
commit
5c56758843
@ -1059,6 +1059,44 @@ ff_chroma_intra_body_mmxext:
|
||||
paddb m2, m6
|
||||
ret
|
||||
|
||||
;-----------------------------------------------------------------------------
; CHROMA_INTER_BODY_XMM n
;
; Common filter body for the XMM chroma inter deblock functions.
;   n = number of byte-duplication passes applied to the tc0 values.
;
; Reads:  alpha_d, beta_d (passed to LOAD_MASK), 4 bytes at [tc0_q].
; The only caller visible here loads four pixel rows into m0..m3 before
; invoking this macro and stores m1 and m2 afterwards.
; Writes: m6 and m7 directly; LOAD_MASK and DEBLOCK_P0_Q0 are defined
;         elsewhere and may touch further registers.
; NOTE(review): presumably LOAD_MASK leaves the filter mask in m7 and
; DEBLOCK_P0_Q0 updates m1/m2 in place - confirm against their definitions.
;-----------------------------------------------------------------------------
%macro CHROMA_INTER_BODY_XMM 1
|
||||
LOAD_MASK alpha_d, beta_d
|
||||
; Load the four tc0 bytes into the low dword of m6. This is done after
; LOAD_MASK; NOTE(review): presumably because LOAD_MASK uses m6 as scratch.
movd m6, [tc0_q]
|
||||
; Each pass interleaves m6 with itself, doubling the number of consecutive
; copies of every tc0 byte (one pass: 4 bytes -> 8 bytes, matching the
; 8-byte rows loaded with movq by the visible caller).
%rep %1
|
||||
punpcklbw m6, m6
|
||||
%endrep
|
||||
; m7 = m7 & m6: combine the expanded tc0 bytes with the contents of m7.
pand m7, m6
|
||||
DEBLOCK_P0_Q0
|
||||
%endmacro
|
||||
|
||||
;-----------------------------------------------------------------------------
; CHROMA_V_START_XMM reg
;
; Set-up shared by the vertical chroma deblock functions.
;   reg = general-purpose register that receives pix - 2*stride.
;
; On exit: stride_q holds the stride as a full-width value, alpha_d and
; beta_d are each decremented by one, and reg = pix_q - 2*stride_q.
; pix_q itself is not modified.
;-----------------------------------------------------------------------------
%macro CHROMA_V_START_XMM 1
|
||||
; x86inc helper: sign-extend stride_d into stride_q unless both names map to
; the same register.
movsxdifnidn stride_q, stride_d
|
||||
; NOTE(review): presumably the mask macro expects alpha-1 / beta-1 - confirm
; against LOAD_MASK.
dec alpha_d
|
||||
dec beta_d
|
||||
; reg = pix - 2*stride, computed as a copy followed by two subtractions.
mov %1, pix_q
|
||||
sub %1, stride_q
|
||||
sub %1, stride_q
|
||||
%endmacro
|
||||
|
||||
;-----------------------------------------------------------------------------
; DEBLOCK_CHROMA_XMM cpuname
;
; Emits deblock_v_chroma_8 for the instruction set given as the argument,
; using XMM registers. The C init code refers to the avx instantiation as
; ff_deblock_v_chroma_8_avx.
;
; Arguments (named by cglobal): pix, stride, alpha, beta, tc0.
; The function reads 8 bytes from each of the rows pix-2*stride, pix-stride,
; pix and pix+stride, runs the shared inter filter body, and writes 8 bytes
; back to the two rows pix-stride and pix only.
;-----------------------------------------------------------------------------
%macro DEBLOCK_CHROMA_XMM 1
|
||||
|
||||
INIT_XMM %1
|
||||
|
||||
; cglobal operands: 5 arguments, 6 general-purpose registers, 8 XMM registers.
cglobal deblock_v_chroma_8, 5, 6, 8, pix_, stride_, alpha_, beta_, tc0_
|
||||
; r5 = pix - 2*stride; alpha and beta are decremented by the start macro.
CHROMA_V_START_XMM r5
|
||||
; m0..m3 = the four rows straddling the edge, 8 bytes each.
; NOTE(review): presumably p1, p0, q0, q1 in H.264 deblocking terms.
movq m0, [r5]
|
||||
movq m1, [r5 + stride_q]
|
||||
movq m2, [pix_q]
|
||||
movq m3, [pix_q + stride_q]
|
||||
; One duplication pass: 4 tc0 bytes expand to cover the 8 loaded bytes.
CHROMA_INTER_BODY_XMM 1
|
||||
; Only the two rows adjacent to the edge are written back.
movq [r5 + stride_q], m1
|
||||
movq [pix_q], m2
|
||||
RET
|
||||
|
||||
%endmacro ; DEBLOCK_CHROMA_XMM
|
||||
|
||||
; Instantiate the function for AVX.
DEBLOCK_CHROMA_XMM avx
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_h264_loop_filter_strength(int16_t bs[2][4][4], uint8_t nnz[40],
|
||||
; int8_t ref[2][40], int16_t mv[2][40][2],
|
||||
|
@ -317,6 +317,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
||||
#if ARCH_X86_64
|
||||
c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
|
||||
#endif
|
||||
|
||||
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_avx;
|
||||
}
|
||||
} else if (bit_depth == 10) {
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
|
Loading…
Reference in New Issue
Block a user