You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-10 06:10:52 +02:00
avcodec/h264: mmx2, sse2, avx 10-bit 4:2:2 h chroma deblock/loop filter
Yorkfield:
 - mmx2: 2.53x (504 vs. 199 cycles)
 - sse2: 3.83x (504 vs. 131 cycles)
Nehalem:
 - mmx2: 2.42x (365 vs. 151 cycles)
 - sse2: 3.56x (365 vs. 103 cycles)
Skylake:
 - mmx2: 1.81x (308 vs. 170 cycles)
 - sse2: 2.84x (308 vs. 108 cycles)
 - avx: 2.93x (308 vs. 105 cycles)
This commit is contained in:
@@ -1032,6 +1032,45 @@ cglobal deblock_h_chroma_10, 5, 7, 8, 2*mmsize, pix_, stride_, alpha_, beta_, tc
|
|||||||
%endif
|
%endif
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------------
|
||||||
|
; void ff_deblock_h_chroma422_10(uint16_t *pix, int stride, int alpha, int beta,
|
||||||
|
; int8_t *tc0)
|
||||||
|
;-----------------------------------------------------------------------------
|
||||||
|
; Deblocks a 10-bit 4:2:2 chroma edge (see the prototype comment above).
; Outline of what the code below does:
;   1. alpha and beta are shifted left by 2.
;   2. The four int8 tc0 values are widened to words and stored at [rsp].
;   3. A loop runs with r4 going from -8 up to 0 in steps of mmsize/4; each
;      pass advances pix_q and r5 by mmsize/2 strides and reads its tc0
;      word(s) from [rsp + r4 + 8].
; Stack scratch: [rsp] holds the widened tc0 values; [rsp + 1*mmsize] and
; [rsp + 2*mmsize] are handed to CHROMA_H_LOAD / CHROMA_H_STORE.
; NOTE(review): CHROMA_H_LOAD, LOAD_AB, LOAD_MASK, DEBLOCK_P0_Q0,
; CHROMA_H_STORE and pw_3 are defined outside this view; comments about
; them are assumptions to confirm against their definitions.
; NOTE(review): the cglobal operands presumably follow x86inc's order
; (5 args, 7 GPRs, 8 vector regs, 3*mmsize bytes of stack) -- confirm.
cglobal deblock_h_chroma422_10, 5, 7, 8, 3*mmsize, pix_, stride_, alpha_, beta_, tc0_
|
||||||
|
shl alpha_d, 2 ; alpha <<= 2 (presumably rescaling the threshold for 10-bit samples)
|
||||||
|
shl beta_d, 2 ; beta <<= 2 (same scaling as alpha)
|
||||||
|
|
||||||
|
movd m0, [tc0_q] ; load 4 bytes: tc0[0..3]
|
||||||
|
punpcklbw m0, m0 ; duplicate each byte into both halves of a word
|
||||||
|
psraw m0, 6 ; arithmetic >> 6: keeps the sign, net effect is roughly tc0[i] * 4
|
||||||
|
movq [rsp], m0 ; park the four widened tc0 words at the bottom of the scratch area
|
||||||
|
|
||||||
|
mov r5, pix_q ; r5 = pix ...
|
||||||
|
lea r6, [3*stride_q] ; r6 = 3 * stride
|
||||||
|
add r5, r6 ; ... r5 = pix + 3 * stride
|
||||||
|
|
||||||
|
mov r4, -8 ; negative counter; also the byte offset (biased by +8) into the tc0 words
|
||||||
|
.loop:
|
||||||
|
|
||||||
|
CHROMA_H_LOAD r5, r6, [rsp + 1*mmsize], [rsp + 2*mmsize] ; presumably loads/transposes the pixels around the edge; pix_q is not passed explicitly -- TODO confirm the macro uses it by name
|
||||||
|
LOAD_AB m4, m5, alpha_d, beta_d ; presumably broadcasts alpha into m4 and beta into m5
|
||||||
|
LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 ; presumably builds the filter mask (m7 is consumed as one just below)
|
||||||
|
pxor m4, m4 ; m4 = 0, used as the lower clamp below
|
||||||
|
movd m6, [rsp + r4 + 8] ; fetch this pass's widened tc0 word(s); offset is 0 on the first pass
|
||||||
|
punpcklwd m6, m6 ; interleave with itself: w0 w0 w1 w1 ...
|
||||||
|
punpcklwd m6, m6 ; again: w0 w0 w0 w0 (then w1 x4 when the register is wide enough)
|
||||||
|
psubw m6, [pw_3] ; m6 -= pw_3 (presumably words of 3 -- confirm)
|
||||||
|
pmaxsw m6, m4 ; clamp at zero: m6 = max(m6, 0) per signed word
|
||||||
|
pand m7, m6 ; m7 &= m6, combining the LOAD_MASK result with the clamped tc
|
||||||
|
DEBLOCK_P0_Q0 m1, m2, m0, m3, m7, m5, m6 ; presumably applies the p0/q0 filter limited by m7
|
||||||
|
CHROMA_H_STORE r5, r6, [rsp + 1*mmsize], [rsp + 2*mmsize] ; presumably writes the filtered pixels back (mirror of CHROMA_H_LOAD)
|
||||||
|
|
||||||
|
lea pix_q, [pix_q + (mmsize/2)*stride_q] ; advance pix by mmsize/2 strides
|
||||||
|
lea r5, [r5 + (mmsize/2)*stride_q] ; keep r5 at pix + 3 * stride
|
||||||
|
add r4, (mmsize/4) ; step the counter; sets the flags tested by jl
|
||||||
|
jl .loop ; repeat while r4 is still negative
|
||||||
|
RET
|
||||||
|
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%if ARCH_X86_64 == 0
|
%if ARCH_X86_64 == 0
|
||||||
|
@@ -315,6 +315,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
|
||||||
if (chroma_format_idc <= 1) {
|
if (chroma_format_idc <= 1) {
|
||||||
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_mmxext;
|
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_mmxext;
|
||||||
|
} else {
|
||||||
|
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_mmxext;
|
||||||
}
|
}
|
||||||
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext;
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext;
|
||||||
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext;
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext;
|
||||||
@@ -351,6 +353,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
|
||||||
if (chroma_format_idc <= 1) {
|
if (chroma_format_idc <= 1) {
|
||||||
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
|
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
|
||||||
|
} else {
|
||||||
|
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
|
||||||
}
|
}
|
||||||
#if HAVE_ALIGNED_STACK
|
#if HAVE_ALIGNED_STACK
|
||||||
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
|
||||||
@@ -389,6 +393,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
|
||||||
if (chroma_format_idc <= 1) {
|
if (chroma_format_idc <= 1) {
|
||||||
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
|
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
|
||||||
|
} else {
|
||||||
|
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
|
||||||
}
|
}
|
||||||
#if HAVE_ALIGNED_STACK
|
#if HAVE_ALIGNED_STACK
|
||||||
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
|
||||||
|
Reference in New Issue
Block a user