mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
avcodec/h264: mmxext 4:2:2 chroma intra deblock/loop filter
2.1 times faster (401 vs. 194 cycles)
This commit is contained in:
parent
122190392b
commit
815ea8c6cc
@ -946,6 +946,20 @@ cglobal deblock_h_chroma_intra_8, 4,6
|
||||
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
|
||||
RET
|
||||
|
||||
cglobal deblock_h_chroma422_intra_8, 4, 6
|
||||
CHROMA_H_START
|
||||
TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
|
||||
call ff_chroma_intra_body_mmxext
|
||||
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
|
||||
|
||||
lea r0, [r0+r1*8]
|
||||
lea t5, [t5+r1*8]
|
||||
|
||||
TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
|
||||
call ff_chroma_intra_body_mmxext
|
||||
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
|
||||
RET
|
||||
|
||||
ALIGN 16
|
||||
ff_chroma_intra_body_mmxext:
|
||||
LOAD_MASK r2d, r3d
|
||||
|
@ -130,6 +130,7 @@ LF_FUNCS(uint8_t, 8)
|
||||
LF_FUNCS(uint16_t, 10)
|
||||
|
||||
void ff_deblock_h_chroma422_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
|
||||
LF_IFUNC(h, chroma422_intra, 8, mmxext)
|
||||
|
||||
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
|
||||
LF_FUNC(v8, luma, 8, mmxext)
|
||||
@ -249,6 +250,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
||||
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
|
||||
} else {
|
||||
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_mmxext;
|
||||
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_mmxext;
|
||||
}
|
||||
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
|
||||
c->h264_v_loop_filter_luma = deblock_v_luma_8_mmxext;
|
||||
|
Loading…
Reference in New Issue
Block a user