mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
avcodec/h264: mmxext 4:2:2 chroma deblock/loop filter
About 2.6 times faster (142 cycles, down from 366 cycles)
This commit is contained in:
parent
9556446623
commit
7042a55c55
@ -864,7 +864,50 @@ ff_chroma_inter_body_mmxext:
|
||||
DEBLOCK_P0_Q0
|
||||
ret
|
||||
|
||||
;------------------------------------------------------------------------------
; deblock_h_chroma422_8 (mmxext)
;
; C-side prototype, as declared in the accompanying init file:
;   void ff_deblock_h_chroma422_8_mmxext(uint8_t *pix, int stride,
;                                        int alpha, int beta, int8_t *tc0);
;
; NOTE(review): assuming the usual x86inc argument mapping, r0=pix, r1=stride,
; r2=alpha, r3=beta, r4=tc0 -- confirm against x86inc.asm.
;
; The body runs the same load / mask / filter / store sequence twice. Between
; the two passes r0 and t5 are advanced by r1*8 bytes. The first pass uses the
; bytes at [rsp] (derived from tc0[0], tc0[1]); the second uses [rsp+4]
; (derived from tc0[2], tc0[3]).
;
; All helper macros used below (CHROMA_H_START, TRANSPOSE4x8B_LOAD, PASS8ROWS,
; LOAD_MASK, DEBLOCK_P0_Q0, TRANSPOSE8x4B_STORE) are defined outside this
; excerpt; notes on what they do are hedged accordingly.
;------------------------------------------------------------------------------
; t5 and t6 are aliases for r4 and r5. They must be defined before the function
; below because its body refers to them.
%define t5 r4
|
||||
%define t6 r5
|
||||
|
||||
; The last cglobal operand evaluates to 1*mmsize bytes of stack on x86-32 and
; 3*mmsize bytes on x86-64.
; NOTE(review): the remaining operands are presumably x86inc's
; (num_args=5, num_gprs=6, num_xmm=0), and the "0-" negation presumably is
; x86inc's way of requesting stack without forced alignment -- confirm.
cglobal deblock_h_chroma422_8, 5, 6, 0, 0-(1+ARCH_X86_64*2)*mmsize
|
||||
%if ARCH_X86_64
|
||||
; x86-64: buf0/buf1 are the two 8-byte slots above the tc0 scratch at [rsp].
%define buf0 [rsp+16]
|
||||
%define buf1 [rsp+8]
|
||||
%else
|
||||
; x86-32: only one mmsize slot is reserved on the stack, so buf0/buf1 are
; mapped onto r0m and r2m instead.
; NOTE(review): presumably these are the stack home slots of arguments 0 and 2,
; and each 8-byte movq also covers the following slot -- confirm that nothing
; rereads those arguments from memory afterwards.
%define buf0 r0m
|
||||
%define buf1 r2m
|
||||
%endif
|
||||
|
||||
; Load 4 bytes from [r4] and interleave the register with itself so every byte
; appears twice (b0 b0 b1 b1 b2 b2 b3 b3), then park the 8 bytes at [rsp].
movd m6, [r4]
|
||||
punpcklbw m6, m6
|
||||
movq [rsp], m6
|
||||
; NOTE(review): presumably sets up t5/t6 (and adjusts r0) for the PASS8ROWS
; addressing used below; from here on r4 is used as t5, which is why the tc0
; bytes were saved to the stack first -- confirm in the macro definition.
CHROMA_H_START
|
||||
|
||||
; ---- first pass ----
; NOTE(review): presumably loads 8 rows and transposes them into m0..m3.
TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6)
|
||||
; Save m0 and m3; they are reloaded after DEBLOCK_P0_Q0, just before the store.
movq buf0, m0
|
||||
movq buf1, m3
|
||||
; NOTE(review): given alpha (r2d) and beta (r3d), presumably leaves the
; per-pixel filter mask in m7 -- the pand below relies on that.
LOAD_MASK r2d, r3d
|
||||
; Take the first 4 saved bytes (b0 b0 b1 b1) and duplicate each 16-bit word,
; giving b0 b0 b0 b0 b1 b1 b1 b1: four copies of each of the first two tc0
; bytes.
movd m6, [rsp]
|
||||
punpcklwd m6, m6
|
||||
; AND the expanded tc0 bytes into m7.
pand m7, m6
|
||||
DEBLOCK_P0_Q0
|
||||
; Restore the m0/m3 values saved above.
movq m0, buf0
|
||||
movq m3, buf1
|
||||
; NOTE(review): presumably transposes back and writes the 8 rows out.
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
|
||||
|
||||
; Advance both row pointers by 8*r1 bytes for the second pass.
lea r0, [r0+r1*8]
|
||||
lea t5, [t5+r1*8]
|
||||
|
||||
; ---- second pass: same sequence, using the upper 4 saved tc0 bytes ----
TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6)
|
||||
movq buf0, m0
|
||||
movq buf1, m3
|
||||
LOAD_MASK r2d, r3d
|
||||
; [rsp+4] holds b2 b2 b3 b3; after word duplication: b2 x4, b3 x4.
movd m6, [rsp+4]
|
||||
punpcklwd m6, m6
|
||||
pand m7, m6
|
||||
DEBLOCK_P0_Q0
|
||||
movq m0, buf0
|
||||
movq m3, buf1
|
||||
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
|
||||
; NOTE(review): upper-case RET is presumably the x86inc epilogue macro that
; undoes the cglobal prologue before returning -- confirm.
RET
|
||||
|
||||
; in: %1=p0 %2=p1 %3=q1
|
||||
; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
|
||||
@ -877,9 +920,6 @@ ff_chroma_inter_body_mmxext:
|
||||
pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
|
||||
%endmacro
|
||||
|
||||
%define t5 r4
|
||||
%define t6 r5
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
|
||||
;------------------------------------------------------------------------------
|
||||
|
@ -129,6 +129,8 @@ LF_IFUNC(v, chroma_intra, depth, avx)
|
||||
LF_FUNCS(uint8_t, 8)
|
||||
LF_FUNCS(uint16_t, 10)
|
||||
|
||||
void ff_deblock_h_chroma422_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
|
||||
|
||||
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
|
||||
LF_FUNC(v8, luma, 8, mmxext)
|
||||
static void deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha,
|
||||
@ -245,6 +247,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
||||
if (chroma_format_idc <= 1) {
|
||||
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_mmxext;
|
||||
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
|
||||
} else {
|
||||
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_mmxext;
|
||||
}
|
||||
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
|
||||
c->h264_v_loop_filter_luma = deblock_v_luma_8_mmxext;
|
||||
|
Loading…
Reference in New Issue
Block a user