mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
x86/hevc_deblock: fix label names
Also remove some unnecessary jmps

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
521eaea63a
commit
8aac77fede
@ -383,7 +383,7 @@ ALIGN 16
|
|||||||
pcmpgtw m15, m13, m14; beta0, beta1
|
pcmpgtw m15, m13, m14; beta0, beta1
|
||||||
movmskps r13, m15 ;filtering mask 0d0 + 0d3 < beta0 (bit 2 or 3) , 1d0 + 1d3 < beta1 (bit 0 or 1)
|
movmskps r13, m15 ;filtering mask 0d0 + 0d3 < beta0 (bit 2 or 3) , 1d0 + 1d3 < beta1 (bit 0 or 1)
|
||||||
cmp r13, 0
|
cmp r13, 0
|
||||||
je bypasswrite_macro_%2%1
|
je .bypassluma
|
||||||
|
|
||||||
;weak / strong decision compare to beta_2
|
;weak / strong decision compare to beta_2
|
||||||
psraw m15, m13, 2; beta >> 2
|
psraw m15, m13, 2; beta >> 2
|
||||||
@ -440,7 +440,7 @@ ALIGN 16
|
|||||||
movd m9, r3; tc1
|
movd m9, r3; tc1
|
||||||
add r2d, r3d; tc0 + tc1
|
add r2d, r3d; tc0 + tc1
|
||||||
cmp r2d, 0;
|
cmp r2d, 0;
|
||||||
je bypasswrite_macro_%2%1
|
je .bypassluma
|
||||||
punpcklwd m9, m9
|
punpcklwd m9, m9
|
||||||
shufps m8, m9, 0; tc0, tc1
|
shufps m8, m9, 0; tc0, tc1
|
||||||
mova m9, m8
|
mova m9, m8
|
||||||
@ -497,7 +497,7 @@ ALIGN 16
|
|||||||
movd m10, r2; store to xmm for mask generation
|
movd m10, r2; store to xmm for mask generation
|
||||||
or r14, r2; final strong mask, bits 1 and 0
|
or r14, r2; final strong mask, bits 1 and 0
|
||||||
cmp r14, 0;
|
cmp r14, 0;
|
||||||
je weakfilter_macro_%2%1
|
je .weakfilter
|
||||||
|
|
||||||
shufps m10, m12, 0
|
shufps m10, m12, 0
|
||||||
|
|
||||||
@ -583,11 +583,11 @@ ALIGN 16
|
|||||||
MASKED_COPY m4, m8
|
MASKED_COPY m4, m8
|
||||||
MASKED_COPY m3, m12
|
MASKED_COPY m3, m12
|
||||||
|
|
||||||
weakfilter_macro_%2%1:
|
.weakfilter:
|
||||||
not r14; strong mask -> weak mask
|
not r14; strong mask -> weak mask
|
||||||
and r14, r13; final weak filtering mask, bits 0 and 1
|
and r14, r13; final weak filtering mask, bits 0 and 1
|
||||||
cmp r14, 0;
|
cmp r14, 0;
|
||||||
je ready_macro_%2%1
|
je .store
|
||||||
|
|
||||||
; weak filtering mask
|
; weak filtering mask
|
||||||
mov r2, r14
|
mov r2, r14
|
||||||
@ -693,10 +693,6 @@ weakfilter_macro_%2%1:
|
|||||||
|
|
||||||
psubw m8, m4, m12 ; q0 - delta0
|
psubw m8, m4, m12 ; q0 - delta0
|
||||||
MASKED_COPY m4, m8
|
MASKED_COPY m4, m8
|
||||||
ready_macro_%2%1:
|
|
||||||
jmp to_store_%2%1
|
|
||||||
bypasswrite_macro_%2%1:
|
|
||||||
jmp bypass%2luma_10
|
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
@ -774,9 +770,9 @@ cglobal hevc_v_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc
|
|||||||
add r0, r5
|
add r0, r5
|
||||||
TRANSPOSE8x8B_LOAD PASS8ROWS(r6, r0, r1, r5)
|
TRANSPOSE8x8B_LOAD PASS8ROWS(r6, r0, r1, r5)
|
||||||
LUMA_DEBLOCK_BODY 8, v
|
LUMA_DEBLOCK_BODY 8, v
|
||||||
to_store_v8:
|
.store:
|
||||||
TRANSPOSE8x8B_STORE PASS8ROWS(r6, r0, r1, r5)
|
TRANSPOSE8x8B_STORE PASS8ROWS(r6, r0, r1, r5)
|
||||||
bypassvluma_8:
|
.bypassluma:
|
||||||
RET
|
RET
|
||||||
|
|
||||||
cglobal hevc_v_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc
|
cglobal hevc_v_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc
|
||||||
@ -786,9 +782,9 @@ cglobal hevc_v_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc
|
|||||||
add pixq, r5
|
add pixq, r5
|
||||||
TRANSPOSE8x8W_LOAD PASS8ROWS(r6, pixq, strideq, r5)
|
TRANSPOSE8x8W_LOAD PASS8ROWS(r6, pixq, strideq, r5)
|
||||||
LUMA_DEBLOCK_BODY 10, v
|
LUMA_DEBLOCK_BODY 10, v
|
||||||
to_store_v10:
|
.store:
|
||||||
TRANSPOSE8x8W_STORE PASS8ROWS(r6, r0, r1, r5)
|
TRANSPOSE8x8W_STORE PASS8ROWS(r6, r0, r1, r5)
|
||||||
bypassvluma_10:
|
.bypassluma:
|
||||||
RET
|
RET
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
@ -817,7 +813,7 @@ cglobal hevc_h_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc, count, pix0
|
|||||||
punpcklbw m6, m8
|
punpcklbw m6, m8
|
||||||
punpcklbw m7, m8
|
punpcklbw m7, m8
|
||||||
LUMA_DEBLOCK_BODY 8, h
|
LUMA_DEBLOCK_BODY 8, h
|
||||||
to_store_h8:
|
.store:
|
||||||
packuswb m1, m1; p2
|
packuswb m1, m1; p2
|
||||||
packuswb m2, m2; p1
|
packuswb m2, m2; p1
|
||||||
packuswb m3, m3; p0
|
packuswb m3, m3; p0
|
||||||
@ -830,7 +826,7 @@ to_store_h8:
|
|||||||
movq [r0], m4; q0
|
movq [r0], m4; q0
|
||||||
movq [r0+r1], m5; q1
|
movq [r0+r1], m5; q1
|
||||||
movq [r0+2*r1], m6; q2
|
movq [r0+2*r1], m6; q2
|
||||||
bypasshluma_8:
|
.bypassluma:
|
||||||
RET
|
RET
|
||||||
|
|
||||||
cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix0, src3stride
|
cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix0, src3stride
|
||||||
@ -847,7 +843,7 @@ cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix
|
|||||||
movdqu m6, [pixq+2*strideq]; q2
|
movdqu m6, [pixq+2*strideq]; q2
|
||||||
movdqu m7, [pixq+src3strideq]; q3
|
movdqu m7, [pixq+src3strideq]; q3
|
||||||
LUMA_DEBLOCK_BODY 10, h
|
LUMA_DEBLOCK_BODY 10, h
|
||||||
to_store_h10:
|
.store:
|
||||||
pxor m8, m8; zeros reg
|
pxor m8, m8; zeros reg
|
||||||
CLIPW m1, m8, [pw_pixel_max]
|
CLIPW m1, m8, [pw_pixel_max]
|
||||||
CLIPW m2, m8, [pw_pixel_max]
|
CLIPW m2, m8, [pw_pixel_max]
|
||||||
@ -861,6 +857,6 @@ to_store_h10:
|
|||||||
movdqu [pixq], m4; q0
|
movdqu [pixq], m4; q0
|
||||||
movdqu [pixq+strideq], m5; q1
|
movdqu [pixq+strideq], m5; q1
|
||||||
movdqu [pixq+2*strideq], m6; q2
|
movdqu [pixq+2*strideq], m6; q2
|
||||||
bypasshluma_10:
|
.bypassluma:
|
||||||
RET
|
RET
|
||||||
%endif
|
%endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user