You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
x86: huffyuvdsp: add SSE2 median prediction
From 5010c to 4566 on lagarith YUY2. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
committed by
Michael Niedermayer
parent
b3dfebd641
commit
884078d2df
@@ -33,64 +33,86 @@ SECTION_TEXT
|
|||||||
; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
|
; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
|
||||||
; const uint8_t *diff, int w,
|
; const uint8_t *diff, int w,
|
||||||
; int *left, int *left_top)
|
; int *left, int *left_top)
|
||||||
INIT_MMX mmxext
|
%macro LSHIFT 2
|
||||||
cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top
|
%if mmsize > 8
|
||||||
movq mm0, [topq]
|
pslldq %1, %2
|
||||||
movq mm2, mm0
|
%else
|
||||||
movd mm4, [left_topq]
|
psllq %1, 8*(%2)
|
||||||
psllq mm2, 8
|
%endif
|
||||||
movq mm1, mm0
|
%endmacro
|
||||||
por mm4, mm2
|
|
||||||
movd mm3, [leftq]
|
%macro RSHIFT 2
|
||||||
psubb mm0, mm4 ; t-tl
|
%if mmsize > 8
|
||||||
|
psrldq %1, %2
|
||||||
|
%else
|
||||||
|
psrlq %1, 8*(%2)
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro HFYU_MEDIAN 0
|
||||||
|
cglobal add_hfyu_median_pred, 6,6,8, dst, top, diff, w, left, left_top
|
||||||
|
movu m0, [topq]
|
||||||
|
mova m2, m0
|
||||||
|
movd m4, [left_topq]
|
||||||
|
LSHIFT m2, 1
|
||||||
|
mova m1, m0
|
||||||
|
por m4, m2
|
||||||
|
movd m3, [leftq]
|
||||||
|
psubb m0, m4 ; t-tl
|
||||||
add dstq, wq
|
add dstq, wq
|
||||||
add topq, wq
|
add topq, wq
|
||||||
add diffq, wq
|
add diffq, wq
|
||||||
neg wq
|
neg wq
|
||||||
jmp .skip
|
jmp .skip
|
||||||
.loop:
|
.loop:
|
||||||
movq mm4, [topq+wq]
|
movu m4, [topq+wq]
|
||||||
movq mm0, mm4
|
mova m0, m4
|
||||||
psllq mm4, 8
|
LSHIFT m4, 1
|
||||||
por mm4, mm1
|
por m4, m1
|
||||||
movq mm1, mm0 ; t
|
mova m1, m0 ; t
|
||||||
psubb mm0, mm4 ; t-tl
|
psubb m0, m4 ; t-tl
|
||||||
.skip:
|
.skip:
|
||||||
movq mm2, [diffq+wq]
|
movu m2, [diffq+wq]
|
||||||
%assign i 0
|
%assign i 0
|
||||||
%rep 8
|
%rep mmsize
|
||||||
movq mm4, mm0
|
mova m4, m0
|
||||||
paddb mm4, mm3 ; t-tl+l
|
paddb m4, m3 ; t-tl+l
|
||||||
movq mm5, mm3
|
mova m5, m3
|
||||||
pmaxub mm3, mm1
|
pmaxub m3, m1
|
||||||
pminub mm5, mm1
|
pminub m5, m1
|
||||||
pminub mm3, mm4
|
pminub m3, m4
|
||||||
pmaxub mm3, mm5 ; median
|
pmaxub m3, m5 ; median
|
||||||
paddb mm3, mm2 ; +residual
|
paddb m3, m2 ; +residual
|
||||||
%if i==0
|
%if i==0
|
||||||
movq mm7, mm3
|
mova m7, m3
|
||||||
psllq mm7, 56
|
LSHIFT m7, mmsize-1
|
||||||
%else
|
%else
|
||||||
movq mm6, mm3
|
mova m6, m3
|
||||||
psrlq mm7, 8
|
RSHIFT m7, 1
|
||||||
psllq mm6, 56
|
LSHIFT m6, mmsize-1
|
||||||
por mm7, mm6
|
por m7, m6
|
||||||
%endif
|
%endif
|
||||||
%if i<7
|
%if i<mmsize-1
|
||||||
psrlq mm0, 8
|
RSHIFT m0, 1
|
||||||
psrlq mm1, 8
|
RSHIFT m1, 1
|
||||||
psrlq mm2, 8
|
RSHIFT m2, 1
|
||||||
%endif
|
%endif
|
||||||
%assign i i+1
|
%assign i i+1
|
||||||
%endrep
|
%endrep
|
||||||
movq [dstq+wq], mm7
|
movu [dstq+wq], m7
|
||||||
add wq, 8
|
add wq, mmsize
|
||||||
jl .loop
|
jl .loop
|
||||||
movzx r2d, byte [dstq-1]
|
movzx r2d, byte [dstq-1]
|
||||||
mov [leftq], r2d
|
mov [leftq], r2d
|
||||||
movzx r2d, byte [topq-1]
|
movzx r2d, byte [topq-1]
|
||||||
mov [left_topq], r2d
|
mov [left_topq], r2d
|
||||||
RET
|
RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
INIT_MMX mmxext
|
||||||
|
HFYU_MEDIAN
|
||||||
|
INIT_XMM sse2
|
||||||
|
HFYU_MEDIAN
|
||||||
|
|
||||||
|
|
||||||
%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
|
%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
|
||||||
|
@@ -32,6 +32,9 @@ void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
|
|||||||
void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
|
void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
|
||||||
const uint8_t *diff, int w,
|
const uint8_t *diff, int w,
|
||||||
int *left, int *left_top);
|
int *left, int *left_top);
|
||||||
|
void ff_add_hfyu_median_pred_sse2(uint8_t *dst, const uint8_t *top,
|
||||||
|
const uint8_t *diff, int w,
|
||||||
|
int *left, int *left_top);
|
||||||
|
|
||||||
int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
|
int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
|
||||||
int w, int left);
|
int w, int left);
|
||||||
@@ -58,6 +61,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
|
|||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->add_bytes = ff_add_bytes_sse2;
|
c->add_bytes = ff_add_bytes_sse2;
|
||||||
|
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
|
Reference in New Issue
Block a user