Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2025-06-04 05:57:49 +02:00)
Add SSSE3 versions of the VP8 width-4 bilinear motion-compensation (MC) functions
Originally committed as revision 24013 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent 9862f9e149
commit b06855f18a
@ -85,6 +85,12 @@ extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride,
|
|||||||
extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, int dststride,
|
extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, int dststride,
|
||||||
uint8_t *src, int srcstride,
|
uint8_t *src, int srcstride,
|
||||||
int height, int mx, int my);
|
int height, int mx, int my);
|
||||||
|
extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride,
|
||||||
|
uint8_t *src, int srcstride,
|
||||||
|
int height, int mx, int my);
|
||||||
|
extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
|
||||||
|
uint8_t *src, int srcstride,
|
||||||
|
int height, int mx, int my);
|
||||||
|
|
||||||
extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
|
extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
|
||||||
uint8_t *src, int srcstride,
|
uint8_t *src, int srcstride,
|
||||||
@ -92,12 +98,13 @@ extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
|
|||||||
extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, int dststride,
|
extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, int dststride,
|
||||||
uint8_t *src, int srcstride,
|
uint8_t *src, int srcstride,
|
||||||
int height, int mx, int my);
|
int height, int mx, int my);
|
||||||
|
extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride,
|
||||||
|
uint8_t *src, int srcstride,
|
||||||
|
int height, int mx, int my);
|
||||||
extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
|
extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
|
||||||
uint8_t *src, int srcstride,
|
uint8_t *src, int srcstride,
|
||||||
int height, int mx, int my);
|
int height, int mx, int my);
|
||||||
extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
|
|
||||||
uint8_t *src, int srcstride,
|
|
||||||
int height, int mx, int my);
|
|
||||||
|
|
||||||
extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride,
|
extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride,
|
||||||
uint8_t *src, int srcstride,
|
uint8_t *src, int srcstride,
|
||||||
@ -207,6 +214,7 @@ HVBILIN(mmxext, 8, 8, 16)
|
|||||||
HVBILIN(mmxext, 8, 16, 16)
|
HVBILIN(mmxext, 8, 16, 16)
|
||||||
HVBILIN(sse2, 8, 8, 16)
|
HVBILIN(sse2, 8, 8, 16)
|
||||||
HVBILIN(sse2, 8, 16, 16)
|
HVBILIN(sse2, 8, 16, 16)
|
||||||
|
HVBILIN(ssse3, 8, 4, 8)
|
||||||
HVBILIN(ssse3, 8, 8, 16)
|
HVBILIN(ssse3, 8, 8, 16)
|
||||||
HVBILIN(ssse3, 8, 16, 16)
|
HVBILIN(ssse3, 8, 16, 16)
|
||||||
|
|
||||||
@ -284,6 +292,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
|||||||
VP8_MC_FUNC(2, 4, ssse3);
|
VP8_MC_FUNC(2, 4, ssse3);
|
||||||
VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
|
VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
|
||||||
VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
|
VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
|
||||||
|
VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & FF_MM_SSE4) {
|
if (mm_flags & FF_MM_SSE4) {
|
||||||
|
@ -770,7 +770,8 @@ FILTER_BILINEAR mmxext, 4, 0
|
|||||||
INIT_XMM
|
INIT_XMM
|
||||||
FILTER_BILINEAR sse2, 8, 7
|
FILTER_BILINEAR sse2, 8, 7
|
||||||
|
|
||||||
cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
|
%macro FILTER_BILINEAR_SSSE3 1
|
||||||
|
cglobal put_vp8_bilinear%1_v_ssse3, 7,7
|
||||||
shl r6d, 4
|
shl r6d, 4
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [bilinear_filter_vb_m]
|
lea r11, [bilinear_filter_vb_m]
|
||||||
@ -789,9 +790,16 @@ cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
|
|||||||
psraw m1, 2
|
psraw m1, 2
|
||||||
pavgw m0, m4
|
pavgw m0, m4
|
||||||
pavgw m1, m4
|
pavgw m1, m4
|
||||||
|
%if mmsize==8
|
||||||
|
packuswb m0, m0
|
||||||
|
packuswb m1, m1
|
||||||
|
movh [r0+r1*0], m0
|
||||||
|
movh [r0+r1*1], m1
|
||||||
|
%else
|
||||||
packuswb m0, m1
|
packuswb m0, m1
|
||||||
movh [r0+r1*0], m0
|
movh [r0+r1*0], m0
|
||||||
movhps [r0+r1*1], m0
|
movhps [r0+r1*1], m0
|
||||||
|
%endif
|
||||||
|
|
||||||
lea r0, [r0+r1*2]
|
lea r0, [r0+r1*2]
|
||||||
lea r2, [r2+r3*2]
|
lea r2, [r2+r3*2]
|
||||||
@ -799,7 +807,7 @@ cglobal put_vp8_bilinear8_v_ssse3, 7,7,5
|
|||||||
jg .nextrow
|
jg .nextrow
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
|
cglobal put_vp8_bilinear%1_h_ssse3, 7,7
|
||||||
shl r5d, 4
|
shl r5d, 4
|
||||||
%ifdef PIC
|
%ifdef PIC
|
||||||
lea r11, [bilinear_filter_vb_m]
|
lea r11, [bilinear_filter_vb_m]
|
||||||
@ -818,15 +826,28 @@ cglobal put_vp8_bilinear8_h_ssse3, 7,7,5
|
|||||||
psraw m1, 2
|
psraw m1, 2
|
||||||
pavgw m0, m4
|
pavgw m0, m4
|
||||||
pavgw m1, m4
|
pavgw m1, m4
|
||||||
|
%if mmsize==8
|
||||||
|
packuswb m0, m0
|
||||||
|
packuswb m1, m1
|
||||||
|
movh [r0+r1*0], m0
|
||||||
|
movh [r0+r1*1], m1
|
||||||
|
%else
|
||||||
packuswb m0, m1
|
packuswb m0, m1
|
||||||
movh [r0+r1*0], m0
|
movh [r0+r1*0], m0
|
||||||
movhps [r0+r1*1], m0
|
movhps [r0+r1*1], m0
|
||||||
|
%endif
|
||||||
|
|
||||||
lea r0, [r0+r1*2]
|
lea r0, [r0+r1*2]
|
||||||
lea r2, [r2+r3*2]
|
lea r2, [r2+r3*2]
|
||||||
sub r4, 2
|
sub r4, 2
|
||||||
jg .nextrow
|
jg .nextrow
|
||||||
REP_RET
|
REP_RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
INIT_MMX
|
||||||
|
FILTER_BILINEAR_SSSE3 4
|
||||||
|
INIT_XMM
|
||||||
|
FILTER_BILINEAR_SSSE3 8
|
||||||
|
|
||||||
cglobal put_vp8_pixels8_mmx, 5,5
|
cglobal put_vp8_pixels8_mmx, 5,5
|
||||||
.nextrow:
|
.nextrow:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user