mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
h264pred: added AVX2 implementation for tm_vp8 16x16.
checkasm --bench results with 5000 runs:
pred16x16_tm_vp8_c: 302.8
pred16x16_tm_vp8_mmx: 101.4
pred16x16_tm_vp8_mmxext: 95.5
pred16x16_tm_vp8_sse2: 95.1
pred16x16_tm_vp8_avx2: 38.2

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
parent
f3cd2302a9
commit
5eb4f95bef
@ -268,6 +268,43 @@ cglobal pred16x16_tm_vp8_8, 2,6,6
|
|||||||
jg .loop
|
jg .loop
|
||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------------
; pred16x16_tm_vp8_8, AVX2 version (referenced from C as
; ff_pred16x16_tm_vp8_8_avx2).
; NOTE(review): presumably takes (uint8_t *src, int stride) like the other
; pred16x16 functions in this file -- confirm against the C prototype.
;
; Fills a 16x16 block of 8-bit pixels, for every row y and column x, with
;     clip_to_u8(left[y] + top[x] - topleft)
; where top is the row above the block, left is the column to its left and
; topleft is the pixel at their corner.
;
; Registers: m0 = (top[x] - topleft) as 16 words, constant over the loop
;            m1..m4 = the four rows produced per loop iteration
;-----------------------------------------------------------------------------
%if HAVE_AVX2_EXTERNAL ; build-time guard named after external AVX2 asm support
|
||||||
|
INIT_YMM avx2 ; mN = 256-bit ymm registers, xmN = their low 128 bits
|
||||||
|
cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration ; 2 args, 4 GPRs, 5 vector regs
|
||||||
|
sub dstq, strideq ; dst -> row directly above the block
|
||||||
|
pmovzxbw m0, [dstq] ; m0 = 16 top pixels, zero-extended to words
|
||||||
|
vpbroadcastb xm1, [r0-1] ; r0 is dst: byte left of the top row = top-left pixel
|
||||||
|
pmovzxbw m1, xm1 ; top-left pixel as 16 words
|
||||||
|
psubw m0, m1 ; m0 = top[x] - topleft (may be negative)
|
||||||
|
mov iterationd, 4 ; 4 iterations x 4 rows = 16 rows
|
||||||
|
lea stride3q, [strideq*3] ; stride3 = 3 * stride
|
||||||
|
.loop: ; invariant: dstq = row just above the 4 rows written in this pass
|
||||||
|
vpbroadcastb xm1, [dstq+strideq*1-1] ; left neighbour of row 1
|
||||||
|
vpbroadcastb xm2, [dstq+strideq*2-1] ; left neighbour of row 2
|
||||||
|
vpbroadcastb xm3, [dstq+stride3q-1] ; left neighbour of row 3
|
||||||
|
vpbroadcastb xm4, [dstq+strideq*4-1] ; left neighbour of row 4
|
||||||
|
pmovzxbw m1, xm1 ; widen each left pixel to 16 words
|
||||||
|
pmovzxbw m2, xm2
|
||||||
|
pmovzxbw m3, xm3
|
||||||
|
pmovzxbw m4, xm4
|
||||||
|
paddw m1, m0 ; row = left + top - topleft, still 16-bit
|
||||||
|
paddw m2, m0
|
||||||
|
paddw m3, m0
|
||||||
|
paddw m4, m0
|
||||||
|
vpackuswb m1, m1, m2 ; saturate to 0..255; packs per 128-bit lane, so rows 1/2 interleave by qword
|
||||||
|
vpackuswb m3, m3, m4 ; same for rows 3/4
|
||||||
|
vpermq m1, m1, q3120 ; swap middle qwords: low 128 = row 1, high 128 = row 2
|
||||||
|
vpermq m3, m3, q3120 ; low 128 = row 3, high 128 = row 4
|
||||||
|
movdqa [dstq+strideq*1], xm1 ; store row 1 (aligned store: row address must be 16-byte aligned)
|
||||||
|
vextracti128 [dstq+strideq*2], m1, 1 ; store row 2 from the upper 128 bits
|
||||||
|
movdqa [dstq+stride3q*1], xm3 ; store row 3 (aligned store)
|
||||||
|
vextracti128 [dstq+strideq*4], m3, 1 ; store row 4 from the upper 128 bits
|
||||||
|
lea dstq, [dstq+strideq*4] ; advance 4 rows; lea leaves flags untouched
|
||||||
|
dec iterationd ; one group of 4 rows done
|
||||||
|
jg .loop ; repeat while the counter is still > 0
|
||||||
|
REP_RET ; return via the x86inc return macro
|
||||||
|
%endif
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_pred16x16_plane_*_8(uint8_t *src, int stride)
|
; void ff_pred16x16_plane_*_8(uint8_t *src, int stride)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
@ -127,6 +127,7 @@ PRED16x16(plane_svq3, 8, ssse3)
|
|||||||
PRED16x16(tm_vp8, 8, mmx)
|
PRED16x16(tm_vp8, 8, mmx)
|
||||||
PRED16x16(tm_vp8, 8, mmxext)
|
PRED16x16(tm_vp8, 8, mmxext)
|
||||||
PRED16x16(tm_vp8, 8, sse2)
|
PRED16x16(tm_vp8, 8, sse2)
|
||||||
|
/* AVX2 variant of the 16x16 tm_vp8 predictor. NOTE(review): presumably this
 * macro declares the ff_pred16x16_tm_vp8_8_avx2 prototype that the init code
 * assigns -- confirm against the PRED16x16 definition. */
PRED16x16(tm_vp8, 8, avx2)
|
||||||
|
|
||||||
PRED8x8(top_dc, 8, mmxext)
|
PRED8x8(top_dc, 8, mmxext)
|
||||||
PRED8x8(dc_rv40, 8, mmxext)
|
PRED8x8(dc_rv40, 8, mmxext)
|
||||||
@ -323,6 +324,12 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(EXTERNAL_AVX2(cpu_flags)){
|
||||||
|
if (codec_id == AV_CODEC_ID_VP8) {
|
||||||
|
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_avx2;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if (bit_depth == 10) {
|
} else if (bit_depth == 10) {
|
||||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
|
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
|
||||||
|
Loading…
Reference in New Issue
Block a user