mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
diracdsp: add SIMD for the 10 bit version of put_signed_rect_clamped
Signed-off-by: Rostislav Pehlivanov <rpehlivanov@obe.tv>
This commit is contained in:
parent
80721cc1ff
commit
bd61f3c6bf
@ -22,6 +22,8 @@
|
||||
|
||||
SECTION_RODATA
|
||||
pw_7: times 8 dw 7 ; eight 16-bit words, each holding 7
|
||||
convert_to_unsigned_10bit: times 4 dd 0x200 ; four dwords of 0x200: bias added to each 32-bit sample by put_signed_rect_clamped_10
|
||||
clip_10bit: times 8 dw 0x3ff ; eight words of 0x3ff: upper clamp bound for put_signed_rect_clamped_10 output
|
||||
|
||||
cextern pw_3
|
||||
cextern pw_16
|
||||
@ -300,3 +302,43 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
|
||||
jg .loop_v
|
||||
|
||||
RET
|
||||
|
||||
%if ARCH_X86_64 == 1
;-----------------------------------------------------------------------------
; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride,
;                                 const uint8_t *src, int src_stride,
;                                 int width, int height)
;
; For each of `height` rows: loads 32-bit samples from src, adds 0x200 to
; each, clamps the result to [0, 0x3ff] and stores it to dst as 16-bit words.
; Both strides are applied to the row pointers as byte offsets.
;
; The inner loop consumes 2*mmsize bytes of src and writes 1*mmsize bytes of
; dst per iteration while counting width down by 8, so a width that is not a
; multiple of 8 is effectively rounded up.
; NOTE(review): callers presumably guarantee padded buffers - confirm.
;
; Register roles:
;   r6 / r7 = start of the current src / dst row
;   r8      = saved width, reloaded into w at the start of every row
;   m2      = 0x3ff in every word   (upper clamp bound)
;   m3      = 0x200 in every dword  (signed -> unsigned bias)
;-----------------------------------------------------------------------------
cglobal put_signed_rect_clamped_10, 6, 9, 4, dst, dst_stride, src, src_stride, w, h
    ; The strides are C ints: the upper 32 bits of their registers are not
    ; guaranteed to be sign-extended, so widen them before they are added to
    ; 64-bit pointers.
    movsxd   dst_strideq, dst_strided
    movsxd   src_strideq, src_strided

    mov      r6, srcq
    mov      r7, dstq
    mov      r8, wq
    mova     m2, [clip_10bit]
    mova     m3, [convert_to_unsigned_10bit]

.loop_h:
    mov      srcq, r6
    mov      dstq, r7
    mov      wq, r8

.loop_w:
    movu     m0, [srcq+0*mmsize]
    movu     m1, [srcq+1*mmsize]

    paddd    m0, m3
    paddd    m1, m3
    packusdw m0, m0, m1             ; unsigned saturation: result is 0..0xffff
    ; The lower bound (0) is already enforced by packusdw. The upper bound
    ; must use an UNSIGNED min: a signed clip would treat 0x8000..0xffff as
    ; negative and clamp those samples to 0 instead of 0x3ff.
    pminuw   m0, m2

    movu     [dstq], m0

    add      srcq, 2*mmsize
    add      dstq, 1*mmsize
    sub      wd, 8
    jg       .loop_w

    add      r6, src_strideq        ; advance to the next src row
    add      r7, dst_strideq        ; advance to the next dst row
    sub      hd, 1
    jg       .loop_h

    RET
%endif
|
||||
|
@ -45,6 +45,9 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i
|
||||
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||
#if ARCH_X86_64 /* declared for x86-64 builds only */
|
||||
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
|
||||
#endif /* ARCH_X86_64 */
|
||||
|
||||
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
|
||||
|
||||
@ -189,5 +192,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
|
||||
|
||||
if (EXTERNAL_SSE4(mm_flags)) {
|
||||
c->dequant_subband[1] = ff_dequant_subband_32_sse4;
|
||||
c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user