mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-03-23 04:24:35 +02:00
x86/rv34dsp: add ff_rv34_idct_dc_add_sse2
Also disable ff_rv34_idct_dc_add_mmx on x86_64, as the presence of SSE2 is guaranteed in such builds.
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
ab5c4d006d
commit
c8467abbad
libavcodec/x86
@ -64,6 +64,7 @@ rv34_idct dc
|
|||||||
rv34_idct dc_noround
|
rv34_idct dc_noround
|
||||||
|
|
||||||
; ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
|
; ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||||
|
%if ARCH_X86_32
|
||||||
INIT_MMX mmx
|
INIT_MMX mmx
|
||||||
cglobal rv34_idct_dc_add, 3, 3
|
cglobal rv34_idct_dc_add, 3, 3
|
||||||
; calculate DC
|
; calculate DC
|
||||||
@ -97,6 +98,7 @@ cglobal rv34_idct_dc_add, 3, 3
|
|||||||
movh [r2], m4
|
movh [r2], m4
|
||||||
movh [r2+r1], m5
|
movh [r2+r1], m5
|
||||||
RET
|
RET
|
||||||
|
%endif
|
||||||
|
|
||||||
; Load coeffs and perform row transform
|
; Load coeffs and perform row transform
|
||||||
; Output: coeffs in mm[0467], rounder in mm5
|
; Output: coeffs in mm[0467], rounder in mm5
|
||||||
@ -167,7 +169,7 @@ cglobal rv34_idct_add, 3,3,0, d, s, b
|
|||||||
ret
|
ret
|
||||||
|
|
||||||
; ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
|
; ff_rv34_idct_dc_add_{sse2,sse4}(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||||
INIT_XMM sse4
|
%macro RV34_IDCT_DC_ADD 0
|
||||||
cglobal rv34_idct_dc_add, 3, 3, 6
|
cglobal rv34_idct_dc_add, 3, 3, 6
|
||||||
; load data
|
; load data
|
||||||
IDCT_DC_ROUND r2
|
IDCT_DC_ROUND r2
|
||||||
@ -190,7 +192,22 @@ cglobal rv34_idct_dc_add, 3, 3, 6
|
|||||||
paddw m4, m0
|
paddw m4, m0
|
||||||
packuswb m2, m4
|
packuswb m2, m4
|
||||||
movd [r0], m2
|
movd [r0], m2
|
||||||
|
%if cpuflag(sse4)
|
||||||
pextrd [r0+r1], m2, 1
|
pextrd [r0+r1], m2, 1
|
||||||
pextrd [r2], m2, 2
|
pextrd [r2], m2, 2
|
||||||
pextrd [r2+r1], m2, 3
|
pextrd [r2+r1], m2, 3
|
||||||
|
%else
|
||||||
|
psrldq m2, 4
|
||||||
|
movd [r0+r1], m2
|
||||||
|
psrldq m2, 4
|
||||||
|
movd [r2], m2
|
||||||
|
psrldq m2, 4
|
||||||
|
movd [r2+r1], m2
|
||||||
|
%endif
|
||||||
RET
|
RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
INIT_XMM sse2
|
||||||
|
RV34_IDCT_DC_ADD
|
||||||
|
INIT_XMM sse4
|
||||||
|
RV34_IDCT_DC_ADD
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
void ff_rv34_idct_dc_mmxext(int16_t *block);
|
void ff_rv34_idct_dc_mmxext(int16_t *block);
|
||||||
void ff_rv34_idct_dc_noround_mmxext(int16_t *block);
|
void ff_rv34_idct_dc_noround_mmxext(int16_t *block);
|
||||||
void ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
|
void ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||||
|
void ff_rv34_idct_dc_add_sse2(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||||
void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
|
void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||||
void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
|
void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
|
||||||
|
|
||||||
@ -34,12 +35,14 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c)
|
|||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMX(cpu_flags))
|
if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags))
|
||||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
|
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
|
||||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext;
|
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext;
|
||||||
c->rv34_idct_add = ff_rv34_idct_add_mmxext;
|
c->rv34_idct_add = ff_rv34_idct_add_mmxext;
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_SSE2(cpu_flags))
|
||||||
|
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse2;
|
||||||
if (EXTERNAL_SSE4(cpu_flags))
|
if (EXTERNAL_SSE4(cpu_flags))
|
||||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
|
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user