Mirror of https://github.com/FFmpeg/FFmpeg.git, synced 2025-04-08 16:54:03 +02:00
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  lavr: fix handling of custom mix matrices
  fate: force pix_fmt in lagarith-rgb32 test
  fate: add tests for lagarith lossless video codec.
  ARMv6: vp8: fix stack allocation with Apple's assembler
  ARM: vp56: allow inline asm to build with clang
  fft: 3dnow: fix register name typo in DECL_IMDCT macro
  x86: dct32: port to cpuflags
  x86: build: replace mmx2 by mmxext
  Revert "wmapro: prevent division by zero when sample rate is unspecified"
  wmapro: prevent division by zero when sample rate is unspecified
  lagarith: fix color plane inversion for YUY2 output.
  lagarith: pad RGB buffer by 1 byte.
  dsputil: make add_hfyu_left_prediction_sse4() support unaligned src.

Conflicts:
	doc/APIchanges
	libavcodec/lagarith.c
	libavfilter/x86/gradfun.c
	libavutil/cpu.h
	libavutil/version.h
	libswscale/utils.c
	libswscale/version.h
	libswscale/x86/yuv2rgb.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
commit e776ee8f29
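Most of this merge is mechanical fallout from "x86: build: replace mmx2 by mmxext": the CPU-feature macro and flag are renamed, with the old spellings kept as compatibility aliases. A minimal sketch of how a caller picks up the renamed runtime flag (illustrative only, not part of the diff; init_dsp_example is a made-up name):

    #include "libavutil/cpu.h"

    static void init_dsp_example(void)
    {
        int flags = av_get_cpu_flags();   /* runtime CPU feature bits */

        if (flags & AV_CPU_FLAG_MMXEXT) { /* formerly AV_CPU_FLAG_MMX2 */
            /* install MMXEXT-optimized function pointers here */
        }
    }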
diff --git a/Doxyfile b/Doxyfile
@@ -1378,7 +1378,7 @@ PREDEFINED = "__attribute__(x)=" \
                          "DEF(x)=x ## _TMPL" \
                          HAVE_AV_CONFIG_H \
                          HAVE_MMX \
-                         HAVE_MMX2 \
+                         HAVE_MMXEXT \
                          HAVE_AMD3DNOW \
                          "DECLARE_ALIGNED(a,t,n)=t n" \
                          "offsetof(x,y)=0x42"
diff --git a/configure b/configure
@@ -267,7 +267,7 @@ Optimization options (experts only):
   --disable-amd3dnow       disable 3DNow! optimizations
   --disable-amd3dnowext    disable 3DNow! extended optimizations
   --disable-mmx            disable MMX optimizations
-  --disable-mmx2           disable MMX2 optimizations
+  --disable-mmxext         disable MMXEXT optimizations
   --disable-sse            disable SSE optimizations
   --disable-ssse3          disable SSSE3 optimizations
   --disable-avx            disable AVX optimizations
@@ -1182,7 +1182,7 @@ ARCH_EXT_LIST='
     fma4
     mmi
     mmx
-    mmx2
+    mmxext
     neon
     ppc4xx
     sse
@@ -1459,7 +1459,7 @@ x86_64_suggest="cmov fast_cmov"
 amd3dnow_deps="mmx"
 amd3dnowext_deps="amd3dnow"
 mmx_deps="x86"
-mmx2_deps="mmx"
+mmxext_deps="mmx"
 sse_deps="mmx"
 ssse3_deps="sse"
 avx_deps="ssse3"
@@ -3194,9 +3194,9 @@ EOF
 # check whether xmm clobbers are supported
 check_asm xmm_clobbers '"":::"%xmm0"'
 
-# check whether binutils is new enough to compile SSSE3/MMX2
+# check whether binutils is new enough to compile SSSE3/MMXEXT
 enabled ssse3  && check_asm ssse3  '"pabsw %xmm0, %xmm0"'
-enabled mmx2   && check_asm mmx2   '"pmaxub %mm0, %mm1"'
+enabled mmxext && check_asm mmxext '"pmaxub %mm0, %mm1"'
 
 if ! disabled_any asm mmx yasm; then
     if check_cmd $yasmexe --version; then
@@ -3748,7 +3748,7 @@ echo "runtime cpu detection ${runtime_cpudetect-no}"
 if enabled x86; then
     echo "${yasmexe}                 ${yasm-no}"
     echo "MMX enabled               ${mmx-no}"
-    echo "MMX2 enabled              ${mmx2-no}"
+    echo "MMXEXT enabled            ${mmxext-no}"
     echo "3DNow! enabled            ${amd3dnow-no}"
     echo "3DNow! extended enabled   ${amd3dnowext-no}"
     echo "SSE enabled               ${sse-no}"
@@ -4019,6 +4019,7 @@ cat > $TMPH <<EOF
 #define EXTERN_PREFIX "${extern_prefix}"
 #define EXTERN_ASM ${extern_prefix}
 #define SLIBSUF "$SLIBSUF"
+#define HAVE_MMX2 HAVE_MMXEXT
 EOF
 
 test -n "$assert_level" &&
@@ -70,6 +70,11 @@ API changes, most recent first:
 2012-03-26 - a67d9cf - lavfi 2.66.100
   Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions.
 
+2012-08-03 - xxxxxxx - lavu 51.37.1 - cpu.h
+                       lsws 2.1.1   - swscale.h
+  Rename AV_CPU_FLAG_MMX2  ---> AV_CPU_FLAG_MMXEXT.
+  Rename SWS_CPU_CAPS_MMX2 ---> SWS_CPU_CAPS_MMXEXT.
+
 2012-07-xx - xxxxxxx - lavf 54.13.0 - avformat.h
   Add AVFMT_FLAG_NOBUFFER for low latency use cases.
 
@@ -29,6 +29,14 @@
 #   define T(x)
 #endif
 
+#if CONFIG_THUMB || defined __clang__
+#   define L(x)
+#   define U(x) x
+#else
+#   define L(x) x
+#   define U(x)
+#endif
+
 #if HAVE_ARMV6 && HAVE_INLINE_ASM
 
 #define vp56_rac_get_prob vp56_rac_get_prob_armv6
@@ -42,8 +50,8 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr)
     __asm__ ("adds    %3, %3, %0       \n"
              "itt     cs               \n"
              "cmpcs   %7, %4           \n"
-    A("ldrcsh %2, [%4], #2             \n")
-    T("ldrhcs %2, [%4], #2             \n")
+    L("ldrcsh %2, [%4], #2             \n")
+    U("ldrhcs %2, [%4], #2             \n")
             "rsb      %0, %6, #256     \n"
             "smlabb   %0, %5, %6, %0   \n"
     T("itttt  cs                       \n")
@@ -80,8 +88,8 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr)
     __asm__ ("adds    %3, %3, %0       \n"
             "itt      cs               \n"
             "cmpcs    %7, %4           \n"
-    A("ldrcsh %2, [%4], #2             \n")
-    T("ldrhcs %2, [%4], #2             \n")
+    L("ldrcsh %2, [%4], #2             \n")
+    U("ldrhcs %2, [%4], #2             \n")
             "rsb      %0, %6, #256     \n"
            "smlabb    %0, %5, %6, %0   \n"
     T("itttt  cs                       \n")
@@ -1226,7 +1226,13 @@ vp8_mc_1 bilin, 8, v
 vp8_mc_1 bilin, 4, h
 vp8_mc_1 bilin, 4, v
 
-#define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1)
+/* True relational expressions have the value -1 in the GNU assembler,
+   +1 in Apple's. */
+#ifdef __APPLE__
+#   define TMPSIZE \size * (8 + 8*(\size > 4) + \ytaps - 1)
+#else
+#   define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1)
+#endif
 
 .macro vp8_mc_hv name, size, h, v, ytaps
 function ff_put_vp8_\name\size\()_\h\v\()_armv6, export=1
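The flipped sign in the __APPLE__ branch above works around diverging assembler semantics: GNU as evaluates a true relational expression to -1, Apple's assembler to +1. A standalone C sketch of the arithmetic, with the truth values hard-coded to mimic each assembler (example values; not part of the diff):

    #include <assert.h>

    int main(void)
    {
        int size = 8, ytaps = 6;            /* example macro arguments */
        int gnu_true = -1, apple_true = +1; /* value of (size > 4) */

        int tmp_gnu   = size * (8 - 8 * gnu_true   + ytaps - 1); /* GNU variant   */
        int tmp_apple = size * (8 + 8 * apple_true + ytaps - 1); /* Apple variant */

        assert(tmp_gnu == tmp_apple); /* both give 8 * 21 = 168 */
        return 0;
    }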
@@ -87,7 +87,7 @@ static const struct algo fdct_tab[] = {
 
 #if HAVE_MMX && HAVE_INLINE_ASM
     { "MMX",    ff_fdct_mmx,  NO_PERM, AV_CPU_FLAG_MMX },
-    { "MMX2",   ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMX2 },
+    { "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT },
     { "SSE2",   ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
 #endif
 
@@ -132,7 +132,7 @@ static const struct algo idct_tab[] = {
 #endif
     { "SIMPLE-MMX",  ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
     { "XVID-MMX",    ff_idct_xvid_mmx,   NO_PERM,         AV_CPU_FLAG_MMX,    1 },
-    { "XVID-MMX2",   ff_idct_xvid_mmx2,  NO_PERM,         AV_CPU_FLAG_MMX2,   1 },
+    { "XVID-MMXEXT", ff_idct_xvid_mmx2,  NO_PERM,         AV_CPU_FLAG_MMXEXT, 1 },
     { "XVID-SSE2",   ff_idct_xvid_sse2,  SSE2_PERM,       AV_CPU_FLAG_SSE2,   1 },
 #if ARCH_X86_64 && HAVE_YASM
     { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
@@ -116,8 +116,8 @@ int main(int argc, char **argv)
     AVCodecContext *ctx;
     int c;
     DSPContext cctx, mmxctx;
-    int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMX2 };
-    int flags_size = HAVE_MMX2 ? 2 : 1;
+    int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMXEXT };
+    int flags_size = HAVE_MMXEXT ? 2 : 1;
 
     if (argc > 1) {
         help();
@@ -68,7 +68,7 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
 %define LOOP_ALIGN
 INIT_MMX
 AC3_EXPONENT_MIN mmx
-%if HAVE_MMX2
+%if HAVE_MMXEXT
 %define PMINUB PMINUB_MMXEXT
 %define LOOP_ALIGN ALIGN 16
 AC3_EXPONENT_MIN mmxext
@@ -65,7 +65,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
             c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
         }
     }
-    if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) {
         c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
     }
@@ -486,7 +486,7 @@ void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx)
     int mm_flags = av_get_cpu_flags();
 
 #if HAVE_INLINE_ASM
-    if (mm_flags & AV_CPU_FLAG_MMX2)  ff_cavsdsp_init_mmx2 (c, avctx);
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx);
     if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
 #endif /* HAVE_INLINE_ASM */
 }
@@ -42,39 +42,24 @@ ps_cos_vec: dd 0.500603, 0.505471, 0.515447, 0.531043
 align 32
 ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
 
-%macro BUTTERFLY_SSE 4
-    movaps %4, %1
-    subps  %1, %2
-    addps  %2, %4
-    mulps  %1, %3
+%macro BUTTERFLY 4
+    subps  %4, %1, %2
+    addps  %2, %2, %1
+    mulps  %1, %4, %3
 %endmacro
 
-%macro BUTTERFLY_AVX 4
-    vsubps %4, %1, %2
-    vaddps %2, %2, %1
-    vmulps %1, %4, %3
-%endmacro
-
-%macro BUTTERFLY0_SSE 5
-    movaps %4, %1
-    shufps %1, %1, %5
-    xorps  %4, %2
-    addps  %1, %4
-    mulps  %1, %3
-%endmacro
-
-%macro BUTTERFLY0_SSE2 5
+%macro BUTTERFLY0 5
+%if cpuflag(sse2) && notcpuflag(avx)
     pshufd %4, %1, %5
     xorps  %1, %2
     addps  %1, %4
     mulps  %1, %3
-%endmacro
-
-%macro BUTTERFLY0_AVX 5
-    vshufps %4, %1, %1, %5
-    vxorps  %1, %1, %2
-    vaddps  %4, %4, %1
-    vmulps  %1, %4, %3
+%else
+    shufps %4, %1, %1, %5
+    xorps  %1, %1, %2
+    addps  %4, %4, %1
+    mulps  %1, %4, %3
+%endif
 %endmacro
 
 %macro BUTTERFLY2 4
@@ -206,14 +191,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
     movss [outq+116], m6
 %endmacro
 
-%define BUTTERFLY  BUTTERFLY_AVX
-%define BUTTERFLY0 BUTTERFLY0_AVX
-
-INIT_YMM
+INIT_YMM avx
 SECTION_TEXT
 %if HAVE_AVX
 ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_avx, 2,3,8, out, in, tmp
+cglobal dct32_float, 2,3,8, out, in, tmp
     ; pass 1
     vmovaps     m4, [inq+0]
     vinsertf128 m5, m5, [inq+96], 1
@@ -286,9 +268,6 @@ INIT_XMM
     RET
 %endif
 
-%define BUTTERFLY  BUTTERFLY_SSE
-%define BUTTERFLY0 BUTTERFLY0_SSE
-
 %if ARCH_X86_64
 %define SPILL SWAP
 %define UNSPILL SWAP
@@ -411,10 +390,9 @@ INIT_XMM
 %endif
 
 
-INIT_XMM
-%macro DCT32_FUNC 1
 ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_%1, 2,3,16, out, in, tmp
+%macro DCT32_FUNC 0
+cglobal dct32_float, 2, 3, 16, out, in, tmp
     ; pass 1
 
     movaps      m0, [inq+0]
@@ -498,18 +476,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp
     RET
 %endmacro
 
-%macro LOAD_INV_SSE 2
+%macro LOAD_INV 2
+%if cpuflag(sse2)
+    pshufd %1, %2, 0x1b
+%elif cpuflag(sse)
     movaps %1, %2
     shufps %1, %1, 0x1b
+%endif
 %endmacro
 
-%define LOAD_INV LOAD_INV_SSE
-DCT32_FUNC sse
-
-%macro LOAD_INV_SSE2 2
-    pshufd %1, %2, 0x1b
-%endmacro
-
-%define LOAD_INV   LOAD_INV_SSE2
-%define BUTTERFLY0 BUTTERFLY0_SSE2
-DCT32_FUNC sse2
+INIT_XMM sse
+DCT32_FUNC
+INIT_XMM sse2
+DCT32_FUNC
@@ -3171,7 +3171,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
             c->idct_add = ff_idct_xvid_sse2_add;
             c->idct     = ff_idct_xvid_sse2;
             c->idct_permutation_type = FF_SSE2_IDCT_PERM;
-        } else if (mm_flags & AV_CPU_FLAG_MMX2) {
+        } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->idct_put = ff_idct_xvid_mmx2_put;
             c->idct_add = ff_idct_xvid_mmx2_add;
             c->idct     = ff_idct_xvid_mmx2;
@@ -3187,7 +3187,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
         dsputil_init_mmx(c, avctx, mm_flags);
     }
 
-    if (mm_flags & AV_CPU_FLAG_MMX2)
+    if (mm_flags & AV_CPU_FLAG_MMXEXT)
         dsputil_init_mmx2(c, avctx, mm_flags);
 
     if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW)
@@ -388,12 +388,16 @@ cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top
     RET
 
 
-%macro ADD_HFYU_LEFT_LOOP 1 ; %1 = is_aligned
+%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
     add     srcq, wq
     add     dstq, wq
     neg     wq
 %%.loop:
+%if %2
     mova    m1, [srcq+wq]
+%else
+    movu    m1, [srcq+wq]
+%endif
     mova    m2, m1
     psllw   m1, 8
     paddb   m1, m2
@@ -435,7 +439,7 @@ cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left
     mova    m3, [pb_zz11zz55zz99zzdd]
     movd    m0, leftm
     psllq   m0, 56
-    ADD_HFYU_LEFT_LOOP 1
+    ADD_HFYU_LEFT_LOOP 1, 1
 
 INIT_XMM
 cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
@@ -446,12 +450,14 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
     movd    m0, leftm
     pslldq  m0, 15
     test    srcq, 15
-    jnz add_hfyu_left_prediction_ssse3.skip_prologue
+    jnz .src_unaligned
     test    dstq, 15
-    jnz .unaligned
-    ADD_HFYU_LEFT_LOOP 1
-.unaligned:
-    ADD_HFYU_LEFT_LOOP 0
+    jnz .dst_unaligned
+    ADD_HFYU_LEFT_LOOP 1, 1
+.dst_unaligned:
+    ADD_HFYU_LEFT_LOOP 0, 1
+.src_unaligned:
+    ADD_HFYU_LEFT_LOOP 0, 0
 
 
 ; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
@@ -1112,7 +1112,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
        (dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
         if(mm_flags & AV_CPU_FLAG_SSE2){
             c->fdct = ff_fdct_sse2;
-        }else if(mm_flags & AV_CPU_FLAG_MMX2){
+        } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->fdct = ff_fdct_mmx2;
         }else{
             c->fdct = ff_fdct_mmx;
@@ -1145,8 +1145,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
 
         c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
 
-
-        if (mm_flags & AV_CPU_FLAG_MMX2) {
+        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->sum_abs_dctelem= sum_abs_dctelem_mmx2;
             c->vsad[4]= vsad_intra16_mmx2;
 
@@ -1187,7 +1186,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
         c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
 
-        if (mm_flags & AV_CPU_FLAG_MMX2) {
+        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2;
             c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2;
         }
@@ -1041,7 +1041,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input
     mova [r1+r5*8], m0
     mova [r1+r6*8], m2
     add  r4, 2
-    sub  r4, 2
+    sub  r3, 2
 %else
 %if ARCH_X86_64
     movzx r5, word [rrevtab+r4-4]
@@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth,
         }
     }
 
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2;
         h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_mmx2;
         if (chroma_format_idc == 1)
@@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth,
             }
         }
     } else if (bit_depth == 10) {
-        if (mm_flags & AV_CPU_FLAG_MMX2) {
+        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             h->pred4x4[DC_PRED     ] = ff_pred4x4_dc_10_mmxext;
             h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
 
@@ -218,7 +218,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 #if HAVE_YASM
     int mm_flags = av_get_cpu_flags();
 
-    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2)
+    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT)
         c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;
 
     if (bit_depth == 8) {
@@ -236,7 +236,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
         if (mm_flags & AV_CPU_FLAG_CMOV)
             c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
 
-        if (mm_flags & AV_CPU_FLAG_MMX2) {
+        if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmx2;
             c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
             c->h264_idct_add16   = ff_h264_idct_add16_8_mmx2;
@@ -304,7 +304,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
         }
     } else if (bit_depth == 10) {
         if (mm_flags & AV_CPU_FLAG_MMX) {
-            if (mm_flags & AV_CPU_FLAG_MMX2) {
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
 #if ARCH_X86_32
                 c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmx2;
                 c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
@@ -444,7 +444,7 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
         c->sad[0]= sad16_mmx;
         c->sad[1]= sad8_mmx;
     }
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         c->pix_abs[0][0] = sad16_mmx2;
         c->pix_abs[1][0] = sad8_mmx2;
 
@@ -595,15 +595,15 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
 #define HAVE_SSSE3 0
 
 #undef HAVE_SSE2
-#undef HAVE_MMX2
+#undef HAVE_MMXEXT
 #define HAVE_SSE2 0
-#define HAVE_MMX2 0
+#define HAVE_MMXEXT 0
 #define RENAME(a) a ## _MMX
 #define RENAMEl(a) a ## _mmx
 #include "mpegvideo_mmx_template.c"
 
-#undef HAVE_MMX2
-#define HAVE_MMX2 1
+#undef HAVE_MMXEXT
+#define HAVE_MMXEXT 1
 #undef RENAME
 #undef RENAMEl
 #define RENAME(a) a ## _MMX2
@@ -660,7 +660,7 @@ void ff_MPV_common_init_mmx(MpegEncContext *s)
 #endif
         if(mm_flags & AV_CPU_FLAG_SSE2){
             s->dct_quantize= dct_quantize_SSE2;
-        } else if(mm_flags & AV_CPU_FLAG_MMX2){
+        } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
             s->dct_quantize= dct_quantize_MMX2;
         } else {
             s->dct_quantize= dct_quantize_MMX;
@@ -48,7 +48,7 @@
 #define MMREG_WIDTH "8"
 #define MM "%%mm"
 #define MOVQ "movq"
-#if HAVE_MMX2
+#if HAVE_MMXEXT
 #define SPREADW(a) "pshufw $0, "a", "a" \n\t"
 #define PMAXW(a,b) "pmaxsw "a", "b" \n\t"
 #define PMAX(a,b) \
@@ -41,7 +41,7 @@ void ff_pngdsp_init_x86(PNGDSPContext *dsp)
     if (flags & AV_CPU_FLAG_MMX)
         dsp->add_bytes_l2         = ff_add_bytes_l2_mmx;
 #endif
-    if (flags & AV_CPU_FLAG_MMX2)
+    if (flags & AV_CPU_FLAG_MMXEXT)
         dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
     if (flags & AV_CPU_FLAG_SSE2)
         dsp->add_bytes_l2         = ff_add_bytes_l2_sse2;
@@ -37,7 +37,7 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
 
     if (mm_flags & AV_CPU_FLAG_MMX)
         c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2;
         c->rv34_idct_add = ff_rv34_idct_add_mmx2;
     }
@@ -204,7 +204,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
         QPEL_MC_SET(put_, _mmx)
 #endif
     }
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2;
         c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2;
         c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2;
@@ -889,7 +889,7 @@ void ff_dwt_init_x86(DWTContext *c)
             c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
         }
         else{
-            if(mm_flags & AV_CPU_FLAG_MMX2){
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
                 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
 #if HAVE_7REGS
                 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
@@ -760,7 +760,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
     }
 
-    if (mm_flags & AV_CPU_FLAG_MMX2){
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
         dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2;
         dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2;
@@ -810,7 +810,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
     if (mm_flags & AV_CPU_FLAG_MMX) {
     }
 
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         ASSIGN_LF(mmx2);
     }
 
@@ -49,7 +49,7 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
     }
 #endif
 
-    if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) {
+    if (HAVE_MMXEXT && cpuflags & AV_CPU_FLAG_MMXEXT) {
         c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;
 
         if (!(flags & CODEC_FLAG_BITEXACT)) {
@@ -350,7 +350,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
 
     /* note that 4-tap width=16 functions are missing because w=16
      * is only used for luma, and luma is always a copy or sixtap. */
-    if (mm_flags & AV_CPU_FLAG_MMX2) {
+    if (mm_flags & AV_CPU_FLAG_MMXEXT) {
         VP8_MC_FUNC(2, 4, mmx2);
         VP8_BILINEAR_MC_FUNC(2, 4, mmx2);
 #if ARCH_X86_32
@@ -28,7 +28,7 @@
 DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
 DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
 
-#if HAVE_MMX2
+#if HAVE_MMXEXT
 static void gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
 {
     intptr_t x;
@@ -173,8 +173,8 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf)
     int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_INLINE_ASM
-#if HAVE_MMX2
-    if (cpu_flags & AV_CPU_FLAG_MMX2)
+#if HAVE_MMXEXT
+    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
         gf->filter_line = gradfun_filter_line_mmx2;
 #endif
 #if HAVE_SSSE3
@@ -45,7 +45,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x0001000100010001ULL};
 #undef COMPILE_TEMPLATE_SSE
 #endif
 
-#if HAVE_MMX2
+#if HAVE_MMXEXT
 #undef RENAME
 #define RENAME(a) a ## _mmx2
 #include "yadif_template.c"
@@ -58,8 +58,8 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
     int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_INLINE_ASM
-#if HAVE_MMX2
-    if (cpu_flags & AV_CPU_FLAG_MMX2)
+#if HAVE_MMXEXT
+    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
         yadif->filter_line = yadif_filter_line_mmx2;
 #endif
 #if HAVE_SSE
@@ -314,7 +314,15 @@ int ff_audio_mix_init(AVAudioResampleContext *avr)
     }
 
     /* build matrix if the user did not already set one */
-    if (!avr->am->matrix) {
+    if (avr->am->matrix) {
+        if (avr->am->coeff_type != avr->mix_coeff_type    ||
+            avr->am->in_layout  != avr->in_channel_layout ||
+            avr->am->out_layout != avr->out_channel_layout) {
+            av_log(avr, AV_LOG_ERROR,
+                   "Custom matrix does not match current parameters\n");
+            return AVERROR(EINVAL);
+        }
+    } else {
         int i, j;
         char in_layout_name[128];
         char out_layout_name[128];
@@ -294,8 +294,8 @@ int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix,
     in_channels  = av_get_channel_layout_nb_channels(avr->in_channel_layout);
     out_channels = av_get_channel_layout_nb_channels(avr->out_channel_layout);
 
-    if ( in_channels < 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS ||
-        out_channels < 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) {
+    if ( in_channels <= 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS ||
+        out_channels <= 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) {
         av_log(avr, AV_LOG_ERROR, "Invalid channel layouts\n");
         return AVERROR(EINVAL);
     }
@@ -332,6 +332,7 @@ int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix,
         av_log(avr, AV_LOG_ERROR, "Invalid mix coeff type\n");
         return AVERROR(EINVAL);
     }
+
     return 0;
 }
 
@@ -343,14 +344,16 @@ int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix,
     in_channels  = av_get_channel_layout_nb_channels(avr->in_channel_layout);
     out_channels = av_get_channel_layout_nb_channels(avr->out_channel_layout);
 
-    if ( in_channels < 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS ||
-        out_channels < 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) {
+    if ( in_channels <= 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS ||
+        out_channels <= 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) {
         av_log(avr, AV_LOG_ERROR, "Invalid channel layouts\n");
         return AVERROR(EINVAL);
     }
 
-    if (avr->am->matrix)
-        av_freep(avr->am->matrix);
+    if (avr->am->matrix) {
+        av_free(avr->am->matrix[0]);
+        avr->am->matrix = NULL;
+    }
 
 #define CONVERT_MATRIX(type, expr) \
     avr->am->matrix_## type[0] = av_mallocz(out_channels * in_channels * \
@@ -386,5 +389,11 @@ int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix,
     /* TODO: detect situations where we can just swap around pointers
              instead of doing matrix multiplications with 0.0 and 1.0 */
 
+    /* set AudioMix params */
+    avr->am->in_layout    = avr->in_channel_layout;
+    avr->am->out_layout   = avr->out_channel_layout;
+    avr->am->in_channels  = in_channels;
+    avr->am->out_channels = out_channels;
+
     return 0;
 }
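The switch from av_freep(avr->am->matrix) to av_free(avr->am->matrix[0]) matches the layout that CONVERT_MATRIX allocates: an array of row pointers aliasing one contiguous coefficient block, so only element [0] owns memory. A hedged sketch of that layout (alloc_matrix and free_matrix are hypothetical helpers, not lavr API):

    #include "libavutil/mem.h"

    static double **alloc_matrix(int out_ch, int in_ch)
    {
        int o;
        double **m = av_malloc(out_ch * sizeof(*m));
        if (!m)
            return NULL;
        m[0] = av_mallocz(out_ch * in_ch * sizeof(**m)); /* one block */
        if (!m[0]) {
            av_free(m);
            return NULL;
        }
        for (o = 1; o < out_ch; o++)
            m[o] = m[o - 1] + in_ch; /* rows point into the block */
        return m;
    }

    static void free_matrix(double **m)
    {
        if (m)
            av_free(m[0]); /* frees every coefficient at once */
        av_free(m);
    }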
@@ -48,9 +48,8 @@ int avresample_open(AVAudioResampleContext *avr)
     avr->resample_channels = FFMIN(avr->in_channels, avr->out_channels);
     avr->downmix_needed    = avr->in_channels  > avr->out_channels;
     avr->upmix_needed      = avr->out_channels > avr->in_channels ||
-                             avr->am->matrix                      ||
-                             (avr->out_channels == avr->in_channels &&
-                              avr->in_channel_layout != avr->out_channel_layout);
+                             (!avr->downmix_needed && (avr->am->matrix ||
+                              avr->in_channel_layout != avr->out_channel_layout));
     avr->mixing_needed     = avr->downmix_needed || avr->upmix_needed;
 
     /* set resampling parameters */
@@ -49,10 +49,10 @@ void av_set_cpu_flags_mask(int mask)
 
 int av_parse_cpu_flags(const char *s)
 {
-#define CPUFLAG_MMX2     (AV_CPU_FLAG_MMX      | AV_CPU_FLAG_MMX2   | AV_CPU_FLAG_CMOV)
+#define CPUFLAG_MMXEXT   (AV_CPU_FLAG_MMX      | AV_CPU_FLAG_MMXEXT | AV_CPU_FLAG_CMOV)
 #define CPUFLAG_3DNOW    (AV_CPU_FLAG_3DNOW    | AV_CPU_FLAG_MMX)
 #define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW)
-#define CPUFLAG_SSE      (AV_CPU_FLAG_SSE      | CPUFLAG_MMX2)
+#define CPUFLAG_SSE      (AV_CPU_FLAG_SSE      | CPUFLAG_MMXEXT)
 #define CPUFLAG_SSE2     (AV_CPU_FLAG_SSE2     | CPUFLAG_SSE)
 #define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2)
 #define CPUFLAG_SSE3     (AV_CPU_FLAG_SSE3     | CPUFLAG_SSE2)
@@ -69,7 +69,7 @@ int av_parse_cpu_flags(const char *s)
     { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC }, .unit = "flags" },
 #elif ARCH_X86
     { "mmx"     , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX     }, .unit = "flags" },
-    { "mmx2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2        }, .unit = "flags" },
+    { "mmxext"  , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMXEXT      }, .unit = "flags" },
     { "sse"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE         }, .unit = "flags" },
     { "sse2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2        }, .unit = "flags" },
     { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW    }, .unit = "flags" },
@@ -174,7 +174,7 @@ static const struct {
     { AV_CPU_FLAG_ALTIVEC,  "altivec"    },
 #elif ARCH_X86
     { AV_CPU_FLAG_MMX,      "mmx"        },
-    { AV_CPU_FLAG_MMX2,     "mmx2"       },
+    { AV_CPU_FLAG_MMXEXT,   "mmxext"     },
     { AV_CPU_FLAG_SSE,      "sse"        },
     { AV_CPU_FLAG_SSE2,     "sse2"       },
     { AV_CPU_FLAG_SSE2SLOW, "sse2(slow)" },
@@ -27,6 +27,7 @@
 
 /* lower 16 bits - CPU features */
 #define AV_CPU_FLAG_MMX          0x0001 ///< standard MMX
+#define AV_CPU_FLAG_MMXEXT       0x0002 ///< SSE integer functions or AMD MMX ext
 #define AV_CPU_FLAG_MMX2         0x0002 ///< SSE integer functions or AMD MMX ext
 #define AV_CPU_FLAG_3DNOW        0x0004 ///< AMD 3DNOW
 #define AV_CPU_FLAG_SSE          0x0008 ///< SSE functions
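Because the new name shares the bit value 0x0002 with the old one, both spellings can coexist during the deprecation window; a compile-time check in the same spirit as the av_assert0(HAVE_MMX2 == HAVE_MMXEXT) added in the next hunk (sketch, not from the diff):

    #include "libavutil/cpu.h"

    #if AV_CPU_FLAG_MMX2 != AV_CPU_FLAG_MMXEXT
    #error "AV_CPU_FLAG_MMX2 compat alias out of sync with AV_CPU_FLAG_MMXEXT"
    #endif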
@@ -33,6 +33,7 @@ unsigned avutil_version(void)
     av_assert0(AVMEDIA_TYPE_ATTACHMENT == 4);
     av_assert0(AV_PICTURE_TYPE_BI == 7);
     av_assert0(LIBAVUTIL_VERSION_MICRO >= 100);
+    av_assert0(HAVE_MMX2 == HAVE_MMXEXT);
 
     return LIBAVUTIL_VERSION_INT;
 }
@@ -40,7 +40,7 @@
 
 #define LIBAVUTIL_VERSION_MAJOR 51
 #define LIBAVUTIL_VERSION_MINOR 66
-#define LIBAVUTIL_VERSION_MICRO 100
+#define LIBAVUTIL_VERSION_MICRO 101
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
@@ -122,7 +122,7 @@ int ff_get_cpu_flags_x86(void)
         if (std_caps & (1 << 23))
             rval |= AV_CPU_FLAG_MMX;
         if (std_caps & (1 << 25))
-            rval |= AV_CPU_FLAG_MMX2;
+            rval |= AV_CPU_FLAG_MMXEXT;
 #if HAVE_SSE
         if (std_caps & (1 << 25))
             rval |= AV_CPU_FLAG_SSE;
@@ -159,7 +159,7 @@ int ff_get_cpu_flags_x86(void)
         if (ext_caps & (1 << 23))
             rval |= AV_CPU_FLAG_MMX;
         if (ext_caps & (1 << 22))
-            rval |= AV_CPU_FLAG_MMX2;
+            rval |= AV_CPU_FLAG_MMXEXT;
 
         /* Allow for selectively disabling SSE2 functions on AMD processors
            with SSE2 support but not SSE4a. This includes Athlon64, some
@@ -663,8 +663,8 @@ static int swScale(SwsContext *c, const uint8_t *src[],
     if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);
 
-#if HAVE_MMX2 && HAVE_INLINE_ASM
-    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
+#if HAVE_MMXEXT && HAVE_INLINE_ASM
+    if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT)
         __asm__ volatile ("sfence" ::: "memory");
 #endif
     emms_c();
@@ -82,7 +82,10 @@ const char *swscale_license(void);
  * are only provided for API compatibility.
  */
 #define SWS_CPU_CAPS_MMX      0x80000000
+#define SWS_CPU_CAPS_MMXEXT   0x20000000
+#if LIBSWSCALE_VERSION_MAJOR < 3
 #define SWS_CPU_CAPS_MMX2     0x20000000
+#endif
 #define SWS_CPU_CAPS_3DNOW    0x40000000
 #define SWS_CPU_CAPS_ALTIVEC  0x10000000
 #define SWS_CPU_CAPS_BFIN     0x01000000
@@ -599,7 +599,7 @@ fail:
     return ret;
 }
 
-#if HAVE_MMX2 && HAVE_INLINE_ASM
+#if HAVE_MMXEXT && HAVE_INLINE_ASM
 static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
                            int16_t *filter, int32_t *filterPos, int numSplits)
 {
@@ -762,7 +762,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
 
     return fragmentPos + 1;
 }
-#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
+#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */
 
 static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
 {
@@ -1024,7 +1024,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
             c->srcBpc = 16;
         if (c->dstBpc == 16)
             dst_stride <<= 1;
-        if (HAVE_MMX2 && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMX2 &&
+        if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT &&
             c->srcBpc == 8 && c->dstBpc <= 14) {
             c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
                                 (srcW & 15) == 0) ? 1 : 0;
@@ -1063,7 +1063,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
 
     /* precalculate horizontal scaler filter coefficients */
     {
-#if HAVE_MMX2 && HAVE_INLINE_ASM
+#if HAVE_MMXEXT && HAVE_INLINE_ASM
         // can't downscale !!!
         if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
             c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
@@ -1107,7 +1107,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
             mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
 #endif
         } else
-#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
+#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */
         {
             const int filterAlign =
                 (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
@@ -1273,7 +1273,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
 #endif
                av_get_pix_fmt_name(dstFormat));
 
-        if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
+        if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT)
             av_log(c, AV_LOG_INFO, "using MMX2\n");
         else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
             av_log(c, AV_LOG_INFO, "using 3DNOW\n");
@@ -28,7 +28,7 @@
 
 #define LIBSWSCALE_VERSION_MAJOR 2
 #define LIBSWSCALE_VERSION_MINOR 1
-#define LIBSWSCALE_VERSION_MICRO 100
+#define LIBSWSCALE_VERSION_MICRO 101
 
 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
                                                LIBSWSCALE_VERSION_MINOR, \
@@ -88,7 +88,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
 
 //Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
 
-#define COMPILE_TEMPLATE_MMX2     0
+#define COMPILE_TEMPLATE_MMXEXT   0
 #define COMPILE_TEMPLATE_AMD3DNOW 0
 #define COMPILE_TEMPLATE_SSE2     0
 
@@ -99,8 +99,8 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
 
 //MMX2 versions
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 1
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _MMX2
 #include "rgb2rgb_template.c"
 
@@ -113,10 +113,10 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
 
 //3DNOW versions
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
+#undef COMPILE_TEMPLATE_MMXEXT
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMX2 0
+#define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_SSE2 0
 #define COMPILE_TEMPLATE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNOW
@@ -140,7 +140,7 @@ av_cold void rgb2rgb_init_x86(void)
         rgb2rgb_init_MMX();
     if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
         rgb2rgb_init_3DNOW();
-    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
+    if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT)
         rgb2rgb_init_MMX2();
     if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
         rgb2rgb_init_SSE2();
@@ -35,7 +35,7 @@
 #if COMPILE_TEMPLATE_AMD3DNOW
 #define PREFETCH "prefetch"
 #define PAVGB    "pavgusb"
-#elif COMPILE_TEMPLATE_MMX2
+#elif COMPILE_TEMPLATE_MMXEXT
 #define PREFETCH "prefetchnta"
 #define PAVGB    "pavgb"
 #else
@@ -49,7 +49,7 @@
 #define EMMS     "emms"
 #endif
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #define MOVNTQ "movntq"
 #define SFENCE "sfence"
 #else
@@ -1136,7 +1136,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst,
         PREFETCH"   32(%1, %0)          \n\t"
         "movq       (%1, %0), %%mm0     \n\t"
         "movq      8(%1, %0), %%mm1     \n\t"
-# if COMPILE_TEMPLATE_MMX2
+# if COMPILE_TEMPLATE_MMXEXT
         "pshufw $177, %%mm0, %%mm3      \n\t"
         "pshufw $177, %%mm1, %%mm5      \n\t"
         "pand   %%mm7, %%mm0            \n\t"
@@ -1500,7 +1500,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
 }
 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
 {
     int x,y;
@@ -1590,7 +1590,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth,
             SFENCE"                     \n\t"
             :::"memory");
     }
-#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
 
 #if !COMPILE_TEMPLATE_AMD3DNOW
 /**
@@ -1798,7 +1798,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
         "1:                             \n\t"
         PREFETCH"    64(%0, %%"REG_d")  \n\t"
         PREFETCH"    64(%1, %%"REG_d")  \n\t"
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
         "movq          (%0, %%"REG_d"), %%mm0 \n\t"
         "movq          (%1, %%"REG_d"), %%mm1 \n\t"
        "movq          6(%0, %%"REG_d"), %%mm2 \n\t"
@@ -1859,7 +1859,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
         "packssdw %%mm1, %%mm0          \n\t" // V1 V0 U1 U0
         "psraw    $7, %%mm0             \n\t"
 
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
         "movq  12(%0, %%"REG_d"), %%mm4 \n\t"
         "movq  12(%1, %%"REG_d"), %%mm1 \n\t"
         "movq  18(%0, %%"REG_d"), %%mm2 \n\t"
@@ -2580,9 +2580,9 @@ static inline void RENAME(rgb2rgb_init)(void)
     yuyvtoyuv422 = RENAME(yuyvtoyuv422);
 #endif /* !COMPILE_TEMPLATE_SSE2 */
 
-#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
+#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
     planar2x = RENAME(planar2x);
-#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
     rgb24toyv12 = RENAME(rgb24toyv12);
 
     yuyvtoyuv420 = RENAME(yuyvtoyuv420);
@@ -74,16 +74,16 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
 //MMX versions
 #if HAVE_MMX
 #undef RENAME
-#define COMPILE_TEMPLATE_MMX2 0
+#define COMPILE_TEMPLATE_MMXEXT 0
 #define RENAME(a) a ## _MMX
 #include "swscale_template.c"
 #endif
 
 //MMX2 versions
-#if HAVE_MMX2
+#if HAVE_MMXEXT
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 1
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _MMX2
 #include "swscale_template.c"
 #endif
@ -375,8 +375,8 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
|
|||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
if (cpu_flags & AV_CPU_FLAG_MMX)
|
if (cpu_flags & AV_CPU_FLAG_MMX)
|
||||||
sws_init_swScale_MMX(c);
|
sws_init_swScale_MMX(c);
|
||||||
#if HAVE_MMX2
|
#if HAVE_MMXEXT
|
||||||
if (cpu_flags & AV_CPU_FLAG_MMX2)
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
||||||
sws_init_swScale_MMX2(c);
|
sws_init_swScale_MMX2(c);
|
||||||
if (cpu_flags & AV_CPU_FLAG_SSE3){
|
if (cpu_flags & AV_CPU_FLAG_SSE3){
|
||||||
if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
|
if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
|
||||||
@ -439,7 +439,7 @@ switch(c->dstBpc){ \
|
|||||||
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
||||||
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
|
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
|
||||||
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
|
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
|
||||||
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2);
|
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT);
|
||||||
|
|
||||||
switch (c->srcFormat) {
|
switch (c->srcFormat) {
|
||||||
case PIX_FMT_Y400A:
|
case PIX_FMT_Y400A:
|
||||||
@ -471,7 +471,7 @@ switch(c->dstBpc){ \
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (cpu_flags & AV_CPU_FLAG_MMX2) {
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1);
|
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
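These init hunks are the runtime half of the scheme: the configure-time HAVE_MMXEXT guard decides whether the MMXEXT variants exist in the binary at all, while av_get_cpu_flags() decides whether to install them on the machine actually running. A hedged sketch of that two-level dispatch; the flag bits, get_cpu_flags() and the scale functions below are stand-ins for the libavutil/swscale ones:

    /* dispatch.c: compile-time presence check + runtime capability check. */
    #include <stdio.h>

    #define HAVE_MMXEXT 1            /* set by configure in the real build */
    #define CPU_FLAG_MMX    0x01     /* invented flag bits for the demo */
    #define CPU_FLAG_MMXEXT 0x02

    typedef struct { int (*scale)(int); } Ctx;

    static int scale_c(int x)      { return x; }
    #if HAVE_MMXEXT
    static int scale_mmxext(int x) { return 2 * x; } /* stands in for asm */
    #endif

    static int get_cpu_flags(void)   /* stands in for av_get_cpu_flags() */
    {
        return CPU_FLAG_MMX | CPU_FLAG_MMXEXT;
    }

    static void init_ctx(Ctx *c)
    {
        int cpu_flags = get_cpu_flags();
        c->scale = scale_c;                /* portable default */
    #if HAVE_MMXEXT                        /* variant compiled in... */
        if (cpu_flags & CPU_FLAG_MMXEXT)   /* ...and supported right now? */
            c->scale = scale_mmxext;
    #endif
    }

    int main(void)
    {
        Ctx c;
        init_ctx(&c);
        printf("%d\n", c.scale(21));
        return 0;
    }

Keeping both checks is deliberate: dropping the #if would break builds where the variant was configured out, and dropping the runtime test would fault on CPUs that lack the instruction set, whatever the macro is called.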
@@ -23,13 +23,13 @@
 #undef MOVNTQ2
 #undef PREFETCH
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #define PREFETCH "prefetchnta"
 #else
 #define PREFETCH " # nop"
 #endif
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
 #define MOVNTQ2 "movntq "
 #else
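For context on the macros being renamed: prefetchnta and movntq come from the MMXEXT (integer-SSE) extension set, not baseline MMX. That is why the plain-MMX pass defines PREFETCH as " # nop", an assembler-level comment, letting the shared asm strings compile unchanged with the prefetch simply dropped; only the guard's name changes in this commit.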
@@ -38,7 +38,7 @@
 #endif
 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
 
-#if !COMPILE_TEMPLATE_MMX2
+#if !COMPILE_TEMPLATE_MMXEXT
 static av_always_inline void
 dither_8to16(const uint8_t *srcDither, int rot)
 {
@@ -641,7 +641,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
     "cmp "#dstw", "#index" \n\t"\
     " jb 1b \n\t"
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #undef WRITEBGR24
 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index)
 #else
@@ -1445,7 +1445,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
     }
 }
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
                                  int dstWidth, const uint8_t *src,
                                  int srcW, int xInc)
@@ -1627,7 +1627,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
         dst2[i] = src2[srcW-1]*128;
     }
 }
-#endif /* COMPILE_TEMPLATE_MMX2 */
+#endif /* COMPILE_TEMPLATE_MMXEXT */
 
 static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 {
@@ -1691,17 +1691,17 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 
     if (c->srcBpc == 8 && c->dstBpc <= 14) {
         // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
         if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
         {
             c->hyscale_fast = RENAME(hyscale_fast);
             c->hcscale_fast = RENAME(hcscale_fast);
         } else {
-#endif /* COMPILE_TEMPLATE_MMX2 */
+#endif /* COMPILE_TEMPLATE_MMXEXT */
             c->hyscale_fast = NULL;
             c->hcscale_fast = NULL;
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
         }
-#endif /* COMPILE_TEMPLATE_MMX2 */
+#endif /* COMPILE_TEMPLATE_MMXEXT */
     }
 }
@@ -52,20 +52,20 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 //MMX versions
 #if HAVE_MMX
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 0
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 0
 #define RENAME(a) a ## _MMX
 #include "yuv2rgb_template.c"
 #endif /* HAVE_MMX */
 
 //MMX2 versions
-#if HAVE_MMX2
+#if HAVE_MMXEXT
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMX2
-#define COMPILE_TEMPLATE_MMX2 1
+#undef COMPILE_TEMPLATE_MMXEXT
+#define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _MMX2
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMX2 */
+#endif /* HAVE_MMXEXT */
 
 #endif /* HAVE_INLINE_ASM */
 
@@ -74,8 +74,8 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
 #if HAVE_INLINE_ASM
     int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMX2
-    if (cpu_flags & AV_CPU_FLAG_MMX2) {
+#if HAVE_MMXEXT
+    if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB24: return yuv420_rgb24_MMX2;
         case PIX_FMT_BGR24: return yuv420_bgr24_MMX2;
@@ -25,7 +25,7 @@
 #undef EMMS
 #undef SFENCE
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 #define MOVNTQ "movntq"
 #define SFENCE "sfence"
 #else
@@ -181,7 +181,7 @@
     "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \
     "paddusb "RED_DITHER"(%4), %%mm1\n\t" \
 
-#if !COMPILE_TEMPLATE_MMX2
+#if !COMPILE_TEMPLATE_MMXEXT
 static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
@@ -237,7 +237,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
     YUV2RGB_OPERANDS
     YUV2RGB_ENDFUNC
 }
-#endif /* !COMPILE_TEMPLATE_MMX2 */
+#endif /* !COMPILE_TEMPLATE_MMXEXT */
 
 #define RGB_PACK24(blue, red)\
     "packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
@@ -254,7 +254,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
     "punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
     RGB_PACK24_B
 
-#if COMPILE_TEMPLATE_MMX2
+#if COMPILE_TEMPLATE_MMXEXT
 DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
 DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
 DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
@@ -361,7 +361,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
     MOVNTQ " %%mm5, 16(%1)\n\t" \
     MOVNTQ " %%mm"alpha", 24(%1)\n\t" \
 
-#if !COMPILE_TEMPLATE_MMX2
+#if !COMPILE_TEMPLATE_MMXEXT
 static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
                                        int srcStride[],
                                        int srcSliceY, int srcSliceH,
@@ -448,4 +448,4 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
 }
 #endif
 
-#endif /* !COMPILE_TEMPLATE_MMX2 */
+#endif /* !COMPILE_TEMPLATE_MMXEXT */
@@ -1,3 +1,18 @@
+FATE_LAGARITH += fate-lagarith-rgb24
+fate-lagarith-rgb24: CMD = framecrc -i $(SAMPLES)/lagarith/lag-rgb24.avi
+
+FATE_LAGARITH += fate-lagarith-rgb32
+fate-lagarith-rgb32: CMD = framecrc -i $(SAMPLES)/lagarith/lag-rgb32.avi -pix_fmt bgra
+
+FATE_LAGARITH += fate-lagarith-yuy2
+fate-lagarith-yuy2: CMD = framecrc -i $(SAMPLES)/lagarith/lag-yuy2.avi
+
+FATE_LAGARITH += fate-lagarith-yv12
+fate-lagarith-yv12: CMD = framecrc -i $(SAMPLES)/lagarith/lag-yv12.avi
+
+FATE_SAMPLES_AVCONV += $(FATE_LAGARITH)
+fate-lagarith: $(FATE_LAGARITH)
+
 FATE_LOCO += fate-loco-rgb
 fate-loco-rgb: CMD = framecrc -i $(SAMPLES)/loco/pig-loco-rgb.avi
 
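A usage note on the targets added above (assuming a standard FATE setup): each pair of lines registers one test with the harness, so with a fate-suite checkout they run individually, e.g. make fate-lagarith-rgb24 SAMPLES=/path/to/fate-suite, or together through the aggregate fate-lagarith target. The framecrc CMD decodes the sample and compares per-frame checksums against the reference files added below; the extra -pix_fmt bgra on the rgb32 test pins the output pixel format so those checksums stay stable.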
5
tests/ref/fate/lagarith-rgb24
Normal file
@@ -0,0 +1,5 @@
+#tb 0: 100/2997
+0, 0, 0, 1, 368640, 0x26f74db2
+0, 1, 1, 1, 368640, 0x63b29ea4
+0, 2, 2, 1, 368640, 0x19467f03
+0, 3, 3, 1, 368640, 0x5fdc3575
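To read these reference files (the framecrc muxer's output format): the #tb line declares stream 0's timebase, and each row that follows gives stream index, DTS, PTS, duration in that timebase, frame size in bytes, and an Adler-32 checksum of the decoded frame data; this file therefore asserts four 368640-byte frames with distinct checksums.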
26
tests/ref/fate/lagarith-rgb32
Normal file
@@ -0,0 +1,26 @@
+#tb 0: 1001/24000
+0, 0, 0, 1, 1382400, 0x00000000
+0, 1, 1, 1, 1382400, 0x00000000
+0, 2, 2, 1, 1382400, 0x00000000
+0, 3, 3, 1, 1382400, 0x00000000
+0, 4, 4, 1, 1382400, 0x00000000
+0, 5, 5, 1, 1382400, 0xf95bde46
+0, 6, 6, 1, 1382400, 0x4f4c0393
+0, 7, 7, 1, 1382400, 0xe5aa40db
+0, 8, 8, 1, 1382400, 0xc25a8ba2
+0, 9, 9, 1, 1382400, 0x9db3150d
+0, 10, 10, 1, 1382400, 0x730e64b3
+0, 11, 11, 1, 1382400, 0xf8fd7edf
+0, 12, 12, 1, 1382400, 0x0114798a
+0, 13, 13, 1, 1382400, 0x7571210f
+0, 14, 14, 1, 1382400, 0x552ae59d
+0, 15, 15, 1, 1382400, 0x7ae0c946
+0, 16, 16, 1, 1382400, 0x0818c3ef
+0, 17, 17, 1, 1382400, 0x8257cac4
+0, 18, 18, 1, 1382400, 0x7762a979
+0, 19, 19, 1, 1382400, 0x282af57a
+0, 20, 20, 1, 1382400, 0x3f42de50
+0, 21, 21, 1, 1382400, 0xc42d5f93
+0, 22, 22, 1, 1382400, 0x18775c90
+0, 23, 23, 1, 1382400, 0x34befa90
+0, 24, 24, 1, 1382400, 0xd33d5f53
2
tests/ref/fate/lagarith-yuy2
Normal file
@@ -0,0 +1,2 @@
+#tb 0: 1/10
+0, 0, 0, 1, 1572864, 0xeed76a7d
3
tests/ref/fate/lagarith-yv12
Normal file
@@ -0,0 +1,3 @@
+#tb 0: 1/60
+0, 0, 0, 1, 92160, 0x1dfdf5c1
+0, 1, 1, 1, 92160, 0x6965884f