mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
avutil: [loongarch] Add support for loongarch SIMD.
LSX and LASX is loongarch SIMD extention. They are enabled by default if compiler support it, and can be disabled with '--disable-lsx' '--disable-lasx'. Change-Id: Ie2608ea61dbd9b7fffadbf0ec2348bad6c124476 Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn> Reviewed-by: guxiwei <guxiwei-hf@loongson.cn> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
571e8ca2dd
commit
9a840ffa17
2
Makefile
2
Makefile
@ -89,7 +89,7 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \
|
|||||||
ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \
|
ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \
|
||||||
ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \
|
ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \
|
||||||
MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \
|
MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \
|
||||||
MMI-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS
|
MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS
|
||||||
|
|
||||||
define RESET
|
define RESET
|
||||||
$(1) :=
|
$(1) :=
|
||||||
|
20
configure
vendored
20
configure
vendored
@ -452,7 +452,9 @@ Optimization options (experts only):
|
|||||||
--disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations
|
--disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations
|
||||||
--disable-msa disable MSA optimizations
|
--disable-msa disable MSA optimizations
|
||||||
--disable-mipsfpu disable floating point MIPS optimizations
|
--disable-mipsfpu disable floating point MIPS optimizations
|
||||||
--disable-mmi disable Loongson SIMD optimizations
|
--disable-mmi disable Loongson MMI optimizations
|
||||||
|
--disable-lsx disable Loongson LSX optimizations
|
||||||
|
--disable-lasx disable Loongson LASX optimizations
|
||||||
--disable-fast-unaligned consider unaligned accesses slow
|
--disable-fast-unaligned consider unaligned accesses slow
|
||||||
|
|
||||||
Developer options (useful when working on FFmpeg itself):
|
Developer options (useful when working on FFmpeg itself):
|
||||||
@ -2081,6 +2083,8 @@ ARCH_EXT_LIST_LOONGSON="
|
|||||||
loongson2
|
loongson2
|
||||||
loongson3
|
loongson3
|
||||||
mmi
|
mmi
|
||||||
|
lsx
|
||||||
|
lasx
|
||||||
"
|
"
|
||||||
|
|
||||||
ARCH_EXT_LIST_X86_SIMD="
|
ARCH_EXT_LIST_X86_SIMD="
|
||||||
@ -2617,6 +2621,10 @@ power8_deps="vsx"
|
|||||||
|
|
||||||
loongson2_deps="mips"
|
loongson2_deps="mips"
|
||||||
loongson3_deps="mips"
|
loongson3_deps="mips"
|
||||||
|
mmi_deps_any="loongson2 loongson3"
|
||||||
|
lsx_deps="loongarch"
|
||||||
|
lasx_deps="lsx"
|
||||||
|
|
||||||
mips32r2_deps="mips"
|
mips32r2_deps="mips"
|
||||||
mips32r5_deps="mips"
|
mips32r5_deps="mips"
|
||||||
mips32r6_deps="mips"
|
mips32r6_deps="mips"
|
||||||
@ -2625,7 +2633,6 @@ mips64r6_deps="mips"
|
|||||||
mipsfpu_deps="mips"
|
mipsfpu_deps="mips"
|
||||||
mipsdsp_deps="mips"
|
mipsdsp_deps="mips"
|
||||||
mipsdspr2_deps="mips"
|
mipsdspr2_deps="mips"
|
||||||
mmi_deps_any="loongson2 loongson3"
|
|
||||||
msa_deps="mipsfpu"
|
msa_deps="mipsfpu"
|
||||||
|
|
||||||
cpunop_deps="i686"
|
cpunop_deps="i686"
|
||||||
@ -6134,6 +6141,9 @@ EOF
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
elif enabled loongarch; then
|
||||||
|
enabled lsx && check_inline_asm lsx '"vadd.b $vr0, $vr1, $vr2"' '-mlsx' && append LSXFLAGS '-mlsx'
|
||||||
|
enabled lasx && check_inline_asm lasx '"xvadd.b $xr0, $xr1, $xr2"' '-mlasx' && append LASXFLAGS '-mlasx'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)"
|
check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)"
|
||||||
@ -7484,6 +7494,10 @@ if enabled ppc; then
|
|||||||
echo "PPC 4xx optimizations ${ppc4xx-no}"
|
echo "PPC 4xx optimizations ${ppc4xx-no}"
|
||||||
echo "dcbzl available ${dcbzl-no}"
|
echo "dcbzl available ${dcbzl-no}"
|
||||||
fi
|
fi
|
||||||
|
if enabled loongarch; then
|
||||||
|
echo "LSX enabled ${lsx-no}"
|
||||||
|
echo "LASX enabled ${lasx-no}"
|
||||||
|
fi
|
||||||
echo "debug symbols ${debug-no}"
|
echo "debug symbols ${debug-no}"
|
||||||
echo "strip symbols ${stripping-no}"
|
echo "strip symbols ${stripping-no}"
|
||||||
echo "optimize for size ${small-no}"
|
echo "optimize for size ${small-no}"
|
||||||
@ -7645,6 +7659,8 @@ ASMSTRIPFLAGS=$ASMSTRIPFLAGS
|
|||||||
X86ASMFLAGS=$X86ASMFLAGS
|
X86ASMFLAGS=$X86ASMFLAGS
|
||||||
MSAFLAGS=$MSAFLAGS
|
MSAFLAGS=$MSAFLAGS
|
||||||
MMIFLAGS=$MMIFLAGS
|
MMIFLAGS=$MMIFLAGS
|
||||||
|
LSXFLAGS=$LSXFLAGS
|
||||||
|
LASXFLAGS=$LASXFLAGS
|
||||||
BUILDSUF=$build_suffix
|
BUILDSUF=$build_suffix
|
||||||
PROGSSUF=$progs_suffix
|
PROGSSUF=$progs_suffix
|
||||||
FULLNAME=$FULLNAME
|
FULLNAME=$FULLNAME
|
||||||
|
@ -9,6 +9,8 @@ OBJS-$(HAVE_MIPSDSP) += $(MIPSDSP-OBJS) $(MIPSDSP-OBJS-yes)
|
|||||||
OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes)
|
OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes)
|
||||||
OBJS-$(HAVE_MSA) += $(MSA-OBJS) $(MSA-OBJS-yes)
|
OBJS-$(HAVE_MSA) += $(MSA-OBJS) $(MSA-OBJS-yes)
|
||||||
OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes)
|
OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes)
|
||||||
|
OBJS-$(HAVE_LSX) += $(LSX-OBJS) $(LSX-OBJS-yes)
|
||||||
|
OBJS-$(HAVE_LASX) += $(LASX-OBJS) $(LASX-OBJS-yes)
|
||||||
|
|
||||||
OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
|
OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
|
||||||
OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes)
|
OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes)
|
||||||
|
@ -59,6 +59,8 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC)
|
|||||||
COMPILE_NVCC = $(call COMPILE,NVCC)
|
COMPILE_NVCC = $(call COMPILE,NVCC)
|
||||||
COMPILE_MMI = $(call COMPILE,CC,MMIFLAGS)
|
COMPILE_MMI = $(call COMPILE,CC,MMIFLAGS)
|
||||||
COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS)
|
COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS)
|
||||||
|
COMPILE_LSX = $(call COMPILE,CC,LSXFLAGS)
|
||||||
|
COMPILE_LASX = $(call COMPILE,CC,LASXFLAGS)
|
||||||
|
|
||||||
%_mmi.o: %_mmi.c
|
%_mmi.o: %_mmi.c
|
||||||
$(COMPILE_MMI)
|
$(COMPILE_MMI)
|
||||||
@ -66,6 +68,12 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS)
|
|||||||
%_msa.o: %_msa.c
|
%_msa.o: %_msa.c
|
||||||
$(COMPILE_MSA)
|
$(COMPILE_MSA)
|
||||||
|
|
||||||
|
%_lsx.o: %_lsx.c
|
||||||
|
$(COMPILE_LSX)
|
||||||
|
|
||||||
|
%_lasx.o: %_lasx.c
|
||||||
|
$(COMPILE_LASX)
|
||||||
|
|
||||||
%.o: %.c
|
%.o: %.c
|
||||||
$(COMPILE_C)
|
$(COMPILE_C)
|
||||||
|
|
||||||
|
@ -62,6 +62,8 @@ static int get_cpu_flags(void)
|
|||||||
return ff_get_cpu_flags_ppc();
|
return ff_get_cpu_flags_ppc();
|
||||||
if (ARCH_X86)
|
if (ARCH_X86)
|
||||||
return ff_get_cpu_flags_x86();
|
return ff_get_cpu_flags_x86();
|
||||||
|
if (ARCH_LOONGARCH)
|
||||||
|
return ff_get_cpu_flags_loongarch();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -168,6 +170,9 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
|
|||||||
#elif ARCH_MIPS
|
#elif ARCH_MIPS
|
||||||
{ "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" },
|
{ "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" },
|
||||||
{ "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" },
|
{ "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" },
|
||||||
|
#elif ARCH_LOONGARCH
|
||||||
|
{ "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" },
|
||||||
|
{ "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" },
|
||||||
#endif
|
#endif
|
||||||
{ NULL },
|
{ NULL },
|
||||||
};
|
};
|
||||||
@ -253,6 +258,8 @@ size_t av_cpu_max_align(void)
|
|||||||
return ff_get_cpu_max_align_ppc();
|
return ff_get_cpu_max_align_ppc();
|
||||||
if (ARCH_X86)
|
if (ARCH_X86)
|
||||||
return ff_get_cpu_max_align_x86();
|
return ff_get_cpu_max_align_x86();
|
||||||
|
if (ARCH_LOONGARCH)
|
||||||
|
return ff_get_cpu_max_align_loongarch();
|
||||||
|
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
|
@ -72,6 +72,10 @@
|
|||||||
#define AV_CPU_FLAG_MMI (1 << 0)
|
#define AV_CPU_FLAG_MMI (1 << 0)
|
||||||
#define AV_CPU_FLAG_MSA (1 << 1)
|
#define AV_CPU_FLAG_MSA (1 << 1)
|
||||||
|
|
||||||
|
//Loongarch SIMD extension.
|
||||||
|
#define AV_CPU_FLAG_LSX (1 << 0)
|
||||||
|
#define AV_CPU_FLAG_LASX (1 << 1)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the flags which specify extensions supported by the CPU.
|
* Return the flags which specify extensions supported by the CPU.
|
||||||
* The returned value is affected by av_force_cpu_flags() if that was used
|
* The returned value is affected by av_force_cpu_flags() if that was used
|
||||||
|
@ -46,11 +46,13 @@ int ff_get_cpu_flags_aarch64(void);
|
|||||||
int ff_get_cpu_flags_arm(void);
|
int ff_get_cpu_flags_arm(void);
|
||||||
int ff_get_cpu_flags_ppc(void);
|
int ff_get_cpu_flags_ppc(void);
|
||||||
int ff_get_cpu_flags_x86(void);
|
int ff_get_cpu_flags_x86(void);
|
||||||
|
int ff_get_cpu_flags_loongarch(void);
|
||||||
|
|
||||||
size_t ff_get_cpu_max_align_mips(void);
|
size_t ff_get_cpu_max_align_mips(void);
|
||||||
size_t ff_get_cpu_max_align_aarch64(void);
|
size_t ff_get_cpu_max_align_aarch64(void);
|
||||||
size_t ff_get_cpu_max_align_arm(void);
|
size_t ff_get_cpu_max_align_arm(void);
|
||||||
size_t ff_get_cpu_max_align_ppc(void);
|
size_t ff_get_cpu_max_align_ppc(void);
|
||||||
size_t ff_get_cpu_max_align_x86(void);
|
size_t ff_get_cpu_max_align_x86(void);
|
||||||
|
size_t ff_get_cpu_max_align_loongarch(void);
|
||||||
|
|
||||||
#endif /* AVUTIL_CPU_INTERNAL_H */
|
#endif /* AVUTIL_CPU_INTERNAL_H */
|
||||||
|
1
libavutil/loongarch/Makefile
Normal file
1
libavutil/loongarch/Makefile
Normal file
@ -0,0 +1 @@
|
|||||||
|
OBJS += loongarch/cpu.o
|
69
libavutil/loongarch/cpu.c
Normal file
69
libavutil/loongarch/cpu.c
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2020 Loongson Technology Corporation Limited
|
||||||
|
* Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "cpu.h"
|
||||||
|
|
||||||
|
#define LOONGARCH_CFG2 0x2
|
||||||
|
#define LOONGARCH_CFG2_LSX (1 << 6)
|
||||||
|
#define LOONGARCH_CFG2_LASX (1 << 7)
|
||||||
|
|
||||||
|
static int cpu_flags_cpucfg(void)
|
||||||
|
{
|
||||||
|
int flags = 0;
|
||||||
|
uint32_t cfg2 = 0;
|
||||||
|
|
||||||
|
__asm__ volatile(
|
||||||
|
"cpucfg %0, %1 \n\t"
|
||||||
|
: "+&r"(cfg2)
|
||||||
|
: "r"(LOONGARCH_CFG2)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (cfg2 & LOONGARCH_CFG2_LSX)
|
||||||
|
flags |= AV_CPU_FLAG_LSX;
|
||||||
|
|
||||||
|
if (cfg2 & LOONGARCH_CFG2_LASX)
|
||||||
|
flags |= AV_CPU_FLAG_LASX;
|
||||||
|
|
||||||
|
return flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ff_get_cpu_flags_loongarch(void)
|
||||||
|
{
|
||||||
|
#if defined __linux__
|
||||||
|
return cpu_flags_cpucfg();
|
||||||
|
#else
|
||||||
|
/* Assume no SIMD ASE supported */
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t ff_get_cpu_max_align_loongarch(void)
|
||||||
|
{
|
||||||
|
int flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (flags & AV_CPU_FLAG_LASX)
|
||||||
|
return 32;
|
||||||
|
if (flags & AV_CPU_FLAG_LSX)
|
||||||
|
return 16;
|
||||||
|
|
||||||
|
return 8;
|
||||||
|
}
|
31
libavutil/loongarch/cpu.h
Normal file
31
libavutil/loongarch/cpu.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2020 Loongson Technology Corporation Limited
|
||||||
|
* Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVUTIL_LOONGARCH_CPU_H
|
||||||
|
#define AVUTIL_LOONGARCH_CPU_H
|
||||||
|
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/cpu_internal.h"
|
||||||
|
|
||||||
|
#define have_lsx(flags) CPUEXT(flags, LSX)
|
||||||
|
#define have_lasx(flags) CPUEXT(flags, LASX)
|
||||||
|
|
||||||
|
#endif /* AVUTIL_LOONGARCH_CPU_H */
|
@ -77,6 +77,9 @@ static const struct {
|
|||||||
{ AV_CPU_FLAG_BMI2, "bmi2" },
|
{ AV_CPU_FLAG_BMI2, "bmi2" },
|
||||||
{ AV_CPU_FLAG_AESNI, "aesni" },
|
{ AV_CPU_FLAG_AESNI, "aesni" },
|
||||||
{ AV_CPU_FLAG_AVX512, "avx512" },
|
{ AV_CPU_FLAG_AVX512, "avx512" },
|
||||||
|
#elif ARCH_LOONGARCH
|
||||||
|
{ AV_CPU_FLAG_LSX, "lsx" },
|
||||||
|
{ AV_CPU_FLAG_LASX, "lasx" },
|
||||||
#endif
|
#endif
|
||||||
{ 0 }
|
{ 0 }
|
||||||
};
|
};
|
||||||
|
@ -236,6 +236,9 @@ static const struct {
|
|||||||
{ "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
|
{ "FMA4", "fma4", AV_CPU_FLAG_FMA4 },
|
||||||
{ "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
|
{ "AVX2", "avx2", AV_CPU_FLAG_AVX2 },
|
||||||
{ "AVX-512", "avx512", AV_CPU_FLAG_AVX512 },
|
{ "AVX-512", "avx512", AV_CPU_FLAG_AVX512 },
|
||||||
|
#elif ARCH_LOONGARCH
|
||||||
|
{ "LSX", "lsx", AV_CPU_FLAG_LSX },
|
||||||
|
{ "LASX", "lasx", AV_CPU_FLAG_LASX },
|
||||||
#endif
|
#endif
|
||||||
{ NULL }
|
{ NULL }
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user