Merge commit 'f0389eb777b1ab4291329d4f709098cdfa7384dc'

* commit 'f0389eb777b1ab4291329d4f709098cdfa7384dc': arm: fmtconvert: Split armv6 fmtconvert code off from vfp code Merged-by: Michael Niedermayer <michaelni@gmx.at>
2025-08-10 06:10:52 +02:00 · 2013-08-29 16:09:46 +02:00
parent 946f080b54 f0389eb777
commit e1c3e6277f
4 changed files with 85 additions and 63 deletions
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -52,11 +52,13 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
                                          arm/vp8dsp_init_armv6.o       \
                                          arm/vp8dsp_armv6.o

+VFP-OBJS                               += arm/fmtconvert_vfp.o
+
 VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o              \
                                          arm/synth_filter_vfp.o
 VFP-OBJS-$(CONFIG_FFT)                 += arm/fft_vfp.o
 VFP-OBJS-$(CONFIG_MDCT)                += arm/mdct_vfp.o
-VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp.o
+VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp_armv6.o

 NEON-OBJS                              += arm/fmtconvert_neon.o

--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -43,16 +43,15 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
 {
    int cpu_flags = av_get_cpu_flags();

-    if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
+    if (have_vfp(cpu_flags)) {
        if (!have_vfpv3(cpu_flags)) {
-            // These functions don't use anything armv6 specific in themselves,
-            // but ff_float_to_int16_vfp which is in the same assembly source
-            // file does, thus the whole file requires armv6 to be built.
            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
            c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
        }

-        c->float_to_int16 = ff_float_to_int16_vfp;
+        if (have_armv6(cpu_flags)) {
+            c->float_to_int16 = ff_float_to_int16_vfp;
+        }
    }

    if (have_neon(cpu_flags)) {
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -1,5 +1,4 @@
 /*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
 * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
 *
 * This file is part of FFmpeg.
@@ -22,62 +21,6 @@
 #include "config.h"
 #include "libavutil/arm/asm.S"

-/**
- * ARM VFP optimized float to int16 conversion.
- * Assume that len is a positive number and is multiple of 8, destination
- * buffer is at least 4 bytes aligned (8 bytes alignment is better for
- * performance), little-endian byte sex.
- */
-@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
-function ff_float_to_int16_vfp, export=1
-        push            {r4-r8,lr}
-        vpush           {d8-d11}
-        vldmia          r1!, {s16-s23}
-        vcvt.s32.f32    s0,  s16
-        vcvt.s32.f32    s1,  s17
-        vcvt.s32.f32    s2,  s18
-        vcvt.s32.f32    s3,  s19
-        vcvt.s32.f32    s4,  s20
-        vcvt.s32.f32    s5,  s21
-        vcvt.s32.f32    s6,  s22
-        vcvt.s32.f32    s7,  s23
-1:
-        subs            r2,  r2,  #8
-        vmov            r3,  r4,  s0, s1
-        vmov            r5,  r6,  s2, s3
-        vmov            r7,  r8,  s4, s5
-        vmov            ip,  lr,  s6, s7
-        it              gt
-        vldmiagt        r1!, {s16-s23}
-        ssat            r4,  #16, r4
-        ssat            r3,  #16, r3
-        ssat            r6,  #16, r6
-        ssat            r5,  #16, r5
-        pkhbt           r3,  r3,  r4, lsl #16
-        pkhbt           r4,  r5,  r6, lsl #16
-        itttt           gt
-        vcvtgt.s32.f32  s0,  s16
-        vcvtgt.s32.f32  s1,  s17
-        vcvtgt.s32.f32  s2,  s18
-        vcvtgt.s32.f32  s3,  s19
-        itttt           gt
-        vcvtgt.s32.f32  s4,  s20
-        vcvtgt.s32.f32  s5,  s21
-        vcvtgt.s32.f32  s6,  s22
-        vcvtgt.s32.f32  s7,  s23
-        ssat            r8,  #16, r8
-        ssat            r7,  #16, r7
-        ssat            lr,  #16, lr
-        ssat            ip,  #16, ip
-        pkhbt           r5,  r7,  r8, lsl #16
-        pkhbt           r6,  ip,  lr, lsl #16
-        stmia           r0!, {r3-r6}
-        bgt             1b
-
-        vpop            {d8-d11}
-        pop             {r4-r8,pc}
-endfunc
-
 /**
 * ARM VFP optimised int32 to float conversion.
 * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
--- a/libavcodec/arm/fmtconvert_vfp_armv6.S
+++ b/libavcodec/arm/fmtconvert_vfp_armv6.S
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/arm/asm.S"
+
+/**
+ * ARM VFP optimized float to int16 conversion.
+ * Assume that len is a positive number and is multiple of 8, destination
+ * buffer is at least 4 bytes aligned (8 bytes alignment is better for
+ * performance), little-endian byte sex.
+ */
+@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
+function ff_float_to_int16_vfp, export=1
+        push            {r4-r8,lr}
+        vpush           {d8-d11}
+        vldmia          r1!, {s16-s23}
+        vcvt.s32.f32    s0,  s16
+        vcvt.s32.f32    s1,  s17
+        vcvt.s32.f32    s2,  s18
+        vcvt.s32.f32    s3,  s19
+        vcvt.s32.f32    s4,  s20
+        vcvt.s32.f32    s5,  s21
+        vcvt.s32.f32    s6,  s22
+        vcvt.s32.f32    s7,  s23
+1:
+        subs            r2,  r2,  #8
+        vmov            r3,  r4,  s0, s1
+        vmov            r5,  r6,  s2, s3
+        vmov            r7,  r8,  s4, s5
+        vmov            ip,  lr,  s6, s7
+        it              gt
+        vldmiagt        r1!, {s16-s23}
+        ssat            r4,  #16, r4
+        ssat            r3,  #16, r3
+        ssat            r6,  #16, r6
+        ssat            r5,  #16, r5
+        pkhbt           r3,  r3,  r4, lsl #16
+        pkhbt           r4,  r5,  r6, lsl #16
+        itttt           gt
+        vcvtgt.s32.f32  s0,  s16
+        vcvtgt.s32.f32  s1,  s17
+        vcvtgt.s32.f32  s2,  s18
+        vcvtgt.s32.f32  s3,  s19
+        itttt           gt
+        vcvtgt.s32.f32  s4,  s20
+        vcvtgt.s32.f32  s5,  s21
+        vcvtgt.s32.f32  s6,  s22
+        vcvtgt.s32.f32  s7,  s23
+        ssat            r8,  #16, r8
+        ssat            r7,  #16, r7
+        ssat            lr,  #16, lr
+        ssat            ip,  #16, ip
+        pkhbt           r5,  r7,  r8, lsl #16
+        pkhbt           r6,  ip,  lr, lsl #16
+        stmia           r0!, {r3-r6}
+        bgt             1b
+
+        vpop            {d8-d11}
+        pop             {r4-r8,pc}
+endfunc