avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions

This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions in the new file hevcpred_msa.c. It also adds new generic macros (needed for this patch) to libavutil/mips/generic_macros_msa.h. Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2025-08-10 06:10:52 +02:00 · 2015-06-04 13:31:49 +05:30
parent 271195f85b
commit d6d98237ed
7 changed files with 3259 additions and 2 deletions
--- a/libavcodec/hevcpred.c
+++ b/libavcodec/hevcpred.c
@@ -74,4 +74,7 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
        HEVC_PRED(8);
        break;
    }
+
+    if (ARCH_MIPS)
+        ff_hevc_pred_init_mips(hpc, bit_depth);
 }
--- a/libavcodec/hevcpred.h
+++ b/libavcodec/hevcpred.h
@@ -41,5 +41,6 @@ typedef struct HEVCPredContext {
 } HEVCPredContext;

 void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
+void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth);

 #endif /* AVCODEC_HEVCPRED_H */
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -18,7 +18,8 @@ OBJS-$(CONFIG_AAC_DECODER)                += mips/aacdec_mips.o            \
                                             mips/aacpsdsp_mips.o
 MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)      += mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)        += mips/iirfilter_mips.o
-OBJS-$(CONFIG_HEVC_DECODER)               += mips/hevcdsp_init_mips.o
+OBJS-$(CONFIG_HEVC_DECODER)               += mips/hevcdsp_init_mips.o      \
+                                             mips/hevcpred_init_mips.o
 OBJS-$(CONFIG_H264DSP)                    += mips/h264dsp_init_mips.o
 OBJS-$(CONFIG_H264CHROMA)                 += mips/h264chroma_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)           += mips/hevcdsp_msa.o            \
@@ -27,7 +28,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER)           += mips/hevcdsp_msa.o            \
                                             mips/hevc_mc_bi_msa.o         \
                                             mips/hevc_mc_biw_msa.o        \
                                             mips/hevc_idct_msa.o          \
-                                             mips/hevc_lpf_sao_msa.o
+                                             mips/hevc_lpf_sao_msa.o       \
+                                             mips/hevcpred_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)                += mips/h264dsp_msa.o
 LOONGSON3-OBJS-$(CONFIG_H264DSP)          += mips/h264dsp_mmi.o
 LOONGSON3-OBJS-$(CONFIG_H264CHROMA)       += mips/h264chroma_mmi.o
--- a/libavcodec/mips/hevcpred_init_mips.c
+++ b/libavcodec/mips/hevcpred_init_mips.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/hevc.h"
+#include "libavcodec/mips/hevcpred_mips.h"
+
+#if HAVE_MSA
+static av_cold void hevc_pred_init_msa(HEVCPredContext *c, const int bit_depth)
+{
+    if (8 == bit_depth) {  /* MSA routines are installed for 8-bit only */
+        c->intra_pred[2] = ff_intra_pred_8_16x16_msa;  /* [0], [1] not modified */
+        c->intra_pred[3] = ff_intra_pred_8_32x32_msa;
+        c->pred_planar[0] = ff_hevc_intra_pred_planar_0_msa;
+        c->pred_planar[1] = ff_hevc_intra_pred_planar_1_msa;
+        c->pred_planar[2] = ff_hevc_intra_pred_planar_2_msa;
+        c->pred_planar[3] = ff_hevc_intra_pred_planar_3_msa;
+        c->pred_dc = ff_hevc_intra_pred_dc_msa;
+        c->pred_angular[0] = ff_pred_intra_pred_angular_0_msa;
+        c->pred_angular[1] = ff_pred_intra_pred_angular_1_msa;
+        c->pred_angular[2] = ff_pred_intra_pred_angular_2_msa;
+        c->pred_angular[3] = ff_pred_intra_pred_angular_3_msa;
+    }  /* any other bit depth: c is left exactly as it was passed in */
+}
+#endif  // #if HAVE_MSA
+
+void ff_hevc_pred_init_mips(HEVCPredContext *c, const int bit_depth)
+{
+#if HAVE_MSA  /* without MSA this function body is empty and c is unchanged */
+    hevc_pred_init_msa(c, bit_depth);  /* only acts when bit_depth is 8 */
+#endif  // #if HAVE_MSA
+}
--- a/libavcodec/mips/hevcpred_mips.h
+++ b/libavcodec/mips/hevcpred_mips.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
+#define AVCODEC_MIPS_HEVCPRED_MIPS_H
+
+#include "libavcodec/hevcdsp.h"
+
+void ff_hevc_intra_pred_planar_0_msa(uint8_t *dst,
+                                     const uint8_t *src_top,
+                                     const uint8_t *src_left,
+                                     ptrdiff_t stride);
+
+void ff_hevc_intra_pred_planar_1_msa(uint8_t *dst,
+                                     const uint8_t *src_top,
+                                     const uint8_t *src_left,
+                                     ptrdiff_t stride);
+
+void ff_hevc_intra_pred_planar_2_msa(uint8_t *dst,
+                                     const uint8_t *src_top,
+                                     const uint8_t *src_left,
+                                     ptrdiff_t stride);
+
+void ff_hevc_intra_pred_planar_3_msa(uint8_t *dst,
+                                     const uint8_t *src_top,
+                                     const uint8_t *src_left,
+                                     ptrdiff_t stride);
+
+void ff_hevc_intra_pred_dc_msa(uint8_t *dst, const uint8_t *src_top,
+                               const uint8_t *src_left,
+                               ptrdiff_t stride, int log2, int c_idx);
+
+void ff_pred_intra_pred_angular_0_msa(uint8_t *dst,
+                                      const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode);
+
+void ff_pred_intra_pred_angular_1_msa(uint8_t *dst,
+                                      const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode);
+
+void ff_pred_intra_pred_angular_2_msa(uint8_t *dst,
+                                      const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode);
+
+void ff_pred_intra_pred_angular_3_msa(uint8_t *dst,
+                                      const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode);
+
+void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx);
+void ff_intra_pred_8_32x32_msa(HEVCContext *s, int x0, int y0, int c_idx);
+
+#endif  // #ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
--- a/libavcodec/mips/hevcpred_msa.c
+++ b/libavcodec/mips/hevcpred_msa.c
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -770,7 +770,9 @@
    SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val);  \
    SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val);  \
 }
+#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
 #define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__)
+#define SLDI_B4_0_SH(...) SLDI_B4_0(v8i16, __VA_ARGS__)

 /* Description : Immediate number of columns to slide
   Arguments   : Inputs  - in0_0, in0_1, in1_0, in1_1, slide_val
@@ -1037,6 +1039,21 @@
    out_m;                                                \
 } )

+/* Description : Horizontal addition of unsigned byte vector elements
+   Arguments   : Inputs  - in0, in1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Each unsigned odd byte element from 'in0' is added to the
+                 even unsigned byte element from 'in0' (pairwise) and the
+                 halfword result is stored in 'out0'; likewise 'in1' -> 'out1'
+*/
+#define HADD_UB2(RTYPE, in0, in1, out0, out1)                 \
+{                                                             \
+    out0 = (RTYPE) __msa_hadd_u_h((v16u8) in0, (v16u8) in0);  \
+    out1 = (RTYPE) __msa_hadd_u_h((v16u8) in1, (v16u8) in1);  \
+}
+#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
+
 /* Description : Horizontal subtraction of unsigned byte vector elements
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
@@ -1053,6 +1070,20 @@
 #define HSUB_UB2_UH(...) HSUB_UB2(v8u16, __VA_ARGS__)
 #define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)

+/* Description : Insert the given word elements into 1 destination vector
+                 (at word positions 0 and 1)
+   Arguments   : Inputs  - in0, in1 (2 input word elements)
+                 Outputs - out      (output vector, also read as the base)
+                 Return Type - as per RTYPE
+*/
+#define INSERT_W2(RTYPE, in0, in1, out)                 \
+{                                                       \
+    out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0);  \
+    out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1);  \
+}
+#define INSERT_W2_UB(...) INSERT_W2(v16u8, __VA_ARGS__)
+#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
+
 #define INSERT_W4(RTYPE, in0, in1, in2, in3, out)       \
 {                                                       \
    out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0);  \
@@ -1364,8 +1395,11 @@
    out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1);  \
    out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1);  \
 }
+#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
 #define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
+#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
 #define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
+#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__)

 #define ILVRL_H2(RTYPE, in0, in1, out0, out1)               \
 {                                                           \
@@ -1923,6 +1957,18 @@
    ADD2(in4, in5, in6, in7, out2, out3);                                     \
 }

+/* Description : Subtraction of 2 pairs of vectors
+   Arguments   : Inputs  - in0, in1, in2, in3
+                 Outputs - out0, out1
+   Details     : 'in1' is subtracted from 'in0' and 'in3' from 'in2'; the
+                 two differences are stored in 'out0' and 'out1'
+*/
+#define SUB2(in0, in1, in2, in3, out0, out1)  \
+{                                             \
+    out0 = in0 - in1;                         \
+    out1 = in2 - in3;                         \
+}
+
 /* Description : Sign extend byte elements from input vector and return
                 halfword results in pair of vectors
   Arguments   : Inputs  - in           (1 input byte vector)