mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions
This patch adds MSA (MIPS-SIMD-Arch) optimizations for HEVC intra prediction functions in the new file hevcpred_msa.c. It also adds new generic macros (needed for this patch) in libavutil/mips/generic_macros_msa.h. Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
271195f85b
commit
d6d98237ed
@ -74,4 +74,7 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
|
||||
HEVC_PRED(8);
|
||||
break;
|
||||
}
|
||||
|
||||
if (ARCH_MIPS)
|
||||
ff_hevc_pred_init_mips(hpc, bit_depth);
|
||||
}
|
||||
|
@ -41,5 +41,6 @@ typedef struct HEVCPredContext {
|
||||
} HEVCPredContext;
|
||||
|
||||
void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
|
||||
void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth);
|
||||
|
||||
#endif /* AVCODEC_HEVCPRED_H */
|
||||
|
@ -18,7 +18,8 @@ OBJS-$(CONFIG_AAC_DECODER) += mips/aacdec_mips.o \
|
||||
mips/aacpsdsp_mips.o
|
||||
MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER) += mips/aaccoder_mips.o
|
||||
MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER) += mips/iirfilter_mips.o
|
||||
OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o
|
||||
OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o \
|
||||
mips/hevcpred_init_mips.o
|
||||
OBJS-$(CONFIG_H264DSP) += mips/h264dsp_init_mips.o
|
||||
OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_init_mips.o
|
||||
MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \
|
||||
@ -27,7 +28,8 @@ MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \
|
||||
mips/hevc_mc_bi_msa.o \
|
||||
mips/hevc_mc_biw_msa.o \
|
||||
mips/hevc_idct_msa.o \
|
||||
mips/hevc_lpf_sao_msa.o
|
||||
mips/hevc_lpf_sao_msa.o \
|
||||
mips/hevcpred_msa.o
|
||||
MSA-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_msa.o
|
||||
LOONGSON3-OBJS-$(CONFIG_H264DSP) += mips/h264dsp_mmi.o
|
||||
LOONGSON3-OBJS-$(CONFIG_H264CHROMA) += mips/h264chroma_mmi.o
|
||||
|
48
libavcodec/mips/hevcpred_init_mips.c
Normal file
48
libavcodec/mips/hevcpred_init_mips.c
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavcodec/hevc.h"
|
||||
#include "libavcodec/mips/hevcpred_mips.h"
|
||||
|
||||
#if HAVE_MSA
|
||||
/* Install the MSA implementations of the HEVC intra prediction callbacks
 * into the prediction context 'c'.
 *
 * The function pointers are only overridden when bit_depth equals 8; for
 * any other bit depth 'c' is left exactly as the caller passed it in.
 */
static av_cold void hevc_pred_init_msa(HEVCPredContext *c, const int bit_depth)
|
||||
{
|
||||
if (8 == bit_depth) {
|
||||
/* Only slots 2 and 3 of intra_pred are replaced here (the 16x16 and
 * 32x32 variants, going by the function names); slots 0 and 1 keep
 * whatever was installed before this call. */
c->intra_pred[2] = ff_intra_pred_8_16x16_msa;
|
||||
c->intra_pred[3] = ff_intra_pred_8_32x32_msa;
|
||||
c->pred_planar[0] = ff_hevc_intra_pred_planar_0_msa;
|
||||
c->pred_planar[1] = ff_hevc_intra_pred_planar_1_msa;
|
||||
c->pred_planar[2] = ff_hevc_intra_pred_planar_2_msa;
|
||||
c->pred_planar[3] = ff_hevc_intra_pred_planar_3_msa;
|
||||
c->pred_dc = ff_hevc_intra_pred_dc_msa;
|
||||
c->pred_angular[0] = ff_pred_intra_pred_angular_0_msa;
|
||||
c->pred_angular[1] = ff_pred_intra_pred_angular_1_msa;
|
||||
c->pred_angular[2] = ff_pred_intra_pred_angular_2_msa;
|
||||
c->pred_angular[3] = ff_pred_intra_pred_angular_3_msa;
|
||||
}
|
||||
}
|
||||
#endif // #if HAVE_MSA
|
||||
|
||||
/* MIPS-specific hook for HEVC intra prediction setup; ff_hevc_pred_init()
 * calls it under 'if (ARCH_MIPS)'.
 *
 * When HAVE_MSA is non-zero at preprocessing time the work is forwarded to
 * hevc_pred_init_msa(); otherwise the body is empty and 'c' is not modified.
 */
void ff_hevc_pred_init_mips(HEVCPredContext *c, const int bit_depth)
|
||||
{
|
||||
#if HAVE_MSA
|
||||
hevc_pred_init_msa(c, bit_depth);
|
||||
#endif // #if HAVE_MSA
|
||||
}
|
73
libavcodec/mips/hevcpred_mips.h
Normal file
73
libavcodec/mips/hevcpred_mips.h
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
|
||||
#define AVCODEC_MIPS_HEVCPRED_MIPS_H
|
||||
|
||||
#include "libavcodec/hevcdsp.h"
|
||||
|
||||
void ff_hevc_intra_pred_planar_0_msa(uint8_t *dst,
|
||||
const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride);
|
||||
|
||||
void ff_hevc_intra_pred_planar_1_msa(uint8_t *dst,
|
||||
const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride);
|
||||
|
||||
void ff_hevc_intra_pred_planar_2_msa(uint8_t *dst,
|
||||
const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride);
|
||||
|
||||
void ff_hevc_intra_pred_planar_3_msa(uint8_t *dst,
|
||||
const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride);
|
||||
|
||||
void ff_hevc_intra_pred_dc_msa(uint8_t *dst, const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride, int log2, int c_idx);
|
||||
|
||||
void ff_pred_intra_pred_angular_0_msa(uint8_t *dst,
|
||||
const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride, int c_idx, int mode);
|
||||
|
||||
void ff_pred_intra_pred_angular_1_msa(uint8_t *dst,
|
||||
const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride, int c_idx, int mode);
|
||||
|
||||
void ff_pred_intra_pred_angular_2_msa(uint8_t *dst,
|
||||
const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride, int c_idx, int mode);
|
||||
|
||||
void ff_pred_intra_pred_angular_3_msa(uint8_t *dst,
|
||||
const uint8_t *src_top,
|
||||
const uint8_t *src_left,
|
||||
ptrdiff_t stride, int c_idx, int mode);
|
||||
|
||||
void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx);
|
||||
void ff_intra_pred_8_32x32_msa(HEVCContext *s, int x0, int y0, int c_idx);
|
||||
|
||||
#endif // #ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
|
3084
libavcodec/mips/hevcpred_msa.c
Normal file
3084
libavcodec/mips/hevcpred_msa.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -770,7 +770,9 @@
|
||||
SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \
|
||||
SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \
|
||||
}
|
||||
#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
|
||||
#define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__)
|
||||
#define SLDI_B4_0_SH(...) SLDI_B4_0(v8i16, __VA_ARGS__)
|
||||
|
||||
/* Description : Immediate number of columns to slide
|
||||
Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
|
||||
@ -1037,6 +1039,21 @@
|
||||
out_m; \
|
||||
} )
|
||||
|
||||
/* Description : Horizontal addition of unsigned byte vector elements
|
||||
Arguments : Inputs - in0, in1
|
||||
Outputs - out0, out1
|
||||
Return Type - as per RTYPE
|
||||
Details : Each unsigned odd byte element from 'in0' is added to the
|
||||
adjacent even unsigned byte element from 'in0' (pairwise) and
|
||||
the halfword result is stored in 'out0'; likewise 'in1' into 'out1'
|
||||
*/
|
||||
#define HADD_UB2(RTYPE, in0, in1, out0, out1) \
|
||||
{ \
|
||||
out0 = (RTYPE) __msa_hadd_u_h((v16u8) in0, (v16u8) in0); \
|
||||
out1 = (RTYPE) __msa_hadd_u_h((v16u8) in1, (v16u8) in1); \
|
||||
}
|
||||
#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
|
||||
|
||||
/* Description : Horizontal subtraction of unsigned byte vector elements
|
||||
Arguments : Inputs - in0, in1
|
||||
Outputs - out0, out1
|
||||
@ -1053,6 +1070,20 @@
|
||||
#define HSUB_UB2_UH(...) HSUB_UB2(v8u16, __VA_ARGS__)
|
||||
#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
|
||||
|
||||
/* Description : Insert the given word values into consecutive word elements
|
||||
of 1 destination vector, starting at element 0
|
||||
Arguments : Inputs - in0, in1 (INSERT_W2); in0, in1, in2, in3 (INSERT_W4)
|
||||
Outputs - out (output vector; also read, as it is the vector passed to __msa_insert_w)
|
||||
Return Type - as per RTYPE
|
||||
*/
|
||||
#define INSERT_W2(RTYPE, in0, in1, out) \
|
||||
{ \
|
||||
out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \
|
||||
out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1); \
|
||||
}
|
||||
#define INSERT_W2_UB(...) INSERT_W2(v16u8, __VA_ARGS__)
|
||||
#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
|
||||
|
||||
#define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \
|
||||
{ \
|
||||
out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \
|
||||
@ -1364,8 +1395,11 @@
|
||||
out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
|
||||
out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
|
||||
}
|
||||
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
|
||||
#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
|
||||
#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
|
||||
#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
|
||||
#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__)
|
||||
|
||||
#define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
|
||||
{ \
|
||||
@ -1923,6 +1957,18 @@
|
||||
ADD2(in4, in5, in6, in7, out2, out3); \
|
||||
}
|
||||
|
||||
/* Description : Subtraction of 2 pairs of vectors
|
||||
Arguments : Inputs - in0, in1, in2, in3
|
||||
Outputs - out0, out1
|
||||
Details : 'in1' is subtracted from 'in0' and 'in3' from 'in2', giving
|
||||
the 2 results out0 = in0 - in1 and out1 = in2 - in3
|
||||
*/
|
||||
#define SUB2(in0, in1, in2, in3, out0, out1) \
|
||||
{ \
|
||||
out0 = in0 - in1; \
|
||||
out1 = in2 - in3; \
|
||||
}
|
||||
|
||||
/* Description : Sign extend byte elements from input vector and return
|
||||
halfword results in pair of vectors
|
||||
Arguments : Inputs - in (1 input byte vector)
|
||||
|
Loading…
Reference in New Issue
Block a user