mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avcodec/mips: version 1 of vp8dsp optimizations for loongson mmi
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
3f55752cd5
commit
c5c6e30781
@ -77,4 +77,5 @@ MMI-OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvid_idct_mmi.o
|
||||
MMI-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_mmi.o
|
||||
MMI-OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_mmi.o
|
||||
MMI-OBJS-$(CONFIG_H264QPEL) += mips/h264qpel_mmi.o
|
||||
MMI-OBJS-$(CONFIG_VP8_DECODER) += mips/vp8dsp_mmi.o
|
||||
MMI-OBJS-$(CONFIG_HPELDSP) += mips/hpeldsp_mmi.o
|
||||
|
@ -28,6 +28,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) = {0x0002000200020002ULL};
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) = {0x0003000300030003ULL};
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) = {0x0004000400040004ULL};
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) = {0x0005000500050005ULL};
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_6) = {0x0006000600060006ULL};
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) = {0x0008000800080008ULL};
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) = {0x0009000900090009ULL};
|
||||
DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) = {0x000A000A000A000AULL};
|
||||
|
@ -29,6 +29,7 @@ extern const uint64_t ff_pw_2;
|
||||
extern const uint64_t ff_pw_3;
|
||||
extern const uint64_t ff_pw_4;
|
||||
extern const uint64_t ff_pw_5;
|
||||
extern const uint64_t ff_pw_6;
|
||||
extern const uint64_t ff_pw_8;
|
||||
extern const uint64_t ff_pw_9;
|
||||
extern const uint64_t ff_pw_10;
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
|
||||
* Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
@ -105,9 +106,97 @@ static av_cold void vp8dsp_init_msa(VP8DSPContext *dsp)
|
||||
}
|
||||
#endif // #if HAVE_MSA
|
||||
|
||||
#if HAVE_MMI
|
||||
/*
 * Fill the VP8 DSP function table `dsp` with the Loongson MMI (*_mmi)
 * implementations. Every statement below is a plain function-pointer
 * assignment; slots not assigned here are left as they were on entry.
 */
static av_cold void vp8dsp_init_mmi(VP8DSPContext *dsp)
|
||||
{
|
||||
/* Luma DC WHT and IDCT-add entry points. */
dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmi;
|
||||
dsp->vp8_luma_dc_wht_dc = ff_vp8_luma_dc_wht_dc_mmi;
|
||||
dsp->vp8_idct_add = ff_vp8_idct_add_mmi;
|
||||
dsp->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmi;
|
||||
dsp->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmi;
|
||||
dsp->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmi;
|
||||
|
||||
/*
 * put_vp8_epel_pixels_tab[size][v][h], as assigned below:
 *   size: 0 -> epel16, 1 -> epel8, 2 -> epel4
 *   v:    1 -> v4 variant, 2 -> v6 variant (0 -> no vertical filter)
 *   h:    1 -> h4 variant, 2 -> h6 variant (0 -> no horizontal filter)
 */
dsp->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[0][1][1] = ff_put_vp8_epel16_h4v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[0][1][2] = ff_put_vp8_epel16_h6v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[0][2][1] = ff_put_vp8_epel16_h4v6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_mmi;
|
||||
|
||||
dsp->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_mmi;
|
||||
|
||||
dsp->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[2][1][1] = ff_put_vp8_epel4_h4v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[2][1][2] = ff_put_vp8_epel4_h6v4_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[2][2][1] = ff_put_vp8_epel4_h4v6_mmi;
|
||||
dsp->put_vp8_epel_pixels_tab[2][2][2] = ff_put_vp8_epel4_h6v6_mmi;
|
||||
|
||||
/*
 * put_vp8_bilinear_pixels_tab uses the same three indices, but here
 * index values 1 and 2 receive the same function: any non-zero second
 * index gets the _v (or _hv) routine, any non-zero third index gets the
 * _h (or _hv) routine.
 */
dsp->put_vp8_bilinear_pixels_tab[0][0][1] = ff_put_vp8_bilinear16_h_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[0][0][2] = ff_put_vp8_bilinear16_h_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[0][1][0] = ff_put_vp8_bilinear16_v_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[0][1][1] = ff_put_vp8_bilinear16_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[0][1][2] = ff_put_vp8_bilinear16_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[0][2][0] = ff_put_vp8_bilinear16_v_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[0][2][1] = ff_put_vp8_bilinear16_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[0][2][2] = ff_put_vp8_bilinear16_hv_mmi;
|
||||
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][0][1] = ff_put_vp8_bilinear8_h_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][0][2] = ff_put_vp8_bilinear8_h_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][1][0] = ff_put_vp8_bilinear8_v_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][1][1] = ff_put_vp8_bilinear8_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][1][2] = ff_put_vp8_bilinear8_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][2][0] = ff_put_vp8_bilinear8_v_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][2][1] = ff_put_vp8_bilinear8_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][2][2] = ff_put_vp8_bilinear8_hv_mmi;
|
||||
|
||||
dsp->put_vp8_bilinear_pixels_tab[2][0][1] = ff_put_vp8_bilinear4_h_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[2][0][2] = ff_put_vp8_bilinear4_h_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[2][1][0] = ff_put_vp8_bilinear4_v_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[2][1][1] = ff_put_vp8_bilinear4_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[2][1][2] = ff_put_vp8_bilinear4_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[2][2][0] = ff_put_vp8_bilinear4_v_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilinear4_hv_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilinear4_hv_mmi;
|
||||
|
||||
/*
 * [size][0][0] slots: the same ff_put_vp8_pixels{16,8}_mmi routine is
 * installed in both the epel and the bilinear table.
 * NOTE(review): no [2][0][0] slot is assigned in this function even though
 * ff_put_vp8_pixels4_mmi is declared in vp8dsp_mips.h -- confirm whether
 * leaving the 4-wide slot untouched is intentional.
 */
dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmi;
|
||||
|
||||
dsp->put_vp8_epel_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmi;
|
||||
dsp->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmi;
|
||||
|
||||
/* Loop filters: 16y slots take a single dst, 8uv slots take dstU/dstV. */
dsp->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16_mmi;
|
||||
dsp->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16_mmi;
|
||||
dsp->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mmi;
|
||||
dsp->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mmi;
|
||||
|
||||
/* "inner" variants of the four loop filters above. */
dsp->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_mmi;
|
||||
dsp->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_mmi;
|
||||
dsp->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmi;
|
||||
dsp->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmi;
|
||||
|
||||
/* Simple loop filter variants (single flim threshold argument). */
dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmi;
|
||||
dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmi;
|
||||
}
|
||||
#endif /* HAVE_MMI */
|
||||
|
||||
/*
 * MIPS entry point for VP8 DSP initialisation: runs the MSA and/or MMI
 * init routines on `dsp`, depending on which of HAVE_MSA / HAVE_MMI are
 * enabled at build time.
 *
 * The MMI init runs after the MSA init, so if both are compiled in, any
 * slot written by both ends up holding the MMI function. No run-time CPU
 * capability check is performed in this function.
 */
av_cold void ff_vp8dsp_init_mips(VP8DSPContext *dsp)
|
||||
{
|
||||
#if HAVE_MSA
|
||||
vp8dsp_init_msa(dsp);
|
||||
#endif // #if HAVE_MSA
|
||||
#if HAVE_MMI
|
||||
/* Runs last: overrides MSA for every slot it assigns. */
vp8dsp_init_mmi(dsp);
|
||||
#endif /* HAVE_MMI */
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
|
||||
* Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
@ -21,6 +22,11 @@
|
||||
#ifndef AVCODEC_MIPS_VP8DSP_MIPS_H
|
||||
#define AVCODEC_MIPS_VP8DSP_MIPS_H
|
||||
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavcodec/vp8dsp.h"
|
||||
#include "libavcodec/mathops.h"
|
||||
#include "constants.h"
|
||||
|
||||
void ff_put_vp8_pixels4_msa(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride,
|
||||
int h, int x, int y);
|
||||
@ -169,4 +175,115 @@ void ff_vp8_idct_dc_add4uv_msa(uint8_t *dst, int16_t block[4][16],
|
||||
void ff_vp8_idct_dc_add4y_msa(uint8_t *dst, int16_t block[4][16],
|
||||
ptrdiff_t stride);
|
||||
|
||||
void ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16]);
|
||||
void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16]);
|
||||
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
|
||||
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
|
||||
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16],
|
||||
ptrdiff_t stride);
|
||||
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16],
|
||||
ptrdiff_t stride);
|
||||
|
||||
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int h, int x, int y);
|
||||
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int h, int x, int y);
|
||||
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int h, int x, int y);
|
||||
|
||||
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
|
||||
ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
|
||||
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
|
||||
void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
|
||||
void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
|
||||
void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
|
||||
void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dststride,
|
||||
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
|
||||
|
||||
// loop filter applied to edges between macroblocks
|
||||
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
|
||||
int flim_I, int hev_thresh);
|
||||
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
|
||||
int flim_I, int hev_thresh);
|
||||
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
|
||||
int flim_E, int flim_I, int hev_thresh);
|
||||
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
|
||||
int flim_E, int flim_I, int hev_thresh);
|
||||
|
||||
// loop filter applied to inner macroblock edges
|
||||
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
|
||||
int flim_E, int flim_I, int hev_thresh);
|
||||
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
|
||||
int flim_E, int flim_I, int hev_thresh);
|
||||
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV,
|
||||
ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh);
|
||||
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV,
|
||||
ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh);
|
||||
|
||||
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim);
|
||||
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim);
|
||||
|
||||
#endif // #ifndef AVCODEC_MIPS_VP8DSP_MIPS_H
|
||||
|
3052
libavcodec/mips/vp8dsp_mmi.c
Normal file
3052
libavcodec/mips/vp8dsp_mmi.c
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user