mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
avcodec: [loongarch] Optimize vp9_lpf/idct with LSX.
Benchmark command: ffmpeg -i ../10_vp9_1080p_30fps_3Mbps.webm -f rawvideo -y /dev/null -an
before: 294fps
after: 567fps
Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
2fd914e079
commit
fea299f876
@ -13,4 +13,6 @@ LASX-OBJS-$(CONFIG_H264PRED) += loongarch/h264_intrapred_lasx.o
|
||||
LSX-OBJS-$(CONFIG_VP8_DECODER) += loongarch/vp8_mc_lsx.o \
|
||||
loongarch/vp8_lpf_lsx.o
|
||||
LSX-OBJS-$(CONFIG_VP9_DECODER) += loongarch/vp9_mc_lsx.o \
|
||||
loongarch/vp9_intra_lsx.o
|
||||
loongarch/vp9_intra_lsx.o \
|
||||
loongarch/vp9_lpf_lsx.o \
|
||||
loongarch/vp9_idct_lsx.o
|
||||
|
1411
libavcodec/loongarch/vp9_idct_lsx.c
Normal file
1411
libavcodec/loongarch/vp9_idct_lsx.c
Normal file
File diff suppressed because it is too large
Load Diff
3141
libavcodec/loongarch/vp9_lpf_lsx.c
Normal file
3141
libavcodec/loongarch/vp9_lpf_lsx.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -71,6 +71,15 @@
|
||||
dsp->intra_pred[tx][TOP_DC_PRED] = ff_dc_top_##sz##_lsx; \
|
||||
dsp->intra_pred[tx][TM_VP8_PRED] = ff_tm_##sz##_lsx; \
|
||||
|
||||
#define init_idct(tx, nm) \
|
||||
dsp->itxfm_add[tx][DCT_DCT] = \
|
||||
dsp->itxfm_add[tx][ADST_DCT] = \
|
||||
dsp->itxfm_add[tx][DCT_ADST] = \
|
||||
dsp->itxfm_add[tx][ADST_ADST] = nm##_add_lsx;
|
||||
|
||||
#define init_itxfm(tx, sz) \
|
||||
dsp->itxfm_add[tx][DCT_DCT] = ff_idct_idct_##sz##_add_lsx;
|
||||
|
||||
av_cold void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
@ -86,8 +95,30 @@ av_cold void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp)
|
||||
init_intra_pred1_lsx(TX_32X32, 32x32);
|
||||
init_intra_pred2_lsx(TX_4X4, 4x4);
|
||||
init_intra_pred2_lsx(TX_8X8, 8x8);
|
||||
init_itxfm(TX_8X8, 8x8);
|
||||
init_itxfm(TX_16X16, 16x16);
|
||||
init_idct(TX_32X32, ff_idct_idct_32x32);
|
||||
dsp->loop_filter_8[0][0] = ff_loop_filter_h_4_8_lsx;
|
||||
dsp->loop_filter_8[0][1] = ff_loop_filter_v_4_8_lsx;
|
||||
dsp->loop_filter_8[1][0] = ff_loop_filter_h_8_8_lsx;
|
||||
dsp->loop_filter_8[1][1] = ff_loop_filter_v_8_8_lsx;
|
||||
dsp->loop_filter_8[2][0] = ff_loop_filter_h_16_8_lsx;
|
||||
dsp->loop_filter_8[2][1] = ff_loop_filter_v_16_8_lsx;
|
||||
|
||||
dsp->loop_filter_16[0] = ff_loop_filter_h_16_16_lsx;
|
||||
dsp->loop_filter_16[1] = ff_loop_filter_v_16_16_lsx;
|
||||
|
||||
dsp->loop_filter_mix2[0][0][0] = ff_loop_filter_h_44_16_lsx;
|
||||
dsp->loop_filter_mix2[0][0][1] = ff_loop_filter_v_44_16_lsx;
|
||||
dsp->loop_filter_mix2[0][1][0] = ff_loop_filter_h_48_16_lsx;
|
||||
dsp->loop_filter_mix2[0][1][1] = ff_loop_filter_v_48_16_lsx;
|
||||
dsp->loop_filter_mix2[1][0][0] = ff_loop_filter_h_84_16_lsx;
|
||||
dsp->loop_filter_mix2[1][0][1] = ff_loop_filter_v_84_16_lsx;
|
||||
dsp->loop_filter_mix2[1][1][0] = ff_loop_filter_h_88_16_lsx;
|
||||
dsp->loop_filter_mix2[1][1][1] = ff_loop_filter_v_88_16_lsx;
|
||||
}
|
||||
}
|
||||
|
||||
#undef init_subpel1
|
||||
#undef init_subpel2
|
||||
#undef init_subpel3
|
||||
@ -95,3 +126,5 @@ av_cold void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp)
|
||||
#undef init_fpel
|
||||
#undef init_intra_pred1_lsx
|
||||
#undef init_intra_pred2_lsx
|
||||
#undef init_idct
|
||||
#undef init_itxfm
|
||||
|
@ -140,5 +140,43 @@ void ff_tm_16x16_lsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
|
||||
const uint8_t *top);
|
||||
void ff_tm_32x32_lsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
|
||||
const uint8_t *top);
|
||||
void ff_loop_filter_h_16_8_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_v_16_8_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_h_4_8_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_v_4_8_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_h_44_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_v_44_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_h_8_8_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_v_8_8_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_h_88_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_v_88_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_h_84_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_v_84_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_h_48_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_v_48_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_h_16_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_loop_filter_v_16_16_lsx(uint8_t *dst, ptrdiff_t stride, int32_t e,
|
||||
int32_t i, int32_t h);
|
||||
void ff_idct_idct_8x8_add_lsx(uint8_t *dst, ptrdiff_t stride,
|
||||
int16_t *block, int eob);
|
||||
void ff_idct_idct_16x16_add_lsx(uint8_t *dst, ptrdiff_t stride,
|
||||
int16_t *block, int eob);
|
||||
void ff_idct_idct_32x32_add_lsx(uint8_t *dst, ptrdiff_t stride,
|
||||
int16_t *block, int eob);
|
||||
|
||||
#endif /* AVCODEC_LOONGARCH_VP9DSP_LOONGARCH_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user