mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
lavc/vp7dsp: add R-V V vp7_luma_dc_wht
This works out a bit more favourably than VP8's due to:
- additional multiplications that can be vectorised,
- hardware-supported fixed-point rounding mode.

vp7_luma_dc_wht_c: 3.2
vp7_luma_dc_wht_rvv_i64: 2.0
This commit is contained in:
parent
91b5ea7bb9
commit
fd39997f72
@ -65,6 +65,8 @@ RVV-OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_rvv.o
|
||||
OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
|
||||
RV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvi.o
|
||||
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
|
||||
OBJS-$(CONFIG_VP7_DECODER) += riscv/vp7dsp_init.o
|
||||
RVV-OBJS-$(CONFIG_VP7_DECODER) += riscv/vp7dsp_rvv.o
|
||||
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
|
||||
RV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvi.o
|
||||
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
|
||||
|
41
libavcodec/riscv/vp7dsp_init.c
Normal file
41
libavcodec/riscv/vp7dsp_init.c
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Rémi Denis-Courmont.
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/riscv/cpu.h"
|
||||
#include "libavcodec/vp8dsp.h"
|
||||
|
||||
/* RVV implementation of the VP7 luma DC WHT; defined in vp7dsp_rvv.S, where it is only assembled when __riscv_xlen >= 64. */
void ff_vp7_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]);
|
||||
|
||||
/**
 * Install the RISC-V optimised VP7 routines into a DSP context.
 *
 * The context is left untouched unless the build has RVV support
 * (HAVE_RVV), the running CPU reports AV_CPU_FLAG_RVV_I32 and
 * ff_rv_vlen_least(128) succeeds. The luma DC WHT override is furthermore
 * compiled in only when __riscv_xlen >= 64.
 *
 * @param c DSP context whose vp8_luma_dc_wht pointer may be replaced
 */
av_cold void ff_vp7dsp_init_riscv(VP8DSPContext *c)
{
#if HAVE_RVV
    const int cpu_flags = av_get_cpu_flags();

    /* The vector length is only queried once the RVV flag is known to be set. */
    if (!(cpu_flags & AV_CPU_FLAG_RVV_I32) || !ff_rv_vlen_least(128))
        return;

#if __riscv_xlen >= 64
    /* The assembly routine is itself guarded by the same XLEN check. */
    c->vp8_luma_dc_wht = ff_vp7_luma_dc_wht_rvv;
#endif
#endif
}
|
95
libavcodec/riscv/vp7dsp_rvv.S
Normal file
95
libavcodec/riscv/vp7dsp_rvv.S
Normal file
@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Rémi Denis-Courmont.
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/riscv/asm.S"
|
||||
|
||||
#if __riscv_xlen >= 64
|
||||
/*
 * void ff_vp7_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]);
 *
 * Register use (assuming the standard calling convention for the prototype
 * above): a0 = block, a1 = dc.
 *
 * Two passes of the same 4-point butterfly are run on four lanes at once.
 * With inputs x0..x3 and constants K0 = 23170, K1 = 12540, K2 = 30274:
 *     a = (x0 + x2) * K0        c = x1 * K1 - x3 * K2
 *     b = (x0 - x2) * K0        d = x1 * K2 + x3 * K1
 *     y0 = a + d, y1 = b + c, y2 = b - c, y3 = a - d
 * Pass 1 narrows with a plain arithmetic shift by 14; pass 2 narrows with
 * vnclip by 18, which rounds according to vxrm. In between, the
 * intermediate values are transposed through a 32-byte stack scratch area.
 * The 32 bytes at dc are zeroed, and the 16 results are written to
 * block[k][i][0].
 * NOTE(review): the constants look like 2^15 * {cos(pi/4), sin(pi/8),
 * cos(pi/8)} - confirm against the C reference.
 */
func ff_vp7_luma_dc_wht_rvv, zve32x
        csrwi       vxrm, 0                     /* fixed-point rounding mode 0 (round-to-nearest-up in the RVV spec) */
        li          t4, 12540                   /* K1 */
        vsetivli    zero, 4, e16, mf2, ta, ma   /* 4 lanes of 16-bit elements */
        vlseg4e16.v v0, (a1)                    /* de-interleave: v0..v3 = dc[4 * i + 0..3], lane i */
        li          t6, 30274                   /* K2 */
        vwmul.vx    v8, v1, t4                  /* x1 * K1 (widened to 32 bits) */
        li          t5, 23170                   /* K0 */
        vwmul.vx    v9, v3, t6                  /* x3 * K2 */
        /* t1..t3 are taken from the not-yet-decremented sp, so they land at
         * +8, +16 and +24 bytes inside the scratch area reserved below. */
        addi        t1, sp, -12 * 2
        vwmul.vx    v10, v1, t6                 /* x1 * K2 */
        addi        t2, sp, -8 * 2
        vwmul.vx    v11, v3, t4                 /* x3 * K1 */
        addi        t3, sp, -4 * 2
        vwadd.vv    v4, v0, v2                  /* x0 + x2 */
        addi        sp, sp, -16 * 2             /* reserve 16 * int16_t of scratch */
        vwsub.vv    v5, v0, v2                  /* x0 - x2 */
        vsetvli     zero, zero, e32, m1, ta, ma /* same SEW/LMUL ratio: vl is kept */
        vadd.vv     v7, v10, v11                /* d */
        vmul.vx     v4, v4, t5                  /* a */
        vsub.vv     v6, v8, v9                  /* c */
        vmul.vx     v5, v5, t5                  /* b */
        vadd.vv     v0, v4, v7                  /* y0 = a + d */
        vsub.vv     v3, v4, v7                  /* y3 = a - d */
        vadd.vv     v1, v5, v6                  /* y1 = b + c */
        vsub.vv     v2, v5, v6                  /* y2 = b - c */
        vsetvli     zero, zero, e16, mf2, ta, ma
        vnsra.wi    v4, v0, 14                  /* narrow to 16 bits: v4..v7 = y0..y3 >> 14 */
        vnsra.wi    v7, v3, 14
        vnsra.wi    v5, v1, 14
        vnsra.wi    v6, v2, 14
        /* Transpose: the segmented store interleaves v4..v7 in memory, the
         * unit-stride loads then fetch four consecutive elements each. */
        vsseg4e16.v v4, (sp)
        vle16.v     v0, (sp)
        vle16.v     v1, (t1)
        vle16.v     v2, (t2)
        vle16.v     v3, (t3)
        vwmul.vx    v8, v1, t4                  /* second pass: x1 * K1 */
        li          t0, 16 * 2                  /* output stride: 16 * int16_t */
        vwmul.vx    v9, v3, t6                  /* x3 * K2 */
        addi        t1, a0, 1 * 4 * 16 * 2      /* &block[1][0][0] */
        vwmul.vx    v10, v1, t6                 /* x1 * K2 */
        addi        t2, a0, 2 * 4 * 16 * 2      /* &block[2][0][0] */
        vwmul.vx    v11, v3, t4                 /* x3 * K1 */
        addi        t3, a0, 3 * 4 * 16 * 2      /* &block[3][0][0] */
        vwadd.vv    v4, v0, v2                  /* x0 + x2 */
        vwsub.vv    v5, v0, v2                  /* x0 - x2 */
        vsetvli     zero, zero, e32, m1, ta, ma
        vmul.vx     v4, v4, t5                  /* a */
        sd          zero, (a1)                  /* clear dc[0..3]; the input was consumed by vlseg4e16.v */
        vadd.vv     v7, v10, v11                /* d */
        sd          zero, 8(a1)                 /* clear dc[4..7] */
        vmul.vx     v5, v5, t5                  /* b */
        sd          zero, 16(a1)                /* clear dc[8..11] */
        vsub.vv     v6, v8, v9                  /* c */
        sd          zero, 24(a1)                /* clear dc[12..15] */
        vadd.vv     v0, v4, v7                  /* y0 = a + d */
        addi        sp, sp, 16 * 2              /* release the scratch area (already read back) */
        vsub.vv     v3, v4, v7                  /* y3 = a - d */
        vadd.vv     v1, v5, v6                  /* y1 = b + c */
        vsub.vv     v2, v5, v6                  /* y2 = b - c */
        vsetvli     zero, zero, e16, mf2, ta, ma
        vnclip.wi   v4, v0, 18                  /* rounded (per vxrm), saturating narrow by 18 bits */
        vnclip.wi   v5, v1, 18
        vnclip.wi   v6, v2, 18
        vnclip.wi   v7, v3, 18
        vsse16.v    v4, (a0), t0                /* block[0][i][0] = y0, lane i */
        vsse16.v    v5, (t1), t0                /* block[1][i][0] = y1 */
        vsse16.v    v6, (t2), t0                /* block[2][i][0] = y2 */
        vsse16.v    v7, (t3), t0                /* block[3][i][0] = y3 */
        ret
endfunc
|
||||
#endif
|
@ -712,6 +712,10 @@ av_cold void ff_vp7dsp_init(VP8DSPContext *dsp)
|
||||
|
||||
dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c;
|
||||
dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c;
|
||||
|
||||
#if ARCH_RISCV
|
||||
ff_vp7dsp_init_riscv(dsp);
|
||||
#endif
|
||||
}
|
||||
#endif /* CONFIG_VP7_DECODER */
|
||||
|
||||
|
@ -90,6 +90,8 @@ void ff_vp78dsp_init_ppc(VP8DSPContext *c);
|
||||
void ff_vp78dsp_init_riscv(VP8DSPContext *c);
|
||||
void ff_vp78dsp_init_x86(VP8DSPContext *c);
|
||||
|
||||
/* RISC-V-specific VP7 initialisation; invoked from ff_vp7dsp_init() when ARCH_RISCV is set. */
void ff_vp7dsp_init_riscv(VP8DSPContext *c);
|
||||
|
||||
void ff_vp8dsp_init(VP8DSPContext *c);
|
||||
void ff_vp8dsp_init_aarch64(VP8DSPContext *c);
|
||||
void ff_vp8dsp_init_arm(VP8DSPContext *c);
|
||||
|
Loading…
Reference in New Issue
Block a user