mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
lavc/vc1dsp: R-V V inv_trans
C908: vc1dsp.vc1_inv_trans_4x4_dc_c: 125.7 vc1dsp.vc1_inv_trans_4x4_dc_rvv_i32: 53.5 vc1dsp.vc1_inv_trans_4x8_dc_c: 230.7 vc1dsp.vc1_inv_trans_4x8_dc_rvv_i32: 65.5 vc1dsp.vc1_inv_trans_8x4_dc_c: 228.7 vc1dsp.vc1_inv_trans_8x4_dc_rvv_i64: 64.5 vc1dsp.vc1_inv_trans_8x8_dc_c: 476.5 vc1dsp.vc1_inv_trans_8x8_dc_rvv_i64: 80.2 Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
parent
0f745b74ec
commit
0b9d009b4a
@ -40,5 +40,7 @@ OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_init.o \
|
|||||||
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
|
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
|
||||||
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
|
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
|
||||||
RVV-OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_rvv.o
|
RVV-OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_rvv.o
|
||||||
|
OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
|
||||||
|
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
|
||||||
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
|
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
|
||||||
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
|
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
|
||||||
|
49
libavcodec/riscv/vc1dsp_init.c
Normal file
49
libavcodec/riscv/vc1dsp_init.c
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/riscv/cpu.h"
|
||||||
|
#include "libavcodec/vc1.h"
|
||||||
|
|
||||||
|
void ff_vc1_inv_trans_8x8_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||||
|
void ff_vc1_inv_trans_4x8_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||||
|
void ff_vc1_inv_trans_8x4_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||||
|
void ff_vc1_inv_trans_4x4_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||||
|
|
||||||
|
av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
|
||||||
|
{
|
||||||
|
#if HAVE_RVV
|
||||||
|
int flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (ff_get_rv_vlenb() >= 16) {
|
||||||
|
if (flags & AV_CPU_FLAG_RVV_I64) {
|
||||||
|
dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
|
||||||
|
dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
|
||||||
|
}
|
||||||
|
if (flags & AV_CPU_FLAG_RVV_I32) {
|
||||||
|
dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
|
||||||
|
dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
113
libavcodec/riscv/vc1dsp_rvv.S
Normal file
113
libavcodec/riscv/vc1dsp_rvv.S
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/riscv/asm.S"
|
||||||
|
|
||||||
|
/*
 * DC-only inverse transform for an 8x8 block (8 rows of 8 pixels): the
 * scaled DC coefficient is added to every pixel and the result is clamped
 * to 0..255.
 *   a0 = dest, a1 = stride in bytes, a2 = block (only block[0] is read)
 */
func ff_vc1_inv_trans_8x8_dc_rvv, zve64x
        li            t0, 8*8                     // total number of pixels
        vsetivli      zero, 8, e8, mf2, ta, ma
        lh            t1, (a2)                    // dc = block[0]
        vlse64.v      v0, (a0), a1                // one 64-bit element per row
        sh1add        t1, t1, t1                  // 3 * dc
        addi          t1, t1, 1
        srai          t1, t1, 1                   // dc = (3 * dc + 1) >> 1
        sh1add        t1, t1, t1                  // 3 * dc
        addi          t1, t1, 16
        srai          t1, t1, 5                   // dc = (3 * dc + 16) >> 5
        vsetvli       zero, t0, e16, m8, ta, ma
        vzext.vf2     v8, v0                      // widen pixels to 16 bits
        vadd.vx       v8, v8, t1                  // pixel + dc
        vmax.vx       v8, v8, zero                // clamp below at 0
        vsetvli       zero, zero, e8, m4, ta, ma
        vnclipu.wi    v0, v8, 0                   // narrow, saturating at 255
        vsetivli      zero, 8, e8, mf2, ta, ma
        vsse64.v      v0, (a0), a1                // write the 8 rows back
        ret
endfunc
|
||||||
|
|
||||||
|
/*
 * DC-only inverse transform for a 4x8 block (8 rows of 4 pixels): the
 * scaled DC coefficient is added to every pixel and the result is clamped
 * to 0..255.
 *   a0 = dest, a1 = stride in bytes, a2 = block (only block[0] is read)
 */
func ff_vc1_inv_trans_4x8_dc_rvv, zve32x
        li            t0, 4*8                     // total number of pixels
        vsetivli      zero, 8, e8, mf2, ta, ma
        lh            t1, (a2)                    // dc = block[0]
        vlse32.v      v0, (a0), a1                // one 32-bit element per row
        slli          t2, t1, 4
        add           t1, t1, t2                  // 17 * dc
        addi          t1, t1, 4
        srai          t1, t1, 3                   // dc = (17 * dc + 4) >> 3
        sh1add        t1, t1, t1                  // 3 * dc
        slli          t1, t1, 2                   // 12 * dc
        addi          t1, t1, 64
        srai          t1, t1, 7                   // dc = (12 * dc + 64) >> 7
        vsetvli       zero, t0, e16, m4, ta, ma
        vzext.vf2     v4, v0                      // widen pixels to 16 bits
        vadd.vx       v4, v4, t1                  // pixel + dc
        vmax.vx       v4, v4, zero                // clamp below at 0
        vsetvli       zero, zero, e8, m2, ta, ma
        vnclipu.wi    v0, v4, 0                   // narrow, saturating at 255
        vsetivli      zero, 8, e8, mf2, ta, ma
        vsse32.v      v0, (a0), a1                // write the 8 rows back
        ret
endfunc
|
||||||
|
|
||||||
|
/*
 * DC-only inverse transform for an 8x4 block (4 rows of 8 pixels): the
 * scaled DC coefficient is added to every pixel and the result is clamped
 * to 0..255.
 *   a0 = dest, a1 = stride in bytes, a2 = block (only block[0] is read)
 *
 * The strided load and store use vl = 4, one 64-bit element per row, so
 * that exactly the 4 rows of the block are read and written. The middle
 * section processes 8*4 pixels; a larger vl on the load/store would touch
 * rows below the block and could store tail-agnostic register contents
 * into them.
 */
func ff_vc1_inv_trans_8x4_dc_rvv, zve64x
        lh            t2, (a2)                    // dc = block[0]
        vsetivli      zero, 4, e8, mf2, ta, ma    // 4 rows
        vlse64.v      v0, (a0), a1                // one 64-bit element per row
        sh1add        t2, t2, t2                  // 3 * dc
        addi          t2, t2, 1
        srai          t2, t2, 1                   // dc = (3 * dc + 1) >> 1
        slli          t1, t2, 4
        add           t2, t2, t1                  // 17 * dc
        addi          t2, t2, 64
        srai          t2, t2, 7                   // dc = (17 * dc + 64) >> 7
        li            t0, 8*4                     // total number of pixels
        vsetvli       zero, t0, e16, m4, ta, ma
        vzext.vf2     v4, v0                      // widen pixels to 16 bits
        vadd.vx       v4, v4, t2                  // pixel + dc
        vmax.vx       v4, v4, zero                // clamp below at 0
        vsetvli       zero, zero, e8, m2, ta, ma
        vnclipu.wi    v0, v4, 0                   // narrow, saturating at 255
        vsetivli      zero, 4, e8, mf2, ta, ma    // 4 rows
        vsse64.v      v0, (a0), a1                // write the 4 rows back
        ret
endfunc
|
||||||
|
|
||||||
|
/*
 * DC-only inverse transform for a 4x4 block (4 rows of 4 pixels): the
 * scaled DC coefficient is added to every pixel and the result is clamped
 * to 0..255.
 *   a0 = dest, a1 = stride in bytes, a2 = block (only block[0] is read)
 */
func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
        vsetivli      zero, 4, e8, mf2, ta, ma
        lh            t1, (a2)                    // dc = block[0]
        vlse32.v      v0, (a0), a1                // one 32-bit element per row
        slli          t2, t1, 4
        add           t1, t1, t2                  // 17 * dc
        addi          t1, t1, 4
        srai          t1, t1, 3                   // dc = (17 * dc + 4) >> 3
        slli          t2, t1, 4
        add           t1, t1, t2                  // 17 * dc
        addi          t1, t1, 64
        srai          t1, t1, 7                   // dc = (17 * dc + 64) >> 7
        vsetivli      zero, 4*4, e16, m2, ta, ma  // all 16 pixels
        vzext.vf2     v2, v0                      // widen pixels to 16 bits
        vadd.vx       v2, v2, t1                  // pixel + dc
        vmax.vx       v2, v2, zero                // clamp below at 0
        vsetvli       zero, zero, e8, m1, ta, ma
        vnclipu.wi    v0, v2, 0                   // narrow, saturating at 255
        vsetivli      zero, 4, e8, mf2, ta, ma
        vsse32.v      v0, (a0), a1                // write the 4 rows back
        ret
endfunc
|
@ -1039,6 +1039,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
|
|||||||
ff_vc1dsp_init_arm(dsp);
|
ff_vc1dsp_init_arm(dsp);
|
||||||
#elif ARCH_PPC
|
#elif ARCH_PPC
|
||||||
ff_vc1dsp_init_ppc(dsp);
|
ff_vc1dsp_init_ppc(dsp);
|
||||||
|
#elif ARCH_RISCV
|
||||||
|
ff_vc1dsp_init_riscv(dsp);
|
||||||
#elif ARCH_X86
|
#elif ARCH_X86
|
||||||
ff_vc1dsp_init_x86(dsp);
|
ff_vc1dsp_init_x86(dsp);
|
||||||
#elif ARCH_MIPS
|
#elif ARCH_MIPS
|
||||||
|
@ -89,6 +89,7 @@ void ff_vc1dsp_init(VC1DSPContext* c);
|
|||||||
void ff_vc1dsp_init_aarch64(VC1DSPContext* dsp);
|
void ff_vc1dsp_init_aarch64(VC1DSPContext* dsp);
|
||||||
void ff_vc1dsp_init_arm(VC1DSPContext* dsp);
|
void ff_vc1dsp_init_arm(VC1DSPContext* dsp);
|
||||||
void ff_vc1dsp_init_ppc(VC1DSPContext *c);
|
void ff_vc1dsp_init_ppc(VC1DSPContext *c);
|
||||||
|
void ff_vc1dsp_init_riscv(VC1DSPContext *c);
|
||||||
void ff_vc1dsp_init_x86(VC1DSPContext* dsp);
|
void ff_vc1dsp_init_x86(VC1DSPContext* dsp);
|
||||||
void ff_vc1dsp_init_mips(VC1DSPContext* dsp);
|
void ff_vc1dsp_init_mips(VC1DSPContext* dsp);
|
||||||
void ff_vc1dsp_init_loongarch(VC1DSPContext* dsp);
|
void ff_vc1dsp_init_loongarch(VC1DSPContext* dsp);
|
||||||
|
Loading…
Reference in New Issue
Block a user