1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-02-14 22:22:59 +02:00

lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8

T-Head C908 (cycles):
h264_add_pixels4_8bpp_c:        93.5
h264_add_pixels4_8bpp_rvv_i32:  39.5
h264_add_pixels4_9bpp_c:        87.5
h264_add_pixels4_9bpp_rvv_i64:  50.5
h264_add_pixels4_10bpp_c:       87.5
h264_add_pixels4_10bpp_rvv_i64: 50.5
h264_add_pixels4_12bpp_c:       87.5
h264_add_pixels4_12bpp_rvv_i64: 50.5
h264_add_pixels4_14bpp_c:       87.5
h264_add_pixels4_14bpp_rvv_i64: 50.5
h264_add_pixels8_8bpp_c:       265.2
h264_add_pixels8_8bpp_rvv_i64:  84.5
This commit is contained in:
Rémi Denis-Courmont 2024-07-13 15:43:27 +03:00
parent 635f7c0f6c
commit 7744c08240
3 changed files with 102 additions and 1 deletion

View File

@ -31,7 +31,8 @@ RVV-OBJS-$(CONFIG_H263DSP) += riscv/h263dsp_rvv.o
OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o riscv/h264idct_rvv.o
RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o riscv/h264dsp_rvv.o \
riscv/h264idct_rvv.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o

View File

@ -0,0 +1,89 @@
/*
* Copyright © 2024 Rémi Denis-Courmont.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "libavutil/riscv/asm.S"
.macro sx rd, addr
#if (__riscv_xlen == 32)
sw \rd, \addr
#elif (__riscv_xlen == 64)
sd \rd, \addr
#else
sq \rd, \addr
#endif
.endm
func ff_h264_add_pixels4_8_rvv, zve32x
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v8, (a0), a2
vsetivli zero, 4 * 4, e8, m1, ta, ma
vle16.v v16, (a1)
.equ offset, 0
.rept 256 / __riscv_xlen
sx zero, offset(a1)
.equ offset, offset + (__riscv_xlen / 8)
.endr
vncvt.x.x.w v24, v16
vadd.vv v8, v8, v24
vsetivli zero, 4, e8, mf4, ta, ma
vsse32.v v8, (a0), a2
ret
endfunc
func ff_h264_add_pixels4_16_rvv, zve64x
vsetivli zero, 4, e16, mf2, ta, ma
vlse64.v v8, (a0), a2
vsetivli zero, 4 * 4, e16, m2, ta, ma
vle32.v v16, (a1)
.equ offset, 0
.rept 512 / __riscv_xlen
sx zero, offset(a1)
.equ offset, offset + (__riscv_xlen / 8)
.endr
vncvt.x.x.w v24, v16
vadd.vv v8, v8, v24
vsetivli zero, 4, e16, mf2, ta, ma
vsse64.v v8, (a0), a2
ret
endfunc
func ff_h264_add_pixels8_8_rvv, zve64x
li t0, 8 * 8
vsetivli zero, 8, e8, mf2, ta, ma
vlse64.v v8, (a0), a2
vsetvli zero, t0, e8, m4, ta, ma
vle16.v v16, (a1)
.equ offset, 0
.rept 1024 / __riscv_xlen
sx zero, offset(a1)
.equ offset, offset + (__riscv_xlen / 8)
.endr
vncvt.x.x.w v24, v16
vadd.vv v8, v8, v24
vsetivli zero, 8, e8, mf2, ta, ma
vsse64.v v8, (a0), a2
ret
endfunc

View File

@ -61,6 +61,10 @@ void ff_h264_idct8_add_12_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride);
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@ -96,6 +100,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
# endif
if (flags & AV_CPU_FLAG_RVV_I64)
dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
}
if (bit_depth == 9) {
@ -118,6 +125,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
dsp->h264_idct_add = ff_h264_idct_add_14_rvv;
dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
}
if (bit_depth > 8 && zvl128b) {
if (flags & AV_CPU_FLAG_RVV_I64)
dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
}
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
}