diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
index fd36bd4896..3c0700d1d9 100644
--- a/libavcodec/riscv/h264addpx_rvv.S
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -87,3 +87,25 @@ func ff_h264_add_pixels8_8_rvv, zve64x
         vsse64.v v8, (a0), a2
         ret
 endfunc
+
+func ff_h264_add_pixels8_16_rvv, zve32x // a0 = dst, a1 = block, a2 = stride (argument order of the C prototype); adds block to dst, 8 rows of 8 values, and zeroes block
+        li      t0, 8 // row counter: the loop body below runs 8 times
+        vsetivli    zero, 8, e16, m1, ta, ma // request VL = 8 with 16-bit elements, LMUL = 1
+1:
+        vle32.v     v16, (a1) // load 8 32-bit values from a1
+        addi    t0, t0, -1 // one fewer row left; decrement is placed well ahead of the final bnez
+        vle16.v     v8, (a0) // load 8 16-bit values from a0
+        .equ    offset, 0 // assembly-time byte offset used by the stores emitted below
+        .rept   256 / __riscv_xlen // emit as many XLEN-wide stores as cover 256 bits (32 bytes)
+        sx      zero, offset(a1) // write zero over the values just loaded; 'sx' is defined outside this patch, presumably an XLEN-wide store
+        .equ    offset, offset + (__riscv_xlen / 8) // step the offset by XLEN / 8 bytes
+        .endr
+        vncvt.x.x.w v24, v16 // narrow the 32-bit elements in v16 to 16-bit elements in v24
+        addi    a1, a1, 8 * 4 // advance a1 past the 8 32-bit values (32 bytes)
+        vadd.vv     v8, v8, v24 // add the narrowed values to the ones loaded from a0
+        vse16.v     v8, (a0) // store the 8 16-bit sums back to a0
+        add     a0, a0, a2 // move a0 forward by a2 bytes
+        bnez    t0, 1b // repeat until the row counter reaches zero
+
+        ret
+endfunc
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 2787485647..4fc695f158 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -63,6 +63,7 @@ void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
 
 void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride);
 void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels8_16_rvv(uint8_t *dst, int16_t *block, int stride); // defined in h264addpx_rvv.S
 void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride);
 
 extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
@@ -126,6 +127,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
             dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
         }
         if (bit_depth > 8 && zvl128b) {
+            dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_16_rvv; // set without the RVV_I64 check that guards the pixels4 variant below
             if (flags & AV_CPU_FLAG_RVV_I64)
                 dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
         }