1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-21 10:55:51 +02:00

lavc/me_cmp: R-V V pix_abs_x2

C908:
pix_abs_0_1_c: 767.0
pix_abs_0_1_rvv_i32: 196.2
pix_abs_1_1_c: 388.0
pix_abs_1_1_rvv_i32: 185.2

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
sunyuechi 2024-02-06 21:41:35 +08:00 committed by Rémi Denis-Courmont
parent b41e115dde
commit f1ec475f66
2 changed files with 56 additions and 0 deletions

View File

@ -30,6 +30,10 @@ int ff_pix_abs16_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2
ptrdiff_t stride, int h);
int ff_pix_abs8_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs16_x2_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_pix_abs8_x2_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
{
@ -41,6 +45,8 @@ av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
c->sad[0] = ff_pix_abs16_rvv;
c->pix_abs[1][0] = ff_pix_abs8_rvv;
c->sad[1] = ff_pix_abs8_rvv;
c->pix_abs[0][1] = ff_pix_abs16_x2_rvv;
c->pix_abs[1][1] = ff_pix_abs8_x2_rvv;
}
#endif
}

View File

@ -65,3 +65,53 @@ func ff_pix_abs8_rvv, zve32x
pix_abs_ret
endfunc
// Sum of absolute differences between a 16-pixel-wide block and the
// horizontally averaged reference: every row contributes
// |pix1[x] - avg(pix2[x], pix2[x + 1])| for x = 0..15.
// Register roles follow the C prototype and the standard RISC-V calling
// convention: a1 = pix1, a2 = pix2, a3 = stride, a4 = h. a0 (the context
// pointer) is not read in this body.
// The row counter is tested only after the first row has been processed,
// so h must be non-zero on entry.
func ff_pix_abs16_x2_rvv, zve32x
// vxrm = 0 selects round-to-nearest-up, so vaaddu below yields (a + b + 1) >> 1.
csrwi vxrm, 0
// Clear element 0 of v0 as a 32-bit value; it is the running SAD accumulator.
vsetivli zero, 1, e32, m1, ta, ma
li t5, 1
vmv.s.x v0, zero
1:
// Load 17 reference bytes: 16 pixels plus the right-hand neighbour of the last.
// NOTE(review): LMUL=2 is presumably needed so that 17 byte elements fit when
// VLEN is 128 bits - confirm against the VLEN the init code guarantees.
vsetivli zero, 17, e8, m2, tu, ma
vle8.v v12, (a2)
addi a4, a4, -1
// v24[i] = v12[i + 1]. The scalar t5 is inserted into the last element
// (index 16), which the 16-element operations below never read.
vslide1down.vx v24, v12, t5
vsetivli zero, 16, e8, m1, tu, ma
vle8.v v4, (a1)
// v12 = rounded average of each reference pixel and its right neighbour.
vaaddu.vv v12, v12, v24
// Compute both differences widened to 16 bits; each wraps modulo 2^16, so
// exactly one of the pair is the non-negative |pix1 - avg| when read as signed.
vwsubu.vv v16, v4, v12
add a1, a1, a3
vwsubu.vv v20, v12, v4
// Switch to 16-bit elements; the x0, x0 form keeps the current vl of 16.
vsetvli zero, zero, e16, m2, tu, ma
// The signed maximum of (a - b) and (b - a) is the absolute difference.
vmax.vv v16, v16, v20
add a2, a2, a3
// Widening reduction: v0[0] (32-bit) += sum of the 16 absolute differences.
vwredsum.vs v0, v16, v0
bnez a4, 1b
// pix_abs_ret is a macro defined outside this view; presumably it moves the
// accumulated sum from v0 into a0 and returns - verify against its definition.
pix_abs_ret
endfunc
// Sum of absolute differences between an 8-pixel-wide block and the
// horizontally averaged reference: every row contributes
// |pix1[x] - avg(pix2[x], pix2[x + 1])| for x = 0..7.
// Register roles follow the C prototype and the standard RISC-V calling
// convention: a1 = pix1, a2 = pix2, a3 = stride, a4 = h. a0 (the context
// pointer) is not read in this body.
// The row counter is tested only after the first row has been processed,
// so h must be non-zero on entry.
func ff_pix_abs8_x2_rvv, zve32x
// vxrm = 0 selects round-to-nearest-up, so vaaddu below yields (a + b + 1) >> 1.
csrwi vxrm, 0
// Clear element 0 of v0 as a 32-bit value; it is the running SAD accumulator.
vsetivli zero, 1, e32, m1, ta, ma
li t5, 1
vmv.s.x v0, zero
1:
// Load 9 reference bytes: 8 pixels plus the right-hand neighbour of the last.
vsetivli zero, 9, e8, m1, tu, ma
vle8.v v12, (a2)
addi a4, a4, -1
// v24[i] = v12[i + 1]. The scalar t5 is inserted into the last element
// (index 8), which the 8-element operations below never read.
vslide1down.vx v24, v12, t5
// Half-register grouping for the 8-bit stage, so that the widened 16-bit
// results occupy a single register (LMUL=1).
vsetivli zero, 8, e8, mf2, tu, ma
vle8.v v4, (a1)
// v12 = rounded average of each reference pixel and its right neighbour.
vaaddu.vv v12, v12, v24
// Compute both differences widened to 16 bits; each wraps modulo 2^16, so
// exactly one of the pair is the non-negative |pix1 - avg| when read as signed.
vwsubu.vv v16, v4, v12
add a1, a1, a3
vwsubu.vv v20, v12, v4
// Switch to 16-bit elements; the x0, x0 form keeps the current vl of 8.
vsetvli zero, zero, e16, m1, tu, ma
// The signed maximum of (a - b) and (b - a) is the absolute difference.
vmax.vv v16, v16, v20
add a2, a2, a3
// Widening reduction: v0[0] (32-bit) += sum of the 8 absolute differences.
vwredsum.vs v0, v16, v0
bnez a4, 1b
// pix_abs_ret is a macro defined outside this view; presumably it moves the
// accumulated sum from v0 into a0 and returns - verify against its definition.
pix_abs_ret
endfunc