1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-21 10:55:51 +02:00

lavc/me_cmp: R-V V vsse vsad

C908:
vsad_0_c: 936.0
vsad_0_rvv_i32: 236.2
vsad_1_c: 424.0
vsad_1_rvv_i32: 190.2
vsse_0_c: 877.0
vsse_0_rvv_i32: 204.2
vsse_1_c: 439.0
vsse_1_rvv_i32: 140.2

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
sunyuechi 2024-02-06 23:18:51 +08:00 committed by Rémi Denis-Courmont
parent 2e4e424ac2
commit 925b55a5e8
2 changed files with 108 additions and 0 deletions

View File

@ -46,6 +46,11 @@ int ff_sse8_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
int ff_sse4_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
/* RVV entry points assigned to the vsse[] and vsad[] slots of MECmpContext in
 * ff_me_cmp_init_riscv(); slot 0 takes the *16 variant and slot 1 the *8
 * variant. All four share the same argument list: context, two source
 * pointers, a stride and a row count h. */
int ff_vsse16_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h);
int ff_vsse8_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h);
int ff_vsad16_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h);
int ff_vsad8_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h);
av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
{
#if HAVE_RVV
@ -64,6 +69,11 @@ av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
c->sse[0] = ff_sse16_rvv;
c->sse[1] = ff_sse8_rvv;
c->sse[2] = ff_sse4_rvv;
c->vsse[0] = ff_vsse16_rvv;
c->vsse[1] = ff_vsse8_rvv;
c->vsad[0] = ff_vsad16_rvv;
c->vsad[1] = ff_vsad8_rvv;
}
#endif
}

View File

@ -231,3 +231,101 @@ func ff_sse4_rvv, zve32x
vmv.x.s a0, v0
ret
endfunc
/*
 * vabsaddu dst, src, tmp
 *
 * Add the absolute value of every signed element of src to the double-width
 * accumulator group dst:
 *      tmp  = max(src, -src)
 *      dst += zero-extended tmp
 *
 * Runs at the SEW/LMUL currently selected for src; dst is addressed at twice
 * that element width. tmp is overwritten and must not alias src, because the
 * negated copy is written to tmp before src is read again by the max.
 */
.macro  vabsaddu dst, src, tmp
        vneg.v          \tmp, \src
        vmax.vv         \tmp, \src, \tmp
        vwaddu.wv       \dst, \dst, \tmp
.endm
/*
 * vsad_vsse16 type
 *
 * Complete function body (including the final ret) for the 16-column
 * variants. For each of the h - 1 pairs of vertically adjacent rows it forms,
 * per column x,
 *      d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride]
 * as a 16-bit value and accumulates
 *      type = abs    : |d|
 *      type = square : d * d
 * into 32-bit lanes, which are summed into a single result at the end.
 *
 * In  (standard RISC-V integer argument registers for the C prototype
 *      f(c, s1, s2, stride, h)):
 *      a1 = s1, a2 = s2, a3 = stride, a4 = h.  a0 (c) is not read.
 * Out: a0 = sum.  When h <= 1 there is no row pair and the result is 0.
 * Clobbers: a1, a2, a4, t1, t2, v0, v4, v8, v12-v13, v16, v24-v29, vl/vtype.
 *
 * NOTE(review): the code requests 16 elements at e32/m4 and never checks the
 * granted vl, so it presumably relies on the caller side guaranteeing
 * VLEN >= 128 - confirm against the init code.
 */
.macro  vsad_vsse16 type
        vsetivli        zero, 16, e32, m4, ta, ma
        addi            a4, a4, -1              /* a4 = number of row pairs */
        add             t1, a1, a3              /* t1 = s1 + stride (row below) */
        add             t2, a2, a3              /* t2 = s2 + stride (row below) */
        vmv.v.x         v24, zero               /* 32-bit per-lane accumulators */
        vmv.s.x         v0, zero                /* seed for the final reduction */
        /* Without this guard h <= 1 would enter the loop with a4 <= 0, the
         * in-loop decrement would step past zero and bnez would keep looping
         * far beyond the buffers. */
        blez            a4, 2f
1:
        vsetvli         zero, zero, e8, m1, tu, ma
        vle8.v          v4, (a1)                /* s1, current row */
        vle8.v          v8, (t1)                /* s1, next row */
        vle8.v          v12, (a2)               /* s2, current row */
        vle8.v          v16, (t2)               /* s2, next row */
        addi            a4, a4, -1
        vwsubu.vv       v28, v4, v12            /* v28 = s1[x] - s2[x] */
        vwsubu.wv       v12, v28, v8            /* v12 = v28 - s1[x + stride] */
        vwaddu.wv       v28, v12, v16           /* v28 = v12 + s2[x + stride] */
        vsetvli         zero, zero, e16, m2, tu, ma
.ifc \type,abs
        vabsaddu        v24, v28, v12           /* v12 is free to use as scratch */
.endif
.ifc \type,square
        vwmacc.vv       v24, v28, v28           /* signed widening d * d */
.endif
        add             a1, a1, a3
        add             a2, a2, a3
        add             t1, t1, a3
        add             t2, t2, a3
        bnez            a4, 1b
2:
        vsetvli         zero, zero, e32, m4, tu, ma
        vredsum.vs      v0, v24, v0
        vmv.x.s         a0, v0
        ret
.endm
/*
 * vsad_vsse8 type
 *
 * Complete function body (including the final ret) for the 8-column
 * variants. For each of the h - 1 pairs of vertically adjacent rows it forms,
 * per column x,
 *      d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride]
 * as a 16-bit value and accumulates
 *      type = abs    : |d|
 *      type = square : d * d
 * into 32-bit lanes, which are summed into a single result at the end.
 *
 * In  (standard RISC-V integer argument registers for the C prototype
 *      f(c, s1, s2, stride, h)):
 *      a1 = s1, a2 = s2, a3 = stride, a4 = h.  a0 (c) is not read.
 * Out: a0 = sum.  When h <= 1 there is no row pair and the result is 0.
 * Clobbers: a1, a2, a4, t1, t2, v0, v4, v8, v12, v16, v24-v25, v28, vl/vtype.
 *
 * NOTE(review): the code requests 8 elements at e32/m2 and never checks the
 * granted vl, so it presumably relies on the caller side guaranteeing
 * VLEN >= 128 - confirm against the init code.
 */
.macro  vsad_vsse8 type
        vsetivli        zero, 8, e32, m2, ta, ma
        addi            a4, a4, -1              /* a4 = number of row pairs */
        add             t1, a1, a3              /* t1 = s1 + stride (row below) */
        add             t2, a2, a3              /* t2 = s2 + stride (row below) */
        vmv.v.x         v24, zero               /* 32-bit per-lane accumulators */
        vmv.s.x         v0, zero                /* seed for the final reduction */
        /* Without this guard h <= 1 would enter the loop with a4 <= 0, the
         * in-loop decrement would step past zero and bnez would keep looping
         * far beyond the buffers. */
        blez            a4, 2f
1:
        vsetvli         zero, zero, e8, mf2, tu, ma
        vle8.v          v4, (a1)                /* s1, current row */
        vle8.v          v8, (t1)                /* s1, next row */
        vle8.v          v12, (a2)               /* s2, current row */
        vle8.v          v16, (t2)               /* s2, next row */
        addi            a4, a4, -1
        vwsubu.vv       v28, v4, v12            /* v28 = s1[x] - s2[x] */
        vwsubu.wv       v12, v28, v8            /* v12 = v28 - s1[x + stride] */
        vwaddu.wv       v28, v12, v16           /* v28 = v12 + s2[x + stride] */
        vsetvli         zero, zero, e16, m1, tu, ma
.ifc \type,abs
        vabsaddu        v24, v28, v12           /* v12 is free to use as scratch */
.endif
.ifc \type,square
        vwmacc.vv       v24, v28, v28           /* signed widening d * d */
.endif
        add             a1, a1, a3
        add             a2, a2, a3
        add             t1, t1, a3
        add             t2, t2, a3
        bnez            a4, 1b
2:
        vsetvli         zero, zero, e32, m2, tu, ma
        vredsum.vs      v0, v24, v0
        vmv.x.s         a0, v0
        ret
.endm
/* int ff_vsse16_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
 *                   ptrdiff_t stride, int h)
 * Expands the 16-column macro in its "square" flavour; the macro supplies the
 * whole body, including the ret. */
func ff_vsse16_rvv, zve32x
        vsad_vsse16     square
endfunc
/* int ff_vsse8_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
 *                  ptrdiff_t stride, int h)
 * Expands the 8-column macro in its "square" flavour; the macro supplies the
 * whole body, including the ret. */
func ff_vsse8_rvv, zve32x
        vsad_vsse8      square
endfunc
/* int ff_vsad16_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
 *                   ptrdiff_t stride, int h)
 * Expands the 16-column macro in its "abs" flavour; the macro supplies the
 * whole body, including the ret. */
func ff_vsad16_rvv, zve32x
        vsad_vsse16     abs
endfunc
/* int ff_vsad8_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
 *                  ptrdiff_t stride, int h)
 * Expands the 8-column macro in its "abs" flavour; the macro supplies the
 * whole body, including the ret. */
func ff_vsad8_rvv, zve32x
        vsad_vsse8      abs
endfunc