1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

lavc/h264dsp: factor some mostly identical R-V V code

This commit is contained in:
Rémi Denis-Courmont 2024-07-16 22:45:14 +03:00
parent c4c811b3d9
commit d15169c51f

View File

@ -418,8 +418,8 @@ const ff_h264_scan8
endconst
#if (__riscv_xlen == 64)
.irp depth, 8, 16
func ff_h264_idct_add16_\depth\()_rvv, zve32x
.macro idct4_adds type, depth
func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
csrwi vxrm, 0
addi sp, sp, -96
lla t0, ff_h264_scan8
@ -455,9 +455,13 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x
.endif
vmsne.vi v1, v16, 0
vsetvli zero, zero, e8, m1, ta, ma
.ifc \type, 16
vmseq.vi v2, v12, 1
.endif
vmsne.vi v0, v12, 0
.ifc \type, 16
vmand.mm v1, v1, v2
.endif
vsetvli zero, zero, e16, m2, ta, ma
vmv.x.s s2, v0
vmv.x.s s3, v1
@ -470,7 +474,9 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x
andi t0, s2, 1
addi s1, s1, -1
srli s2, s2, 1
.ifc \type, 16
beqz t0, 3f # if (nnz)
.endif
lw t2, (s5) # block_offset[i]
andi t1, s3, 1
mv a1, s6
@ -479,100 +485,17 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x
.if \depth > 8
mv a5, s8
.endif
bnez t1, 2f # if (nnz == 1 && block[i * 16])
jal .Lidct_add4_\depth\()_rvv
j 3f
2:
.if \depth == 8
call ff_h264_idct_dc_add_\depth\()_c
.ifc \type, 16
bnez t1, 2f # if (nnz == 1 && block[i * 16])
.else
jalr s9
.endif
3:
srli s3, s3, 1
addi s5, s5, 4
addi s6, s6, 16 * 2 * (\depth / 8)
bnez s1, 1b
.if \depth > 8
ld s9, 80(sp)
ld s8, 72(sp)
.endif
ld s7, 64(sp)
ld s6, 56(sp)
ld s5, 48(sp)
ld s4, 40(sp)
ld s3, 32(sp)
ld s2, 24(sp)
ld s1, 16(sp)
ld ra, 8(sp)
ld s0, 0(sp)
addi sp, sp, 96
ret
endfunc
func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
csrwi vxrm, 0
addi sp, sp, -96
lla t0, ff_h264_scan8
sd s0, (sp)
li t1, 32 * (\depth / 8)
mv s0, sp
sd ra, 8(sp)
sd s1, 16(sp)
sd s2, 24(sp)
sd s3, 32(sp)
sd s4, 40(sp)
sd s5, 48(sp)
sd s6, 56(sp)
sd s7, 64(sp)
.if \depth > 8
sd s8, 72(sp)
sd s9, 80(sp)
mv s8, a5
mv s9, a6
.endif
vsetivli zero, 16, e8, m1, ta, ma
vle8.v v8, (t0)
.if \depth == 8
vlse16.v v16, (a2), t1
.else
vlse32.v v16, (a2), t1
.endif
vluxei8.v v12, (a4), v8
.if \depth == 8
vsetvli zero, zero, e16, m2, ta, ma
.else
vsetvli zero, zero, e32, m4, ta, ma
.endif
vmsne.vi v1, v16, 0
vsetvli zero, zero, e8, m1, ta, ma
vmsne.vi v0, v12, 0
vsetvli zero, zero, e16, m2, ta, ma
vmv.x.s s2, v0
vmv.x.s s3, v1
li s1, 16
mv s4, a0
mv s5, a1
mv s6, a2
mv s7, a3
1:
andi t0, s2, 1
addi s1, s1, -1
srli s2, s2, 1
lw t2, (s5) # block_offset[i]
andi t1, s3, 1
mv a1, s6
mv a2, s7
add a0, s4, t2
.if \depth > 8
mv a5, s8
.endif
beqz t0, 2f # if (nnzc[scan8[i]])
.endif
jal .Lidct_add4_\depth\()_rvv
j 3f
2:
.ifnc \type, 16
beqz t1, 3f # if (block[i * 16])
.endif
.if \depth == 8
call ff_h264_idct_dc_add_\depth\()_c
.else
@ -600,6 +523,11 @@ func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
addi sp, sp, 96
ret
endfunc
.endm
.irp depth, 8, 16
idct4_adds 16, \depth
idct4_adds 16intra, \depth
func ff_h264_idct8_add4_\depth\()_rvv, zve32x
csrwi vxrm, 0