1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

swscale/rgb2rgb2: rework RISC-V V shuffle_bytes_{0321,2103}

This avoids strided loads.

Before:
shuffle_bytes_0321_rvv_i32: 307.7
shuffle_bytes_2103_rvv_i32: 308.7

After:
shuffle_bytes_0321_rvv_i32: 59.7
shuffle_bytes_2103_rvv_i32: 61.5
This commit is contained in:
Rémi Denis-Courmont 2023-07-18 21:39:59 +03:00
parent d3948e4db5
commit 15982554e6

View File

@ -21,36 +21,31 @@
#include "libavutil/riscv/asm.S"
/*
 * ff_shuffle_bytes_0321_rvv / ff_shuffle_bytes_2103_rvv
 *
 * Reorder the bytes inside every 32-bit element of a buffer.
 *
 * In:   a0 = address the elements are loaded from
 *       a1 = address the elements are stored to
 *       a2 = size; it is shifted right by 2 to obtain the number of 32-bit
 *            elements, so presumably a byte count (confirm against caller)
 * Clobbers: a0, a1, a2, t0, t1, t2, v8-v31 (three m8 register groups)
 *
 * Both entry points share one loop and differ only in the mask in t1:
 *   t1  selects the two bytes of each element that stay where they are,
 *   ~t1 selects the two bytes that trade places (they sit 16 bits apart, so
 *       one shift right by 16 and one shift left by 16 exchanges them).
 * With the element's lowest-addressed byte in bits 7:0 (little-endian
 * layout), 0x00ff00ff keeps bytes 0 and 2 (order 0,3,2,1) and ~0x00ff00ff
 * keeps bytes 1 and 3 (order 2,1,0,3).
 *
 * Each pass uses a single unit-stride 32-bit load instead of four strided
 * byte loads.
 * NOTE(review): the accesses are 32-bit element accesses; confirm that the
 * callers' buffers satisfy whatever alignment the target requires for them.
 */
func ff_shuffle_bytes_0321_rvv, zve32x
        li      t1, 0x00ff00ff          // bytes 0 and 2 stay in place
        j       1f                      // shared loop lives in the next function
endfunc

func ff_shuffle_bytes_2103_rvv, zve32x
        li      t1, ~0x00ff00ff         // bytes 1 and 3 stay in place
1:
        not     t2, t1                  // t2 = mask of the two bytes to exchange
        srai    a2, a2, 2               // a2 = number of 32-bit elements left
2:
        vsetvli t0, a2, e32, m8, ta, ma // t0 = elements handled in this pass
        vle32.v v8, (a0)
        sub     a2, a2, t0
        vand.vx v16, v8, t2             // v16 = bytes to exchange
        sh2add  a0, t0, a0              // a0 += 4 * t0
        vand.vx v8, v8, t1              // v8  = bytes kept in place
        vsrl.vi v24, v16, 16            // upper exchanged byte moves down
        vsll.vi v16, v16, 16            // lower exchanged byte moves up
        vor.vv  v8, v8, v24
        vor.vv  v8, v16, v8
        vse32.v v8, (a1)
        sh2add  a1, t0, a1              // a1 += 4 * t0
        bnez    a2, 2b                  // loop until every element is done
        ret
endfunc
func ff_shuffle_bytes_1230_rvv, zve32x