mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
lavc/startcode: add R-V Zbb startcode_find_candidate
The main loop processes 8 bytes in 5 instructions. For comparison, the optimal plain strnlen() requires 4 instructions per byte (6.4x worse): LBU; ADDI; BEQZ; BNE. The current libavcodec C code involves 5 instructions per byte (8x worse). Actual benchmarks may be slightly less favourable due to latency from ORC.B to BNE.
This commit is contained in:
parent
8b8b555de0
commit
4ad5b9c8db
@ -158,6 +158,8 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
|
||||
ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
|
||||
#elif ARCH_PPC
|
||||
ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
|
||||
#elif ARCH_RISCV
|
||||
ff_h264dsp_init_riscv(c, bit_depth, chroma_format_idc);
|
||||
#elif ARCH_X86
|
||||
ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc);
|
||||
#elif ARCH_MIPS
|
||||
|
@ -125,6 +125,8 @@ void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
|
||||
const int chroma_format_idc);
|
||||
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
|
||||
const int chroma_format_idc);
|
||||
void ff_h264dsp_init_riscv(H264DSPContext *c, const int bit_depth,
|
||||
const int chroma_format_idc);
|
||||
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
||||
const int chroma_format_idc);
|
||||
void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
|
||||
|
@ -28,6 +28,7 @@ OBJS-$(CONFIG_JPEG2000_DECODER) += riscv/jpeg2000dsp_init.o
|
||||
RVV-OBJS-$(CONFIG_JPEG2000_DECODER) += riscv/jpeg2000dsp_rvv.o
|
||||
OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
|
||||
RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
|
||||
OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
|
||||
OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
|
||||
RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
|
||||
OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
|
||||
@ -51,6 +52,7 @@ OBJS-$(CONFIG_RV34DSP) += riscv/rv34dsp_init.o
|
||||
RVV-OBJS-$(CONFIG_RV34DSP) += riscv/rv34dsp_rvv.o
|
||||
OBJS-$(CONFIG_RV40_DECODER) += riscv/rv40dsp_init.o
|
||||
RVV-OBJS-$(CONFIG_RV40_DECODER) += riscv/rv40dsp_rvv.o
|
||||
RV-OBJS-$(CONFIG_STARTCODE) += riscv/startcode_rvb.o
|
||||
OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
|
||||
RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
|
||||
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
|
||||
|
40
libavcodec/riscv/h264dsp_init.c
Normal file
40
libavcodec/riscv/h264dsp_init.c
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright © 2024 Rémi Denis-Courmont.
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavcodec/h264dsp.h"
|
||||
|
||||
/* Start code candidate search routine; the symbol is defined in
 * libavcodec/riscv/startcode_rvb.S. */
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
|
||||
|
||||
/**
 * Install RISC-V specific routines into an H.264 DSP context.
 *
 * When built with HAVE_RV and the run-time CPU flags include
 * AV_CPU_FLAG_RVB_BASIC, dsp->startcode_find_candidate is pointed at
 * ff_startcode_find_candidate_rvb. Otherwise the context is left untouched.
 *
 * @param dsp               context whose function pointer may be replaced
 * @param bit_depth         not used by this function
 * @param chroma_format_idc not used by this function
 */
av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
|
||||
const int chroma_format_idc)
|
||||
{
|
||||
#if HAVE_RV
|
||||
int flags = av_get_cpu_flags();
|
||||
|
||||
/* Select the assembly routine only if the CPU advertises
 * AV_CPU_FLAG_RVB_BASIC at run time. */
if (flags & AV_CPU_FLAG_RVB_BASIC)
|
||||
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvb;
|
||||
#endif
|
||||
}
|
83
libavcodec/riscv/startcode_rvb.S
Normal file
83
libavcodec/riscv/startcode_rvb.S
Normal file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright © 2024 Rémi Denis-Courmont.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "libavutil/riscv/asm.S"
|
||||
|
||||
/*
 * lx rd, addr: load one register-wide (XLEN-bit) word from addr into rd.
 * Expands to lw when __riscv_xlen is 32, ld when it is 64, and lq for any
 * other value (presumably RV128 - confirm).
 */
.macro lx rd, addr
|
||||
#if (__riscv_xlen == 32)
|
||||
lw \rd, \addr
|
||||
#elif (__riscv_xlen == 64)
|
||||
ld \rd, \addr
|
||||
#else
|
||||
lq \rd, \addr
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/*
 * Find the first zero byte in a buffer, one XLEN-bit word at a time.
 *
 * Inputs (presumably matching the C prototype (const uint8_t *, int) declared
 * for this symbol - confirm): a0 = buffer address, a1 = size in bytes.
 * Result in a0: the offset of the first zero byte found, or the size if none
 * was found.
 *
 * The search reads whole aligned words, so it can load bytes located before
 * the buffer start (same word as the first byte) and after its end (same word
 * as the last byte). Bytes before the start are masked out; bytes after the
 * end are not, so the returned offset can exceed the size when the first zero
 * byte lies past the end within the last loaded word (see the commented-out
 * minu below).
 *
 * Temporaries: t0 = address of the next aligned word, t1 = head misalignment,
 * t2 = per-byte zero/non-zero mask, t3 = all-ones comparison value in the loop.
 */
func ff_startcode_find_candidate_rvb, zbb
|
||||
add a1, a0, a1 // a1 = end address (start + size)
|
||||
|
||||
// Potentially unaligned head
|
||||
andi t0, a0, -(__riscv_xlen / 8) // t0 = start address rounded down to a word boundary
|
||||
beq a0, a1, 2f // empty buffer: return a1 - a0, i.e. 0
|
||||
|
||||
andi t1, a0, (__riscv_xlen / 8) - 1 // t1 = byte offset of the start within its word
|
||||
lx t2, (t0) // load the aligned word containing the first byte
|
||||
li t3, __riscv_xlen // NOTE(review): t3 is not read before "li t3, -1" below
|
||||
orc.b t2, t2 // each byte becomes 0xff if it was non-zero, 0x00 otherwise
|
||||
slli t1, t1, 3 // convert the byte offset to a bit count
|
||||
not t2, t2 // now 0xff marks a zero byte
|
||||
sub t3, t3, t1 // NOTE(review): result appears unused - confirm
|
||||
srl t2, t2, t1 // shift right then left by t1 to clear the low t1 bits,
|
||||
addi t0, t0, __riscv_xlen / 8 // advance t0 to the next word
|
||||
sll t2, t2, t1 // i.e. bytes before the start (assuming little-endian order)
|
||||
bnez t2, 4f // a zero byte exists at or after the start in the head word
|
||||
|
||||
// Main loop (including potentially short tail)
|
||||
bge t0, a1, 2f // head word already covered the whole buffer
|
||||
li t3, -1 // all-ones: orc.b result of a word without zero bytes
|
||||
1:
|
||||
lx t2, (t0)
|
||||
addi t0, t0, __riscv_xlen / 8
|
||||
orc.b t2, t2
|
||||
bne t2, t3, 3f // t2 != -1 iff (at least one) zero byte
|
||||
blt t0, a1, 1b // keep going while the next word starts before the end
|
||||
|
||||
2: // No zero byte found
|
||||
sub a0, a1, a0 // return the size
|
||||
ret
|
||||
|
||||
3: // Zero byte found in main loop
|
||||
not t2, t2 // invert so that 0xff marks a zero byte, as in the head path
|
||||
4: // Zero byte found in head
|
||||
ctz t2, t2 // bit index of the lowest marked byte
|
||||
addi t0, t0, -(__riscv_xlen / 8) // back-track
|
||||
srl t2, t2, 3 // bit index -> byte index within the word
|
||||
add t0, t0, t2 // t0 = address of the first zero byte
|
||||
// Uncomment the following line for exact POSIX C strnlen() semantics.
|
||||
//minu t0, t0, a1 // ignore zero byte in tail
|
||||
sub a0, t0, a0 // return the offset from the buffer start
|
||||
ret
|
||||
endfunc
|
@ -33,6 +33,7 @@ void ff_put_pixels16x16_rvi(uint8_t *dst, const uint8_t *src, ptrdiff_t line_siz
|
||||
void ff_put_pixels8x8_rvi(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
|
||||
void ff_avg_pixels16x16_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
|
||||
void ff_avg_pixels8x8_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
|
||||
int ff_startcode_find_candidate_rvb(const uint8_t *, int);
|
||||
|
||||
av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
|
||||
{
|
||||
@ -45,7 +46,9 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
|
||||
dsp->put_vc1_mspel_pixels_tab[0][0] = ff_put_pixels16x16_rvi;
|
||||
}
|
||||
# endif
|
||||
#if HAVE_RVV
|
||||
if (flags & AV_CPU_FLAG_RVB_BASIC)
|
||||
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvb;
|
||||
# if HAVE_RVV
|
||||
if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
|
||||
dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
|
||||
dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
|
||||
@ -56,6 +59,6 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
|
||||
dsp->avg_vc1_mspel_pixels_tab[1][0] = ff_avg_pixels8x8_rvv;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user