1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-12 19:18:44 +02:00
FFmpeg/libavcodec/riscv/vc1dsp_init.c
Rémi Denis-Courmont d452db8410 lavc/vc1dsp: R-V V vc1_unescape_buffer
Notes:
- The loop is biased toward the case where no bytes need unescaping, as that should be most common.
- The input byte array is slid rather than the (8 times smaller) bit-mask,
  as RISC-V V does not provide a bit-mask (or bit-wise) slide instruction.
- There are two comparisons with 0 per iteration, for the same reason.
- In case of match, bytes are copied until the first match, and the loop is
  restarted after the escape byte. Vector compression (vcompress.vm) could
  discard all escape bytes but that is slower if escape bytes are rare.

Further optimisations should be possible, e.g.:
- processing 2 bytes fewer per iteration to get rid of 2 slides,
- taking a short cut if the input vector contains fewer than 2 zeroes.
But this is a good starting point:

T-Head C908:
vc1dsp.vc1_unescape_buffer_c:      12749.5
vc1dsp.vc1_unescape_buffer_rvv_i32: 6009.0

SpacemiT X60:
vc1dsp.vc1_unescape_buffer_c:      11038.0
vc1dsp.vc1_unescape_buffer_rvv_i32: 2061.0
2024-05-21 21:16:30 +03:00

71 lines
3.0 KiB
C

/*
* Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/riscv/cpu.h"
#include "libavcodec/vc1.h"
/*
 * Prototypes of the RISC-V specific routines that ff_vc1dsp_init_riscv()
 * installs into VC1DSPContext. None of them is defined in this file
 * (presumably they live in the accompanying assembly sources).
 */

/* Installed into the vc1_inv_trans_*_dc hooks when AV_CPU_FLAG_RVV_I32 is set
 * and ff_rv_vlen_least(128) holds; the 8x8 and 8x4 variants additionally
 * require AV_CPU_FLAG_RVV_I64. */
void ff_vc1_inv_trans_8x8_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
void ff_vc1_inv_trans_4x8_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
void ff_vc1_inv_trans_8x4_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
void ff_vc1_inv_trans_4x4_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
/* Installed into put_vc1_mspel_pixels_tab[0][0] (16x16) and [1][0] (8x8) when
 * AV_CPU_FLAG_RVI is set; only wired up when __riscv_xlen >= 64. */
void ff_put_pixels16x16_rvi(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
void ff_put_pixels8x8_rvi(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
/* Installed into avg_vc1_mspel_pixels_tab[0][0] (16x16) and [1][0] (8x8)
 * under the same vector conditions as the inverse transforms above; the 8x8
 * variant additionally requires AV_CPU_FLAG_RVV_I64. */
void ff_avg_pixels16x16_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
void ff_avg_pixels8x8_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
/* Candidates for dsp->startcode_find_candidate: the _rvb variant is selected
 * with AV_CPU_FLAG_RVB_BASIC, the _rvv variant with AV_CPU_FLAG_RVV_I32 (and
 * replaces the _rvb one when both flags are set). */
int ff_startcode_find_candidate_rvb(const uint8_t *, int);
int ff_startcode_find_candidate_rvv(const uint8_t *, int);
/* Installed as dsp->vc1_unescape_buffer when AV_CPU_FLAG_RVV_I32 is set. */
int ff_vc1_unescape_buffer_rvv(const uint8_t *, int, uint8_t *);
/**
 * Hook the RISC-V optimised VC-1 routines into a DSP context.
 *
 * Only the function pointers matching the CPU extensions reported by
 * av_get_cpu_flags() are overwritten; every other entry of @p dsp is left
 * untouched. When HAVE_RV is not set at build time the function is a no-op.
 *
 * @param dsp VC-1 DSP context whose function pointers are updated in place
 */
av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
{
#if HAVE_RV
    const int cpu_flags = av_get_cpu_flags();

# if __riscv_xlen >= 64
    /* The scalar "put" routines are only wired up when XLEN is at least 64. */
    if (cpu_flags & AV_CPU_FLAG_RVI) {
        dsp->put_vc1_mspel_pixels_tab[0][0] = ff_put_pixels16x16_rvi;
        dsp->put_vc1_mspel_pixels_tab[1][0] = ff_put_pixels8x8_rvi;
    }
# endif

    if (cpu_flags & AV_CPU_FLAG_RVB_BASIC) {
        dsp->startcode_find_candidate = ff_startcode_find_candidate_rvb;
    }

# if HAVE_RVV
    if (cpu_flags & AV_CPU_FLAG_RVV_I32) {
        const int vlen_ok  = ff_rv_vlen_least(128);
        const int have_i64 = (cpu_flags & AV_CPU_FLAG_RVV_I64) != 0;

        /* Routines that need vectors of at least 128 bits. */
        if (vlen_ok) {
            dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
            dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
            dsp->avg_vc1_mspel_pixels_tab[0][0] = ff_avg_pixels16x16_rvv;
        }

        /* Routines that additionally need the RVV_I64 flag. */
        if (vlen_ok && have_i64) {
            dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
            dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
            dsp->avg_vc1_mspel_pixels_tab[1][0] = ff_avg_pixels8x8_rvv;
        }

        /* No vector length requirement. The start code search is assigned
         * after the RVB check above, so the vector version takes precedence
         * when both extensions are available. */
        dsp->vc1_unescape_buffer = ff_vc1_unescape_buffer_rvv;
        dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
    }
# endif
#endif
}