mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-03 05:10:03 +02:00
ba7d0d5fc3
C908 X60 avg_8_2x2_c : 1.2 1.0 avg_8_2x2_rvv_i32 : 0.7 0.7 avg_8_2x4_c : 2.0 2.2 avg_8_2x4_rvv_i32 : 1.2 1.2 avg_8_2x8_c : 3.7 4.0 avg_8_2x8_rvv_i32 : 1.7 1.5 avg_8_2x16_c : 7.2 7.7 avg_8_2x16_rvv_i32 : 3.0 2.7 avg_8_2x32_c : 14.2 15.2 avg_8_2x32_rvv_i32 : 5.5 5.0 avg_8_2x64_c : 51.0 43.7 avg_8_2x64_rvv_i32 : 39.2 29.7 avg_8_2x128_c : 100.5 79.2 avg_8_2x128_rvv_i32 : 79.7 68.2 avg_8_4x2_c : 1.7 2.0 avg_8_4x2_rvv_i32 : 1.0 0.7 avg_8_4x4_c : 3.5 3.7 avg_8_4x4_rvv_i32 : 1.2 1.2 avg_8_4x8_c : 6.7 7.0 avg_8_4x8_rvv_i32 : 1.7 1.5 avg_8_4x16_c : 13.5 14.0 avg_8_4x16_rvv_i32 : 3.0 2.7 avg_8_4x32_c : 26.2 27.7 avg_8_4x32_rvv_i32 : 5.5 4.7 avg_8_4x64_c : 73.0 73.7 avg_8_4x64_rvv_i32 : 39.0 32.5 avg_8_4x128_c : 143.0 137.2 avg_8_4x128_rvv_i32 : 72.7 68.0 avg_8_8x2_c : 3.5 3.5 avg_8_8x2_rvv_i32 : 1.0 0.7 avg_8_8x4_c : 6.2 6.5 avg_8_8x4_rvv_i32 : 1.5 1.0 avg_8_8x8_c : 12.7 13.2 avg_8_8x8_rvv_i32 : 2.0 1.5 avg_8_8x16_c : 25.0 26.5 avg_8_8x16_rvv_i32 : 3.2 2.7 avg_8_8x32_c : 50.0 52.7 avg_8_8x32_rvv_i32 : 6.2 5.0 avg_8_8x64_c : 118.7 122.5 avg_8_8x64_rvv_i32 : 40.2 31.5 avg_8_8x128_c : 236.7 220.2 avg_8_8x128_rvv_i32 : 85.2 67.7 avg_8_16x2_c : 6.2 6.7 avg_8_16x2_rvv_i32 : 1.2 0.7 avg_8_16x4_c : 12.5 13.0 avg_8_16x4_rvv_i32 : 1.7 1.0 avg_8_16x8_c : 24.5 26.0 avg_8_16x8_rvv_i32 : 3.0 1.7 avg_8_16x16_c : 49.0 51.5 avg_8_16x16_rvv_i32 : 5.5 3.0 avg_8_16x32_c : 97.5 102.5 avg_8_16x32_rvv_i32 : 10.5 5.5 avg_8_16x64_c : 213.7 222.0 avg_8_16x64_rvv_i32 : 48.5 34.2 avg_8_16x128_c : 434.7 420.0 avg_8_16x128_rvv_i32 : 97.7 74.0 avg_8_32x2_c : 12.2 12.7 avg_8_32x2_rvv_i32 : 1.5 1.0 avg_8_32x4_c : 24.5 25.5 avg_8_32x4_rvv_i32 : 3.0 1.7 avg_8_32x8_c : 48.5 50.7 avg_8_32x8_rvv_i32 : 5.2 2.7 avg_8_32x16_c : 96.7 101.2 avg_8_32x16_rvv_i32 : 10.2 5.0 avg_8_32x32_c : 192.7 202.2 avg_8_32x32_rvv_i32 : 19.7 9.5 avg_8_32x64_c : 427.5 426.5 avg_8_32x64_rvv_i32 : 64.2 18.2 avg_8_32x128_c : 816.5 821.0 avg_8_32x128_rvv_i32 : 135.2 75.5 avg_8_64x2_c : 24.0 25.2 avg_8_64x2_rvv_i32 : 2.7 1.5 avg_8_64x4_c : 48.2 50.5 avg_8_64x4_rvv_i32 : 5.0 2.7 avg_8_64x8_c : 96.0 100.7 avg_8_64x8_rvv_i32 : 9.7 4.5 avg_8_64x16_c : 207.7 201.2 avg_8_64x16_rvv_i32 : 19.0 9.0 avg_8_64x32_c : 383.2 402.0 avg_8_64x32_rvv_i32 : 37.5 17.5 avg_8_64x64_c : 837.2 828.7 avg_8_64x64_rvv_i32 : 84.7 35.5 avg_8_64x128_c : 1640.7 1640.2 avg_8_64x128_rvv_i32 : 206.0 153.0 avg_8_128x2_c : 48.7 51.0 avg_8_128x2_rvv_i32 : 5.2 2.7 avg_8_128x4_c : 96.7 101.5 avg_8_128x4_rvv_i32 : 10.2 5.0 avg_8_128x8_c : 192.2 202.0 avg_8_128x8_rvv_i32 : 19.7 9.2 avg_8_128x16_c : 400.7 403.2 avg_8_128x16_rvv_i32 : 38.7 18.5 avg_8_128x32_c : 786.7 805.7 avg_8_128x32_rvv_i32 : 77.0 36.2 avg_8_128x64_c : 1615.5 1655.5 avg_8_128x64_rvv_i32 : 189.7 80.7 avg_8_128x128_c : 3182.0 3238.0 avg_8_128x128_rvv_i32 : 397.5 308.5 w_avg_8_2x2_c : 1.7 1.2 w_avg_8_2x2_rvv_i32 : 1.2 1.0 w_avg_8_2x4_c : 2.7 2.7 w_avg_8_2x4_rvv_i32 : 1.7 1.5 w_avg_8_2x8_c : 21.7 4.7 w_avg_8_2x8_rvv_i32 : 2.7 2.5 w_avg_8_2x16_c : 9.5 9.2 w_avg_8_2x16_rvv_i32 : 4.7 4.2 w_avg_8_2x32_c : 19.0 18.7 w_avg_8_2x32_rvv_i32 : 9.0 8.0 w_avg_8_2x64_c : 62.0 50.2 w_avg_8_2x64_rvv_i32 : 47.7 33.5 w_avg_8_2x128_c : 116.7 87.7 w_avg_8_2x128_rvv_i32 : 80.0 69.5 w_avg_8_4x2_c : 2.5 2.5 w_avg_8_4x2_rvv_i32 : 1.2 1.0 w_avg_8_4x4_c : 4.7 4.5 w_avg_8_4x4_rvv_i32 : 1.7 1.7 w_avg_8_4x8_c : 9.0 8.7 w_avg_8_4x8_rvv_i32 : 2.7 2.5 w_avg_8_4x16_c : 17.7 17.5 w_avg_8_4x16_rvv_i32 : 4.7 4.2 w_avg_8_4x32_c : 35.0 35.0 w_avg_8_4x32_rvv_i32 : 9.0 8.0 w_avg_8_4x64_c : 100.5 84.5 w_avg_8_4x64_rvv_i32 : 42.2 33.7 w_avg_8_4x128_c : 203.5 151.2 w_avg_8_4x128_rvv_i32 : 83.0 69.5 w_avg_8_8x2_c : 4.5 4.5 w_avg_8_8x2_rvv_i32 : 1.2 1.2 w_avg_8_8x4_c : 8.7 8.7 w_avg_8_8x4_rvv_i32 : 2.0 1.7 w_avg_8_8x8_c : 17.0 17.0 w_avg_8_8x8_rvv_i32 : 3.2 2.5 w_avg_8_8x16_c : 34.0 33.5 w_avg_8_8x16_rvv_i32 : 5.5 4.2 w_avg_8_8x32_c : 86.0 67.5 w_avg_8_8x32_rvv_i32 : 10.5 8.0 w_avg_8_8x64_c : 187.2 149.5 w_avg_8_8x64_rvv_i32 : 45.0 35.5 w_avg_8_8x128_c : 342.7 290.0 w_avg_8_8x128_rvv_i32 : 108.7 70.2 w_avg_8_16x2_c : 8.5 8.2 w_avg_8_16x2_rvv_i32 : 2.0 1.2 w_avg_8_16x4_c : 16.7 16.7 w_avg_8_16x4_rvv_i32 : 3.0 1.7 w_avg_8_16x8_c : 33.2 33.5 w_avg_8_16x8_rvv_i32 : 5.5 3.0 w_avg_8_16x16_c : 66.2 66.7 w_avg_8_16x16_rvv_i32 : 10.5 5.0 w_avg_8_16x32_c : 132.5 131.0 w_avg_8_16x32_rvv_i32 : 20.0 9.7 w_avg_8_16x64_c : 340.0 283.5 w_avg_8_16x64_rvv_i32 : 60.5 37.2 w_avg_8_16x128_c : 641.2 597.5 w_avg_8_16x128_rvv_i32 : 118.7 77.7 w_avg_8_32x2_c : 16.5 16.7 w_avg_8_32x2_rvv_i32 : 3.2 1.7 w_avg_8_32x4_c : 33.2 33.2 w_avg_8_32x4_rvv_i32 : 5.5 2.7 w_avg_8_32x8_c : 66.0 62.5 w_avg_8_32x8_rvv_i32 : 10.5 5.0 w_avg_8_32x16_c : 131.5 132.0 w_avg_8_32x16_rvv_i32 : 20.2 9.5 w_avg_8_32x32_c : 261.7 272.0 w_avg_8_32x32_rvv_i32 : 39.7 18.0 w_avg_8_32x64_c : 575.2 545.5 w_avg_8_32x64_rvv_i32 : 105.5 58.7 w_avg_8_32x128_c : 1154.2 1088.0 w_avg_8_32x128_rvv_i32 : 207.0 98.0 w_avg_8_64x2_c : 33.0 33.0 w_avg_8_64x2_rvv_i32 : 6.2 2.7 w_avg_8_64x4_c : 65.5 66.0 w_avg_8_64x4_rvv_i32 : 11.5 5.0 w_avg_8_64x8_c : 131.2 132.5 w_avg_8_64x8_rvv_i32 : 22.5 9.5 w_avg_8_64x16_c : 268.2 262.5 w_avg_8_64x16_rvv_i32 : 44.2 18.0 w_avg_8_64x32_c : 561.5 528.7 w_avg_8_64x32_rvv_i32 : 88.0 35.2 w_avg_8_64x64_c : 1136.2 1124.0 w_avg_8_64x64_rvv_i32 : 222.0 82.2 w_avg_8_64x128_c : 2345.0 2312.7 w_avg_8_64x128_rvv_i32 : 423.0 190.5 w_avg_8_128x2_c : 65.7 66.5 w_avg_8_128x2_rvv_i32 : 11.2 5.5 w_avg_8_128x4_c : 131.2 132.2 w_avg_8_128x4_rvv_i32 : 22.0 10.2 w_avg_8_128x8_c : 263.5 312.0 w_avg_8_128x8_rvv_i32 : 43.2 19.7 w_avg_8_128x16_c : 528.7 526.2 w_avg_8_128x16_rvv_i32 : 85.5 39.5 w_avg_8_128x32_c : 1067.7 1062.7 w_avg_8_128x32_rvv_i32 : 171.7 78.2 w_avg_8_128x64_c : 2234.7 2168.7 w_avg_8_128x64_rvv_i32 : 400.0 159.0 w_avg_8_128x128_c : 4752.5 4295.0 w_avg_8_128x128_rvv_i32 : 757.7 365.5 Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
186 lines
9.2 KiB
C
186 lines
9.2 KiB
C
/*
|
|
* VVC DSP
|
|
*
|
|
* Copyright (C) 2021 Nuo Mi
|
|
*
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#ifndef AVCODEC_VVC_DSP_H
|
|
#define AVCODEC_VVC_DSP_H
|
|
|
|
#include <stdint.h>
|
|
#include <stddef.h>
|
|
|
|
enum VVCTxType {
|
|
VVC_DCT2,
|
|
VVC_DST7,
|
|
VVC_DCT8,
|
|
VVC_N_TX_TYPE,
|
|
};
|
|
|
|
enum VVCTxSize {
|
|
VVC_TX_SIZE_2,
|
|
VVC_TX_SIZE_4,
|
|
VVC_TX_SIZE_8,
|
|
VVC_TX_SIZE_16,
|
|
VVC_TX_SIZE_32,
|
|
VVC_TX_SIZE_64,
|
|
VVC_N_TX_SIZE,
|
|
};
|
|
|
|
typedef struct VVCInterDSPContext {
|
|
void (*put[2 /* luma, chroma */][7 /* log2(width) - 1 */][2 /* int, frac */][2 /* int, frac */])(
|
|
int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
|
|
const int8_t *hf, const int8_t *vf, int width);
|
|
|
|
void (*put_uni[2 /* luma, chroma */][7 /* log2(width) - 1 */][2 /* int, frac */][2 /* int, frac */])(
|
|
uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int height,
|
|
const int8_t *hf, const int8_t *vf, int width);
|
|
|
|
void (*put_uni_w[2 /* luma, chroma */][7 /* log2(width) - 1 */][2 /* int, frac */][2 /* int, frac */])(
|
|
uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int height,
|
|
int denom, int wx, int ox, const int8_t *hf, const int8_t *vf, int width);
|
|
|
|
void (*put_scaled[2 /* luma, chroma */][7 /* log2(width) - 1 */])(
|
|
int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int src_height,
|
|
int x, int y, int dx, int dy, int height, const int8_t *hf, const int8_t *vf, int width);
|
|
|
|
void (*put_uni_scaled[2 /* luma, chroma */][7 /* log2(width) - 1 */])(
|
|
uint8_t *dst, const ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int src_height,
|
|
int x, int y, int dx, int dy, int height, const int8_t *hf, const int8_t *vf, int width);
|
|
|
|
void (*put_uni_w_scaled[2 /* luma, chroma */][7 /* log2(width) - 1 */])(
|
|
uint8_t *dst, const ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int src_height,
|
|
int x, int y, int dx, int dy, int height, int denom, int wx, int ox, const int8_t *hf, const int8_t *vf,
|
|
int width);
|
|
|
|
void (*avg)(uint8_t *dst, ptrdiff_t dst_stride,
|
|
const int16_t *src0, const int16_t *src1, int width, int height);
|
|
|
|
void (*w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
|
|
const int16_t *src0, const int16_t *src1, int width, int height,
|
|
int denom, int w0, int w1, int o0, int o1);
|
|
|
|
void (*put_ciip)(uint8_t *dst, ptrdiff_t dst_stride, int width, int height,
|
|
const uint8_t *inter, ptrdiff_t inter_stride, int inter_weight);
|
|
|
|
void (*put_gpm)(uint8_t *dst, ptrdiff_t dst_stride, int width, int height,
|
|
const int16_t *src0, const int16_t *src1,
|
|
const uint8_t *weights, int step_x, int step_y);
|
|
|
|
void (*fetch_samples)(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int x_frac, int y_frac);
|
|
void (*bdof_fetch_samples)(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int x_frac, int y_frac,
|
|
int width, int height);
|
|
|
|
void (*apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y);
|
|
|
|
void (*apply_prof_uni)(uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src,
|
|
const int16_t *diff_mv_x, const int16_t *diff_mv_y);
|
|
void (*apply_prof_uni_w)(uint8_t *dst, const ptrdiff_t dst_stride, const int16_t *src,
|
|
const int16_t *diff_mv_x, const int16_t *diff_mv_y, int denom, int wx, int ox);
|
|
|
|
void (*apply_bdof)(uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int block_w, int block_h);
|
|
|
|
int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
|
|
void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
|
|
intptr_t mx, intptr_t my, int width);
|
|
} VVCInterDSPContext;
|
|
|
|
struct VVCLocalContext;
|
|
|
|
typedef struct VVCIntraDSPContext {
|
|
void (*intra_cclm_pred)(const struct VVCLocalContext *lc, int x0, int y0, int w, int h);
|
|
void (*lmcs_scale_chroma)(struct VVCLocalContext *lc, int *dst, const int *coeff, int w, int h, int x0_cu, int y0_cu);
|
|
void (*intra_pred)(const struct VVCLocalContext *lc, int x0, int y0, int w, int h, int c_idx);
|
|
void (*pred_planar)(uint8_t *src, const uint8_t *top, const uint8_t *left, int w, int h, ptrdiff_t stride);
|
|
void (*pred_mip)(uint8_t *src, const uint8_t *top, const uint8_t *left, int w, int h, ptrdiff_t stride,
|
|
int mode_id, int is_transpose);
|
|
void (*pred_dc)(uint8_t *src, const uint8_t *top, const uint8_t *left, int w, int h, ptrdiff_t stride);
|
|
void (*pred_v)(uint8_t *src, const uint8_t *_top, int w, int h, ptrdiff_t stride);
|
|
void (*pred_h)(uint8_t *src, const uint8_t *_left, int w, int h, ptrdiff_t stride);
|
|
void (*pred_angular_v)(uint8_t *src, const uint8_t *_top, const uint8_t *_left,
|
|
int w, int h, ptrdiff_t stride, int c_idx, int mode, int ref_idx, int filter_flag, int need_pdpc);
|
|
void (*pred_angular_h)(uint8_t *src, const uint8_t *_top, const uint8_t *_left, int w, int h, ptrdiff_t stride,
|
|
int c_idx, int mode, int ref_idx, int filter_flag, int need_pdpc);
|
|
} VVCIntraDSPContext;
|
|
|
|
typedef struct VVCItxDSPContext {
|
|
void (*add_residual)(uint8_t *dst, const int *res, int width, int height, ptrdiff_t stride);
|
|
void (*add_residual_joint)(uint8_t *dst, const int *res, int width, int height, ptrdiff_t stride, int c_sign, int shift);
|
|
void (*pred_residual_joint)(int *buf, int width, int height, int c_sign, int shift);
|
|
|
|
void (*itx[VVC_N_TX_TYPE][VVC_N_TX_SIZE])(int *coeffs, ptrdiff_t step, size_t nz);
|
|
void (*transform_bdpcm)(int *coeffs, int width, int height, int vertical, int log2_transform_range);
|
|
} VVCItxDSPContext;
|
|
|
|
typedef struct VVCLMCSDSPContext {
|
|
void (*filter)(uint8_t *dst, ptrdiff_t dst_stride, int width, int height, const void *lut);
|
|
} VVCLMCSDSPContext;
|
|
|
|
typedef struct VVCLFDSPContext {
|
|
int (*ladf_level[2 /* h, v */])(const uint8_t *pix, ptrdiff_t stride);
|
|
|
|
void (*filter_luma[2 /* h, v */])(uint8_t *pix, ptrdiff_t stride, const int32_t *beta, const int32_t *tc,
|
|
const uint8_t *no_p, const uint8_t *no_q, const uint8_t *max_len_p, const uint8_t *max_len_q, int hor_ctu_edge);
|
|
void (*filter_chroma[2 /* h, v */])(uint8_t *pix, ptrdiff_t stride, const int32_t *beta, const int32_t *tc,
|
|
const uint8_t *no_p, const uint8_t *no_q, const uint8_t *max_len_p, const uint8_t *max_len_q, int shift);
|
|
} VVCLFDSPContext;
|
|
|
|
struct SAOParams;
|
|
typedef struct VVCSAODSPContext {
|
|
void (*band_filter[9])(uint8_t *dst, const uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
|
|
const int16_t *sao_offset_val, int sao_left_class, int width, int height);
|
|
/* implicit src_stride parameter has value of 2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE */
|
|
void (*edge_filter[9])(uint8_t *dst /* align 16 */, const uint8_t *src /* align 32 */, ptrdiff_t dst_stride,
|
|
const int16_t *sao_offset_val, int sao_eo_class, int width, int height);
|
|
void (*edge_restore[2])(uint8_t *dst, const uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
|
|
const struct SAOParams *sao, const int *borders, int width, int height, int c_idx,
|
|
const uint8_t *vert_edge, const uint8_t *horiz_edge, const uint8_t *diag_edge);
|
|
} VVCSAODSPContext;
|
|
|
|
typedef struct VVCALFDSPContext {
|
|
void (*filter[2 /* luma, chroma */])(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,
|
|
int width, int height, const int16_t *filter, const int16_t *clip, int vb_pos);
|
|
void (*filter_cc)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *luma, ptrdiff_t luma_stride,
|
|
int width, int height, int hs, int vs, const int16_t *filter, int vb_pos);
|
|
|
|
void (*classify)(int *class_idx, int *transpose_idx, const uint8_t *src, ptrdiff_t src_stride, int width, int height,
|
|
int vb_pos, int *gradient_tmp);
|
|
void (*recon_coeff_and_clip)(int16_t *coeff, int16_t *clip, const int *class_idx, const int *transpose_idx, int size,
|
|
const int16_t *coeff_set, const uint8_t *clip_idx_set, const uint8_t *class_to_filt);
|
|
} VVCALFDSPContext;
|
|
|
|
typedef struct VVCDSPContext {
|
|
VVCInterDSPContext inter;
|
|
VVCIntraDSPContext intra;
|
|
VVCItxDSPContext itx;
|
|
VVCLMCSDSPContext lmcs;
|
|
VVCLFDSPContext lf;
|
|
VVCSAODSPContext sao;
|
|
VVCALFDSPContext alf;
|
|
} VVCDSPContext;
|
|
|
|
void ff_vvc_dsp_init(VVCDSPContext *hpc, int bit_depth);
|
|
|
|
void ff_vvc_dsp_init_aarch64(VVCDSPContext *hpc, const int bit_depth);
|
|
void ff_vvc_dsp_init_riscv(VVCDSPContext *hpc, const int bit_depth);
|
|
void ff_vvc_dsp_init_x86(VVCDSPContext *hpc, const int bit_depth);
|
|
|
|
#endif /* AVCODEC_VVC_DSP_H */
|