You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
wasm/hevc: Add sao_band_filter
hevc_sao_band_8_8_c:          63.0 ( 1.00x)
hevc_sao_band_8_8_simd128:    10.4 ( 6.06x)
hevc_sao_band_16_8_c:        230.4 ( 1.00x)
hevc_sao_band_16_8_simd128:   22.9 (10.07x)
hevc_sao_band_32_8_c:        900.4 ( 1.00x)
hevc_sao_band_32_8_simd128:   81.5 (11.05x)
hevc_sao_band_48_8_c:       2009.1 ( 1.00x)
hevc_sao_band_48_8_simd128:  170.2 (11.80x)
hevc_sao_band_64_8_c:       3535.0 ( 1.00x)
hevc_sao_band_64_8_simd128:  297.5 (11.88x)

Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
This commit is contained in:
@ -1,3 +1,4 @@
|
|||||||
OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/dsp_init.o
|
OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/dsp_init.o
|
||||||
|
|
||||||
SIMD128-OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/idct.o
|
SIMD128-OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/idct.o \
|
||||||
|
wasm/hevc/sao.o
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "libavutil/cpu_internal.h"
|
#include "libavutil/cpu_internal.h"
|
||||||
#include "libavcodec/hevc/dsp.h"
|
#include "libavcodec/hevc/dsp.h"
|
||||||
#include "libavcodec/wasm/hevc/idct.h"
|
#include "libavcodec/wasm/hevc/idct.h"
|
||||||
|
#include "libavcodec/wasm/hevc/sao.h"
|
||||||
|
|
||||||
av_cold void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth)
|
av_cold void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth)
|
||||||
{
|
{
|
||||||
@ -35,6 +36,12 @@ av_cold void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth)
|
|||||||
c->idct[1] = ff_hevc_idct_8x8_8_simd128;
|
c->idct[1] = ff_hevc_idct_8x8_8_simd128;
|
||||||
c->idct[2] = ff_hevc_idct_16x16_8_simd128;
|
c->idct[2] = ff_hevc_idct_16x16_8_simd128;
|
||||||
c->idct[3] = ff_hevc_idct_32x32_8_simd128;
|
c->idct[3] = ff_hevc_idct_32x32_8_simd128;
|
||||||
|
|
||||||
|
c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_simd128;
|
||||||
|
c->sao_band_filter[1] =
|
||||||
|
c->sao_band_filter[2] =
|
||||||
|
c->sao_band_filter[3] =
|
||||||
|
c->sao_band_filter[4] = ff_hevc_sao_band_filter_16x16_8_simd128;
|
||||||
} else if (bit_depth == 10) {
|
} else if (bit_depth == 10) {
|
||||||
c->idct[0] = ff_hevc_idct_4x4_10_simd128;
|
c->idct[0] = ff_hevc_idct_4x4_10_simd128;
|
||||||
c->idct[1] = ff_hevc_idct_8x8_10_simd128;
|
c->idct[1] = ff_hevc_idct_8x8_10_simd128;
|
||||||
|
114
libavcodec/wasm/hevc/sao.c
Normal file
114
libavcodec/wasm/hevc/sao.c
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Zhao Zhili
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "sao.h"
|
||||||
|
|
||||||
|
#include <wasm_simd128.h>
|
||||||
|
|
||||||
|
void ff_hevc_sao_band_filter_8x8_8_simd128(uint8_t *dst, const uint8_t *src,
|
||||||
|
ptrdiff_t stride_dst,
|
||||||
|
ptrdiff_t stride_src,
|
||||||
|
const int16_t *sao_offset_val,
|
||||||
|
int sao_left_class, int width,
|
||||||
|
int height)
|
||||||
|
{
|
||||||
|
int8_t offset_table[32] = {0};
|
||||||
|
v128_t offset_low, offset_high;
|
||||||
|
|
||||||
|
for (int k = 0; k < 4; k++)
|
||||||
|
offset_table[(k + sao_left_class) & 31] = (int8_t)sao_offset_val[k + 1];
|
||||||
|
|
||||||
|
offset_low = wasm_v128_load(offset_table);
|
||||||
|
offset_high = wasm_v128_load(&offset_table[16]);
|
||||||
|
|
||||||
|
for (int y = height; y > 0; y -= 2) {
|
||||||
|
v128_t src_v, src_high;
|
||||||
|
v128_t v0, v1;
|
||||||
|
|
||||||
|
src_v = wasm_v128_load64_zero(src);
|
||||||
|
src += stride_src;
|
||||||
|
src_v = wasm_v128_load64_lane(src, src_v, 1);
|
||||||
|
src += stride_src;
|
||||||
|
|
||||||
|
v0 = wasm_u8x16_shr(src_v, 3);
|
||||||
|
v1 = wasm_i8x16_sub(v0, wasm_i8x16_const_splat(16));
|
||||||
|
v0 = wasm_i8x16_swizzle(offset_low, v0);
|
||||||
|
v1 = wasm_i8x16_swizzle(offset_high, v1);
|
||||||
|
v0 = wasm_v128_or(v0, v1);
|
||||||
|
src_high = wasm_u16x8_extend_high_u8x16(src_v);
|
||||||
|
v1 = wasm_i16x8_extend_high_i8x16(v0);
|
||||||
|
src_v = wasm_u16x8_extend_low_u8x16(src_v);
|
||||||
|
v0 = wasm_i16x8_extend_low_i8x16(v0);
|
||||||
|
|
||||||
|
v0 = wasm_i16x8_add_sat(src_v, v0);
|
||||||
|
v1 = wasm_i16x8_add_sat(src_high, v1);
|
||||||
|
v0 = wasm_u8x16_narrow_i16x8(v0, v1);
|
||||||
|
|
||||||
|
wasm_v128_store64_lane(dst, v0, 0);
|
||||||
|
dst += stride_dst;
|
||||||
|
wasm_v128_store64_lane(dst, v0, 1);
|
||||||
|
dst += stride_dst;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ff_hevc_sao_band_filter_16x16_8_simd128(uint8_t *dst, const uint8_t *src,
|
||||||
|
ptrdiff_t stride_dst,
|
||||||
|
ptrdiff_t stride_src,
|
||||||
|
const int16_t *sao_offset_val,
|
||||||
|
int sao_left_class, int width,
|
||||||
|
int height)
|
||||||
|
{
|
||||||
|
int8_t offset_table[32] = {0};
|
||||||
|
v128_t offset_low, offset_high;
|
||||||
|
|
||||||
|
for (int k = 0; k < 4; k++)
|
||||||
|
offset_table[(k + sao_left_class) & 31] = (int8_t)sao_offset_val[k + 1];
|
||||||
|
|
||||||
|
offset_low = wasm_v128_load(offset_table);
|
||||||
|
offset_high = wasm_v128_load(&offset_table[16]);
|
||||||
|
|
||||||
|
for (int y = height; y > 0; y--) {
|
||||||
|
for (int x = 0; x < width; x += 16) {
|
||||||
|
v128_t src_v, src_high;
|
||||||
|
v128_t v0, v1;
|
||||||
|
|
||||||
|
src_v = wasm_v128_load(&src[x]);
|
||||||
|
|
||||||
|
v0 = wasm_u8x16_shr(src_v, 3);
|
||||||
|
v1 = wasm_i8x16_sub(v0, wasm_i8x16_const_splat(16));
|
||||||
|
v0 = wasm_i8x16_swizzle(offset_low, v0);
|
||||||
|
v1 = wasm_i8x16_swizzle(offset_high, v1);
|
||||||
|
v0 = wasm_v128_or(v0, v1);
|
||||||
|
src_high = wasm_u16x8_extend_high_u8x16(src_v);
|
||||||
|
v1 = wasm_i16x8_extend_high_i8x16(v0);
|
||||||
|
src_v = wasm_u16x8_extend_low_u8x16(src_v);
|
||||||
|
v0 = wasm_i16x8_extend_low_i8x16(v0);
|
||||||
|
|
||||||
|
v0 = wasm_i16x8_add_sat(src_v, v0);
|
||||||
|
v1 = wasm_i16x8_add_sat(src_high, v1);
|
||||||
|
v0 = wasm_u8x16_narrow_i16x8(v0, v1);
|
||||||
|
wasm_v128_store(&dst[x], v0);
|
||||||
|
}
|
||||||
|
|
||||||
|
dst += stride_dst;
|
||||||
|
src += stride_src;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
42
libavcodec/wasm/hevc/sao.h
Normal file
42
libavcodec/wasm/hevc/sao.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Zhao Zhili
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVCODEC_WASM_HEVC_SAO_H
|
||||||
|
#define AVCODEC_WASM_HEVC_SAO_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/**
 * SAO band filter for 8-bit samples, processing 8 pixels per row.
 *
 * @param dst            destination rows
 * @param src            source rows
 * @param stride_dst     distance in bytes between destination rows
 * @param stride_src     distance in bytes between source rows
 * @param sao_offset_val band offsets; entries 1..4 are used
 * @param sao_left_class index of the first of the four bands to offset
 * @param width          block width in pixels
 * @param height         block height in rows
 */
void ff_hevc_sao_band_filter_8x8_8_simd128(uint8_t *dst, const uint8_t *src,
                                           ptrdiff_t stride_dst,
                                           ptrdiff_t stride_src,
                                           const int16_t *sao_offset_val,
                                           int sao_left_class, int width,
                                           int height);
|
||||||
|
|
||||||
|
/**
 * SAO band filter for 8-bit samples, processing rows in steps of 16 pixels.
 *
 * @param dst            destination rows
 * @param src            source rows
 * @param stride_dst     distance in bytes between destination rows
 * @param stride_src     distance in bytes between source rows
 * @param sao_offset_val band offsets; entries 1..4 are used
 * @param sao_left_class index of the first of the four bands to offset
 * @param width          block width in pixels
 * @param height         block height in rows
 */
void ff_hevc_sao_band_filter_16x16_8_simd128(uint8_t *dst, const uint8_t *src,
                                             ptrdiff_t stride_dst,
                                             ptrdiff_t stride_src,
                                             const int16_t *sao_offset_val,
                                             int sao_left_class, int width,
                                             int height);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
Reference in New Issue
Block a user