1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-26 19:01:44 +02:00

checkasm: add vvc_bdof test

apply_bdof_8_8x16_c: 5776.5
apply_bdof_8_8x16_avx2: 396.2
apply_bdof_8_16x8_c: 5722.0
apply_bdof_8_16x8_avx2: 216.0
apply_bdof_8_16x16_c: 11213.2
apply_bdof_8_16x16_avx2: 434.5
apply_bdof_10_8x16_c: 5657.7
apply_bdof_10_8x16_avx2: 1096.0
apply_bdof_10_16x8_c: 5531.7
apply_bdof_10_16x8_avx2: 212.5
apply_bdof_10_16x16_c: 11043.7
apply_bdof_10_16x16_avx2: 1252.7
apply_bdof_12_8x16_c: 5680.0
apply_bdof_12_8x16_avx2: 1096.5
apply_bdof_12_16x8_c: 5646.2
apply_bdof_12_16x8_avx2: 624.5
apply_bdof_12_16x16_c: 11076.0
apply_bdof_12_16x16_avx2: 1241.5
This commit is contained in:
Nuo Mi 2024-08-20 21:22:36 +08:00
parent 15eb10c6de
commit 7175544c0b

View File

@ -64,6 +64,14 @@ static const int sizes[] = { 2, 4, 8, 16, 32, 64, 128 };
randomize_buffers(buf0, buf1, size, mask); \
} while (0)
#define randomize_prof_src(buf0, buf1, size) \
do { \
const int shift = 14 - bit_depth; \
const int mask16 = 0x3fff >> shift << shift; \
uint32_t mask = (mask16 << 16) | mask16; \
randomize_buffers(buf0, buf1, size, mask); \
} while (0)
static void check_put_vvc_luma(void)
{
LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]);
@ -382,6 +390,47 @@ static void check_dmvr(void)
report("dmvr");
}
#define BDOF_BLOCK_SIZE 16
#define BDOF_SRC_SIZE (MAX_PB_SIZE* (BDOF_BLOCK_SIZE + 2))
#define BDOF_SRC_OFFSET (MAX_PB_SIZE + 1)
#define BDOF_DST_SIZE (BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE * 2)
static void check_bdof(void)
{
LOCAL_ALIGNED_32(uint8_t, dst0, [BDOF_DST_SIZE]);
LOCAL_ALIGNED_32(uint8_t, dst1, [BDOF_DST_SIZE]);
LOCAL_ALIGNED_32(uint16_t, src00, [BDOF_SRC_SIZE]);
LOCAL_ALIGNED_32(uint16_t, src01, [BDOF_SRC_SIZE]);
LOCAL_ALIGNED_32(uint16_t, src10, [BDOF_SRC_SIZE]);
LOCAL_ALIGNED_32(uint16_t, src11, [BDOF_SRC_SIZE]);
VVCDSPContext c;
declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int block_w, int block_h);
for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
const int dst_stride = BDOF_BLOCK_SIZE * SIZEOF_PIXEL;
ff_vvc_dsp_init(&c, bit_depth);
randomize_prof_src(src00, src10, BDOF_SRC_SIZE);
randomize_prof_src(src01, src11, BDOF_SRC_SIZE);
for (int h = 8; h <= 16; h *= 2) {
for (int w = 8; w <= 16; w *= 2) {
if (w * h < 128)
continue;
if (check_func(c.inter.apply_bdof, "apply_bdof_%d_%dx%d", bit_depth, w, h)) {
memset(dst0, 0, BDOF_DST_SIZE);
memset(dst1, 0, BDOF_DST_SIZE);
call_ref(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h);
call_new(dst1, dst_stride, src10 + BDOF_SRC_OFFSET, src11 + BDOF_SRC_OFFSET, w, h);
if (memcmp(dst0, dst1, BDOF_DST_SIZE))
fail();
bench_new(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h);
}
}
}
}
report("apply_bdof");
}
static void check_vvc_sad(void)
{
const int bit_depth = 10;
@ -422,6 +471,7 @@ static void check_vvc_sad(void)
void checkasm_check_vvc_mc(void)
{
check_dmvr();
check_bdof();
check_vvc_sad();
check_put_vvc_luma();
check_put_vvc_luma_uni();