mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
checkasm: add vvc_bdof test
apply_bdof_8_8x16_c: 5776.5 apply_bdof_8_8x16_avx2: 396.2 apply_bdof_8_16x8_c: 5722.0 apply_bdof_8_16x8_avx2: 216.0 apply_bdof_8_16x16_c: 11213.2 apply_bdof_8_16x16_avx2: 434.5 apply_bdof_10_8x16_c: 5657.7 apply_bdof_10_8x16_avx2: 1096.0 apply_bdof_10_16x8_c: 5531.7 apply_bdof_10_16x8_avx2: 212.5 apply_bdof_10_16x16_c: 11043.7 apply_bdof_10_16x16_avx2: 1252.7 apply_bdof_12_8x16_c: 5680.0 apply_bdof_12_8x16_avx2: 1096.5 apply_bdof_12_16x8_c: 5646.2 apply_bdof_12_16x8_avx2: 624.5 apply_bdof_12_16x16_c: 11076.0 apply_bdof_12_16x16_avx2: 1241.5
This commit is contained in:
parent
15eb10c6de
commit
7175544c0b
@ -64,6 +64,14 @@ static const int sizes[] = { 2, 4, 8, 16, 32, 64, 128 };
|
||||
randomize_buffers(buf0, buf1, size, mask); \
|
||||
} while (0)
|
||||
|
||||
#define randomize_prof_src(buf0, buf1, size) \
|
||||
do { \
|
||||
const int shift = 14 - bit_depth; \
|
||||
const int mask16 = 0x3fff >> shift << shift; \
|
||||
uint32_t mask = (mask16 << 16) | mask16; \
|
||||
randomize_buffers(buf0, buf1, size, mask); \
|
||||
} while (0)
|
||||
|
||||
static void check_put_vvc_luma(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]);
|
||||
@ -382,6 +390,47 @@ static void check_dmvr(void)
|
||||
report("dmvr");
|
||||
}
|
||||
|
||||
#define BDOF_BLOCK_SIZE 16
|
||||
#define BDOF_SRC_SIZE (MAX_PB_SIZE* (BDOF_BLOCK_SIZE + 2))
|
||||
#define BDOF_SRC_OFFSET (MAX_PB_SIZE + 1)
|
||||
#define BDOF_DST_SIZE (BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE * 2)
|
||||
static void check_bdof(void)
|
||||
{
|
||||
LOCAL_ALIGNED_32(uint8_t, dst0, [BDOF_DST_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint8_t, dst1, [BDOF_DST_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint16_t, src00, [BDOF_SRC_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint16_t, src01, [BDOF_SRC_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint16_t, src10, [BDOF_SRC_SIZE]);
|
||||
LOCAL_ALIGNED_32(uint16_t, src11, [BDOF_SRC_SIZE]);
|
||||
|
||||
VVCDSPContext c;
|
||||
declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int block_w, int block_h);
|
||||
|
||||
for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
|
||||
const int dst_stride = BDOF_BLOCK_SIZE * SIZEOF_PIXEL;
|
||||
|
||||
ff_vvc_dsp_init(&c, bit_depth);
|
||||
randomize_prof_src(src00, src10, BDOF_SRC_SIZE);
|
||||
randomize_prof_src(src01, src11, BDOF_SRC_SIZE);
|
||||
for (int h = 8; h <= 16; h *= 2) {
|
||||
for (int w = 8; w <= 16; w *= 2) {
|
||||
if (w * h < 128)
|
||||
continue;
|
||||
if (check_func(c.inter.apply_bdof, "apply_bdof_%d_%dx%d", bit_depth, w, h)) {
|
||||
memset(dst0, 0, BDOF_DST_SIZE);
|
||||
memset(dst1, 0, BDOF_DST_SIZE);
|
||||
call_ref(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h);
|
||||
call_new(dst1, dst_stride, src10 + BDOF_SRC_OFFSET, src11 + BDOF_SRC_OFFSET, w, h);
|
||||
if (memcmp(dst0, dst1, BDOF_DST_SIZE))
|
||||
fail();
|
||||
bench_new(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
report("apply_bdof");
|
||||
}
|
||||
|
||||
static void check_vvc_sad(void)
|
||||
{
|
||||
const int bit_depth = 10;
|
||||
@ -422,6 +471,7 @@ static void check_vvc_sad(void)
|
||||
void checkasm_check_vvc_mc(void)
|
||||
{
|
||||
check_dmvr();
|
||||
check_bdof();
|
||||
check_vvc_sad();
|
||||
check_put_vvc_luma();
|
||||
check_put_vvc_luma_uni();
|
||||
|
Loading…
Reference in New Issue
Block a user