diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index c41d719e82..1589a15e2f 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -20,6 +20,7 @@ AVCODECOBJS-$(CONFIG_LLVIDENCDSP) += llviddspenc.o AVCODECOBJS-$(CONFIG_LPC) += lpc.o AVCODECOBJS-$(CONFIG_ME_CMP) += motion.o AVCODECOBJS-$(CONFIG_MPEGVIDEOENCDSP) += mpegvideoencdsp.o +AVCODECOBJS-$(CONFIG_QPELDSP) += qpeldsp.o AVCODECOBJS-$(CONFIG_VC1DSP) += vc1dsp.o AVCODECOBJS-$(CONFIG_VP8DSP) += vp8dsp.o AVCODECOBJS-$(CONFIG_VIDEODSP) += videodsp.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index b23e4ce889..e59d366f2b 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -220,6 +220,9 @@ static const struct { #if CONFIG_PIXBLOCKDSP { "pixblockdsp", checkasm_check_pixblockdsp }, #endif + #if CONFIG_QPELDSP + { "qpeldsp", checkasm_check_qpeldsp }, + #endif #if CONFIG_RV34DSP { "rv34dsp", checkasm_check_rv34dsp }, #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 2c259aae01..eda806e870 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -125,6 +125,7 @@ void checkasm_check_mpegvideoencdsp(void); void checkasm_check_nlmeans(void); void checkasm_check_opusdsp(void); void checkasm_check_pixblockdsp(void); +void checkasm_check_qpeldsp(void); void checkasm_check_sbrdsp(void); void checkasm_check_rv34dsp(void); void checkasm_check_rv40dsp(void); diff --git a/tests/checkasm/qpeldsp.c b/tests/checkasm/qpeldsp.c new file mode 100644 index 0000000000..fd3c50cd55 --- /dev/null +++ b/tests/checkasm/qpeldsp.c @@ -0,0 +1,108 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+
+#include "checkasm.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/macros.h"
+#include "libavutil/mem_internal.h"
+#include "libavcodec/qpeldsp.h"
+
+#define MAX_BLOCK_SIZE 16
+#define MAX_STRIDE 64
+// BUF_SIZE is bigger than necessary in order to test strides > block width.
+#define BUF_SIZE ((MAX_BLOCK_SIZE - 1) * MAX_STRIDE + MAX_BLOCK_SIZE)
+// Due to qpel interpolation the input needs to have one more line than
+// the output and the last line needs one more element.
+// The input is not subject to alignment requirements; making the input buffer
+// bigger (by MAX_BLOCK_SIZE - 1) allows us to use a random misalignment.
+#define INPUT_BUF_SIZE (MAX_BLOCK_SIZE * MAX_STRIDE + MAX_BLOCK_SIZE + 1 + (MAX_BLOCK_SIZE - 1))
+// Fill two equally sized byte arrays with the same rnd() words, four bytes per step via AV_WN32A.
+#define randomize_buffers(buf0, buf1)                                        \
+    do {                                                                     \
+        static_assert(sizeof(buf0) == sizeof(buf1), "Incompatible buffers"); \
+        static_assert(!(sizeof(buf0) % 4), "Tail handling needed");          \
+        static_assert(sizeof(buf0[0]) == 1 && sizeof(buf1[0]) == 1,          \
+                      "Pointer arithmetic needs to be adapted");             \
+        for (size_t k = 0; k < sizeof(buf0); k += 4) {                       \
+            uint32_t r = rnd();                                              \
+            AV_WN32A(buf0 + k, r);                                           \
+            AV_WN32A(buf1 + k, r);                                           \
+        }                                                                    \
+    } while (0)
+// checkasm test for QpelDSPContext: runs entries [0..1][0..15] of the put/avg/put_no_rnd qpel
+// tables through call_ref()/call_new() on identical random buffers and fail()s on any mismatch.
+void checkasm_check_qpeldsp(void)
+{
+    DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, srcbuf0)[INPUT_BUF_SIZE];
+    DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, srcbuf1)[INPUT_BUF_SIZE];
+    DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, dstbuf0)[BUF_SIZE];
+    DECLARE_ALIGNED(MAX_BLOCK_SIZE, uint8_t, dstbuf1)[BUF_SIZE];
+    QpelDSPContext qdsp;
+    static const struct { // tables under test, located by their offset inside QpelDSPContext
+        const char *name;
+        size_t offset;
+    } tests[] = {
+#define TEST(NAME) { .name = #NAME, .offset = offsetof(QpelDSPContext, NAME) }
+        TEST(put_qpel_pixels_tab),
+        TEST(avg_qpel_pixels_tab),
+        TEST(put_no_rnd_qpel_pixels_tab),
+    };
+    declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+    // NOTE(review): presumably the prototype used by call_ref/call_new/bench_new - confirm in checkasm.h.
+    ff_qpeldsp_init(&qdsp);
+    // func_tab views the member at tests[i].offset as rows of 16 qpel_mc_func pointers.
+    for (size_t i = 0; i < FF_ARRAY_ELEMS(tests); ++i) {
+        qpel_mc_func (*func_tab)[16] = (qpel_mc_func (*)[16])((char*)&qdsp + tests[i].offset);
+        for (unsigned j = 0; j < 2; ++j) {
+            const unsigned blocksize = MAX_BLOCK_SIZE >> j; // 16 for j == 0, 8 for j == 1
+            // One function per dxy (presumably the packed quarter-pel x/y position - TODO confirm).
+            for (unsigned dxy = 0; dxy < 16; ++dxy) {
+                if (check_func(func_tab[j][dxy], "%s[%u][%u]", tests[i].name, j, dxy)) {
+                    // Don't always use output that is 16-aligned: blocksize 8 starts at 0 or 8, 16 always at 0.
+                    size_t dst_offset = (rnd() % (MAX_BLOCK_SIZE / blocksize)) * blocksize;
+                    size_t src_offset = rnd() % MAX_BLOCK_SIZE; // random misalignment, 0..MAX_BLOCK_SIZE-1
+                    ptrdiff_t stride = (rnd() % (MAX_STRIDE / blocksize) + 1) * blocksize; // multiple of blocksize, <= MAX_STRIDE
+                    const uint8_t *src0 = srcbuf0 + src_offset, *src1 = srcbuf1 + src_offset;
+                    uint8_t *dst0 = dstbuf0 + dst_offset, *dst1 = dstbuf1 + dst_offset;
+                    // Depending on rnd()'s low bit, exercise a negative stride instead of a positive one.
+                    if (rnd() & 1) {
+                        // Flip stride: point at the last output line and step towards lower addresses.
+                        dst1 += (blocksize - 1) * stride;
+                        dst0 += (blocksize - 1) * stride;
+                        // Due to interpolation potentially blocksize + 1 lines are read
+                        // from src, hence blocksize * stride.
+                        src0 += blocksize * stride;
+                        src1 += blocksize * stride;
+                        stride = -stride;
+                    }
+                    // Both calls start from identical source and destination contents.
+                    randomize_buffers(srcbuf0, srcbuf1);
+                    randomize_buffers(dstbuf0, dstbuf1);
+                    call_ref(dst0, src0, stride);
+                    call_new(dst1, src1, stride);
+                    if (memcmp(srcbuf0, srcbuf1, sizeof(srcbuf0)) || memcmp(dstbuf0, dstbuf1, sizeof(dstbuf0)))
+                        fail(); // a source buffer was written to, or the outputs differ
+                    bench_new(dst0, src0, stride); // presumably timing only; dstbuf0 is not compared afterwards
+                }
+            }
+        }
+    }
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 7570c89ad9..178b630fba 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -40,6 +40,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
                 fate-checkasm-mpegvideoencdsp                           \
                 fate-checkasm-opusdsp                                   \
                 fate-checkasm-pixblockdsp                               \
+                fate-checkasm-qpeldsp                                   \
                 fate-checkasm-sbrdsp                                    \
                 fate-checkasm-rv34dsp                                   \
                 fate-checkasm-rv40dsp                                   \