mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
lavc/aarch64: motion estimation functions in neon
- ff_pix_abs16_neon - ff_pix_abs16_xy2_neon In direct micro benchmarks of these ff functions versus their C implementations, these functions performed as follows on AWS Graviton 3. ff_pix_abs16_neon: pix_abs_0_0_c: 141.1 pix_abs_0_0_neon: 19.6 ff_pix_abs16_xy2_neon: pix_abs_0_3_c: 269.1 pix_abs_0_3_neon: 39.3 Tested with: ./tests/checkasm/checkasm --test=motion --bench --disable-linux-perf Signed-off-by: Jonathan Swinney <jswinney@amazon.com> Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
20e2aa940c
commit
c471cc7474
@ -7,6 +7,7 @@ OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
||||
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
||||
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_ME_CMP) += aarch64/me_cmp_init_aarch64.o
|
||||
OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_init.o
|
||||
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
|
||||
OBJS-$(CONFIG_PIXBLOCKDSP) += aarch64/pixblockdsp_init_aarch64.o
|
||||
@ -47,6 +48,7 @@ NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
|
||||
NEON-OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_neon.o \
|
||||
aarch64/simple_idct_neon.o
|
||||
NEON-OBJS-$(CONFIG_MDCT) += aarch64/mdct_neon.o
|
||||
NEON-OBJS-$(CONFIG_ME_CMP) += aarch64/me_cmp_neon.o
|
||||
NEON-OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_neon.o
|
||||
NEON-OBJS-$(CONFIG_PIXBLOCKDSP) += aarch64/pixblockdsp_neon.o
|
||||
NEON-OBJS-$(CONFIG_VC1DSP) += aarch64/vc1dsp_neon.o
|
||||
|
39
libavcodec/aarch64/me_cmp_init_aarch64.c
Normal file
39
libavcodec/aarch64/me_cmp_init_aarch64.c
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/aarch64/cpu.h"
|
||||
#include "libavcodec/mpegvideo.h"
|
||||
|
||||
/* NEON routines exported from me_cmp_neon.S. In both of them the first
 * argument is not read by the assembly; blk1/blk2 are the two blocks being
 * compared, stride is the byte distance between rows and h the row count. */
int ff_pix_abs16_neon(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
||||
ptrdiff_t stride, int h);
|
||||
int ff_pix_abs16_xy2_neon(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
||||
ptrdiff_t stride, int h);
|
||||
|
||||
/**
 * Install the AArch64 NEON comparison functions into a MECmpContext.
 *
 * When the running CPU reports NEON support, pix_abs[0][0] is pointed at
 * ff_pix_abs16_neon and pix_abs[0][3] at ff_pix_abs16_xy2_neon. All other
 * entries of the context are left untouched.
 *
 * @param c     context whose function pointers are overridden
 * @param avctx not used by this function
 */
av_cold void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to install without NEON. */
    if (!have_neon(flags))
        return;

    c->pix_abs[0][0] = ff_pix_abs16_neon;
    c->pix_abs[0][3] = ff_pix_abs16_xy2_neon;
}
|
205
libavcodec/aarch64/me_cmp_neon.S
Normal file
205
libavcodec/aarch64/me_cmp_neon.S
Normal file
@ -0,0 +1,205 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Jonathan Swinney <jswinney@amazon.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
function ff_pix_abs16_neon, export=1
// Sum of absolute differences between two 16-byte-wide blocks over h rows.
// The loop at label 1 consumes four rows per pass; the loop at label 2
// consumes one row per pass and handles h < 4 as well as the h % 4 leftover.
// The running total is kept in v18 and returned in w0.
// NOTE(review): with h == 0 the first branch enters the loop at label 2,
// which processes a row before testing the counter -- presumably callers
// always pass h > 0; confirm.
|
||||
// x0 unused
|
||||
// x1 uint8_t *pix1
|
||||
// x2 uint8_t *pix2
|
||||
// x3 ptrdiff_t stride
|
||||
// w4 int h
|
||||
cmp w4, #4 // if h < 4, jump to completion section
|
||||
movi v18.4S, #0 // clear result accumulator (sits between cmp and b.lt; movi leaves the flags alone)
|
||||
b.lt 2f // flags still come from the cmp above
|
||||
1:
|
||||
ld1 {v0.16b}, [x1], x3 // load pix1 row 0, then advance x1 by stride
|
||||
ld1 {v4.16b}, [x2], x3 // load pix2 row 0, then advance x2 by stride
|
||||
ld1 {v1.16b}, [x1], x3 // load pix1 row 1
|
||||
ld1 {v5.16b}, [x2], x3 // load pix2 row 1
|
||||
uabdl v16.8h, v0.8b, v4.8b // widening absolute difference, bytes 0..7: starts a fresh v16 (no accumulate)
|
||||
uabdl2 v17.8h, v0.16b, v4.16b // widening absolute difference, bytes 8..15: starts a fresh v17
|
||||
ld1 {v2.16b}, [x1], x3 // load pix1 row 2
|
||||
ld1 {v6.16b}, [x2], x3 // load pix2 row 2
|
||||
uabal v16.8h, v1.8b, v5.8b // absolute difference accumulate, row 1 bytes 0..7
|
||||
uabal2 v17.8h, v1.16b, v5.16b // absolute difference accumulate, row 1 bytes 8..15
|
||||
ld1 {v3.16b}, [x1], x3 // load pix1 row 3
|
||||
ld1 {v7.16b}, [x2], x3 // load pix2 row 3
|
||||
uabal v16.8h, v2.8b, v6.8b // accumulate row 2 bytes 0..7
|
||||
uabal2 v17.8h, v2.16b, v6.16b // accumulate row 2 bytes 8..15
|
||||
sub w4, w4, #4 // h -= 4
|
||||
uabal v16.8h, v3.8b, v7.8b // accumulate row 3 bytes 0..7
|
||||
uabal2 v17.8h, v3.16b, v7.16b // accumulate row 3 bytes 8..15
|
||||
cmp w4, #4 // if h >= 4, loop (flags consumed by b.ge below)
|
||||
add v16.8h, v16.8h, v17.8h // merge the two half-row accumulators
|
||||
uaddlv s16, v16.8h // add up everything in v16 accumulator
|
||||
add d18, d16, d18 // add to the end result register
|
||||
|
||||
b.ge 1b
|
||||
cbnz w4, 2f // if iterations remain, jump to completion section
|
||||
|
||||
fmov w0, s18 // copy result to general purpose register
|
||||
ret
|
||||
|
||||
2:
|
||||
ld1 {v0.16b}, [x1], x3 // load pix1
|
||||
ld1 {v4.16b}, [x2], x3 // load pix2
|
||||
uabdl v16.8h, v0.8b, v4.8b // widening absolute difference, bytes 0..7: starts a fresh v16
|
||||
uabal2 v16.8h, v0.16b, v4.16b // absolute difference accumulate, bytes 8..15
|
||||
subs w4, w4, #1 // h -= 1 (sets the flags tested by b.ne below)
|
||||
addv h16, v16.8h // add up v16; each lane is at most 2 * 255, so the 8-lane sum fits in 16 bits
|
||||
add d18, d16, d18 // add to result
|
||||
b.ne 2b // loop while rows remain
|
||||
|
||||
fmov w0, s18 // copy result to general purpose register
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_pix_abs16_xy2_neon, export=1
        // Sum of absolute differences between pix1 and the rounded average of
        // each 2x2 neighbourhood of pix2, over a 16-byte-wide block of h rows:
        //   avg[x] = (pix2[x] + pix2[x+1] + pix3[x] + pix3[x+1] + 2) >> 2
        // where pix3 is the row following pix2 (pix2 + stride).
        // The result is accumulated in v0 and returned in w0.
        //
        // x0           unused
        // x1           uint8_t *pix1
        // x2           uint8_t *pix2
        // x3           ptrdiff_t stride
        // w4           int h
        //
        // NOTE(review): with h == 0 the first branch enters the completion loop,
        // which processes a row before testing the counter -- presumably callers
        // always pass h > 0; confirm.

        add             x5, x2, x3                      // use x5 to hold uint8_t *pix3
        movi            v0.2d, #0                       // initialize the result register

        // Load initial pix2 values for either the unrolled version or completion version.
        ldur            q4, [x2, #1]                    // load pix2+1
        ldr             q3, [x2]                        // load pix2
        uaddl           v2.8h, v4.8b, v3.8b             // pix2 + pix2+1 0..7
        uaddl2          v3.8h, v4.16b, v3.16b           // pix2 + pix2+1 8..15
        cmp             w4, #4                          // if h < 4 jump to the completion version
        b.lt            2f
1:
        // This is an unrolled implementation. It completes 4 iterations of the C loop per branch.
        // In each iteration, pix2[i+1] == pix3[i]. This means we need only three loads per iteration,
        // plus two at the beginning to start.
        ldur            q5, [x5, #1]                    // load pix3+1
        ld1             {v4.16b}, [x5], x3              // load pix3
        ld1             {v1.16b}, [x1], x3              // load pix1

        ldur            q7, [x5, #1]                    // load pix3+1
        ld1             {v6.16b}, [x5], x3              // load pix3
        ld1             {v16.16b}, [x1], x3             // load pix1

        ldur            q19, [x5, #1]                   // load pix3+1
        ld1             {v18.16b}, [x5], x3             // load pix3
        ld1             {v17.16b}, [x1], x3             // load pix1

        ldur            q22, [x5, #1]                   // load pix3+1
        ld1             {v21.16b}, [x5], x3             // load pix3
        ld1             {v20.16b}, [x1], x3             // load pix1

        // These blocks compute the average: avg(pix2[n], pix2[n+1], pix3[n], pix3[n+1])
        uaddl           v30.8h, v4.8b, v5.8b            // pix3 + pix3+1 0..7
        uaddl2          v31.8h, v4.16b, v5.16b          // pix3 + pix3+1 8..15
        add             v23.8h, v2.8h, v30.8h           // add up 0..7, using pix2 + pix2+1 values from previous iteration
        add             v24.8h, v3.8h, v31.8h           // add up 8..15, using pix2 + pix2+1 values from previous iteration
        rshrn           v23.8b, v23.8h, #2              // shift right 2 0..7 (rounding shift right)
        rshrn2          v23.16b, v24.8h, #2             // shift right 2 8..15

        uaddl           v2.8h, v6.8b, v7.8b             // pix3 + pix3+1 0..7
        uaddl2          v3.8h, v6.16b, v7.16b           // pix3 + pix3+1 8..15
        add             v26.8h, v30.8h, v2.8h           // add up 0..7, using pix2 + pix2+1 values from pix3 above
        add             v27.8h, v31.8h, v3.8h           // add up 8..15, using pix2 + pix2+1 values from pix3 above
        rshrn           v26.8b, v26.8h, #2              // shift right 2 0..7 (rounding shift right)
        rshrn2          v26.16b, v27.8h, #2             // shift right 2 8..15

        uaddl           v4.8h, v18.8b, v19.8b           // pix3 + pix3+1 0..7
        uaddl2          v5.8h, v18.16b, v19.16b         // pix3 + pix3+1 8..15
        add             v28.8h, v2.8h, v4.8h            // add up 0..7, using pix2 + pix2+1 values from pix3 above
        add             v29.8h, v3.8h, v5.8h            // add up 8..15, using pix2 + pix2+1 values from pix3 above
        rshrn           v28.8b, v28.8h, #2              // shift right 2 0..7 (rounding shift right)
        rshrn2          v28.16b, v29.8h, #2             // shift right 2 8..15

        uaddl           v2.8h, v21.8b, v22.8b           // pix3 + pix3+1 0..7 (also carried into the next pass as pix2 + pix2+1)
        uaddl2          v3.8h, v21.16b, v22.16b         // pix3 + pix3+1 8..15 (also carried into the next pass)
        add             v30.8h, v4.8h, v2.8h            // add up 0..7, using pix2 + pix2+1 values from pix3 above
        add             v31.8h, v5.8h, v3.8h            // add up 8..15, using pix2 + pix2+1 values from pix3 above
        rshrn           v30.8b, v30.8h, #2              // shift right 2 0..7 (rounding shift right)
        rshrn2          v30.16b, v31.8h, #2             // shift right 2 8..15

        // Averages are now stored in these registers:
        //   v23, v26, v28, v30
        // pix1 values in these registers:
        //   v1, v16, v17, v20
        // available:
        //   v4, v5, v7, v18, v19, v24, v25, v27, v29, v31

        sub             w4, w4, #4                      // h -= 4

        // Using absolute-difference instructions instead of absolute-difference-accumulate allows
        // us to keep the results in 16b vectors instead of widening values with twice the instructions.
        // This approach also has fewer data dependencies, allowing better instruction level parallelism.
        uabd            v4.16b, v1.16b, v23.16b         // absolute difference 0..15, i=0
        uabd            v5.16b, v16.16b, v26.16b        // absolute difference 0..15, i=1
        uabd            v6.16b, v17.16b, v28.16b        // absolute difference 0..15, i=2
        uabd            v7.16b, v20.16b, v30.16b        // absolute difference 0..15, i=3

        cmp             w4, #4                          // loop if h >= 4 (flags consumed by b.ge below)

        // Now add up all the values in each vector, v4-v7 with widening adds
        uaddl           v19.8h, v4.8b, v5.8b
        uaddl2          v18.8h, v4.16b, v5.16b
        uaddl           v4.8h, v6.8b, v7.8b
        uaddl2          v5.8h, v6.16b, v7.16b
        add             v4.8h, v4.8h, v5.8h
        add             v4.8h, v4.8h, v18.8h
        add             v4.8h, v4.8h, v19.8h
        uaddlv          s4, v4.8h                       // finish adding up accumulated values
        add             d0, d0, d4                      // add the value to the top level accumulator

        b.ge            1b
        cbnz            w4, 2f                          // if iterations remain jump to completion section

        fmov            w0, s0                          // copy result to general purpose register
        ret
2:
        // v2 and v3 are set either at the end of this loop or by the unrolled version,
        // which branches here to complete iterations when h % 4 != 0.
        ldur            q5, [x5, #1]                    // load pix3+1
        ld1             {v4.16b}, [x5], x3              // load pix3
        ld1             {v1.16b}, [x1], x3              // load pix1
        subs            w4, w4, #1                      // decrement h (sets the flags tested by b.ne below)

        uaddl           v18.8h, v4.8b, v5.8b            // pix3 + pix3+1 0..7
        uaddl2          v19.8h, v4.16b, v5.16b          // pix3 + pix3+1 8..15
        add             v16.8h, v2.8h, v18.8h           // add up 0..7, using pix2 + pix2+1 values from previous iteration
        add             v17.8h, v3.8h, v19.8h           // add up 8..15, using pix2 + pix2+1 values from previous iteration
        // divide by 4 to compute the average of values summed above
        urshr           v16.8h, v16.8h, #2              // shift right by 2 0..7 (rounding shift right)
        urshr           v17.8h, v17.8h, #2              // shift right by 2 8..15

        // v7 is used as scratch here rather than v8: the low 64 bits of v8-v15 are
        // callee-saved under AAPCS64 and this function does not save or restore them.
        // v7 holds nothing live in this loop.
        uxtl2           v7.8h, v1.16b                   // 8->16 bits pix1 8..15
        uxtl            v1.8h, v1.8b                    // 8->16 bits pix1 0..7

        uabd            v6.8h, v1.8h, v16.8h            // absolute difference 0..7
        uaba            v6.8h, v7.8h, v17.8h            // absolute difference accumulate 8..15
        mov             v2.16b, v18.16b                 // pix3 -> pix2
        mov             v3.16b, v19.16b                 // pix3+1 -> pix2+1
        uaddlv          s6, v6.8h                       // add up accumulator in v6
        add             d0, d0, d6                      // add to the final result

        b.ne            2b                              // loop if h > 0
        fmov            w0, s0                          // copy result to general purpose register
        ret
endfunc
|
@ -1061,7 +1061,9 @@ av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
|
||||
ff_dsputil_init_dwt(c);
|
||||
#endif
|
||||
|
||||
#if ARCH_ALPHA
|
||||
#if ARCH_AARCH64
|
||||
ff_me_cmp_init_aarch64(c, avctx);
|
||||
#elif ARCH_ALPHA
|
||||
ff_me_cmp_init_alpha(c, avctx);
|
||||
#elif ARCH_ARM
|
||||
ff_me_cmp_init_arm(c, avctx);
|
||||
|
@ -80,6 +80,7 @@ typedef struct MECmpContext {
|
||||
} MECmpContext;
|
||||
|
||||
void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx);
|
||||
void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx);
|
||||
void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx);
|
||||
void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx);
|
||||
void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx);
|
||||
|
@ -12,6 +12,7 @@ AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o
|
||||
AVCODECOBJS-$(CONFIG_IDCTDSP) += idctdsp.o
|
||||
AVCODECOBJS-$(CONFIG_LLVIDDSP) += llviddsp.o
|
||||
AVCODECOBJS-$(CONFIG_LLVIDENCDSP) += llviddspenc.o
|
||||
AVCODECOBJS-$(CONFIG_ME_CMP) += motion.o
|
||||
AVCODECOBJS-$(CONFIG_VC1DSP) += vc1dsp.o
|
||||
AVCODECOBJS-$(CONFIG_VP8DSP) += vp8dsp.o
|
||||
AVCODECOBJS-$(CONFIG_VIDEODSP) += videodsp.o
|
||||
|
@ -135,6 +135,9 @@ static const struct {
|
||||
#if CONFIG_LLVIDENCDSP
|
||||
{ "llviddspenc", checkasm_check_llviddspenc },
|
||||
#endif
|
||||
#if CONFIG_ME_CMP
|
||||
{ "motion", checkasm_check_motion },
|
||||
#endif
|
||||
#if CONFIG_OPUS_DECODER
|
||||
{ "opusdsp", checkasm_check_opusdsp },
|
||||
#endif
|
||||
|
@ -68,6 +68,7 @@ void checkasm_check_idctdsp(void);
|
||||
void checkasm_check_jpeg2000dsp(void);
|
||||
void checkasm_check_llviddsp(void);
|
||||
void checkasm_check_llviddspenc(void);
|
||||
void checkasm_check_motion(void);
|
||||
void checkasm_check_nlmeans(void);
|
||||
void checkasm_check_opusdsp(void);
|
||||
void checkasm_check_pixblockdsp(void);
|
||||
|
151
tests/checkasm/motion.c
Normal file
151
tests/checkasm/motion.c
Normal file
@ -0,0 +1,151 @@
|
||||
/*
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
#include "libavutil/mem_internal.h"
|
||||
|
||||
#include "libavcodec/me_cmp.h"
|
||||
|
||||
#include "checkasm.h"
|
||||
|
||||
/* Fill the first `size` bytes of `tab` with values drawn from rnd(),
 * one rnd() call per byte, in ascending index order. */
static void fill_random(uint8_t *tab, int size)
{
    int pos = 0;

    while (pos < size) {
        tab[pos] = rnd() % 256;
        pos++;
    }
}
|
||||
|
||||
/**
 * Compare one motion-estimation comparison function against its reference
 * and benchmark it.
 *
 * Two WIDTH x HEIGHT images are filled with random bytes. For ITERATIONS
 * random offsets into img2, the reference and the tested implementation are
 * called on an 8-row block and their return values compared; the first
 * mismatch is reported and ends the comparison loop.
 *
 * @param name      label used for check_func() and for the error message
 * @param test_func function under test; NULL entries are silently skipped
 */
static void test_motion(const char *name, me_cmp_func test_func)
{
    /* test configuration */
#define ITERATIONS 16
#define WIDTH 64
#define HEIGHT 64

    /* motion estimation can look up to 17 bytes ahead */
    static const int look_ahead = 17;

    int i, x, y, d1, d2;
    uint8_t *ptr;

    /* img1 is passed as blk1, which must be aligned to the block width
     * (8 or 16), so it needs 16-byte alignment. img2 is only ever used
     * through an arbitrary offset as blk2, which has no alignment rule. */
    LOCAL_ALIGNED_16(uint8_t, img1, [WIDTH * HEIGHT]);
    LOCAL_ALIGNED_8(uint8_t, img2, [WIDTH * HEIGHT]);

    declare_func_emms(AV_CPU_FLAG_MMX, int, struct MpegEncContext *c,
                      uint8_t *blk1 /* align width (8 or 16) */,
                      uint8_t *blk2 /* align 1 */, ptrdiff_t stride,
                      int h);

    if (test_func == NULL) {
        return;
    }

    /* test correctness */
    fill_random(img1, WIDTH * HEIGHT);
    fill_random(img2, WIDTH * HEIGHT);

    if (check_func(test_func, "%s", name)) {
        for (i = 0; i < ITERATIONS; i++) {
            /* keep the block plus its look-ahead inside img2 */
            x = rnd() % (WIDTH - look_ahead);
            y = rnd() % (HEIGHT - look_ahead);

            ptr = img2 + y * WIDTH + x;
            d2 = call_ref(NULL, img1, ptr, WIDTH, 8);
            d1 = call_new(NULL, img1, ptr, WIDTH, 8);

            if (d1 != d2) {
                fail();
                printf("func: %s, x=%d y=%d, error: asm=%d c=%d\n", name, x, y, d1, d2);
                break;
            }
        }
        // benchmark with the final value of ptr
        bench_new(NULL, img1, ptr, WIDTH, 8);
    }
}
|
||||
|
||||
/* X-macro: applies XX() to the name of each one-dimensional function array
 * member of MECmpContext that check_motion() iterates over and tests. */
#define ME_CMP_1D_ARRAYS(XX) \
|
||||
XX(sad) \
|
||||
XX(sse) \
|
||||
XX(hadamard8_diff) \
|
||||
XX(vsad) \
|
||||
XX(vsse) \
|
||||
XX(nsse) \
|
||||
XX(me_pre_cmp) \
|
||||
XX(me_cmp) \
|
||||
XX(me_sub_cmp) \
|
||||
XX(mb_cmp) \
|
||||
XX(ildct_cmp) \
|
||||
XX(frame_skip_cmp) \
|
||||
XX(median_sad)
|
||||
|
||||
// tests for functions not yet implemented
|
||||
#if 0
|
||||
XX(dct_sad) \
|
||||
XX(quant_psnr) \
|
||||
XX(bit) \
|
||||
XX(rd) \
|
||||
XX(w53) \
|
||||
XX(w97) \
|
||||
XX(dct_max) \
|
||||
XX(dct264_sad) \
|
||||
|
||||
#endif
|
||||
|
||||
static void check_motion(void)
|
||||
{
|
||||
char buf[64];
|
||||
AVCodecContext *av_ctx;
|
||||
MECmpContext me_ctx;
|
||||
|
||||
memset(&me_ctx, 0, sizeof(me_ctx));
|
||||
|
||||
/* allocate AVCodecContext */
|
||||
av_ctx = avcodec_alloc_context3(NULL);
|
||||
av_ctx->flags |= AV_CODEC_FLAG_BITEXACT;
|
||||
|
||||
ff_me_cmp_init(&me_ctx, av_ctx);
|
||||
|
||||
for (int i = 0; i < FF_ARRAY_ELEMS(me_ctx.pix_abs); i++) {
|
||||
for (int j = 0; j < FF_ARRAY_ELEMS(me_ctx.pix_abs[0]); j++) {
|
||||
snprintf(buf, sizeof(buf), "pix_abs_%d_%d", i, j);
|
||||
test_motion(buf, me_ctx.pix_abs[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
#define XX(me_cmp_array) \
|
||||
for (int i = 0; i < FF_ARRAY_ELEMS(me_ctx.me_cmp_array); i++) { \
|
||||
snprintf(buf, sizeof(buf), #me_cmp_array "_%d", i); \
|
||||
test_motion(buf, me_ctx.me_cmp_array[i]); \
|
||||
}
|
||||
ME_CMP_1D_ARRAYS(XX)
|
||||
#undef XX
|
||||
|
||||
avcodec_free_context(&av_ctx);
|
||||
}
|
||||
|
||||
/* checkasm entry point for the "motion" test group: runs every motion
 * comparison check, then reports the outcome under the name "motion". */
void checkasm_check_motion(void)
{
    check_motion();
    report("motion");
}
|
@ -23,6 +23,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \
|
||||
fate-checkasm-jpeg2000dsp \
|
||||
fate-checkasm-llviddsp \
|
||||
fate-checkasm-llviddspenc \
|
||||
fate-checkasm-motion \
|
||||
fate-checkasm-opusdsp \
|
||||
fate-checkasm-pixblockdsp \
|
||||
fate-checkasm-sbrdsp \
|
||||
|
Loading…
Reference in New Issue
Block a user