You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI
This commit enables assembly code using Intel AVX512 VNNI for the sobel filter and adds a unit test for it. Benchmark results: sobel_c: 4537, sobel_avx512icl: 2136. Signed-off-by: bwang30 <bin.wang@intel.com> Signed-off-by: Haihao Xiang <haihao.xiang@intel.com>
This commit is contained in:
		| @@ -21,6 +21,7 @@ | ||||
| #ifndef AVFILTER_CONVOLUTION_H | ||||
| #define AVFILTER_CONVOLUTION_H | ||||
| #include "avfilter.h" | ||||
| #include "libavutil/intreadwrite.h" | ||||
|  | ||||
| enum MatrixMode { | ||||
|     MATRIX_SQUARE, | ||||
| @@ -61,4 +62,77 @@ typedef struct ConvolutionContext { | ||||
| } ConvolutionContext; | ||||
|  | ||||
| void ff_convolution_init_x86(ConvolutionContext *s); | ||||
| void ff_sobel_init_x86(ConvolutionContext *s, int depth, int nb_planes); | ||||
|  | ||||
| /** | ||||
|  * Fill c[0..8] with pointers to the 3x3 neighbourhood around (x, y). | ||||
|  * | ||||
|  * Entry k addresses column x + (k % 3) - 1 of row y + (k / 3) - 1. | ||||
|  * Coordinates outside the plane are mirrored: negative values are reflected | ||||
|  * through their absolute value, and values >= w (or >= h) are folded back as | ||||
|  * 2 * w - 1 - value (resp. 2 * h - 1 - value). | ||||
|  * | ||||
|  * @param radius unused by this function | ||||
|  * @param c      output array, at least 9 entries | ||||
|  * @param src    first byte of the plane | ||||
|  * @param stride byte distance between two rows of src | ||||
|  * @param bpc    bytes per sample, used to scale the column offset | ||||
|  */ | ||||
| static void setup_3x3(int radius, const uint8_t *c[], const uint8_t *src, int stride, | ||||
|                       int x, int w, int y, int h, int bpc) | ||||
| { | ||||
|     for (int tap = 0; tap < 9; tap++) { | ||||
|         const int dx = tap % 3 - 1; | ||||
|         const int dy = tap / 3 - 1; | ||||
|         int col = FFABS(x + dx); | ||||
|         int row = FFABS(y + dy); | ||||
|  | ||||
|         if (col >= w) | ||||
|             col = 2 * w - 1 - col; | ||||
|         if (row >= h) | ||||
|             row = 2 * h - 1 - row; | ||||
|  | ||||
|         c[tap] = src + col * bpc + row * stride; | ||||
|     } | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * 8-bit Sobel magnitude for one run of samples. | ||||
|  * | ||||
|  * c[] holds the nine neighbourhood pointers in the order produced by | ||||
|  * setup_3x3() (c[4], the centre, is not read). For each sample two integer | ||||
|  * sums are formed, their Euclidean norm is taken, multiplied by scale, | ||||
|  * offset by delta and clipped to 0..255 via av_clip_uint8(). | ||||
|  * | ||||
|  * matrix, peak, radius, dstride, stride and size are not used here. | ||||
|  */ | ||||
| static void filter_sobel(uint8_t *dst, int width, | ||||
|                          float scale, float delta, const int *const matrix, | ||||
|                          const uint8_t *c[], int peak, int radius, | ||||
|                          int dstride, int stride, int size) | ||||
| { | ||||
|     const uint8_t *top_l = c[0], *top_m = c[1], *top_r = c[2]; | ||||
|     const uint8_t *mid_l = c[3],                *mid_r = c[5]; | ||||
|     const uint8_t *bot_l = c[6], *bot_m = c[7], *bot_r = c[8]; | ||||
|  | ||||
|     for (int i = 0; i < width; i++) { | ||||
|         /* bottom row minus top row, middle tap weighted by 2 */ | ||||
|         const int rows = (bot_l[i] - top_l[i]) + | ||||
|                          (bot_m[i] - top_m[i]) * 2 + | ||||
|                          (bot_r[i] - top_r[i]); | ||||
|         /* right column minus left column, middle tap weighted by 2 */ | ||||
|         const int cols = (top_r[i] - top_l[i]) + | ||||
|                          (mid_r[i] - mid_l[i]) * 2 + | ||||
|                          (bot_r[i] - bot_l[i]); | ||||
|         float suma = rows; | ||||
|         float sumb = cols; | ||||
|  | ||||
|         dst[i] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Sobel magnitude for samples stored as 16-bit words. | ||||
|  * | ||||
|  * Same arithmetic as filter_sobel(), but every tap is fetched with | ||||
|  * AV_RN16A() at byte offset 2 * x, the output is written as uint16_t and | ||||
|  * the result is clipped to 0..peak with av_clip(). | ||||
|  * | ||||
|  * matrix, radius, dstride, stride and size are not used here. | ||||
|  */ | ||||
| static void filter16_sobel(uint8_t *dstp, int width, | ||||
|                            float scale, float delta, const int *const matrix, | ||||
|                            const uint8_t *c[], int peak, int radius, | ||||
|                            int dstride, int stride, int size) | ||||
| { | ||||
|     uint16_t *out = (uint16_t *)dstp; | ||||
|  | ||||
|     for (int i = 0; i < width; i++) { | ||||
|         const int off   = 2 * i; | ||||
|         const int top_l = AV_RN16A(&c[0][off]); | ||||
|         const int top_m = AV_RN16A(&c[1][off]); | ||||
|         const int top_r = AV_RN16A(&c[2][off]); | ||||
|         const int mid_l = AV_RN16A(&c[3][off]); | ||||
|         const int mid_r = AV_RN16A(&c[5][off]); | ||||
|         const int bot_l = AV_RN16A(&c[6][off]); | ||||
|         const int bot_m = AV_RN16A(&c[7][off]); | ||||
|         const int bot_r = AV_RN16A(&c[8][off]); | ||||
|         /* bottom row minus top row, middle tap weighted by 2 */ | ||||
|         float suma = (bot_l - top_l) + (bot_m - top_m) * 2 + (bot_r - top_r); | ||||
|         /* right column minus left column, middle tap weighted by 2 */ | ||||
|         float sumb = (top_r - top_l) + (mid_r - mid_l) * 2 + (bot_r - bot_l); | ||||
|  | ||||
|         out[i] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, peak); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Install the sobel callbacks and per-plane parameters in s. | ||||
|  * | ||||
|  * All four plane slots get the 3x3 setup function, size 3, and rdiv/bias | ||||
|  * taken from s->scale / s->delta. A plane is marked "copy" when its bit is | ||||
|  * not set in s->planes. The C filter is chosen from the depth argument | ||||
|  * (16-bit variant above 8 bits); the same depth is then handed to the x86 | ||||
|  * initialiser so both selections are based on one value. | ||||
|  * | ||||
|  * @param s         context to fill; s->planes, s->scale and s->delta are read | ||||
|  * @param depth     bits per sample of the input | ||||
|  * @param nb_planes number of planes, forwarded to ff_sobel_init_x86() | ||||
|  */ | ||||
| static av_unused void ff_sobel_init(ConvolutionContext *s, int depth, int nb_planes) | ||||
| { | ||||
|     for (int i = 0; i < 4; i++) { | ||||
|         /* honour the depth argument instead of silently re-reading s->depth */ | ||||
|         s->filter[i] = depth > 8 ? filter16_sobel : filter_sobel; | ||||
|         s->copy[i] = !((1 << i) & s->planes); | ||||
|         s->size[i] = 3; | ||||
|         s->setup[i] = setup_3x3; | ||||
|         s->rdiv[i] = s->scale; | ||||
|         s->bias[i] = s->delta; | ||||
|     } | ||||
| #if ARCH_X86_64 | ||||
|     ff_sobel_init_x86(s, depth, nb_planes); | ||||
| #endif | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -139,24 +139,6 @@ static void filter16_roberts(uint8_t *dstp, int width, | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void filter16_sobel(uint8_t *dstp, int width, | ||||
|                            float scale, float delta, const int *const matrix, | ||||
|                            const uint8_t *c[], int peak, int radius, | ||||
|                            int dstride, int stride, int size) | ||||
| { | ||||
|     uint16_t *dst = (uint16_t *)dstp; | ||||
|     int x; | ||||
|  | ||||
|     for (x = 0; x < width; x++) { | ||||
|         float suma = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[1][2 * x]) * -2 + AV_RN16A(&c[2][2 * x]) * -1 + | ||||
|                      AV_RN16A(&c[6][2 * x]) *  1 + AV_RN16A(&c[7][2 * x]) *  2 + AV_RN16A(&c[8][2 * x]) *  1; | ||||
|         float sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) *  1 + AV_RN16A(&c[3][2 * x]) * -2 + | ||||
|                      AV_RN16A(&c[5][2 * x]) *  2 + AV_RN16A(&c[6][2 * x]) * -1 + AV_RN16A(&c[8][2 * x]) *  1; | ||||
|  | ||||
|         dst[x] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, peak); | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void filter16_scharr(uint8_t *dstp, int width, | ||||
|                             float scale, float delta, const int *const matrix, | ||||
|                             const uint8_t *c[], int peak, int radius, | ||||
| @@ -261,26 +243,6 @@ static void filter_roberts(uint8_t *dst, int width, | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void filter_sobel(uint8_t *dst, int width, | ||||
|                          float scale, float delta, const int *const matrix, | ||||
|                          const uint8_t *c[], int peak, int radius, | ||||
|                          int dstride, int stride, int size) | ||||
| { | ||||
|     const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2]; | ||||
|     const uint8_t *c3 = c[3], *c5 = c[5]; | ||||
|     const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8]; | ||||
|     int x; | ||||
|  | ||||
|     for (x = 0; x < width; x++) { | ||||
|         float suma = c0[x] * -1 + c1[x] * -2 + c2[x] * -1 + | ||||
|                      c6[x] *  1 + c7[x] *  2 + c8[x] *  1; | ||||
|         float sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -2 + | ||||
|                      c5[x] *  2 + c6[x] * -1 + c8[x] *  1; | ||||
|  | ||||
|         dst[x] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta); | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void filter_scharr(uint8_t *dst, int width, | ||||
|                           float scale, float delta, const int *const matrix, | ||||
|                           const uint8_t *c[], int peak, int radius, | ||||
| @@ -552,22 +514,6 @@ static void filter_column(uint8_t *dst, int height, | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void setup_3x3(int radius, const uint8_t *c[], const uint8_t *src, int stride, | ||||
|                       int x, int w, int y, int h, int bpc) | ||||
| { | ||||
|     int i; | ||||
|  | ||||
|     for (i = 0; i < 9; i++) { | ||||
|         int xoff = FFABS(x + ((i % 3) - 1)); | ||||
|         int yoff = FFABS(y + (i / 3) - 1); | ||||
|  | ||||
|         xoff = xoff >= w ? 2 * w - 1 - xoff : xoff; | ||||
|         yoff = yoff >= h ? 2 * h - 1 - yoff : yoff; | ||||
|  | ||||
|         c[i] = src + xoff * bpc + yoff * stride; | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void setup_5x5(int radius, const uint8_t *c[], const uint8_t *src, int stride, | ||||
|                       int x, int w, int y, int h, int bpc) | ||||
| { | ||||
| @@ -708,6 +654,18 @@ static int param_init(AVFilterContext *ctx) | ||||
|     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); | ||||
|     int p, i; | ||||
|  | ||||
|     s->depth = desc->comp[0].depth; | ||||
|     s->max = (1 << s->depth) - 1; | ||||
|  | ||||
|     s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); | ||||
|     s->planewidth[0] = s->planewidth[3] = inlink->w; | ||||
|     s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); | ||||
|     s->planeheight[0] = s->planeheight[3] = inlink->h; | ||||
|  | ||||
|     s->nb_planes = av_pix_fmt_count_planes(inlink->format); | ||||
|     s->nb_threads = ff_filter_get_nb_threads(ctx); | ||||
|     s->bpc = (s->depth + 7) / 8; | ||||
|  | ||||
|     if (!strcmp(ctx->filter->name, "convolution")) { | ||||
|         for (i = 0; i < 4; i++) { | ||||
|             int *matrix = (int *)s->matrix[i]; | ||||
| @@ -804,14 +762,7 @@ static int param_init(AVFilterContext *ctx) | ||||
|             s->bias[i] = s->delta; | ||||
|         } | ||||
|     } else if (!strcmp(ctx->filter->name, "sobel")) { | ||||
|         for (i = 0; i < 4; i++) { | ||||
|             s->filter[i] = filter_sobel; | ||||
|             s->copy[i] = !((1 << i) & s->planes); | ||||
|             s->size[i] = 3; | ||||
|             s->setup[i] = setup_3x3; | ||||
|             s->rdiv[i] = s->scale; | ||||
|             s->bias[i] = s->delta; | ||||
|         } | ||||
|         ff_sobel_init(s, s->depth, s->nb_planes); | ||||
|     } else if (!strcmp(ctx->filter->name, "kirsch")) { | ||||
|         for (i = 0; i < 4; i++) { | ||||
|             s->filter[i] = filter_kirsch; | ||||
| @@ -832,18 +783,6 @@ static int param_init(AVFilterContext *ctx) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     s->depth = desc->comp[0].depth; | ||||
|     s->max = (1 << s->depth) - 1; | ||||
|  | ||||
|     s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); | ||||
|     s->planewidth[0] = s->planewidth[3] = inlink->w; | ||||
|     s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); | ||||
|     s->planeheight[0] = s->planeheight[3] = inlink->h; | ||||
|  | ||||
|     s->nb_planes = av_pix_fmt_count_planes(inlink->format); | ||||
|     s->nb_threads = ff_filter_get_nb_threads(ctx); | ||||
|     s->bpc = (s->depth + 7) / 8; | ||||
|  | ||||
|     if (!strcmp(ctx->filter->name, "convolution")) { | ||||
|         if (s->depth > 8) { | ||||
|             for (p = 0; p < s->nb_planes; p++) { | ||||
| @@ -870,10 +809,6 @@ static int param_init(AVFilterContext *ctx) | ||||
|         if (s->depth > 8) | ||||
|             for (p = 0; p < s->nb_planes; p++) | ||||
|                 s->filter[p] = filter16_roberts; | ||||
|     } else if (!strcmp(ctx->filter->name, "sobel")) { | ||||
|         if (s->depth > 8) | ||||
|             for (p = 0; p < s->nb_planes; p++) | ||||
|                 s->filter[p] = filter16_sobel; | ||||
|     } else if (!strcmp(ctx->filter->name, "kirsch")) { | ||||
|         if (s->depth > 8) | ||||
|             for (p = 0; p < s->nb_planes; p++) | ||||
|   | ||||
| @@ -22,6 +22,18 @@ | ||||
|  | ||||
| SECTION_RODATA | ||||
| half:   dd 0.5 | ||||
| data_p1: dd  1 | ||||
| data_n1: dd -1 | ||||
| data_p2: dd  2 | ||||
| data_n2: dd -2 | ||||
|  | ||||
| ALIGN 64 | ||||
| sobel_perm: db  0, 16, 32, 48,  1, 17, 33, 49,  2, 18, 34, 50,  3, 19, 35, 51 | ||||
|             db  4, 20, 36, 52,  5, 21, 37, 53,  6, 22, 38, 54,  7, 23, 39, 55 | ||||
|             db  8, 24, 40, 56,  9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59 | ||||
|             db 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63 | ||||
| sobel_mulA: db -1,  1, -2,  2 | ||||
| sobel_mulB: db  1, -1,  2, -2 | ||||
|  | ||||
| SECTION .text | ||||
|  | ||||
| @@ -154,3 +166,138 @@ cglobal filter_3x3, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c | ||||
| INIT_XMM sse4 | ||||
| FILTER_3X3 | ||||
| %endif | ||||
|  | ||||
| ; SOBEL_MUL n, weight | ||||
| ; Scalar helper: rd += (unsigned byte at [c<n> + x]) * (dword stored at 'weight'). | ||||
| ; Clobbers ptrd. | ||||
| %macro SOBEL_MUL 2 | ||||
|     movzx ptrd, byte [c%1q + xq] | ||||
|     imul  ptrd, [%2] | ||||
|     add   rd, ptrd | ||||
| %endmacro | ||||
|  | ||||
| ; SOBEL_ADD n | ||||
| ; Scalar helper: rd += (unsigned byte at [c<n> + x]), i.e. a weight of +1. | ||||
| ; Clobbers ptrd. | ||||
| %macro SOBEL_ADD 1 | ||||
|     movzx ptrd, byte [c%1q + xq] | ||||
|     add   rd, ptrd | ||||
| %endmacro | ||||
|  | ||||
| ; void ff_filter_sobel_avx512icl(uint8_t *dst, int width, | ||||
| ;                                float scale, float delta, const int *const matrix, | ||||
| ;                                const uint8_t *c[], int peak, int radius, | ||||
| ;                                int dstride, int stride, int size) | ||||
| ; | ||||
| ; 8-bit sobel: for every x in [0, width) | ||||
| ;   suma = -c0 - 2*c1 - c2 + c6 + 2*c7 + c8 | ||||
| ;   sumb = -c0 + c2 - 2*c3 + 2*c5 - c6 + c8 | ||||
| ;   dst[x] = clip_uint8(sqrt(suma^2 + sumb^2) * scale + delta) | ||||
| ; Widths of at least mmsize/4 samples go through the vector loop (.loop1), | ||||
| ; the remainder (or the whole row when it is shorter) through .loop2. | ||||
| %macro FILTER_SOBEL 0 | ||||
| %if UNIX64 | ||||
| cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x | ||||
| %else | ||||
| cglobal filter_sobel, 4, 15, 7, dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x | ||||
| %endif | ||||
| %if WIN64 | ||||
|     ; NOTE(review): presumably scale/delta arrive in xmm2/xmm3 on Win64 and the | ||||
|     ; matrix/c pointers on the stack; the swaps/loads below normalise that so the | ||||
|     ; rest of the body matches the UNIX64 register naming - confirm against x86inc. | ||||
|     SWAP xmm0, xmm2 | ||||
|     SWAP xmm1, xmm3 | ||||
|     mov  r2q, matrixmp | ||||
|     mov  r3q, ptrmp | ||||
|     DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, x | ||||
| %endif | ||||
|     movsxdifnidn widthq, widthd | ||||
|     VBROADCASTSS m0, xmm0                ; m0 = scale in every lane | ||||
|     VBROADCASTSS m1, xmm1                ; m1 = delta in every lane | ||||
|     pxor  m6, m6 | ||||
|     ; fetch the nine neighbourhood pointers from the c[] array | ||||
|     mov   c0q, [ptrq + 0*gprsize] | ||||
|     mov   c1q, [ptrq + 1*gprsize] | ||||
|     mov   c2q, [ptrq + 2*gprsize] | ||||
|     mov   c3q, [ptrq + 3*gprsize] | ||||
|     mov   c4q, [ptrq + 4*gprsize] | ||||
|     mov   c5q, [ptrq + 5*gprsize] | ||||
|     mov   c6q, [ptrq + 6*gprsize] | ||||
|     mov   c7q, [ptrq + 7*gprsize] | ||||
|     mov   c8q, [ptrq + 8*gprsize] | ||||
|  | ||||
|     xor   xq, xq | ||||
|     cmp   widthq, mmsize/4               ; fewer samples than one vector? | ||||
|     jl .loop2 | ||||
|  | ||||
|     ; split width into a multiple of mmsize/4 (vector part) and a remainder r | ||||
|     mov   rq, widthq | ||||
|     and   rq, mmsize/4-1 | ||||
|     sub   widthq, rq | ||||
|  | ||||
|     mova  m6, [sobel_perm] | ||||
| .loop1: | ||||
|     ; Gather four 16-byte rows per register: | ||||
|     ;   m2 = [c2 | c6 | c1 | c7],  m3 = [c2 | c6 | c5 | c3] | ||||
|     ; c0 and c8 are zero-extended to dwords separately. | ||||
|     movu          xm3, [c2q + xq] | ||||
|     pmovzxbd      m5, [c0q + xq] | ||||
|     vinserti32x4  ym3, [c6q + xq], 1 | ||||
|     pmovzxbd      m4, [c8q + xq] | ||||
|     vinserti32x4  m2, m3, [c1q + xq], 2 | ||||
|     vinserti32x4  m3, [c5q + xq], 2 | ||||
|     vinserti32x4  m2, [c7q + xq], 3 | ||||
|     vinserti32x4  m3, [c3q + xq], 3 | ||||
|     ; sobel_perm interleaves the four rows so that each dword holds the four | ||||
|     ; taps belonging to one output sample | ||||
|     vpermb        m2, m6, m2 | ||||
|     psubd         m4, m5                 ; c8 - c0, common to both sums | ||||
|     vpermb        m3, m6, m3 | ||||
|     mova          m5, m4 | ||||
|     ; dot product of the unsigned taps with the signed weights, accumulated | ||||
|     ; on top of (c8 - c0): m4 = suma, m5 = sumb | ||||
|     vpdpbusd      m4, m2, [sobel_mulA] {1to16} | ||||
|     vpdpbusd      m5, m3, [sobel_mulB] {1to16} | ||||
|  | ||||
|     cvtdq2ps  m4, m4 | ||||
|     mulps     m4, m4                     ; suma^2 | ||||
|  | ||||
|     cvtdq2ps    m5, m5 | ||||
|     VFMADD231PS m4, m5, m5               ; + sumb^2 | ||||
|  | ||||
|     sqrtps    m4, m4 | ||||
|     fmaddps m4, m4, m0, m1               ; * scale + delta | ||||
|     cvttps2dq m4, m4 | ||||
|     ; vpmovusdb saturates the dwords as UNSIGNED values, so a negative result | ||||
|     ; (e.g. with a negative delta) would be stored as 255. Clamp at zero first | ||||
|     ; so this path agrees with the C code and with the scalar tail below. | ||||
|     ; m5 is free here; it is reloaded at the top of the next iteration. | ||||
|     pxor      m5, m5 | ||||
|     pmaxsd    m4, m5 | ||||
|     vpmovusdb [dstq + xq], m4 | ||||
|  | ||||
|     add xq, mmsize/4 | ||||
|     cmp xq, widthq | ||||
|     jl .loop1 | ||||
|  | ||||
|     ; restore the full width and fall into the scalar loop for the remainder | ||||
|     add widthq, rq | ||||
|     cmp xq, widthq | ||||
|     jge .end | ||||
|  | ||||
| .loop2: | ||||
|     ; one sample per iteration | ||||
|     xor  rd, rd | ||||
|     pxor m4, m4 | ||||
|  | ||||
|     ;Gx | ||||
|     SOBEL_MUL 0, data_n1 | ||||
|     SOBEL_MUL 1, data_n2 | ||||
|     SOBEL_MUL 2, data_n1 | ||||
|     SOBEL_ADD 6 | ||||
|     SOBEL_MUL 7, data_p2 | ||||
|     SOBEL_ADD 8 | ||||
|  | ||||
|     cvtsi2ss xmm4, rd | ||||
|     mulss    xmm4, xmm4 | ||||
|  | ||||
|     xor rd, rd | ||||
|     ;Gy | ||||
|     SOBEL_MUL 0, data_n1 | ||||
|     SOBEL_ADD 2 | ||||
|     SOBEL_MUL 3, data_n2 | ||||
|     SOBEL_MUL 5, data_p2 | ||||
|     SOBEL_MUL 6, data_n1 | ||||
|     SOBEL_ADD 8 | ||||
|  | ||||
|     cvtsi2ss  xmm5, rd | ||||
|     fmaddss xmm4, xmm5, xmm5, xmm4 | ||||
|  | ||||
|     sqrtps    xmm4, xmm4 | ||||
|     fmaddss   xmm4, xmm4, xmm0, xmm1     ;sum = sum * rdiv + bias | ||||
|     cvttps2dq xmm4, xmm4     ; trunc to integer | ||||
|     packssdw  xmm4, xmm4     ; signed saturate to words ... | ||||
|     packuswb  xmm4, xmm4     ; ... then unsigned saturate to bytes (0..255) | ||||
|     movd      rd, xmm4 | ||||
|     mov       [dstq + xq], rb | ||||
|  | ||||
|     add xq, 1 | ||||
|     cmp xq, widthq | ||||
|     jl .loop2 | ||||
| .end: | ||||
|     RET | ||||
| %endmacro | ||||
|  | ||||
| ; instantiate only for x86-64 builds whose assembler supports AVX-512 ICL | ||||
| %if ARCH_X86_64 | ||||
| %if HAVE_AVX512ICL_EXTERNAL | ||||
| INIT_ZMM avx512icl | ||||
| FILTER_SOBEL | ||||
| %endif | ||||
| %endif | ||||
|   | ||||
| @@ -29,6 +29,11 @@ void ff_filter_3x3_sse4(uint8_t *dst, int width, | ||||
|                         const uint8_t *c[], int peak, int radius, | ||||
|                         int dstride, int stride, int size); | ||||
|  | ||||
| void ff_filter_sobel_avx512icl(uint8_t *dst, int width, | ||||
|                          float scale, float delta, const int *const matrix, | ||||
|                          const uint8_t *c[], int peak, int radius, | ||||
|                          int dstride, int stride, int size); | ||||
|  | ||||
| av_cold void ff_convolution_init_x86(ConvolutionContext *s) | ||||
| { | ||||
| #if ARCH_X86_64 | ||||
| @@ -44,3 +49,16 @@ av_cold void ff_convolution_init_x86(ConvolutionContext *s) | ||||
|     } | ||||
| #endif | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Replace the sobel filter callback of the first nb_planes planes with the | ||||
|  * AVX-512 ICL assembly version. | ||||
|  * | ||||
|  * Nothing is changed unless the build targets x86-64, depth is exactly 8 | ||||
|  * and EXTERNAL_AVX512ICL() accepts the runtime CPU flags. | ||||
|  */ | ||||
| av_cold void ff_sobel_init_x86(ConvolutionContext *s, int depth, int nb_planes) | ||||
| { | ||||
| #if ARCH_X86_64 | ||||
|     const int cpu_flags = av_get_cpu_flags(); | ||||
|  | ||||
|     /* the assembly only implements the 8-bit filter */ | ||||
|     if (depth != 8 || !EXTERNAL_AVX512ICL(cpu_flags)) | ||||
|         return; | ||||
|  | ||||
|     for (int plane = 0; plane < nb_planes; plane++) | ||||
|         s->filter[plane] = ff_filter_sobel_avx512icl; | ||||
| #endif | ||||
| } | ||||
|   | ||||
| @@ -46,6 +46,7 @@ AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o | ||||
| AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)      += vf_hflip.o | ||||
| AVFILTEROBJS-$(CONFIG_THRESHOLD_FILTER)  += vf_threshold.o | ||||
| AVFILTEROBJS-$(CONFIG_NLMEANS_FILTER)    += vf_nlmeans.o | ||||
| AVFILTEROBJS-$(CONFIG_SOBEL_FILTER)      += vf_convolution.o | ||||
|  | ||||
| CHECKASMOBJS-$(CONFIG_AVFILTER) += $(AVFILTEROBJS-yes) | ||||
|  | ||||
|   | ||||
| @@ -197,6 +197,9 @@ static const struct { | ||||
|     #if CONFIG_THRESHOLD_FILTER | ||||
|         { "vf_threshold", checkasm_check_vf_threshold }, | ||||
|     #endif | ||||
|     #if CONFIG_SOBEL_FILTER | ||||
|         { "vf_sobel", checkasm_check_vf_sobel }, | ||||
|     #endif | ||||
| #endif | ||||
| #if CONFIG_SWSCALE | ||||
|     { "sw_gbrp", checkasm_check_sw_gbrp }, | ||||
|   | ||||
| @@ -86,6 +86,7 @@ void checkasm_check_vf_eq(void); | ||||
| void checkasm_check_vf_gblur(void); | ||||
| void checkasm_check_vf_hflip(void); | ||||
| void checkasm_check_vf_threshold(void); | ||||
| void checkasm_check_vf_sobel(void); | ||||
| void checkasm_check_vp8dsp(void); | ||||
| void checkasm_check_vp9dsp(void); | ||||
| void checkasm_check_videodsp(void); | ||||
|   | ||||
							
								
								
									
										104
									
								
								tests/checkasm/vf_convolution.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								tests/checkasm/vf_convolution.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,104 @@ | ||||
| /* | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License along | ||||
|  * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||||
|  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  */ | ||||
|  | ||||
| #include <string.h> | ||||
| #include "checkasm.h" | ||||
| #include "libavfilter/avfilter.h" | ||||
| #include "libavfilter/convolution.h" | ||||
| #include "libavutil/intreadwrite.h" | ||||
| #include "libavutil/mem_internal.h" | ||||
|  | ||||
| #define WIDTH 512 | ||||
| #define HEIGHT 512 | ||||
| #define SRC_STRIDE 512 | ||||
| #define PIXELS (WIDTH * HEIGHT) | ||||
|  | ||||
| /* Fill the first 'size' bytes of 'buf' with the low 8 bits of rnd(). | ||||
|  * Both arguments are parenthesized so expressions can be passed safely. */ | ||||
| #define randomize_buffers(buf, size)        \ | ||||
|     do {                                    \ | ||||
|         int j;                              \ | ||||
|         uint8_t *tmp_buf = (uint8_t *)(buf);\ | ||||
|         for (j = 0; j < (size); j++) {      \ | ||||
|             tmp_buf[j] = rnd() & 0xFF;      \ | ||||
|         }                                   \ | ||||
|     } while (0) | ||||
|  | ||||
| /** | ||||
|  * Compare the selected sobel implementation against the C reference. | ||||
|  * | ||||
|  * A WIDTH x HEIGHT 8-bit plane is filled with random bytes. For every row | ||||
|  * the 3x3 neighbourhood pointers are built with the context's setup | ||||
|  * callback, the reference and the tested function each filter | ||||
|  * width - 2 * radius samples into their own output row, and exactly those | ||||
|  * samples are compared. The tested function is also benchmarked. | ||||
|  * | ||||
|  * @param report_name name passed to check_func() for this test | ||||
|  */ | ||||
| static void check_sobel(const char * report_name) | ||||
| { | ||||
|     LOCAL_ALIGNED_32(uint8_t, src,     [PIXELS]); | ||||
|     LOCAL_ALIGNED_32(uint8_t, dst_ref, [PIXELS]); | ||||
|     LOCAL_ALIGNED_32(uint8_t, dst_new, [PIXELS]); | ||||
|     const int height = HEIGHT; | ||||
|     const int width  = WIDTH; | ||||
|     const int stride = SRC_STRIDE; | ||||
|     const int dstride = SRC_STRIDE; | ||||
|     int mode = 0; | ||||
|     const uint8_t *c[49]; | ||||
|     const int radius = 1; | ||||
|     const int bpc = 1; | ||||
|     const int step = mode == MATRIX_COLUMN ? 16 : 1; | ||||
|     const int slice_start = 0; | ||||
|     const int slice_end = height; | ||||
|     int y; | ||||
|     const int sizew = mode == MATRIX_COLUMN ? height : width; | ||||
|     /* number of samples each call writes, and therefore the number compared */ | ||||
|     const int out_width = sizew - 2 * radius; | ||||
|     /* output cursors; both advance together so the same row is compared */ | ||||
|     uint8_t *ref_row = dst_ref; | ||||
|     uint8_t *new_row = dst_new; | ||||
|     float scale = 2; | ||||
|     float delta = 10; | ||||
|  | ||||
|     ConvolutionContext s; | ||||
|  | ||||
|     declare_func(void, uint8_t *dst, int width, float scale, float delta, const int *const matrix, | ||||
|                  const uint8_t *c[], int peak, int radius, int dstride, int stride, int size); | ||||
|  | ||||
|     /* only the fields read by ff_sobel_init() are filled in */ | ||||
|     s.scale = scale; | ||||
|     s.delta = delta; | ||||
|     s.depth = 8; | ||||
|     s.nb_planes = 3; | ||||
|     s.planes = 15; | ||||
|     ff_sobel_init(&s, s.depth, s.nb_planes); | ||||
|  | ||||
|     memset(dst_ref, 0, PIXELS); | ||||
|     memset(dst_new, 0, PIXELS); | ||||
|     randomize_buffers(src, PIXELS); | ||||
|  | ||||
|     if (check_func(s.filter[0], "%s", report_name)) { | ||||
|         for (y = slice_start; y < slice_end; y += step) { | ||||
|             const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : radius * bpc; | ||||
|             const int yoff = mode == MATRIX_COLUMN ? radius * dstride : 0; | ||||
|  | ||||
|             s.setup[0](radius, c, src, stride, radius, width, y, height, bpc); | ||||
|             call_ref(ref_row + yoff + xoff, out_width, | ||||
|                      scale, delta, NULL, c, 0, radius, | ||||
|                      dstride, stride, slice_end - step); | ||||
|             call_new(new_row + yoff + xoff, out_width, | ||||
|                      scale, delta, NULL, c, 0, radius, | ||||
|                      dstride, stride, slice_end - step); | ||||
|             if (memcmp(ref_row + yoff + xoff, new_row + yoff + xoff, out_width)) | ||||
|                 fail(); | ||||
|             bench_new(new_row + yoff + xoff, out_width, | ||||
|                       scale, delta, NULL, c, 0, radius, | ||||
|                       dstride, stride, slice_end - step); | ||||
|             if (mode != MATRIX_COLUMN) { | ||||
|                 ref_row += dstride; | ||||
|                 new_row += dstride; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
| } | ||||
|  | ||||
| void checkasm_check_vf_sobel(void) /* test entry point, listed as "vf_sobel" in the checkasm test table */ | ||||
| { | ||||
|     check_sobel("sobel"); /* compare and benchmark the sobel filter under the name "sobel" */ | ||||
|     report("convolution:sobel"); /* NOTE(review): presumably prints the result for this group - confirm in checkasm.h */ | ||||
| } | ||||
| @@ -43,6 +43,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \ | ||||
|                 fate-checkasm-vf_hflip                                  \ | ||||
|                 fate-checkasm-vf_nlmeans                                \ | ||||
|                 fate-checkasm-vf_threshold                              \ | ||||
|                 fate-checkasm-vf_sobel                                  \ | ||||
|                 fate-checkasm-videodsp                                  \ | ||||
|                 fate-checkasm-vorbisdsp                                 \ | ||||
|                 fate-checkasm-vp8dsp                                    \ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user