You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-11-23 21:54:53 +02:00
avfilter/vf_lut3d: add x86-optimized tetrahedral interpolation
I spotted an interesting pattern that I hadn't seen before, which leads to a faster implementation. The bit-shifting table I was using before is no longer needed, and I was able to remove quite a few lines. I also added use of FMA in the AVX2 version.

f32 1920x1080 1 thread with prelut
c impl
1434012700 UNITS in lut3d->interp, 1 runs, 0 skips
1434035335 UNITS in lut3d->interp, 2 runs, 0 skips
1423615347 UNITS in lut3d->interp, 4 runs, 0 skips
1426268863 UNITS in lut3d->interp, 8 runs, 0 skips
sse2
905484420 UNITS in lut3d->interp, 1 runs, 0 skips
905659010 UNITS in lut3d->interp, 2 runs, 0 skips
915167140 UNITS in lut3d->interp, 4 runs, 0 skips
915834222 UNITS in lut3d->interp, 8 runs, 0 skips
avx
574794860 UNITS in lut3d->interp, 1 runs, 0 skips
581035090 UNITS in lut3d->interp, 2 runs, 0 skips
584116720 UNITS in lut3d->interp, 4 runs, 0 skips
581460290 UNITS in lut3d->interp, 8 runs, 0 skips
avx2
301698880 UNITS in lut3d->interp, 1 runs, 0 skips
301982880 UNITS in lut3d->interp, 2 runs, 0 skips
306962430 UNITS in lut3d->interp, 4 runs, 0 skips
305472025 UNITS in lut3d->interp, 8 runs, 0 skips

gbrap16 1920x1080 1 thread with prelut
c impl
1480894840 UNITS in lut3d->interp, 1 runs, 0 skips
1502922990 UNITS in lut3d->interp, 2 runs, 0 skips
1496114307 UNITS in lut3d->interp, 4 runs, 0 skips
1492554551 UNITS in lut3d->interp, 8 runs, 0 skips
sse2
980777180 UNITS in lut3d->interp, 1 runs, 0 skips
986121520 UNITS in lut3d->interp, 2 runs, 0 skips
986489840 UNITS in lut3d->interp, 4 runs, 0 skips
998832248 UNITS in lut3d->interp, 8 runs, 0 skips
avx
622212360 UNITS in lut3d->interp, 1 runs, 0 skips
622981160 UNITS in lut3d->interp, 2 runs, 0 skips
645396315 UNITS in lut3d->interp, 4 runs, 0 skips
641057075 UNITS in lut3d->interp, 8 runs, 0 skips
avx2
321336400 UNITS in lut3d->interp, 1 runs, 0 skips
321268920 UNITS in lut3d->interp, 2 runs, 0 skips
323459895 UNITS in lut3d->interp, 4 runs, 0 skips
324949967 UNITS in lut3d->interp, 8 runs, 0 skips
This commit is contained in:
83
libavfilter/lut3d.h
Normal file
83
libavfilter/lut3d.h
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Clément Bœsch
|
||||
* Copyright (c) 2018 Paul B Mahol
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
#ifndef AVFILTER_LUT3D_H
|
||||
#define AVFILTER_LUT3D_H
|
||||
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "framesync.h"
|
||||
#include "avfilter.h"
|
||||
|
||||
/**
 * Interpolation modes selectable for the 3D LUT lookup.
 *
 * The enumerators carry no explicit values, so they are numbered from 0 in
 * declaration order. NB_INTERP_MODE is the trailing sentinel and therefore
 * equals the number of real modes declared before it; keep it last.
 */
enum interp_mode {
|
||||
INTERPOLATE_NEAREST,
|
||||
INTERPOLATE_TRILINEAR,
|
||||
INTERPOLATE_TETRAHEDRAL,
|
||||
INTERPOLATE_PYRAMID,
|
||||
INTERPOLATE_PRISM,
|
||||
NB_INTERP_MODE
|
||||
};
|
||||
|
||||
/**
 * Triple of single-precision components named r, g and b.
 *
 * Used in this header both as the element type of LUT3DContext.lut and as
 * the type of LUT3DContext.scale.
 * NOTE(review): field order/size is presumably relied upon by the x86 SIMD
 * code declared via ff_lut3d_init_x86() -- confirm before reordering.
 */
struct rgbvec {
|
||||
float r, g, b;
|
||||
};
|
||||
|
||||
/* 3D LUTs don't often go up to level 32, but it is common to have a Hald CLUT
|
||||
* of 512x512 (64x64x64) */
|
||||
#define MAX_LEVEL 256
|
||||
#define PRELUT_SIZE 65536 /* 65536 == 1 << 16; NOTE(review): presumably the entry count of each Lut3DPreLut.lut table -- confirm at the allocation site */
|
||||
|
||||
/**
 * State of the optional pre-LUT (a lookup applied before the 3D LUT; it is
 * embedded in LUT3DContext as the `prelut` member).
 *
 * Every array has three elements.
 * NOTE(review): presumably one element per colour channel (R, G, B) --
 * confirm against the code that fills this structure.
 */
typedef struct Lut3DPreLut {
|
||||
int size; /* NOTE(review): presumably the number of entries in each lut[] table; a value of 0 presumably means "no pre-LUT" -- confirm */
|
||||
float min[3];
|
||||
float max[3];
|
||||
float scale[3];
|
||||
float* lut[3]; /* three pointers to float tables; ownership/allocation is not visible in this header */
|
||||
} Lut3DPreLut;
|
||||
|
||||
/**
 * Private context of the 3D LUT filter.
 *
 * Layout warning: the clut_* members and the FFFrameSync member exist only
 * when CONFIG_HALDCLUT_FILTER evaluates to non-zero. This header does not
 * include a configuration header itself, so every translation unit that
 * includes it must see the same definition of CONFIG_HALDCLUT_FILTER
 * beforehand, otherwise sizeof(LUT3DContext) will differ between units.
 */
typedef struct LUT3DContext {
|
||||
const AVClass *class;
|
||||
struct rgbvec *lut; /* pointer to the LUT entries; NOTE(review): presumably lutsize^3 elements -- confirm at the allocation site */
|
||||
int lutsize;
|
||||
int lutsize2; /* NOTE(review): presumably lutsize * lutsize, cached for index computation -- confirm */
|
||||
struct rgbvec scale;
|
||||
int interpolation; ///<interp_mode
|
||||
char *file;
|
||||
uint8_t rgba_map[4];
|
||||
int step;
|
||||
avfilter_action_func *interp; /* function pointer selected for the lookup; its signature is that of avfilter_action_func */
|
||||
Lut3DPreLut prelut;
|
||||
#if CONFIG_HALDCLUT_FILTER
|
||||
uint8_t clut_rgba_map[4];
|
||||
int clut_step;
|
||||
int clut_bits;
|
||||
int clut_planar;
|
||||
int clut_float;
|
||||
int clut_width;
|
||||
FFFrameSync fs;
|
||||
#endif
|
||||
} LUT3DContext;
|
||||
|
||||
/**
 * Pair of frame pointers: `in` and `out`.
 *
 * NOTE(review): presumably passed as the opaque argument to the
 * avfilter_action_func stored in LUT3DContext.interp -- confirm at the
 * call site. Neither pointer's ownership is visible from this header.
 */
typedef struct ThreadData {
|
||||
AVFrame *in, *out;
|
||||
} ThreadData;
|
||||
|
||||
/**
 * x86-specific initialisation hook; only the declaration lives here.
 *
 * @param s    filter context passed by non-const pointer, so the callee may
 *             modify it. NOTE(review): presumably replaces s->interp with an
 *             SIMD implementation when one is available -- confirm in the
 *             x86 init source.
 * @param desc pixel format descriptor (read-only for the callee).
 */
void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc);
|
||||
|
||||
#endif /* AVFILTER_LUT3D_H */
|
||||
Reference in New Issue
Block a user