mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-04-19 09:02:26 +02:00
avfilter/vf_idet: MMX/MMXEXT/SSE2 implementation of idet's filter_line()
Integration by Neil Birkbeck, with help from Vitor Sessak. Core SSE2 loop by Skal (pascal.massimino@gmail.com).
Reviewed-by: Clément Bœsch <u@pkh.me>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
53b0892005
commit
406a9ccffe
@ -353,6 +353,7 @@ Filters:
|
|||||||
vf_extractplanes.c Paul B Mahol
|
vf_extractplanes.c Paul B Mahol
|
||||||
vf_histogram.c Paul B Mahol
|
vf_histogram.c Paul B Mahol
|
||||||
vf_hqx.c Clément Bœsch
|
vf_hqx.c Clément Bœsch
|
||||||
|
vf_idet.c Pascal Massimino
|
||||||
vf_il.c Paul B Mahol
|
vf_il.c Paul B Mahol
|
||||||
vf_lenscorrection.c Daniel Oberhoff
|
vf_lenscorrection.c Daniel Oberhoff
|
||||||
vf_mergeplanes.c Paul B Mahol
|
vf_mergeplanes.c Paul B Mahol
|
||||||
|
@ -23,37 +23,8 @@
|
|||||||
#include "libavutil/cpu.h"
|
#include "libavutil/cpu.h"
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
#include "libavutil/opt.h"
|
#include "libavutil/opt.h"
|
||||||
#include "libavutil/pixdesc.h"
|
|
||||||
#include "avfilter.h"
|
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
#include "vf_idet.h"
|
||||||
#define HIST_SIZE 4
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
TFF,
|
|
||||||
BFF,
|
|
||||||
PROGRSSIVE,
|
|
||||||
UNDETERMINED,
|
|
||||||
} Type;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const AVClass *class;
|
|
||||||
float interlace_threshold;
|
|
||||||
float progressive_threshold;
|
|
||||||
|
|
||||||
Type last_type;
|
|
||||||
int prestat[4];
|
|
||||||
int poststat[4];
|
|
||||||
|
|
||||||
uint8_t history[HIST_SIZE];
|
|
||||||
|
|
||||||
AVFrame *cur;
|
|
||||||
AVFrame *next;
|
|
||||||
AVFrame *prev;
|
|
||||||
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w);
|
|
||||||
|
|
||||||
const AVPixFmtDescriptor *csp;
|
|
||||||
} IDETContext;
|
|
||||||
|
|
||||||
#define OFFSET(x) offsetof(IDETContext, x)
|
#define OFFSET(x) offsetof(IDETContext, x)
|
||||||
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
|
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
|
||||||
@ -77,7 +48,7 @@ static const char *type2str(Type type)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w)
|
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w)
|
||||||
{
|
{
|
||||||
int x;
|
int x;
|
||||||
int ret=0;
|
int ret=0;
|
||||||
@ -271,7 +242,10 @@ static av_cold int init(AVFilterContext *ctx)
|
|||||||
idet->last_type = UNDETERMINED;
|
idet->last_type = UNDETERMINED;
|
||||||
memset(idet->history, UNDETERMINED, HIST_SIZE);
|
memset(idet->history, UNDETERMINED, HIST_SIZE);
|
||||||
|
|
||||||
idet->filter_line = filter_line_c;
|
idet->filter_line = ff_idet_filter_line_c;
|
||||||
|
|
||||||
|
if (ARCH_X86)
|
||||||
|
ff_idet_init_x86(idet);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
58
libavfilter/vf_idet.h
Normal file
58
libavfilter/vf_idet.h
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVFILTER_IDET_H
|
||||||
|
#define AVFILTER_IDET_H
|
||||||
|
|
||||||
|
#include "libavutil/pixdesc.h"
|
||||||
|
#include "avfilter.h"
|
||||||
|
|
||||||
|
#define HIST_SIZE 4
|
||||||
|
|
||||||
|
/* Classification values used by the idet filter. IDETContext.last_type and
 * IDETContext.history[] hold them; init() in vf_idet.c seeds both with
 * UNDETERMINED. */
typedef enum {
|
||||||
|
TFF, /* presumably "top field first" -- TODO confirm against vf_idet.c */
|
||||||
|
BFF, /* presumably "bottom field first" -- TODO confirm against vf_idet.c */
|
||||||
|
PROGRSSIVE, /* spelling (sic) is the identifier's real name; renaming it must touch every user */
|
||||||
|
UNDETERMINED, /* initial state assigned by init() */
|
||||||
|
} Type;
|
||||||
|
|
||||||
|
/* Private state of the idet filter. It lives in this header so that the
 * x86 init code (ff_idet_init_x86) can install its own filter_line. */
typedef struct {
|
||||||
|
const AVClass *class;
|
||||||
|
float interlace_threshold; /* presumably a user option (vf_idet.c builds offsets with OFFSET()) -- confirm */
|
||||||
|
float progressive_threshold; /* presumably a user option, see above -- confirm */
|
||||||
|
|
||||||
|
Type last_type; /* set to UNDETERMINED by init() */
|
||||||
|
int prestat[4]; /* NOTE(review): 4 entries, same count as Type values; presumably indexed by Type -- confirm */
|
||||||
|
int poststat[4]; /* NOTE(review): see prestat -- confirm */
|
||||||
|
|
||||||
|
uint8_t history[HIST_SIZE]; /* every entry is set to UNDETERMINED by init() */
|
||||||
|
|
||||||
|
AVFrame *cur;
|
||||||
|
AVFrame *next;
|
||||||
|
AVFrame *prev;
|
||||||
|
/* Line metric over w bytes of three lines; init() installs
 * ff_idet_filter_line_c and ff_idet_init_x86() may replace it. */
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w);
|
||||||
|
|
||||||
|
const AVPixFmtDescriptor *csp; /* presumably the descriptor of the input pixel format -- confirm */
|
||||||
|
} IDETContext;
|
||||||
|
|
||||||
|
/* Installs an x86 SIMD filter_line into idet when one is available;
 * init() in vf_idet.c calls it under "if (ARCH_X86)". */
void ff_idet_init_x86(IDETContext *idet);
|
||||||
|
|
||||||
|
/* Plain C filter_line: the default installed by init(), and the fall-back
 * the x86 wrappers use for the left-over bytes that do not fill a whole
 * SIMD group. */
|
||||||
|
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w);
|
||||||
|
|
||||||
|
#endif
|
@ -1,5 +1,6 @@
|
|||||||
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
|
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
|
||||||
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
|
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
|
||||||
|
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
|
||||||
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
|
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
|
||||||
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
|
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
|
||||||
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
|
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
|
||||||
@ -7,6 +8,7 @@ OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
|
|||||||
|
|
||||||
YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
|
YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
|
||||||
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
|
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
|
||||||
|
YASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
|
||||||
YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o
|
YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o
|
||||||
YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
|
YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
|
||||||
YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
|
YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
|
||||||
|
114
libavfilter/x86/vf_idet.asm
Normal file
114
libavfilter/x86/vf_idet.asm
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
; *****************************************************************************
|
||||||
|
; * x86-optimized functions for idet filter
|
||||||
|
; *
|
||||||
|
; * This file is part of FFmpeg.
|
||||||
|
; *
|
||||||
|
; * FFmpeg is free software; you can redistribute it and/or modify
|
||||||
|
; * it under the terms of the GNU General Public License as published by
|
||||||
|
; * the Free Software Foundation; either version 2 of the License, or
|
||||||
|
; * (at your option) any later version.
|
||||||
|
; *
|
||||||
|
; * FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
; * GNU General Public License for more details.
|
||||||
|
; *
|
||||||
|
; * You should have received a copy of the GNU General Public License along
|
||||||
|
; * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
; * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
; ******************************************************************************
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION_TEXT
|
||||||
|
|
||||||
|
%if ARCH_X86_32
|
||||||
|
|
||||||
|
; MMX/MMXEXT implementation: handles 8 bytes per iteration by widening them
; to 16-bit words. %1 is the instruction-set name handed to INIT_MMX.
; Arguments (see the cglobal line): a, b, c = the three input lines,
; width = number of bytes. For every byte it accumulates |a + c - 2*b| and
; returns the total in eax.
; The width test sits after the loop body, so at least one 8-byte group is
; always processed and width is effectively rounded up to a multiple of 8;
; the C wrapper in vf_idet_init.c only calls this with a positive multiple
; of 8.
; NOTE(review): no emms is issued before RET in this code -- confirm that
; the caller (or the RET macro) takes care of the x87/MMX state.
|
||||||
|
%macro IDET_FILTER_LINE 1
|
||||||
|
INIT_MMX %1
|
||||||
|
cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index
|
||||||
|
xor indexq, indexq ; index = 0
|
||||||
|
%define m_zero m2
|
||||||
|
%define m_sum m5
|
||||||
|
pxor m_sum, m_sum ; two 32-bit partial sums, start at 0
|
||||||
|
pxor m_zero, m_zero ; constant zero used for widening
|
||||||
|
|
||||||
|
.loop:
|
||||||
|
movu m0, [aq + indexq*1] ; 8 bytes of a
|
||||||
|
punpckhbw m1, m0, m_zero ; m1 = high 4 bytes of a as words (3-operand form; presumably expanded by x86inc -- confirm)
|
||||||
|
punpcklbw m0, m_zero ; m0 = low 4 bytes of a as words
|
||||||
|
|
||||||
|
movu m3, [cq + indexq*1] ; 8 bytes of c
|
||||||
|
punpckhbw m4, m3, m_zero ; m4 = high 4 bytes of c as words
|
||||||
|
punpcklbw m3, m_zero ; m3 = low 4 bytes of c as words
|
||||||
|
|
||||||
|
paddsw m1, m4 ; high: a + c
|
||||||
|
paddsw m0, m3 ; low: a + c
|
||||||
|
|
||||||
|
movu m3, [bq + indexq*1] ; 8 bytes of b
|
||||||
|
punpckhbw m4, m3, m_zero ; m4 = high 4 bytes of b as words
|
||||||
|
punpcklbw m3, m_zero ; m3 = low 4 bytes of b as words
|
||||||
|
|
||||||
|
paddw m4, m4 ; high: 2*b
|
||||||
|
paddw m3, m3 ; low: 2*b
|
||||||
|
psubsw m1, m4 ; high: a + c - 2*b
|
||||||
|
psubsw m0, m3 ; low: a + c - 2*b
|
||||||
|
|
||||||
|
ABS2 m1, m0, m4, m3 ; x86util macro; presumably |m1|, |m0| with m4, m3 as scratch -- confirm
|
||||||
|
|
||||||
|
paddw m0, m1 ; 4 word sums (low + high)
|
||||||
|
punpckhwd m1, m0, m_zero ; widen upper 2 words to dwords
|
||||||
|
punpcklwd m0, m_zero ; widen lower 2 words to dwords
|
||||||
|
|
||||||
|
paddd m0, m1
|
||||||
|
paddd m_sum, m0 ; accumulate into the 2 dword partial sums
|
||||||
|
|
||||||
|
add indexq, 0x8 ; next 8-byte group
|
||||||
|
CMP widthd, indexd
|
||||||
|
jg .loop ; repeat while width > index
|
||||||
|
|
||||||
|
mova m0, m_sum
|
||||||
|
psrlq m_sum, 0x20 ; bring the upper dword partial sum down
|
||||||
|
paddq m0, m_sum ; low dword = total of both partial sums
|
||||||
|
movd eax, m0 ; return the low 32 bits
|
||||||
|
RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Instantiate the macro once per instruction set; both copies sit inside the
; surrounding "%if ARCH_X86_32", so they are only assembled for 32-bit x86.
IDET_FILTER_LINE mmxext
|
||||||
|
IDET_FILTER_LINE mmx
|
||||||
|
%endif
|
||||||
|
|
||||||
|
; SSE2 8-bit implementation that does 16 bytes at a time, without widening.
; With the saturating byte differences
;   ab = max(A - B, 0), ba = max(B - A, 0),
;   bc = max(B - C, 0), cb = max(C - B, 0)
; every byte satisfies |ab - bc| + |ba - cb| = |A + C - 2*B|, so the two
; psadbw accumulators (m0 and m1) together hold the sum of |A + C - 2*B|.
; The width test sits after the loop body, so at least 16 bytes are always
; processed; the C wrapper in vf_idet_init.c only calls this with a positive
; multiple of 16. The result is returned in eax.
|
||||||
|
INIT_XMM sse2
|
||||||
|
cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total ; NOTE(review): "total" is requested but never referenced below
|
||||||
|
xor indexq, indexq ; index = 0
|
||||||
|
pxor m0, m0 ; accumulator for |ab - bc|
|
||||||
|
pxor m1, m1 ; accumulator for |ba - cb|
|
||||||
|
|
||||||
|
.sse2_loop:
|
||||||
|
movu m2, [bq + indexq*1] ; B
|
||||||
|
movu m3, [aq + indexq*1] ; A
|
||||||
|
mova m6, m2 ; copy of B (becomes bc)
|
||||||
|
mova m4, m3 ; copy of A (becomes ab)
|
||||||
|
psubusb m5, m2, m3 ; ba = max(B - A, 0)
|
||||||
|
|
||||||
|
movu m3, [cq + indexq*1] ; C
|
||||||
|
add indexq, 0x10 ; next 16-byte group
|
||||||
|
psubusb m4, m2 ; ab = max(A - B, 0)
|
||||||
|
CMP indexd, widthd ; flags are consumed by the jl below; the SIMD instructions in between do not modify EFLAGS
|
||||||
|
|
||||||
|
psubusb m6, m3 ; bc = max(B - C, 0)
|
||||||
|
psubusb m3, m2 ; cb = max(C - B, 0)
|
||||||
|
|
||||||
|
psadbw m4, m6 ; |ab - bc|, summed per 8-byte half
|
||||||
|
paddq m0, m4
|
||||||
|
psadbw m5, m3 ; |ba - cb|, summed per 8-byte half
|
||||||
|
paddq m1, m5
|
||||||
|
jl .sse2_loop ; repeat while index < width
|
||||||
|
|
||||||
|
paddq m0, m1 ; merge the two accumulators
|
||||||
|
movhlps m1, m0 ; upper 64-bit half -> low half of m1
|
||||||
|
paddq m0, m1 ; low qword = grand total
|
||||||
|
movd eax, m0 ; return the low 32 bits
|
||||||
|
RET
|
70
libavfilter/x86/vf_idet_init.c
Normal file
70
libavfilter/x86/vf_idet_init.c
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/mem.h"
|
||||||
|
#include "libavutil/x86/asm.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavfilter/vf_idet.h"
|
||||||
|
|
||||||
|
/* Declares the assembly routine ff_idet_filter_line_KIND() and defines the
 * static wrapper idet_filter_line_KIND(), for KIND in {mmx,mmxext,sse2}.
 * The wrapper passes the leading part of the line whose length is a multiple
 * of SPAN to the assembly routine (only if that part is non-empty), passes
 * the remaining (w & (SPAN - 1)) bytes to ff_idet_filter_line_c(), and
 * returns the sum of both results.
 * SPAN has to be a power of two for the mask to yield the remainder; it is
 * expanded without extra parentheses, so pass a plain constant. */
|
||||||
|
#define FUNC_MAIN_DECL(KIND, SPAN) \
|
||||||
|
int ff_idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b, \
|
||||||
|
const uint8_t *c, int w); \
|
||||||
|
static int idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b, \
|
||||||
|
const uint8_t *c, int w) { \
|
||||||
|
int sum = 0; \
|
||||||
|
const int left_over = w & (SPAN - 1); \
|
||||||
|
w -= left_over; \
|
||||||
|
if (w > 0) \
|
||||||
|
sum += ff_idet_filter_line_##KIND(a, b, c, w); \
|
||||||
|
if (left_over > 0) \
|
||||||
|
sum += ff_idet_filter_line_c(a + w, b + w, c + w, left_over); \
|
||||||
|
return sum; \
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The wrappers only exist when the yasm objects are built; the MMX and
 * MMXEXT ones are additionally limited to 32-bit x86, matching the
 * "%if ARCH_X86_32" guard in vf_idet.asm. */
#if HAVE_YASM
|
||||||
|
|
||||||
|
FUNC_MAIN_DECL(sse2, 16) /* SSE2 loop consumes 16 bytes per iteration */
|
||||||
|
#if ARCH_X86_32
|
||||||
|
FUNC_MAIN_DECL(mmx, 8) /* MMX loop consumes 8 bytes per iteration */
|
||||||
|
FUNC_MAIN_DECL(mmxext, 8) /* MMXEXT loop consumes 8 bytes per iteration */
|
||||||
|
#endif /* ARCH_X86_32 */
|
||||||
|
|
||||||
|
#endif /* HAVE_YASM */
|
||||||
|
|
||||||
|
/**
 * Select the x86 SIMD implementation of filter_line for this CPU.
 *
 * The checks run from the oldest to the newest instruction set and each
 * match overwrites idet->filter_line, so the last supported one wins
 * (MMX, then MMXEXT, then SSE2). The MMX/MMXEXT candidates are only
 * considered on 32-bit x86. Without HAVE_YASM the function does nothing and
 * idet->filter_line keeps whatever the caller installed beforehand.
 *
 * @param idet filter context whose filter_line pointer may be replaced
 */
av_cold void ff_idet_init_x86(IDETContext *idet)
|
||||||
|
{
|
||||||
|
#if HAVE_YASM
|
||||||
|
const int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
#if ARCH_X86_32
|
||||||
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
|
idet->filter_line = idet_filter_line_mmx;
|
||||||
|
}
|
||||||
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
|
idet->filter_line = idet_filter_line_mmxext;
|
||||||
|
}
|
||||||
|
#endif // ARCH_X86_32
|
||||||
|
|
||||||
|
/* checked last so that SSE2 takes precedence over the MMX variants */
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
|
idet->filter_line = idet_filter_line_sse2;
|
||||||
|
}
|
||||||
|
#endif // HAVE_YASM
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user