mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
avfilter/vf_maskedclamp: add x86 SIMD
This commit is contained in:
parent
11cfff04ed
commit
ac0f5f4c17
35
libavfilter/maskedclamp.h
Normal file
35
libavfilter/maskedclamp.h
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019 Paul B Mahol
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVFILTER_MASKEDCLAMP_H
|
||||||
|
#define AVFILTER_MASKEDCLAMP_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/**
 * Function table for the maskedclamp filter's row kernel.
 *
 * The filter fills in a C implementation first; ff_maskedclamp_init_x86()
 * may then replace it with a SIMD version.
 */
typedef struct MaskedClampDSPContext {
    /**
     * Process one row: each output sample is the corresponding bsrc sample
     * clamped to [darksrc - undershoot, brightsrc + overshoot].
     *
     * @param bsrc       row of the base input
     * @param dst        row of the output
     * @param darksrc    row supplying the lower bound
     * @param brightsrc  row supplying the upper bound
     * @param w          row width in samples (the 16-bit x86 kernel doubles
     *                   it to obtain a byte count)
     * @param undershoot amount subtracted from the lower bound
     * @param overshoot  amount added to the upper bound
     *
     * Pointers are typed uint8_t even for >8-bit depths; the 16-bit
     * implementations treat the rows as 16-bit samples.
     */
    void (*maskedclamp)(const uint8_t *bsrc, uint8_t *dst,
                        const uint8_t *darksrc, const uint8_t *brightsrc,
                        int w, int undershoot, int overshoot);
} MaskedClampDSPContext;
|
||||||
|
|
||||||
|
/**
 * Install x86 SIMD row kernels into dsp when the running CPU supports them.
 *
 * For depth <= 8 an SSE2 kernel is selected, for depth > 8 an SSE4 kernel;
 * if the required CPU feature is missing, dsp->maskedclamp is left untouched,
 * so the caller must have set a fallback beforehand.
 *
 * @param dsp   function table to update
 * @param depth bit depth of the samples being processed
 */
void ff_maskedclamp_init_x86(MaskedClampDSPContext *dsp, int depth);
|
||||||
|
|
||||||
|
#endif /* AVFILTER_MASKEDCLAMP_H */
|
@ -26,6 +26,7 @@
|
|||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "video.h"
|
#include "video.h"
|
||||||
#include "framesync.h"
|
#include "framesync.h"
|
||||||
|
#include "maskedclamp.h"
|
||||||
|
|
||||||
#define OFFSET(x) offsetof(MaskedClampContext, x)
|
#define OFFSET(x) offsetof(MaskedClampContext, x)
|
||||||
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
|
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
|
||||||
@ -47,9 +48,7 @@ typedef struct MaskedClampContext {
|
|||||||
int depth;
|
int depth;
|
||||||
FFFrameSync fs;
|
FFFrameSync fs;
|
||||||
|
|
||||||
void (*maskedclamp)(const uint8_t *bsrc, uint8_t *dst,
|
MaskedClampDSPContext dsp;
|
||||||
const uint8_t *darksrc, const uint8_t *brightsrc,
|
|
||||||
int w, int undershoot, int overshoot);
|
|
||||||
} MaskedClampContext;
|
} MaskedClampContext;
|
||||||
|
|
||||||
static const AVOption maskedclamp_options[] = {
|
static const AVOption maskedclamp_options[] = {
|
||||||
@ -117,7 +116,7 @@ static int maskedclamp_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (y = slice_start; y < slice_end; y++) {
|
for (y = slice_start; y < slice_end; y++) {
|
||||||
s->maskedclamp(bsrc, dst, darksrc, brightsrc, w, undershoot, overshoot);
|
s->dsp.maskedclamp(bsrc, dst, darksrc, brightsrc, w, undershoot, overshoot);
|
||||||
|
|
||||||
dst += dlinesize;
|
dst += dlinesize;
|
||||||
bsrc += blinesize;
|
bsrc += blinesize;
|
||||||
@ -210,11 +209,16 @@ static int config_input(AVFilterLink *inlink)
|
|||||||
s->width[0] = s->width[3] = inlink->w;
|
s->width[0] = s->width[3] = inlink->w;
|
||||||
|
|
||||||
s->depth = desc->comp[0].depth;
|
s->depth = desc->comp[0].depth;
|
||||||
|
s->undershoot = FFMIN(s->undershoot, (1 << s->depth) - 1);
|
||||||
|
s->overshoot = FFMIN(s->overshoot, (1 << s->depth) - 1);
|
||||||
|
|
||||||
if (desc->comp[0].depth == 8)
|
if (s->depth <= 8)
|
||||||
s->maskedclamp = maskedclamp8;
|
s->dsp.maskedclamp = maskedclamp8;
|
||||||
else
|
else
|
||||||
s->maskedclamp = maskedclamp16;
|
s->dsp.maskedclamp = maskedclamp16;
|
||||||
|
|
||||||
|
if (ARCH_X86)
|
||||||
|
ff_maskedclamp_init_x86(&s->dsp, s->depth);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@ OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
|
|||||||
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
|
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
|
||||||
OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_tinterlace_init.o
|
OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_tinterlace_init.o
|
||||||
OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o
|
OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o
|
||||||
|
OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += x86/vf_maskedclamp_init.o
|
||||||
OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge_init.o
|
OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge_init.o
|
||||||
OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o
|
OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o
|
||||||
OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay_init.o
|
OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay_init.o
|
||||||
@ -56,6 +57,7 @@ X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
|
|||||||
X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
|
X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
|
||||||
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
|
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
|
||||||
X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o
|
X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o
|
||||||
|
X86ASM-OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += x86/vf_maskedclamp.o
|
||||||
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge.o
|
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge.o
|
||||||
X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay.o
|
X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay.o
|
||||||
X86ASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o
|
X86ASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o
|
||||||
|
95
libavfilter/x86/vf_maskedclamp.asm
Normal file
95
libavfilter/x86/vf_maskedclamp.asm
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
;*****************************************************************************
|
||||||
|
;* x86-optimized functions for maskedclamp filter
|
||||||
|
;*
|
||||||
|
;* Copyright (c) 2019 Paul B Mahol
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
; void ff_maskedclamp{8,16}(const uint8_t *src, uint8_t *dst,
|
||||||
|
; const uint8_t *darksrc,
|
||||||
|
; const uint8_t *brightsrc,
|
||||||
|
; int w, int undershoot, int overshoot)
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
; 8-bit kernel:
;   dst[x] = clamp(src[x], dark[x] - undershoot, bright[x] + overshoot)
; Bounds are computed with unsigned saturating byte arithmetic, and one
; iteration handles mmsize bytes.
INIT_XMM sse2
cglobal maskedclamp8, 5,5,5, src, dst, dark, bright, w, undershoot, overshoot
    movsxdifnidn wq, wd             ; widen the int width to pointer size where needed

    ; Move every row pointer to its end and walk a negative index up to zero,
    ; so a single add/jl pair both advances and terminates the loop.
    add        srcq, wq
    add       darkq, wq
    add     brightq, wq
    add        dstq, wq
    neg          wq

    ; m3 = undershoot in every byte lane: duplicate the low byte into a word,
    ; then splat that word (SPLATW is an x86util macro).
    movd         m3, r5m
    punpcklbw    m3, m3
    SPLATW       m3, m3

    ; m4 = overshoot in every byte lane, built the same way.
    movd         m4, r6m
    punpcklbw    m4, m4
    SPLATW       m4, m4

    ; NOTE(review): the loop always works on whole mmsize-byte vectors, so the
    ; last iteration can touch bytes past w; callers presumably provide padded
    ; rows - confirm. The store uses mova (aligned form), which assumes dst
    ; rows are mmsize-aligned - confirm against the caller.
    .loop:
        movu                  m0, [srcq + wq]
        movu                  m1, [darkq + wq]
        movu                  m2, [brightq + wq]

        psubusb               m1, m3        ; lower bound = dark - undershoot, saturating at 0
        paddusb               m2, m4        ; upper bound = bright + overshoot, saturating at 255
        CLIPUB                m0, m1, m2    ; x86util macro (dst, min, max): clamp src to the bounds
        mova         [dstq + wq], m0

        add                   wq, mmsize
        jl .loop
    RET
|
||||||
|
|
||||||
|
; 16-bit kernel:
;   dst[x] = clamp(src[x], dark[x] - undershoot, bright[x] + overshoot)
; Same as the 8-bit kernel but on unsigned words. pmaxuw/pminuw are SSE4.1
; instructions, hence the sse4 requirement.
INIT_XMM sse4
cglobal maskedclamp16, 5,5,5, src, dst, dark, bright, w, undershoot, overshoot
    shl          wd, 1              ; width in samples -> width in bytes
                                    ; (a 32-bit write also clears the upper half of wq on x86-64)

    ; Move every row pointer to its end and walk a negative byte index up to zero.
    add        srcq, wq
    add       darkq, wq
    add     brightq, wq
    add        dstq, wq
    neg          wq

    ; m3 = low 16 bits of undershoot in every word lane (SPLATW is an x86util macro).
    movd         m3, r5m
    SPLATW       m3, m3

    ; m4 = low 16 bits of overshoot in every word lane.
    movd         m4, r6m
    SPLATW       m4, m4

    ; NOTE(review): the loop always works on whole mmsize-byte vectors, so the
    ; last iteration can touch bytes past the row end; callers presumably
    ; provide padded rows - confirm. The store uses mova (aligned form), which
    ; assumes dst rows are mmsize-aligned - confirm against the caller.
    .loop:
        movu                  m0, [srcq + wq]
        movu                  m1, [darkq + wq]
        movu                  m2, [brightq + wq]

        psubusw               m1, m3        ; lower bound = dark - undershoot, saturating at 0
        paddusw               m2, m4        ; upper bound = bright + overshoot, saturating at 65535
        pmaxuw                m0, m1        ; raise src to the lower bound
        pminuw                m0, m2        ; then cap it at the upper bound
        mova         [dstq + wq], m0

        add                   wq, mmsize
        jl .loop
    RET
|
47
libavfilter/x86/vf_maskedclamp_init.c
Normal file
47
libavfilter/x86/vf_maskedclamp_init.c
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019 Paul B Mahol
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/mem.h"
|
||||||
|
#include "libavutil/x86/asm.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavfilter/maskedclamp.h"
|
||||||
|
|
||||||
|
/*
 * Row kernels implemented in libavfilter/x86/vf_maskedclamp.asm.
 * Both share the MaskedClampDSPContext.maskedclamp signature so they can be
 * stored in the function table directly.
 */

/* SSE2 kernel operating on 8-bit samples. */
void ff_maskedclamp8_sse2(const uint8_t *bsrc, uint8_t *dst,
                          const uint8_t *darksrc, const uint8_t *brightsrc,
                          int w, int undershoot, int overshoot);

/* SSE4 kernel operating on 16-bit samples (w is still given in samples). */
void ff_maskedclamp16_sse4(const uint8_t *bsrc, uint8_t *dst,
                           const uint8_t *darksrc, const uint8_t *brightsrc,
                           int w, int undershoot, int overshoot);
|
||||||
|
|
||||||
|
/**
 * Select an x86 SIMD row kernel for the maskedclamp filter.
 *
 * The kernel is chosen from the runtime CPU flags and the sample depth:
 * SSE2 for depth <= 8, SSE4 for depth > 8. When the matching CPU feature is
 * unavailable, dsp->maskedclamp is not modified, so whatever implementation
 * the caller installed beforehand stays in effect.
 *
 * @param dsp   function table to update
 * @param depth bit depth of the samples being processed
 */
av_cold void ff_maskedclamp_init_x86(MaskedClampDSPContext *dsp, int depth)
{
    int cpu_flags = av_get_cpu_flags();

    /* The two conditions are mutually exclusive on depth, so at most one
     * assignment happens. */
    if (EXTERNAL_SSE2(cpu_flags) && depth <= 8) {
        dsp->maskedclamp = ff_maskedclamp8_sse2;
    }

    if (EXTERNAL_SSE4(cpu_flags) && depth > 8) {
        dsp->maskedclamp = ff_maskedclamp16_sse4;
    }
}
|
Loading…
Reference in New Issue
Block a user