mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avfilter/vf_maskedclamp: add x86 SIMD
This commit is contained in:
parent
11cfff04ed
commit
ac0f5f4c17
35
libavfilter/maskedclamp.h
Normal file
35
libavfilter/maskedclamp.h
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Paul B Mahol
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_MASKEDCLAMP_H
|
||||
#define AVFILTER_MASKEDCLAMP_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
 * Dispatch table for the maskedclamp row kernel.
 *
 * The filter stores a C implementation here first; an architecture-specific
 * init function may then overwrite the pointer with an optimized version.
 */
typedef struct MaskedClampDSPContext {
    /**
     * Clamp one row of samples between two reference rows.
     *
     * As implemented by the x86 kernels, each output sample is the input
     * sample limited to the range
     * [darksrc - undershoot, brightsrc + overshoot].
     *
     * @param bsrc       row being clamped
     * @param dst        output row
     * @param darksrc    row supplying the lower bound
     * @param brightsrc  row supplying the upper bound
     * @param w          row width in samples (the 16-bit x86 kernel doubles
     *                   it to obtain a byte count)
     * @param undershoot amount subtracted from darksrc
     * @param overshoot  amount added to brightsrc
     */
    void (*maskedclamp)(const uint8_t *bsrc, uint8_t *dst,
                        const uint8_t *darksrc, const uint8_t *brightsrc,
                        int w, int undershoot, int overshoot);
} MaskedClampDSPContext;
|
||||
|
||||
/**
 * Install x86 SIMD row kernels into dsp where the running CPU supports them.
 *
 * @param dsp   context whose maskedclamp pointer may be overwritten
 * @param depth sample bit depth; values <= 8 select the 8-bit kernel,
 *              larger values the 16-bit kernel
 */
void ff_maskedclamp_init_x86(MaskedClampDSPContext *dsp, int depth);
|
||||
|
||||
#endif /* AVFILTER_MASKEDCLAMP_H */
|
@ -26,6 +26,7 @@
|
||||
#include "internal.h"
|
||||
#include "video.h"
|
||||
#include "framesync.h"
|
||||
#include "maskedclamp.h"
|
||||
|
||||
#define OFFSET(x) offsetof(MaskedClampContext, x)
|
||||
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
|
||||
@ -47,9 +48,7 @@ typedef struct MaskedClampContext {
|
||||
int depth;
|
||||
FFFrameSync fs;
|
||||
|
||||
void (*maskedclamp)(const uint8_t *bsrc, uint8_t *dst,
|
||||
const uint8_t *darksrc, const uint8_t *brightsrc,
|
||||
int w, int undershoot, int overshoot);
|
||||
MaskedClampDSPContext dsp;
|
||||
} MaskedClampContext;
|
||||
|
||||
static const AVOption maskedclamp_options[] = {
|
||||
@ -117,7 +116,7 @@ static int maskedclamp_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
|
||||
}
|
||||
|
||||
for (y = slice_start; y < slice_end; y++) {
|
||||
s->maskedclamp(bsrc, dst, darksrc, brightsrc, w, undershoot, overshoot);
|
||||
s->dsp.maskedclamp(bsrc, dst, darksrc, brightsrc, w, undershoot, overshoot);
|
||||
|
||||
dst += dlinesize;
|
||||
bsrc += blinesize;
|
||||
@ -210,11 +209,16 @@ static int config_input(AVFilterLink *inlink)
|
||||
s->width[0] = s->width[3] = inlink->w;
|
||||
|
||||
s->depth = desc->comp[0].depth;
|
||||
s->undershoot = FFMIN(s->undershoot, (1 << s->depth) - 1);
|
||||
s->overshoot = FFMIN(s->overshoot, (1 << s->depth) - 1);
|
||||
|
||||
if (desc->comp[0].depth == 8)
|
||||
s->maskedclamp = maskedclamp8;
|
||||
if (s->depth <= 8)
|
||||
s->dsp.maskedclamp = maskedclamp8;
|
||||
else
|
||||
s->maskedclamp = maskedclamp16;
|
||||
s->dsp.maskedclamp = maskedclamp16;
|
||||
|
||||
if (ARCH_X86)
|
||||
ff_maskedclamp_init_x86(&s->dsp, s->depth);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
|
||||
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
|
||||
OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_tinterlace_init.o
|
||||
OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o
|
||||
OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += x86/vf_maskedclamp_init.o
|
||||
OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge_init.o
|
||||
OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o
|
||||
OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay_init.o
|
||||
@ -56,6 +57,7 @@ X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
|
||||
X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
|
||||
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
|
||||
X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o
|
||||
X86ASM-OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += x86/vf_maskedclamp.o
|
||||
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge.o
|
||||
X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay.o
|
||||
X86ASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o
|
||||
|
95
libavfilter/x86/vf_maskedclamp.asm
Normal file
95
libavfilter/x86/vf_maskedclamp.asm
Normal file
@ -0,0 +1,95 @@
|
||||
;*****************************************************************************
|
||||
;* x86-optimized functions for maskedclamp filter
|
||||
;*
|
||||
;* Copyright (c) 2019 Paul B Mahol
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_maskedclamp(const uint8_t *src, uint8_t *dst,
|
||||
; const uint8_t *darksrc,
|
||||
; const uint8_t *brightsrc,
|
||||
; int w, int undershoot, int overshoot)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
; 8-bit row kernel.
; Per byte: dst = min(max(src, dark -sat undershoot), bright +sat overshoot),
; where -sat / +sat are unsigned saturating operations.
; Rows are handled in whole mmsize-byte vectors and the loop body always runs
; at least once, so up to mmsize-1 bytes past w are read and written when w
; is not a multiple of mmsize.
INIT_XMM sse2
cglobal maskedclamp8, 5,5,5, src, dst, dark, bright, w, undershoot, overshoot
    movsxdifnidn wq, wd             ; widen the int width to pointer size where needed

    ; Advance every row pointer to its end and index with a negative offset
    ; that counts up towards zero.
    add         srcq, wq
    add         darkq, wq
    add         brightq, wq
    add         dstq, wq
    neg         wq

    ; m3 = low byte of undershoot replicated into every byte lane
    ; (only five arguments are loaded into registers by cglobal; the
    ; remaining two are fetched through r5m / r6m).
    movd        m3, r5m
    punpcklbw   m3, m3              ; byte -> word holding the byte twice
    SPLATW      m3, m3              ; broadcast that word across the register

    ; m4 = low byte of overshoot replicated into every byte lane
    movd        m4, r6m
    punpcklbw   m4, m4
    SPLATW      m4, m4

.loop:
    movu        m0, [srcq + wq]
    movu        m1, [darkq + wq]
    movu        m2, [brightq + wq]

    psubusb     m1, m3              ; lower bound, saturating at 0
    paddusb     m2, m4              ; upper bound, saturating at 255
    CLIPUB      m0, m1, m2          ; clamp src to [m1, m2] (x86util macro)
    mova        [dstq + wq], m0     ; aligned store: dst row start must be mmsize-aligned

    add         wq, mmsize
    jl .loop
    RET
|
||||
|
||||
; 16-bit row kernel.
; Per word: dst = min(max(src, dark -sat undershoot), bright +sat overshoot),
; where -sat / +sat are unsigned saturating operations.
; Rows are handled in whole mmsize-byte vectors and the loop body always runs
; at least once, so samples past w are read and written when the row's byte
; length is not a multiple of mmsize.
; pmaxuw / pminuw are SSE4.1 instructions, hence the sse4 target.
INIT_XMM sse4
cglobal maskedclamp16, 5,5,5, src, dst, dark, bright, w, undershoot, overshoot
    shl         wd, 1               ; width in 16-bit samples -> width in bytes

    ; Advance every row pointer to its end and index with a negative offset
    ; that counts up towards zero.
    add         srcq, wq
    add         darkq, wq
    add         brightq, wq
    add         dstq, wq
    neg         wq

    ; m3 = low word of undershoot replicated into every word lane
    ; (only five arguments are loaded into registers by cglobal; the
    ; remaining two are fetched through r5m / r6m).
    movd        m3, r5m
    SPLATW      m3, m3

    ; m4 = low word of overshoot replicated into every word lane
    movd        m4, r6m
    SPLATW      m4, m4

.loop:
    movu        m0, [srcq + wq]
    movu        m1, [darkq + wq]
    movu        m2, [brightq + wq]

    psubusw     m1, m3              ; lower bound, saturating at 0
    paddusw     m2, m4              ; upper bound, saturating at 65535
    pmaxuw      m0, m1              ; apply lower bound first
    pminuw      m0, m2              ; then upper bound (wins if the bounds cross)
    mova        [dstq + wq], m0     ; aligned store: dst row start must be mmsize-aligned

    add         wq, mmsize
    jl .loop
    RET
|
47
libavfilter/x86/vf_maskedclamp_init.c
Normal file
47
libavfilter/x86/vf_maskedclamp_init.c
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Paul B Mahol
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavfilter/maskedclamp.h"
|
||||
|
||||
/**
 * SSE2 row kernel for sample depths of 8 bits or fewer.
 * The body is written in assembly (see cglobal maskedclamp8 in
 * libavfilter/x86/vf_maskedclamp.asm); the signature matches
 * MaskedClampDSPContext.maskedclamp.
 */
void ff_maskedclamp8_sse2(const uint8_t *bsrc, uint8_t *dst,
                          const uint8_t *darksrc, const uint8_t *brightsrc,
                          int w, int undershoot, int overshoot);
|
||||
|
||||
/**
 * SSE4 row kernel for sample depths above 8 bits (16-bit storage).
 * The body is written in assembly (see cglobal maskedclamp16 in
 * libavfilter/x86/vf_maskedclamp.asm); the signature matches
 * MaskedClampDSPContext.maskedclamp, so rows are passed as byte pointers.
 */
void ff_maskedclamp16_sse4(const uint8_t *bsrc, uint8_t *dst,
                           const uint8_t *darksrc, const uint8_t *brightsrc,
                           int w, int undershoot, int overshoot);
|
||||
|
||||
/**
 * Replace the maskedclamp row kernel with an x86 SIMD version when the
 * running CPU supports one for the given bit depth.
 *
 * The pointer in dsp is left untouched when no suitable SIMD version is
 * available, so the caller must have installed a fallback beforehand.
 *
 * @param dsp   context whose maskedclamp pointer may be overwritten
 * @param depth sample bit depth; values <= 8 use the SSE2 8-bit kernel,
 *              larger values use the SSE4 16-bit kernel
 */
av_cold void ff_maskedclamp_init_x86(MaskedClampDSPContext *dsp, int depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags) && depth <= 8) {
        dsp->maskedclamp = ff_maskedclamp8_sse2;
    }

    if (EXTERNAL_SSE4(cpu_flags) && depth > 8) {
        dsp->maskedclamp = ff_maskedclamp16_sse4;
    }
}
|
Loading…
Reference in New Issue
Block a user