mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avfilter/vf_overlay: add x86 SIMD
Specifically for yuv444, yuv422, yuv420 format when main stream has no alpha, and alpha is straight. Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
parent
a150b2e3a0
commit
6d7c63588c
@ -39,6 +39,7 @@
|
|||||||
#include "drawutils.h"
|
#include "drawutils.h"
|
||||||
#include "framesync.h"
|
#include "framesync.h"
|
||||||
#include "video.h"
|
#include "video.h"
|
||||||
|
#include "vf_overlay.h"
|
||||||
|
|
||||||
typedef struct ThreadData {
|
typedef struct ThreadData {
|
||||||
AVFrame *dst, *src;
|
AVFrame *dst, *src;
|
||||||
@ -59,21 +60,6 @@ static const char *const var_names[] = {
|
|||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
enum var_name {
|
|
||||||
VAR_MAIN_W, VAR_MW,
|
|
||||||
VAR_MAIN_H, VAR_MH,
|
|
||||||
VAR_OVERLAY_W, VAR_OW,
|
|
||||||
VAR_OVERLAY_H, VAR_OH,
|
|
||||||
VAR_HSUB,
|
|
||||||
VAR_VSUB,
|
|
||||||
VAR_X,
|
|
||||||
VAR_Y,
|
|
||||||
VAR_N,
|
|
||||||
VAR_POS,
|
|
||||||
VAR_T,
|
|
||||||
VAR_VARS_NB
|
|
||||||
};
|
|
||||||
|
|
||||||
#define MAIN 0
|
#define MAIN 0
|
||||||
#define OVERLAY 1
|
#define OVERLAY 1
|
||||||
|
|
||||||
@ -92,45 +78,6 @@ enum EvalMode {
|
|||||||
EVAL_MODE_NB
|
EVAL_MODE_NB
|
||||||
};
|
};
|
||||||
|
|
||||||
enum OverlayFormat {
|
|
||||||
OVERLAY_FORMAT_YUV420,
|
|
||||||
OVERLAY_FORMAT_YUV422,
|
|
||||||
OVERLAY_FORMAT_YUV444,
|
|
||||||
OVERLAY_FORMAT_RGB,
|
|
||||||
OVERLAY_FORMAT_GBRP,
|
|
||||||
OVERLAY_FORMAT_AUTO,
|
|
||||||
OVERLAY_FORMAT_NB
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct OverlayContext {
|
|
||||||
const AVClass *class;
|
|
||||||
int x, y; ///< position of overlaid picture
|
|
||||||
|
|
||||||
uint8_t main_is_packed_rgb;
|
|
||||||
uint8_t main_rgba_map[4];
|
|
||||||
uint8_t main_has_alpha;
|
|
||||||
uint8_t overlay_is_packed_rgb;
|
|
||||||
uint8_t overlay_rgba_map[4];
|
|
||||||
uint8_t overlay_has_alpha;
|
|
||||||
int format; ///< OverlayFormat
|
|
||||||
int alpha_format;
|
|
||||||
int eval_mode; ///< EvalMode
|
|
||||||
|
|
||||||
FFFrameSync fs;
|
|
||||||
|
|
||||||
int main_pix_step[4]; ///< steps per pixel for each plane of the main output
|
|
||||||
int overlay_pix_step[4]; ///< steps per pixel for each plane of the overlay
|
|
||||||
int hsub, vsub; ///< chroma subsampling values
|
|
||||||
const AVPixFmtDescriptor *main_desc; ///< format descriptor for main input
|
|
||||||
|
|
||||||
double var_values[VAR_VARS_NB];
|
|
||||||
char *x_expr, *y_expr;
|
|
||||||
|
|
||||||
AVExpr *x_pexpr, *y_pexpr;
|
|
||||||
|
|
||||||
int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
|
|
||||||
} OverlayContext;
|
|
||||||
|
|
||||||
static av_cold void uninit(AVFilterContext *ctx)
|
static av_cold void uninit(AVFilterContext *ctx)
|
||||||
{
|
{
|
||||||
OverlayContext *s = ctx->priv;
|
OverlayContext *s = ctx->priv;
|
||||||
@ -509,6 +456,7 @@ static av_always_inline void blend_plane(AVFilterContext *ctx,
|
|||||||
int jobnr,
|
int jobnr,
|
||||||
int nb_jobs)
|
int nb_jobs)
|
||||||
{
|
{
|
||||||
|
OverlayContext *octx = ctx->priv;
|
||||||
int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
|
int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
|
||||||
int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
|
int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
|
||||||
int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
|
int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
|
||||||
@ -538,8 +486,18 @@ static av_always_inline void blend_plane(AVFilterContext *ctx,
|
|||||||
s = sp + k;
|
s = sp + k;
|
||||||
a = ap + (k<<hsub);
|
a = ap + (k<<hsub);
|
||||||
da = dap + ((xp+k) << hsub);
|
da = dap + ((xp+k) << hsub);
|
||||||
|
kmax = FFMIN(-xp + dst_wp, src_wp);
|
||||||
|
|
||||||
for (kmax = FFMIN(-xp + dst_wp, src_wp); k < kmax; k++) {
|
if (((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) {
|
||||||
|
int c = octx->blend_row[i](d, da, s, a, kmax - k, src->linesize[3]);
|
||||||
|
|
||||||
|
s += c;
|
||||||
|
d += dst_step * c;
|
||||||
|
da += (1 << hsub) * c;
|
||||||
|
a += (1 << hsub) * c;
|
||||||
|
k += c;
|
||||||
|
}
|
||||||
|
for (; k < kmax; k++) {
|
||||||
int alpha_v, alpha_h, alpha;
|
int alpha_v, alpha_h, alpha;
|
||||||
|
|
||||||
// average alpha for color components, improve quality
|
// average alpha for color components, improve quality
|
||||||
@ -916,7 +874,7 @@ static int config_input_main(AVFilterLink *inlink)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!s->alpha_format)
|
if (!s->alpha_format)
|
||||||
return 0;
|
goto end;
|
||||||
|
|
||||||
switch (s->format) {
|
switch (s->format) {
|
||||||
case OVERLAY_FORMAT_YUV420:
|
case OVERLAY_FORMAT_YUV420:
|
||||||
@ -960,6 +918,11 @@ static int config_input_main(AVFilterLink *inlink)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
end:
|
||||||
|
if (ARCH_X86)
|
||||||
|
ff_overlay_init_x86(s, s->format, s->alpha_format, s->main_has_alpha);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
85
libavfilter/vf_overlay.h
Normal file
85
libavfilter/vf_overlay.h
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVFILTER_OVERLAY_H
|
||||||
|
#define AVFILTER_OVERLAY_H
|
||||||
|
|
||||||
|
#include "libavutil/eval.h"
|
||||||
|
#include "libavutil/pixdesc.h"
|
||||||
|
#include "framesync.h"
|
||||||
|
#include "avfilter.h"
|
||||||
|
|
||||||
|
/**
 * Indices into OverlayContext.var_values[], which is sized by VAR_VARS_NB.
 *
 * Every enumerator takes the next consecutive value, so the long/short
 * pairs on one line (e.g. VAR_MAIN_W and VAR_MW) are two distinct slots,
 * not aliases of each other.
 * NOTE(review): presumably the order must match var_names[] in
 * vf_overlay.c -- confirm before reordering or inserting entries.
 */
enum var_name {
|
||||||
|
VAR_MAIN_W, VAR_MW,
|
||||||
|
VAR_MAIN_H, VAR_MH,
|
||||||
|
VAR_OVERLAY_W, VAR_OW,
|
||||||
|
VAR_OVERLAY_H, VAR_OH,
|
||||||
|
VAR_HSUB,
|
||||||
|
VAR_VSUB,
|
||||||
|
VAR_X,
|
||||||
|
VAR_Y,
|
||||||
|
VAR_N,
|
||||||
|
VAR_POS,
|
||||||
|
VAR_T,
|
||||||
|
VAR_VARS_NB /* number of variables; keep last */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Values stored in OverlayContext.format.
 *
 * ff_overlay_init_x86() installs SIMD row blenders only for
 * OVERLAY_FORMAT_YUV420, OVERLAY_FORMAT_YUV422, OVERLAY_FORMAT_YUV444 and
 * OVERLAY_FORMAT_GBRP; the other values leave blend_row[] untouched.
 */
enum OverlayFormat {
|
||||||
|
OVERLAY_FORMAT_YUV420,
|
||||||
|
OVERLAY_FORMAT_YUV422,
|
||||||
|
OVERLAY_FORMAT_YUV444,
|
||||||
|
OVERLAY_FORMAT_RGB,
|
||||||
|
OVERLAY_FORMAT_GBRP,
|
||||||
|
OVERLAY_FORMAT_AUTO,
|
||||||
|
OVERLAY_FORMAT_NB /* number of formats; keep last */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Private context of the overlay filter.
 *
 * It lives in this header so that the filter (vf_overlay.c) and the
 * architecture-specific init code (x86/vf_overlay_init.c) see the same
 * layout; the x86 init code only writes blend_row[].
 */
typedef struct OverlayContext {
|
||||||
|
const AVClass *class;
|
||||||
|
int x, y; ///< position of overlaid picture
|
||||||
|
|
||||||
|
uint8_t main_is_packed_rgb;
|
||||||
|
uint8_t main_rgba_map[4];
|
||||||
|
uint8_t main_has_alpha; ///< passed to ff_overlay_init_x86(); SIMD rows are installed only when this is 0
|
||||||
|
uint8_t overlay_is_packed_rgb;
|
||||||
|
uint8_t overlay_rgba_map[4];
|
||||||
|
uint8_t overlay_has_alpha;
|
||||||
|
int format; ///< OverlayFormat
|
||||||
|
int alpha_format; ///< passed to ff_overlay_init_x86(); SIMD rows are installed only when this is 0
|
||||||
|
int eval_mode; ///< EvalMode
|
||||||
|
|
||||||
|
FFFrameSync fs;
|
||||||
|
|
||||||
|
int main_pix_step[4]; ///< steps per pixel for each plane of the main output
|
||||||
|
int overlay_pix_step[4]; ///< steps per pixel for each plane of the overlay
|
||||||
|
int hsub, vsub; ///< chroma subsampling values
|
||||||
|
const AVPixFmtDescriptor *main_desc; ///< format descriptor for main input
|
||||||
|
|
||||||
|
double var_values[VAR_VARS_NB]; ///< one slot per enum var_name entry
|
||||||
|
char *x_expr, *y_expr;
|
||||||
|
|
||||||
|
AVExpr *x_pexpr, *y_pexpr;
|
||||||
|
|
||||||
|
/**
 * Optional accelerated row blenders; a NULL entry means "use the C loop".
 *
 * blend_plane() calls blend_row[i](d, da, s, a, w, alinesize) with the
 * current destination, destination-alpha, source and source-alpha
 * pointers, the number of samples left in the row, and the overlay alpha
 * linesize (src->linesize[3]). The return value is the number of samples
 * the routine handled: the caller advances its pointers by that count and
 * finishes the remainder of the row with the scalar code.
 * NOTE(review): the index i is presumably the plane/component index used
 * by blend_plane() -- confirm against the caller.
 */
int (*blend_row[4])(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a, int w,
|
||||||
|
ptrdiff_t alinesize);
|
||||||
|
int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
|
||||||
|
} OverlayContext;
|
||||||
|
|
||||||
|
/**
 * Install x86 SIMD row blenders into s->blend_row[0..2].
 *
 * Entries are written only when SSE4 is available, alpha_format == 0 and
 * main_has_alpha == 0, and format is one of OVERLAY_FORMAT_YUV420,
 * OVERLAY_FORMAT_YUV422, OVERLAY_FORMAT_YUV444 or OVERLAY_FORMAT_GBRP;
 * otherwise the context is left unchanged.
 */
void ff_overlay_init_x86(OverlayContext *s, int format, int alpha_format, int main_has_alpha);
|
||||||
|
|
||||||
|
#endif /* AVFILTER_OVERLAY_H */
|
@ -13,6 +13,7 @@ OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_tinterlace_init.o
|
|||||||
OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o
|
OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o
|
||||||
OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge_init.o
|
OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge_init.o
|
||||||
OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o
|
OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o
|
||||||
|
OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay_init.o
|
||||||
OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7_init.o
|
OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7_init.o
|
||||||
OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr_init.o
|
OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr_init.o
|
||||||
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
|
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
|
||||||
@ -41,6 +42,7 @@ X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
|
|||||||
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
|
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER) += x86/vf_interlace.o
|
||||||
X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o
|
X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o
|
||||||
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge.o
|
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER) += x86/vf_maskedmerge.o
|
||||||
|
X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER) += x86/vf_overlay.o
|
||||||
X86ASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o
|
X86ASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o
|
||||||
X86ASM-OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr.o
|
X86ASM-OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr.o
|
||||||
X86ASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o
|
X86ASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o
|
||||||
|
144
libavfilter/x86/vf_overlay.asm
Normal file
144
libavfilter/x86/vf_overlay.asm
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
;*****************************************************************************
|
||||||
|
;* x86-optimized functions for overlay filter
|
||||||
|
;*
|
||||||
|
;* Copyright (C) 2018 Paul B Mahol
|
||||||
|
;* Copyright (C) 2018 Henrik Gramner
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;*****************************************************************************
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION_RODATA
|
||||||
|
|
||||||
|
pb_1: times 16 db 1
|
||||||
|
pw_128: times 8 dw 128
|
||||||
|
pw_255: times 8 dw 255
|
||||||
|
pw_257: times 8 dw 257
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
; int overlay_row_44(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a, int w, ...)
;
; Blend one row of 8-bit samples with one alpha byte per sample (no
; subsampling). For every group of mmsize/2 samples it computes
;     d = ((s*a + d*(a ^ 255) + 128) * 257) >> 16
; i.e. a rounded division by 255 of the straight-alpha mix.
; Only whole groups are processed; the number of samples written is returned
; in eax so the C caller can finish the tail. Rows shorter than one group
; return 0 without touching memory. "da" is not read here, and "r" (the
; sixth register slot) is used purely as scratch.
INIT_XMM sse4
|
||||||
|
cglobal overlay_row_44, 5, 7, 6, 0, d, da, s, a, w, r, x
|
||||||
|
xor xq, xq ; x = 0: samples processed so far (also the return value)
|
||||||
|
movsxdifnidn wq, wd
|
||||||
|
mov rq, wq
|
||||||
|
and rq, mmsize/2 - 1 ; r = w modulo the group size
|
||||||
|
cmp wq, mmsize/2
|
||||||
|
jl .end ; not even one full group: return 0
|
||||||
|
sub wq, rq ; round w down to a multiple of the group size
|
||||||
|
mova m3, [pw_255]
|
||||||
|
mova m4, [pw_128]
|
||||||
|
mova m5, [pw_257]
|
||||||
|
.loop:
|
||||||
|
pmovzxbw m0, [sq+xq] ; m0 = source samples, widened to 16 bit
|
||||||
|
pmovzxbw m2, [aq+xq] ; m2 = alpha, widened to 16 bit
|
||||||
|
pmovzxbw m1, [dq+xq] ; m1 = destination samples, widened to 16 bit
|
||||||
|
pmullw m0, m2 ; m0 = s * a
|
||||||
|
pxor m2, m3 ; m2 = a ^ 255 = 255 - a
|
||||||
|
pmullw m1, m2 ; m1 = d * (255 - a)
|
||||||
|
paddw m0, m4 ; + 128 for rounding
|
||||||
|
paddw m0, m1
|
||||||
|
pmulhuw m0, m5 ; high word of (sum * 257): rounded sum / 255
|
||||||
|
packuswb m0, m0 ; back to bytes; the low half holds the result
|
||||||
|
movq [dq+xq], m0
|
||||||
|
add xq, mmsize/2
|
||||||
|
cmp xq, wq
|
||||||
|
jl .loop
|
||||||
|
|
||||||
|
.end:
|
||||||
|
mov eax, xd ; return the number of samples handled
|
||||||
|
RET
|
||||||
|
|
||||||
|
; int overlay_row_22(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a, int w, ...)
;
; Blend one row of horizontally subsampled samples: each output sample uses
; two neighbouring alpha bytes a[2k] and a[2k+1] from the same alpha row.
; The alpha used is avg(avg(a[2k+1], a[2k]), a[2k]) computed in 8.8 fixed
; point, then the blend is d = ((s*A + d*(A ^ 255) + 128) * 257) >> 16.
; w is reduced by one before being rounded down to whole groups of
; mmsize/2 samples; the count of samples written is returned in eax so the
; C caller can finish the rest of the row.
; NOTE(review): the "w - 1" presumably keeps the 16-byte alpha load inside
; the row -- confirm. "da" is not read here; "r" is scratch.
INIT_XMM sse4
|
||||||
|
cglobal overlay_row_22, 5, 7, 6, 0, d, da, s, a, w, r, x
|
||||||
|
xor xq, xq ; x = 0: samples processed so far (also the return value)
|
||||||
|
movsxdifnidn wq, wd
|
||||||
|
sub wq, 1 ; leave at least the last sample to the caller
|
||||||
|
mov rq, wq
|
||||||
|
and rq, mmsize/2 - 1
|
||||||
|
cmp wq, mmsize/2
|
||||||
|
jl .end ; not even one full group: return 0
|
||||||
|
sub wq, rq ; round down to a multiple of the group size
|
||||||
|
mova m3, [pw_255]
|
||||||
|
mova m4, [pw_128]
|
||||||
|
mova m5, [pw_257]
|
||||||
|
.loop:
|
||||||
|
pmovzxbw m0, [sq+xq] ; m0 = source samples, widened to 16 bit
|
||||||
|
movu m1, [aq+2*xq] ; 16 alpha bytes = one pair per output sample
|
||||||
|
pandn m2, m3, m1 ; m2 = ~0x00ff & m1: a[2k+1] kept in the high byte
|
||||||
|
psllw m1, 8 ; m1 = a[2k] moved to the high byte
|
||||||
|
pavgw m2, m1 ; average of the pair
|
||||||
|
pavgw m2, m1 ; averaged once more with a[2k]
|
||||||
|
psrlw m2, 8 ; back to an 8-bit alpha in each word
|
||||||
|
pmovzxbw m1, [dq+xq] ; m1 = destination samples, widened to 16 bit
|
||||||
|
pmullw m0, m2 ; m0 = s * A
|
||||||
|
pxor m2, m3 ; m2 = A ^ 255 = 255 - A
|
||||||
|
pmullw m1, m2 ; m1 = d * (255 - A)
|
||||||
|
paddw m0, m4 ; + 128 for rounding
|
||||||
|
paddw m0, m1
|
||||||
|
pmulhuw m0, m5 ; high word of (sum * 257): rounded sum / 255
|
||||||
|
packuswb m0, m0
|
||||||
|
movq [dq+xq], m0
|
||||||
|
add xq, mmsize/2
|
||||||
|
cmp xq, wq
|
||||||
|
jl .loop
|
||||||
|
|
||||||
|
.end:
|
||||||
|
mov eax, xd ; return the number of samples handled
|
||||||
|
RET
|
||||||
|
|
||||||
|
; int overlay_row_20(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a, int w,
;                    ptrdiff_t alinesize)
;
; Blend one row of samples subsampled in both directions: each output sample
; uses the 2x2 alpha block a[2k], a[2k+1] of the current alpha row and of
; the row alinesize bytes further on. The alpha used is the sum of the four
; bytes shifted right by 2, then
;     d = ((s*A + d*(A ^ 255) + 128) * 257) >> 16.
; The incoming "da" argument is overwritten and reused as the pointer to the
; second alpha row. The C caller (blend_plane) only calls a row function
; when vsub is 0 or another source row follows (j+1 < src_hp).
; w is reduced by one before being rounded down to whole groups of
; mmsize/2 samples; the count of samples written is returned in eax.
INIT_XMM sse4
|
||||||
|
cglobal overlay_row_20, 6, 7, 7, 0, d, da, s, a, w, r, x
|
||||||
|
mov daq, aq
|
||||||
|
add daq, rmp ; da = a + alinesize: the next alpha row
|
||||||
|
xor xq, xq ; x = 0: samples processed so far (also the return value)
|
||||||
|
movsxdifnidn wq, wd
|
||||||
|
sub wq, 1 ; leave at least the last sample to the caller
|
||||||
|
mov rq, wq ; r (alinesize) is no longer needed; reuse as scratch
|
||||||
|
and rq, mmsize/2 - 1
|
||||||
|
cmp wq, mmsize/2
|
||||||
|
jl .end ; not even one full group: return 0
|
||||||
|
sub wq, rq ; round down to a multiple of the group size
|
||||||
|
mova m3, [pw_255]
|
||||||
|
mova m4, [pw_128]
|
||||||
|
mova m5, [pw_257]
|
||||||
|
mova m6, [pb_1]
|
||||||
|
.loop:
|
||||||
|
pmovzxbw m0, [sq+xq] ; m0 = source samples, widened to 16 bit
|
||||||
|
movu m2, [aq+2*xq] ; alpha pairs, current row
|
||||||
|
movu m1, [daq+2*xq] ; alpha pairs, next row
|
||||||
|
pmaddubsw m2, m6 ; multiply by 1 and add pairs: a[2k] + a[2k+1]
|
||||||
|
pmaddubsw m1, m6 ; same for the next row
|
||||||
|
paddw m2, m1 ; sum of the 2x2 block
|
||||||
|
psrlw m2, 2 ; A = sum / 4
|
||||||
|
pmovzxbw m1, [dq+xq] ; m1 = destination samples, widened to 16 bit
|
||||||
|
pmullw m0, m2 ; m0 = s * A
|
||||||
|
pxor m2, m3 ; m2 = A ^ 255 = 255 - A
|
||||||
|
pmullw m1, m2 ; m1 = d * (255 - A)
|
||||||
|
paddw m0, m4 ; + 128 for rounding
|
||||||
|
paddw m0, m1
|
||||||
|
pmulhuw m0, m5 ; high word of (sum * 257): rounded sum / 255
|
||||||
|
packuswb m0, m0
|
||||||
|
movq [dq+xq], m0
|
||||||
|
add xq, mmsize/2
|
||||||
|
cmp xq, wq
|
||||||
|
jl .loop
|
||||||
|
|
||||||
|
.end:
|
||||||
|
mov eax, xd ; return the number of samples handled
|
||||||
|
RET
|
63
libavfilter/x86/vf_overlay_init.c
Normal file
63
libavfilter/x86/vf_overlay_init.c
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018 Paul B Mahol
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavfilter/vf_overlay.h"
|
||||||
|
|
||||||
|
/*
 * Row blenders implemented in x86/vf_overlay.asm. All share the
 * OverlayContext.blend_row signature and return the number of samples they
 * processed (the assembly returns its loop counter), which may be fewer
 * than w.
 */

/* One alpha byte per sample (no subsampling); da and alinesize are not read. */
int ff_overlay_row_44_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
|
||||||
|
int w, ptrdiff_t alinesize);
|
||||||
|
|
||||||
|
/* 2x2 alpha block per sample; reads a second alpha row at a + alinesize. */
int ff_overlay_row_20_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
|
||||||
|
int w, ptrdiff_t alinesize);
|
||||||
|
|
||||||
|
/* Two horizontally adjacent alpha bytes per sample; da and alinesize are not read. */
int ff_overlay_row_22_sse4(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
|
||||||
|
int w, ptrdiff_t alinesize);
|
||||||
|
|
||||||
|
/**
 * Install the SSE4 row blenders into the overlay context.
 *
 * Nothing is written unless SSE4 is available and both alpha_format and
 * main_has_alpha are 0. When those hold, blend_row[0] always gets the
 * full-resolution routine, and blend_row[1] and blend_row[2] get the
 * routine matching the format:
 *   - OVERLAY_FORMAT_YUV444 / OVERLAY_FORMAT_GBRP: ff_overlay_row_44_sse4
 *   - OVERLAY_FORMAT_YUV420:                       ff_overlay_row_20_sse4
 *   - OVERLAY_FORMAT_YUV422:                       ff_overlay_row_22_sse4
 * Any other format leaves the context untouched; blend_row[3] is never set.
 *
 * @param s              overlay context whose blend_row[] is filled in
 * @param format         one of enum OverlayFormat
 * @param alpha_format   must be 0 for the SIMD path to be installed
 * @param main_has_alpha must be 0 for the SIMD path to be installed
 */
av_cold void ff_overlay_init_x86(OverlayContext *s, int format, int alpha_format, int main_has_alpha)
{
    int cpu_flags = av_get_cpu_flags();
    int (*subsampled_row)(uint8_t *d, uint8_t *da, uint8_t *s, uint8_t *a,
                          int w, ptrdiff_t alinesize);
    /* Preconditions shared by every SSE4 row blender. */
    const int simd_usable = EXTERNAL_SSE4(cpu_flags) &&
                            alpha_format == 0 && main_has_alpha == 0;

    if (!simd_usable)
        return;

    /* Pick the routine for planes 1 and 2 according to the subsampling. */
    switch (format) {
    case OVERLAY_FORMAT_YUV444:
    case OVERLAY_FORMAT_GBRP:
        subsampled_row = ff_overlay_row_44_sse4;
        break;
    case OVERLAY_FORMAT_YUV420:
        subsampled_row = ff_overlay_row_20_sse4;
        break;
    case OVERLAY_FORMAT_YUV422:
        subsampled_row = ff_overlay_row_22_sse4;
        break;
    default:
        /* No SIMD implementation for this format. */
        return;
    }

    /* Plane 0 is always blended at full resolution. */
    s->blend_row[0] = ff_overlay_row_44_sse4;
    s->blend_row[1] = subsampled_row;
    s->blend_row[2] = subsampled_row;
}
|
Loading…
Reference in New Issue
Block a user