diff --git a/Changelog b/Changelog
index 92a72490a9..51c3f85a28 100644
--- a/Changelog
+++ b/Changelog
@@ -19,6 +19,7 @@ version <next>:
 - Cinepak encoder
 - Intel QSV-accelerated MJPEG encoding
 - NVIDIA CUVID-accelerated H.264 and HEVC decoding
+- Intel QSV-accelerated overlay filter
 
 version 12:
 
diff --git a/configure b/configure
index ff3abdd403..a3cfe37680 100755
--- a/configure
+++ b/configure
@@ -1790,6 +1790,7 @@ CONFIG_EXTRA="
     qsv
     qsvdec
     qsvenc
+    qsvvpp
     rangecoder
     riffdec
     riffenc
@@ -2276,6 +2277,7 @@ omx_rpi_select="omx"
 qsv_deps="libmfx"
 qsvdec_select="qsv"
 qsvenc_select="qsv"
+qsvvpp_select="qsv"
 vaapi_encode_deps="vaapi"
 
 hwupload_cuda_filter_deps="cuda"
@@ -2540,6 +2542,8 @@ hqdn3d_filter_deps="gpl"
 interlace_filter_deps="gpl"
 movie_filter_deps="avcodec avformat"
 ocv_filter_deps="libopencv"
+overlay_qsv_filter_deps="libmfx"
+overlay_qsv_filter_select="qsvvpp"
 resample_filter_deps="avresample"
 scale_filter_deps="swscale"
 scale_qsv_filter_deps="libmfx"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 2b7e283ab0..8277626770 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -20,6 +20,9 @@ OBJS = allfilters.o \
 
 OBJS-$(HAVE_THREADS) += pthread.o
 
+# subsystems
+OBJS-$(CONFIG_QSVVPP) += qsvvpp.o
+
 # audio filters
 OBJS-$(CONFIG_AFORMAT_FILTER) += af_aformat.o
 OBJS-$(CONFIG_AMIX_FILTER) += af_amix.o
@@ -75,6 +78,7 @@ OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o
 OBJS-$(CONFIG_NULL_FILTER) += vf_null.o
 OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o
 OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o
+OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o
 OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o
 OBJS-$(CONFIG_PIXDESCTEST_FILTER) += vf_pixdesctest.o
 OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o
@@ -105,5 +109,7 @@ OBJS-$(CONFIG_NULLSRC_FILTER) += vsrc_nullsrc.o
 OBJS-$(CONFIG_RGBTESTSRC_FILTER) += vsrc_testsrc.o
 OBJS-$(CONFIG_TESTSRC_FILTER) += vsrc_testsrc.o
 
+SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h
+
 TOOLS = graph2dot
 TESTPROGS = filtfmts
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index ef94516ebc..2b3a67244e 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -98,6 +98,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER(NULL,        null,        vf);
     REGISTER_FILTER(OCV,         ocv,         vf);
     REGISTER_FILTER(OVERLAY,     overlay,     vf);
+    REGISTER_FILTER(OVERLAY_QSV, overlay_qsv, vf);
     REGISTER_FILTER(PAD,         pad,         vf);
     REGISTER_FILTER(PIXDESCTEST, pixdesctest, vf);
     REGISTER_FILTER(SCALE,       scale,       vf);
diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
new file mode 100644
index 0000000000..0b639c2d67
--- /dev/null
+++ b/libavfilter/qsvvpp.c
@@ -0,0 +1,727 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Intel Quick Sync Video VPP base function
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_qsv.h"
+#include "libavutil/time.h"
+#include "libavutil/pixdesc.h"
+
+#include "internal.h"
+#include "qsvvpp.h"
+#include "video.h"
+
+#define IS_VIDEO_MEMORY(mode)  (mode & (MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET | \
+                                        MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET))
+#define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
+#define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
+
+typedef struct QSVFrame {
+    AVFrame *frame;
+    mfxFrameSurface1 *surface;
+    mfxFrameSurface1 surface_internal;  /* for system memory */
+    struct QSVFrame *next;
+} QSVFrame;
+
+/* abstract struct for all QSV filters */
+struct QSVVPPContext {
+    mfxSession session;
+    int (*filter_frame)(AVFilterLink *outlink, AVFrame *frame); /* callback */
+    enum AVPixelFormat out_sw_format;   /* real output format */
+    mfxVideoParam vpp_param;
+    mfxFrameInfo *frame_infos;          /* frame info for each input */
+
+    /* members related to the input/output surfaces */
+    int in_mem_mode;
+    int out_mem_mode;
+    QSVFrame *in_frame_list;
+    QSVFrame *out_frame_list;
+    int nb_surface_ptrs_in;
+    int nb_surface_ptrs_out;
+    mfxFrameSurface1 **surface_ptrs_in;
+    mfxFrameSurface1 **surface_ptrs_out;
+
+    /* MFX VPP extension parameters */
+    mfxExtOpaqueSurfaceAlloc opaque_alloc;
+    mfxExtBuffer **ext_buffers;
+    int nb_ext_buffers;
+};
+
+static const mfxHandleType handle_types[] = {
+    MFX_HANDLE_VA_DISPLAY,
+    MFX_HANDLE_D3D9_DEVICE_MANAGER,
+    MFX_HANDLE_D3D11_DEVICE,
+};
+
+static const AVRational default_tb = { 1, 90000 };
+
+/* functions for frameAlloc */
+static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
+                             mfxFrameAllocResponse *resp)
+{
+    QSVVPPContext *s = pthis;
+    int i;
+
+    if (!(req->Type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) ||
+        !(req->Type & (MFX_MEMTYPE_FROM_VPPIN | MFX_MEMTYPE_FROM_VPPOUT)) ||
+        !(req->Type & MFX_MEMTYPE_EXTERNAL_FRAME))
+        return MFX_ERR_UNSUPPORTED;
+
+    if (req->Type & MFX_MEMTYPE_FROM_VPPIN) {
+        resp->mids = av_mallocz(s->nb_surface_ptrs_in * sizeof(*resp->mids));
+        if (!resp->mids)
+            return MFX_ERR_MEMORY_ALLOC;
+
+        for (i = 0; i < s->nb_surface_ptrs_in; i++)
+            resp->mids[i] = s->surface_ptrs_in[i]->Data.MemId;
+
+        resp->NumFrameActual = s->nb_surface_ptrs_in;
+    } else {
+        resp->mids = av_mallocz(s->nb_surface_ptrs_out * sizeof(*resp->mids));
+        if (!resp->mids)
+            return MFX_ERR_MEMORY_ALLOC;
+
+        for (i = 0; i < s->nb_surface_ptrs_out; i++)
+            resp->mids[i] = s->surface_ptrs_out[i]->Data.MemId;
+
+        resp->NumFrameActual = s->nb_surface_ptrs_out;
+    }
+
+    return MFX_ERR_NONE;
+}
+
+static mfxStatus frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp)
+{
+    av_freep(&resp->mids);
+    return MFX_ERR_NONE;
+}
+
+static mfxStatus frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
+{
+    return MFX_ERR_UNSUPPORTED;
+}
+
+static mfxStatus frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
+{
+    return MFX_ERR_UNSUPPORTED;
+}
+
+static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl)
+{
+    *hdl = mid;
+    return MFX_ERR_NONE;
+}
+
+static int pix_fmt_to_mfx_fourcc(int format)
+{
+    switch (format) {
+    case AV_PIX_FMT_YUV420P:
+        return MFX_FOURCC_YV12;
+    case AV_PIX_FMT_NV12:
+        return MFX_FOURCC_NV12;
+    case AV_PIX_FMT_YUYV422:
+        return MFX_FOURCC_YUY2;
+    case AV_PIX_FMT_RGB32:
+        return MFX_FOURCC_RGB4;
+    }
+
+    return MFX_FOURCC_NV12;
+}
+
+static int map_frame_to_surface(AVFrame *frame, mfxFrameSurface1 *surface)
+{
+    switch (frame->format) {
+    case AV_PIX_FMT_NV12:
+        surface->Data.Y  = frame->data[0];
+        surface->Data.UV = frame->data[1];
+        break;
+    case AV_PIX_FMT_YUV420P:
+        surface->Data.Y = frame->data[0];
+        surface->Data.U = frame->data[1];
+        surface->Data.V = frame->data[2];
+        break;
+    case AV_PIX_FMT_YUYV422:
+        surface->Data.Y = frame->data[0];
+        surface->Data.U = frame->data[0] + 1;
+        surface->Data.V = frame->data[0] + 3;
+        break;
+    case AV_PIX_FMT_RGB32:
+        surface->Data.B = frame->data[0];
+        surface->Data.G = frame->data[0] + 1;
+        surface->Data.R = frame->data[0] + 2;
+        surface->Data.A = frame->data[0] + 3;
+        break;
+    default:
+        return MFX_ERR_UNSUPPORTED;
+    }
+    surface->Data.Pitch = frame->linesize[0];
+
+    return 0;
+}
+
+/* fill the surface info */
+static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, AVFilterLink *link)
+{
+    enum AVPixelFormat pix_fmt;
+    AVHWFramesContext *frames_ctx;
+    AVQSVFramesContext *frames_hwctx;
+    const AVPixFmtDescriptor *desc;
+
+    if (link->format == AV_PIX_FMT_QSV) {
+        if (!link->hw_frames_ctx)
+            return AVERROR(EINVAL);
+
+        frames_ctx   = (AVHWFramesContext *)link->hw_frames_ctx->data;
+        frames_hwctx = frames_ctx->hwctx;
+        *frameinfo   = frames_hwctx->surfaces[0].Info;
+    } else {
+        pix_fmt = link->format;
+        desc = av_pix_fmt_desc_get(pix_fmt);
+        if (!desc)
+            return AVERROR_BUG;
+
+        frameinfo->CropX          = 0;
+        frameinfo->CropY          = 0;
+        frameinfo->Width          = FFALIGN(link->w, 32);
+        frameinfo->Height         = FFALIGN(link->h, 32);
+        frameinfo->PicStruct      = MFX_PICSTRUCT_PROGRESSIVE;
+        frameinfo->FourCC         = pix_fmt_to_mfx_fourcc(pix_fmt);
+        frameinfo->BitDepthLuma   = desc->comp[0].depth;
+        frameinfo->BitDepthChroma = desc->comp[0].depth;
+        frameinfo->Shift          = desc->comp[0].depth > 8;
+        if (desc->log2_chroma_w && desc->log2_chroma_h)
+            frameinfo->ChromaFormat = MFX_CHROMAFORMAT_YUV420;
+        else if (desc->log2_chroma_w)
+            frameinfo->ChromaFormat = MFX_CHROMAFORMAT_YUV422;
+        else
+            frameinfo->ChromaFormat = MFX_CHROMAFORMAT_YUV444;
+    }
+
+    frameinfo->CropW         = link->w;
+    frameinfo->CropH         = link->h;
+    frameinfo->FrameRateExtN = link->frame_rate.num;
+    frameinfo->FrameRateExtD = link->frame_rate.den;
+    frameinfo->AspectRatioW  = link->sample_aspect_ratio.num ? link->sample_aspect_ratio.num : 1;
+    frameinfo->AspectRatioH  = link->sample_aspect_ratio.den ? link->sample_aspect_ratio.den : 1;
+
+    return 0;
+}
+
+static void clear_unused_frames(QSVFrame *list)
+{
+    while (list) {
+        if (list->surface && !list->surface->Data.Locked) {
+            list->surface = NULL;
+            av_frame_free(&list->frame);
+        }
+        list = list->next;
+    }
+}
+
+static void clear_frame_list(QSVFrame **list)
+{
+    while (*list) {
+        QSVFrame *frame;
+
+        frame = *list;
+        *list = (*list)->next;
+        av_frame_free(&frame->frame);
+        av_freep(&frame);
+    }
+}
+
+static QSVFrame *get_free_frame(QSVFrame **list)
+{
+    QSVFrame *out = *list;
+
+    for (; out; out = out->next) {
+        if (!out->surface)
+            break;
+    }
+
+    if (!out) {
+        out = av_mallocz(sizeof(*out));
+        if (!out) {
+            av_log(NULL, AV_LOG_ERROR, "Failed to allocate a new output frame.\n");
+            return NULL;
+        }
+        out->next = *list;
+        *list = out;
+    }
+
+    return out;
+}
+
+/* get the input surface */
+static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picref)
+{
+    QSVFrame *qsv_frame;
+    AVFilterContext *ctx = inlink->dst;
+
+    clear_unused_frames(s->in_frame_list);
+
+    qsv_frame = get_free_frame(&s->in_frame_list);
+    if (!qsv_frame)
+        return NULL;
+
+    /* Turn an AVFrame into an mfxFrameSurface1.
+     * In video/opaque memory mode, the pix_fmt is AV_PIX_FMT_QSV and the
+     * mfxFrameSurface1 is stored in AVFrame->data[3];
+     * in system memory mode, the raw video data is stored in the AVFrame
+     * and must be mapped into an mfxFrameSurface1.
+     */
+    if (!IS_SYSTEM_MEMORY(s->in_mem_mode)) {
+        if (picref->format != AV_PIX_FMT_QSV) {
+            av_log(ctx, AV_LOG_ERROR, "QSVVPP received a frame with an unexpected format.\n");
+            return NULL;
+        }
+        qsv_frame->frame   = picref;
+        qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
+    } else {
+        /* make a copy if the input is not padded as libmfx requires */
+        if (picref->height & 31 || picref->linesize[0] & 31) {
+            qsv_frame->frame = ff_get_video_buffer(inlink,
+                                                   FFALIGN(inlink->w, 32),
+                                                   FFALIGN(inlink->h, 32));
+            if (!qsv_frame->frame)
+                return NULL;
+
+            qsv_frame->frame->width  = picref->width;
+            qsv_frame->frame->height = picref->height;
+
+            if (av_frame_copy(qsv_frame->frame, picref) < 0) {
+                av_frame_free(&qsv_frame->frame);
+                return NULL;
+            }
+
+            av_frame_copy_props(qsv_frame->frame, picref);
+            av_frame_free(&picref);
+        } else
+            qsv_frame->frame = picref;
+
+        if (map_frame_to_surface(qsv_frame->frame,
+                                 &qsv_frame->surface_internal) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
+            return NULL;
+        }
+        qsv_frame->surface = &qsv_frame->surface_internal;
+    }
+
+    qsv_frame->surface->Info           = s->frame_infos[FF_INLINK_IDX(inlink)];
+    qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
+                                                      inlink->time_base, default_tb);
+
+    qsv_frame->surface->Info.PicStruct =
+            !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
+            (qsv_frame->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
+                                                 MFX_PICSTRUCT_FIELD_BFF);
+    if (qsv_frame->frame->repeat_pict == 1)
+        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
+    else if (qsv_frame->frame->repeat_pict == 2)
+        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
+    else if (qsv_frame->frame->repeat_pict == 4)
+        qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
+
+    return qsv_frame;
+}
+
+/* get the output surface */
+static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    QSVFrame *out_frame;
+    int ret;
+
+    clear_unused_frames(s->out_frame_list);
+
+    out_frame = get_free_frame(&s->out_frame_list);
+    if (!out_frame)
+        return NULL;
+
+    /* For video memory, get a hw frame;
+     * for system memory, get a sw frame and map it to an mfx surface. */
+    if (!IS_SYSTEM_MEMORY(s->out_mem_mode)) {
+        out_frame->frame = av_frame_alloc();
+        if (!out_frame->frame)
+            return NULL;
+
+        ret = av_hwframe_get_buffer(outlink->hw_frames_ctx, out_frame->frame, 0);
+        if (ret < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Can't allocate a surface.\n");
+            return NULL;
+        }
+
+        out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3];
+    } else {
+        /* Get a frame with aligned dimensions;
+         * libmfx requires system memory to be 128x64 aligned. */
+        out_frame->frame = ff_get_video_buffer(outlink,
+                                               FFALIGN(outlink->w, 128),
+                                               FFALIGN(outlink->h, 64));
+        if (!out_frame->frame)
+            return NULL;
+
+        out_frame->frame->width  = outlink->w;
+        out_frame->frame->height = outlink->h;
+
+        ret = map_frame_to_surface(out_frame->frame,
+                                   &out_frame->surface_internal);
+        if (ret < 0)
+            return NULL;
+
+        out_frame->surface = &out_frame->surface_internal;
+    }
+
+    out_frame->surface->Info = s->vpp_param.vpp.Out;
+
+    return out_frame;
+}
+
+/* create the QSV session */
+static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
+{
+    AVFilterLink *inlink = avctx->inputs[0];
+    AVFilterLink *outlink = avctx->outputs[0];
+    AVQSVFramesContext *in_frames_hwctx = NULL;
+    AVQSVFramesContext *out_frames_hwctx = NULL;
+
+    AVBufferRef *device_ref;
+    AVHWDeviceContext *device_ctx;
+    AVQSVDeviceContext *device_hwctx;
+    mfxHDL handle = NULL;
+    mfxHandleType handle_type;
+    mfxVersion ver;
+    mfxIMPL impl;
+    int ret, i;
+
+    if (inlink->hw_frames_ctx) {
+        AVHWFramesContext *frames_ctx = (AVHWFramesContext *)inlink->hw_frames_ctx->data;
+
+        device_ref      = frames_ctx->device_ref;
+        in_frames_hwctx = frames_ctx->hwctx;
+
+        s->in_mem_mode = in_frames_hwctx->frame_type;
+
+        s->surface_ptrs_in = av_mallocz_array(in_frames_hwctx->nb_surfaces,
+                                              sizeof(*s->surface_ptrs_in));
+        if (!s->surface_ptrs_in)
+            return AVERROR(ENOMEM);
+
+        for (i = 0; i < in_frames_hwctx->nb_surfaces; i++)
+            s->surface_ptrs_in[i] = in_frames_hwctx->surfaces + i;
+
+        s->nb_surface_ptrs_in = in_frames_hwctx->nb_surfaces;
+    } else if (avctx->hw_device_ctx) {
+        device_ref     = avctx->hw_device_ctx;
+        s->in_mem_mode = MFX_MEMTYPE_SYSTEM_MEMORY;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "No hw context provided.\n");
+        return AVERROR(EINVAL);
+    }
+
+    device_ctx   = (AVHWDeviceContext *)device_ref->data;
+    device_hwctx = device_ctx->hwctx;
+
+    if (outlink->format == AV_PIX_FMT_QSV) {
+        AVHWFramesContext *out_frames_ctx;
+        AVBufferRef *out_frames_ref = av_hwframe_ctx_alloc(device_ref);
+        if (!out_frames_ref)
+            return AVERROR(ENOMEM);
+
+        s->out_mem_mode = IS_OPAQUE_MEMORY(s->in_mem_mode) ?
+                          MFX_MEMTYPE_OPAQUE_FRAME :
+                          MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
+
+        out_frames_ctx   = (AVHWFramesContext *)out_frames_ref->data;
+        out_frames_hwctx = out_frames_ctx->hwctx;
+
+        out_frames_ctx->format            = AV_PIX_FMT_QSV;
+        out_frames_ctx->width             = FFALIGN(outlink->w, 32);
+        out_frames_ctx->height            = FFALIGN(outlink->h, 32);
+        out_frames_ctx->sw_format         = s->out_sw_format;
+        out_frames_ctx->initial_pool_size = 64;
+        out_frames_hwctx->frame_type      = s->out_mem_mode;
+
+        ret = av_hwframe_ctx_init(out_frames_ref);
+        if (ret < 0) {
+            av_buffer_unref(&out_frames_ref);
+            av_log(avctx, AV_LOG_ERROR, "Error creating frames_ctx for output pad.\n");
+            return ret;
+        }
+
+        s->surface_ptrs_out = av_mallocz_array(out_frames_hwctx->nb_surfaces,
+                                               sizeof(*s->surface_ptrs_out));
+        if (!s->surface_ptrs_out) {
+            av_buffer_unref(&out_frames_ref);
+            return AVERROR(ENOMEM);
+        }
+
+        for (i = 0; i < out_frames_hwctx->nb_surfaces; i++)
+            s->surface_ptrs_out[i] = out_frames_hwctx->surfaces + i;
+        s->nb_surface_ptrs_out = out_frames_hwctx->nb_surfaces;
+
+        av_buffer_unref(&outlink->hw_frames_ctx);
+        outlink->hw_frames_ctx = out_frames_ref;
+    } else
+        s->out_mem_mode = MFX_MEMTYPE_SYSTEM_MEMORY;
+
+    /* extract the properties of the "master" session given to us */
+    ret = MFXQueryIMPL(device_hwctx->session, &impl);
+    if (ret == MFX_ERR_NONE)
+        ret = MFXQueryVersion(device_hwctx->session, &ver);
+    if (ret != MFX_ERR_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Error querying the session attributes\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(handle_types); i++) {
+        ret = MFXVideoCORE_GetHandle(device_hwctx->session, handle_types[i], &handle);
+        if (ret == MFX_ERR_NONE) {
+            handle_type = handle_types[i];
+            break;
+        }
+    }
+
+    /* create a "slave" session with those same properties, to be used for vpp */
+    ret = MFXInit(impl, &ver, &s->session);
+    if (ret != MFX_ERR_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Error initializing a session for VPP\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    if (handle) {
+        ret = MFXVideoCORE_SetHandle(s->session, handle_type, handle);
+        if (ret != MFX_ERR_NONE)
+            return AVERROR_UNKNOWN;
+    }
+
+    if (IS_OPAQUE_MEMORY(s->in_mem_mode) || IS_OPAQUE_MEMORY(s->out_mem_mode)) {
+        s->opaque_alloc.In.Surfaces   = s->surface_ptrs_in;
+        s->opaque_alloc.In.NumSurface = s->nb_surface_ptrs_in;
+        s->opaque_alloc.In.Type       = s->in_mem_mode;
+
+        s->opaque_alloc.Out.Surfaces   = s->surface_ptrs_out;
+        s->opaque_alloc.Out.NumSurface = s->nb_surface_ptrs_out;
+        s->opaque_alloc.Out.Type       = s->out_mem_mode;
+
+        s->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
+        s->opaque_alloc.Header.BufferSz = sizeof(s->opaque_alloc);
+    } else if (IS_VIDEO_MEMORY(s->in_mem_mode) || IS_VIDEO_MEMORY(s->out_mem_mode)) {
+        mfxFrameAllocator frame_allocator = {
+            .pthis  = s,
+            .Alloc  = frame_alloc,
+            .Lock   = frame_lock,
+            .Unlock = frame_unlock,
+            .GetHDL = frame_get_hdl,
+            .Free   = frame_free,
+        };
+
+        ret = MFXVideoCORE_SetFrameAllocator(s->session, &frame_allocator);
+        if (ret != MFX_ERR_NONE)
+            return AVERROR_UNKNOWN;
+    }
+
+    return 0;
+}
+
+int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param)
+{
+    int i;
+    int ret;
+    QSVVPPContext *s;
+
+    s = av_mallocz(sizeof(*s));
+    if (!s)
+        return AVERROR(ENOMEM);
+
+    s->filter_frame = param->filter_frame;
+    if (!s->filter_frame)
+        s->filter_frame = ff_filter_frame;
+    s->out_sw_format = param->out_sw_format;
+
+    /* create the vpp session */
+    ret = init_vpp_session(avctx, s);
+    if (ret < 0)
+        goto failed;
+
+    s->frame_infos = av_mallocz_array(avctx->nb_inputs, sizeof(*s->frame_infos));
+    if (!s->frame_infos) {
+        ret = AVERROR(ENOMEM);
+        goto failed;
+    }
+
+    /* init each input's information */
+    for (i = 0; i < avctx->nb_inputs; i++) {
+        ret = fill_frameinfo_by_link(&s->frame_infos[i], avctx->inputs[i]);
+        if (ret < 0)
+            goto failed;
+    }
+
+    /* update each input's frame info according to the crop */
+    for (i = 0; i < param->num_crop; i++) {
+        QSVVPPCrop *crop = param->crop + i;
+        if (crop->in_idx >= avctx->nb_inputs) {
+            ret = AVERROR(EINVAL);
+            goto failed;
+        }
+        s->frame_infos[crop->in_idx].CropX = crop->x;
+        s->frame_infos[crop->in_idx].CropY = crop->y;
+        s->frame_infos[crop->in_idx].CropW = crop->w;
+        s->frame_infos[crop->in_idx].CropH = crop->h;
+    }
+
+    s->vpp_param.vpp.In = s->frame_infos[0];
+
+    ret = fill_frameinfo_by_link(&s->vpp_param.vpp.Out, avctx->outputs[0]);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to get frame info from the output link.\n");
+        goto failed;
+    }
+
+    if (IS_OPAQUE_MEMORY(s->in_mem_mode) || IS_OPAQUE_MEMORY(s->out_mem_mode)) {
+        s->nb_ext_buffers = param->num_ext_buf + 1;
+        s->ext_buffers = av_mallocz_array(s->nb_ext_buffers, sizeof(*s->ext_buffers));
+        if (!s->ext_buffers) {
+            ret = AVERROR(ENOMEM);
+            goto failed;
+        }
+
+        s->ext_buffers[0] = (mfxExtBuffer *)&s->opaque_alloc;
+        for (i = 1; i < s->nb_ext_buffers; i++)
+            s->ext_buffers[i] = param->ext_buf[i - 1];
+        s->vpp_param.ExtParam    = s->ext_buffers;
+        s->vpp_param.NumExtParam = s->nb_ext_buffers;
+    } else {
+        s->vpp_param.NumExtParam = param->num_ext_buf;
+        s->vpp_param.ExtParam    = param->ext_buf;
+    }
+
+    s->vpp_param.AsyncDepth = 1;
+
+    if (IS_SYSTEM_MEMORY(s->in_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
+    else if (IS_VIDEO_MEMORY(s->in_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_VIDEO_MEMORY;
+    else if (IS_OPAQUE_MEMORY(s->in_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_OPAQUE_MEMORY;
+
+    if (IS_SYSTEM_MEMORY(s->out_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_OUT_SYSTEM_MEMORY;
+    else if (IS_VIDEO_MEMORY(s->out_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_OUT_VIDEO_MEMORY;
+    else if (IS_OPAQUE_MEMORY(s->out_mem_mode))
+        s->vpp_param.IOPattern |= MFX_IOPATTERN_OUT_OPAQUE_MEMORY;
+
+    ret = MFXVideoVPP_Init(s->session, &s->vpp_param);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create a qsvvpp, ret = %d.\n", ret);
+        goto failed;
+    }
+
+    *vpp = s;
+    return 0;
+
+failed:
+    ff_qsvvpp_free(&s);
+
+    return ret;
+}
+
+int ff_qsvvpp_free(QSVVPPContext **vpp)
+{
+    QSVVPPContext *s = *vpp;
+
+    if (!s)
+        return 0;
+
+    if (s->session) {
+        MFXVideoVPP_Close(s->session);
+        MFXClose(s->session);
+    }
+
+    /* release all the resources */
+    clear_frame_list(&s->in_frame_list);
+    clear_frame_list(&s->out_frame_list);
+    av_freep(&s->surface_ptrs_in);
+    av_freep(&s->surface_ptrs_out);
+    av_freep(&s->ext_buffers);
+    av_freep(&s->frame_infos);
+    av_freep(vpp);
+
+    return 0;
+}
+
+int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picref)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    mfxSyncPoint sync;
+    QSVFrame *in_frame, *out_frame;
+    int ret, filter_ret;
+
+    in_frame = submit_frame(s, inlink, picref);
+    if (!in_frame) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on input[%d]\n",
+               FF_INLINK_IDX(inlink));
+        return AVERROR(ENOMEM);
+    }
+
+    do {
+        out_frame = query_frame(s, outlink);
+        if (!out_frame) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to query an output frame.\n");
+            return AVERROR(ENOMEM);
+        }
+
+        do {
+            ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface,
+                                               out_frame->surface, NULL, &sync);
+            if (ret == MFX_WRN_DEVICE_BUSY)
+                av_usleep(500);
+        } while (ret == MFX_WRN_DEVICE_BUSY);
+
+        if (ret < 0 && ret != MFX_ERR_MORE_SURFACE) {
+            /* ignore the more_data error */
+            if (ret == MFX_ERR_MORE_DATA)
+                ret = AVERROR(EAGAIN);
+            break;
+        }
+
+        if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
+            av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
+
+        out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp,
+                                             default_tb, outlink->time_base);
+
+        filter_ret = s->filter_frame(outlink, out_frame->frame);
+        if (filter_ret < 0) {
+            av_frame_free(&out_frame->frame);
+            ret = filter_ret;
+            break;
+        }
+        out_frame->frame = NULL;
+    } while (ret == MFX_ERR_MORE_SURFACE);
+
+    return ret;
+}
diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
new file mode 100644
index 0000000000..082c0a8994
--- /dev/null
+++ b/libavfilter/qsvvpp.h
@@ -0,0 +1,66 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Intel Quick Sync Video VPP base function
+ */
+
+#ifndef AVFILTER_QSVVPP_H
+#define AVFILTER_QSVVPP_H
+
+#include <mfxvideo.h>
+
+#include "avfilter.h"
+
+#define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst->input_pads))
+#define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads))
+
+typedef struct QSVVPPContext QSVVPPContext;
+
+typedef struct QSVVPPCrop {
+    int in_idx;        ///< Input index
+    int x, y, w, h;    ///< Crop rectangle
+} QSVVPPCrop;
+
+typedef struct QSVVPPParam {
+    /* default is ff_filter_frame */
+    int (*filter_frame)(AVFilterLink *outlink, AVFrame *frame);
+
+    /* to be filled with MFX enhanced filter configurations */
+    int num_ext_buf;
+    mfxExtBuffer **ext_buf;
+
+    /* real output format */
+    enum AVPixelFormat out_sw_format;
+
+    /* crop information for each input, if needed */
+    int num_crop;
+    QSVVPPCrop *crop;
+} QSVVPPParam;
+
+/* create and initialize the QSV session */
+int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param);
+
+/* release the resources (e.g. surfaces) */
+int ff_qsvvpp_free(QSVVPPContext **vpp);
+
+/* vpp filter frame and call the cb if needed */
+int ff_qsvvpp_filter_frame(QSVVPPContext *vpp, AVFilterLink *inlink, AVFrame *frame);
+
+#endif /* AVFILTER_QSVVPP_H */
diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c
new file mode 100644
index 0000000000..89f2fb134d
--- /dev/null
+++ b/libavfilter/vf_overlay_qsv.c
@@ -0,0 +1,487 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * A hardware accelerated overlay filter based on Intel Quick Sync Video VPP
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/eval.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mathematics.h"
+
+#include "internal.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "video.h"
+
+#include "qsvvpp.h"
+
+#define MAIN    0
+#define OVERLAY 1
+
+#define OFFSET(x) offsetof(QSVOverlayContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM
+
+enum var_name {
+    VAR_MAIN_iW, VAR_MW,
+    VAR_MAIN_iH, VAR_MH,
+    VAR_OVERLAY_iW,
+    VAR_OVERLAY_iH,
+    VAR_OVERLAY_X, VAR_OX,
+    VAR_OVERLAY_Y, VAR_OY,
+    VAR_OVERLAY_W, VAR_OW,
+    VAR_OVERLAY_H, VAR_OH,
+    VAR_VARS_NB
+};
+
+enum EOFAction {
+    EOF_ACTION_REPEAT,
+    EOF_ACTION_ENDALL
+};
+
+typedef struct QSVOverlayContext {
+    const AVClass *class;
+
+    QSVVPPContext *qsv;
+    QSVVPPParam qsv_param;
+    mfxExtVPPComposite comp_conf;
+    double var_values[VAR_VARS_NB];
+
+    char *overlay_ox, *overlay_oy, *overlay_ow, *overlay_oh;
+    uint16_t overlay_alpha, overlay_pixel_alpha;
+
+    enum EOFAction eof_action;  /* action to take on EOF from source */
+
+    AVFrame *main;
+    AVFrame *over_prev, *over_next;
+} QSVOverlayContext;
+
+static const char *const var_names[] = {
+    "main_w",     "W",   /* input width of the main layer */
+    "main_h",     "H",   /* input height of the main layer */
+    "overlay_iw",        /* input width of the overlay layer */
+    "overlay_ih",        /* input height of the overlay layer */
+    "overlay_x",  "x",   /* x position of the overlay layer inside of main */
+    "overlay_y",  "y",   /* y position of the overlay layer inside of main */
+    "overlay_w",  "w",   /* output width of overlay layer */
+    "overlay_h",  "h",   /* output height of overlay layer */
+    NULL
+};
+
+static const AVOption options[] = {
+    { "x", "Overlay x position", OFFSET(overlay_ox), AV_OPT_TYPE_STRING, { .str = "0" }, 0, 255, .flags = FLAGS },
+    { "y", "Overlay y position", OFFSET(overlay_oy), AV_OPT_TYPE_STRING, { .str = "0" }, 0, 255, .flags = FLAGS },
+    { "w", "Overlay width", OFFSET(overlay_ow), AV_OPT_TYPE_STRING, { .str = "overlay_iw" }, 0, 255, .flags = FLAGS },
+    { "h", "Overlay height", OFFSET(overlay_oh), AV_OPT_TYPE_STRING, { .str = "overlay_ih*w/overlay_iw" }, 0, 255, .flags = FLAGS },
+    { "alpha", "Overlay global alpha", OFFSET(overlay_alpha), AV_OPT_TYPE_INT, { .i64 = 255 }, 0, 255, .flags = FLAGS },
+    { "eof_action", "Action to take when encountering EOF from secondary input ",
+        OFFSET(eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT },
+        EOF_ACTION_REPEAT, EOF_ACTION_ENDALL, .flags = FLAGS, "eof_action" },
+    { "repeat", "Repeat the previous frame.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
+    { "endall", "End both streams.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
+    { NULL }
+};
+
+static int eval_expr(AVFilterContext *ctx)
+{
+    QSVOverlayContext *vpp = ctx->priv;
+    double *var_values = vpp->var_values;
+    int ret = 0;
+    AVExpr *ox_expr = NULL, *oy_expr = NULL;
+    AVExpr *ow_expr = NULL, *oh_expr = NULL;
+
+#define PASS_EXPR(e, s) {\
+    ret = av_expr_parse(&e, s, var_names, NULL, NULL, NULL, NULL, 0, ctx); \
+    if (ret < 0) {\
+        av_log(ctx, AV_LOG_ERROR, "Error when parsing '%s'.\n", s);\
+        goto release;\
+    }\
+}
+    PASS_EXPR(ox_expr, vpp->overlay_ox);
+    PASS_EXPR(oy_expr, vpp->overlay_oy);
+    PASS_EXPR(ow_expr, vpp->overlay_ow);
+    PASS_EXPR(oh_expr, vpp->overlay_oh);
+#undef PASS_EXPR
+
+    var_values[VAR_OVERLAY_W] =
+    var_values[VAR_OW]        = av_expr_eval(ow_expr, var_values, NULL);
+    var_values[VAR_OVERLAY_H] =
+    var_values[VAR_OH]        = av_expr_eval(oh_expr, var_values, NULL);
+
+    /* calc again in case ow is relative to oh */
+    var_values[VAR_OVERLAY_W] =
+    var_values[VAR_OW]        = av_expr_eval(ow_expr, var_values, NULL);
+
+    var_values[VAR_OVERLAY_X] =
+    var_values[VAR_OX]        = av_expr_eval(ox_expr, var_values, NULL);
+    var_values[VAR_OVERLAY_Y] =
+    var_values[VAR_OY]        = av_expr_eval(oy_expr, var_values, NULL);
+
+    /* calc again in case ox is relative to oy */
+    var_values[VAR_OVERLAY_X] =
+    var_values[VAR_OX]        = av_expr_eval(ox_expr, var_values, NULL);
+
+    /* calc overlay_w and overlay_h again in case they are relative to ox,oy */
+    var_values[VAR_OVERLAY_W] =
+    var_values[VAR_OW]        = av_expr_eval(ow_expr, var_values, NULL);
+    var_values[VAR_OVERLAY_H] =
+    var_values[VAR_OH]        = av_expr_eval(oh_expr, var_values, NULL);
+    var_values[VAR_OVERLAY_W] =
+    var_values[VAR_OW]        = av_expr_eval(ow_expr, var_values, NULL);
+
+release:
+    av_expr_free(ox_expr);
+    av_expr_free(oy_expr);
+    av_expr_free(ow_expr);
+    av_expr_free(oh_expr);
+
+    return ret;
+}
+
+static int have_alpha_planar(AVFilterLink *link)
+{
+    enum AVPixelFormat pix_fmt = link->format;
+    const AVPixFmtDescriptor *desc;
+    AVHWFramesContext *fctx;
+
+    if (link->format == AV_PIX_FMT_QSV) {
+        fctx    = (AVHWFramesContext *)link->hw_frames_ctx->data;
+        pix_fmt = fctx->sw_format;
+    }
+
+    desc = av_pix_fmt_desc_get(pix_fmt);
+    if (!desc)
+        return 0;
+
+    return !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA);
+}
+
+static int config_main_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    QSVOverlayContext *vpp = ctx->priv;
+    mfxVPPCompInputStream *st = &vpp->comp_conf.InputStream[0];
+
+    av_log(ctx, AV_LOG_DEBUG, "Input[%d] format: %s.\n", FF_INLINK_IDX(inlink),
+           av_get_pix_fmt_name(inlink->format));
+
+    vpp->var_values[VAR_MAIN_iW] =
+    vpp->var_values[VAR_MW]      = inlink->w;
+    vpp->var_values[VAR_MAIN_iH] =
+    vpp->var_values[VAR_MH]      = inlink->h;
+
+    st->DstX = 0;
+    st->DstY = 0;
+    st->DstW = inlink->w;
+    st->DstH = inlink->h;
+    st->GlobalAlphaEnable = 0;
+    st->PixelAlphaEnable  = 0;
+
+    return 0;
+}
+
+static int config_overlay_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    QSVOverlayContext *vpp = ctx->priv;
+    mfxVPPCompInputStream *st = &vpp->comp_conf.InputStream[1];
+    int ret = 0;
+
+    av_log(ctx, AV_LOG_DEBUG, "Input[%d] format: %s.\n", FF_INLINK_IDX(inlink),
+           av_get_pix_fmt_name(inlink->format));
+
+    vpp->var_values[VAR_OVERLAY_iW] = inlink->w;
+    vpp->var_values[VAR_OVERLAY_iH] = inlink->h;
+
+    ret = eval_expr(ctx);
+    if (ret < 0)
+        return ret;
+
+    st->DstX = vpp->var_values[VAR_OX];
+    st->DstY = vpp->var_values[VAR_OY];
+    st->DstW = vpp->var_values[VAR_OW];
+    st->DstH = vpp->var_values[VAR_OH];
+    st->GlobalAlpha       = vpp->overlay_alpha;
+    st->GlobalAlphaEnable = (st->GlobalAlpha < 255);
+    st->PixelAlphaEnable  = have_alpha_planar(inlink);
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    QSVOverlayContext *vpp = ctx->priv;
+    AVFilterLink *in0 = ctx->inputs[0];
+    AVFilterLink *in1 = ctx->inputs[1];
+
+    av_log(ctx, AV_LOG_DEBUG, "Output is of %s.\n", av_get_pix_fmt_name(outlink->format));
+    if ((in0->format == AV_PIX_FMT_QSV && in1->format != AV_PIX_FMT_QSV) ||
+        (in0->format != AV_PIX_FMT_QSV && in1->format == AV_PIX_FMT_QSV)) {
+        av_log(ctx, AV_LOG_ERROR, "Mixing hardware and software pixel formats is not supported.\n");
+        return AVERROR(EINVAL);
+    } else if (in0->format == AV_PIX_FMT_QSV) {
+        AVHWFramesContext *hw_frame0 = (AVHWFramesContext *)in0->hw_frames_ctx->data;
+        AVHWFramesContext *hw_frame1 = (AVHWFramesContext *)in1->hw_frames_ctx->data;
+
+        if (hw_frame0->device_ctx != hw_frame1->device_ctx) {
+            av_log(ctx, AV_LOG_ERROR, "Inputs with different underlying QSV devices are forbidden.\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
+    outlink->w          = vpp->var_values[VAR_MW];
+    outlink->h          = vpp->var_values[VAR_MH];
+    outlink->frame_rate = in0->frame_rate;
+    outlink->time_base  = av_inv_q(outlink->frame_rate);
+
+    return ff_qsvvpp_create(ctx, &vpp->qsv, &vpp->qsv_param);
+}
+
+static int blend_frame(AVFilterContext *ctx, AVFrame *mpic, AVFrame *opic)
+{
+    int ret = 0;
+    QSVOverlayContext *vpp = ctx->priv;
+    AVFrame *opic_copy = NULL;
+
+    ret = ff_qsvvpp_filter_frame(vpp->qsv, ctx->inputs[0], mpic);
+    if (ret == 0 || ret == AVERROR(EAGAIN)) {
+        /* Clone the overlay frame, because:
+         * 1. ff_qsvvpp_filter_frame() takes ownership of the frame it is given;
+         * 2. the overlay frame must be repeated when the 2nd input reaches EOF.
+         */
+        opic_copy = av_frame_clone(opic);
+        if (!opic_copy)
+            return AVERROR(ENOMEM);
+
+        ret = ff_qsvvpp_filter_frame(vpp->qsv, ctx->inputs[1], opic_copy);
+    }
+
+    return ret;
+}
+
+static int handle_overlay_eof(AVFilterContext *ctx)
+{
+    int ret = 0;
+    QSVOverlayContext *s = ctx->priv;
+
+    /* Repeat previous frame on secondary input */
+    if (s->over_prev && s->eof_action == EOF_ACTION_REPEAT)
+        ret = blend_frame(ctx, s->main, s->over_prev);
+    /* End both streams */
+    else if (s->eof_action == EOF_ACTION_ENDALL)
+        return AVERROR_EOF;
+
+    s->main = NULL;
+
+    return ret;
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    QSVOverlayContext *s = ctx->priv;
+    AVRational tb_main = ctx->inputs[MAIN]->time_base;
+    AVRational tb_over = ctx->inputs[OVERLAY]->time_base;
+    int ret = 0;
+
+    /* get a frame on the main input */
+    if (!s->main) {
+        ret = ff_request_frame(ctx->inputs[MAIN]);
+        if (ret < 0)
+            return ret;
+    }
+
+    /* get a new frame on the overlay input; on EOF, check the 'eof_action' setting */
+    if (!s->over_next) {
+        ret = ff_request_frame(ctx->inputs[OVERLAY]);
+        if (ret == AVERROR_EOF)
+            return handle_overlay_eof(ctx);
+        else if (ret < 0)
+            return ret;
+    }
+
+    while (s->main->pts != AV_NOPTS_VALUE &&
+           s->over_next->pts != AV_NOPTS_VALUE &&
+           av_compare_ts(s->over_next->pts, tb_over, s->main->pts, tb_main) < 0) {
+        av_frame_free(&s->over_prev);
+        FFSWAP(AVFrame*, s->over_prev, s->over_next);
+
+        ret = ff_request_frame(ctx->inputs[OVERLAY]);
+        if (ret == AVERROR_EOF)
+            return handle_overlay_eof(ctx);
+        else if (ret < 0)
+            return ret;
+    }
+
+    if (s->main->pts == AV_NOPTS_VALUE ||
+        s->over_next->pts == AV_NOPTS_VALUE ||
+        !av_compare_ts(s->over_next->pts, tb_over, s->main->pts, tb_main)) {
+        ret = blend_frame(ctx, s->main, s->over_next);
+        av_frame_free(&s->over_prev);
+        FFSWAP(AVFrame*, s->over_prev, s->over_next);
+    } else if (s->over_prev) {
+        ret = blend_frame(ctx, s->main, s->over_prev);
+    } else {
+        av_frame_free(&s->main);
+        ret = AVERROR(EAGAIN);
+    }
+
+    s->main = NULL;
+
+    return ret;
+}
+
+static int filter_frame_main(AVFilterLink *inlink, AVFrame *frame)
+{
+    QSVOverlayContext *s = inlink->dst->priv;
+
+    av_assert0(!s->main);
+    s->main = frame;
+
+    return 0;
+}
+
+static int filter_frame_overlay(AVFilterLink *inlink, AVFrame *frame)
+{
+    QSVOverlayContext *s = inlink->dst->priv;
+
+    av_assert0(!s->over_next);
+    s->over_next = frame;
+
+    return 0;
+}
+
+static int overlay_qsv_init(AVFilterContext *ctx)
+{
+    QSVOverlayContext *vpp = ctx->priv;
+
+    /* fill composite config */
+    vpp->comp_conf.Header.BufferId = MFX_EXTBUFF_VPP_COMPOSITE;
+    vpp->comp_conf.Header.BufferSz = sizeof(vpp->comp_conf);
+    vpp->comp_conf.NumInputStream  = ctx->nb_inputs;
+    vpp->comp_conf.InputStream     = av_mallocz_array(ctx->nb_inputs,
+                                                      sizeof(*vpp->comp_conf.InputStream));
+    if (!vpp->comp_conf.InputStream)
+        return AVERROR(ENOMEM);
+
+    /* initialize QSVVPP params */
+    vpp->qsv_param.filter_frame = NULL;
+    vpp->qsv_param.ext_buf      = av_mallocz(sizeof(*vpp->qsv_param.ext_buf));
+    if (!vpp->qsv_param.ext_buf)
+        return AVERROR(ENOMEM);
+
+    vpp->qsv_param.ext_buf[0]    = (mfxExtBuffer *)&vpp->comp_conf;
+    vpp->qsv_param.num_ext_buf   = 1;
+    vpp->qsv_param.out_sw_format = AV_PIX_FMT_NV12;
+    vpp->qsv_param.num_crop      = 0;
+
+    return 0;
+}
+
+static void overlay_qsv_uninit(AVFilterContext *ctx)
+{
+    QSVOverlayContext *vpp = ctx->priv;
+
+    av_frame_free(&vpp->main);
+    av_frame_free(&vpp->over_prev);
+    av_frame_free(&vpp->over_next);
+    ff_qsvvpp_free(&vpp->qsv);
+    av_freep(&vpp->comp_conf.InputStream);
+    av_freep(&vpp->qsv_param.ext_buf);
+}
+
+static int overlay_qsv_query_formats(AVFilterContext *ctx)
+{
+    int i;
+
+    static const enum AVPixelFormat main_in_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_YUYV422,
+        AV_PIX_FMT_RGB32,
+        AV_PIX_FMT_QSV,
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat out_pix_fmts[] = {
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_QSV,
+        AV_PIX_FMT_NONE
+    };
+
+    for (i = 0; i < ctx->nb_inputs; i++)
+        ff_formats_ref(ff_make_format_list(main_in_fmts), &ctx->inputs[i]->out_formats);
+
+    ff_formats_ref(ff_make_format_list(out_pix_fmts), &ctx->outputs[0]->in_formats);
+
+    return 0;
+}
+
+static const AVClass overlay_qsv_class = {
+    .class_name = "overlay_qsv",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVFilterPad overlay_qsv_inputs[] = {
+    {
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame_main,
+        .config_props = config_main_input,
+        .needs_fifo   = 1,
+    },
+    {
+        .name         = "overlay",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame_overlay,
+        .config_props = config_overlay_input,
+        .needs_fifo   = 1,
+    },
+    { NULL }
+};
+
+static const AVFilterPad overlay_qsv_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_overlay_qsv = {
+    .name           = "overlay_qsv",
+    .description    = NULL_IF_CONFIG_SMALL("Quick Sync Video overlay."),
+    .priv_size      = sizeof(QSVOverlayContext),
+    .query_formats  = overlay_qsv_query_formats,
+    .init           = overlay_qsv_init,
+    .uninit         = overlay_qsv_uninit,
+    .inputs         = overlay_qsv_inputs,
+    .outputs        = overlay_qsv_outputs,
+    .priv_class     = &overlay_qsv_class,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
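
Reviewer note: the sketch below is not part of the patch. It shows the calling sequence a QSV filter is expected to follow with the new helpers, since vf_overlay_qsv.c mixes this pattern with its two-input scheduling. All "example_" names and the ExampleQSVContext struct are hypothetical; the QSVVPPParam fields and the three ff_qsvvpp_* entry points are exactly the ones declared in qsvvpp.h above.

/* Hypothetical single-input filter skeleton built on the qsvvpp helpers;
 * illustrative only. */

#include "libavutil/pixfmt.h"
#include "internal.h"
#include "qsvvpp.h"

typedef struct ExampleQSVContext {
    const AVClass *class;
    QSVVPPContext *qsv;       /* opaque, managed entirely by the helpers  */
    QSVVPPParam    qsv_param; /* filled in before ff_qsvvpp_create()      */
} ExampleQSVContext;

static int example_config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    ExampleQSVContext *s = ctx->priv;

    s->qsv_param.filter_frame  = NULL;            /* NULL selects ff_filter_frame */
    s->qsv_param.num_ext_buf   = 0;               /* no extra mfxExtBuffer config */
    s->qsv_param.ext_buf       = NULL;
    s->qsv_param.out_sw_format = AV_PIX_FMT_NV12; /* real output format           */
    s->qsv_param.num_crop      = 0;               /* no per-input cropping        */

    /* Creates the "slave" MFX session and selects system/video/opaque
     * memory based on the links' hw_frames_ctx (or its absence). */
    return ff_qsvvpp_create(ctx, &s->qsv, &s->qsv_param);
}

static int example_filter_frame(AVFilterLink *inlink, AVFrame *frame)
{
    ExampleQSVContext *s = inlink->dst->priv;

    /* Wraps the AVFrame in an mfxFrameSurface1, runs RunFrameVPPAsync and
     * hands every produced frame to the filter_frame callback. */
    return ff_qsvvpp_filter_frame(s->qsv, inlink, frame);
}

static void example_uninit(AVFilterContext *ctx)
{
    ExampleQSVContext *s = ctx->priv;

    ff_qsvvpp_free(&s->qsv); /* closes the session and frees all surfaces */
}

overlay_qsv itself follows this same shape, additionally passing a mfxExtVPPComposite buffer through ext_buf and driving two input pads from request_frame().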
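Smoke test: assuming a build configured with --enable-libmfx and a working QSV device, a command along these lines should exercise the hardware-surface path end to end (the exact decoder and device flags depend on the local setup; this command is illustrative, not taken from the patch):

    avconv -hwaccel qsv -c:v h264_qsv -i main.h264 \
           -hwaccel qsv -c:v h264_qsv -i overlay.h264 \
           -filter_complex "overlay_qsv=x=W-w:y=H-h:alpha=128" \
           -c:v h264_qsv out.h264

With software frames on both inputs the filter falls back to the system-memory path, but it still needs a QSV device context to create the MFX session (otherwise init_vpp_session() fails with "No hw context provided").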