From 19d3d0c0570981ddc8a224f07d734ff75d76e234 Mon Sep 17 00:00:00 2001
From: Philip Langdale <philipl@overt.org>
Date: Sat, 10 Nov 2018 22:47:28 -0800
Subject: [PATCH] avutil/hwcontext_cuda: Define and use common CHECK_CU()

We have a pattern of wrapping CUDA calls to print errors and
normalise return values that is used in a couple of places. To
avoid duplication and increase consistency, let's put the wrapper
implementation in a shared place and use it everywhere.

Affects:

* avcodec/cuviddec
* avcodec/nvdec
* avcodec/nvenc
* avfilter/vf_scale_cuda
* avfilter/vf_scale_npp
* avfilter/vf_thumbnail_cuda
* avfilter/vf_transpose_npp
* avfilter/vf_yadif_cuda
---
 libavcodec/Makefile             |   6 +-
 libavcodec/cuda_check.c         |   1 +
 libavcodec/cuviddec.c           |  25 +-----
 libavcodec/nvdec.c              |  92 +++++++++-----------
 libavcodec/nvenc.c              |  68 +++++----------
 libavfilter/Makefile            |  13 +--
 libavfilter/cuda_check.c        |   1 +
 libavfilter/vf_scale_cuda.c     |  84 +++++++++---------
 libavfilter/vf_scale_npp.c      |  12 +--
 libavfilter/vf_thumbnail_cuda.c |  94 ++++++++++----------
 libavfilter/vf_transpose_npp.c  |  12 +--
 libavfilter/vf_yadif_cuda.c     |  97 ++++++---------------
 libavutil/Makefile              |   5 +-
 libavutil/cuda_check.c          |  45 ++++++++++
 libavutil/cuda_check.h          |  43 ++++++++++
 libavutil/hwcontext_cuda.c      | 148 +++++++++++++++-----------------
 16 files changed, 359 insertions(+), 387 deletions(-)
 create mode 100644 libavcodec/cuda_check.c
 create mode 100644 libavfilter/cuda_check.c
 create mode 100644 libavutil/cuda_check.c
 create mode 100644 libavutil/cuda_check.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 05be02ec7d..716f26d191 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -124,7 +124,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC)            += mpegvideo_enc.o mpeg12data.o  \
                                           motion_est.o ratecontrol.o    \
                                           mpegvideoencdsp.o
 OBJS-$(CONFIG_MSS34DSP)                += mss34dsp.o
-OBJS-$(CONFIG_NVENC)                   += nvenc.o
+OBJS-$(CONFIG_NVENC)                   += nvenc.o cuda_check.o
 OBJS-$(CONFIG_PIXBLOCKDSP)             += pixblockdsp.o
 OBJS-$(CONFIG_QPELDSP)                 += qpeldsp.o
 OBJS-$(CONFIG_QSV)                     += qsv.o
@@ -346,7 +346,7 @@ OBJS-$(CONFIG_H264_DECODER)            += h264dec.o h264_cabac.o h264_cavlc.o \
                                           h264_refs.o h264_sei.o \
                                           h264_slice.o h264data.o
 OBJS-$(CONFIG_H264_AMF_ENCODER)        += amfenc_h264.o
-OBJS-$(CONFIG_H264_CUVID_DECODER)      += cuviddec.o
+OBJS-$(CONFIG_H264_CUVID_DECODER)      += cuviddec.o cuda_check.o
 OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o
 OBJS-$(CONFIG_H264_MMAL_DECODER)       += mmaldec.o
 OBJS-$(CONFIG_H264_NVENC_ENCODER)      += nvenc_h264.o
@@ -852,7 +852,7 @@ OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER)       += adpcmenc.o adpcm_data.o
 # hardware accelerators
 OBJS-$(CONFIG_D3D11VA)                    += dxva2.o
 OBJS-$(CONFIG_DXVA2)                      += dxva2.o
-OBJS-$(CONFIG_NVDEC)                      += nvdec.o
+OBJS-$(CONFIG_NVDEC)                      += nvdec.o cuda_check.o
 OBJS-$(CONFIG_VAAPI)                      += vaapi_decode.o
 OBJS-$(CONFIG_VIDEOTOOLBOX)               += videotoolbox.o
 OBJS-$(CONFIG_VDPAU)                      += vdpau.o
diff --git a/libavcodec/cuda_check.c b/libavcodec/cuda_check.c
new file mode 100644
index 0000000000..a1ebb88882
--- /dev/null
+++ b/libavcodec/cuda_check.c
@@ -0,0 +1 @@
+#include "libavutil/cuda_check.c"
diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index f21273c07e..03589367ce 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -25,6 +25,7 @@
 #include "libavutil/mathematics.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/fifo.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
@@ -95,29 +96,7 @@ typedef struct CuvidParsedFrame
     int is_deinterlacing;
 } CuvidParsedFrame;
 
-static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
-{
-    CuvidContext *ctx = avctx->priv_data;
-    const char *err_name;
-    const char *err_string;
-
-    av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
-
-    if (err == CUDA_SUCCESS)
-        return 0;
-
-    ctx->cudl->cuGetErrorName(err, &err_name);
-    ctx->cudl->cuGetErrorString(err, &err_string);
-
-    av_log(avctx, AV_LOG_ERROR, "%s failed", func);
-    if (err_name && err_string)
-        av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
-    av_log(avctx, AV_LOG_ERROR, "\n");
-
-    return AVERROR_EXTERNAL;
-}
-
-#define CHECK_CU(x) check_cu(avctx, (x), #x)
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
 
 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
 {
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index 0426c9b319..c7d5379770 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -26,6 +26,7 @@
 #include "libavutil/error.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/pixfmt.h"
 
@@ -50,6 +51,8 @@ typedef struct NVDECFramePool {
     unsigned int nb_allocated;
 } NVDECFramePool;
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
+
 static int map_avcodec_id(enum AVCodecID id)
 {
     switch (id) {
@@ -86,7 +89,7 @@ static int map_chroma_format(enum AVPixelFormat pix_fmt)
 static int nvdec_test_capabilities(NVDECDecoder *decoder,
                                    CUVIDDECODECREATEINFO *params, void *logctx)
 {
-    CUresult err;
+    int ret;
     CUVIDDECODECAPS caps = { 0 };
 
     caps.eCodecType      = params->CodecType;
@@ -105,11 +108,9 @@ static int nvdec_test_capabilities(NVDECDecoder *decoder,
         return 0;
     }
 
-    err = decoder->cvdl->cuvidGetDecoderCaps(&caps);
-    if (err != CUDA_SUCCESS) {
-        av_log(logctx, AV_LOG_ERROR, "Failed querying decoder capabilities\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps));
+    if (ret < 0)
+        return ret;
 
     av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
     av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
@@ -150,10 +151,11 @@ static void nvdec_decoder_free(void *opaque, uint8_t *data)
     NVDECDecoder *decoder = (NVDECDecoder*)data;
 
     if (decoder->decoder) {
+        void *logctx = decoder->hw_device_ref->data;
         CUcontext dummy;
-        decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-        decoder->cvdl->cuvidDestroyDecoder(decoder->decoder);
-        decoder->cudl->cuCtxPopCurrent(&dummy);
+        CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+        CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder));
+        CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
     }
 
     av_buffer_unref(&decoder->hw_device_ref);
@@ -173,7 +175,6 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
     NVDECDecoder *decoder;
 
     CUcontext dummy;
-    CUresult err;
     int ret;
 
     decoder = av_mallocz(sizeof(*decoder));
@@ -202,25 +203,21 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
         goto fail;
     }
 
-    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
     ret = nvdec_test_capabilities(decoder, params, logctx);
     if (ret < 0) {
-        decoder->cudl->cuCtxPopCurrent(&dummy);
+        CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
         goto fail;
     }
 
-    err = decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params);
+    ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params));
 
-    decoder->cudl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
 
-    if (err != CUDA_SUCCESS) {
-        av_log(logctx, AV_LOG_ERROR, "Error creating a NVDEC decoder: %d\n", err);
-        ret = AVERROR_UNKNOWN;
+    if (ret < 0) {
         goto fail;
     }
 
@@ -364,21 +361,18 @@ static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
 {
     NVDECFrame *unmap_data = (NVDECFrame*)data;
     NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
+    void *logctx = decoder->hw_device_ref->data;
     CUdeviceptr devptr = (CUdeviceptr)opaque;
-    CUresult err;
+    int ret;
     CUcontext dummy;
 
-    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+    if (ret < 0)
         goto finish;
-    }
 
-    err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
-    if (err != CUDA_SUCCESS)
-        av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
+    CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
 
-    decoder->cudl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
 
 finish:
     av_buffer_unref(&unmap_data->idx_ref);
@@ -395,7 +389,6 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     CUVIDPROCPARAMS vpp = { 0 };
     NVDECFrame *unmap_data = NULL;
 
-    CUresult err;
     CUcontext dummy;
     CUdeviceptr devptr;
 
@@ -406,18 +399,15 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     vpp.progressive_frame = 1;
     vpp.output_stream = decoder->stream;
 
-    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
-    err = decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, cf->idx, &devptr,
-                                            &pitch, &vpp);
-    if (err != CUDA_SUCCESS) {
-        av_log(logctx, AV_LOG_ERROR, "Error mapping a picture with CUVID: %d\n",
-               err);
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder,
+                                                     cf->idx, &devptr,
+                                                     &pitch, &vpp));
+    if (ret < 0)
         goto finish;
-    }
 
     unmap_data = av_mallocz(sizeof(*unmap_data));
     if (!unmap_data) {
@@ -447,14 +437,14 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
 
 copy_fail:
     if (!frame->buf[1]) {
-        decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
+        CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
         av_freep(&unmap_data);
     } else {
         av_buffer_unref(&frame->buf[1]);
     }
 
 finish:
-    decoder->cudl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
     return ret;
 }
 
@@ -504,9 +494,9 @@ int ff_nvdec_end_frame(AVCodecContext *avctx)
 {
     NVDECContext     *ctx = avctx->internal->hwaccel_priv_data;
     NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
+    void *logctx          = avctx;
     CUVIDPICPARAMS    *pp = &ctx->pic_params;
 
-    CUresult err;
     CUcontext dummy;
 
     int ret = 0;
@@ -516,20 +506,16 @@ int ff_nvdec_end_frame(AVCodecContext *avctx)
     pp->nNumSlices        = ctx->nb_slices;
     pp->pSliceDataOffsets = ctx->slice_offsets;
 
-    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
-    err = decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params);
-    if (err != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Error decoding a picture with NVDEC: %d\n",
-               err);
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params));
+    if (ret < 0)
         goto finish;
-    }
 
 finish:
-    decoder->cudl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
 
     return ret;
 }
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index e180d7b993..97497be0bc 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -25,12 +25,15 @@
 
 #include "libavutil/hwcontext_cuda.h"
 #include "libavutil/hwcontext.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/pixdesc.h"
 #include "internal.h"
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)
+
 #define NVENC_CAP 0x30
 #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR ||             \
                     rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
@@ -183,37 +186,23 @@ static int nvenc_push_context(AVCodecContext *avctx)
 {
     NvencContext *ctx            = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
-    CUresult cu_res;
 
     if (ctx->d3d11_device)
         return 0;
 
-    cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
-
-    return 0;
+    return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));
 }
 
 static int nvenc_pop_context(AVCodecContext *avctx)
 {
     NvencContext *ctx            = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
-    CUresult cu_res;
     CUcontext dummy;
 
     if (ctx->d3d11_device)
         return 0;
 
-    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
-
-    return 0;
+    return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy));
 }
 
 static av_cold int nvenc_open_session(AVCodecContext *avctx)
@@ -406,32 +395,23 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
     NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
     char name[128] = { 0};
     int major, minor, ret;
-    CUresult cu_res;
     CUdevice cu_device;
     int loglevel = AV_LOG_VERBOSE;
 
     if (ctx->device == LIST_DEVICES)
         loglevel = AV_LOG_INFO;
 
-    cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Cannot access the CUDA device %d\n",
-               idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
+    if (ret < 0)
+        return ret;
 
-    cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
+    if (ret < 0)
+        return ret;
 
-    cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
+    if (ret < 0)
+        return ret;
 
     av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
     if (((major << 4) | minor) < NVENC_CAP) {
@@ -442,11 +422,9 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
     if (ctx->device != idx && ctx->device != ANY_DEVICE)
         return -1;
 
-    cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
+    ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
+    if (ret < 0)
         goto fail;
-    }
 
     ctx->cu_context = ctx->cu_context_internal;
 
@@ -477,7 +455,7 @@ fail3:
         return ret;
 
 fail2:
-    dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
+    CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
     ctx->cu_context_internal = NULL;
 
 fail:
@@ -555,17 +533,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx)
     } else {
         int i, nb_devices = 0;
 
-        if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Cannot init CUDA\n");
+        if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
             return AVERROR_UNKNOWN;
-        }
 
-        if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Cannot enumerate the CUDA devices\n");
+        if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
             return AVERROR_UNKNOWN;
-        }
 
         if (!nb_devices) {
             av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
@@ -1460,7 +1432,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
     ctx->nvencoder = NULL;
 
     if (ctx->cu_context_internal)
-        dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
+        CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
     ctx->cu_context = ctx->cu_context_internal = NULL;
 
 #if CONFIG_D3D11VA
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 7c6fc836e5..a7ebd0221b 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -334,8 +334,9 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER)         += vf_convolution_opencl.o opencl.o
 OBJS-$(CONFIG_ROTATE_FILTER)                 += vf_rotate.o
 OBJS-$(CONFIG_SAB_FILTER)                    += vf_sab.o
 OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o scale.o
-OBJS-$(CONFIG_SCALE_CUDA_FILTER)             += vf_scale_cuda.o vf_scale_cuda.ptx.o
-OBJS-$(CONFIG_SCALE_NPP_FILTER)              += vf_scale_npp.o scale.o
+OBJS-$(CONFIG_SCALE_CUDA_FILTER)             += vf_scale_cuda.o vf_scale_cuda.ptx.o \
+                                                cuda_check.o
+OBJS-$(CONFIG_SCALE_NPP_FILTER)              += vf_scale_npp.o scale.o cuda_check.o
 OBJS-$(CONFIG_SCALE_QSV_FILTER)              += vf_scale_qsv.o
 OBJS-$(CONFIG_SCALE_VAAPI_FILTER)            += vf_scale_vaapi.o scale.o vaapi_vpp.o
 OBJS-$(CONFIG_SCALE2REF_FILTER)              += vf_scale.o scale.o
@@ -376,7 +377,8 @@ OBJS-$(CONFIG_TBLEND_FILTER)                 += vf_blend.o framesync.o
 OBJS-$(CONFIG_TELECINE_FILTER)               += vf_telecine.o
 OBJS-$(CONFIG_THRESHOLD_FILTER)              += vf_threshold.o framesync.o
 OBJS-$(CONFIG_THUMBNAIL_FILTER)              += vf_thumbnail.o
-OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER)         += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o
+OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER)         += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o \
+                                                cuda_check.o
 OBJS-$(CONFIG_TILE_FILTER)                   += vf_tile.o
 OBJS-$(CONFIG_TINTERLACE_FILTER)             += vf_tinterlace.o
 OBJS-$(CONFIG_TLUT2_FILTER)                  += vf_lut2.o framesync.o
@@ -386,7 +388,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER)         += vf_tonemap_opencl.o colorspace.o
                                                 opencl/tonemap.o opencl/colorspace_common.o
 OBJS-$(CONFIG_TPAD_FILTER)                   += vf_tpad.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
-OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)          += vf_transpose_npp.o
+OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)          += vf_transpose_npp.o cuda_check.o
 OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
@@ -410,7 +412,8 @@ OBJS-$(CONFIG_WEAVE_FILTER)                  += vf_weave.o
 OBJS-$(CONFIG_XBR_FILTER)                    += vf_xbr.o
 OBJS-$(CONFIG_XSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += vf_yadif.o yadif_common.o
-OBJS-$(CONFIG_YADIF_CUDA_FILTER)             += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o
+OBJS-$(CONFIG_YADIF_CUDA_FILTER)             += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \
+                                                yadif_common.o cuda_check.o
 OBJS-$(CONFIG_ZMQ_FILTER)                    += f_zmq.o
 OBJS-$(CONFIG_ZOOMPAN_FILTER)                += vf_zoompan.o
 OBJS-$(CONFIG_ZSCALE_FILTER)                 += vf_zscale.o
diff --git a/libavfilter/cuda_check.c b/libavfilter/cuda_check.c
new file mode 100644
index 0000000000..a1ebb88882
--- /dev/null
+++ b/libavfilter/cuda_check.c
@@ -0,0 +1 @@
+#include "libavutil/cuda_check.c"
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 7b2b78c1ed..53b7aa9531 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -28,6 +28,7 @@
 #include "libavutil/common.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -52,6 +53,8 @@ static const enum AVPixelFormat supported_formats[] = {
 #define BLOCKX 32
 #define BLOCKY 16
 
+#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x)
+
 typedef struct CUDAScaleContext {
     const AVClass *class;
     enum AVPixelFormat in_fmt;
@@ -255,55 +258,48 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
     AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
-    CUresult err;
     int w, h;
     int ret;
 
     extern char vf_scale_cuda_ptx[];
 
-    err = cuCtxPushCurrent(cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n");
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
-    err = cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error loading module data\n");
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
+    if (ret < 0)
         goto fail;
-    }
 
-    cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar");
-    cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2");
-    cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4");
-    cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort");
-    cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2");
-    cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4");
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar"));
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"));
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"));
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"));
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"));
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"));
 
-    cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex");
-    cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex");
-    cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex");
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"));
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"));
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex"));
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"));
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"));
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex"));
 
-    cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER);
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER));
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER));
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER));
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER));
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER));
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER));
 
-    cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR);
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR));
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR));
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR));
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR));
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR));
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR));
 
-    cuCtxPopCurrent(&dummy);
+    CHECK_CU(cuCtxPopCurrent(&dummy));
 
     if ((ret = ff_scale_eval_dimensions(s,
                                         s->w_expr, s->h_expr,
@@ -339,7 +335,7 @@ fail:
     return ret;
 }
 
-static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex, int channels,
+static int call_resize_kernel(CUDAScaleContext *ctx, CUfunction func, CUtexref tex, int channels,
                               uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
                               uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
                               int pixel_size)
@@ -358,8 +354,9 @@ static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex
         desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
     }
 
-    cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size);
-    cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL);
+    CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size));
+    CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
+                            BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL));
 
     return 0;
 }
@@ -470,7 +467,6 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
 
     AVFrame *out = NULL;
-    CUresult err;
     CUcontext dummy;
     int ret = 0;
 
@@ -480,15 +476,13 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
         goto fail;
     }
 
-    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
     ret = cudascale_scale(ctx, out, in);
 
-    cuCtxPopCurrent(&dummy);
+    CHECK_CU(cuCtxPopCurrent(&dummy));
     if (ret < 0)
         goto fail;
 
diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c
index 8a277ce8e1..a3e085764a 100644
--- a/libavfilter/vf_scale_npp.c
+++ b/libavfilter/vf_scale_npp.c
@@ -29,6 +29,7 @@
 #include "libavutil/common.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -39,6 +40,8 @@
 #include "scale.h"
 #include "video.h"
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x)
+
 static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_NV12,
@@ -498,7 +501,6 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in)
     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
 
     AVFrame *out = NULL;
-    CUresult err;
     CUcontext dummy;
     int ret = 0;
 
@@ -511,15 +513,13 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in)
         goto fail;
     }
 
-    err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
     ret = nppscale_scale(ctx, out, in);
 
-    device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
     if (ret < 0)
         goto fail;
 
diff --git a/libavfilter/vf_thumbnail_cuda.c b/libavfilter/vf_thumbnail_cuda.c
index 53df7e0bf7..22691e156f 100644
--- a/libavfilter/vf_thumbnail_cuda.c
+++ b/libavfilter/vf_thumbnail_cuda.c
@@ -24,12 +24,15 @@
 
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 
 #include "avfilter.h"
 #include "internal.h"
 
+#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x)
+
 #define HIST_SIZE (3*256)
 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
 #define BLOCKX 32
@@ -154,7 +157,7 @@ static AVFrame *get_best_frame(AVFilterContext *ctx)
     return picref;
 }
 
-static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref tex, int channels,
+static int thumbnail_kernel(ThumbnailCudaContext *ctx, CUfunction func, CUtexref tex, int channels,
     int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size)
 {
     CUdeviceptr src_devptr = (CUdeviceptr)src_dptr;
@@ -171,8 +174,10 @@ static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref t
         desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
     }
 
-    cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch);
-    cuLaunchKernel(func, DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args, NULL);
+    CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch));
+    CHECK_CU(cuLaunchKernel(func,
+                            DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1,
+                            BLOCKX, BLOCKY, 1, 0, 0, args, NULL));
 
     return 0;
 }
@@ -235,7 +240,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     int *hist = s->frames[s->n].histogram;
     AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data;
     AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
-    CUresult err;
     CUcontext dummy;
     CUDA_MEMCPY2D cpy = { 0 };
     int ret = 0;
@@ -243,11 +247,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     // keep a reference of each frame
     s->frames[s->n].buf = frame;
 
-    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
-    cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int));
+    CHECK_CU(cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int)));
 
     thumbnail(ctx, (int*)s->data, frame);
 
@@ -260,11 +264,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     cpy.WidthInBytes = HIST_SIZE * sizeof(int);
     cpy.Height = 1;
 
-    err = cuMemcpy2D(&cpy);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cuMemcpy2D(&cpy));
+    if (ret < 0)
+        return ret;
 
     if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P ||
         hw_frames_ctx->sw_format == AV_PIX_FMT_P010LE || hw_frames_ctx->sw_format == AV_PIX_FMT_P016LE)
@@ -274,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
             hist[i] = 4 * hist[i];
     }
 
-    cuCtxPopCurrent(&dummy);
+    ret = CHECK_CU(cuCtxPopCurrent(&dummy));
     if (ret < 0)
         return ret;
 
@@ -292,12 +294,12 @@ static av_cold void uninit(AVFilterContext *ctx)
     ThumbnailCudaContext *s = ctx->priv;
 
     if (s->data) {
-        cuMemFree(s->data);
+        CHECK_CU(cuMemFree(s->data));
         s->data = 0;
     }
 
     if (s->cu_module) {
-        cuModuleUnload(s->cu_module);
+        CHECK_CU(cuModuleUnload(s->cu_module));
         s->cu_module = NULL;
     }
 
@@ -340,49 +342,45 @@ static int config_props(AVFilterLink *inlink)
     AVHWFramesContext     *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
     AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
-    CUresult err;
+    int ret;
 
     extern char vf_thumbnail_cuda_ptx[];
 
-    err = cuCtxPushCurrent(cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    if (ret < 0)
+        return ret;
 
-    err = cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error loading module data\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx));
+    if (ret < 0)
+        goto exit;
 
-    cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar");
-    cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2");
-    cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort");
-    cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2");
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"));
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"));
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"));
+    CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"));
 
-    cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex");
-    cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex");
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"));
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"));
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"));
+    CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"));
 
-    cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER);
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER));
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER));
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER));
+    CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER));
 
-    cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR);
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR));
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR));
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR));
+    CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR));
 
-    err = cuMemAlloc(&s->data, HIST_SIZE * sizeof(int));
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error allocating cuda memory\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)));
 
-    cuCtxPopCurrent(&dummy);
+exit:
+    /* Pop the context on every path so a failure above cannot leak it. */
+    CHECK_CU(cuCtxPopCurrent(&dummy));
+    if (ret < 0)
+        return ret;
 
     s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx;
 
diff --git a/libavfilter/vf_transpose_npp.c b/libavfilter/vf_transpose_npp.c
index 1b3a5c0c69..3ea031667c 100644
--- a/libavfilter/vf_transpose_npp.c
+++ b/libavfilter/vf_transpose_npp.c
@@ -23,6 +23,7 @@
 #include "libavutil/common.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -32,6 +33,8 @@
 #include "internal.h"
 #include "video.h"
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x)
+
 static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_YUV444P
@@ -397,7 +400,6 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in)
     AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
     AVFrame *out = NULL;
-    CUresult err;
     CUcontext dummy;
     int ret = 0;
 
@@ -410,15 +412,13 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in)
         goto fail;
     }
 
-    err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
     ret = npptranspose_filter(ctx, out, in);
 
-    device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
     if (ret < 0)
         goto fail;
 
diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c
index be22344d9d..85e1aac5eb 100644
--- a/libavfilter/vf_yadif_cuda.c
+++ b/libavfilter/vf_yadif_cuda.c
@@ -21,6 +21,7 @@
 #include <cuda.h>
 #include "libavutil/avassert.h"
 #include "libavutil/hwcontext_cuda.h"
+#include "libavutil/cuda_check.h"
 #include "internal.h"
 #include "yadif.h"
 
@@ -48,28 +49,7 @@ typedef struct DeintCUDAContext {
 #define BLOCKX 32
 #define BLOCKY 16
 
-static int check_cu(AVFilterContext *avctx, CUresult err, const char *func)
-{
-    const char *err_name;
-    const char *err_string;
-
-    av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
-
-    if (err == CUDA_SUCCESS)
-        return 0;
-
-    cuGetErrorName(err, &err_name);
-    cuGetErrorString(err, &err_string);
-
-    av_log(avctx, AV_LOG_ERROR, "%s failed", func);
-    if (err_name && err_string)
-        av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
-    av_log(avctx, AV_LOG_ERROR, "\n");
-
-    return AVERROR_EXTERNAL;
-}
-
-#define CHECK_CU(x) check_cu(ctx, (x), #x)
+#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x)
 
-static CUresult call_kernel(AVFilterContext *ctx, CUfunction func,
+static int call_kernel(AVFilterContext *ctx, CUfunction func,
                             CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next,
@@ -85,7 +65,7 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func,
 {
     DeintCUDAContext *s = ctx->priv;
     CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0;
-    CUresult err;
+    int ret;
     int skip_spatial_check = s->yadif.mode&2;
 
     void *args[] = { &dst, &tex_prev, &tex_cur, &tex_next,
@@ -108,24 +88,21 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func,
     };
 
     res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev;
-    err = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL));
-    if (err != CUDA_SUCCESS) {
+    ret = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL));
+    if (ret < 0)
         goto exit;
-    }
 
     res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur;
-    err = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL));
-    if (err != CUDA_SUCCESS) {
+    ret = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL));
+    if (ret < 0)
         goto exit;
-    }
 
     res_desc.res.pitch2D.devPtr = (CUdeviceptr)next;
-    err = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL));
-    if (err != CUDA_SUCCESS) {
+    ret = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL));
+    if (ret < 0)
         goto exit;
-    }
 
-    err = CHECK_CU(cuLaunchKernel(func,
+    ret = CHECK_CU(cuLaunchKernel(func,
                                   DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
                                   BLOCKX, BLOCKY, 1,
                                   0, s->stream, args, NULL));
@@ -138,7 +115,7 @@ exit:
     if (tex_next)
         CHECK_CU(cuTexObjectDestroy(tex_next));
 
-    return err;
+    return ret;
 }
 
 static void filter(AVFilterContext *ctx, AVFrame *dst,
@@ -147,13 +124,11 @@ static void filter(AVFilterContext *ctx, AVFrame *dst,
     DeintCUDAContext *s = ctx->priv;
     YADIFContext *y = &s->yadif;
     CUcontext dummy;
-    CUresult err;
-    int i;
+    int i, ret;
 
-    err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
-    if (err != CUDA_SUCCESS) {
-        goto exit;
-    }
+    ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
+    if (ret < 0)
+        return;
 
     for (i = 0; i < y->csp->nb_components; i++) {
         CUfunction func;
@@ -204,10 +179,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dst,
                     parity, tff);
     }
 
-    err = CHECK_CU(cuStreamSynchronize(s->stream));
-    if (err != CUDA_SUCCESS) {
-        goto exit;
-    }
+    CHECK_CU(cuStreamSynchronize(s->stream));
 
 exit:
     CHECK_CU(cuCtxPopCurrent(&dummy));
@@ -283,7 +255,6 @@ static int config_output(AVFilterLink *link)
     YADIFContext *y = &s->yadif;
     int ret = 0;
     CUcontext dummy;
-    CUresult err;
 
     av_assert0(s->input_frames);
     s->device_ref = av_buffer_ref(s->input_frames->device_ref);
@@ -342,41 +313,29 @@ static int config_output(AVFilterLink *link)
     y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
     y->filter = filter;
 
-    err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_EXTERNAL;
+    ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
+    if (ret < 0)
         goto exit;
-    }
 
-    err = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx));
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_INVALIDDATA;
+    ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx));
+    if (ret < 0)
         goto exit;
-    }
 
-    err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar"));
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_INVALIDDATA;
+    ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar"));
+    if (ret < 0)
         goto exit;
-    }
 
-    err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2"));
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_INVALIDDATA;
+    ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2"));
+    if (ret < 0)
         goto exit;
-    }
 
-    err= CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort"));
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_INVALIDDATA;
+    ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort"));
+    if (ret < 0)
         goto exit;
-    }
 
-    err = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2"));
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_INVALIDDATA;
+    ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2"));
+    if (ret < 0)
         goto exit;
-    }
 
 exit:
     CHECK_CU(cuCtxPopCurrent(&dummy));
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 9ed24cfc82..b772111695 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -157,7 +157,7 @@ OBJS = adler32.o                                                        \
        xtea.o                                                           \
        tea.o                                                            \
 
-OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o
+OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o cuda_check.o
 OBJS-$(CONFIG_D3D11VA)                  += hwcontext_d3d11va.o
 OBJS-$(CONFIG_DXVA2)                    += hwcontext_dxva2.o
 OBJS-$(CONFIG_LIBDRM)                   += hwcontext_drm.o
@@ -175,7 +175,8 @@ OBJS += $(COMPAT_OBJS:%=../compat/%)
 SLIBOBJS-$(HAVE_GNU_WINDRES)            += avutilres.o
 
 SKIPHEADERS-$(HAVE_CUDA_H)             += hwcontext_cuda.h
-SKIPHEADERS-$(CONFIG_CUDA)             += hwcontext_cuda_internal.h
+SKIPHEADERS-$(CONFIG_CUDA)             += hwcontext_cuda_internal.h     \
+                                          cuda_check.h
 SKIPHEADERS-$(CONFIG_D3D11VA)          += hwcontext_d3d11va.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += hwcontext_dxva2.h
 SKIPHEADERS-$(CONFIG_QSV)              += hwcontext_qsv.h
diff --git a/libavutil/cuda_check.c b/libavutil/cuda_check.c
new file mode 100644
index 0000000000..95c0256d12
--- /dev/null
+++ b/libavutil/cuda_check.c
@@ -0,0 +1,45 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "compat/cuda/dynlink_loader.h"
+#include "libavutil/cuda_check.h"
+
+int ff_cuda_check(void *avctx,
+                  void *cuGetErrorName_fn,
+                  void *cuGetErrorString_fn,
+                  CUresult err, const char *func)
+{
+    /* NULL-init: the lookups' results are unchecked, so never read garbage. */
+    const char *err_name   = NULL;
+    const char *err_string = NULL;
+
+    av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
+
+    if (err == CUDA_SUCCESS)
+        return 0;
+
+    ((tcuGetErrorName *)cuGetErrorName_fn)(err, &err_name);
+    ((tcuGetErrorString *)cuGetErrorString_fn)(err, &err_string);
+
+    av_log(avctx, AV_LOG_ERROR, "%s failed", func);
+    if (err_name && err_string)
+        av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
+    av_log(avctx, AV_LOG_ERROR, "\n");
+
+    return AVERROR_EXTERNAL;
+}
diff --git a/libavutil/cuda_check.h b/libavutil/cuda_check.h
new file mode 100644
index 0000000000..0d45538c2f
--- /dev/null
+++ b/libavutil/cuda_check.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVUTIL_CUDA_CHECK_H
+#define AVUTIL_CUDA_CHECK_H
+
+/**
+ * Wrap a CUDA function call and print error information if it fails.
+ *
+ * @param func  text of the wrapped call, used in the log messages
+ * @return 0 if err is CUDA_SUCCESS, AVERROR_EXTERNAL otherwise
+ */
+int ff_cuda_check(void *avctx,
+                  void *cuGetErrorName_fn, void *cuGetErrorString_fn,
+                  CUresult err, const char *func);
+
+/**
+ * Convenience wrapper for ff_cuda_check when directly linking libcuda.
+ */
+#define FF_CUDA_CHECK(avclass, x) ff_cuda_check(avclass, cuGetErrorName, cuGetErrorString, (x), #x)
+
+/**
+ * Convenience wrapper for ff_cuda_check when dynamically loading cuda symbols.
+ */
+#define FF_CUDA_CHECK_DL(avclass, cudl, x) ff_cuda_check(avclass, (cudl)->cuGetErrorName, (cudl)->cuGetErrorString, (x), #x)
+
+#endif /* AVUTIL_CUDA_CHECK_H */
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 3b1d53e799..540a7610ef 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -21,6 +21,7 @@
 #include "hwcontext.h"
 #include "hwcontext_internal.h"
 #include "hwcontext_cuda_internal.h"
+#include "cuda_check.h"
 #include "mem.h"
 #include "pixdesc.h"
 #include "pixfmt.h"
@@ -43,6 +44,8 @@ static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_0BGR32,
 };
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)
+
 static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
                                        const void *hwconfig,
                                        AVHWFramesConstraints *constraints)
@@ -70,48 +73,48 @@ static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
 
 static void cuda_buffer_free(void *opaque, uint8_t *data)
 {
-    AVHWFramesContext *ctx = opaque;
-    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
-    CudaFunctions *cu = hwctx->internal->cuda_dl;
+    AVHWFramesContext        *ctx = opaque;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx;
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
 
     CUcontext dummy;
 
-    cu->cuCtxPushCurrent(hwctx->cuda_ctx);
+    CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
 
-    cu->cuMemFree((CUdeviceptr)data);
+    CHECK_CU(cu->cuMemFree((CUdeviceptr)data));
 
-    cu->cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 }
 
 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
 {
-    AVHWFramesContext     *ctx = opaque;
-    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
-    CudaFunctions          *cu = hwctx->internal->cuda_dl;
+    AVHWFramesContext        *ctx = opaque;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx;
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
 
     AVBufferRef *ret = NULL;
     CUcontext dummy = NULL;
     CUdeviceptr data;
-    CUresult err;
+    int err;
 
-    err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+    err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
+    if (err < 0)
         return NULL;
-    }
 
-    err = cu->cuMemAlloc(&data, size);
-    if (err != CUDA_SUCCESS)
+    err = CHECK_CU(cu->cuMemAlloc(&data, size));
+    if (err < 0)
         goto fail;
 
     ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
     if (!ret) {
-        cu->cuMemFree(data);
+        CHECK_CU(cu->cuMemFree(data));
         goto fail;
     }
 
 fail:
-    cu->cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
     return ret;
 }
 
@@ -194,17 +197,17 @@ static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
 static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                    const AVFrame *src)
 {
-    CUDAFramesContext           *priv = ctx->internal->priv;
-    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
-    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
+    CUDAFramesContext       *priv = ctx->internal->priv;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx;
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
 
     CUcontext dummy;
-    CUresult err;
-    int i;
+    int i, ret;
 
-    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
         CUDA_MEMCPY2D cpy = {
@@ -218,20 +221,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
             .Height        = src->height >> (i ? priv->shift_height : 0),
         };
 
-        err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
-        if (err != CUDA_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
-            return AVERROR_UNKNOWN;
-        }
+        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
+        if (ret < 0)
+            goto exit;
     }
 
-    err = cu->cuStreamSynchronize(device_hwctx->stream);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
+    if (ret < 0)
+        goto exit;
 
-    cu->cuCtxPopCurrent(&dummy);
+exit:
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 
-    return 0;
+    return ret;
 }
@@ -239,17 +239,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
 static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                  const AVFrame *src)
 {
-    CUDAFramesContext           *priv = ctx->internal->priv;
-    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
-    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
+    CUDAFramesContext       *priv = ctx->internal->priv;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx;
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
 
     CUcontext dummy;
-    CUresult err;
-    int i;
+    int i, ret;
 
-    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
         CUDA_MEMCPY2D cpy = {
@@ -263,31 +263,29 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
             .Height        = src->height >> (i ? priv->shift_height : 0),
         };
 
-        err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
-        if (err != CUDA_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
-            return AVERROR_UNKNOWN;
-        }
+        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
+        if (ret < 0)
+            goto exit;
     }
 
-    err = cu->cuStreamSynchronize(device_hwctx->stream);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
+    if (ret < 0)
+        goto exit;
 
-    cu->cuCtxPopCurrent(&dummy);
+exit:
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 
-    return 0;
+    return ret;
 }
 
-static void cuda_device_uninit(AVHWDeviceContext *ctx)
+static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
 {
-    AVCUDADeviceContext *hwctx = ctx->hwctx;
+    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
 
     if (hwctx->internal) {
+        CudaFunctions *cu = hwctx->internal->cuda_dl;
         if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
-            hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
+            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
             hwctx->cuda_ctx = NULL;
         }
         cuda_free_functions(&hwctx->internal->cuda_dl);
@@ -322,53 +320,47 @@ error:
     return ret;
 }
 
-static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
+static int cuda_device_create(AVHWDeviceContext *device_ctx,
+                              const char *device,
                               AVDictionary *opts, int flags)
 {
-    AVCUDADeviceContext *hwctx = ctx->hwctx;
+    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
     CudaFunctions *cu;
     CUdevice cu_device;
     CUcontext dummy;
-    CUresult err;
-    int device_idx = 0;
+    int ret, device_idx = 0;
 
     if (device)
         device_idx = strtol(device, NULL, 0);
 
-    if (cuda_device_init(ctx) < 0)
+    if (cuda_device_init(device_ctx) < 0)
         goto error;
 
     cu = hwctx->internal->cuda_dl;
 
-    err = cu->cuInit(0);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
+    ret = CHECK_CU(cu->cuInit(0));
+    if (ret < 0)
         goto error;
-    }
 
-    err = cu->cuDeviceGet(&cu_device, device_idx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
+    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
+    if (ret < 0)
         goto error;
-    }
 
-    err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
+    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
+    if (ret < 0)
         goto error;
-    }
 
     // Setting stream to NULL will make functions automatically use the default CUstream
     hwctx->stream = NULL;
 
-    cu->cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 
     hwctx->internal->is_allocated = 1;
 
     return 0;
 
 error:
-    cuda_device_uninit(ctx);
+    cuda_device_uninit(device_ctx);
     return AVERROR_UNKNOWN;
 }