diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 05be02ec7d..716f26d191 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -124,7 +124,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ motion_est.o ratecontrol.o \ mpegvideoencdsp.o OBJS-$(CONFIG_MSS34DSP) += mss34dsp.o -OBJS-$(CONFIG_NVENC) += nvenc.o +OBJS-$(CONFIG_NVENC) += nvenc.o cuda_check.o OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o OBJS-$(CONFIG_QPELDSP) += qpeldsp.o OBJS-$(CONFIG_QSV) += qsv.o @@ -346,7 +346,7 @@ OBJS-$(CONFIG_H264_DECODER) += h264dec.o h264_cabac.o h264_cavlc.o \ h264_refs.o h264_sei.o \ h264_slice.o h264data.o OBJS-$(CONFIG_H264_AMF_ENCODER) += amfenc_h264.o -OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o +OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o cuda_check.o OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o OBJS-$(CONFIG_H264_MMAL_DECODER) += mmaldec.o OBJS-$(CONFIG_H264_NVENC_ENCODER) += nvenc_h264.o @@ -852,7 +852,7 @@ OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER) += adpcmenc.o adpcm_data.o # hardware accelerators OBJS-$(CONFIG_D3D11VA) += dxva2.o OBJS-$(CONFIG_DXVA2) += dxva2.o -OBJS-$(CONFIG_NVDEC) += nvdec.o +OBJS-$(CONFIG_NVDEC) += nvdec.o cuda_check.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o OBJS-$(CONFIG_VDPAU) += vdpau.o diff --git a/libavcodec/cuda_check.c b/libavcodec/cuda_check.c new file mode 100644 index 0000000000..a1ebb88882 --- /dev/null +++ b/libavcodec/cuda_check.c @@ -0,0 +1 @@ +#include "libavutil/cuda_check.c" diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index f21273c07e..03589367ce 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -25,6 +25,7 @@ #include "libavutil/mathematics.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" #include "libavutil/fifo.h" #include "libavutil/log.h" #include "libavutil/opt.h" @@ -95,29 +96,7 @@ typedef struct CuvidParsedFrame int is_deinterlacing; } 
CuvidParsedFrame; -static int check_cu(AVCodecContext *avctx, CUresult err, const char *func) -{ - CuvidContext *ctx = avctx->priv_data; - const char *err_name; - const char *err_string; - - av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); - - if (err == CUDA_SUCCESS) - return 0; - - ctx->cudl->cuGetErrorName(err, &err_name); - ctx->cudl->cuGetErrorString(err, &err_string); - - av_log(avctx, AV_LOG_ERROR, "%s failed", func); - if (err_name && err_string) - av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); - av_log(avctx, AV_LOG_ERROR, "\n"); - - return AVERROR_EXTERNAL; -} - -#define CHECK_CU(x) check_cu(avctx, (x), #x) +#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x) static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format) { diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index 0426c9b319..c7d5379770 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -26,6 +26,7 @@ #include "libavutil/error.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" #include "libavutil/pixdesc.h" #include "libavutil/pixfmt.h" @@ -50,6 +51,8 @@ typedef struct NVDECFramePool { unsigned int nb_allocated; } NVDECFramePool; +#define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x) + static int map_avcodec_id(enum AVCodecID id) { switch (id) { @@ -86,7 +89,7 @@ static int map_chroma_format(enum AVPixelFormat pix_fmt) static int nvdec_test_capabilities(NVDECDecoder *decoder, CUVIDDECODECREATEINFO *params, void *logctx) { - CUresult err; + int ret; CUVIDDECODECAPS caps = { 0 }; caps.eCodecType = params->CodecType; @@ -105,11 +108,9 @@ static int nvdec_test_capabilities(NVDECDecoder *decoder, return 0; } - err = decoder->cvdl->cuvidGetDecoderCaps(&caps); - if (err != CUDA_SUCCESS) { - av_log(logctx, AV_LOG_ERROR, "Failed querying decoder capabilities\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps)); + if (ret < 0) 
+ return ret; av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n"); av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n", @@ -150,10 +151,11 @@ static void nvdec_decoder_free(void *opaque, uint8_t *data) NVDECDecoder *decoder = (NVDECDecoder*)data; if (decoder->decoder) { + void *logctx = decoder->hw_device_ref->data; CUcontext dummy; - decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); - decoder->cvdl->cuvidDestroyDecoder(decoder->decoder); - decoder->cudl->cuCtxPopCurrent(&dummy); + CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); + CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder)); + CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); } av_buffer_unref(&decoder->hw_device_ref); @@ -173,7 +175,6 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref, NVDECDecoder *decoder; CUcontext dummy; - CUresult err; int ret; decoder = av_mallocz(sizeof(*decoder)); @@ -202,25 +203,21 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref, goto fail; } - err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); - if (err != CUDA_SUCCESS) { - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); + if (ret < 0) goto fail; - } ret = nvdec_test_capabilities(decoder, params, logctx); if (ret < 0) { - decoder->cudl->cuCtxPopCurrent(&dummy); + CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); goto fail; } - err = decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params); + ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params)); - decoder->cudl->cuCtxPopCurrent(&dummy); + CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); - if (err != CUDA_SUCCESS) { - av_log(logctx, AV_LOG_ERROR, "Error creating a NVDEC decoder: %d\n", err); - ret = AVERROR_UNKNOWN; + if (ret < 0) { goto fail; } @@ -364,21 +361,18 @@ static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data) { NVDECFrame *unmap_data = (NVDECFrame*)data; 
NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data; + void *logctx = decoder->hw_device_ref->data; CUdeviceptr devptr = (CUdeviceptr)opaque; - CUresult err; + int ret; CUcontext dummy; - err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); - if (err != CUDA_SUCCESS) { - av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n"); + ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); + if (ret < 0) goto finish; - } - err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr); - if (err != CUDA_SUCCESS) - av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n"); + CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr)); - decoder->cudl->cuCtxPopCurrent(&dummy); + CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); finish: av_buffer_unref(&unmap_data->idx_ref); @@ -395,7 +389,6 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) CUVIDPROCPARAMS vpp = { 0 }; NVDECFrame *unmap_data = NULL; - CUresult err; CUcontext dummy; CUdeviceptr devptr; @@ -406,18 +399,15 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) vpp.progressive_frame = 1; vpp.output_stream = decoder->stream; - err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); - if (err != CUDA_SUCCESS) - return AVERROR_UNKNOWN; + ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); + if (ret < 0) + return ret; - err = decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, cf->idx, &devptr, - &pitch, &vpp); - if (err != CUDA_SUCCESS) { - av_log(logctx, AV_LOG_ERROR, "Error mapping a picture with CUVID: %d\n", - err); - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, + cf->idx, &devptr, + &pitch, &vpp)); + if (ret < 0) goto finish; - } unmap_data = av_mallocz(sizeof(*unmap_data)); if (!unmap_data) { @@ -447,14 +437,14 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) copy_fail: if (!frame->buf[1]) { - decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr); 
+ CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr)); av_freep(&unmap_data); } else { av_buffer_unref(&frame->buf[1]); } finish: - decoder->cudl->cuCtxPopCurrent(&dummy); + CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); return ret; } @@ -504,9 +494,9 @@ int ff_nvdec_end_frame(AVCodecContext *avctx) { NVDECContext *ctx = avctx->internal->hwaccel_priv_data; NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data; + void *logctx = avctx; CUVIDPICPARAMS *pp = &ctx->pic_params; - CUresult err; CUcontext dummy; int ret = 0; @@ -516,20 +506,16 @@ int ff_nvdec_end_frame(AVCodecContext *avctx) pp->nNumSlices = ctx->nb_slices; pp->pSliceDataOffsets = ctx->slice_offsets; - err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); - if (err != CUDA_SUCCESS) - return AVERROR_UNKNOWN; + ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx)); + if (ret < 0) + return ret; - err = decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params); - if (err != CUDA_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Error decoding a picture with NVDEC: %d\n", - err); - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params)); + if (ret < 0) goto finish; - } finish: - decoder->cudl->cuCtxPopCurrent(&dummy); + CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy)); return ret; } diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index e180d7b993..97497be0bc 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -25,12 +25,15 @@ #include "libavutil/hwcontext_cuda.h" #include "libavutil/hwcontext.h" +#include "libavutil/cuda_check.h" #include "libavutil/imgutils.h" #include "libavutil/avassert.h" #include "libavutil/mem.h" #include "libavutil/pixdesc.h" #include "internal.h" +#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x) + #define NVENC_CAP 0x30 #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \ rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \ @@ -183,37 +186,23 @@ static int 
nvenc_push_context(AVCodecContext *avctx) { NvencContext *ctx = avctx->priv_data; NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; - CUresult cu_res; if (ctx->d3d11_device) return 0; - cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context); - if (cu_res != CUDA_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n"); - return AVERROR_EXTERNAL; - } - - return 0; + return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context)); } static int nvenc_pop_context(AVCodecContext *avctx) { NvencContext *ctx = avctx->priv_data; NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; - CUresult cu_res; CUcontext dummy; if (ctx->d3d11_device) return 0; - cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy); - if (cu_res != CUDA_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n"); - return AVERROR_EXTERNAL; - } - - return 0; + return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy)); } static av_cold int nvenc_open_session(AVCodecContext *avctx) @@ -406,32 +395,23 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; char name[128] = { 0}; int major, minor, ret; - CUresult cu_res; CUdevice cu_device; int loglevel = AV_LOG_VERBOSE; if (ctx->device == LIST_DEVICES) loglevel = AV_LOG_INFO; - cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx); - if (cu_res != CUDA_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, - "Cannot access the CUDA device %d\n", - idx); - return -1; - } + ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx)); + if (ret < 0) + return ret; - cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device); - if (cu_res != CUDA_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx); - return -1; - } + ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device)); + if (ret < 0) + return ret; - cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device); - if (cu_res != 
CUDA_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx); - return -1; - } + ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device)); + if (ret < 0) + return ret; av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor); if (((major << 4) | minor) < NVENC_CAP) { @@ -442,11 +422,9 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) if (ctx->device != idx && ctx->device != ANY_DEVICE) return -1; - cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device); - if (cu_res != CUDA_SUCCESS) { - av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res); + ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device)); + if (ret < 0) goto fail; - } ctx->cu_context = ctx->cu_context_internal; @@ -477,7 +455,7 @@ fail3: return ret; fail2: - dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); + CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal)); ctx->cu_context_internal = NULL; fail: @@ -555,17 +533,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) } else { int i, nb_devices = 0; - if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, - "Cannot init CUDA\n"); + if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0) return AVERROR_UNKNOWN; - } - if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, - "Cannot enumerate the CUDA devices\n"); + if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0) return AVERROR_UNKNOWN; - } if (!nb_devices) { av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n"); @@ -1460,7 +1432,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) ctx->nvencoder = NULL; if (ctx->cu_context_internal) - dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); + CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal)); 
ctx->cu_context = ctx->cu_context_internal = NULL; #if CONFIG_D3D11VA diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 7c6fc836e5..a7ebd0221b 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -334,8 +334,9 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o -OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o -OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o +OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o \ + cuda_check.o +OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o cuda_check.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o vaapi_vpp.o OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o @@ -376,7 +377,8 @@ OBJS-$(CONFIG_TBLEND_FILTER) += vf_blend.o framesync.o OBJS-$(CONFIG_TELECINE_FILTER) += vf_telecine.o OBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o framesync.o OBJS-$(CONFIG_THUMBNAIL_FILTER) += vf_thumbnail.o -OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o +OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o \ + cuda_check.o OBJS-$(CONFIG_TILE_FILTER) += vf_tile.o OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o @@ -386,7 +388,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o opencl/tonemap.o opencl/colorspace_common.o OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o -OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o +OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o cuda_check.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o @@ -410,7 
+412,8 @@ OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o OBJS-$(CONFIG_XBR_FILTER) += vf_xbr.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o -OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o +OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \ + yadif_common.o cuda_check.o OBJS-$(CONFIG_ZMQ_FILTER) += f_zmq.o OBJS-$(CONFIG_ZOOMPAN_FILTER) += vf_zoompan.o OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o diff --git a/libavfilter/cuda_check.c b/libavfilter/cuda_check.c new file mode 100644 index 0000000000..a1ebb88882 --- /dev/null +++ b/libavfilter/cuda_check.c @@ -0,0 +1 @@ +#include "libavutil/cuda_check.c" diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index 7b2b78c1ed..53b7aa9531 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -28,6 +28,7 @@ #include "libavutil/common.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_check.h" #include "libavutil/internal.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -52,6 +53,8 @@ static const enum AVPixelFormat supported_formats[] = { #define BLOCKX 32 #define BLOCKY 16 +#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) + typedef struct CUDAScaleContext { const AVClass *class; enum AVPixelFormat in_fmt; @@ -255,55 +258,48 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink) AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; - CUresult err; int w, h; int ret; extern char vf_scale_cuda_ptx[]; - err = cuCtxPushCurrent(cuda_ctx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n"); - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); + if (ret < 0) goto fail; - } - err = 
cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error loading module data\n"); - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); + if (ret < 0) goto fail; - } - cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar"); - cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"); - cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"); - cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"); - cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"); - cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4")); - cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"); - cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"); - cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex"); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, 
"uchar4_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex")); - cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER)); - cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR)); - cuCtxPopCurrent(&dummy); + 
CHECK_CU(cuCtxPopCurrent(&dummy)); if ((ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, @@ -339,7 +335,7 @@ fail: return ret; } -static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex, int channels, +static int call_resize_kernel(CUDAScaleContext *ctx, CUfunction func, CUtexref tex, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size) @@ -358,8 +354,9 @@ static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; } - cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size); - cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL); + CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size)); + CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, + BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL)); return 0; } @@ -470,7 +467,6 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVFrame *out = NULL; - CUresult err; CUcontext dummy; int ret = 0; @@ -480,15 +476,13 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) { - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx)); + if (ret < 0) goto fail; - } ret = cudascale_scale(ctx, out, in); - cuCtxPopCurrent(&dummy); + CHECK_CU(cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c index 8a277ce8e1..a3e085764a 100644 --- a/libavfilter/vf_scale_npp.c +++ b/libavfilter/vf_scale_npp.c @@ -29,6 +29,7 @@ #include "libavutil/common.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" 
+#include "libavutil/cuda_check.h" #include "libavutil/internal.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -39,6 +40,8 @@ #include "scale.h" #include "video.h" +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x) + static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NV12, @@ -498,7 +501,6 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVFrame *out = NULL; - CUresult err; CUcontext dummy; int ret = 0; @@ -511,15 +513,13 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) { - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx)); + if (ret < 0) goto fail; - } ret = nppscale_scale(ctx, out, in); - device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy); + CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; diff --git a/libavfilter/vf_thumbnail_cuda.c b/libavfilter/vf_thumbnail_cuda.c index 53df7e0bf7..22691e156f 100644 --- a/libavfilter/vf_thumbnail_cuda.c +++ b/libavfilter/vf_thumbnail_cuda.c @@ -24,12 +24,15 @@ #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_check.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "avfilter.h" #include "internal.h" +#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) + #define HIST_SIZE (3*256) #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) #define BLOCKX 32 @@ -154,7 +157,7 @@ static AVFrame *get_best_frame(AVFilterContext *ctx) return picref; } -static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref tex, int channels, +static int thumbnail_kernel(ThumbnailCudaContext *ctx, CUfunction func, CUtexref tex, int channels, int *histogram, uint8_t 
*src_dptr, int src_width, int src_height, int src_pitch, int pixel_size) { CUdeviceptr src_devptr = (CUdeviceptr)src_dptr; @@ -171,8 +174,10 @@ static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref t desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; } - cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch); - cuLaunchKernel(func, DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args, NULL); + CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch)); + CHECK_CU(cuLaunchKernel(func, + DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, + BLOCKX, BLOCKY, 1, 0, 0, args, NULL)); return 0; } @@ -235,7 +240,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) int *hist = s->frames[s->n].histogram; AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; - CUresult err; CUcontext dummy; CUDA_MEMCPY2D cpy = { 0 }; int ret = 0; @@ -243,11 +247,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) // keep a reference of each frame s->frames[s->n].buf = frame; - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) - return AVERROR_UNKNOWN; + ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx)); + if (ret < 0) + return ret; - cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int)); + CHECK_CU(cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int))); thumbnail(ctx, (int*)s->data, frame); @@ -260,11 +264,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) cpy.WidthInBytes = HIST_SIZE * sizeof(int); cpy.Height = 1; - err = cuMemcpy2D(&cpy); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cuMemcpy2D(&cpy)); + if (ret < 0) + return ret; if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P || 
hw_frames_ctx->sw_format == AV_PIX_FMT_P010LE || hw_frames_ctx->sw_format == AV_PIX_FMT_P016LE) @@ -274,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) hist[i] = 4 * hist[i]; } - cuCtxPopCurrent(&dummy); + CHECK_CU(cuCtxPopCurrent(&dummy)); if (ret < 0) return ret; @@ -292,12 +294,12 @@ static av_cold void uninit(AVFilterContext *ctx) ThumbnailCudaContext *s = ctx->priv; if (s->data) { - cuMemFree(s->data); + CHECK_CU(cuMemFree(s->data)); s->data = 0; } if (s->cu_module) { - cuModuleUnload(s->cu_module); + CHECK_CU(cuModuleUnload(s->cu_module)); s->cu_module = NULL; } @@ -340,49 +342,43 @@ static int config_props(AVFilterLink *inlink) AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; - CUresult err; + int ret; extern char vf_thumbnail_cuda_ptx[]; - err = cuCtxPushCurrent(cuda_ctx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); + if (ret < 0) + return ret; - err = cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error loading module data\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx)); + if (ret < 0) + return ret; - cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"); - cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"); - cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"); - cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, 
s->cu_module, "Thumbnail_ushort")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2")); - cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"); - cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex")); - cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER)); - cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR)); - err = cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error allocating cuda memory\n"); - 
return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cuMemAlloc(&s->data, HIST_SIZE * sizeof(int))); + if (ret < 0) + return ret; - cuCtxPopCurrent(&dummy); + CHECK_CU(cuCtxPopCurrent(&dummy)); s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx; diff --git a/libavfilter/vf_transpose_npp.c b/libavfilter/vf_transpose_npp.c index 1b3a5c0c69..3ea031667c 100644 --- a/libavfilter/vf_transpose_npp.c +++ b/libavfilter/vf_transpose_npp.c @@ -23,6 +23,7 @@ #include "libavutil/common.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" #include "libavutil/internal.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -32,6 +33,8 @@ #include "internal.h" #include "video.h" +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x) + static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P @@ -397,7 +400,6 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in) AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVFrame *out = NULL; - CUresult err; CUcontext dummy; int ret = 0; @@ -410,15 +412,13 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) { - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx)); + if (ret < 0) goto fail; - } ret = npptranspose_filter(ctx, out, in); - device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy); + CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c index be22344d9d..85e1aac5eb 100644 --- a/libavfilter/vf_yadif_cuda.c +++ b/libavfilter/vf_yadif_cuda.c @@ -21,6 +21,7 @@ #include #include 
"libavutil/avassert.h" #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_check.h" #include "internal.h" #include "yadif.h" @@ -48,28 +49,7 @@ typedef struct DeintCUDAContext { #define BLOCKX 32 #define BLOCKY 16 -static int check_cu(AVFilterContext *avctx, CUresult err, const char *func) -{ - const char *err_name; - const char *err_string; - - av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); - - if (err == CUDA_SUCCESS) - return 0; - - cuGetErrorName(err, &err_name); - cuGetErrorString(err, &err_string); - - av_log(avctx, AV_LOG_ERROR, "%s failed", func); - if (err_name && err_string) - av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); - av_log(avctx, AV_LOG_ERROR, "\n"); - - return AVERROR_EXTERNAL; -} - -#define CHECK_CU(x) check_cu(ctx, (x), #x) +#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next, @@ -85,7 +65,7 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, { DeintCUDAContext *s = ctx->priv; CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0; - CUresult err; + int ret; int skip_spatial_check = s->yadif.mode&2; void *args[] = { &dst, &tex_prev, &tex_cur, &tex_next, @@ -108,24 +88,21 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, }; res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev; - err = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); - if (err != CUDA_SUCCESS) { + ret = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); + if (ret < 0) goto exit; - } res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur; - err = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); - if (err != CUDA_SUCCESS) { + ret = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); + if (ret < 0) goto exit; - } res_desc.res.pitch2D.devPtr = (CUdeviceptr)next; - err = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); - if (err != 
CUDA_SUCCESS) { + ret = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuLaunchKernel(func, + ret = CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, s->stream, args, NULL)); @@ -138,7 +115,7 @@ exit: if (tex_next) CHECK_CU(cuTexObjectDestroy(tex_next)); - return err; + return ret; } static void filter(AVFilterContext *ctx, AVFrame *dst, @@ -147,13 +124,11 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, DeintCUDAContext *s = ctx->priv; YADIFContext *y = &s->yadif; CUcontext dummy; - CUresult err; - int i; + int i, ret; - err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); - if (err != CUDA_SUCCESS) { - goto exit; - } + ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); + if (ret < 0) + return; for (i = 0; i < y->csp->nb_components; i++) { CUfunction func; @@ -204,10 +179,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, parity, tff); } - err = CHECK_CU(cuStreamSynchronize(s->stream)); - if (err != CUDA_SUCCESS) { - goto exit; - } + CHECK_CU(cuStreamSynchronize(s->stream)); exit: CHECK_CU(cuCtxPopCurrent(&dummy)); @@ -283,7 +255,6 @@ static int config_output(AVFilterLink *link) YADIFContext *y = &s->yadif; int ret = 0; CUcontext dummy; - CUresult err; av_assert0(s->input_frames); s->device_ref = av_buffer_ref(s->input_frames->device_ref); @@ -342,41 +313,29 @@ static int config_output(AVFilterLink *link) y->csp = av_pix_fmt_desc_get(output_frames->sw_format); y->filter = filter; - err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); - if (err != CUDA_SUCCESS) { - ret = AVERROR_EXTERNAL; + ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, 
s->cu_module, "yadif_uchar")); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar")); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); + if (ret < 0) goto exit; - } - err= CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); + if (ret < 0) goto exit; - } exit: CHECK_CU(cuCtxPopCurrent(&dummy)); diff --git a/libavutil/Makefile b/libavutil/Makefile index 9ed24cfc82..b772111695 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -157,7 +157,7 @@ OBJS = adler32.o \ xtea.o \ tea.o \ -OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o +OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o cuda_check.o OBJS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.o OBJS-$(CONFIG_DXVA2) += hwcontext_dxva2.o OBJS-$(CONFIG_LIBDRM) += hwcontext_drm.o @@ -175,7 +175,8 @@ OBJS += $(COMPAT_OBJS:%=../compat/%) SLIBOBJS-$(HAVE_GNU_WINDRES) += avutilres.o SKIPHEADERS-$(HAVE_CUDA_H) += hwcontext_cuda.h -SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda_internal.h +SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda_internal.h \ + cuda_check.h SKIPHEADERS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.h SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h diff --git a/libavutil/cuda_check.c b/libavutil/cuda_check.c new file mode 100644 index 
0000000000..95c0256d12 --- /dev/null +++ b/libavutil/cuda_check.c @@ -0,0 +1,45 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "compat/cuda/dynlink_loader.h" +#include "libavutil/cuda_check.h" + +int ff_cuda_check(void *avctx, + void *cuGetErrorName_fn, + void *cuGetErrorString_fn, + CUresult err, const char *func) +{ + const char *err_name = NULL; /* stays NULL unless the lookup below fills it in */ + const char *err_string = NULL; /* stays NULL unless the lookup below fills it in */ + + av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); + + if (err == CUDA_SUCCESS) + return 0; + + ((tcuGetErrorName *)cuGetErrorName_fn)(err, &err_name); + ((tcuGetErrorString *)cuGetErrorString_fn)(err, &err_string); + + av_log(avctx, AV_LOG_ERROR, "%s failed", func); + if (err_name && err_string) + av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); + av_log(avctx, AV_LOG_ERROR, "\n"); + + return AVERROR_EXTERNAL; +} + diff --git a/libavutil/cuda_check.h b/libavutil/cuda_check.h new file mode 100644 index 0000000000..0d45538c2f --- /dev/null +++ b/libavutil/cuda_check.h @@ -0,0 +1,43 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#ifndef FF_CUDA_CHECK_H +#define FF_CUDA_CHECK_H + +/** + * Wrap a CUDA function call and print error information if it fails. + */ + +int ff_cuda_check(void *avctx, + void *cuGetErrorName_fn, void *cuGetErrorString_fn, + CUresult err, const char *func); + +/** + * Convenience wrapper for ff_cuda_check when directly linking libcuda. + */ + +#define FF_CUDA_CHECK(avclass, x) ff_cuda_check(avclass, cuGetErrorName, cuGetErrorString, (x), #x) + +/** + * Convenience wrapper for ff_cuda_check when dynamically loading cuda symbols. 
+ */ + +#define FF_CUDA_CHECK_DL(avclass, cudl, x) ff_cuda_check(avclass, (cudl)->cuGetErrorName, (cudl)->cuGetErrorString, (x), #x) + +#endif /* FF_CUDA_CHECK_H */ diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 3b1d53e799..540a7610ef 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -21,6 +21,7 @@ #include "hwcontext.h" #include "hwcontext_internal.h" #include "hwcontext_cuda_internal.h" +#include "cuda_check.h" #include "mem.h" #include "pixdesc.h" #include "pixfmt.h" @@ -43,6 +44,8 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_0BGR32, }; +#define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x) + static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, AVHWFramesConstraints *constraints) @@ -70,48 +73,48 @@ static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, static void cuda_buffer_free(void *opaque, uint8_t *data) { - AVHWFramesContext *ctx = opaque; - AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; - CudaFunctions *cu = hwctx->internal->cuda_dl; + AVHWFramesContext *ctx = opaque; + AVHWDeviceContext *device_ctx = ctx->device_ctx; + AVCUDADeviceContext *hwctx = device_ctx->hwctx; + CudaFunctions *cu = hwctx->internal->cuda_dl; CUcontext dummy; - cu->cuCtxPushCurrent(hwctx->cuda_ctx); + CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); - cu->cuMemFree((CUdeviceptr)data); + CHECK_CU(cu->cuMemFree((CUdeviceptr)data)); - cu->cuCtxPopCurrent(&dummy); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } static AVBufferRef *cuda_pool_alloc(void *opaque, int size) { - AVHWFramesContext *ctx = opaque; - AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; - CudaFunctions *cu = hwctx->internal->cuda_dl; + AVHWFramesContext *ctx = opaque; + AVHWDeviceContext *device_ctx = ctx->device_ctx; + AVCUDADeviceContext *hwctx = device_ctx->hwctx; + CudaFunctions *cu = hwctx->internal->cuda_dl; AVBufferRef *ret = NULL; CUcontext dummy = NULL; CUdeviceptr data; - CUresult err; + 
int err; - err = cu->cuCtxPushCurrent(hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n"); + err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); + if (err < 0) return NULL; - } - err = cu->cuMemAlloc(&data, size); - if (err != CUDA_SUCCESS) + err = CHECK_CU(cu->cuMemAlloc(&data, size)); + if (err < 0) goto fail; ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); if (!ret) { - cu->cuMemFree(data); + CHECK_CU(cu->cuMemFree(data)); goto fail; } fail: - cu->cuCtxPopCurrent(&dummy); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); return ret; } @@ -194,17 +197,17 @@ static int cuda_transfer_get_formats(AVHWFramesContext *ctx, static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src) { - CUDAFramesContext *priv = ctx->internal->priv; - AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; - CudaFunctions *cu = device_hwctx->internal->cuda_dl; + CUDAFramesContext *priv = ctx->internal->priv; + AVHWDeviceContext *device_ctx = ctx->device_ctx; + AVCUDADeviceContext *hwctx = device_ctx->hwctx; + CudaFunctions *cu = hwctx->internal->cuda_dl; CUcontext dummy; - CUresult err; - int i; + int i, ret; - err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) - return AVERROR_UNKNOWN; + ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); + if (ret < 0) + return ret; for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { CUDA_MEMCPY2D cpy = { @@ -218,20 +221,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, .Height = src->height >> (i ? 
priv->shift_height : 0), }; - err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream)); + if (ret < 0) + goto exit; } - err = cu->cuStreamSynchronize(device_hwctx->stream); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream)); + if (ret < 0) + goto exit; - cu->cuCtxPopCurrent(&dummy); +exit: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - return 0; + return ret; /* 0 on success, else the error from the failed copy/sync */ } @@ -239,17 +239,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src) { - CUDAFramesContext *priv = ctx->internal->priv; - AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; - CudaFunctions *cu = device_hwctx->internal->cuda_dl; + CUDAFramesContext *priv = ctx->internal->priv; + AVHWDeviceContext *device_ctx = ctx->device_ctx; + AVCUDADeviceContext *hwctx = device_ctx->hwctx; + CudaFunctions *cu = hwctx->internal->cuda_dl; CUcontext dummy; - CUresult err; - int i; + int i, ret; - err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) - return AVERROR_UNKNOWN; + ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); + if (ret < 0) + return ret; for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { CUDA_MEMCPY2D cpy = { @@ -263,31 +263,29 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, .Height = src->height >> (i ? 
priv->shift_height : 0), }; - err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream)); + if (ret < 0) + goto exit; } - err = cu->cuStreamSynchronize(device_hwctx->stream); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream)); + if (ret < 0) + goto exit; - cu->cuCtxPopCurrent(&dummy); +exit: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - return 0; + return ret; /* 0 on success, else the error from the failed copy/sync */ } -static void cuda_device_uninit(AVHWDeviceContext *ctx) +static void cuda_device_uninit(AVHWDeviceContext *device_ctx) { - AVCUDADeviceContext *hwctx = ctx->hwctx; + AVCUDADeviceContext *hwctx = device_ctx->hwctx; if (hwctx->internal) { + CudaFunctions *cu = hwctx->internal->cuda_dl; if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { - hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx); + CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); hwctx->cuda_ctx = NULL; } cuda_free_functions(&hwctx->internal->cuda_dl); @@ -322,53 +320,47 @@ error: return ret; } -static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, +static int cuda_device_create(AVHWDeviceContext *device_ctx, + const char *device, AVDictionary *opts, int flags) { - AVCUDADeviceContext *hwctx = ctx->hwctx; + AVCUDADeviceContext *hwctx = device_ctx->hwctx; CudaFunctions *cu; CUdevice cu_device; CUcontext dummy; - CUresult err; - int device_idx = 0; + int ret, device_idx = 0; if (device) device_idx = strtol(device, NULL, 0); - if (cuda_device_init(ctx) < 0) + if (cuda_device_init(device_ctx) < 0) goto error; cu = hwctx->internal->cuda_dl; - err = cu->cuInit(0); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n"); + ret = CHECK_CU(cu->cuInit(0)); + if (ret < 0) goto error; - } - 
err = cu->cuDeviceGet(&cu_device, device_idx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx); + ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx)); + if (ret < 0) goto error; - } - err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n"); + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device)); + if (ret < 0) goto error; - } // Setting stream to NULL will make functions automatically use the default CUstream hwctx->stream = NULL; - cu->cuCtxPopCurrent(&dummy); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); hwctx->internal->is_allocated = 1; return 0; error: - cuda_device_uninit(ctx); + cuda_device_uninit(device_ctx); return AVERROR_UNKNOWN; }