avcodec/cuviddec: Add support for decoding HEVC 4:4:4 content
This is the equivalent change for cuviddec after the previous change for nvdec. I made similar changes to the copying routines to handle pixel formats in a more generic way. Note that unlike with nvdec, there is no confusion about the ability of a codec to output 444 formats. This is because the cuvid parser is used, meaning that 444 JPEG content is still indicated as using a 420 output format.
commit 317b7b06fd
parent 83c7ac2e47
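The "more generic" copy logic mentioned in the commit message amounts to deriving each plane's height and starting row from the pixel format descriptor instead of hard-coding the two-plane NV12 layout. Below is a rough, standalone sketch of that idea against libavutil's pixdesc API. It is illustrative only and not part of the patch; the patch itself multiplies the running row offset by the surface pitch to get a byte offset.

/*
 * Illustrative sketch only (not part of this commit): derive per-plane copy
 * heights and starting rows from the AVPixFmtDescriptor, which is what lets
 * one loop serve both semi-planar 4:2:0 (NV12/P010/P016) and planar 4:4:4
 * (YUV444P/YUV444P16) cuvid surfaces.
 */
#include <stdio.h>
#include <libavutil/pixdesc.h>

static void show_plane_layout(enum AVPixelFormat fmt, int height)
{
    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(fmt);
    int nb_planes = av_pix_fmt_count_planes(fmt);
    int offset = 0; /* running row offset into the single mapped surface */

    printf("%s:\n", av_get_pix_fmt_name(fmt));
    for (int i = 0; i < nb_planes; i++) {
        /* plane 0 is luma (full height); chroma planes are shifted down by
         * log2_chroma_h, which is 0 for 4:4:4 and 1 for 4:2:0 */
        int plane_height = height >> (i ? pixdesc->log2_chroma_h : 0);
        printf("  plane %d: %d rows, starting at row %d\n", i, plane_height, offset);
        offset += plane_height;
    }
}

int main(void)
{
    show_plane_layout(AV_PIX_FMT_NV12,    1080); /* 2 planes, chroma half height */
    show_plane_layout(AV_PIX_FMT_YUV444P, 1080); /* 3 planes, all full height */
    return 0;
}

Built against the FFmpeg headers (e.g. gcc sketch.c $(pkg-config --cflags --libs libavutil)), this prints half-height chroma rows for NV12 and three full-height planes for YUV444P, which is the distinction the copy loop in this change relies on.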
diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -34,8 +34,14 @@
 #include "avcodec.h"
 #include "decode.h"
 #include "hwaccel.h"
+#include "nvdec.h"
 #include "internal.h"
 
+#if !NVDECAPI_CHECK_VERSION(9, 0)
+#define cudaVideoSurfaceFormat_YUV444 2
+#define cudaVideoSurfaceFormat_YUV444_16Bit 3
+#endif
+
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -106,6 +112,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
     CUVIDDECODECAPS *caps = NULL;
     CUVIDDECODECREATEINFO cuinfo;
     int surface_fmt;
+    int chroma_444;
 
     int old_width = avctx->width;
     int old_height = avctx->height;
@@ -148,17 +155,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
     cuinfo.target_rect.right = cuinfo.ulTargetWidth;
     cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
 
+    chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
+
     switch (format->bit_depth_luma_minus8) {
     case 0: // 8-bit
-        pix_fmts[1] = AV_PIX_FMT_NV12;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         caps = &ctx->caps8;
         break;
     case 2: // 10-bit
-        pix_fmts[1] = AV_PIX_FMT_P010;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
         caps = &ctx->caps10;
         break;
     case 4: // 12-bit
-        pix_fmts[1] = AV_PIX_FMT_P016;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
         caps = &ctx->caps12;
         break;
     default:
@@ -261,12 +270,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
         return 0;
     }
 
-    if (format->chroma_format != cudaVideoChromaFormat_420) {
-        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
-        ctx->internal_error = AVERROR(EINVAL);
-        return 0;
-    }
-
     ctx->chroma_format = format->chroma_format;
 
     cuinfo.CodecType = ctx->codec_type = format->codec;
@@ -280,8 +283,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
     case AV_PIX_FMT_P016:
         cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
         break;
+    case AV_PIX_FMT_YUV444P:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
+        break;
+    case AV_PIX_FMT_YUV444P16:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
+        break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
+        av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
+               av_get_pix_fmt_name(avctx->sw_pix_fmt));
         ctx->internal_error = AVERROR(EINVAL);
         return 0;
     }
@@ -490,6 +500,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
         return ret;
 
     if (av_fifo_size(ctx->frame_queue)) {
+        const AVPixFmtDescriptor *pixdesc;
         CuvidParsedFrame parsed_frame;
         CUVIDPROCPARAMS params;
         unsigned int pitch = 0;
@@ -520,7 +531,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
-            for (i = 0; i < 2; i++) {
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
                 CUDA_MEMCPY2D cpy = {
                     .srcMemoryType = CU_MEMORYTYPE_DEVICE,
                     .dstMemoryType = CU_MEMORYTYPE_DEVICE,
@@ -530,14 +544,14 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                     .dstPitch = frame->linesize[i],
                     .srcY = offset,
                     .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
-                    .Height = avctx->height >> (i ? 1 : 0),
+                    .Height = height,
                 };
 
                 ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
                 if (ret < 0)
                     goto error;
 
-                offset += avctx->height;
+                offset += height;
             }
 
             ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
@@ -545,7 +559,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
         } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
                    avctx->pix_fmt == AV_PIX_FMT_P010 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P016) {
+                   avctx->pix_fmt == AV_PIX_FMT_P016 ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
+            unsigned int offset = 0;
             AVFrame *tmp_frame = av_frame_alloc();
             if (!tmp_frame) {
                 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
@@ -553,15 +570,24 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
             tmp_frame->format = AV_PIX_FMT_CUDA;
             tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
-            tmp_frame->data[0] = (uint8_t*)mapped_frame;
-            tmp_frame->linesize[0] = pitch;
-            tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->height * pitch);
-            tmp_frame->linesize[1] = pitch;
             tmp_frame->width = avctx->width;
             tmp_frame->height = avctx->height;
 
+            /*
+             * Note that the following logic would not work for three plane
+             * YUV420 because the pitch value is different for the chroma
+             * planes.
+             */
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                tmp_frame->data[i] = (uint8_t*)mapped_frame + offset;
+                tmp_frame->linesize[i] = pitch;
+                offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
+            }
+
             ret = ff_get_buffer(avctx, frame, 0);
             if (ret < 0) {
                 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
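The comment in the last hunk about three-plane YUV420 is worth unpacking: the mapped cuvid surface has a single pitch, so reusing it as the linesize of every plane is only valid when all planes have full-width rows (the luma plane, the interleaved NV12/P010/P016 chroma plane, and every plane of 4:4:4). A rough standalone check of that assumption using only libavutil follows; it is illustrative, not part of the patch, and the real cuvid pitch is padded, so only the relative widths matter here.

/*
 * Illustrative sketch only: compare minimal per-plane row sizes. Formats whose
 * planes all have the same row size can share one pitch value; YUV420P cannot,
 * because its chroma rows are half as wide as the luma rows.
 */
#include <stdio.h>
#include <libavutil/imgutils.h>
#include <libavutil/pixdesc.h>

static void show_linesizes(enum AVPixelFormat fmt, int width)
{
    int linesizes[4] = { 0 };

    /* fills the tightly-packed row size of each plane for this width */
    av_image_fill_linesizes(linesizes, fmt, width);

    printf("%-10s:", av_get_pix_fmt_name(fmt));
    for (int i = 0; i < 4 && linesizes[i]; i++)
        printf("  plane %d = %d bytes", i, linesizes[i]);
    printf("\n");
}

int main(void)
{
    show_linesizes(AV_PIX_FMT_NV12,    1920); /* 1920, 1920: chroma interleaved     */
    show_linesizes(AV_PIX_FMT_YUV444P, 1920); /* 1920, 1920, 1920: all full width   */
    show_linesizes(AV_PIX_FMT_YUV420P, 1920); /* 1920, 960, 960: chroma narrower    */
    return 0;
}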