avcodec/nvdec: add 4:2:2 decoding and 10-bit support

This commit adds support for 4:2:2 decoding of HEVC and H.264 on NVIDIA Blackwell GPUs. Additionally, it adds support for 10-bit H.264 decoding on Blackwell GPUs.

Signed-off-by: Diego de Souza <ddesouza@nvidia.com>
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
2025-08-04 22:03:09 +02:00 · 2025-01-08 10:30:41 +01:00
parent 7454a07d58
commit 30e6effff9
5 changed files with 51 additions and 9 deletions
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -807,6 +807,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
 #endif
 #if CONFIG_H264_VULKAN_HWACCEL
        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+#if CONFIG_H264_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
 #endif
        if (CHROMA444(h)) {
            if (h->avctx->colorspace == AVCOL_SPC_RGB) {
--- a/libavcodec/hevc/hevcdec.c
+++ b/libavcodec/hevc/hevcdec.c
@@ -626,6 +626,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
 #if CONFIG_HEVC_VULKAN_HWACCEL
        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
 #endif
        break;
    case AV_PIX_FMT_YUV444P10:
@@ -654,6 +657,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
 #if CONFIG_HEVC_VULKAN_HWACCEL
        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
 #endif
        break;
    }
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -375,13 +375,27 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)

    switch (sw_desc->comp[0].depth) {
    case 8:
-        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
-                                     cudaVideoSurfaceFormat_NV12;
+        if (chroma_444) {
+            output_format = cudaVideoSurfaceFormat_YUV444;
+#ifdef NVDEC_HAVE_422_SUPPORT
+        } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
+            output_format = cudaVideoSurfaceFormat_NV16;
+#endif
+        } else {
+            output_format = cudaVideoSurfaceFormat_NV12;
+        }
        break;
    case 10:
    case 12:
-        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
-                                     cudaVideoSurfaceFormat_P016;
+        if (chroma_444) {
+            output_format = cudaVideoSurfaceFormat_YUV444_16Bit;
+#ifdef NVDEC_HAVE_422_SUPPORT
+        } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
+            output_format = cudaVideoSurfaceFormat_P216;
+#endif
+        } else {
+            output_format = cudaVideoSurfaceFormat_P016;
+        }
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
@@ -729,13 +743,27 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,

    switch (sw_desc->comp[0].depth) {
    case 8:
-        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
+        if (chroma_444) {
+            frames_ctx->sw_format = AV_PIX_FMT_YUV444P;
+#ifdef NVDEC_HAVE_422_SUPPORT
+        } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
+            frames_ctx->sw_format = AV_PIX_FMT_NV16;
+#endif
+        } else {
+            frames_ctx->sw_format = AV_PIX_FMT_NV12;
+        }
        break;
    case 10:
-        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
-        break;
    case 12:
-        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
+        if (chroma_444) {
+            frames_ctx->sw_format = AV_PIX_FMT_YUV444P16;
+#ifdef NVDEC_HAVE_422_SUPPORT
+        } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
+            frames_ctx->sw_format = AV_PIX_FMT_P216LE;
+#endif
+        } else {
+            frames_ctx->sw_format = AV_PIX_FMT_P016LE;
+        }
        break;
    default:
        return AVERROR(EINVAL);
--- a/libavcodec/nvdec.h
+++ b/libavcodec/nvdec.h
@@ -41,6 +41,11 @@
    ((major) < 8 || ((major) == 8 && (minor) <= 0))
 #endif

+// SDK 13.0 compile time feature checks
+#if NVDECAPI_CHECK_VERSION(13, 0)
+#define NVDEC_HAVE_422_SUPPORT
+#endif
+
 typedef struct NVDECFrame {
    unsigned int idx;
    unsigned int ref_idx;
--- a/libavcodec/nvdec_h264.c
+++ b/libavcodec/nvdec_h264.c
@@ -97,7 +97,7 @@ static int nvdec_h264_start_frame(AVCodecContext *avctx,
            .num_ref_idx_l1_active_minus1           = pps->ref_count[1] - 1,
            .weighted_pred_flag                     = pps->weighted_pred,
            .weighted_bipred_idc                    = pps->weighted_bipred_idc,
-            .pic_init_qp_minus26                    = pps->init_qp - 26,
+            .pic_init_qp_minus26                    = pps->init_qp - 26 - 6 * (sps->bit_depth_luma - 8),
            .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
            .redundant_pic_cnt_present_flag         = pps->redundant_pic_cnt_present,
            .transform_8x8_mode_flag                = pps->transform_8x8_mode,