1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avcodec/nvdec: add 4:2:2 decoding and 10-bit support

This commit adds support for 4:2:2 decoding for HEVC and H.264 on
NVIDIA Blackwell GPUs. Additionally, it supports 10-bit decoding
for H.264 on Blackwell GPUs.

Signed-off-by: Diego de Souza <ddesouza@nvidia.com>
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
This commit is contained in:
Diego de Souza
2025-01-08 10:30:41 +01:00
committed by Timo Rothenpieler
parent 7454a07d58
commit 30e6effff9
5 changed files with 51 additions and 9 deletions

View File

@ -807,6 +807,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
#endif
#if CONFIG_H264_VULKAN_HWACCEL
*fmt++ = AV_PIX_FMT_VULKAN;
#endif
#if CONFIG_H264_NVDEC_HWACCEL
*fmt++ = AV_PIX_FMT_CUDA;
#endif
if (CHROMA444(h)) {
if (h->avctx->colorspace == AVCOL_SPC_RGB) {

View File

@ -626,6 +626,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#endif
#if CONFIG_HEVC_VULKAN_HWACCEL
*fmt++ = AV_PIX_FMT_VULKAN;
#endif
#if CONFIG_HEVC_NVDEC_HWACCEL
*fmt++ = AV_PIX_FMT_CUDA;
#endif
break;
case AV_PIX_FMT_YUV444P10:
@ -654,6 +657,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#endif
#if CONFIG_HEVC_VULKAN_HWACCEL
*fmt++ = AV_PIX_FMT_VULKAN;
#endif
#if CONFIG_HEVC_NVDEC_HWACCEL
*fmt++ = AV_PIX_FMT_CUDA;
#endif
break;
}

View File

@ -375,13 +375,27 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
switch (sw_desc->comp[0].depth) {
case 8:
output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
cudaVideoSurfaceFormat_NV12;
if (chroma_444) {
output_format = cudaVideoSurfaceFormat_YUV444;
#ifdef NVDEC_HAVE_422_SUPPORT
} else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
output_format = cudaVideoSurfaceFormat_NV16;
#endif
} else {
output_format = cudaVideoSurfaceFormat_NV12;
}
break;
case 10:
case 12:
output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
cudaVideoSurfaceFormat_P016;
if (chroma_444) {
output_format = cudaVideoSurfaceFormat_YUV444_16Bit;
#ifdef NVDEC_HAVE_422_SUPPORT
} else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
output_format = cudaVideoSurfaceFormat_P216;
#endif
} else {
output_format = cudaVideoSurfaceFormat_P016;
}
break;
default:
av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
@ -729,13 +743,27 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
switch (sw_desc->comp[0].depth) {
case 8:
frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
if (chroma_444) {
frames_ctx->sw_format = AV_PIX_FMT_YUV444P;
#ifdef NVDEC_HAVE_422_SUPPORT
} else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
frames_ctx->sw_format = AV_PIX_FMT_NV16;
#endif
} else {
frames_ctx->sw_format = AV_PIX_FMT_NV12;
}
break;
case 10:
frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
break;
case 12:
frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
if (chroma_444) {
frames_ctx->sw_format = AV_PIX_FMT_YUV444P16;
#ifdef NVDEC_HAVE_422_SUPPORT
} else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
frames_ctx->sw_format = AV_PIX_FMT_P216LE;
#endif
} else {
frames_ctx->sw_format = AV_PIX_FMT_P016LE;
}
break;
default:
return AVERROR(EINVAL);

View File

@ -41,6 +41,11 @@
((major) < 8 || ((major) == 8 && (minor) <= 0))
#endif
// SDK 13.0 compile time feature checks
#if NVDECAPI_CHECK_VERSION(13, 0)
#define NVDEC_HAVE_422_SUPPORT
#endif
typedef struct NVDECFrame {
unsigned int idx;
unsigned int ref_idx;

View File

@ -97,7 +97,7 @@ static int nvdec_h264_start_frame(AVCodecContext *avctx,
.num_ref_idx_l1_active_minus1 = pps->ref_count[1] - 1,
.weighted_pred_flag = pps->weighted_pred,
.weighted_bipred_idc = pps->weighted_bipred_idc,
.pic_init_qp_minus26 = pps->init_qp - 26,
.pic_init_qp_minus26 = pps->init_qp - 26 - 6 * (sps->bit_depth_luma - 8),
.deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
.redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present,
.transform_8x8_mode_flag = pps->transform_8x8_mode,