mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
avcodec/cuviddec: Add support for decoding HEVC 4:4:4 content
This is the cuviddec equivalent of the previous change for nvdec. I made similar changes to the copying routines so that they handle pixel formats in a more generic way. Note that, unlike with nvdec, there is no ambiguity about whether a codec can output 4:4:4 formats: because the cuvid parser is used, 4:4:4 JPEG content is still reported as using a 4:2:0 output format.
This commit is contained in:
parent
83c7ac2e47
commit
317b7b06fd
@ -34,8 +34,14 @@
|
|||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "decode.h"
|
#include "decode.h"
|
||||||
#include "hwaccel.h"
|
#include "hwaccel.h"
|
||||||
|
#include "nvdec.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
|
#if !NVDECAPI_CHECK_VERSION(9, 0)
|
||||||
|
#define cudaVideoSurfaceFormat_YUV444 2
|
||||||
|
#define cudaVideoSurfaceFormat_YUV444_16Bit 3
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct CuvidContext
|
typedef struct CuvidContext
|
||||||
{
|
{
|
||||||
AVClass *avclass;
|
AVClass *avclass;
|
||||||
@ -106,6 +112,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
|
|||||||
CUVIDDECODECAPS *caps = NULL;
|
CUVIDDECODECAPS *caps = NULL;
|
||||||
CUVIDDECODECREATEINFO cuinfo;
|
CUVIDDECODECREATEINFO cuinfo;
|
||||||
int surface_fmt;
|
int surface_fmt;
|
||||||
|
int chroma_444;
|
||||||
|
|
||||||
int old_width = avctx->width;
|
int old_width = avctx->width;
|
||||||
int old_height = avctx->height;
|
int old_height = avctx->height;
|
||||||
@ -148,17 +155,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
|
|||||||
cuinfo.target_rect.right = cuinfo.ulTargetWidth;
|
cuinfo.target_rect.right = cuinfo.ulTargetWidth;
|
||||||
cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
|
cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
|
||||||
|
|
||||||
|
chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
|
||||||
|
|
||||||
switch (format->bit_depth_luma_minus8) {
|
switch (format->bit_depth_luma_minus8) {
|
||||||
case 0: // 8-bit
|
case 0: // 8-bit
|
||||||
pix_fmts[1] = AV_PIX_FMT_NV12;
|
pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
|
||||||
caps = &ctx->caps8;
|
caps = &ctx->caps8;
|
||||||
break;
|
break;
|
||||||
case 2: // 10-bit
|
case 2: // 10-bit
|
||||||
pix_fmts[1] = AV_PIX_FMT_P010;
|
pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
|
||||||
caps = &ctx->caps10;
|
caps = &ctx->caps10;
|
||||||
break;
|
break;
|
||||||
case 4: // 12-bit
|
case 4: // 12-bit
|
||||||
pix_fmts[1] = AV_PIX_FMT_P016;
|
pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
|
||||||
caps = &ctx->caps12;
|
caps = &ctx->caps12;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -261,12 +270,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (format->chroma_format != cudaVideoChromaFormat_420) {
|
|
||||||
av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
|
|
||||||
ctx->internal_error = AVERROR(EINVAL);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx->chroma_format = format->chroma_format;
|
ctx->chroma_format = format->chroma_format;
|
||||||
|
|
||||||
cuinfo.CodecType = ctx->codec_type = format->codec;
|
cuinfo.CodecType = ctx->codec_type = format->codec;
|
||||||
@ -280,8 +283,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
|
|||||||
case AV_PIX_FMT_P016:
|
case AV_PIX_FMT_P016:
|
||||||
cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
|
cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
|
||||||
break;
|
break;
|
||||||
|
case AV_PIX_FMT_YUV444P:
|
||||||
|
cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
|
||||||
|
break;
|
||||||
|
case AV_PIX_FMT_YUV444P16:
|
||||||
|
cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
|
av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
|
||||||
|
av_get_pix_fmt_name(avctx->sw_pix_fmt));
|
||||||
ctx->internal_error = AVERROR(EINVAL);
|
ctx->internal_error = AVERROR(EINVAL);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -490,6 +500,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
|
|||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
if (av_fifo_size(ctx->frame_queue)) {
|
if (av_fifo_size(ctx->frame_queue)) {
|
||||||
|
const AVPixFmtDescriptor *pixdesc;
|
||||||
CuvidParsedFrame parsed_frame;
|
CuvidParsedFrame parsed_frame;
|
||||||
CUVIDPROCPARAMS params;
|
CUVIDPROCPARAMS params;
|
||||||
unsigned int pitch = 0;
|
unsigned int pitch = 0;
|
||||||
@ -520,7 +531,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < 2; i++) {
|
pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
|
||||||
|
|
||||||
|
for (i = 0; i < pixdesc->nb_components; i++) {
|
||||||
|
int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
|
||||||
CUDA_MEMCPY2D cpy = {
|
CUDA_MEMCPY2D cpy = {
|
||||||
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
|
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
|
||||||
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
|
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
|
||||||
@ -530,14 +544,14 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
|
|||||||
.dstPitch = frame->linesize[i],
|
.dstPitch = frame->linesize[i],
|
||||||
.srcY = offset,
|
.srcY = offset,
|
||||||
.WidthInBytes = FFMIN(pitch, frame->linesize[i]),
|
.WidthInBytes = FFMIN(pitch, frame->linesize[i]),
|
||||||
.Height = avctx->height >> (i ? 1 : 0),
|
.Height = height,
|
||||||
};
|
};
|
||||||
|
|
||||||
ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
|
ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
offset += avctx->height;
|
offset += height;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
|
ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
|
||||||
@ -545,7 +559,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
|
|||||||
goto error;
|
goto error;
|
||||||
} else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
|
} else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
|
||||||
avctx->pix_fmt == AV_PIX_FMT_P010 ||
|
avctx->pix_fmt == AV_PIX_FMT_P010 ||
|
||||||
avctx->pix_fmt == AV_PIX_FMT_P016) {
|
avctx->pix_fmt == AV_PIX_FMT_P016 ||
|
||||||
|
avctx->pix_fmt == AV_PIX_FMT_YUV444P ||
|
||||||
|
avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
|
||||||
|
unsigned int offset = 0;
|
||||||
AVFrame *tmp_frame = av_frame_alloc();
|
AVFrame *tmp_frame = av_frame_alloc();
|
||||||
if (!tmp_frame) {
|
if (!tmp_frame) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
|
av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
|
||||||
@ -553,15 +570,24 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
|
||||||
|
|
||||||
tmp_frame->format = AV_PIX_FMT_CUDA;
|
tmp_frame->format = AV_PIX_FMT_CUDA;
|
||||||
tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
|
tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
|
||||||
tmp_frame->data[0] = (uint8_t*)mapped_frame;
|
|
||||||
tmp_frame->linesize[0] = pitch;
|
|
||||||
tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->height * pitch);
|
|
||||||
tmp_frame->linesize[1] = pitch;
|
|
||||||
tmp_frame->width = avctx->width;
|
tmp_frame->width = avctx->width;
|
||||||
tmp_frame->height = avctx->height;
|
tmp_frame->height = avctx->height;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that the following logic would not work for three plane
|
||||||
|
* YUV420 because the pitch value is different for the chroma
|
||||||
|
* planes.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < pixdesc->nb_components; i++) {
|
||||||
|
tmp_frame->data[i] = (uint8_t*)mapped_frame + offset;
|
||||||
|
tmp_frame->linesize[i] = pitch;
|
||||||
|
offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
|
||||||
|
}
|
||||||
|
|
||||||
ret = ff_get_buffer(avctx, frame, 0);
|
ret = ff_get_buffer(avctx, frame, 0);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
|
av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
|
||||||
|
Loading…
Reference in New Issue
Block a user