You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	lavu: add side data AV_FRAME_DATA_DETECTION_BBOXES for object detection/classification
This commit is contained in:
		| @@ -14,6 +14,8 @@ libavutil:     2017-10-21 | |||||||
|  |  | ||||||
|  |  | ||||||
| API changes, most recent first: | API changes, most recent first: | ||||||
|  | 2021-04-17 - xxxxxxxxxx - lavu 56.73.100 - frame.h detection_bbox.h | ||||||
|  |   Add AV_FRAME_DATA_DETECTION_BBOXES | ||||||
|  |  | ||||||
| 2021-04-06 - xxxxxxxxxx - lavf 58.78.100 - avformat.h | 2021-04-06 - xxxxxxxxxx - lavf 58.78.100 - avformat.h | ||||||
|   Add avformat_index_get_entries_count(), avformat_index_get_entry(), |   Add avformat_index_get_entries_count(), avformat_index_get_entry(), | ||||||
|   | |||||||
| @@ -21,6 +21,7 @@ HEADERS = adler32.h                                                     \ | |||||||
|           cpu.h                                                         \ |           cpu.h                                                         \ | ||||||
|           crc.h                                                         \ |           crc.h                                                         \ | ||||||
|           des.h                                                         \ |           des.h                                                         \ | ||||||
|  |           detection_bbox.h                                              \ | ||||||
|           dict.h                                                        \ |           dict.h                                                        \ | ||||||
|           display.h                                                     \ |           display.h                                                     \ | ||||||
|           dovi_meta.h                                                   \ |           dovi_meta.h                                                   \ | ||||||
| @@ -113,6 +114,7 @@ OBJS = adler32.o                                                        \ | |||||||
|        cpu.o                                                            \ |        cpu.o                                                            \ | ||||||
|        crc.o                                                            \ |        crc.o                                                            \ | ||||||
|        des.o                                                            \ |        des.o                                                            \ | ||||||
|  |        detection_bbox.o                                                 \ | ||||||
|        dict.o                                                           \ |        dict.o                                                           \ | ||||||
|        display.o                                                        \ |        display.o                                                        \ | ||||||
|        dovi_meta.o                                                      \ |        dovi_meta.o                                                      \ | ||||||
|   | |||||||
							
								
								
									
										73
									
								
								libavutil/detection_bbox.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								libavutil/detection_bbox.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | |||||||
|  | /* | ||||||
|  |  * This file is part of FFmpeg. | ||||||
|  |  * | ||||||
|  |  * FFmpeg is free software; you can redistribute it and/or | ||||||
|  |  * modify it under the terms of the GNU Lesser General Public | ||||||
|  |  * License as published by the Free Software Foundation; either | ||||||
|  |  * version 2.1 of the License, or (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * FFmpeg is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||||
|  |  * Lesser General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU Lesser General Public | ||||||
|  |  * License along with FFmpeg; if not, write to the Free Software | ||||||
|  |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | #include "detection_bbox.h" | ||||||
|  |  | ||||||
|  | AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size) | ||||||
|  | { | ||||||
|  |     size_t size; | ||||||
|  |     struct { | ||||||
|  |         AVDetectionBBoxHeader header; | ||||||
|  |         AVDetectionBBox boxes[1]; | ||||||
|  |     } *ret; | ||||||
|  |  | ||||||
|  |     size = sizeof(*ret); | ||||||
|  |     if (nb_bboxes - 1 > (SIZE_MAX - size) / sizeof(*ret->boxes)) | ||||||
|  |         return NULL; | ||||||
|  |     size += sizeof(*ret->boxes) * (nb_bboxes - 1); | ||||||
|  |  | ||||||
|  |     ret = av_mallocz(size); | ||||||
|  |     if (!ret) | ||||||
|  |         return NULL; | ||||||
|  |  | ||||||
|  |     ret->header.nb_bboxes = nb_bboxes; | ||||||
|  |     ret->header.bbox_size = sizeof(*ret->boxes); | ||||||
|  |     ret->header.bboxes_offset = (char *)&ret->boxes - (char *)&ret->header; | ||||||
|  |  | ||||||
|  |     if (out_size) | ||||||
|  |         *out_size = size; | ||||||
|  |  | ||||||
|  |     return &ret->header; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes) | ||||||
|  | { | ||||||
|  |     AVBufferRef         *buf; | ||||||
|  |     AVDetectionBBoxHeader *header; | ||||||
|  |     size_t size; | ||||||
|  |  | ||||||
|  |     header = av_detection_bbox_alloc(nb_bboxes, &size); | ||||||
|  |     if (!header) | ||||||
|  |         return NULL; | ||||||
|  |     if (size > INT_MAX) { | ||||||
|  |         av_freep(&header); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |     buf = av_buffer_create((uint8_t *)header, size, NULL, NULL, 0); | ||||||
|  |     if (!buf) { | ||||||
|  |         av_freep(&header); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (!av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DETECTION_BBOXES, buf)) { | ||||||
|  |         av_buffer_unref(&buf); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     return header; | ||||||
|  | } | ||||||
							
								
								
									
										107
									
								
								libavutil/detection_bbox.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								libavutil/detection_bbox.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | |||||||
|  | /* | ||||||
|  |  * This file is part of FFmpeg. | ||||||
|  |  * | ||||||
|  |  * FFmpeg is free software; you can redistribute it and/or | ||||||
|  |  * modify it under the terms of the GNU Lesser General Public | ||||||
|  |  * License as published by the Free Software Foundation; either | ||||||
|  |  * version 2.1 of the License, or (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * FFmpeg is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||||
|  |  * Lesser General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU Lesser General Public | ||||||
|  |  * License along with FFmpeg; if not, write to the Free Software | ||||||
|  |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | #ifndef AVUTIL_DETECTION_BBOX_H | ||||||
|  | #define AVUTIL_DETECTION_BBOX_H | ||||||
|  |  | ||||||
|  | #include "rational.h" | ||||||
|  | #include "avassert.h" | ||||||
|  | #include "frame.h" | ||||||
|  |  | ||||||
|  | typedef struct AVDetectionBBox { | ||||||
|  |     /** | ||||||
|  |      * Position and size of the bounding box, in pixels: x and y are the | ||||||
|  |      * distances from the left and top edges of the frame, w and h are the | ||||||
|  |      * width and height of the box. | ||||||
|  |      */ | ||||||
|  |     int x; | ||||||
|  |     int y; | ||||||
|  |     int w; | ||||||
|  |     int h; | ||||||
|  |  | ||||||
|  | #define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE 64 | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Detection result: the label assigned to the box and the confidence | ||||||
|  |      * of that detection. The label buffer is | ||||||
|  |      * AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE bytes long. | ||||||
|  |      */ | ||||||
|  |     char detect_label[AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]; | ||||||
|  |     AVRational detect_confidence; | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * At most AV_NUM_BBOX_CLASSIFY (4) classifications based on the | ||||||
|  |      * detected bounding box. For example, up to 4 different attributes can | ||||||
|  |      * be obtained by running 4 different DNN models on one bounding box. | ||||||
|  |      * classify_count is the number of classifications stored in | ||||||
|  |      * classify_labels and classify_confidences; it is zero if there is no | ||||||
|  |      * classification. | ||||||
|  |      */ | ||||||
|  | #define AV_NUM_BBOX_CLASSIFY 4 | ||||||
|  |     uint32_t classify_count; | ||||||
|  |     char classify_labels[AV_NUM_BBOX_CLASSIFY][AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]; | ||||||
|  |     AVRational classify_confidences[AV_NUM_BBOX_CLASSIFY]; | ||||||
|  | } AVDetectionBBox; | ||||||
|  |  | ||||||
|  | typedef struct AVDetectionBBoxHeader { | ||||||
|  |     /** | ||||||
|  |      * Information about how the bounding boxes were generated, | ||||||
|  |      * for example the DNN model name. | ||||||
|  |      */ | ||||||
|  |     char source[256]; | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Number of bounding boxes in the array. | ||||||
|  |      */ | ||||||
|  |     uint32_t nb_bboxes; | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Offset in bytes from the beginning of this structure at which | ||||||
|  |      * the array of bounding boxes starts. | ||||||
|  |      * Filled in by av_detection_bbox_alloc(). | ||||||
|  |      */ | ||||||
|  |     size_t bboxes_offset; | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Size of each bounding box in bytes, i.e. the stride between | ||||||
|  |      * consecutive entries of the array. | ||||||
|  |      * Filled in by av_detection_bbox_alloc(). | ||||||
|  |      */ | ||||||
|  |     size_t bbox_size; | ||||||
|  | } AVDetectionBBoxHeader; | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Get the bounding box at the specified {@code idx}. Must be between 0 and nb_bboxes. | ||||||
|  |  */ | ||||||
|  | static av_always_inline AVDetectionBBox* | ||||||
|  | av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx) | ||||||
|  | { | ||||||
|  |     av_assert0(idx < header->nb_bboxes); | ||||||
|  |     return (AVDetectionBBox *)((uint8_t *)header + header->bboxes_offset + | ||||||
|  |                                idx * header->bbox_size); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes} | ||||||
|  |  * AVDetectionBBox, and initializes the header fields nb_bboxes, bbox_size and | ||||||
|  |  * bboxes_offset. The memory is obtained with av_mallocz(). | ||||||
|  |  * Can be freed with a normal av_free() call. | ||||||
|  |  * | ||||||
|  |  * @param nb_bboxes number of bounding boxes to allocate room for | ||||||
|  |  * @param out_size if non-NULL, the total size in bytes of the allocation | ||||||
|  |  * (header plus bounding boxes) is written here. | ||||||
|  |  * @return the newly allocated header, or NULL on failure | ||||||
|  |  */ | ||||||
|  | AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size); | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes} | ||||||
|  |  * AVDetectionBBox, in the given AVFrame {@code frame} as AVFrameSideData of type | ||||||
|  |  * AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables. | ||||||
|  |  * | ||||||
|  |  * @param frame the frame the side data is added to | ||||||
|  |  * @param nb_bboxes number of bounding boxes to allocate room for | ||||||
|  |  * @return the header stored in the new side data, or NULL on failure. | ||||||
|  |  * NOTE(review): the memory is wrapped in an AVBufferRef handed to the frame, | ||||||
|  |  * so presumably the caller must not free the returned pointer - confirm. | ||||||
|  |  */ | ||||||
|  | AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes); | ||||||
|  | #endif | ||||||
| @@ -853,6 +853,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) | |||||||
|     case AV_FRAME_DATA_VIDEO_ENC_PARAMS:            return "Video encoding parameters"; |     case AV_FRAME_DATA_VIDEO_ENC_PARAMS:            return "Video encoding parameters"; | ||||||
|     case AV_FRAME_DATA_SEI_UNREGISTERED:            return "H.26[45] User Data Unregistered SEI message"; |     case AV_FRAME_DATA_SEI_UNREGISTERED:            return "H.26[45] User Data Unregistered SEI message"; | ||||||
|     case AV_FRAME_DATA_FILM_GRAIN_PARAMS:           return "Film grain parameters"; |     case AV_FRAME_DATA_FILM_GRAIN_PARAMS:           return "Film grain parameters"; | ||||||
|  |     case AV_FRAME_DATA_DETECTION_BBOXES:            return "Bounding boxes for object detection and classification"; | ||||||
|     } |     } | ||||||
|     return NULL; |     return NULL; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -198,6 +198,12 @@ enum AVFrameSideDataType { | |||||||
|      * Must be present for every frame which should have film grain applied. |      * Must be present for every frame which should have film grain applied. | ||||||
|      */ |      */ | ||||||
|     AV_FRAME_DATA_FILM_GRAIN_PARAMS, |     AV_FRAME_DATA_FILM_GRAIN_PARAMS, | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Bounding boxes for object detection and classification, | ||||||
|  |      * as described by AVDetectionBBoxHeader. | ||||||
|  |      */ | ||||||
|  |     AV_FRAME_DATA_DETECTION_BBOXES, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| enum AVActiveFormatDescription { | enum AVActiveFormatDescription { | ||||||
|   | |||||||
| @@ -79,7 +79,7 @@ | |||||||
|  */ |  */ | ||||||
|  |  | ||||||
| #define LIBAVUTIL_VERSION_MAJOR  56 | #define LIBAVUTIL_VERSION_MAJOR  56 | ||||||
| #define LIBAVUTIL_VERSION_MINOR  72 | #define LIBAVUTIL_VERSION_MINOR  73 | ||||||
| #define LIBAVUTIL_VERSION_MICRO 100 | #define LIBAVUTIL_VERSION_MICRO 100 | ||||||
|  |  | ||||||
| #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user