You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	lavu: add side data AV_FRAME_DATA_DETECTION_BBOXES for object detection/classification
This commit is contained in:
		| @@ -14,6 +14,8 @@ libavutil:     2017-10-21 | ||||
|  | ||||
|  | ||||
| API changes, most recent first: | ||||
| 2021-04-17 - xxxxxxxxxx - lavu 56.73.100 - frame.h detection_bbox.h | ||||
|   Add AV_FRAME_DATA_DETECTION_BBOXES | ||||
|  | ||||
| 2021-04-06 - xxxxxxxxxx - lavf 58.78.100 - avformat.h | ||||
|   Add avformat_index_get_entries_count(), avformat_index_get_entry(), | ||||
|   | ||||
| @@ -21,6 +21,7 @@ HEADERS = adler32.h                                                     \ | ||||
|           cpu.h                                                         \ | ||||
|           crc.h                                                         \ | ||||
|           des.h                                                         \ | ||||
|           detection_bbox.h                                              \ | ||||
|           dict.h                                                        \ | ||||
|           display.h                                                     \ | ||||
|           dovi_meta.h                                                   \ | ||||
| @@ -113,6 +114,7 @@ OBJS = adler32.o                                                        \ | ||||
|        cpu.o                                                            \ | ||||
|        crc.o                                                            \ | ||||
|        des.o                                                            \ | ||||
|        detection_bbox.o                                                 \ | ||||
|        dict.o                                                           \ | ||||
|        display.o                                                        \ | ||||
|        dovi_meta.o                                                      \ | ||||
|   | ||||
							
								
								
									
										73
									
								
								libavutil/detection_bbox.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								libavutil/detection_bbox.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| /* | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include "detection_bbox.h" | ||||
|  | ||||
| /* | ||||
|  * Allocate, with av_mallocz(), an AVDetectionBBoxHeader immediately followed | ||||
|  * by an array of bounding boxes, and fill in the header's nb_bboxes, | ||||
|  * bbox_size and bboxes_offset fields. | ||||
|  * | ||||
|  * The allocation always contains at least one box slot, so nb_bboxes == 0 | ||||
|  * yields a valid header describing an empty array instead of letting the | ||||
|  * unsigned expression nb_bboxes - 1 wrap around to UINT32_MAX. | ||||
|  * | ||||
|  * Returns NULL if the total size would not fit in a size_t or if the | ||||
|  * allocation fails. If out_size is non-NULL, the allocated size in bytes is | ||||
|  * stored there on success. | ||||
|  */ | ||||
| AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size) | ||||
| { | ||||
|     size_t size; | ||||
|     /* Boxes needed beyond the single one embedded in the struct below. */ | ||||
|     size_t extra_bboxes = nb_bboxes ? nb_bboxes - 1 : 0; | ||||
|     struct { | ||||
|         AVDetectionBBoxHeader header; | ||||
|         AVDetectionBBox boxes[1]; | ||||
|     } *ret; | ||||
|  | ||||
|     size = sizeof(*ret); | ||||
|     /* Reject requests whose total byte count would overflow size_t. */ | ||||
|     if (extra_bboxes > (SIZE_MAX - size) / sizeof(*ret->boxes)) | ||||
|         return NULL; | ||||
|     size += sizeof(*ret->boxes) * extra_bboxes; | ||||
|  | ||||
|     ret = av_mallocz(size); | ||||
|     if (!ret) | ||||
|         return NULL; | ||||
|  | ||||
|     ret->header.nb_bboxes = nb_bboxes; | ||||
|     ret->header.bbox_size = sizeof(*ret->boxes); | ||||
|     /* Byte distance from the start of the header to the first box. */ | ||||
|     ret->header.bboxes_offset = (char *)&ret->boxes - (char *)&ret->header; | ||||
|  | ||||
|     if (out_size) | ||||
|         *out_size = size; | ||||
|  | ||||
|     return &ret->header; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Allocate a bounding box header with room for nb_bboxes entries and attach | ||||
|  * it to frame as side data of type AV_FRAME_DATA_DETECTION_BBOXES. | ||||
|  * | ||||
|  * Returns the header on success, or NULL if allocation, buffer creation or | ||||
|  * side data creation fails. | ||||
|  */ | ||||
| AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes) | ||||
| { | ||||
|     size_t bytes; | ||||
|     AVDetectionBBoxHeader *hdr = av_detection_bbox_alloc(nb_bboxes, &bytes); | ||||
|     AVBufferRef *ref = NULL; | ||||
|  | ||||
|     if (!hdr) | ||||
|         return NULL; | ||||
|  | ||||
|     /* Sizes above INT_MAX are rejected before any buffer is created. */ | ||||
|     if (bytes <= INT_MAX) | ||||
|         ref = av_buffer_create((uint8_t *)hdr, bytes, NULL, NULL, 0); | ||||
|     if (!ref) { | ||||
|         /* No buffer wraps the allocation yet, so release it directly. */ | ||||
|         av_freep(&hdr); | ||||
|         return NULL; | ||||
|     } | ||||
|  | ||||
|     /* From here on the allocation is released through the buffer reference. */ | ||||
|     if (av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DETECTION_BBOXES, ref)) | ||||
|         return hdr; | ||||
|  | ||||
|     av_buffer_unref(&ref); | ||||
|     return NULL; | ||||
| } | ||||
							
								
								
									
										107
									
								
								libavutil/detection_bbox.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								libavutil/detection_bbox.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | ||||
| /* | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #ifndef AVUTIL_DETECTION_BBOX_H | ||||
| #define AVUTIL_DETECTION_BBOX_H | ||||
|  | ||||
| #include "rational.h" | ||||
| #include "avassert.h" | ||||
| #include "frame.h" | ||||
|  | ||||
| typedef struct AVDetectionBBox { | ||||
|     /** | ||||
|      * Geometry of the bounding box: x and y are the distances in pixels | ||||
|      * from the left and top edges of the frame; w and h are the width and | ||||
|      * height of the box. | ||||
|      */ | ||||
|     int x; | ||||
|     int y; | ||||
|     int w; | ||||
|     int h; | ||||
|  | ||||
| #define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE 64 | ||||
|  | ||||
|     /** | ||||
|      * Detection result: the label of the detected object, stored in a | ||||
|      * fixed-size buffer of AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE bytes, | ||||
|      * together with the confidence of that detection. | ||||
|      */ | ||||
|     char detect_label[AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]; | ||||
|     AVRational detect_confidence; | ||||
|  | ||||
|     /** | ||||
|      * At most AV_NUM_BBOX_CLASSIFY (4) classifications based on the | ||||
|      * detected bounding box. For example, up to 4 different attributes can | ||||
|      * be obtained by running 4 different DNN models on one bounding box. | ||||
|      * classify_count is zero if there is no classification. | ||||
|      */ | ||||
| #define AV_NUM_BBOX_CLASSIFY 4 | ||||
|     uint32_t classify_count; | ||||
|     char classify_labels[AV_NUM_BBOX_CLASSIFY][AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]; | ||||
|     AVRational classify_confidences[AV_NUM_BBOX_CLASSIFY]; | ||||
| } AVDetectionBBox; | ||||
|  | ||||
| typedef struct AVDetectionBBoxHeader { | ||||
|     /** | ||||
|      * Information about how the bounding boxes were generated, | ||||
|      * for example the name of the DNN model. | ||||
|      */ | ||||
|     char source[256]; | ||||
|  | ||||
|     /** | ||||
|      * Number of bounding boxes in the array that follows this header. | ||||
|      */ | ||||
|     uint32_t nb_bboxes; | ||||
|  | ||||
|     /** | ||||
|      * Offset in bytes from the beginning of this structure at which | ||||
|      * the array of bounding boxes starts. | ||||
|      */ | ||||
|     size_t bboxes_offset; | ||||
|  | ||||
|     /** | ||||
|      * Size of each bounding box in bytes, i.e. the stride to use when | ||||
|      * stepping from one array element to the next. | ||||
|      */ | ||||
|     size_t bbox_size; | ||||
| } AVDetectionBBoxHeader; | ||||
|  | ||||
| /** | ||||
|  * Get the bounding box at the specified {@code idx}. | ||||
|  * | ||||
|  * The address is computed from the header's bboxes_offset and bbox_size | ||||
|  * fields rather than from sizeof(AVDetectionBBox). | ||||
|  * | ||||
|  * @param header header that precedes the array of bounding boxes | ||||
|  * @param idx    index of the box; must be less than header->nb_bboxes, | ||||
|  *               which is enforced with av_assert0() | ||||
|  * @return pointer to the bounding box at index idx | ||||
|  */ | ||||
| static av_always_inline AVDetectionBBox* | ||||
| av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx) | ||||
| { | ||||
|     av_assert0(idx < header->nb_bboxes); | ||||
|     return (AVDetectionBBox *)((uint8_t *)header + header->bboxes_offset + | ||||
|                                idx * header->bbox_size); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes} | ||||
|  * AVDetectionBBox, and initializes the header's nb_bboxes, bbox_size and | ||||
|  * bboxes_offset fields. | ||||
|  * Can be freed with a normal av_free() call. | ||||
|  * | ||||
|  * @param nb_bboxes number of bounding boxes to make room for | ||||
|  * @param out_size if non-NULL, the size in bytes of the resulting data array is | ||||
|  * written here. | ||||
|  * @return the newly allocated header, or NULL on failure | ||||
|  */ | ||||
| AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size); | ||||
|  | ||||
| /** | ||||
|  * Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes} | ||||
|  * AVDetectionBBox, in the given AVFrame {@code frame} as AVFrameSideData of type | ||||
|  * AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables. | ||||
|  * | ||||
|  * @param frame     frame the new side data is attached to | ||||
|  * @param nb_bboxes number of bounding boxes to make room for | ||||
|  * @return the header stored in the frame's new side data, or NULL on failure | ||||
|  */ | ||||
| AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes); | ||||
| #endif | ||||
| @@ -853,6 +853,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) | ||||
|     case AV_FRAME_DATA_VIDEO_ENC_PARAMS:            return "Video encoding parameters"; | ||||
|     case AV_FRAME_DATA_SEI_UNREGISTERED:            return "H.26[45] User Data Unregistered SEI message"; | ||||
|     case AV_FRAME_DATA_FILM_GRAIN_PARAMS:           return "Film grain parameters"; | ||||
|     case AV_FRAME_DATA_DETECTION_BBOXES:            return "Bounding boxes for object detection and classification"; | ||||
|     } | ||||
|     return NULL; | ||||
| } | ||||
|   | ||||
| @@ -198,6 +198,12 @@ enum AVFrameSideDataType { | ||||
|      * Must be present for every frame which should have film grain applied. | ||||
|      */ | ||||
|     AV_FRAME_DATA_FILM_GRAIN_PARAMS, | ||||
|  | ||||
|     /** | ||||
|      * Bounding boxes for object detection and classification, | ||||
|      * as described by AVDetectionBBoxHeader. | ||||
|      */ | ||||
|     AV_FRAME_DATA_DETECTION_BBOXES, | ||||
| }; | ||||
|  | ||||
| enum AVActiveFormatDescription { | ||||
|   | ||||
| @@ -79,7 +79,7 @@ | ||||
|  */ | ||||
|  | ||||
| #define LIBAVUTIL_VERSION_MAJOR  56 | ||||
| #define LIBAVUTIL_VERSION_MINOR  72 | ||||
| #define LIBAVUTIL_VERSION_MINOR  73 | ||||
| #define LIBAVUTIL_VERSION_MICRO 100 | ||||
|  | ||||
| #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user