diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c index be6c03bbe5..68c1709b93 100644 --- a/libavformat/id3v2.c +++ b/libavformat/id3v2.c @@ -59,63 +59,129 @@ static unsigned int get_size(AVIOContext *s, int len) return v; } -static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key) +/** + * Free GEOB type extra metadata. + */ +static void free_geobtag(ID3v2ExtraMetaGEOB *geob) { - char *q, dst[512]; - const char *val = NULL; - int len, dstlen = sizeof(dst) - 1; - unsigned genre; + av_free(geob->mime_type); + av_free(geob->file_name); + av_free(geob->description); + av_free(geob->data); + av_free(geob); +} + +/** + * Decode characters to UTF-8 according to encoding type. The decoded buffer is + * always null terminated. + * + * @param dst Pointer where the address of the buffer with the decoded bytes is + * stored. Buffer must be freed by caller. + * @param dstlen Pointer to an int where the length of the decoded string + * is stored (in bytes, incl. null termination) + * @param maxread Pointer to maximum number of characters to read from the + * AVIOContext. After execution the value is decremented by the number of bytes + * actually read. + * @seeknull If true, decoding stops after the first U+0000 character found, if + * there is any before maxread is reached + * @returns 0 if no error occured, dst is uninitialized on error + */ +static int decode_str(AVFormatContext *s, AVIOContext *pb, int encoding, + uint8_t **dst, int *dstlen, int *maxread, const int seeknull) +{ + int len, ret; + uint8_t tmp; + uint32_t ch = 1; + int left = *maxread; unsigned int (*get)(AVIOContext*) = avio_rb16; + AVIOContext *dynbuf; - dst[0] = 0; - if (taglen < 1) - return; + if ((ret = avio_open_dyn_buf(&dynbuf)) < 0) { + av_log(s, AV_LOG_ERROR, "Error opening memory stream\n"); + return ret; + } - taglen--; /* account for encoding type byte */ - - switch (avio_r8(pb)) { /* encoding type */ + switch (encoding) { case ID3v2_ENCODING_ISO8859: - q = dst; - while (taglen-- && q - dst < dstlen - 7) { - uint8_t tmp; - PUT_UTF8(avio_r8(pb), tmp, *q++ = tmp;) + while (left && (!seeknull || ch)) { + ch = avio_r8(pb); + PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);) + left--; } - *q = 0; break; case ID3v2_ENCODING_UTF16BOM: - taglen -= 2; + if ((left -= 2) < 0) { + av_log(s, AV_LOG_ERROR, "Cannot read BOM value, input too short\n"); + avio_close_dyn_buf(dynbuf, (uint8_t **)dst); + av_freep(dst); + return AVERROR_INVALIDDATA; + } switch (avio_rb16(pb)) { case 0xfffe: get = avio_rl16; case 0xfeff: break; default: - av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key); - return; + av_log(s, AV_LOG_ERROR, "Incorrect BOM value\n"); + avio_close_dyn_buf(dynbuf, (uint8_t **)dst); + av_freep(dst); + *maxread = left; + return AVERROR_INVALIDDATA; } // fall-through case ID3v2_ENCODING_UTF16BE: - q = dst; - while (taglen > 1 && q - dst < dstlen - 7) { - uint32_t ch; - uint8_t tmp; - - GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(pb) : 0), break;) - PUT_UTF8(ch, tmp, *q++ = tmp;) + while ((left > 1) && (!seeknull || ch)) { + GET_UTF16(ch, ((left -= 2) >= 0 ? get(pb) : 0), break;) + PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);) } - *q = 0; + if (left < 0) + left += 2; /* did not read last char from pb */ break; case ID3v2_ENCODING_UTF8: - len = FFMIN(taglen, dstlen); - avio_read(pb, dst, len); - dst[len] = 0; + while (left && (!seeknull || ch)) { + ch = avio_r8(pb); + avio_w8(dynbuf, ch); + left--; + } break; default: - av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s.\n", key); + av_log(s, AV_LOG_WARNING, "Unknown encoding\n"); + } + + if (ch) + avio_w8(dynbuf, 0); + + len = avio_close_dyn_buf(dynbuf, (uint8_t **)dst); + if (dstlen) + *dstlen = len; + + *maxread = left; + + return 0; +} + +/** + * Parse a text tag. + */ +static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key) +{ + uint8_t *dst; + const char *val = NULL; + int len, dstlen; + unsigned genre; + + if (taglen < 1) + return; + + taglen--; /* account for encoding type byte */ + + if (decode_str(s, pb, avio_r8(pb), &dst, &dstlen, &taglen, 0) < 0) { + av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key); + return; } if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) @@ -134,6 +200,82 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const cha if (val) av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE); + + av_free(dst); +} + +/** + * Parse GEOB tag into a ID3v2ExtraMetaGEOB struct. + */ +static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, char *tag, ID3v2ExtraMeta **extra_meta) +{ + ID3v2ExtraMetaGEOB *geob_data = NULL; + ID3v2ExtraMeta *new_extra = NULL; + char encoding; + unsigned int len; + + if (taglen < 1) + return; + + geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB)); + if (!geob_data) { + av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMetaGEOB)); + return; + } + + new_extra = av_mallocz(sizeof(ID3v2ExtraMeta)); + if (!new_extra) { + av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMeta)); + goto fail; + } + + /* read encoding type byte */ + encoding = avio_r8(pb); + taglen--; + + /* read MIME type (always ISO-8859) */ + if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &geob_data->mime_type, NULL, &taglen, 1) < 0 + || taglen <= 0) + goto fail; + + /* read file name */ + if (decode_str(s, pb, encoding, &geob_data->file_name, NULL, &taglen, 1) < 0 + || taglen <= 0) + goto fail; + + /* read content description */ + if (decode_str(s, pb, encoding, &geob_data->description, NULL, &taglen, 1) < 0 + || taglen < 0) + goto fail; + + if (taglen) { + /* save encapsulated binary data */ + geob_data->data = av_malloc(taglen); + if (!geob_data->data) { + av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen); + goto fail; + } + if ((len = avio_read(pb, geob_data->data, taglen)) < taglen) + av_log(s, AV_LOG_WARNING, "Error reading GEOB frame, data truncated.\n"); + geob_data->datasize = len; + } else { + geob_data->data = NULL; + geob_data->datasize = 0; + } + + /* add data to the list */ + new_extra->tag = "GEOB"; + new_extra->data = geob_data; + new_extra->next = *extra_meta; + *extra_meta = new_extra; + + return; + +fail: + av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", tag); + free_geobtag(geob_data); + av_free(new_extra); + return; } static int is_number(const char *str) @@ -182,7 +324,27 @@ finish: av_dict_set(m, "date", date, 0); } -static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags) +/** + * Get the corresponding ID3v2EMFunc struct for a tag. + * @param isv34 Determines if v2.2 or v2.3/4 strings are used + * @return A pointer to the ID3v2EMFunc struct if found, NULL otherwise. + */ +static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34) +{ + int i = 0; + while (ff_id3v2_extra_meta_funcs[i].tag3) { + if (!memcmp(tag, + (isv34 ? + ff_id3v2_extra_meta_funcs[i].tag4 : + ff_id3v2_extra_meta_funcs[i].tag3), + (isv34 ? 4 : 3))) + return &ff_id3v2_extra_meta_funcs[i]; + i++; + } + return NULL; +} + +static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags, ID3v2ExtraMeta **extra_meta) { int isv34, tlen, unsync; char tag[5]; @@ -190,8 +352,10 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t int taghdrlen; const char *reason = NULL; AVIOContext pb; + AVIOContext *pbx; unsigned char *buffer = NULL; int buffer_size = 0; + void (*extra_func)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta**) = NULL; switch (version) { case 2: @@ -252,7 +416,8 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) { av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag); avio_skip(s->pb, tlen); - } else if (tag[0] == 'T') { + /* check for text tag or supported special meta tag */ + } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)->read))) { if (unsync || tunsync) { int i, j; av_fast_malloc(&buffer, &buffer_size, tlen); @@ -268,10 +433,17 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t } } ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL); - read_ttag(s, &pb, j, tag); + tlen = j; + pbx = &pb; // read from sync buffer } else { - read_ttag(s, s->pb, tlen, tag); + pbx = s->pb; // read straight from input } + if (tag[0] == 'T') + /* parse text tag */ + read_ttag(s, pbx, tlen, tag); + else + /* parse special meta tag */ + extra_func(s, pbx, tlen, tag, extra_meta); } else if (!tag[0]) { if (tag[1]) @@ -295,7 +467,7 @@ seek: return; } -void ff_id3v2_read(AVFormatContext *s, const char *magic) +void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta) { int len, ret; uint8_t buf[ID3v2_HEADER_SIZE]; @@ -315,7 +487,7 @@ void ff_id3v2_read(AVFormatContext *s, const char *magic) ((buf[7] & 0x7f) << 14) | ((buf[8] & 0x7f) << 7) | (buf[9] & 0x7f); - ff_id3v2_parse(s, len, buf[3], buf[5]); + ff_id3v2_parse(s, len, buf[3], buf[5], extra_meta); } else { avio_seek(s->pb, off, SEEK_SET); } @@ -326,6 +498,30 @@ void ff_id3v2_read(AVFormatContext *s, const char *magic) merge_date(&s->metadata); } +void ff_id3v2_read(AVFormatContext *s, const char *magic) +{ + ff_id3v2_read_all(s, magic, NULL); +} + +void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta) +{ + ID3v2ExtraMeta *current = *extra_meta, *next; + void (*free_func)(ID3v2ExtraMeta*); + + while (current) { + if ((free_func = get_extra_meta_func(current->tag, 1)->free)) + free_func(current->data); + next = current->next; + av_freep(¤t); + current = next; + } +} + +const ID3v2EMFunc ff_id3v2_extra_meta_funcs[] = { + { "GEO", "GEOB", read_geobtag, free_geobtag }, + { NULL } +}; + const AVMetadataConv ff_id3v2_34_metadata_conv[] = { { "TALB", "album"}, { "TCOM", "composer"}, diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h index 3e0e65a92e..13dec399d1 100644 --- a/libavformat/id3v2.h +++ b/libavformat/id3v2.h @@ -45,6 +45,27 @@ enum ID3v2Encoding { ID3v2_ENCODING_UTF8 = 3, }; +typedef struct ID3v2ExtraMeta { + const char *tag; + void *data; + struct ID3v2ExtraMeta *next; +} ID3v2ExtraMeta; + +typedef struct ID3v2ExtraMetaGEOB { + uint32_t datasize; + uint8_t *mime_type; + uint8_t *file_name; + uint8_t *description; + uint8_t *data; +} ID3v2ExtraMetaGEOB; + +typedef struct ID3v2EMFunc { + const char *tag3; + const char *tag4; + void (*read)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta **); + void (*free)(); +} ID3v2EMFunc; + /** * Detect ID3v2 Header. * @param buf must be ID3v2_HEADER_SIZE byte long @@ -61,10 +82,25 @@ int ff_id3v2_match(const uint8_t *buf, const char *magic); int ff_id3v2_tag_len(const uint8_t *buf); /** - * Read an ID3v2 tag + * Read an ID3v2 tag (text tags only) */ void ff_id3v2_read(AVFormatContext *s, const char *magic); +/** + * Read an ID3v2 tag, including supported extra metadata (currently only GEOB) + * @param extra_meta If not NULL, extra metadata is parsed into a list of + * ID3v2ExtraMeta structs and *extra_meta points to the head of the list + */ +void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta); + +/** + * Free memory allocated parsing special (non-text) metadata. + * @param extra_meta Pointer to a pointer to the head of a ID3v2ExtraMeta list, *extra_meta is set to NULL. + */ +void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta); + +extern const ID3v2EMFunc ff_id3v2_extra_meta_funcs[]; + extern const AVMetadataConv ff_id3v2_34_metadata_conv[]; extern const AVMetadataConv ff_id3v2_4_metadata_conv[]; extern const AVMetadataConv ff_id3v2_2_metadata_conv[];