1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-11-23 21:54:53 +02:00
Files
FFmpeg/libavcodec/webvttdec.c
Leon Grutters 9a32b86307 avcodec/webvttdec: strip classes
If a supported tag has a class, e.g. "<i.bold>", it is ignored entirely;
so for example "<i.bold>Hello</i>" would be converted to "Hello{\i0}"
instead of the intended "{\i1}Hello{\i0}".

Signed-off-by: Leon Grutters <gruttersleonbot2@gmail.com>
2025-08-04 09:12:17 +00:00

125 lines
3.8 KiB
C

/*
* Copyright (c) 2012 Clément Bœsch
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* WebVTT subtitle decoder
* @see https://www.w3.org/TR/webvtt1/
* @todo need to support extended markups and cue settings
*/
#include "avcodec.h"
#include "ass.h"
#include "codec_internal.h"
#include "libavutil/bprint.h"
/*
 * Literal substitutions applied to cue text outside of <...> tags.
 * webvtt_event_to_ass() scans this table in order at each input position
 * and emits `to` in place of the first `from` that matches there.
 */
static const struct {
    const char *from;
    const char *to;
} webvtt_tag_replace[] = {
    /* escape to avoid ASS markup conflicts */
    { .from = "{",      .to = "\\{{}"           },
    { .from = "\\",     .to = "\\\xe2\x81\xa0"  }, /* backslash + U+2060 (UTF-8) */

    /* character references */
    { .from = "&gt;",   .to = ">"               },
    { .from = "&lt;",   .to = "<"               },
    { .from = "&lrm;",  .to = "\xe2\x80\x8e"    }, /* U+200E (UTF-8) */
    { .from = "&rlm;",  .to = "\xe2\x80\x8f"    }, /* U+200F (UTF-8) */
    { .from = "&amp;",  .to = "&"               },
    { .from = "&nbsp;", .to = "\\h"             },
};
/*
 * WebVTT tags that have an ASS equivalent. `from` is the tag name as it
 * appears right after the opening '<' (closing tags include the leading
 * '/'); `to` is the ASS override block emitted in its place.
 * Both members are fixed-size arrays, so every string here must fit in
 * 5 characters plus the terminating NUL.
 */
static const struct {
    const char from[6];
    const char to[6];
} webvtt_valid_tags[] = {
    /* italic */
    { .from = "i",  .to = "{\\i1}" },
    { .from = "/i", .to = "{\\i0}" },
    /* bold */
    { .from = "b",  .to = "{\\b1}" },
    { .from = "/b", .to = "{\\b0}" },
    /* underline */
    { .from = "u",  .to = "{\\u1}" },
    { .from = "/u", .to = "{\\u0}" },
};
/**
 * Convert the text of one WebVTT cue into ASS dialogue text.
 *
 * - Anything between '<' and the next '>' is consumed as a tag. Tags listed
 *   in webvtt_valid_tags are replaced by their ASS override; a supported tag
 *   may carry classes or an annotation ("<i.bold>", "<b loud>"), which are
 *   ignored. Every other tag is dropped.
 * - Strings listed in webvtt_tag_replace are substituted.
 * - A line feed that is not the last character becomes "\N"; a trailing
 *   line feed is copied as is. Carriage returns are dropped.
 * - All remaining characters are copied verbatim.
 *
 * A '<' without a matching '>' ends the conversion: the rest of the cue is
 * discarded.
 *
 * @param buf output buffer the converted text is appended to
 * @param p   NUL-terminated cue text
 * @return 0 (always)
 */
static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
{
    while (*p) {
        size_t i;

        if (*p == '<') {
            const char *tag_end = strchr(p, '>');

            if (!tag_end)
                break;

            for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
                const char *from = webvtt_valid_tags[i].from;
                const size_t from_len = strlen(from);
                char next;

                if (strncmp(p + 1, from, from_len))
                    continue;

                /*
                 * A plain prefix match would also accept unrelated tags
                 * such as "<img>" or "<br>" as italic/bold. Require the
                 * tag name to end here: at '>', at a class separator '.',
                 * or at the whitespace that introduces an annotation.
                 * The matched prefix contains no '>', so this index is at
                 * most the position of tag_end and stays inside the string.
                 */
                next = p[1 + from_len];
                if (next == '>' || next == '.'  || next == ' ' ||
                    next == '\t' || next == '\n' || next == '\f' ||
                    next == '\r') {
                    av_bprintf(buf, "%s", webvtt_valid_tags[i].to);
                    break;
                }
            }

            /* skip the whole tag, recognised or not */
            p = tag_end + 1;
            continue;
        }

        for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
            const char *from = webvtt_tag_replace[i].from;
            const size_t len = strlen(from);

            if (!strncmp(p, from, len)) {
                av_bprintf(buf, "%s", webvtt_tag_replace[i].to);
                p += len;
                break;
            }
        }
        /* the loop ended early, i.e. a replacement was emitted */
        if (i < FF_ARRAY_ELEMS(webvtt_tag_replace))
            continue;

        if (p[0] == '\n' && p[1])
            av_bprintf(buf, "\\N");
        else if (*p != '\r')
            av_bprint_chars(buf, *p, 1);
        p++;
    }
    return 0;
}
/**
 * Decode one WebVTT packet into an ASS subtitle rectangle.
 *
 * The packet payload is converted with webvtt_event_to_ass() and, when the
 * packet has data and a positive size, handed to ff_ass_add_rect() together
 * with the next read-order value from the private context.
 *
 * @param avctx       codec context; priv_data is used as FFASSDecoderContext
 * @param sub         subtitle the rectangle is added to
 * @param got_sub_ptr set to non-zero when sub holds at least one rectangle
 * @param avpkt       input packet
 * @return avpkt->size on success, otherwise the negative value returned by
 *         ff_ass_add_rect()
 */
static int webvtt_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
                               int *got_sub_ptr, const AVPacket *avpkt)
{
    FFASSDecoderContext *ass_ctx = avctx->priv_data;
    const char *text = avpkt->data;
    AVBPrint line;
    int err = 0;

    av_bprint_init(&line, 0, AV_BPRINT_SIZE_UNLIMITED);

    if (text && avpkt->size > 0) {
        if (webvtt_event_to_ass(&line, text) == 0)
            err = ff_ass_add_rect(sub, line.str, ass_ctx->readorder++,
                                  0, NULL, NULL);
    }

    /* the buffer is released on both the success and the error path */
    av_bprint_finalize(&line, NULL);

    if (err < 0)
        return err;

    *got_sub_ptr = sub->num_rects > 0;
    return avpkt->size;
}
/* Codec descriptor for the WebVTT subtitle decoder. */
const FFCodec ff_webvtt_decoder = {
.p.name = "webvtt",
CODEC_LONG_NAME("WebVTT subtitle"),
.p.type = AVMEDIA_TYPE_SUBTITLE,
.p.id = AV_CODEC_ID_WEBVTT,
/* per-packet decode entry point defined in this file */
FF_CODEC_DECODE_SUB_CB(webvtt_decode_frame),
/* init and flush use ASS helpers defined outside this file (presumably declared in ass.h) */
.init = ff_ass_subtitle_header_default,
.flush = ff_ass_decoder_flush,
/* webvtt_decode_frame() reads avctx->priv_data as an FFASSDecoderContext */
.priv_data_size = sizeof(FFASSDecoderContext),
};