Improve support for PGS subtitles.

The previous implementation assumed that a new picture would always supersede the previous picture. Similarly, presentation segments were assumed to pertain to the most-recently-read picture. However, each presentation segment may refer to 0 or more pictures by their ID. Picture IDs may repeat, and a repeated picture ID indicates that the old picture for that ID is no longer needed and may be discarded. The new implementation allocates a buffer with one slot for each possible picture ID (the picture ID is a 16-bit field) and properly decodes presentation segments so that all relevant pictures are output upon encountering a display segment. Given that most PGS streams are unlikely to use more than a small fraction of the available picture IDs, it would probably be better to use a more memory-efficient data structure. I'm lazy though, so I leave this to a more motivated individual. I've tested the code with MKV files in VLC (a recent revision from their git repo) and with HandBrake (a version that I hacked up to use ffmpeg's PGS subtitle decoder). Review-by: Hendrik Leppkes <h.leppkes@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2025-08-15 14:13:16 +02:00 · 2012-01-19 07:31:01 -08:00
parent cf7c7f13cd
commit d150a147da
2 changed files with 118 additions and 81 deletions
--- a/1
+++ b/1
@@ -26,6 +26,7 @@ version next:
 - ffprobe -show_program_version, -show_library_versions, -show_versions options
 - rv34: frame-level multi-threading
 - optimized iMDCT transform on x86 using SSE for for mpegaudiodec
 - Improved PGS subtitle decoder
 version 0.9:
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -40,11 +40,16 @@ enum SegmentType {
    DISPLAY_SEGMENT      = 0x80,
 };
-typedef struct PGSSubPresentation {
+typedef struct PGSSubPictureReference {
    int x;
    int y;
-    int id_number;
+    int picture_id;
-    int object_number;
+} PGSSubPictureReference;
 typedef struct PGSSubPresentation {
    int                    id_number;
    int                    object_count;
    PGSSubPictureReference *objects;
 } PGSSubPresentation;
 typedef struct PGSSubPicture {
@@ -58,22 +63,29 @@ typedef struct PGSSubPicture {
 typedef struct PGSSubContext {
    PGSSubPresentation presentation;
    uint32_t           clut[256];
-    PGSSubPicture      picture;
+    PGSSubPicture      pictures[UINT16_MAX];
 } PGSSubContext;
 static av_cold int init_decoder(AVCodecContext *avctx)
 {
-    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->pix_fmt     = PIX_FMT_PAL8;
    return 0;
 }
 static av_cold int close_decoder(AVCodecContext *avctx)
 {
    uint16_t picture;
    PGSSubContext *ctx = avctx->priv_data;
-    av_freep(&ctx->picture.rle);
+    av_freep(&ctx->presentation.objects);
-    ctx->picture.rle_buffer_size  = 0;
+    ctx->presentation.object_count = 0;
    for (picture = 0; picture < UINT16_MAX; ++picture) {
        av_freep(&ctx->pictures[picture].rle);
        ctx->pictures[picture].rle_buffer_size = 0;
    }
    return 0;
 }
@@ -88,7 +100,7 @@ static av_cold int close_decoder(AVCodecContext *avctx)
 * @param buf pointer to the RLE data to process
 * @param buf_size size of the RLE data to process
 */
-static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
+static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, int rect,
                      const uint8_t *buf, unsigned int buf_size)
 {
    const uint8_t *rle_bitmap_end;
@@ -96,15 +108,15 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
    rle_bitmap_end = buf + buf_size;
-    sub->rects[0]->pict.data[0] = av_malloc(sub->rects[0]->w * sub->rects[0]->h);
+    sub->rects[rect]->pict.data[0] = av_malloc(sub->rects[rect]->w * sub->rects[rect]->h);
-    if (!sub->rects[0]->pict.data[0])
+    if (!sub->rects[rect]->pict.data[0])
        return -1;
    pixel_count = 0;
    line_count  = 0;
-    while (buf < rle_bitmap_end && line_count < sub->rects[0]->h) {
+    while (buf < rle_bitmap_end && line_count < sub->rects[rect]->h) {
        uint8_t flags, color;
        int run;
@@ -119,27 +131,27 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
            color = flags & 0x80 ? bytestream_get_byte(&buf) : 0;
        }
-        if (run > 0 && pixel_count + run <= sub->rects[0]->w * sub->rects[0]->h) {
+        if (run > 0 && pixel_count + run <= sub->rects[rect]->w * sub->rects[rect]->h) {
-            memset(sub->rects[0]->pict.data[0] + pixel_count, color, run);
+            memset(sub->rects[rect]->pict.data[0] + pixel_count, color, run);
            pixel_count += run;
        } else if (!run) {
            /*
             * New Line. Check if correct pixels decoded, if not display warning
             * and adjust bitmap pointer to correct new line position.
             */
-            if (pixel_count % sub->rects[0]->w > 0)
+            if (pixel_count % sub->rects[rect]->w > 0)
                av_log(avctx, AV_LOG_ERROR, "Decoded %d pixels, when line should be %d pixels\n",
-                       pixel_count % sub->rects[0]->w, sub->rects[0]->w);
+                       pixel_count % sub->rects[rect]->w, sub->rects[rect]->w);
            line_count++;
        }
    }
-    if (pixel_count < sub->rects[0]->w * sub->rects[0]->h) {
+    if (pixel_count < sub->rects[rect]->w * sub->rects[rect]->h) {
        av_log(avctx, AV_LOG_ERROR, "Insufficient RLE data for subtitle\n");
        return -1;
    }
-    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[0]->w * sub->rects[0]->h);
+    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[rect]->w * sub->rects[rect]->h);
    return 0;
 }
@@ -162,25 +174,28 @@ static int parse_picture_segment(AVCodecContext *avctx,
    uint8_t sequence_desc;
    unsigned int rle_bitmap_len, width, height;
    uint16_t picture_id;
    if (buf_size <= 4)
        return -1;
    buf_size -= 4;
-    /* skip 3 unknown bytes: Object ID (2 bytes), Version Number */
+    picture_id = bytestream_get_be16(&buf);
-    buf += 3;
+
    /* skip 1 unknown byte: Version Number */
    buf++;
    /* Read the Sequence Description to determine if start of RLE data or appended to previous RLE */
    sequence_desc = bytestream_get_byte(&buf);
    if (!(sequence_desc & 0x80)) {
        /* Additional RLE data */
-        if (buf_size > ctx->picture.rle_remaining_len)
+        if (buf_size > ctx->pictures[picture_id].rle_remaining_len)
            return -1;
-        memcpy(ctx->picture.rle + ctx->picture.rle_data_len, buf, buf_size);
+        memcpy(ctx->pictures[picture_id].rle + ctx->pictures[picture_id].rle_data_len, buf, buf_size);
-        ctx->picture.rle_data_len += buf_size;
+        ctx->pictures[picture_id].rle_data_len += buf_size;
-        ctx->picture.rle_remaining_len -= buf_size;
+        ctx->pictures[picture_id].rle_remaining_len -= buf_size;
        return 0;
    }
@@ -202,17 +217,17 @@ static int parse_picture_segment(AVCodecContext *avctx,
        return -1;
    }
-    ctx->picture.w = width;
+    ctx->pictures[picture_id].w = width;
-    ctx->picture.h = height;
+    ctx->pictures[picture_id].h = height;
-    av_fast_malloc(&ctx->picture.rle, &ctx->picture.rle_buffer_size, rle_bitmap_len);
+    av_fast_malloc(&ctx->pictures[picture_id].rle, &ctx->pictures[picture_id].rle_buffer_size, rle_bitmap_len);
-    if (!ctx->picture.rle)
+    if (!ctx->pictures[picture_id].rle)
        return -1;
-    memcpy(ctx->picture.rle, buf, buf_size);
+    memcpy(ctx->pictures[picture_id].rle, buf, buf_size);
-    ctx->picture.rle_data_len = buf_size;
+    ctx->pictures[picture_id].rle_data_len      = buf_size;
-    ctx->picture.rle_remaining_len = rle_bitmap_len - buf_size;
+    ctx->pictures[picture_id].rle_remaining_len = rle_bitmap_len - buf_size;
    return 0;
 }
@@ -275,11 +290,11 @@ static void parse_presentation_segment(AVCodecContext *avctx,
 {
    PGSSubContext *ctx = avctx->priv_data;
    int x, y;
    int w = bytestream_get_be16(&buf);
    int h = bytestream_get_be16(&buf);
    uint16_t object_index;
    av_dlog(avctx, "Video Dimensions %dx%d\n",
            w, h);
    if (av_image_check_size(w, h, 0, avctx) >= 0)
@@ -298,34 +313,48 @@ static void parse_presentation_segment(AVCodecContext *avctx,
     */
    buf += 3;
-    ctx->presentation.object_number = bytestream_get_byte(&buf);
+    ctx->presentation.object_count = bytestream_get_byte(&buf);
-    if (!ctx->presentation.object_number)
+    if (!ctx->presentation.object_count)
        return;
-    /*
+    /* Verify that enough bytes are remaining for all of the objects. */
-     * Skip 4 bytes of unknown:
+    buf_size -= 11;
-     *     object_id_ref (2 bytes),
+    if (buf_size < ctx->presentation.object_count * 8) {
-     *     window_id_ref,
+        ctx->presentation.object_count = 0;
-     *     composition_flag (0x80 - object cropped, 0x40 - object forced)
+        return;
     */
    buf += 4;
    x = bytestream_get_be16(&buf);
    y = bytestream_get_be16(&buf);
    /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/
    av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n", x, y);
    if (x > avctx->width || y > avctx->height) {
        av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
               x, y, avctx->width, avctx->height);
        x = 0; y = 0;
    }
-    /* Fill in dimensions */
+    av_freep(&ctx->presentation.objects);
-    ctx->presentation.x = x;
+    ctx->presentation.objects = av_malloc(sizeof(PGSSubPictureReference) * ctx->presentation.object_count);
-    ctx->presentation.y = y;
+    if (!ctx->presentation.objects) {
        ctx->presentation.object_count = 0;
        return;
    }
    for (object_index = 0; object_index < ctx->presentation.object_count; ++object_index) {
        PGSSubPictureReference *reference = &ctx->presentation.objects[object_index];
        reference->picture_id             = bytestream_get_be16(&buf);
         /*
         * Skip 2 bytes of unknown:
         *     window_id_ref,
         *     composition_flag (0x80 - object cropped, 0x40 - object forced)
         */
        buf += 2;
        reference->x = bytestream_get_be16(&buf);
        reference->y = bytestream_get_be16(&buf);
        /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/
        av_dlog(avctx, "Subtitle Placement ID=%d, x=%d, y=%d\n", reference->picture_id, reference->x, reference->y);
        if (reference->x > avctx->width || reference->y > avctx->height) {
            av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
                   reference->x, reference->y, avctx->width, avctx->height);
            reference->x = 0;
            reference->y = 0;
        }
    }
 }
 /**
@@ -349,45 +378,52 @@ static int display_end_segment(AVCodecContext *avctx, void *data,
    AVSubtitle    *sub = data;
    PGSSubContext *ctx = avctx->priv_data;
    uint16_t rect;
    /*
     *      The end display time is a timeout value and is only reached
-     *      if the next subtitle is later then timeout or subtitle has
+     *      if the next subtitle is later than timeout or subtitle has
     *      not been cleared by a subsequent empty display command.
     */
-    // Blank if last object_number was 0.
+    memset(sub, 0, sizeof(*sub));
-    // Note that this may be wrong for more complex subtitles.
+
-    if (!ctx->presentation.object_number)
+    // Blank if last object_count was 0.
    if (!ctx->presentation.object_count)
        return 1;
    sub->start_display_time = 0;
    sub->end_display_time   = 20000;
    sub->format             = 0;
-    sub->rects     = av_mallocz(sizeof(*sub->rects));
+    sub->num_rects = ctx->presentation.object_count;
-    sub->rects[0]  = av_mallocz(sizeof(*sub->rects[0]));
+    sub->rects     = av_mallocz(sizeof(*sub->rects) * sub->num_rects);
    sub->num_rects = 1;
-    sub->rects[0]->x    = ctx->presentation.x;
+    for (rect = 0; rect < sub->num_rects; ++rect) {
-    sub->rects[0]->y    = ctx->presentation.y;
+        uint16_t picture_id    = ctx->presentation.objects[rect].picture_id;
-    sub->rects[0]->w    = ctx->picture.w;
+        sub->rects[rect]       = av_mallocz(sizeof(*sub->rects[rect]));
-    sub->rects[0]->h    = ctx->picture.h;
+        sub->rects[rect]->x    = ctx->presentation.objects[rect].x;
-    sub->rects[0]->type = SUBTITLE_BITMAP;
+        sub->rects[rect]->y    = ctx->presentation.objects[rect].y;
        sub->rects[rect]->w    = ctx->pictures[picture_id].w;
        sub->rects[rect]->h    = ctx->pictures[picture_id].h;
        sub->rects[rect]->type = SUBTITLE_BITMAP;
-    /* Process bitmap */
+        /* Process bitmap */
-    sub->rects[0]->pict.linesize[0] = ctx->picture.w;
+        sub->rects[rect]->pict.linesize[0] = ctx->pictures[picture_id].w;
        if (ctx->pictures[picture_id].rle) {
            if (ctx->pictures[picture_id].rle_remaining_len)
                av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
                       ctx->pictures[picture_id].rle_data_len, ctx->pictures[picture_id].rle_remaining_len);
            if (decode_rle(avctx, sub, rect, ctx->pictures[picture_id].rle, ctx->pictures[picture_id].rle_data_len) < 0)
                return 0;
        }
-    if (ctx->picture.rle) {
+        /* Allocate memory for colors */
-        if (ctx->picture.rle_remaining_len)
+        sub->rects[rect]->nb_colors    = 256;
-            av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
+        sub->rects[rect]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-                   ctx->picture.rle_data_len, ctx->picture.rle_remaining_len);
+
-        if(decode_rle(avctx, sub, ctx->picture.rle, ctx->picture.rle_data_len) < 0)
+        memcpy(sub->rects[rect]->pict.data[1], ctx->clut, sub->rects[rect]->nb_colors * sizeof(uint32_t));
            return 0;
    }
    /* Allocate memory for colors */
    sub->rects[0]->nb_colors    = 256;
    sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
    memcpy(sub->rects[0]->pict.data[1], ctx->clut, sub->rects[0]->nb_colors * sizeof(uint32_t));
    return 1;
 }