Improve support for PGS subtitles.

The previous implementation assumed that a new picture would always supersede the previous picture. Similarly, presentation segments were assumed to pertain to the most-recently-read picture. However, each presentation segment may refer to 0 or more pictures by their ID. Picture IDs may repeat, and a repeated picture ID indicates that the old picture for that ID is no longer needed and may be discarded. The new implementation allocates a buffer with one slot for each possible picture ID (the picture ID is a 16-bit field) and properly decodes presentation segments so that all relevant pictures are output upon encountering a display segment. Given that most PGS streams are unlikely to use more than a small fraction of the available picture IDs, it would probably be better to use a more memory-efficient data structure. I'm lazy though, so I leave this to a more motivated individual. I've tested the code with MKV files in VLC (a recent revision from their git repo) and with HandBrake (a version that I hacked up to use ffmpeg's PGS subtitle decoder). Review-by: Hendrik Leppkes <h.leppkes@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2025-08-10 06:10:52 +02:00 · 2012-01-19 07:31:01 -08:00
parent cf7c7f13cd
commit d150a147da
2 changed files with 118 additions and 81 deletions
--- a/1
+++ b/1
@@ -26,6 +26,7 @@ version next:
 - ffprobe -show_program_version, -show_library_versions, -show_versions options
 - rv34: frame-level multi-threading
 - optimized iMDCT transform on x86 using SSE for for mpegaudiodec
+- Improved PGS subtitle decoder


 version 0.9:
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -40,11 +40,16 @@ enum SegmentType {
    DISPLAY_SEGMENT      = 0x80,
 };

-typedef struct PGSSubPresentation {
+typedef struct PGSSubPictureReference {
    int x;
    int y;
-    int id_number;
-    int object_number;
+    int picture_id;
+} PGSSubPictureReference;
+
+typedef struct PGSSubPresentation {
+    int                    id_number;
+    int                    object_count;
+    PGSSubPictureReference *objects;
 } PGSSubPresentation;

 typedef struct PGSSubPicture {
@@ -58,22 +63,29 @@ typedef struct PGSSubPicture {
 typedef struct PGSSubContext {
    PGSSubPresentation presentation;
    uint32_t           clut[256];
-    PGSSubPicture      picture;
+    PGSSubPicture      pictures[UINT16_MAX];
 } PGSSubContext;

 static av_cold int init_decoder(AVCodecContext *avctx)
 {
-    avctx->pix_fmt = PIX_FMT_PAL8;
+    avctx->pix_fmt     = PIX_FMT_PAL8;

    return 0;
 }

 static av_cold int close_decoder(AVCodecContext *avctx)
 {
+    uint16_t picture;
+
    PGSSubContext *ctx = avctx->priv_data;

-    av_freep(&ctx->picture.rle);
-    ctx->picture.rle_buffer_size  = 0;
+    av_freep(&ctx->presentation.objects);
+    ctx->presentation.object_count = 0;
+
+    for (picture = 0; picture < UINT16_MAX; ++picture) {
+        av_freep(&ctx->pictures[picture].rle);
+        ctx->pictures[picture].rle_buffer_size = 0;
+    }

    return 0;
 }
@@ -88,7 +100,7 @@ static av_cold int close_decoder(AVCodecContext *avctx)
 * @param buf pointer to the RLE data to process
 * @param buf_size size of the RLE data to process
 */
-static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
+static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, int rect,
                      const uint8_t *buf, unsigned int buf_size)
 {
    const uint8_t *rle_bitmap_end;
@@ -96,15 +108,15 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,

    rle_bitmap_end = buf + buf_size;

-    sub->rects[0]->pict.data[0] = av_malloc(sub->rects[0]->w * sub->rects[0]->h);
+    sub->rects[rect]->pict.data[0] = av_malloc(sub->rects[rect]->w * sub->rects[rect]->h);

-    if (!sub->rects[0]->pict.data[0])
+    if (!sub->rects[rect]->pict.data[0])
        return -1;

    pixel_count = 0;
    line_count  = 0;

-    while (buf < rle_bitmap_end && line_count < sub->rects[0]->h) {
+    while (buf < rle_bitmap_end && line_count < sub->rects[rect]->h) {
        uint8_t flags, color;
        int run;

@@ -119,27 +131,27 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
            color = flags & 0x80 ? bytestream_get_byte(&buf) : 0;
        }

-        if (run > 0 && pixel_count + run <= sub->rects[0]->w * sub->rects[0]->h) {
-            memset(sub->rects[0]->pict.data[0] + pixel_count, color, run);
+        if (run > 0 && pixel_count + run <= sub->rects[rect]->w * sub->rects[rect]->h) {
+            memset(sub->rects[rect]->pict.data[0] + pixel_count, color, run);
            pixel_count += run;
        } else if (!run) {
            /*
             * New Line. Check if correct pixels decoded, if not display warning
             * and adjust bitmap pointer to correct new line position.
             */
-            if (pixel_count % sub->rects[0]->w > 0)
+            if (pixel_count % sub->rects[rect]->w > 0)
                av_log(avctx, AV_LOG_ERROR, "Decoded %d pixels, when line should be %d pixels\n",
-                       pixel_count % sub->rects[0]->w, sub->rects[0]->w);
+                       pixel_count % sub->rects[rect]->w, sub->rects[rect]->w);
            line_count++;
        }
    }

-    if (pixel_count < sub->rects[0]->w * sub->rects[0]->h) {
+    if (pixel_count < sub->rects[rect]->w * sub->rects[rect]->h) {
        av_log(avctx, AV_LOG_ERROR, "Insufficient RLE data for subtitle\n");
        return -1;
    }

-    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[0]->w * sub->rects[0]->h);
+    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[rect]->w * sub->rects[rect]->h);

    return 0;
 }
@@ -162,25 +174,28 @@ static int parse_picture_segment(AVCodecContext *avctx,

    uint8_t sequence_desc;
    unsigned int rle_bitmap_len, width, height;
+    uint16_t picture_id;

    if (buf_size <= 4)
        return -1;
    buf_size -= 4;

-    /* skip 3 unknown bytes: Object ID (2 bytes), Version Number */
-    buf += 3;
+    picture_id = bytestream_get_be16(&buf);
+
+    /* skip 1 unknown byte: Version Number */
+    buf++;

    /* Read the Sequence Description to determine if start of RLE data or appended to previous RLE */
    sequence_desc = bytestream_get_byte(&buf);

    if (!(sequence_desc & 0x80)) {
        /* Additional RLE data */
-        if (buf_size > ctx->picture.rle_remaining_len)
+        if (buf_size > ctx->pictures[picture_id].rle_remaining_len)
            return -1;

-        memcpy(ctx->picture.rle + ctx->picture.rle_data_len, buf, buf_size);
-        ctx->picture.rle_data_len += buf_size;
-        ctx->picture.rle_remaining_len -= buf_size;
+        memcpy(ctx->pictures[picture_id].rle + ctx->pictures[picture_id].rle_data_len, buf, buf_size);
+        ctx->pictures[picture_id].rle_data_len += buf_size;
+        ctx->pictures[picture_id].rle_remaining_len -= buf_size;

        return 0;
    }
@@ -202,17 +217,17 @@ static int parse_picture_segment(AVCodecContext *avctx,
        return -1;
    }

-    ctx->picture.w = width;
-    ctx->picture.h = height;
+    ctx->pictures[picture_id].w = width;
+    ctx->pictures[picture_id].h = height;

-    av_fast_malloc(&ctx->picture.rle, &ctx->picture.rle_buffer_size, rle_bitmap_len);
+    av_fast_malloc(&ctx->pictures[picture_id].rle, &ctx->pictures[picture_id].rle_buffer_size, rle_bitmap_len);

-    if (!ctx->picture.rle)
+    if (!ctx->pictures[picture_id].rle)
        return -1;

-    memcpy(ctx->picture.rle, buf, buf_size);
-    ctx->picture.rle_data_len = buf_size;
-    ctx->picture.rle_remaining_len = rle_bitmap_len - buf_size;
+    memcpy(ctx->pictures[picture_id].rle, buf, buf_size);
+    ctx->pictures[picture_id].rle_data_len      = buf_size;
+    ctx->pictures[picture_id].rle_remaining_len = rle_bitmap_len - buf_size;

    return 0;
 }
@@ -275,11 +290,11 @@ static void parse_presentation_segment(AVCodecContext *avctx,
 {
    PGSSubContext *ctx = avctx->priv_data;

-    int x, y;
-
    int w = bytestream_get_be16(&buf);
    int h = bytestream_get_be16(&buf);

+    uint16_t object_index;
+
    av_dlog(avctx, "Video Dimensions %dx%d\n",
            w, h);
    if (av_image_check_size(w, h, 0, avctx) >= 0)
@@ -298,34 +313,48 @@ static void parse_presentation_segment(AVCodecContext *avctx,
     */
    buf += 3;

-    ctx->presentation.object_number = bytestream_get_byte(&buf);
-    if (!ctx->presentation.object_number)
+    ctx->presentation.object_count = bytestream_get_byte(&buf);
+    if (!ctx->presentation.object_count)
        return;

-    /*
-     * Skip 4 bytes of unknown:
-     *     object_id_ref (2 bytes),
-     *     window_id_ref,
-     *     composition_flag (0x80 - object cropped, 0x40 - object forced)
-     */
-    buf += 4;
-
-    x = bytestream_get_be16(&buf);
-    y = bytestream_get_be16(&buf);
-
-    /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/
-
-    av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n", x, y);
-
-    if (x > avctx->width || y > avctx->height) {
-        av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
-               x, y, avctx->width, avctx->height);
-        x = 0; y = 0;
+    /* Verify that enough bytes are remaining for all of the objects. */
+    buf_size -= 11;
+    if (buf_size < ctx->presentation.object_count * 8) {
+        ctx->presentation.object_count = 0;
+        return;
    }

-    /* Fill in dimensions */
-    ctx->presentation.x = x;
-    ctx->presentation.y = y;
+    av_freep(&ctx->presentation.objects);
+    ctx->presentation.objects = av_malloc(sizeof(PGSSubPictureReference) * ctx->presentation.object_count);
+    if (!ctx->presentation.objects) {
+        ctx->presentation.object_count = 0;
+        return;
+    }
+
+    for (object_index = 0; object_index < ctx->presentation.object_count; ++object_index) {
+        PGSSubPictureReference *reference = &ctx->presentation.objects[object_index];
+        reference->picture_id             = bytestream_get_be16(&buf);
+
+         /*
+         * Skip 2 bytes of unknown:
+         *     window_id_ref,
+         *     composition_flag (0x80 - object cropped, 0x40 - object forced)
+         */
+        buf += 2;
+
+        reference->x = bytestream_get_be16(&buf);
+        reference->y = bytestream_get_be16(&buf);
+
+        /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/
+        av_dlog(avctx, "Subtitle Placement ID=%d, x=%d, y=%d\n", reference->picture_id, reference->x, reference->y);
+
+        if (reference->x > avctx->width || reference->y > avctx->height) {
+            av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
+                   reference->x, reference->y, avctx->width, avctx->height);
+            reference->x = 0;
+            reference->y = 0;
+        }
+    }
 }

 /**
@@ -349,45 +378,52 @@ static int display_end_segment(AVCodecContext *avctx, void *data,
    AVSubtitle    *sub = data;
    PGSSubContext *ctx = avctx->priv_data;

+    uint16_t rect;
+
    /*
     *      The end display time is a timeout value and is only reached
-     *      if the next subtitle is later then timeout or subtitle has
+     *      if the next subtitle is later than timeout or subtitle has
     *      not been cleared by a subsequent empty display command.
     */

-    // Blank if last object_number was 0.
-    // Note that this may be wrong for more complex subtitles.
-    if (!ctx->presentation.object_number)
+    memset(sub, 0, sizeof(*sub));
+
+    // Blank if last object_count was 0.
+    if (!ctx->presentation.object_count)
        return 1;
+
    sub->start_display_time = 0;
    sub->end_display_time   = 20000;
    sub->format             = 0;

-    sub->rects     = av_mallocz(sizeof(*sub->rects));
-    sub->rects[0]  = av_mallocz(sizeof(*sub->rects[0]));
-    sub->num_rects = 1;
+    sub->num_rects = ctx->presentation.object_count;
+    sub->rects     = av_mallocz(sizeof(*sub->rects) * sub->num_rects);

-    sub->rects[0]->x    = ctx->presentation.x;
-    sub->rects[0]->y    = ctx->presentation.y;
-    sub->rects[0]->w    = ctx->picture.w;
-    sub->rects[0]->h    = ctx->picture.h;
-    sub->rects[0]->type = SUBTITLE_BITMAP;
+    for (rect = 0; rect < sub->num_rects; ++rect) {
+        uint16_t picture_id    = ctx->presentation.objects[rect].picture_id;
+        sub->rects[rect]       = av_mallocz(sizeof(*sub->rects[rect]));
+        sub->rects[rect]->x    = ctx->presentation.objects[rect].x;
+        sub->rects[rect]->y    = ctx->presentation.objects[rect].y;
+        sub->rects[rect]->w    = ctx->pictures[picture_id].w;
+        sub->rects[rect]->h    = ctx->pictures[picture_id].h;
+        sub->rects[rect]->type = SUBTITLE_BITMAP;

-    /* Process bitmap */
-    sub->rects[0]->pict.linesize[0] = ctx->picture.w;
+        /* Process bitmap */
+        sub->rects[rect]->pict.linesize[0] = ctx->pictures[picture_id].w;
+        if (ctx->pictures[picture_id].rle) {
+            if (ctx->pictures[picture_id].rle_remaining_len)
+                av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
+                       ctx->pictures[picture_id].rle_data_len, ctx->pictures[picture_id].rle_remaining_len);
+            if (decode_rle(avctx, sub, rect, ctx->pictures[picture_id].rle, ctx->pictures[picture_id].rle_data_len) < 0)
+                return 0;
+        }

-    if (ctx->picture.rle) {
-        if (ctx->picture.rle_remaining_len)
-            av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
-                   ctx->picture.rle_data_len, ctx->picture.rle_remaining_len);
-        if(decode_rle(avctx, sub, ctx->picture.rle, ctx->picture.rle_data_len) < 0)
-            return 0;
+        /* Allocate memory for colors */
+        sub->rects[rect]->nb_colors    = 256;
+        sub->rects[rect]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
+
+        memcpy(sub->rects[rect]->pict.data[1], ctx->clut, sub->rects[rect]->nb_colors * sizeof(uint32_t));
    }
-    /* Allocate memory for colors */
-    sub->rects[0]->nb_colors    = 256;
-    sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-
-    memcpy(sub->rects[0]->pict.data[1], ctx->clut, sub->rects[0]->nb_colors * sizeof(uint32_t));

    return 1;
 }