[FFmpeg-devel] [PATCH] Improve support for PGS subtitles.

Sun Jan 22 17:27:55 CET 2012

I submitted this patch a few days ago. The developer documentation states
that if some time passes with no response to a patch that I should send a
reminder. So I'm doing that :).

-- Dave

On Thu, Jan 19, 2012 at 7:31 AM, David Mitchell <dave at fallingcanbedeadly.com
> wrote:

> The previous implementation assumed that a new picture would always
> supersede the previous picture. Similarly, presentation segments
> were assumed to pertain to the most-recently-read picture.
>
> However, each presentation segment may refer to 0 or more pictures
> by their ID. Picture IDs may repeat, and a repeated picture ID
> indicates that the old picture for that ID is no longer needed
> and may be discarded.
>
> The new implementation allocates a buffer with one slot for each
> possible picture ID (the picture ID is a 16-bit field) and
> properly decodes presentation segments so that all relevant
> pictures are output upon encountering a display segment.
>
> Given that most PGS streams are unlikely to use more than a small
> fraction of the available picture IDs, it would probably be better
> to use a more memory-efficient data structure. I'm lazy though, so
> I leave this to a more motivated individual.
>
> I've tested the code with MKV files in VLC (a recent revision from
> their git repo) and with HandBrake (a version that I hacked up to
> use ffmpeg's PGS subtitle decoder).
> ---
>  Changelog              |    1 +
>  libavcodec/pgssubdec.c |  198
> ++++++++++++++++++++++++++++--------------------
>  2 files changed, 118 insertions(+), 81 deletions(-)
>
> diff --git a/Changelog b/Changelog
> index 1543562..3e10f6c 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -16,6 +16,7 @@ version next:
>  - Automatic thread count based on detection number of (available) CPU
> cores
>  - y41p Brooktree Uncompressed 4:1:1 12-bit encoder and decoder
>  - ffprobe -show_error option
> +- Improved PGS subtitle decoder
>
>
>  version 0.9:
> diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
> index 5a070e4..2785d25 100644
> --- a/libavcodec/pgssubdec.c
> +++ b/libavcodec/pgssubdec.c
> @@ -40,11 +40,16 @@ enum SegmentType {
>     DISPLAY_SEGMENT      = 0x80,
>  };
>
> -typedef struct PGSSubPresentation {
> +typedef struct PGSSubPictureReference {
>     int x;
>     int y;
> -    int id_number;
> -    int object_number;
> +    int picture_id;
> +} PGSSubPictureReference;
> +
> +typedef struct PGSSubPresentation {
> +    int                    id_number;
> +    int                    object_count;
> +    PGSSubPictureReference *objects;
>  } PGSSubPresentation;
>
>  typedef struct PGSSubPicture {
> @@ -58,22 +63,29 @@ typedef struct PGSSubPicture {
>  typedef struct PGSSubContext {
>     PGSSubPresentation presentation;
>     uint32_t           clut[256];
> -    PGSSubPicture      picture;
> +    PGSSubPicture      pictures[UINT16_MAX];
>  } PGSSubContext;
>
>  static av_cold int init_decoder(AVCodecContext *avctx)
>  {
> -    avctx->pix_fmt = PIX_FMT_PAL8;
> +    avctx->pix_fmt     = PIX_FMT_PAL8;
>
>     return 0;
>  }
>
>  static av_cold int close_decoder(AVCodecContext *avctx)
>  {
> +    uint16_t picture;
> +
>     PGSSubContext *ctx = avctx->priv_data;
>
> -    av_freep(&ctx->picture.rle);
> -    ctx->picture.rle_buffer_size  = 0;
> +    av_freep(&ctx->presentation.objects);
> +    ctx->presentation.object_count = 0;
> +
> +    for (picture = 0; picture < UINT16_MAX; ++picture) {
> +        av_freep(&ctx->pictures[picture].rle);
> +        ctx->pictures[picture].rle_buffer_size = 0;
> +    }
>
>     return 0;
>  }
> @@ -88,7 +100,7 @@ static av_cold int close_decoder(AVCodecContext *avctx)
>  * @param buf pointer to the RLE data to process
>  * @param buf_size size of the RLE data to process
>  */
> -static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
> +static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, int rect,
>                       const uint8_t *buf, unsigned int buf_size)
>  {
>     const uint8_t *rle_bitmap_end;
> @@ -96,15 +108,15 @@ static int decode_rle(AVCodecContext *avctx,
> AVSubtitle *sub,
>
>     rle_bitmap_end = buf + buf_size;
>
> -    sub->rects[0]->pict.data[0] = av_malloc(sub->rects[0]->w *
> sub->rects[0]->h);
> +    sub->rects[rect]->pict.data[0] = av_malloc(sub->rects[rect]->w *
> sub->rects[rect]->h);
>
> -    if (!sub->rects[0]->pict.data[0])
> +    if (!sub->rects[rect]->pict.data[0])
>         return -1;
>
>     pixel_count = 0;
>     line_count  = 0;
>
> -    while (buf < rle_bitmap_end && line_count < sub->rects[0]->h) {
> +    while (buf < rle_bitmap_end && line_count < sub->rects[rect]->h) {
>         uint8_t flags, color;
>         int run;
>
> @@ -119,27 +131,27 @@ static int decode_rle(AVCodecContext *avctx,
> AVSubtitle *sub,
>             color = flags & 0x80 ? bytestream_get_byte(&buf) : 0;
>         }
>
> -        if (run > 0 && pixel_count + run <= sub->rects[0]->w *
> sub->rects[0]->h) {
> -            memset(sub->rects[0]->pict.data[0] + pixel_count, color, run);
> +        if (run > 0 && pixel_count + run <= sub->rects[rect]->w *
> sub->rects[rect]->h) {
> +            memset(sub->rects[rect]->pict.data[0] + pixel_count, color,
> run);
>             pixel_count += run;
>         } else if (!run) {
>             /*
>              * New Line. Check if correct pixels decoded, if not display
> warning
>              * and adjust bitmap pointer to correct new line position.
>              */
> -            if (pixel_count % sub->rects[0]->w > 0)
> +            if (pixel_count % sub->rects[rect]->w > 0)
>                 av_log(avctx, AV_LOG_ERROR, "Decoded %d pixels, when line
> should be %d pixels\n",
> -                       pixel_count % sub->rects[0]->w, sub->rects[0]->w);
> +                       pixel_count % sub->rects[rect]->w,
> sub->rects[rect]->w);
>             line_count++;
>         }
>     }
>
> -    if (pixel_count < sub->rects[0]->w * sub->rects[0]->h) {
> +    if (pixel_count < sub->rects[rect]->w * sub->rects[rect]->h) {
>         av_log(avctx, AV_LOG_ERROR, "Insufficient RLE data for
> subtitle\n");
>         return -1;
>     }
>
> -    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count,
> sub->rects[0]->w * sub->rects[0]->h);
> +    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count,
> sub->rects[rect]->w * sub->rects[rect]->h);
>
>     return 0;
>  }
> @@ -162,25 +174,28 @@ static int parse_picture_segment(AVCodecContext
> *avctx,
>
>     uint8_t sequence_desc;
>     unsigned int rle_bitmap_len, width, height;
> +    uint16_t picture_id;
>
>     if (buf_size <= 4)
>         return -1;
>     buf_size -= 4;
>
> -    /* skip 3 unknown bytes: Object ID (2 bytes), Version Number */
> -    buf += 3;
> +    picture_id = bytestream_get_be16(&buf);
> +
> +    /* skip 1 unknown byte: Version Number */
> +    buf++;
>
>     /* Read the Sequence Description to determine if start of RLE data or
> appended to previous RLE */
>     sequence_desc = bytestream_get_byte(&buf);
>
>     if (!(sequence_desc & 0x80)) {
>         /* Additional RLE data */
> -        if (buf_size > ctx->picture.rle_remaining_len)
> +        if (buf_size > ctx->pictures[picture_id].rle_remaining_len)
>             return -1;
>
> -        memcpy(ctx->picture.rle + ctx->picture.rle_data_len, buf,
> buf_size);
> -        ctx->picture.rle_data_len += buf_size;
> -        ctx->picture.rle_remaining_len -= buf_size;
> +        memcpy(ctx->pictures[picture_id].rle +
> ctx->pictures[picture_id].rle_data_len, buf, buf_size);
> +        ctx->pictures[picture_id].rle_data_len += buf_size;
> +        ctx->pictures[picture_id].rle_remaining_len -= buf_size;
>
>         return 0;
>     }
> @@ -202,17 +217,17 @@ static int parse_picture_segment(AVCodecContext
> *avctx,
>         return -1;
>     }
>
> -    ctx->picture.w = width;
> -    ctx->picture.h = height;
> +    ctx->pictures[picture_id].w = width;
> +    ctx->pictures[picture_id].h = height;
>
> -    av_fast_malloc(&ctx->picture.rle, &ctx->picture.rle_buffer_size,
> rle_bitmap_len);
> +    av_fast_malloc(&ctx->pictures[picture_id].rle,
> &ctx->pictures[picture_id].rle_buffer_size, rle_bitmap_len);
>
> -    if (!ctx->picture.rle)
> +    if (!ctx->pictures[picture_id].rle)
>         return -1;
>
> -    memcpy(ctx->picture.rle, buf, buf_size);
> -    ctx->picture.rle_data_len = buf_size;
> -    ctx->picture.rle_remaining_len = rle_bitmap_len - buf_size;
> +    memcpy(ctx->pictures[picture_id].rle, buf, buf_size);
> +    ctx->pictures[picture_id].rle_data_len      = buf_size;
> +    ctx->pictures[picture_id].rle_remaining_len = rle_bitmap_len -
> buf_size;
>
>     return 0;
>  }
> @@ -275,11 +290,11 @@ static void
> parse_presentation_segment(AVCodecContext *avctx,
>  {
>     PGSSubContext *ctx = avctx->priv_data;
>
> -    int x, y;
> -
>     int w = bytestream_get_be16(&buf);
>     int h = bytestream_get_be16(&buf);
>
> +    uint16_t object_index;
> +
>     av_dlog(avctx, "Video Dimensions %dx%d\n",
>             w, h);
>     if (av_image_check_size(w, h, 0, avctx) >= 0)
> @@ -298,34 +313,48 @@ static void
> parse_presentation_segment(AVCodecContext *avctx,
>      */
>     buf += 3;
>
> -    ctx->presentation.object_number = bytestream_get_byte(&buf);
> -    if (!ctx->presentation.object_number)
> +    ctx->presentation.object_count = bytestream_get_byte(&buf);
> +    if (!ctx->presentation.object_count)
>         return;
>
> -    /*
> -     * Skip 4 bytes of unknown:
> -     *     object_id_ref (2 bytes),
> -     *     window_id_ref,
> -     *     composition_flag (0x80 - object cropped, 0x40 - object forced)
> -     */
> -    buf += 4;
> +    /* Verify that enough bytes are remaining for all of the objects. */
> +    buf_size -= 11;
> +    if (buf_size < ctx->presentation.object_count * 8) {
> +        ctx->presentation.object_count = 0;
> +        return;
> +    }
>
> -    x = bytestream_get_be16(&buf);
> -    y = bytestream_get_be16(&buf);
> +    av_freep(&ctx->presentation.objects);
> +    ctx->presentation.objects = av_malloc(sizeof(PGSSubPictureReference)
> * ctx->presentation.object_count);
> +    if (!ctx->presentation.objects) {
> +        ctx->presentation.object_count = 0;
> +        return;
> +    }
>
> -    /* TODO If cropping, cropping_x, cropping_y, cropping_width,
> cropping_height (all 2 bytes).*/
> +    for (object_index = 0; object_index < ctx->presentation.object_count;
> ++object_index) {
> +        PGSSubPictureReference *reference =
> &ctx->presentation.objects[object_index];
> +        reference->picture_id             = bytestream_get_be16(&buf);
>
> -    av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n", x, y);
> +         /*
> +         * Skip 2 bytes of unknown:
> +         *     window_id_ref,
> +         *     composition_flag (0x80 - object cropped, 0x40 - object
> forced)
> +         */
> +        buf += 2;
>
> -    if (x > avctx->width || y > avctx->height) {
> -        av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x =
> %d, y = %d, video width = %d, video height = %d.\n",
> -               x, y, avctx->width, avctx->height);
> -        x = 0; y = 0;
> -    }
> +        reference->x = bytestream_get_be16(&buf);
> +        reference->y = bytestream_get_be16(&buf);
>
> -    /* Fill in dimensions */
> -    ctx->presentation.x = x;
> -    ctx->presentation.y = y;
> +        /* TODO If cropping, cropping_x, cropping_y, cropping_width,
> cropping_height (all 2 bytes).*/
> +        av_dlog(avctx, "Subtitle Placement ID=%d, x=%d, y=%d\n",
> reference->picture_id, reference->x, reference->y);
> +
> +        if (reference->x > avctx->width || reference->y > avctx->height) {
> +            av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x
> = %d, y = %d, video width = %d, video height = %d.\n",
> +                   reference->x, reference->y, avctx->width,
> avctx->height);
> +            reference->x = 0;
> +            reference->y = 0;
> +        }
> +    }
>  }
>
>  /**
> @@ -349,45 +378,52 @@ static int display_end_segment(AVCodecContext
> *avctx, void *data,
>     AVSubtitle    *sub = data;
>     PGSSubContext *ctx = avctx->priv_data;
>
> +    uint16_t rect;
> +
>     /*
>      *      The end display time is a timeout value and is only reached
> -     *      if the next subtitle is later then timeout or subtitle has
> +     *      if the next subtitle is later than timeout or subtitle has
>      *      not been cleared by a subsequent empty display command.
>      */
>
> -    // Blank if last object_number was 0.
> -    // Note that this may be wrong for more complex subtitles.
> -    if (!ctx->presentation.object_number)
> +    memset(sub, 0, sizeof(*sub));
> +
> +    // Blank if last object_count was 0.
> +    if (!ctx->presentation.object_count)
>         return 1;
> +
>     sub->start_display_time = 0;
>     sub->end_display_time   = 20000;
>     sub->format             = 0;
>
> -    sub->rects     = av_mallocz(sizeof(*sub->rects));
> -    sub->rects[0]  = av_mallocz(sizeof(*sub->rects[0]));
> -    sub->num_rects = 1;
> -
> -    sub->rects[0]->x    = ctx->presentation.x;
> -    sub->rects[0]->y    = ctx->presentation.y;
> -    sub->rects[0]->w    = ctx->picture.w;
> -    sub->rects[0]->h    = ctx->picture.h;
> -    sub->rects[0]->type = SUBTITLE_BITMAP;
> -
> -    /* Process bitmap */
> -    sub->rects[0]->pict.linesize[0] = ctx->picture.w;
> -
> -    if (ctx->picture.rle) {
> -        if (ctx->picture.rle_remaining_len)
> -            av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes
> shorter than expected\n",
> -                   ctx->picture.rle_data_len,
> ctx->picture.rle_remaining_len);
> -        if(decode_rle(avctx, sub, ctx->picture.rle,
> ctx->picture.rle_data_len) < 0)
> -            return 0;
> -    }
> -    /* Allocate memory for colors */
> -    sub->rects[0]->nb_colors    = 256;
> -    sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
> +    sub->num_rects = ctx->presentation.object_count;
> +    sub->rects     = av_mallocz(sizeof(*sub->rects) * sub->num_rects);
> +
> +    for (rect = 0; rect < sub->num_rects; ++rect) {
> +        uint16_t picture_id    =
> ctx->presentation.objects[rect].picture_id;
> +        sub->rects[rect]       = av_mallocz(sizeof(*sub->rects[rect]));
> +        sub->rects[rect]->x    = ctx->presentation.objects[rect].x;
> +        sub->rects[rect]->y    = ctx->presentation.objects[rect].y;
> +        sub->rects[rect]->w    = ctx->pictures[picture_id].w;
> +        sub->rects[rect]->h    = ctx->pictures[picture_id].h;
> +        sub->rects[rect]->type = SUBTITLE_BITMAP;
> +
> +        /* Process bitmap */
> +        sub->rects[rect]->pict.linesize[0] = ctx->pictures[picture_id].w;
> +        if (ctx->pictures[picture_id].rle) {
> +            if (ctx->pictures[picture_id].rle_remaining_len)
> +                av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u
> bytes shorter than expected\n",
> +                       ctx->pictures[picture_id].rle_data_len,
> ctx->pictures[picture_id].rle_remaining_len);
> +            if (decode_rle(avctx, sub, rect,
> ctx->pictures[picture_id].rle, ctx->pictures[picture_id].rle_data_len) < 0)
> +                return 0;
> +        }
> +
> +        /* Allocate memory for colors */
> +        sub->rects[rect]->nb_colors    = 256;
> +        sub->rects[rect]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
>
> -    memcpy(sub->rects[0]->pict.data[1], ctx->clut,
> sub->rects[0]->nb_colors * sizeof(uint32_t));
> +        memcpy(sub->rects[rect]->pict.data[1], ctx->clut,
> sub->rects[rect]->nb_colors * sizeof(uint32_t));
> +    }
>
>     return 1;
>  }
> --
> 1.7.5.4
>
>