[FFmpeg-devel] [PATCH] Improve support for PGS subtitles.
David Mitchell
dave at fallingcanbedeadly.com
Sun Jan 22 17:27:55 CET 2012
I submitted this patch a few days ago. The developer documentation states
that if some time passes with no response to a patch that I should send a
reminder. So I'm doing that :).
-- Dave
On Thu, Jan 19, 2012 at 7:31 AM, David Mitchell <dave at fallingcanbedeadly.com
> wrote:
> The previous implementation assumed that a new picture would always
> supersede the previous picture. Similarly, presentation segments
> were assumed to pertain to the most-recently-read picture.
>
> However, each presentation segment may refer to 0 or more pictures
> by their ID. Picture IDs may repeat, and a repeated picture ID
> indicates that the old picture for that ID is no longer needed
> and may be discarded.
>
> The new implementation allocates a buffer with one slot for each
> possible picture ID (the picture ID is a 16-bit field) and
> properly decodes presentation segments so that all relevant
> pictures are output upon encountering a display segment.
>
> Given that most PGS streams are unlikely to use more than a small
> fraction of the available picture IDs, it would probably be better
> to use a more memory-efficient data structure. I'm lazy though, so
> I leave this to a more motivated individual.
>
> I've tested the code with MKV files in VLC (a recent revision from
> their git repo) and with HandBrake (a version that I hacked up to
> use ffmpeg's PGS subtitle decoder).
> ---
> Changelog | 1 +
> libavcodec/pgssubdec.c | 198
> ++++++++++++++++++++++++++++--------------------
> 2 files changed, 118 insertions(+), 81 deletions(-)
>
> diff --git a/Changelog b/Changelog
> index 1543562..3e10f6c 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -16,6 +16,7 @@ version next:
> - Automatic thread count based on detection number of (available) CPU
> cores
> - y41p Brooktree Uncompressed 4:1:1 12-bit encoder and decoder
> - ffprobe -show_error option
> +- Improved PGS subtitle decoder
>
>
> version 0.9:
> diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
> index 5a070e4..2785d25 100644
> --- a/libavcodec/pgssubdec.c
> +++ b/libavcodec/pgssubdec.c
> @@ -40,11 +40,16 @@ enum SegmentType {
> DISPLAY_SEGMENT = 0x80,
> };
>
> -typedef struct PGSSubPresentation {
> +typedef struct PGSSubPictureReference {
> int x;
> int y;
> - int id_number;
> - int object_number;
> + int picture_id;
> +} PGSSubPictureReference;
> +
> +typedef struct PGSSubPresentation {
> + int id_number;
> + int object_count;
> + PGSSubPictureReference *objects;
> } PGSSubPresentation;
>
> typedef struct PGSSubPicture {
> @@ -58,22 +63,29 @@ typedef struct PGSSubPicture {
> typedef struct PGSSubContext {
> PGSSubPresentation presentation;
> uint32_t clut[256];
> - PGSSubPicture picture;
> + PGSSubPicture pictures[UINT16_MAX];
> } PGSSubContext;
>
> static av_cold int init_decoder(AVCodecContext *avctx)
> {
> - avctx->pix_fmt = PIX_FMT_PAL8;
> + avctx->pix_fmt = PIX_FMT_PAL8;
>
> return 0;
> }
>
> static av_cold int close_decoder(AVCodecContext *avctx)
> {
> + uint16_t picture;
> +
> PGSSubContext *ctx = avctx->priv_data;
>
> - av_freep(&ctx->picture.rle);
> - ctx->picture.rle_buffer_size = 0;
> + av_freep(&ctx->presentation.objects);
> + ctx->presentation.object_count = 0;
> +
> + for (picture = 0; picture < UINT16_MAX; ++picture) {
> + av_freep(&ctx->pictures[picture].rle);
> + ctx->pictures[picture].rle_buffer_size = 0;
> + }
>
> return 0;
> }
> @@ -88,7 +100,7 @@ static av_cold int close_decoder(AVCodecContext *avctx)
> * @param buf pointer to the RLE data to process
> * @param buf_size size of the RLE data to process
> */
> -static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
> +static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, int rect,
> const uint8_t *buf, unsigned int buf_size)
> {
> const uint8_t *rle_bitmap_end;
> @@ -96,15 +108,15 @@ static int decode_rle(AVCodecContext *avctx,
> AVSubtitle *sub,
>
> rle_bitmap_end = buf + buf_size;
>
> - sub->rects[0]->pict.data[0] = av_malloc(sub->rects[0]->w *
> sub->rects[0]->h);
> + sub->rects[rect]->pict.data[0] = av_malloc(sub->rects[rect]->w *
> sub->rects[rect]->h);
>
> - if (!sub->rects[0]->pict.data[0])
> + if (!sub->rects[rect]->pict.data[0])
> return -1;
>
> pixel_count = 0;
> line_count = 0;
>
> - while (buf < rle_bitmap_end && line_count < sub->rects[0]->h) {
> + while (buf < rle_bitmap_end && line_count < sub->rects[rect]->h) {
> uint8_t flags, color;
> int run;
>
> @@ -119,27 +131,27 @@ static int decode_rle(AVCodecContext *avctx,
> AVSubtitle *sub,
> color = flags & 0x80 ? bytestream_get_byte(&buf) : 0;
> }
>
> - if (run > 0 && pixel_count + run <= sub->rects[0]->w *
> sub->rects[0]->h) {
> - memset(sub->rects[0]->pict.data[0] + pixel_count, color, run);
> + if (run > 0 && pixel_count + run <= sub->rects[rect]->w *
> sub->rects[rect]->h) {
> + memset(sub->rects[rect]->pict.data[0] + pixel_count, color,
> run);
> pixel_count += run;
> } else if (!run) {
> /*
> * New Line. Check if correct pixels decoded, if not display
> warning
> * and adjust bitmap pointer to correct new line position.
> */
> - if (pixel_count % sub->rects[0]->w > 0)
> + if (pixel_count % sub->rects[rect]->w > 0)
> av_log(avctx, AV_LOG_ERROR, "Decoded %d pixels, when line
> should be %d pixels\n",
> - pixel_count % sub->rects[0]->w, sub->rects[0]->w);
> + pixel_count % sub->rects[rect]->w,
> sub->rects[rect]->w);
> line_count++;
> }
> }
>
> - if (pixel_count < sub->rects[0]->w * sub->rects[0]->h) {
> + if (pixel_count < sub->rects[rect]->w * sub->rects[rect]->h) {
> av_log(avctx, AV_LOG_ERROR, "Insufficient RLE data for
> subtitle\n");
> return -1;
> }
>
> - av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count,
> sub->rects[0]->w * sub->rects[0]->h);
> + av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count,
> sub->rects[rect]->w * sub->rects[rect]->h);
>
> return 0;
> }
> @@ -162,25 +174,28 @@ static int parse_picture_segment(AVCodecContext
> *avctx,
>
> uint8_t sequence_desc;
> unsigned int rle_bitmap_len, width, height;
> + uint16_t picture_id;
>
> if (buf_size <= 4)
> return -1;
> buf_size -= 4;
>
> - /* skip 3 unknown bytes: Object ID (2 bytes), Version Number */
> - buf += 3;
> + picture_id = bytestream_get_be16(&buf);
> +
> + /* skip 1 unknown byte: Version Number */
> + buf++;
>
> /* Read the Sequence Description to determine if start of RLE data or
> appended to previous RLE */
> sequence_desc = bytestream_get_byte(&buf);
>
> if (!(sequence_desc & 0x80)) {
> /* Additional RLE data */
> - if (buf_size > ctx->picture.rle_remaining_len)
> + if (buf_size > ctx->pictures[picture_id].rle_remaining_len)
> return -1;
>
> - memcpy(ctx->picture.rle + ctx->picture.rle_data_len, buf,
> buf_size);
> - ctx->picture.rle_data_len += buf_size;
> - ctx->picture.rle_remaining_len -= buf_size;
> + memcpy(ctx->pictures[picture_id].rle +
> ctx->pictures[picture_id].rle_data_len, buf, buf_size);
> + ctx->pictures[picture_id].rle_data_len += buf_size;
> + ctx->pictures[picture_id].rle_remaining_len -= buf_size;
>
> return 0;
> }
> @@ -202,17 +217,17 @@ static int parse_picture_segment(AVCodecContext
> *avctx,
> return -1;
> }
>
> - ctx->picture.w = width;
> - ctx->picture.h = height;
> + ctx->pictures[picture_id].w = width;
> + ctx->pictures[picture_id].h = height;
>
> - av_fast_malloc(&ctx->picture.rle, &ctx->picture.rle_buffer_size,
> rle_bitmap_len);
> + av_fast_malloc(&ctx->pictures[picture_id].rle,
> &ctx->pictures[picture_id].rle_buffer_size, rle_bitmap_len);
>
> - if (!ctx->picture.rle)
> + if (!ctx->pictures[picture_id].rle)
> return -1;
>
> - memcpy(ctx->picture.rle, buf, buf_size);
> - ctx->picture.rle_data_len = buf_size;
> - ctx->picture.rle_remaining_len = rle_bitmap_len - buf_size;
> + memcpy(ctx->pictures[picture_id].rle, buf, buf_size);
> + ctx->pictures[picture_id].rle_data_len = buf_size;
> + ctx->pictures[picture_id].rle_remaining_len = rle_bitmap_len -
> buf_size;
>
> return 0;
> }
> @@ -275,11 +290,11 @@ static void
> parse_presentation_segment(AVCodecContext *avctx,
> {
> PGSSubContext *ctx = avctx->priv_data;
>
> - int x, y;
> -
> int w = bytestream_get_be16(&buf);
> int h = bytestream_get_be16(&buf);
>
> + uint16_t object_index;
> +
> av_dlog(avctx, "Video Dimensions %dx%d\n",
> w, h);
> if (av_image_check_size(w, h, 0, avctx) >= 0)
> @@ -298,34 +313,48 @@ static void
> parse_presentation_segment(AVCodecContext *avctx,
> */
> buf += 3;
>
> - ctx->presentation.object_number = bytestream_get_byte(&buf);
> - if (!ctx->presentation.object_number)
> + ctx->presentation.object_count = bytestream_get_byte(&buf);
> + if (!ctx->presentation.object_count)
> return;
>
> - /*
> - * Skip 4 bytes of unknown:
> - * object_id_ref (2 bytes),
> - * window_id_ref,
> - * composition_flag (0x80 - object cropped, 0x40 - object forced)
> - */
> - buf += 4;
> + /* Verify that enough bytes are remaining for all of the objects. */
> + buf_size -= 11;
> + if (buf_size < ctx->presentation.object_count * 8) {
> + ctx->presentation.object_count = 0;
> + return;
> + }
>
> - x = bytestream_get_be16(&buf);
> - y = bytestream_get_be16(&buf);
> + av_freep(&ctx->presentation.objects);
> + ctx->presentation.objects = av_malloc(sizeof(PGSSubPictureReference)
> * ctx->presentation.object_count);
> + if (!ctx->presentation.objects) {
> + ctx->presentation.object_count = 0;
> + return;
> + }
>
> - /* TODO If cropping, cropping_x, cropping_y, cropping_width,
> cropping_height (all 2 bytes).*/
> + for (object_index = 0; object_index < ctx->presentation.object_count;
> ++object_index) {
> + PGSSubPictureReference *reference =
> &ctx->presentation.objects[object_index];
> + reference->picture_id = bytestream_get_be16(&buf);
>
> - av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n", x, y);
> + /*
> + * Skip 2 bytes of unknown:
> + * window_id_ref,
> + * composition_flag (0x80 - object cropped, 0x40 - object
> forced)
> + */
> + buf += 2;
>
> - if (x > avctx->width || y > avctx->height) {
> - av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x =
> %d, y = %d, video width = %d, video height = %d.\n",
> - x, y, avctx->width, avctx->height);
> - x = 0; y = 0;
> - }
> + reference->x = bytestream_get_be16(&buf);
> + reference->y = bytestream_get_be16(&buf);
>
> - /* Fill in dimensions */
> - ctx->presentation.x = x;
> - ctx->presentation.y = y;
> + /* TODO If cropping, cropping_x, cropping_y, cropping_width,
> cropping_height (all 2 bytes).*/
> + av_dlog(avctx, "Subtitle Placement ID=%d, x=%d, y=%d\n",
> reference->picture_id, reference->x, reference->y);
> +
> + if (reference->x > avctx->width || reference->y > avctx->height) {
> + av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x
> = %d, y = %d, video width = %d, video height = %d.\n",
> + reference->x, reference->y, avctx->width,
> avctx->height);
> + reference->x = 0;
> + reference->y = 0;
> + }
> + }
> }
>
> /**
> @@ -349,45 +378,52 @@ static int display_end_segment(AVCodecContext
> *avctx, void *data,
> AVSubtitle *sub = data;
> PGSSubContext *ctx = avctx->priv_data;
>
> + uint16_t rect;
> +
> /*
> * The end display time is a timeout value and is only reached
> - * if the next subtitle is later then timeout or subtitle has
> + * if the next subtitle is later than timeout or subtitle has
> * not been cleared by a subsequent empty display command.
> */
>
> - // Blank if last object_number was 0.
> - // Note that this may be wrong for more complex subtitles.
> - if (!ctx->presentation.object_number)
> + memset(sub, 0, sizeof(*sub));
> +
> + // Blank if last object_count was 0.
> + if (!ctx->presentation.object_count)
> return 1;
> +
> sub->start_display_time = 0;
> sub->end_display_time = 20000;
> sub->format = 0;
>
> - sub->rects = av_mallocz(sizeof(*sub->rects));
> - sub->rects[0] = av_mallocz(sizeof(*sub->rects[0]));
> - sub->num_rects = 1;
> -
> - sub->rects[0]->x = ctx->presentation.x;
> - sub->rects[0]->y = ctx->presentation.y;
> - sub->rects[0]->w = ctx->picture.w;
> - sub->rects[0]->h = ctx->picture.h;
> - sub->rects[0]->type = SUBTITLE_BITMAP;
> -
> - /* Process bitmap */
> - sub->rects[0]->pict.linesize[0] = ctx->picture.w;
> -
> - if (ctx->picture.rle) {
> - if (ctx->picture.rle_remaining_len)
> - av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes
> shorter than expected\n",
> - ctx->picture.rle_data_len,
> ctx->picture.rle_remaining_len);
> - if(decode_rle(avctx, sub, ctx->picture.rle,
> ctx->picture.rle_data_len) < 0)
> - return 0;
> - }
> - /* Allocate memory for colors */
> - sub->rects[0]->nb_colors = 256;
> - sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
> + sub->num_rects = ctx->presentation.object_count;
> + sub->rects = av_mallocz(sizeof(*sub->rects) * sub->num_rects);
> +
> + for (rect = 0; rect < sub->num_rects; ++rect) {
> + uint16_t picture_id =
> ctx->presentation.objects[rect].picture_id;
> + sub->rects[rect] = av_mallocz(sizeof(*sub->rects[rect]));
> + sub->rects[rect]->x = ctx->presentation.objects[rect].x;
> + sub->rects[rect]->y = ctx->presentation.objects[rect].y;
> + sub->rects[rect]->w = ctx->pictures[picture_id].w;
> + sub->rects[rect]->h = ctx->pictures[picture_id].h;
> + sub->rects[rect]->type = SUBTITLE_BITMAP;
> +
> + /* Process bitmap */
> + sub->rects[rect]->pict.linesize[0] = ctx->pictures[picture_id].w;
> + if (ctx->pictures[picture_id].rle) {
> + if (ctx->pictures[picture_id].rle_remaining_len)
> + av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u
> bytes shorter than expected\n",
> + ctx->pictures[picture_id].rle_data_len,
> ctx->pictures[picture_id].rle_remaining_len);
> + if (decode_rle(avctx, sub, rect,
> ctx->pictures[picture_id].rle, ctx->pictures[picture_id].rle_data_len) < 0)
> + return 0;
> + }
> +
> + /* Allocate memory for colors */
> + sub->rects[rect]->nb_colors = 256;
> + sub->rects[rect]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
>
> - memcpy(sub->rects[0]->pict.data[1], ctx->clut,
> sub->rects[0]->nb_colors * sizeof(uint32_t));
> + memcpy(sub->rects[rect]->pict.data[1], ctx->clut,
> sub->rects[rect]->nb_colors * sizeof(uint32_t));
> + }
>
> return 1;
> }
> --
> 1.7.5.4
>
>
More information about the ffmpeg-devel
mailing list