[FFmpeg-devel] [PATCH 1/3] lavc/pgs_frame_merge_bsf: add bsf to merge PGS segments

John Stebbins jstebbins at jetheaddev.com
Fri Apr 17 19:11:35 EEST 2020


On Fri, 2020-04-17 at 08:52 -0700, John Stebbins wrote:
> On Fri, 2020-04-17 at 13:21 +0300, Petri Hintukainen wrote:
> > to, 2020-04-16 kello 16:57 -0600, John Stebbins kirjoitti:
> > > Required to remux m2ts to mkv
> > > ---
> > >  libavcodec/Makefile              |   1 +
> > >  libavcodec/bitstream_filters.c   |   1 +
> > >  libavcodec/pgs_frame_merge_bsf.c | 164
> > > +++++++++++++++++++++++++++++++
> > >  3 files changed, 166 insertions(+)
> > >  create mode 100644 libavcodec/pgs_frame_merge_bsf.c
> > > 
> > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > > index c1c9a44f2b..c7d8fbebaa 100644
> > > --- a/libavcodec/Makefile
> > > +++ b/libavcodec/Makefile
> > > @@ -1110,6 +1110,7 @@ OBJS-
> > > $(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  +=
> > > mp3_header_decompress_bsf.o \
> > >  OBJS-$(CONFIG_MPEG2_METADATA_BSF)         +=
> > > mpeg2_metadata_bsf.o
> > >  OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
> > >  OBJS-$(CONFIG_NULL_BSF)                   += null_bsf.o
> > > +OBJS-$(CONFIG_PGS_FRAME_MERGE_BSF)        +=
> > > pgs_frame_merge_bsf.o
> > >  OBJS-$(CONFIG_PRORES_METADATA_BSF)        +=
> > > prores_metadata_bsf.o
> > >  OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       +=
> > > remove_extradata_bsf.o
> > >  OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
> > > diff --git a/libavcodec/bitstream_filters.c
> > > b/libavcodec/bitstream_filters.c
> > > index 6b5ffe4d70..92619225f0 100644
> > > --- a/libavcodec/bitstream_filters.c
> > > +++ b/libavcodec/bitstream_filters.c
> > > @@ -49,6 +49,7 @@ extern const AVBitStreamFilter
> > > ff_mpeg4_unpack_bframes_bsf;
> > >  extern const AVBitStreamFilter ff_mov2textsub_bsf;
> > >  extern const AVBitStreamFilter ff_noise_bsf;
> > >  extern const AVBitStreamFilter ff_null_bsf;
> > > +extern const AVBitStreamFilter ff_pgs_frame_merge_bsf;
> > >  extern const AVBitStreamFilter ff_prores_metadata_bsf;
> > >  extern const AVBitStreamFilter ff_remove_extradata_bsf;
> > >  extern const AVBitStreamFilter ff_text2movsub_bsf;
> > > diff --git a/libavcodec/pgs_frame_merge_bsf.c
> > > b/libavcodec/pgs_frame_merge_bsf.c
> > > new file mode 100644
> > > index 0000000000..d1bb3a60c2
> > > --- /dev/null
> > > +++ b/libavcodec/pgs_frame_merge_bsf.c
> > > @@ -0,0 +1,164 @@
> > > +/*
> > > + * Copyright (c) 2020 John Stebbins <jstebbins.hb at gmail.com>
> > > + *
> > > + * This file is part of FFmpeg.
> > > + *
> > > + * FFmpeg is free software; you can redistribute it and/or
> > > + * modify it under the terms of the GNU Lesser General Public
> > > + * License as published by the Free Software Foundation; either
> > > + * version 2.1 of the License, or (at your option) any later
> > > version.
> > > + *
> > > + * FFmpeg is distributed in the hope that it will be useful,
> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty
> > > of
> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > GNU
> > > + * Lesser General Public License for more details.
> > > + *
> > > + * You should have received a copy of the GNU Lesser General
> > > Public
> > > + * License along with FFmpeg; if not, write to the Free Software
> > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > > 02110-1301 USA
> > > + */
> > > +
> > > +/**
> > > + * @file
> > > + * This bitstream filter merges PGS subtitle packets containing
> > > incomplete
> > > + * set of segments into a single packet
> > > + *
> > > + * Packets already containing a complete set of segments will be
> > > passed through
> > > + * unchanged.
> > > + */
> > > +
> > > +#include "avcodec.h"
> > > +#include "bsf.h"
> > > +#include "libavutil/intreadwrite.h"
> > > +
> > > +enum PGSSegmentType {
> > > +    PALETTE_SEGMENT      = 0x14,
> > > +    OBJECT_SEGMENT       = 0x15,
> > > +    PRESENTATION_SEGMENT = 0x16,
> > > +    WINDOW_SEGMENT       = 0x17,
> > > +    DISPLAY_SEGMENT      = 0x80,
> > > +};
> > 
> > This is duplicated in pgssubdec.c and pgs_frame_split_bsf.c. It's
> > not
> > much of code for a separate header, but maybe you'll find more
> > while
> > writing the encoder ?
> > 
> 
> I've already written the encoder.  This enum is the only thing
> shared.
> So hardly seems worth adding a header. 
> 
> Testing the encoder is what lead me to writing the bsfs. It "works"
> in
> the sense that it's generating correct bitstreams.  But I realized
> the
> bsfs were needed to handle muxing properly.  And there is still the
> case to handle when the input AVSubtitle has end_display_time
> set.  In
> this case I need to generate 2 segment sequences.  One for the
> subtitle
> and one to indicate the end of the subtitle.  The current subtitle
> encoder API doesn't handle multipe output packets for on input
> subtitle
> gracefully.  So that's next on my list to fix.
> 
> > > +typedef struct PGSMergeContext {
> > > +    AVPacket *buffer_pkt, *in;
> > > +    int presentation_found;
> > > +} PGSMergeContext;
> > > +
> > > +static void frame_merge_flush(AVBSFContext *bsf)
> > > +{
> > > +    PGSMergeContext *ctx = bsf->priv_data;
> > > +
> > > +    av_packet_unref(ctx->in);
> > > +    av_packet_unref(ctx->buffer_pkt);
> > > +}
> > > +
> > > +static int frame_merge_filter(AVBSFContext *bsf, AVPacket *out)
> > > +{
> > > +    PGSMergeContext *ctx = bsf->priv_data;
> > > +    AVPacket *in = ctx->in, *pkt = ctx->buffer_pkt;
> > > +    int ret, i, size, pos, display = 0;
> > > +
> > > +    if (!in->data) {
> > > +        ret = ff_bsf_get_packet_ref(bsf, in);
> > > +        if (ret < 0)
> > > +            return ret;
> > > +    }
> > > +    if (!in->size) {
> > > +        av_packet_unref(in);
> > > +        return AVERROR(EAGAIN);
> > > +    }
> > > +
> > > +    // Validate packet data and find display_end segment
> > > +    size = in->size;
> > > +    i = 0;
> > > +    while (i + 3 <= in->size) {
> > > +        uint8_t segment_type;
> > > +        int segment_len;
> > > +
> > > +        segment_type = in->data[i];
> > > +        segment_len  = AV_RB16(in->data + i + 1) + 3;
> > > +        if (i + segment_len > in->size) // Invalid data
> > > +            break;
> > > +        if (segment_type == PRESENTATION_SEGMENT && !ctx-
> > > > presentation_found) {
> > > +            int object_count;
> > > +            ctx->presentation_found = 1;
> > > +            ret = av_packet_copy_props(pkt, in);
> > > +            if (ret < 0)
> > > +                goto fail;
> > > +            object_count  = in->data[i + 13];
> > 
> > Possible OOB read with invalid input ?
> 
> Yeah, when I woke up this morning, I realized I had done this.
> Will fix
> 
> > > +            pkt->flags = !!object_count * AV_PKT_FLAG_KEY;
> > 
> > Does keyframe here mean something that can be decoded and displayed
> > without previous display sets ?
> > 
> > If yes, maybe you could check composition descriptor state instead
> > ?
> > I
> > haven't looked into PGS internals in long time, but I think objects
> > and
> > palettes can be shared between display sets. Ex. animations could
> > be
> > implemented with only location / cropping / palette changes.
> 
> Hmm, good point.  *All* information to decode a subtitle is only
> guaranteed to be present in the current sequence when the
> composition_state == 1.  For other values, it may be referencing
> objects from previous sequences.  This is probably a better way to
> signal key frames.
> 
> Thanks!
> 

Well, that's just not going to work (:  It appears the "aquisition
point" state is pretty much never used.  At least I never see it in a
few samples I've tested.  They appear to like to simply set start of
epoch state and normal state.  And both states almost always have
complete information in the current sequence to render the subtitle.

So I'm afraid all we have are kind of bad choices.  Signal a keyframe
whenever there are objects provided and risk that the decoder may
encounter references it can't find.  Signal keyframes only on epoch
start and objects are provided, this will miss all "normal state"
sequences which are the majority. Signal a keyframe on acquisition
state which appears to *never* happen.

I lean toward keyframes for any sequence that has objects and let the
decoder deal with missing references.

> > > +        }
> > > +        i += segment_len;
> > > +        if (segment_type == DISPLAY_SEGMENT) {
> > > +            size = display = i;
> > > +            break;
> > > +        }
> > > +    }
> > > +    if (display && pkt->size == 0 && size == in->size) { //
> > > passthrough
> > > +        av_packet_move_ref(out, in);
> > > +        return 0;
> > > +    }
> > > +    if ((!display && i != in->size) || size > in->size) {
> > > +        av_log(bsf, AV_LOG_WARNING, "Failed to parse PGS
> > > segments.\n");
> > > +        // force output what we have
> > > +        display = size = in->size;
> > > +    }
> > > +
> > > +    pos = pkt->size;
> > > +    ret = av_grow_packet(pkt, size);
> > > +    if (ret < 0)
> > > +        goto fail;
> > > +    memcpy(pkt->data + pos, in->data, size);
> > > +
> > > +    if (size == in->size)
> > > +        av_packet_unref(in);
> > > +    else {
> > > +        in->data += size;
> > > +        in->size -= size;
> > > +    }
> > > +
> > > +    if (display) {
> > > +        ctx->presentation_found = 0;
> > > +        av_packet_move_ref(out, pkt);
> > > +        return 0;
> > > +    }
> > > +    return AVERROR(EAGAIN);
> > > +
> > > +fail:
> > > +    frame_merge_flush(bsf);
> > > +    return ret;
> > > +}
> > > +
> > > +static int frame_merge_init(AVBSFContext *bsf)
> > > +{
> > > +    PGSMergeContext *ctx = bsf->priv_data;
> > > +
> > > +    ctx->in  = av_packet_alloc();
> > > +    ctx->buffer_pkt = av_packet_alloc();
> > > +    if (!ctx->in || !ctx->buffer_pkt)
> > > +        return AVERROR(ENOMEM);
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static void frame_merge_close(AVBSFContext *bsf)
> > > +{
> > > +    PGSMergeContext *ctx = bsf->priv_data;
> > > +
> > > +    av_packet_free(&ctx->in);
> > > +    av_packet_free(&ctx->buffer_pkt);
> > > +}
> > > +
> > > +static const enum AVCodecID frame_merge_codec_ids[] = {
> > > +    AV_CODEC_ID_HDMV_PGS_SUBTITLE, AV_CODEC_ID_NONE,
> > > +};
> > > +
> > > +const AVBitStreamFilter ff_pgs_frame_merge_bsf = {
> > > +    .name           = "pgs_frame_merge",
> > > +    .priv_data_size = sizeof(PGSMergeContext),
> > > +    .init           = frame_merge_init,
> > > +    .flush          = frame_merge_flush,
> > > +    .close          = frame_merge_close,
> > > +    .filter         = frame_merge_filter,
> > > +    .codec_ids      = frame_merge_codec_ids,
> > > +};


More information about the ffmpeg-devel mailing list