[FFmpeg-devel] [PATCH 4/8] h264_metadata: Add support for A/53 closed captions

Wed Mar 21 01:02:21 EET 2018

On Mon, Mar 12, 2018 at 9:25 AM, Mark Thompson <sw at jkqxz.net> wrote:

> On 12/03/18 15:10, Hendrik Leppkes wrote:
> > On Mon, Mar 12, 2018 at 2:38 PM, Mark Thompson <sw at jkqxz.net> wrote:
> >> On 12/03/18 09:54, Hendrik Leppkes wrote:
> >>> On Sun, Mar 11, 2018 at 7:30 PM, Mark Thompson <sw at jkqxz.net> wrote:
> >>>> ---
> >>>>  libavcodec/h264_metadata_bsf.c | 121 ++++++++++++++++++++++++++++++
> +++++++++++
> >>>>  1 file changed, 121 insertions(+)
> >>>>
> >>>> diff --git a/libavcodec/h264_metadata_bsf.c
> b/libavcodec/h264_metadata_bsf.c
> >>>> index 36047887ca..d340c55990 100644
> >>>> --- a/libavcodec/h264_metadata_bsf.c
> >>>> +++ b/libavcodec/h264_metadata_bsf.c
> >>>> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
> >>>>      int display_orientation;
> >>>>      double rotate;
> >>>>      int flip;
> >>>> +
> >>>> +    int a53_cc;
> >>>>  } H264MetadataContext;
> >>>>
> >>>>
> >>>> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext
> *bsf, AVPacket *out)
> >>>>      int err, i, j, has_sps;
> >>>>      uint8_t *displaymatrix_side_data = NULL;
> >>>>      size_t displaymatrix_side_data_size = 0;
> >>>> +    uint8_t *a53_side_data = NULL;
> >>>> +    size_t a53_side_data_size = 0;
> >>>>
> >>>>      err = ff_bsf_get_packet(bsf, &in);
> >>>>      if (err < 0)
> >>>> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext
> *bsf, AVPacket *out)
> >>>>          }
> >>>>      }
> >>>>
> >>>> +    if (ctx->a53_cc == INSERT) {
> >>>> +        uint8_t *data;
> >>>> +        int size;
> >>>> +
> >>>> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC,
> &size);
> >>>> +        if (data) {
> >>>> +            H264RawSEIPayload payload = {
> >>>> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
> >>>> +            };
> >>>> +            H264RawSEIUserDataRegistered *udr =
> >>>> +                &payload.payload.user_data_registered;
> >>>> +
> >>>> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d
> bytes.\n", size);
> >>>> +
> >>>> +            udr->data_length = size + 10;
> >>>> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
> >>>> +            if (!udr->data_ref) {
> >>>> +                err = AVERROR(ENOMEM);
> >>>> +                goto fail;
> >>>> +            }
> >>>> +            udr->data = udr->data_ref->data;
> >>>> +
> >>>> +            udr->itu_t_t35_country_code = 181;
> >>>> +            udr->data[0] = 0;
> >>>> +            udr->data[1] = 49;
> >>>> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
> >>>> +            udr->data[6] = 3;
> >>>> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
> >>>> +            udr->data[8] = 0;
> >>>> +            memcpy(udr->data + 9, data, size);
> >>>> +            udr->data[size + 9] = 0xff;
> >>>> +
> >>>> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au,
> &payload);
> >>>> +            if (err < 0) {
> >>>> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data
> SEI "
> >>>> +                       "message to access unit.\n");
> >>>> +                av_buffer_unref(&udr->data_ref);
> >>>> +                goto fail;
> >>>> +            }
> >>>> +        }
> >>>> +
> >>>> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
> >>>> +        for (i = 0; i < au->nb_units; i++) {
> >>>> +            H264RawSEI *sei;
> >>>> +            if (au->units[i].type != H264_NAL_SEI)
> >>>> +                continue;
> >>>> +            sei = au->units[i].content;
> >>>> +
> >>>> +            for (j = 0; j < sei->payload_count; j++) {
> >>>> +                H264RawSEIUserDataRegistered *udr;
> >>>> +                uint32_t tag;
> >>>> +                uint8_t type_code, count;
> >>>> +
> >>>> +                if (sei->payload[j].payload_type !=
> >>>> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
> >>>> +                    continue;
> >>>> +                udr = &sei->payload[j].payload.user_data_registered;
> >>>> +                tag = AV_RB32(udr->data + 2);
> >>>> +                type_code = udr->data[6];
> >>>> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code
> != 3)
> >>>> +                    continue;
> >>>> +
> >>>> +                if (ctx->a53_cc == REMOVE) {
> >>>> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc,
> au,
> >>>> +
>  &au->units[i], j);
> >>>> +                    if (err < 0) {
> >>>> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
> >>>> +                               "A53 CC SEI message.\n");
> >>>> +                        goto fail;
> >>>> +                    }
> >>>> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
> >>>> +
> >>>> +                    --i;
> >>>> +                    break;
> >>>> +                }
> >>>> +
> >>>> +                // Extract.
> >>>> +                count = udr->data[7] & 0x1f;
> >>>> +                if (3 * count + 10 > udr->data_length) {
> >>>> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed
> caption "
> >>>> +                           "data: count %d overflows length %zu.\n",
> >>>> +                           count, udr->data_length);
> >>>> +                    continue;
> >>>> +                }
> >>>> +
> >>>> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu
> bytes.\n", udr->data_length);
> >>>> +
> >>>> +                err = av_reallocp(&a53_side_data,
> >>>> +                                  a53_side_data_size + 3 * count);
> >>>> +                if (err)
> >>>> +                    goto fail;
> >>>> +                memcpy(a53_side_data + a53_side_data_size,
> >>>> +                       udr->data + 9, 3 * count);
> >>>> +                a53_side_data_size += 3 * count;
> >>>> +            }
> >>>> +        }
> >>>> +    }
> >>>> +
> >>>>      err = ff_cbs_write_packet(ctx->cbc, out, au);
> >>>>      if (err < 0) {
> >>>>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
> >>>> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext
> *bsf, AVPacket *out)
> >>>>          }
> >>>>          displaymatrix_side_data = NULL;
> >>>>      }
> >>>> +    if (a53_side_data) {
> >>>> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
> >>>> +                                      a53_side_data,
> a53_side_data_size);
> >>>> +        if (err) {
> >>>> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted
> A/53 "
> >>>> +                   "side data to packet.\n");
> >>>> +            goto fail;
> >>>> +        }
> >>>> +        a53_side_data = NULL;
> >>>> +    }
> >>>>
> >>>>      ctx->done_first_au = 1;
> >>>>
> >>>> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext
> *bsf, AVPacket *out)
> >>>>  fail:
> >>>>      ff_cbs_fragment_uninit(ctx->cbc, au);
> >>>>      av_freep(&displaymatrix_side_data);
> >>>> +    av_freep(&a53_side_data);
> >>>>
> >>>>      av_packet_free(&in);
> >>>>
> >>>> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
> >>>>      { "vertical",   "Set ver_flip",
> >>>>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit
> ="flip" },
> >>>>
> >>>> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
> >>>> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
> >>>> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
> >>>> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    },
> .unit = "a53_cc" },
> >>>> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  },
> .unit = "a53_cc" },
> >>>> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  },
> .unit = "a53_cc" },
> >>>> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT },
> .unit = "a53_cc" },
> >>>> +
> >>>
> >>> Does extracting really make sense? Doesn't the data end up out of
> >>> order and basically unusable?
> >>
> >> Well, it's up to whatever follows to deal with that.  If the stream has
correct timestamps (ha) then you can use those directly.  If you're
> feeding the output to an opaque hardware decoder then having some way to
> associate input packets with output frames is sufficient to get the right
> ordering.  A BSF to deal with reordering somehow is also possible.
> >>
> >
> > That seems like a problem. You generate side-data like any other, but
> > in reality it's invalid, and if you process it like any other A53
> > side-data you get corrupt output.
>
> Each packet is being tagged with the side-data extracted from the contents
> of that packet.  Like the non-side-data part of a packet, this needs to be
> passed through a decoder (of some kind) to turn a packet into a frame
> before you can display it sensibly.
>
> > The same goes for "insert", I guess, how do I figure out in which
> > order to pass things to it to get any sort of functional output? That's
> > a very specific setup which requires extremely custom and careful
> > usage, does that really fit in a generic filter?
> As with the decoder case, it can work with any encoder which can match
> output packets to input frames.  (I haven't sent any patches to pass
> side-data through an encoder yet, but I plan to do so at least for VAAPI.)
>

The videotoolbox encoder currently implements its own A/53 insertion logic
(which is also buggy and produces broken bitstreams in some cases). It does
so by passing the side-data into the system encoder along with the video
data, and then inserting it into the SEI manually when the encoder returns a
slice.

Using this new INSERT bitstream mode would clean up that encoder
significantly, and also probably fix the bug in the manual SEI manipulation
code.

Aman

>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>