[FFmpeg-devel] [PATCH 22/23] lavc/movtextenc: add font name handling

Thu Apr 9 18:21:35 EEST 2020

On Wed, 2020-04-08 at 12:23 -0700, Philip Langdale wrote:
> On Mon, 6 Apr 2020 11:52:17 -0600
> John Stebbins <jstebbins at jetheaddev.com> wrote:
> 
> > Initializes the mov text sample description from the ASS header and
> > creates an mov font table from the fonts available in the ASS
> > Styles.
> > ---
> >  libavcodec/ass_split.c  |   5 +
> >  libavcodec/ass_split.h  |   8 ++
> >  libavcodec/movtextenc.c | 253
> > ++++++++++++++++++++++++++++++++-------- 3 files changed, 216
> > insertions(+), 50 deletions(-)
> > 
> > diff --git a/libavcodec/ass_split.c b/libavcodec/ass_split.c
> > index 94c32667af..9d5a66f931 100644
> > --- a/libavcodec/ass_split.c
> > +++ b/libavcodec/ass_split.c
> > @@ -599,3 +599,8 @@ ASSStyle *ff_ass_style_get(ASSSplitContext
> > *ctx,
> > const char *style) return ass->styles + i;
> >      return NULL;
> >  }
> > +
> > +ASS *ff_ass_get(ASSSplitContext *ctx)
> > +{
> > +    return &ctx->ass;
> > +}
> > diff --git a/libavcodec/ass_split.h b/libavcodec/ass_split.h
> > index 30ce77250c..31b8e53242 100644
> > --- a/libavcodec/ass_split.h
> > +++ b/libavcodec/ass_split.h
> > @@ -204,4 +204,12 @@ int ff_ass_split_override_codes(const
> > ASSCodesCallbacks *callbacks, void *priv, */
> >  ASSStyle *ff_ass_style_get(ASSSplitContext *ctx, const char
> > *style);
> >  
> > +/**
> > + * Get ASS structure
> > + *
> > + * @param ctx Context previously initialized by ff_ass_split().
> > + * @return the ASS
> > + */
> > +ASS *ff_ass_get(ASSSplitContext *ctx);
> 
> Is this necesssary? The header says:

You are correct, I missed that comment.  I'll fix it. Kind of a sneaky
"API".

> 
> /**
>  * This struct can be casted to ASS to access to the split data.
>  */
> typedef struct ASSSplitContext ASSSplitContext;
> 
> So can't you just cast the context you already have?
> 
> > +
> >  #endif /* AVCODEC_ASS_SPLIT_H */
> > diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
> > index 167dffee6a..a62bdb7eb0 100644
> > --- a/libavcodec/movtextenc.c
> > +++ b/libavcodec/movtextenc.c
> > @@ -79,6 +79,8 @@ typedef struct {
> >      StyleBox d;
> >      uint16_t text_pos;
> >      uint16_t byte_count;
> > +    char ** fonts;
> > +    int font_count;
> >  } MovTextContext;
> >  
> >  typedef struct {
> > @@ -171,69 +173,198 @@ static const Box box_types[] = {
> >  
> >  const static size_t box_count = FF_ARRAY_ELEMS(box_types);
> >  
> > -static av_cold int mov_text_encode_init(AVCodecContext *avctx)
> > +static int mov_text_encode_close(AVCodecContext *avctx)
> >  {
> > -    /*
> > -     * For now, we'll use a fixed default style. When we add
> > styling
> > -     * support, this will be generated from the ASS style.
> > -     */
> > -    static const uint8_t text_sample_entry[] = {
> > +    MovTextContext *s = avctx->priv_data;
> > +    int i;
> > +
> > +    ff_ass_split_free(s->ass_ctx);
> > +    if (s->style_attributes) {
> > +        for (i = 0; i < s->count; i++) {
> > +            av_freep(&s->style_attributes[i]);
> > +        }
> > +        av_freep(&s->style_attributes);
> > +    }
> > +    av_freep(&s->fonts);
> > +    av_freep(&s->style_attributes_temp);
> > +    av_bprint_finalize(&s->buffer, NULL);
> > +    return 0;
> > +}
> > +
> > +static int encode_sample_description(AVCodecContext *avctx)
> > +{
> > +    ASS * ass;
> > +    ASSStyle * style;
> > +    int i, j;
> > +    uint32_t tsmb_size, tsmb_type, back_color, style_color;
> > +    uint16_t style_start, style_end, fontID, count;
> > +    int font_names_total_len = 0;
> > +    MovTextContext *s = avctx->priv_data;
> > +
> > +    static const uint8_t display_and_justification[] = {
> >          0x00, 0x00, 0x00, 0x00, // uint32_t displayFlags
> >          0x01,                   // int8_t horizontal-justification
> >          0xFF,                   // int8_t vertical-justification
> > -        0x00, 0x00, 0x00, 0x00, // uint8_t background-color-
> > rgba[4]
> > -        // BoxRecord {
> > +    };
> > +    //  0x00, 0x00, 0x00, 0x00, // uint8_t background-color-
> > rgba[4]
> 
> Is this right? Should it be un-commented or removed entirely?

Well, I was leaving that in as a comment showing the structure of the
sample description.  But if it's confusing, I can just remove all those
comments.  There are several others that do not represent the actual
values written.

> 
> > +    static const uint8_t box_record[] = {
> > +    //     BoxRecord {
> >          0x00, 0x00,             // int16_t top
> >          0x00, 0x00,             // int16_t left
> >          0x00, 0x00,             // int16_t bottom
> >          0x00, 0x00,             // int16_t right
> > -        // };
> > -        // StyleRecord {
> > -        0x00, 0x00,             // uint16_t startChar
> > -        0x00, 0x00,             // uint16_t endChar
> > -        0x00, 0x01,             // uint16_t font-ID
> > -        0x00,                   // uint8_t face-style-flags
> > -        0x12,                   // uint8_t font-size
> > -        0xFF, 0xFF, 0xFF, 0xFF, // uint8_t text-color-rgba[4]
> > -        // };
> > -        // FontTableBox {
> > -        0x00, 0x00, 0x00, 0x12, // uint32_t size
> > -        'f', 't', 'a', 'b',     // uint8_t name[4]
> > -        0x00, 0x01,             // uint16_t entry-count
> > -        // FontRecord {
> > -        0x00, 0x01,             // uint16_t font-ID
> > -        0x05,                   // uint8_t font-name-length
> > -        'S', 'e', 'r', 'i', 'f',// uint8_t font[font-name-length]
> > -        // };
> > -        // };
> > +    //     };
> >      };
> > +    //     StyleRecord {
> > +    //  0x00, 0x00,             // uint16_t startChar
> > +    //  0x00, 0x00,             // uint16_t endChar
> > +    //  0x00, 0x01,             // uint16_t font-ID
> > +    //  0x00,                   // uint8_t face-style-flags
> > +    //  0x12,                   // uint8_t font-size
> > +    //  0xFF, 0xFF, 0xFF, 0xFF, // uint8_t text-color-rgba[4]
> > +    //     };
> > +    //     FontTableBox {
> > +    //  0x00, 0x00, 0x00, 0x12, // uint32_t size
> > +    //  'f', 't', 'a', 'b',     // uint8_t name[4]
> > +    //  0x00, 0x01,             // uint16_t entry-count
> > +    //     FontRecord {
> > +    //  0x00, 0x01,             // uint16_t font-ID
> > +    //  0x05,                   // uint8_t font-name-length
> > +    //  'S', 'e', 'r', 'i', 'f',// uint8_t font[font-name-length]
> > +    //     };
> > +    //     };
> 
> Did you really want to leave this all here commented out?

As I stated above, it shows the structure of the sample description
(more clearly than the code does I think).  But it's not necessary and
I'll remove it if it's distracting or confusing.

> 
> > +    // Populate sample description from ASS header
> > +    ass = ff_ass_get(s->ass_ctx);
> > +    style = ff_ass_style_get(s->ass_ctx, "Default");
> > +    if (!style && ass->styles_count) {
> > +        style = &ass->styles[0];
> > +    }
> > +    s->d.style_fontID   = DEFAULT_STYLE_FONT_ID;
> > +    s->d.style_fontsize = DEFAULT_STYLE_FONTSIZE;
> > +    s->d.style_color    = DEFAULT_STYLE_COLOR;
> > +    s->d.style_flag     = DEFAULT_STYLE_FLAG;
> > +    if (style) {
> > +        s->d.style_fontsize = style->font_size;
> > +        s->d.style_color = BGR_TO_RGB(style->primary_color &
> > 0xffffff) << 8 |
> > +                           255 - ((uint32_t)style->primary_color
> > >>
> > 24);
> > +        s->d.style_flag = (!!style->bold      *
> > STYLE_FLAG_BOLD)   |
> > +                          (!!style->italic    * STYLE_FLAG_ITALIC)
> > |
> > +                          (!!style->underline *
> > STYLE_FLAG_UNDERLINE);
> > +        back_color = (BGR_TO_RGB(style->back_color & 0xffffff) <<
> > 8)
> > +                     (255 - ((uint32_t)style->back_color >> 24));
> > +    }
> >  
> > -    MovTextContext *s = avctx->priv_data;
> > -    s->avctx = avctx;
> > +    av_bprint_append_any(&s->buffer, display_and_justification,
> > +
> > sizeof(display_and_justification));
> > +    back_color = AV_RB32(&back_color);
> > +    av_bprint_append_any(&s->buffer, &back_color, 4);
> > +    //     BoxRecord {
> > +    av_bprint_append_any(&s->buffer, box_record,
> > sizeof(box_record));
> > +    //     };
> > +    //     StyleRecord {
> > +    style_start  = AV_RB16(&s->d.style_start);
> > +    style_end    = AV_RB16(&s->d.style_end);
> > +    fontID = AV_RB16(&s->d.style_fontID);
> > +    style_color  = AV_RB32(&s->d.style_color);
> > +    av_bprint_append_any(&s->buffer, &style_start, 2);
> > +    av_bprint_append_any(&s->buffer, &style_end, 2);
> > +    av_bprint_append_any(&s->buffer, &fontID, 2);
> > +    av_bprint_append_any(&s->buffer, &s->d.style_flag, 1);
> > +    av_bprint_append_any(&s->buffer, &s->d.style_fontsize, 1);
> > +    av_bprint_append_any(&s->buffer, &style_color, 4);
> > +    //     };
> > +
> > +    // Build font table
> > +    // We can't build a complete font table since that would
> > require
> > +    // scanning all dialogs first.  But we can at least fill in
> > what
> > +    // is avaiable in the ASS header
> > +    if (style && ass->styles_count) {
> > +        // Find unique font names
> > +        av_dynarray_add(&s->fonts, &s->font_count, style-
> > >font_name);
> > +        font_names_total_len += strlen(style->font_name);
> > +        for (i = 0; i < ass->styles_count; i++) {
> > +            int found = 0;
> > +            for (j = 0; j < s->font_count; j++) {
> > +                if (!strcmp(s->fonts[j], ass-
> > >styles[i].font_name)) {
> > +                    found = 1;
> > +                    break;
> > +                }
> > +            }
> > +            if (!found) {
> > +                av_dynarray_add(&s->fonts, &s->font_count,
> > +                                           ass-
> > >styles[i].font_name);
> > +                font_names_total_len +=
> > strlen(ass->styles[i].font_name);
> > +            }
> > +        }
> > +    } else
> > +        av_dynarray_add(&s->fonts, &s->font_count,
> > (char*)"Serif");
> > +
> > +    //     FontTableBox {
> > +    tsmb_size = SIZE_ADD + 3 * s->font_count +
> > font_names_total_len;
> > +    tsmb_size = AV_RB32(&tsmb_size);
> > +    tsmb_type = MKTAG('f','t','a','b');
> > +    count = AV_RB16(&s->font_count);
> > +    av_bprint_append_any(&s->buffer, &tsmb_size, 4);
> > +    av_bprint_append_any(&s->buffer, &tsmb_type, 4);
> > +    av_bprint_append_any(&s->buffer, &count, 2);
> > +    //     FontRecord {
> > +    for (i = 0; i < s->font_count; i++) {
> > +        int len;
> > +        fontID = i + 1;
> > +        fontID = AV_RB16(&fontID);
> > +        av_bprint_append_any(&s->buffer, &fontID, 2);
> > +        len = strlen(s->fonts[i]);
> > +        av_bprint_append_any(&s->buffer, &len, 1);
> > +        av_bprint_append_any(&s->buffer, s->fonts[i], len);
> > +    }
> > +    //     };
> > +    //     };
> >  
> > -    s->style_attributes_temp =
> > av_mallocz(sizeof(*s->style_attributes_temp));
> > -    if (!s->style_attributes_temp) {
> > +    if (!av_bprint_is_complete(&s->buffer)) {
> >          return AVERROR(ENOMEM);
> >      }
> >  
> > -    avctx->extradata_size = sizeof text_sample_entry;
> > +    avctx->extradata_size = s->buffer.len;
> >      avctx->extradata = av_mallocz(avctx->extradata_size +
> > AV_INPUT_BUFFER_PADDING_SIZE);
> > -    if (!avctx->extradata)
> > +    if (!avctx->extradata) {
> >          return AVERROR(ENOMEM);
> > +    }
> > +
> > +    memcpy(avctx->extradata, s->buffer.str, avctx-
> > >extradata_size);
> > +    av_bprint_clear(&s->buffer);
> > +
> > +    return 0;
> > +}
> > +
> > +static av_cold int mov_text_encode_init(AVCodecContext *avctx)
> > +{
> > +    int ret;
> > +    MovTextContext *s = avctx->priv_data;
> > +    s->avctx = avctx;
> >  
> >      av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
> >  
> > -    memcpy(avctx->extradata, text_sample_entry,
> > avctx->extradata_size);
> > +    s->style_attributes_temp =
> > av_mallocz(sizeof(*s->style_attributes_temp));
> > +    if (!s->style_attributes_temp) {
> > +        ret = AVERROR(ENOMEM);
> > +        goto fail;
> > +    }
> >  
> >      s->ass_ctx = ff_ass_split(avctx->subtitle_header);
> > +    if (!s->ass_ctx) {
> > +        ret = AVERROR_INVALIDDATA;
> > +        goto fail;
> > +    }
> > +    ret = encode_sample_description(avctx);
> > +    if (ret < 0)
> > +        goto fail;
> >  
> > -    // TODO: Initialize from ASS style record
> > -    s->d.style_fontID   = DEFAULT_STYLE_FONT_ID;
> > -    s->d.style_fontsize = DEFAULT_STYLE_FONTSIZE;
> > -    s->d.style_color    = DEFAULT_STYLE_COLOR;
> > -    s->d.style_flag     = DEFAULT_STYLE_FLAG;
> > +    return 0;
> >  
> > -    return s->ass_ctx ? 0 : AVERROR_INVALIDDATA;
> > +fail:
> > +    mov_text_encode_close(avctx);
> > +    return ret;
> >  }
> >  
> >  // Start a new style box if needed
> > @@ -243,8 +374,9 @@ static int mov_text_style_start(MovTextContext
> > *s)
> >      if (s->style_attributes_temp->style_start == s->text_pos)
> >          // Still at same text pos, use same entry
> >          return 1;
> > -    if (s->style_attributes_temp->style_flag     != s-
> > >d.style_flag
> > -        s->style_attributes_temp->style_color    != s-
> > >d.style_color
> > +    if (s->style_attributes_temp->style_flag     != s-
> > >d.style_flag
> >  ||
> > +        s->style_attributes_temp->style_color    != s-
> > >d.style_color
> >  ||
> > +        s->style_attributes_temp->style_fontID   !=
> > s->d.style_fontID || s->style_attributes_temp->style_fontsize !=
> > s->d.style_fontsize) { // last style != defaults, end the style
> > entry
> > and start a new one s->box_flags |= STYL_BOX;
> > @@ -369,6 +501,33 @@ static void mov_text_alpha_cb(void *priv, int
> > alpha, int alpha_id) mov_text_alpha_set(s, 255 - alpha);
> >  }
> >  
> > +static uint16_t find_font_id(MovTextContext * s, const char *
> > name)
> > +{
> > +    int i;
> > +    for (i = 0; i < s->font_count; i++) {
> > +        if (!strcmp(name, s->fonts[i]))
> > +            return i + 1;
> > +    }
> > +    return 1;
> > +}
> > +
> > +static void mov_text_font_name_set(MovTextContext *s, const char
> > *name) +{
> > +    int fontID = find_font_id(s, name);
> > +    if (!s->style_attributes_temp ||
> > +        s->style_attributes_temp->style_fontID == fontID) {
> > +        // color hasn't changed
> > +        return;
> > +    }
> > +    if (mov_text_style_start(s))
> > +        s->style_attributes_temp->style_fontID = fontID;
> > +}
> > +
> > +static void mov_text_font_name_cb(void *priv, const char *name)
> > +{
> > +    mov_text_font_name_set((MovTextContext*)priv, name);
> > +}
> > +
> >  static void mov_text_font_size_set(MovTextContext *s, int size)
> >  {
> >      if (!s->style_attributes_temp ||
> > @@ -406,6 +565,7 @@ static void
> > mov_text_ass_style_set(MovTextContext
> > *s, ASSStyle *style) alpha = 255 - ((uint32_t)style->primary_color
> > >>
> > 24); mov_text_alpha_set(s, alpha);
> >          mov_text_font_size_set(s, style->font_size);
> > +        mov_text_font_name_set(s, style->font_name);
> >      } else {
> >          // End current style record, go back to defaults
> >          mov_text_style_start(s);
> > @@ -471,6 +631,7 @@ static const ASSCodesCallbacks
> > mov_text_callbacks
> > = { .style            = mov_text_style_cb,
> >      .color            = mov_text_color_cb,
> >      .alpha            = mov_text_alpha_cb,
> > +    .font_name        = mov_text_font_name_cb,
> >      .font_size        = mov_text_font_size_cb,
> >      .cancel_overrides = mov_text_cancel_overrides_cb,
> >      .end              = mov_text_end_cb,
> > @@ -548,14 +709,6 @@ exit:
> >      return length;
> >  }
> >  
> > -static int mov_text_encode_close(AVCodecContext *avctx)
> > -{
> > -    MovTextContext *s = avctx->priv_data;
> > -    ff_ass_split_free(s->ass_ctx);
> > -    av_bprint_finalize(&s->buffer, NULL);
> > -    return 0;
> > -}
> > -
> >  AVCodec ff_movtext_encoder = {
> >      .name           = "mov_text",
> >      .long_name      = NULL_IF_CONFIG_SMALL("3GPP Timed Text
> > subtitle"),
> 
> Rest LGTM.
> 
>