[FFmpeg-devel] drawtext filter

Anton Khirnov anton at khirnov.net
Thu Mar 16 18:52:25 EET 2023


Overall this patch could use a lot more polish. I really wish you
developed it in a more review-friendly form.
The commit message should use standard form and elaborate on what
advantage do we get from this huge change, which also adds a new
dependendency.


Quoting Francesco Carusi (2023-02-03 15:18:21)
> diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
> index 50012bb258..7a0a255c5e 100644
> --- a/libavfilter/vf_drawtext.c
> +++ b/libavfilter/vf_drawtext.c

You are adding a bunch of trailing whitespace in the file, which is
forbidden.

> @@ -1,4 +1,5 @@
>  /*
> + * Copyright (c) 2023 Francesco Carusi
>   * Copyright (c) 2011 Stefano Sabatini
>   * Copyright (c) 2010 S.N. Hemanth Meenakshisundaram
>   * Copyright (c) 2003 Gustavo Sverzut Barbieri <gsbarbieri at yahoo.com.br>
> @@ -20,6 +21,14 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>   */
>  
> +/*
> + * Changelog - 2023
> + *
> + * - This filter now depends on libharfbuzz for text shaping.
> + * - Glyphs position is now accurate to 1/4 pixel in both directions
> + * - The default line height is now the one defined in the font
> + */

This is not the place for a changelog, that's what commit messages and
the Changelog file are for.

> +
>  /**
>   * @file
>   * drawtext filter, based on the original vhook/drawtext.c
> @@ -72,14 +81,20 @@
>  #include FT_GLYPH_H
>  #include FT_STROKER_H
>  
> +#include <hb.h>
> +#include <hb-ft.h>
> +
> +// Ceiling operation for positive integers division
> +#define POS_CEIL(x, y) ((x)/(y) + ((x)%(y) != 0))
> +
>  static const char *const var_names[] = {
>      "dar",
>      "hsub", "vsub",
> -    "line_h", "lh",           ///< line height, same as max_glyph_h
> +    "line_h", "lh",           ///< line height
>      "main_h", "h", "H",       ///< height of the input video
>      "main_w", "w", "W",       ///< width  of the input video
> -    "max_glyph_a", "ascent",  ///< max glyph ascent
> -    "max_glyph_d", "descent", ///< min glyph descent
> +    "max_glyph_a", "ascent",  ///< max glyph ascender
> +    "max_glyph_d", "descent", ///< min glyph descender

Seems like this should not be in this patch.

>  static const AVOption drawtext_options[]= {
> -    {"fontfile",    "set font file",        OFFSET(fontfile),           AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
> -    {"text",        "set text",             OFFSET(text),               AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
> -    {"textfile",    "set text file",        OFFSET(textfile),           AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
> -    {"fontcolor",   "set foreground color", OFFSET(fontcolor.rgba),     AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
> +    {"fontfile",       "set font file",         OFFSET(fontfile),           AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
> +    {"text",           "set text",              OFFSET(text),               AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
> +    {"textfile",       "set text file",         OFFSET(textfile),           AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
> +    {"fontcolor",      "set foreground color",  OFFSET(fontcolor.rgba),     AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
>      {"fontcolor_expr", "set foreground color expression", OFFSET(fontcolor_expr), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS},
> -    {"boxcolor",    "set box color",        OFFSET(boxcolor.rgba),      AV_OPT_TYPE_COLOR,  {.str="white"}, 0, 0, FLAGS},
> -    {"bordercolor", "set border color",     OFFSET(bordercolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
> -    {"shadowcolor", "set shadow color",     OFFSET(shadowcolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
> -    {"box",         "set box",              OFFSET(draw_box),           AV_OPT_TYPE_BOOL,   {.i64=0},     0,        1       , FLAGS},
> -    {"boxborderw",  "set box border width", OFFSET(boxborderw),         AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
> -    {"line_spacing",  "set line spacing in pixels", OFFSET(line_spacing),   AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX,FLAGS},
> -    {"fontsize",    "set font size",        OFFSET(fontsize_expr),      AV_OPT_TYPE_STRING, {.str=NULL},  0, 0 , FLAGS},
> -    {"x",           "set x expression",     OFFSET(x_expr),             AV_OPT_TYPE_STRING, {.str="0"},   0, 0, FLAGS},
> -    {"y",           "set y expression",     OFFSET(y_expr),             AV_OPT_TYPE_STRING, {.str="0"},   0, 0, FLAGS},
> -    {"shadowx",     "set shadow x offset",  OFFSET(shadowx),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
> -    {"shadowy",     "set shadow y offset",  OFFSET(shadowy),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
> -    {"borderw",     "set border width",     OFFSET(borderw),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
> -    {"tabsize",     "set tab size",         OFFSET(tabsize),            AV_OPT_TYPE_INT,    {.i64=4},     0,        INT_MAX , FLAGS},
> -    {"basetime",    "set base time",        OFFSET(basetime),           AV_OPT_TYPE_INT64,  {.i64=AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX , FLAGS},
> +    {"boxcolor",       "set box color",         OFFSET(boxcolor.rgba),      AV_OPT_TYPE_COLOR,  {.str="white"}, 0, 0, FLAGS},
> +    {"bordercolor",    "set border color",      OFFSET(bordercolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
> +    {"shadowcolor",    "set shadow color",      OFFSET(shadowcolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
> +    {"box",            "set box",               OFFSET(draw_box),           AV_OPT_TYPE_BOOL,   {.i64=0},     0, 1, FLAGS},
> +    {"boxborderw",     "set box borders width", OFFSET(boxborderw),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS},
> +    {"line_spacing",   "set line spacing in pixels", OFFSET(line_spacing),  AV_OPT_TYPE_INT,    {.i64=-1},    INT_MIN, INT_MAX, FLAGS},
> +    {"fontsize",       "set font size",         OFFSET(fontsize_expr),      AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
> +    {"x",              "set x expression",      OFFSET(x_expr),             AV_OPT_TYPE_STRING, {.str="0"},   0, 0, FLAGS},
> +    {"y",              "set y expression",      OFFSET(y_expr),             AV_OPT_TYPE_STRING, {.str="0"},   0, 0, FLAGS},
> +    {"shadowx",        "set shadow x offset",   OFFSET(shadowx),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN, INT_MAX, FLAGS},
> +    {"shadowy",        "set shadow y offset",   OFFSET(shadowy),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN, INT_MAX, FLAGS},
> +    {"borderw",        "set border width",      OFFSET(borderw),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN, INT_MAX, FLAGS},
> +    {"tabsize",        "set tab size",          OFFSET(tabsize),            AV_OPT_TYPE_INT,    {.i64=4},     0, INT_MAX, FLAGS},
> +    {"basetime",       "set base time",         OFFSET(basetime),           AV_OPT_TYPE_INT64,  {.i64=AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX, FLAGS},
>  #if CONFIG_LIBFONTCONFIG
>      { "font",        "Font name",            OFFSET(font),               AV_OPT_TYPE_STRING, { .str = "Sans" },           .flags = FLAGS },
>  #endif
> @@ -248,15 +338,15 @@ static const AVOption drawtext_options[]= {
>          {"strftime", "set strftime expansion (deprecated)", OFFSET(exp_mode), AV_OPT_TYPE_CONST, {.i64=EXP_STRFTIME}, 0, 0, FLAGS, "expansion"},
>  
>      {"timecode",        "set initial timecode",             OFFSET(tc_opt_string), AV_OPT_TYPE_STRING,   {.str=NULL}, 0, 0, FLAGS},
> -    {"tc24hmax",        "set 24 hours max (timecode only)", OFFSET(tc24hmax),      AV_OPT_TYPE_BOOL,     {.i64=0},           0,        1, FLAGS},
> -    {"timecode_rate",   "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
> -    {"r",               "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
> -    {"rate",            "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
> -    {"reload",     "reload text file at specified frame interval", OFFSET(reload),     AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
> -    { "alpha",       "apply alpha while rendering", OFFSET(a_expr),      AV_OPT_TYPE_STRING, { .str = "1"     },          .flags = FLAGS },
> -    {"fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
> -    {"start_number", "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
> -    {"text_source", "the source of text", OFFSET(text_source_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS },
> +    {"tc24hmax",        "set 24 hours max (timecode only)", OFFSET(tc24hmax),      AV_OPT_TYPE_BOOL,     {.i64=0},    0, 1, FLAGS},
> +    {"timecode_rate",   "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS},
> +    {"r",               "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS},
> +    {"rate",            "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS},
> +    {"reload",          "reload text file at specified frame interval", OFFSET(reload), AV_OPT_TYPE_INT, {.i64=0},    0, INT_MAX, FLAGS},
> +    {"alpha",           "apply alpha while rendering",      OFFSET(a_expr),        AV_OPT_TYPE_STRING,   {.str = "1"}, .flags = FLAGS},
> +    {"fix_bounds",      "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
> +    {"start_number",    "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
> +    {"text_source",     "the source of text", OFFSET(text_source_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS },

Most of these changes seem cosmetic, they do not belong in this patch.

>  
>  #if CONFIG_LIBFRIBIDI
>      {"text_shaping", "attempt to shape text before drawing", OFFSET(text_shaping), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS},
> @@ -297,18 +387,24 @@ static const struct ft_error {
>  
>  #define FT_ERRMSG(e) ft_errors[e].err_msg
>  
> -typedef struct Glyph {
> -    FT_Glyph glyph;
> -    FT_Glyph border_glyph;
> -    uint32_t code;
> -    unsigned int fontsize;
> -    FT_Bitmap bitmap; ///< array holding bitmaps of font
> -    FT_Bitmap border_bitmap; ///< array holding bitmaps of font border
> -    FT_BBox bbox;
> -    int advance;
> -    int bitmap_left;
> -    int bitmap_top;
> -} Glyph;
> +
> +// Loads and (optionally) renders a glyph
> +static int load_glyph(AVFilterContext *ctx, Glyph **glyph_ptr, uint32_t code,
> +     int8_t shift_x64, int8_t shift_y64);
> +
> +// Shapes a line of text using libharfbuzz
> +static void shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, int textLen);
> +
> +// Performs text measurements
> +static int measure_text(AVFilterContext *ctx, TextMetrics *metrics);
> +
> +// Draws glyphs on the frame
> +static int draw_glyphs(DrawTextContext *s, AVFrame *frame,
> +                       FFDrawColor *color, TextMetrics *metrics,
> +                       int x, int y, int borderw);
> +
> +// Draws text on the frame
> +static int draw_text(AVFilterContext *ctx, AVFrame *frame);

Why is there a need for forward declarations?

>  static int glyph_cmp(const void *key, const void *b)
>  {
> @@ -316,80 +412,9 @@ static int glyph_cmp(const void *key, const void *b)
>      int64_t diff = (int64_t)a->code - (int64_t)bb->code;
>  
>      if (diff != 0)
> -         return diff > 0 ? 1 : -1;
> +        return diff > 0 ? 1 : -1;

unrelated cosmetics

> @@ -439,7 +464,6 @@ static av_cold int update_fontsize(AVFilterContext *ctx)
>             return err;
>  
>          size = av_expr_eval(s->fontsize_pexpr, s->var_values, &s->prng);
> -

unrelated cosmetics

>          if (!isnan(size)) {
>              roundedsize = round(size);
>              // test for overflow before cast
> @@ -447,7 +471,6 @@ static av_cold int update_fontsize(AVFilterContext *ctx)
>                  av_log(ctx, AV_LOG_ERROR, "fontsize overflow\n");
>                  return AVERROR(EINVAL);
>              }
> -

unrelated cosmetics

>              fontsize = roundedsize;
>          }
>      }
> @@ -548,7 +571,7 @@ static int load_font_fontconfig(AVFilterContext *ctx)
>          goto fail;
>      }
>  
> -    av_log(ctx, AV_LOG_INFO, "Using \"%s\"\n", filename);
> +    av_log(ctx, AV_LOG_VERBOSE, "Using \"%s\"\n", filename);

unrelated cosmetics

>      if (parse_err)
>          s->default_fontsize = size + 0.5;
>  
> @@ -690,6 +713,7 @@ static int shape_text(AVFilterContext *ctx)
>      s->text = tmp;
>      len = fribidi_unicode_to_charset(FRIBIDI_CHAR_SET_UTF8,
>                                       unicodestr, len, s->text);
> +

unrelated cosmetics

>  
> +static void shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, int textLen) 
> +{
> +    hb->buf = hb_buffer_create();

Seems like it at least this needs to be checked for errors, maybe some
of the other calls too.

> @@ -1559,56 +1795,142 @@ continue_on_invalid2:
>      update_color_with_alpha(s, &bordercolor, s->bordercolor);
>      update_color_with_alpha(s, &boxcolor   , s->boxcolor   );
>  
> -    box_w = max_text_line_w;
> -    box_h = y + s->max_glyph_h;
> +    if (s->draw_box && s->boxborderw) {
> +        s->bb_top = s->bb_right = s->bb_bottom = s->bb_left = s->boxborderw;
> +    } else {
> +        s->bb_top = s->bb_right = s->bb_bottom = s->bb_left = 0;
> +    }
>  
>      if (s->fix_bounds) {
> -
>          /* calculate footprint of text effects */
> -        int boxoffset     = s->draw_box ? FFMAX(s->boxborderw, 0) : 0;
>          int borderoffset  = s->borderw  ? FFMAX(s->borderw, 0) : 0;
>  
> -        int offsetleft = FFMAX3(boxoffset, borderoffset,
> +        int offsetleft = FFMAX3(FFMAX(s->bb_left, 0), borderoffset,
>                                  (s->shadowx < 0 ? FFABS(s->shadowx) : 0));
> -        int offsettop = FFMAX3(boxoffset, borderoffset,
> +        int offsettop = FFMAX3(FFMAX(s->bb_top, 0), borderoffset,
>                                  (s->shadowy < 0 ? FFABS(s->shadowy) : 0));
> -
> -        int offsetright = FFMAX3(boxoffset, borderoffset,
> +        int offsetright = FFMAX3(FFMAX(s->bb_right, 0), borderoffset,
>                                   (s->shadowx > 0 ? s->shadowx : 0));
> -        int offsetbottom = FFMAX3(boxoffset, borderoffset,
> +        int offsetbottom = FFMAX3(FFMAX(s->bb_bottom, 0), borderoffset,
>                                    (s->shadowy > 0 ? s->shadowy : 0));
>  
> -
>          if (s->x - offsetleft < 0) s->x = offsetleft;
>          if (s->y - offsettop < 0)  s->y = offsettop;
>  
> -        if (s->x + box_w + offsetright > width)
> -            s->x = FFMAX(width - box_w - offsetright, 0);
> -        if (s->y + box_h + offsetbottom > height)
> -            s->y = FFMAX(height - box_h - offsetbottom, 0);
> +        if (s->x + metrics.width + offsetright > width)
> +            s->x = FFMAX(width - metrics.width - offsetright, 0);
> +        if (s->y + metrics.height + offsetbottom > height)
> +            s->y = FFMAX(height - metrics.height - offsetbottom, 0);
>      }
>  
> -    /* draw box */
> -    if (s->draw_box)
> -        ff_blend_rectangle(&s->dc, &boxcolor,
> -                           frame->data, frame->linesize, width, height,
> -                           s->x - s->boxborderw, s->y - s->boxborderw,
> -                           box_w + s->boxborderw * 2, box_h + s->boxborderw * 2);
> +    x = 0;
> +    y = 0;
> +    x64 = (int)(s->x * 64.);
> +    y64 = (int)(s->y * 64. + metrics.offset_top64);
> +
> +    for (int l = 0; l < s->line_count; ++l) {
> +        TextLine *line = &s->lines[l];
> +        HarfbuzzData *hb = &line->hb_data;
> +        line->glyphs = av_mallocz(hb->glyph_count * sizeof(GlyphInfo));
> +
> +        for (int t = 0; t < hb->glyph_count; ++t) {
> +            GlyphInfo *g_info = &line->glyphs[t];
> +            uint8_t is_tab = last_tab_idx < s->tab_count &&
> +                hb->glyph_info[t].cluster == s->tab_clusters[last_tab_idx] - line->cluster_offset;
> +            int true_x, true_y;
> +            if (is_tab) {
> +                ++last_tab_idx;
> +            }
> +            true_x = x + hb->glyph_pos[t].x_offset;
> +            true_y = y + hb->glyph_pos[t].y_offset;
> +            shift_x64 = (((x64 + true_x) >> 4) & 0b0011) << 4;
> +            shift_y64 = ((4 - (((y64 + true_y) >> 4) & 0b0011)) & 0b0011) << 4;
> +
> +            ret = load_glyph(ctx, &glyph, hb->glyph_info[t].codepoint, shift_x64, shift_y64);
> +            if (ret != 0) {
> +                return ret;
> +            }
> +            g_info->code = hb->glyph_info[t].codepoint;
> +            g_info->x = (x64 + true_x) >> 6;
> +            g_info->y = ((y64 + true_y) >> 6) + (shift_y64 > 0 ? 1 : 0);
> +            g_info->shift_x64 = shift_x64;
> +            g_info->shift_y64 = shift_y64;
> +
> +            if (!is_tab) {
> +                x += hb->glyph_pos[t].x_advance;
> +            } else {
> +                int size = s->blank_advance64 * s->tabsize;
> +                x = (x / size + 1) * size;
> +            }
> +            y += hb->glyph_pos[t].y_advance;
> +        }
>  
> -    if (s->shadowx || s->shadowy) {
> -        if ((ret = draw_glyphs(s, frame, width, height,
> -                               &shadowcolor, s->shadowx, s->shadowy, 0)) < 0)
> -            return ret;
> +        y += metrics.line_height64 + s->line_spacing * 64;
> +        x = 0;
>      }
>  
> -    if (s->borderw) {
> -        if ((ret = draw_glyphs(s, frame, width, height,
> -                               &bordercolor, 0, 0, s->borderw)) < 0)
> +    metrics.rect_x = s->x;
> +    metrics.rect_y = s->y;
> +    
> +    s->box_width = metrics.width;
> +    s->box_height = metrics.height;

What is the point of duplicating these values in both structs?


-- 
Anton Khirnov


More information about the ffmpeg-devel mailing list