[FFmpeg-devel] drawtext filter

Francesco Carusi klimklim at tiscali.it
Mon Mar 20 09:41:08 EET 2023


Would it be ok if I split the patch so that one is dedicated to 
cosmetics and one to the implementation?
I'll also remove the change log section and the forward declarations and 
improve the error checking.


On 16/03/2023 17:52, Anton Khirnov wrote:
> Overall this patch could use a lot more polish. I really wish you
> developed it in a more review-friendly form.
> The commit message should use standard form and elaborate on what
> advantage do we get from this huge change, which also adds a new
> dependendency.
>
>
> Quoting Francesco Carusi (2023-02-03 15:18:21)
>> diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
>> index 50012bb258..7a0a255c5e 100644
>> --- a/libavfilter/vf_drawtext.c
>> +++ b/libavfilter/vf_drawtext.c
> You are adding a bunch of trailing whitespace in the file, which is
> forbidden.
>
>> @@ -1,4 +1,5 @@
>>   /*
>> + * Copyright (c) 2023 Francesco Carusi
>>    * Copyright (c) 2011 Stefano Sabatini
>>    * Copyright (c) 2010 S.N. Hemanth Meenakshisundaram
>>    * Copyright (c) 2003 Gustavo Sverzut Barbieri <gsbarbieri at yahoo.com.br>
>> @@ -20,6 +21,14 @@
>>    * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>>    */
>>   
>> +/*
>> + * Changelog - 2023
>> + *
>> + * - This filter now depends on libharfbuzz for text shaping.
>> + * - Glyphs position is now accurate to 1/4 pixel in both directions
>> + * - The default line height is now the one defined in the font
>> + */
> This is not the place for a changelog, that's what commit messages and
> the Changelog file are for.
>
>> +
>>   /**
>>    * @file
>>    * drawtext filter, based on the original vhook/drawtext.c
>> @@ -72,14 +81,20 @@
>>   #include FT_GLYPH_H
>>   #include FT_STROKER_H
>>   
>> +#include <hb.h>
>> +#include <hb-ft.h>
>> +
>> +// Ceiling operation for positive integers division
>> +#define POS_CEIL(x, y) ((x)/(y) + ((x)%(y) != 0))
>> +
>>   static const char *const var_names[] = {
>>       "dar",
>>       "hsub", "vsub",
>> -    "line_h", "lh",           ///< line height, same as max_glyph_h
>> +    "line_h", "lh",           ///< line height
>>       "main_h", "h", "H",       ///< height of the input video
>>       "main_w", "w", "W",       ///< width  of the input video
>> -    "max_glyph_a", "ascent",  ///< max glyph ascent
>> -    "max_glyph_d", "descent", ///< min glyph descent
>> +    "max_glyph_a", "ascent",  ///< max glyph ascender
>> +    "max_glyph_d", "descent", ///< min glyph descender
> Seems like this should not be in this patch.
>
>>   static const AVOption drawtext_options[]= {
>> -    {"fontfile",    "set font file",        OFFSET(fontfile),           AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
>> -    {"text",        "set text",             OFFSET(text),               AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
>> -    {"textfile",    "set text file",        OFFSET(textfile),           AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
>> -    {"fontcolor",   "set foreground color", OFFSET(fontcolor.rgba),     AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
>> +    {"fontfile",       "set font file",         OFFSET(fontfile),           AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
>> +    {"text",           "set text",              OFFSET(text),               AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
>> +    {"textfile",       "set text file",         OFFSET(textfile),           AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
>> +    {"fontcolor",      "set foreground color",  OFFSET(fontcolor.rgba),     AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
>>       {"fontcolor_expr", "set foreground color expression", OFFSET(fontcolor_expr), AV_OPT_TYPE_STRING, {.str=""}, 0, 0, FLAGS},
>> -    {"boxcolor",    "set box color",        OFFSET(boxcolor.rgba),      AV_OPT_TYPE_COLOR,  {.str="white"}, 0, 0, FLAGS},
>> -    {"bordercolor", "set border color",     OFFSET(bordercolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
>> -    {"shadowcolor", "set shadow color",     OFFSET(shadowcolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
>> -    {"box",         "set box",              OFFSET(draw_box),           AV_OPT_TYPE_BOOL,   {.i64=0},     0,        1       , FLAGS},
>> -    {"boxborderw",  "set box border width", OFFSET(boxborderw),         AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
>> -    {"line_spacing",  "set line spacing in pixels", OFFSET(line_spacing),   AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX,FLAGS},
>> -    {"fontsize",    "set font size",        OFFSET(fontsize_expr),      AV_OPT_TYPE_STRING, {.str=NULL},  0, 0 , FLAGS},
>> -    {"x",           "set x expression",     OFFSET(x_expr),             AV_OPT_TYPE_STRING, {.str="0"},   0, 0, FLAGS},
>> -    {"y",           "set y expression",     OFFSET(y_expr),             AV_OPT_TYPE_STRING, {.str="0"},   0, 0, FLAGS},
>> -    {"shadowx",     "set shadow x offset",  OFFSET(shadowx),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
>> -    {"shadowy",     "set shadow y offset",  OFFSET(shadowy),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
>> -    {"borderw",     "set border width",     OFFSET(borderw),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
>> -    {"tabsize",     "set tab size",         OFFSET(tabsize),            AV_OPT_TYPE_INT,    {.i64=4},     0,        INT_MAX , FLAGS},
>> -    {"basetime",    "set base time",        OFFSET(basetime),           AV_OPT_TYPE_INT64,  {.i64=AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX , FLAGS},
>> +    {"boxcolor",       "set box color",         OFFSET(boxcolor.rgba),      AV_OPT_TYPE_COLOR,  {.str="white"}, 0, 0, FLAGS},
>> +    {"bordercolor",    "set border color",      OFFSET(bordercolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
>> +    {"shadowcolor",    "set shadow color",      OFFSET(shadowcolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, 0, 0, FLAGS},
>> +    {"box",            "set box",               OFFSET(draw_box),           AV_OPT_TYPE_BOOL,   {.i64=0},     0, 1, FLAGS},
>> +    {"boxborderw",     "set box borders width", OFFSET(boxborderw),         AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS},
>> +    {"line_spacing",   "set line spacing in pixels", OFFSET(line_spacing),  AV_OPT_TYPE_INT,    {.i64=-1},    INT_MIN, INT_MAX, FLAGS},
>> +    {"fontsize",       "set font size",         OFFSET(fontsize_expr),      AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS},
>> +    {"x",              "set x expression",      OFFSET(x_expr),             AV_OPT_TYPE_STRING, {.str="0"},   0, 0, FLAGS},
>> +    {"y",              "set y expression",      OFFSET(y_expr),             AV_OPT_TYPE_STRING, {.str="0"},   0, 0, FLAGS},
>> +    {"shadowx",        "set shadow x offset",   OFFSET(shadowx),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN, INT_MAX, FLAGS},
>> +    {"shadowy",        "set shadow y offset",   OFFSET(shadowy),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN, INT_MAX, FLAGS},
>> +    {"borderw",        "set border width",      OFFSET(borderw),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN, INT_MAX, FLAGS},
>> +    {"tabsize",        "set tab size",          OFFSET(tabsize),            AV_OPT_TYPE_INT,    {.i64=4},     0, INT_MAX, FLAGS},
>> +    {"basetime",       "set base time",         OFFSET(basetime),           AV_OPT_TYPE_INT64,  {.i64=AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX, FLAGS},
>>   #if CONFIG_LIBFONTCONFIG
>>       { "font",        "Font name",            OFFSET(font),               AV_OPT_TYPE_STRING, { .str = "Sans" },           .flags = FLAGS },
>>   #endif
>> @@ -248,15 +338,15 @@ static const AVOption drawtext_options[]= {
>>           {"strftime", "set strftime expansion (deprecated)", OFFSET(exp_mode), AV_OPT_TYPE_CONST, {.i64=EXP_STRFTIME}, 0, 0, FLAGS, "expansion"},
>>   
>>       {"timecode",        "set initial timecode",             OFFSET(tc_opt_string), AV_OPT_TYPE_STRING,   {.str=NULL}, 0, 0, FLAGS},
>> -    {"tc24hmax",        "set 24 hours max (timecode only)", OFFSET(tc24hmax),      AV_OPT_TYPE_BOOL,     {.i64=0},           0,        1, FLAGS},
>> -    {"timecode_rate",   "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
>> -    {"r",               "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
>> -    {"rate",            "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
>> -    {"reload",     "reload text file at specified frame interval", OFFSET(reload),     AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
>> -    { "alpha",       "apply alpha while rendering", OFFSET(a_expr),      AV_OPT_TYPE_STRING, { .str = "1"     },          .flags = FLAGS },
>> -    {"fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
>> -    {"start_number", "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
>> -    {"text_source", "the source of text", OFFSET(text_source_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS },
>> +    {"tc24hmax",        "set 24 hours max (timecode only)", OFFSET(tc24hmax),      AV_OPT_TYPE_BOOL,     {.i64=0},    0, 1, FLAGS},
>> +    {"timecode_rate",   "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS},
>> +    {"r",               "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS},
>> +    {"rate",            "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS},
>> +    {"reload",          "reload text file at specified frame interval", OFFSET(reload), AV_OPT_TYPE_INT, {.i64=0},    0, INT_MAX, FLAGS},
>> +    {"alpha",           "apply alpha while rendering",      OFFSET(a_expr),        AV_OPT_TYPE_STRING,   {.str = "1"}, .flags = FLAGS},
>> +    {"fix_bounds",      "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
>> +    {"start_number",    "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
>> +    {"text_source",     "the source of text", OFFSET(text_source_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS },
> Most of these changes seem cosmetic, they do not belong in this patch.
>
>>   
>>   #if CONFIG_LIBFRIBIDI
>>       {"text_shaping", "attempt to shape text before drawing", OFFSET(text_shaping), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS},
>> @@ -297,18 +387,24 @@ static const struct ft_error {
>>   
>>   #define FT_ERRMSG(e) ft_errors[e].err_msg
>>   
>> -typedef struct Glyph {
>> -    FT_Glyph glyph;
>> -    FT_Glyph border_glyph;
>> -    uint32_t code;
>> -    unsigned int fontsize;
>> -    FT_Bitmap bitmap; ///< array holding bitmaps of font
>> -    FT_Bitmap border_bitmap; ///< array holding bitmaps of font border
>> -    FT_BBox bbox;
>> -    int advance;
>> -    int bitmap_left;
>> -    int bitmap_top;
>> -} Glyph;
>> +
>> +// Loads and (optionally) renders a glyph
>> +static int load_glyph(AVFilterContext *ctx, Glyph **glyph_ptr, uint32_t code,
>> +     int8_t shift_x64, int8_t shift_y64);
>> +
>> +// Shapes a line of text using libharfbuzz
>> +static void shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, int textLen);
>> +
>> +// Performs text measurements
>> +static int measure_text(AVFilterContext *ctx, TextMetrics *metrics);
>> +
>> +// Draws glyphs on the frame
>> +static int draw_glyphs(DrawTextContext *s, AVFrame *frame,
>> +                       FFDrawColor *color, TextMetrics *metrics,
>> +                       int x, int y, int borderw);
>> +
>> +// Draws text on the frame
>> +static int draw_text(AVFilterContext *ctx, AVFrame *frame);
> Why is there a need for forward declarations?
>
>>   static int glyph_cmp(const void *key, const void *b)
>>   {
>> @@ -316,80 +412,9 @@ static int glyph_cmp(const void *key, const void *b)
>>       int64_t diff = (int64_t)a->code - (int64_t)bb->code;
>>   
>>       if (diff != 0)
>> -         return diff > 0 ? 1 : -1;
>> +        return diff > 0 ? 1 : -1;
> unrelated cosmetics
>
>> @@ -439,7 +464,6 @@ static av_cold int update_fontsize(AVFilterContext *ctx)
>>              return err;
>>   
>>           size = av_expr_eval(s->fontsize_pexpr, s->var_values, &s->prng);
>> -
> unrelated cosmetics
>
>>           if (!isnan(size)) {
>>               roundedsize = round(size);
>>               // test for overflow before cast
>> @@ -447,7 +471,6 @@ static av_cold int update_fontsize(AVFilterContext *ctx)
>>                   av_log(ctx, AV_LOG_ERROR, "fontsize overflow\n");
>>                   return AVERROR(EINVAL);
>>               }
>> -
> unrelated cosmetics
>
>>               fontsize = roundedsize;
>>           }
>>       }
>> @@ -548,7 +571,7 @@ static int load_font_fontconfig(AVFilterContext *ctx)
>>           goto fail;
>>       }
>>   
>> -    av_log(ctx, AV_LOG_INFO, "Using \"%s\"\n", filename);
>> +    av_log(ctx, AV_LOG_VERBOSE, "Using \"%s\"\n", filename);
> unrelated cosmetics
>
>>       if (parse_err)
>>           s->default_fontsize = size + 0.5;
>>   
>> @@ -690,6 +713,7 @@ static int shape_text(AVFilterContext *ctx)
>>       s->text = tmp;
>>       len = fribidi_unicode_to_charset(FRIBIDI_CHAR_SET_UTF8,
>>                                        unicodestr, len, s->text);
>> +
> unrelated cosmetics
>
>>   
>> +static void shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, int textLen)
>> +{
>> +    hb->buf = hb_buffer_create();
> Seems like it at least this needs to be checked for errors, maybe some
> of the other calls too.
>
>> @@ -1559,56 +1795,142 @@ continue_on_invalid2:
>>       update_color_with_alpha(s, &bordercolor, s->bordercolor);
>>       update_color_with_alpha(s, &boxcolor   , s->boxcolor   );
>>   
>> -    box_w = max_text_line_w;
>> -    box_h = y + s->max_glyph_h;
>> +    if (s->draw_box && s->boxborderw) {
>> +        s->bb_top = s->bb_right = s->bb_bottom = s->bb_left = s->boxborderw;
>> +    } else {
>> +        s->bb_top = s->bb_right = s->bb_bottom = s->bb_left = 0;
>> +    }
>>   
>>       if (s->fix_bounds) {
>> -
>>           /* calculate footprint of text effects */
>> -        int boxoffset     = s->draw_box ? FFMAX(s->boxborderw, 0) : 0;
>>           int borderoffset  = s->borderw  ? FFMAX(s->borderw, 0) : 0;
>>   
>> -        int offsetleft = FFMAX3(boxoffset, borderoffset,
>> +        int offsetleft = FFMAX3(FFMAX(s->bb_left, 0), borderoffset,
>>                                   (s->shadowx < 0 ? FFABS(s->shadowx) : 0));
>> -        int offsettop = FFMAX3(boxoffset, borderoffset,
>> +        int offsettop = FFMAX3(FFMAX(s->bb_top, 0), borderoffset,
>>                                   (s->shadowy < 0 ? FFABS(s->shadowy) : 0));
>> -
>> -        int offsetright = FFMAX3(boxoffset, borderoffset,
>> +        int offsetright = FFMAX3(FFMAX(s->bb_right, 0), borderoffset,
>>                                    (s->shadowx > 0 ? s->shadowx : 0));
>> -        int offsetbottom = FFMAX3(boxoffset, borderoffset,
>> +        int offsetbottom = FFMAX3(FFMAX(s->bb_bottom, 0), borderoffset,
>>                                     (s->shadowy > 0 ? s->shadowy : 0));
>>   
>> -
>>           if (s->x - offsetleft < 0) s->x = offsetleft;
>>           if (s->y - offsettop < 0)  s->y = offsettop;
>>   
>> -        if (s->x + box_w + offsetright > width)
>> -            s->x = FFMAX(width - box_w - offsetright, 0);
>> -        if (s->y + box_h + offsetbottom > height)
>> -            s->y = FFMAX(height - box_h - offsetbottom, 0);
>> +        if (s->x + metrics.width + offsetright > width)
>> +            s->x = FFMAX(width - metrics.width - offsetright, 0);
>> +        if (s->y + metrics.height + offsetbottom > height)
>> +            s->y = FFMAX(height - metrics.height - offsetbottom, 0);
>>       }
>>   
>> -    /* draw box */
>> -    if (s->draw_box)
>> -        ff_blend_rectangle(&s->dc, &boxcolor,
>> -                           frame->data, frame->linesize, width, height,
>> -                           s->x - s->boxborderw, s->y - s->boxborderw,
>> -                           box_w + s->boxborderw * 2, box_h + s->boxborderw * 2);
>> +    x = 0;
>> +    y = 0;
>> +    x64 = (int)(s->x * 64.);
>> +    y64 = (int)(s->y * 64. + metrics.offset_top64);
>> +
>> +    for (int l = 0; l < s->line_count; ++l) {
>> +        TextLine *line = &s->lines[l];
>> +        HarfbuzzData *hb = &line->hb_data;
>> +        line->glyphs = av_mallocz(hb->glyph_count * sizeof(GlyphInfo));
>> +
>> +        for (int t = 0; t < hb->glyph_count; ++t) {
>> +            GlyphInfo *g_info = &line->glyphs[t];
>> +            uint8_t is_tab = last_tab_idx < s->tab_count &&
>> +                hb->glyph_info[t].cluster == s->tab_clusters[last_tab_idx] - line->cluster_offset;
>> +            int true_x, true_y;
>> +            if (is_tab) {
>> +                ++last_tab_idx;
>> +            }
>> +            true_x = x + hb->glyph_pos[t].x_offset;
>> +            true_y = y + hb->glyph_pos[t].y_offset;
>> +            shift_x64 = (((x64 + true_x) >> 4) & 0b0011) << 4;
>> +            shift_y64 = ((4 - (((y64 + true_y) >> 4) & 0b0011)) & 0b0011) << 4;
>> +
>> +            ret = load_glyph(ctx, &glyph, hb->glyph_info[t].codepoint, shift_x64, shift_y64);
>> +            if (ret != 0) {
>> +                return ret;
>> +            }
>> +            g_info->code = hb->glyph_info[t].codepoint;
>> +            g_info->x = (x64 + true_x) >> 6;
>> +            g_info->y = ((y64 + true_y) >> 6) + (shift_y64 > 0 ? 1 : 0);
>> +            g_info->shift_x64 = shift_x64;
>> +            g_info->shift_y64 = shift_y64;
>> +
>> +            if (!is_tab) {
>> +                x += hb->glyph_pos[t].x_advance;
>> +            } else {
>> +                int size = s->blank_advance64 * s->tabsize;
>> +                x = (x / size + 1) * size;
>> +            }
>> +            y += hb->glyph_pos[t].y_advance;
>> +        }
>>   
>> -    if (s->shadowx || s->shadowy) {
>> -        if ((ret = draw_glyphs(s, frame, width, height,
>> -                               &shadowcolor, s->shadowx, s->shadowy, 0)) < 0)
>> -            return ret;
>> +        y += metrics.line_height64 + s->line_spacing * 64;
>> +        x = 0;
>>       }
>>   
>> -    if (s->borderw) {
>> -        if ((ret = draw_glyphs(s, frame, width, height,
>> -                               &bordercolor, 0, 0, s->borderw)) < 0)
>> +    metrics.rect_x = s->x;
>> +    metrics.rect_y = s->y;
>> +
>> +    s->box_width = metrics.width;
>> +    s->box_height = metrics.height;
> What is the point of duplicating these values in both structs?
>
>



More information about the ffmpeg-devel mailing list