[FFmpeg-devel] [PATCH 2/9] fftools/textformat: Quality improvements

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Tue Apr 15 04:05:44 EEST 2025


softworkz:
> From: softworkz <softworkz at hotmail.com>
> 
> Signed-off-by: softworkz <softworkz at hotmail.com>
> ---
>  fftools/textformat/avtextformat.c | 121 +++++++++++++++++++-----------
>  fftools/textformat/avtextformat.h |   6 +-
>  fftools/textformat/tf_default.c   |   8 +-
>  fftools/textformat/tf_ini.c       |   2 +-
>  fftools/textformat/tf_json.c      |   8 +-
>  fftools/textformat/tf_xml.c       |   3 -
>  fftools/textformat/tw_avio.c      |   9 ++-
>  7 files changed, 101 insertions(+), 56 deletions(-)
> 
> diff --git a/fftools/textformat/avtextformat.c b/fftools/textformat/avtextformat.c
> index 1ce51d11e2..406025d19d 100644
> --- a/fftools/textformat/avtextformat.c
> +++ b/fftools/textformat/avtextformat.c
> @@ -93,9 +93,8 @@ static const AVClass textcontext_class = {
>  
>  static void bprint_bytes(AVBPrint *bp, const uint8_t *ubuf, size_t ubuf_size)
>  {
> -    int i;
>      av_bprintf(bp, "0X");
> -    for (i = 0; i < ubuf_size; i++)
> +    for (unsigned i = 0; i < ubuf_size; i++)

Why not size_t?

>          av_bprintf(bp, "%02X", ubuf[i]);
>  }
>  
> @@ -110,8 +109,6 @@ int avtext_context_close(AVTextFormatContext **ptctx)
>  
>      av_hash_freep(&tctx->hash);
>  
> -    av_hash_freep(&tctx->hash);
> -
>      if (tctx->formatter->uninit)
>          tctx->formatter->uninit(tctx);
>      for (i = 0; i < SECTION_MAX_NB_LEVELS; i++)
> @@ -141,12 +138,18 @@ int avtext_context_open(AVTextFormatContext      **ptctx,
>      AVTextFormatContext *tctx;
>      int i, ret = 0;
>  
> -    if (!(tctx = av_mallocz(sizeof(AVTextFormatContext)))) {
> +    if (!ptctx || !formatter)
> +        return AVERROR(EINVAL);

Can this happen?

> +
> +    if (!formatter->priv_size && formatter->priv_class)
> +        return AVERROR(EINVAL);

Stuff like this should never happen and should not be checked (or
actually: the proper place to check stuff like this is in test tools
like lavc/tests/avcodec.c, but I don't think it is worth it for fftools).

> +
> +    if (!((tctx = av_mallocz(sizeof(AVTextFormatContext))))) {
>          ret = AVERROR(ENOMEM);
>          goto fail;
>      }
>  
> -    if (!(tctx->priv = av_mallocz(formatter->priv_size))) {
> +    if (formatter->priv_size && !((tctx->priv = av_mallocz(formatter->priv_size)))) {
>          ret = AVERROR(ENOMEM);
>          goto fail;
>      }
> @@ -215,15 +218,15 @@ int avtext_context_open(AVTextFormatContext      **ptctx,
>  
>      /* validate replace string */
>      {
> -        const uint8_t *p = tctx->string_validation_replacement;
> -        const uint8_t *endp = p + strlen(p);
> +        const uint8_t *p = (uint8_t *)tctx->string_validation_replacement;
> +        const uint8_t *endp = p + strlen((const char *)p);
>          while (*p) {
>              const uint8_t *p0 = p;
>              int32_t code;
>              ret = av_utf8_decode(&code, &p, endp, tctx->string_validation_utf8_flags);
>              if (ret < 0) {
>                  AVBPrint bp;
> -                av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
> +                av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED);

This adds a memleak on data where it makes a difference.

>                  bprint_bytes(&bp, p0, p - p0),
>                      av_log(tctx, AV_LOG_ERROR,
>                             "Invalid UTF8 sequence %s found in string validation replace '%s'\n",
> @@ -259,6 +262,9 @@ static const char unit_bit_per_second_str[] = "bit/s";
>  
>  void avtext_print_section_header(AVTextFormatContext *tctx, const void *data, int section_id)
>  {
> +    if (!tctx || section_id < 0 || section_id >= tctx->nb_sections)
> +        return;

Can this happen?

> +
>      tctx->level++;
>      av_assert0(tctx->level < SECTION_MAX_NB_LEVELS);
>  
> @@ -272,6 +278,9 @@ void avtext_print_section_header(AVTextFormatContext *tctx, const void *data, in
>  
>  void avtext_print_section_footer(AVTextFormatContext *tctx)
>  {
> +    if (!tctx || tctx->level < 0 || tctx->level >= SECTION_MAX_NB_LEVELS)
> +        return;

Can this happen?

> +
>      int section_id = tctx->section[tctx->level]->id;
>      int parent_section_id = tctx->level
>          ? tctx->section[tctx->level - 1]->id
> @@ -289,7 +298,12 @@ void avtext_print_section_footer(AVTextFormatContext *tctx)
>  
>  void avtext_print_integer(AVTextFormatContext *tctx, const char *key, int64_t val)
>  {
> -    const struct AVTextFormatSection *section = tctx->section[tctx->level];
> +    const AVTextFormatSection *section;
> +
> +    if (!tctx || !key || tctx->level < 0 || tctx->level >= SECTION_MAX_NB_LEVELS)
> +        return;

Can this happen?

> +
> +    section = tctx->section[tctx->level];
>  
>      if (section->show_all_entries || av_dict_get(section->entries_to_show, key, NULL, 0)) {
>          tctx->formatter->print_integer(tctx, key, val);
> @@ -299,24 +313,28 @@ void avtext_print_integer(AVTextFormatContext *tctx, const char *key, int64_t va
>  
>  static inline int validate_string(AVTextFormatContext *tctx, char **dstp, const char *src)
>  {
> -    const uint8_t *p, *endp;
> +    const uint8_t *p, *endp, *srcp = (const uint8_t *)src;
>      AVBPrint dstbuf;
> +    AVBPrint bp;
>      int invalid_chars_nb = 0, ret = 0;
>  
> +    if (!tctx || !dstp || !src)
> +        return AVERROR(EINVAL);
> +

Can this happen?

> +    *dstp = NULL;
>      av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED);
> +    av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED);
>  
> -    endp = src + strlen(src);
> -    for (p = src; *p;) {
> -        uint32_t code;
> +    endp = srcp + strlen(src);
> +    for (p = srcp; *p;) {
> +        int32_t code;
>          int invalid = 0;
>          const uint8_t *p0 = p;
>  
>          if (av_utf8_decode(&code, &p, endp, tctx->string_validation_utf8_flags) < 0) {
> -            AVBPrint bp;
> -            av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
> -            bprint_bytes(&bp, p0, p-p0);
> -            av_log(tctx, AV_LOG_DEBUG,
> -                   "Invalid UTF-8 sequence %s found in string '%s'\n", bp.str, src);
> +            av_bprint_clear(&bp);
> +            bprint_bytes(&bp, p0, p - p0);
> +            av_log(tctx, AV_LOG_DEBUG, "Invalid UTF-8 sequence %s found in string '%s'\n", bp.str, src);
>              invalid = 1;
>          }
>  
> @@ -336,7 +354,7 @@ static inline int validate_string(AVTextFormatContext *tctx, char **dstp, const
>          }
>  
>          if (!invalid || tctx->string_validation == AV_TEXTFORMAT_STRING_VALIDATION_IGNORE)
> -            av_bprint_append_data(&dstbuf, p0, p-p0);
> +            av_bprint_append_data(&dstbuf, (const char *)p0, p - p0);
>      }
>  
>      if (invalid_chars_nb && tctx->string_validation == AV_TEXTFORMAT_STRING_VALIDATION_REPLACE)
> @@ -346,6 +364,7 @@ static inline int validate_string(AVTextFormatContext *tctx, char **dstp, const
>  
>  end:
>      av_bprint_finalize(&dstbuf, dstp);
> +    av_bprint_finalize(&bp, NULL);
>      return ret;
>  }
>  
> @@ -358,17 +377,18 @@ struct unit_value {
>      const char *unit;
>  };
>  
> -static char *value_string(AVTextFormatContext *tctx, char *buf, int buf_size, struct unit_value uv)
> +static char *value_string(const AVTextFormatContext *tctx, char *buf, int buf_size, struct unit_value uv)
>  {
>      double vald;
> -    int64_t vali;
> +    int64_t vali = 0;
>      int show_float = 0;
>  
>      if (uv.unit == unit_second_str) {
>          vald = uv.val.d;
>          show_float = 1;
>      } else {
> -        vald = vali = uv.val.i;
> +        vald = (double)uv.val.i;
> +        vali = uv.val.i;
>      }
>  
>      if (uv.unit == unit_second_str && tctx->use_value_sexagesimal_format) {
> @@ -387,17 +407,17 @@ static char *value_string(AVTextFormatContext *tctx, char *buf, int buf_size, st
>              int64_t index;
>  
>              if (uv.unit == unit_byte_str && tctx->use_byte_value_binary_prefix) {
> -                index = (int64_t) (log2(vald)) / 10;
> -                index = av_clip(index, 0, FF_ARRAY_ELEMS(si_prefixes) - 1);
> +                index = (int64_t)(log2(vald) / 10);
> +                index = av_clip64(index, 0, FF_ARRAY_ELEMS(si_prefixes) - 1);
>                  vald /= si_prefixes[index].bin_val;
>                  prefix_string = si_prefixes[index].bin_str;
>              } else {
> -                index = (int64_t) (log10(vald)) / 3;
> -                index = av_clip(index, 0, FF_ARRAY_ELEMS(si_prefixes) - 1);
> +                index = (int64_t)(log10(vald) / 3);
> +                index = av_clip64(index, 0, FF_ARRAY_ELEMS(si_prefixes) - 1);
>                  vald /= si_prefixes[index].dec_val;
>                  prefix_string = si_prefixes[index].dec_str;
>              }
> -            vali = vald;
> +            vali = (int64_t)vald;
>          }
>  
>          if (show_float || (tctx->use_value_prefix && vald != (int64_t)vald))
> @@ -425,9 +445,14 @@ void avtext_print_unit_int(AVTextFormatContext *tctx, const char *key, int value
>  
>  int avtext_print_string(AVTextFormatContext *tctx, const char *key, const char *val, int flags)
>  {
> -    const struct AVTextFormatSection *section = tctx->section[tctx->level];
> +    const AVTextFormatSection *section;
>      int ret = 0;
>  
> +    if (!tctx || !key || !val || tctx->level < 0 || tctx->level >= SECTION_MAX_NB_LEVELS)
> +        return AVERROR(EINVAL);

Can this happen?

> +
> +    section = tctx->section[tctx->level];
> +
>      if (tctx->show_optional_fields == SHOW_OPTIONAL_FIELDS_NEVER ||
>          (tctx->show_optional_fields == SHOW_OPTIONAL_FIELDS_AUTO
>              && (flags & AV_TEXTFORMAT_PRINT_STRING_OPTIONAL)
> @@ -462,7 +487,7 @@ int avtext_print_string(AVTextFormatContext *tctx, const char *key, const char *
>  void avtext_print_rational(AVTextFormatContext *tctx, const char *key, AVRational q, char sep)
>  {
>      AVBPrint buf;
> -    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_AUTOMATIC);
> +    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

This is strictly worse than what was here before: With UNLIMITED you
would have a memleak in case the internal buffer wouldn't suffice.
(But anyway, this should use snprintf. I just sent a patch for this.)

>      av_bprintf(&buf, "%d%c%d", q.num, sep, q.den);
>      avtext_print_string(tctx, key, buf.str, 0);
>  }
> @@ -470,12 +495,11 @@ void avtext_print_rational(AVTextFormatContext *tctx, const char *key, AVRationa
>  void avtext_print_time(AVTextFormatContext *tctx, const char *key,
>                         int64_t ts, const AVRational *time_base, int is_duration)
>  {
> -    char buf[128];
> -
>      if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
>          avtext_print_string(tctx, key, "N/A", AV_TEXTFORMAT_PRINT_STRING_OPTIONAL);
>      } else {
> -        double d = ts * av_q2d(*time_base);
> +        char buf[128];
> +        double d = av_q2d(*time_base) * (double)ts;

We actually try to avoid explicit casts where possible.

>          struct unit_value uv;
>          uv.val.d = d;
>          uv.unit = unit_second_str;
> @@ -496,7 +520,8 @@ void avtext_print_data(AVTextFormatContext *tctx, const char *name,
>                         const uint8_t *data, int size)
>  {
>      AVBPrint bp;
> -    int offset = 0, l, i;
> +    unsigned offset = 0;
> +    int l, i;
>  
>      av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED);
>      av_bprintf(&bp, "\n");
> @@ -523,25 +548,29 @@ void avtext_print_data(AVTextFormatContext *tctx, const char *name,
>  void avtext_print_data_hash(AVTextFormatContext *tctx, const char *name,
>                              const uint8_t *data, int size)
>  {
> -    char *p, buf[AV_HASH_MAX_SIZE * 2 + 64] = { 0 };
> +    char buf[AV_HASH_MAX_SIZE * 2 + 64] = { 0 };
> +    int len;
>  
>      if (!tctx->hash)
>          return;
>  
>      av_hash_init(tctx->hash);
>      av_hash_update(tctx->hash, data, size);
> -    snprintf(buf, sizeof(buf), "%s:", av_hash_get_name(tctx->hash));
> -    p = buf + strlen(buf);
> -    av_hash_final_hex(tctx->hash, p, buf + sizeof(buf) - p);
> +    len = snprintf(buf, sizeof(buf), "%s:", av_hash_get_name(tctx->hash));
> +    av_hash_final_hex(tctx->hash, (uint8_t *)&buf[len], (int)sizeof(buf) - len);

Is it guaranteed that the output of snprintf() is not truncated?

>      avtext_print_string(tctx, name, buf, 0);
>  }
>  
>  void avtext_print_integers(AVTextFormatContext *tctx, const char *name,
> -                                  uint8_t *data, int size, const char *format,
> -                                  int columns, int bytes, int offset_add)
> +                           uint8_t *data, int size, const char *format,
> +                           int columns, int bytes, int offset_add)
>  {
>      AVBPrint bp;
> -    int offset = 0, l, i;
> +    unsigned offset = 0;
> +    int l, i;
> +
> +    if (!name || !data || !format || columns <= 0 || bytes <= 0)
> +        return;

Can this happen?

>  
>      av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED);
>      av_bprintf(&bp, "\n");
> @@ -607,12 +636,18 @@ int avtextwriter_context_open(AVTextWriterContext **pwctx, const AVTextWriter *w
>      AVTextWriterContext *wctx;
>      int ret = 0;
>  
> -    if (!(wctx = av_mallocz(sizeof(AVTextWriterContext)))) {
> +    if (!pwctx || !writer)
> +        return AVERROR(EINVAL);
> +
> +    if (!writer->priv_size && writer->priv_class)

Stuff like this should never happen and should therefore not be checked.

> +        return AVERROR(EINVAL);
> +
> +    if (!((wctx = av_mallocz(sizeof(AVTextWriterContext))))) {
>          ret = AVERROR(ENOMEM);
>          goto fail;
>      }
>  
> -    if (!(wctx->priv = av_mallocz(writer->priv_size))) {
> +    if (writer->priv_size && !((wctx->priv = av_mallocz(writer->priv_size)))) {
>          ret = AVERROR(ENOMEM);
>          goto fail;
>      }
> diff --git a/fftools/textformat/avtextformat.h b/fftools/textformat/avtextformat.h
> index 03564d14a7..e519094f4f 100644
> --- a/fftools/textformat/avtextformat.h
> +++ b/fftools/textformat/avtextformat.h
> @@ -21,9 +21,7 @@
>  #ifndef FFTOOLS_TEXTFORMAT_AVTEXTFORMAT_H
>  #define FFTOOLS_TEXTFORMAT_AVTEXTFORMAT_H
>  
> -#include <stddef.h>
>  #include <stdint.h>
> -#include "libavutil/attributes.h"
>  #include "libavutil/dict.h"
>  #include "libavformat/avio.h"
>  #include "libavutil/bprint.h"
> @@ -103,7 +101,7 @@ struct AVTextFormatContext {
>      unsigned int nb_item_type[SECTION_MAX_NB_LEVELS][SECTION_MAX_NB_SECTIONS];
>  
>      /** section per each level */
> -    const struct AVTextFormatSection *section[SECTION_MAX_NB_LEVELS];
> +    const AVTextFormatSection *section[SECTION_MAX_NB_LEVELS];
>      AVBPrint section_pbuf[SECTION_MAX_NB_LEVELS]; ///< generic print buffer dedicated to each section,
>                                                    ///  used by various formatters
>  
> @@ -124,7 +122,7 @@ struct AVTextFormatContext {
>  #define AV_TEXTFORMAT_PRINT_STRING_VALIDATE 2
>  
>  int avtext_context_open(AVTextFormatContext **ptctx, const AVTextFormatter *formatter, AVTextWriterContext *writer_context, const char *args,
> -                        const struct AVTextFormatSection *sections, int nb_sections,
> +                        const AVTextFormatSection *sections, int nb_sections,
>                          int show_value_unit,
>                          int use_value_prefix,
>                          int use_byte_value_binary_prefix,
> diff --git a/fftools/textformat/tf_default.c b/fftools/textformat/tf_default.c
> index 14ef9fe8f9..3b05d25f36 100644
> --- a/fftools/textformat/tf_default.c
> +++ b/fftools/textformat/tf_default.c
> @@ -70,9 +70,10 @@ DEFINE_FORMATTER_CLASS(default);
>  /* lame uppercasing routine, assumes the string is lower case ASCII */
>  static inline char *upcase_string(char *dst, size_t dst_size, const char *src)
>  {
> -    int i;
> +    unsigned i;
> +

Why not size_t?

>      for (i = 0; src[i] && i < dst_size - 1; i++)
> -        dst[i] = av_toupper(src[i]);
> +        dst[i] = (char)av_toupper(src[i]);
>      dst[i] = 0;
>      return dst;
>  }
> @@ -108,6 +109,9 @@ static void default_print_section_footer(AVTextFormatContext *wctx)
>      const struct AVTextFormatSection *section = wctx->section[wctx->level];
>      char buf[32];
>  
> +    if (!section)
> +        return;

Can this happen?

> +
>      if (def->noprint_wrappers || def->nested_section[wctx->level])
>          return;
>  
> diff --git a/fftools/textformat/tf_ini.c b/fftools/textformat/tf_ini.c
> index 9e1aa60e09..ec471fd480 100644
> --- a/fftools/textformat/tf_ini.c
> +++ b/fftools/textformat/tf_ini.c
> @@ -92,7 +92,7 @@ static char *ini_escape_str(AVBPrint *dst, const char *src)
>              /* fallthrough */
>          default:
>              if ((unsigned char)c < 32)
> -                av_bprintf(dst, "\\x00%02x", c & 0xff);
> +                av_bprintf(dst, "\\x00%02x", (unsigned char)c);
>              else
>                  av_bprint_chars(dst, c, 1);
>              break;
> diff --git a/fftools/textformat/tf_json.c b/fftools/textformat/tf_json.c
> index 24838b35ec..f286838d3c 100644
> --- a/fftools/textformat/tf_json.c
> +++ b/fftools/textformat/tf_json.c
> @@ -82,13 +82,18 @@ static const char *json_escape_str(AVBPrint *dst, const char *src, void *log_ctx
>      static const char json_subst[]  = { '"', '\\',  'b',  'f',  'n',  'r',  't', 0 };
>      const char *p;
>  
> +    if (!src) {
> +        av_log(log_ctx, AV_LOG_ERROR, "json_escape_str: NULL source string\n");
> +        return NULL;
> +    }

Can this even happen?

> +
>      for (p = src; *p; p++) {
>          char *s = strchr(json_escape, *p);
>          if (s) {
>              av_bprint_chars(dst, '\\', 1);
>              av_bprint_chars(dst, json_subst[s - json_escape], 1);
>          } else if ((unsigned char)*p < 32) {
> -            av_bprintf(dst, "\\u00%02x", *p & 0xff);
> +            av_bprintf(dst, "\\u00%02x", (unsigned char)*p);
>          } else {
>              av_bprint_chars(dst, *p, 1);
>          }
> @@ -107,6 +112,7 @@ static void json_print_section_header(AVTextFormatContext *wctx, const void *dat
>          wctx->section[wctx->level-1] : NULL;
>  
>      if (wctx->level && wctx->nb_item[wctx->level-1])
> +    if (wctx->level && wctx->nb_item[wctx->level - 1])
>          writer_put_str(wctx, ",\n");
>  
>      if (section->flags & AV_TEXTFORMAT_SECTION_FLAG_IS_WRAPPER) {
> diff --git a/fftools/textformat/tf_xml.c b/fftools/textformat/tf_xml.c
> index 76271dbaa6..eceeda81e5 100644
> --- a/fftools/textformat/tf_xml.c
> +++ b/fftools/textformat/tf_xml.c
> @@ -18,10 +18,7 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>   */
>  
> -#include <limits.h>
> -#include <stdarg.h>
>  #include <stdint.h>
> -#include <stdio.h>
>  #include <string.h>
>  
>  #include "avtextformat.h"
> diff --git a/fftools/textformat/tw_avio.c b/fftools/textformat/tw_avio.c
> index d335d35a56..3c7492aa06 100644
> --- a/fftools/textformat/tw_avio.c
> +++ b/fftools/textformat/tw_avio.c
> @@ -63,7 +63,7 @@ static void io_w8(AVTextWriterContext *wctx, int b)
>  static void io_put_str(AVTextWriterContext *wctx, const char *str)
>  {
>      IOWriterContext *ctx = wctx->priv;
> -    avio_write(ctx->avio_context, str, strlen(str));
> +    avio_write(ctx->avio_context, (const unsigned char *)str, (int)strlen(str));
>  }
>  
>  static void io_printf(AVTextWriterContext *wctx, const char *fmt, ...)
> @@ -89,10 +89,12 @@ const AVTextWriter avtextwriter_avio = {
>  
>  int avtextwriter_create_file(AVTextWriterContext **pwctx, const char *output_filename, int close_on_uninit)
>  {
> +    if (!pwctx || !output_filename || !output_filename[0])
> +        return AVERROR(EINVAL);

Can this happen?

> +
>      IOWriterContext *ctx;
>      int ret;
>  
> -
>      ret = avtextwriter_context_open(pwctx, &avtextwriter_avio);
>      if (ret < 0)
>          return ret;
> @@ -114,6 +116,9 @@ int avtextwriter_create_file(AVTextWriterContext **pwctx, const char *output_fil
>  
>  int avtextwriter_create_avio(AVTextWriterContext **pwctx, AVIOContext *avio_ctx, int close_on_uninit)
>  {
> +    if (!pwctx || !avio_ctx)
> +        return AVERROR(EINVAL);
> +
>      IOWriterContext *ctx;
>      int ret;
>  



More information about the ffmpeg-devel mailing list