[FFmpeg-devel] [PATCH] ffprobe: implement string validation policy setting
Stefano Sabatini
stefasab at gmail.com
Wed Oct 2 17:52:05 CEST 2013
This should fix trac tickets #1163, #2502, #2955.
---
doc/ffprobe.texi | 24 ++++++++++
ffprobe.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 158 insertions(+), 7 deletions(-)
diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
index 777dbe7..55c6e80 100644
--- a/doc/ffprobe.texi
+++ b/doc/ffprobe.texi
@@ -317,6 +317,30 @@ Show information related to program and library versions. This is the
equivalent of setting both @option{-show_program_version} and
@option{-show_library_versions} options.
+@item -string_validation_policy @var{policy}
+Set string validation policy. It accepts the following values:
+
+@table @samp
+@item fail
+The program will fail immediately if an invalid UTF-8 sequence is
+found in an input string. This is especially useful to validate
+input metadata.
+
+@item replace=REPLACEMENT
+The program will substitute the invalid UTF-8 sequences with the
+string specified in @var{REPLACEMENT}, which is typically a single
+character.
+
+In case the replacement string is not specified, the program will
+assume the empty string, that is, it will remove the invalid sequences
+from the input strings.
+This is especially useful to create valid metadata output from
+invalid sources.
+@end table
+
+By default the program will apply the replace policy with an empty
+replacement.
+
@item -bitexact
Force bitexact output, useful to produce output which is not dependent
on the specific build.
diff --git a/ffprobe.c b/ffprobe.c
index c4f0a8f..2e2bb03 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -75,6 +75,14 @@ static int show_private_data = 1;
static char *print_format;
static char *stream_specifier;
+typedef enum {
+    STRING_VALIDATION_POLICY_FAIL,    ///< report an error on the first invalid UTF-8 sequence
+    STRING_VALIDATION_POLICY_REPLACE, ///< substitute invalid UTF-8 sequences
+} StringValidationPolicy;
+
+static StringValidationPolicy string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
+static char *string_validation_replace; ///< replacement string, NULL is treated as empty
+
typedef struct {
int id; ///< identifier
int64_t start, end; ///< start, end in second/AV_TIME_BASE units
@@ -428,17 +436,93 @@ static inline void writer_print_integer(WriterContext *wctx,
}
}
+/* Copy src into *dstp (released by the caller with av_free()), applying
+ * string_validation_policy to invalid UTF-8; return 0 or a negative AVERROR. */
+static inline int validate_string(char **dstp, const char *src, void *log_ctx)
+{
+    const uint8_t *p = (const uint8_t *)src;
+    AVBPrint dstbuf;
+    int invalid_chars_nb = 0, ret = 0, fret;
+
+    av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    while (*p) {
+        uint32_t code;
+        uint8_t tmp;
+        int invalid = 0;
+
+        /* the guarded read never steps past the terminating NUL, even when
+         * a multi-byte sequence is truncated at the end of the string */
+        GET_UTF8(code, (*p ? *p++ : 0), invalid = 1;);
+        if (!invalid) {
+            PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
+            continue;
+        }
+
+        invalid_chars_nb++;
+        if (string_validation_policy == STRING_VALIDATION_POLICY_FAIL) {
+            av_log(log_ctx, AV_LOG_ERROR,
+                   "Invalid UTF-8 character found in sequence '%s'\n", src);
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+        if (string_validation_replace) {
+            const uint8_t *s = (const uint8_t *)string_validation_replace;
+            while (*s) {
+                int skip = 0; /* drop undecodable parts of the replacement */
+                GET_UTF8(code, (*s ? *s++ : 0), skip = 1;);
+                if (!skip) {
+                    PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
+                }
+            }
+        }
+    }
+
+    if (invalid_chars_nb) {
+        av_log(log_ctx, AV_LOG_WARNING,
+               "%d invalid UTF-8 characters found in sequence '%s', "
+               "they have been replaced with '%s'\n",
+               invalid_chars_nb, src, (char *)av_x_if_null(string_validation_replace, ""));
+    }
+
+end:
+    fret = av_bprint_finalize(&dstbuf, dstp); /* can fail on allocation */
+    return ret < 0 ? ret : fret;
+}
+
+#define PRINT_STRING_OPT 1 ///< print only if the writer displays optional fields
+#define PRINT_STRING_VALIDATE 2 ///< run key and value through validate_string() before printing
+
static inline int writer_print_string(WriterContext *wctx,
- const char *key, const char *val, int opt)
+ const char *key, const char *val, int flags)
{
const struct section *section = wctx->section[wctx->level];
int ret = 0;
- if (opt && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS))
+ if ((flags & PRINT_STRING_OPT)
+ && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS))
return 0;
if (section->show_all_entries || av_dict_get(section->entries_to_show, key, NULL, 0)) {
- wctx->writer->print_string(wctx, key, val);
+ if (flags & PRINT_STRING_VALIDATE) {
+ char *key1 = NULL, *val1 = NULL;
+ ret = validate_string(&key1, key, wctx);
+ if (ret < 0) goto end;
+ ret = validate_string(&val1, val, wctx);
+ if (ret < 0) goto end;
+ wctx->writer->print_string(wctx, key1, val1);
+ end:
+ if (ret < 0) {
+ av_log(wctx, AV_LOG_ERROR,
+ "Invalid key=value string combination %s=%s in section %s\n",
+ key, val, section->unique_name);
+ }
+ av_free(key1);
+ av_free(val1);
+ } else {
+ wctx->writer->print_string(wctx, key, val);
+ }
+
wctx->nb_item[wctx->level]++;
}
@@ -460,7 +544,7 @@ static void writer_print_time(WriterContext *wctx, const char *key,
char buf[128];
if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
- writer_print_string(wctx, key, "N/A", 1);
+ writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
} else {
double d = ts * av_q2d(*time_base);
struct unit_value uv;
@@ -474,7 +558,7 @@ static void writer_print_time(WriterContext *wctx, const char *key,
static void writer_print_ts(WriterContext *wctx, const char *key, int64_t ts, int is_duration)
{
if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
- writer_print_string(wctx, key, "N/A", 1);
+ writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
} else {
writer_print_integer(wctx, key, ts);
}
@@ -1443,7 +1527,7 @@ static void writer_register_all(void)
#define print_int(k, v) writer_print_integer(w, k, v)
#define print_q(k, v, s) writer_print_rational(w, k, v, s)
#define print_str(k, v) writer_print_string(w, k, v, 0)
-#define print_str_opt(k, v) writer_print_string(w, k, v, 1)
+#define print_str_opt(k, v) writer_print_string(w, k, v, PRINT_STRING_OPT)
#define print_time(k, v, tb) writer_print_time(w, k, v, tb, 0)
#define print_ts(k, v) writer_print_ts(w, k, v, 0)
#define print_duration_time(k, v, tb) writer_print_time(w, k, v, tb, 1)
@@ -1468,7 +1552,7 @@ static inline int show_tags(WriterContext *wctx, AVDictionary *tags, int section
writer_print_section_header(wctx, section_id);
while ((tag = av_dict_get(tags, "", tag, AV_DICT_IGNORE_SUFFIX))) {
- ret = writer_print_string(wctx, tag->key, tag->value, 0);
+ ret = writer_print_string(wctx, tag->key, tag->value, PRINT_STRING_VALIDATE);
if (ret < 0)
break;
}
@@ -2534,6 +2618,48 @@ static int opt_read_intervals(void *optctx, const char *opt, const char *arg)
return parse_read_intervals(arg);
}
+/* Parse "fail" or "replace[=REPLACEMENT]"; return 0 or a negative AVERROR. */
+static int opt_string_validation_policy(void *optctx, const char *opt, const char *arg)
+{
+    char *mode = av_strdup(arg);
+    char *next;
+    int ret = 0;
+
+    if (!mode) return AVERROR(ENOMEM);
+
+    next = strchr(mode, '='); /* split "MODE=ARGUMENT" in place */
+    if (next)
+        *next++ = 0;
+
+    if (!strcmp(mode, "fail")) {
+        if (next) {
+            av_log(NULL, AV_LOG_ERROR,
+                   "No argument must be specified for the option %s with mode 'fail'\n",
+                   opt);
+            ret = AVERROR(EINVAL);
+        } else {
+            string_validation_policy = STRING_VALIDATION_POLICY_FAIL;
+        }
+    } else if (!strcmp(mode, "replace")) {
+        char *replacement = next ? av_strdup(next) : NULL;
+        if (next && !replacement) {
+            ret = AVERROR(ENOMEM);
+        } else {
+            av_free(string_validation_replace); /* value of an earlier occurrence */
+            string_validation_replace = replacement;
+            string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
+        }
+    } else {
+        av_log(NULL, AV_LOG_ERROR,
+               "Invalid argument '%s' for option '%s', "
+               "choose between fail, or replace=REPLACEMENT\n", arg, opt);
+        ret = AVERROR(EINVAL);
+    }
+
+    av_free(mode);
+    return ret;
+}
+
static int opt_pretty(void *optctx, const char *opt, const char *arg)
{
show_value_unit = 1;
@@ -2633,6 +2759,7 @@ static const OptionDef real_options[] = {
{ "private", OPT_BOOL, {(void*)&show_private_data}, "same as show_private_data" },
{ "bitexact", OPT_BOOL, {&do_bitexact}, "force bitexact output" },
{ "read_intervals", HAS_ARG, {.func_arg = opt_read_intervals}, "set read intervals", "read_intervals" },
+ { "string_validation_policy", HAS_ARG, {.func_arg = opt_string_validation_policy}, "select the string validation policy", "policy_specification" },
{ "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {.func_arg = opt_default}, "generic catch all option", "" },
{ "i", HAS_ARG, {.func_arg = opt_input_file_i}, "read specified file", "input_file"},
{ NULL, },
--
1.8.1.2
More information about the ffmpeg-devel
mailing list