[FFmpeg-devel] [PATCH 2/5] lavu/bprint: add XML escaping
Nicolas George
george at nsup.org
Wed Apr 12 16:23:22 EEST 2017
Le tridi 23 germinal, an CCXXV, Rodger Combs a écrit :
> ---
> libavutil/avstring.h | 28 ++++++++++++++++++++++++++++
> libavutil/bprint.c | 43 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 71 insertions(+)
>
> diff --git a/libavutil/avstring.h b/libavutil/avstring.h
> index 04d2695640..68b753a569 100644
> --- a/libavutil/avstring.h
> +++ b/libavutil/avstring.h
> @@ -314,6 +314,7 @@ enum AVEscapeMode {
> AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode.
> AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
> AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping.
> + AV_ESCAPE_MODE_XML, ///< Use XML ampersand-escaping; requires UTF-8 input.
> };
>
> /**
> @@ -334,6 +335,33 @@ enum AVEscapeMode {
> #define AV_ESCAPE_FLAG_STRICT (1 << 1)
>
> /**
> + * In addition to the provided list, escape all characters outside the range of
> + * U+0020 to U+007E.
> + * This only applies to XML-escaping.
> + */
> +#define AV_ESCAPE_FLAG_NON_ASCII (1 << 2)
> +
> +/**
> + * In addition to the provided list, escape single or double quotes.
> + * This only applies to XML-escaping.
> + */
I think this Doxygen comment, as written, only attaches to AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE; AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE is left undocumented.
> +#define AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE (1 << 3)
> +#define AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE (1 << 4)
Maybe also:
#define AV_ESCAPE_FLAG_ESCAPE_QUOTES (AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE|AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE)
?
> +
> +/**
> + * Replace invalid UTF-8 characters with a U+FFFD REPLACEMENT CHARACTER, escaped
> + * if AV_ESCAPE_FLAG_NON_ASCII is set.
> + * This only applies to XML-escaping.
> + */
> +#define AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES (1 << 5)
> +
> +/**
> + * Replace invalid UTF-8 characters with a '?', overriding the previous flag.
> + * This only applies to XML-escaping.
> + */
> +#define AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII (1 << 6)
> +
> +/**
> * Escape string in src, and put the escaped string in an allocated
> * string in *dst, which must be freed with av_free().
> *
> diff --git a/libavutil/bprint.c b/libavutil/bprint.c
> index 652775bef9..8e44c57346 100644
> --- a/libavutil/bprint.c
> +++ b/libavutil/bprint.c
> @@ -302,5 +302,48 @@ void av_bprint_escape(AVBPrint *dstbuf, const char *src, const char *special_cha
> }
> av_bprint_chars(dstbuf, '\'', 1);
> break;
> +
> + case AV_ESCAPE_MODE_XML:
> + /* &;-escape characters */
> + while (*src) {
> + uint8_t tmp;
> + uint32_t cp;
> + const char *src1 = src;
> + GET_UTF8(cp, (uint8_t)*src++, goto err;);
> +
> + if ((cp < 0xFF &&
> + ((special_chars && strchr(special_chars, cp)) ||
> + (flags & AV_ESCAPE_FLAG_WHITESPACE) && strchr(WHITESPACES, cp))) ||
> + (!(flags & AV_ESCAPE_FLAG_STRICT) &&
> + (cp == '&' || cp == '<' || cp == '>')) ||
& and < must always be encoded in XML. And > can sometimes be left
alone, but so few people use it that I think it is not worth the effort.
> + ((flags & AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE) && cp == '\'') ||
> + ((flags & AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE) && cp == '"') ||
> + ((flags & AV_ESCAPE_FLAG_NON_ASCII) && (cp < 0x20 || cp > 0x7e))) {
> + switch (cp) {
> + case '&' : av_bprintf(dstbuf, "&amp;"); break;
> + case '<' : av_bprintf(dstbuf, "&lt;"); break;
> + case '>' : av_bprintf(dstbuf, "&gt;"); break;
> + case '"' : av_bprintf(dstbuf, "&quot;"); break;
> + case '\'': av_bprintf(dstbuf, "&apos;"); break;
> + default: av_bprintf(dstbuf, "&#x%"PRIx32";", cp); break;
> + }
> + } else {
> + PUT_UTF8(cp, tmp, av_bprint_chars(dstbuf, tmp, 1);)
> + }
> + continue;
> + err:
> + if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII) {
> + av_bprint_chars(dstbuf, '?', 1);
> + } else if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES) {
> + if (flags & AV_ESCAPE_FLAG_NON_ASCII)
> + av_bprintf(dstbuf, "\xEF\xBF\xBD");
> + else
> + av_bprintf(dstbuf, "&#xfffd;");
> + } else {
> + while (src1 < src)
> + av_bprint_chars(dstbuf, *src1++, 1);
> + }
> + }
> + break;
> }
> }
Regards,
--
Nicolas George
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20170412/61d8931b/attachment.sig>
More information about the ffmpeg-devel mailing list