[FFmpeg-devel] [PATCH] asf - read/write metadata as UTF-16
Michael Niedermayer
michaelni
Sun Feb 21 23:03:01 CET 2010
On Sun, Feb 21, 2010 at 09:45:53PM +0100, Anton Khirnov wrote:
> Hi,
>
> asfenc/dec currently reads/writes metadata in a format that can be
> described as UTF-8 interleaved with 0s. for latin1 strings it happens to
> be identical with UTF-16, for anything else it's identical with random
> garbage.
>
> attached series of patches attempts to fix it.
>
> Anton Khirnov
> common.h | 32 ++++++++++++++++++++++++++++++++
> 1 file changed, 32 insertions(+)
> fe7b6d0330b85006eeeb767aaa1bb4c764f57451 0001-Add-PUT_UTF16-macro.patch
> From 28107be2554a34c1cd110b06725bb0ca59d14005 Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 15:20:23 +0100
> Subject: [PATCH 1/5] Add PUT_UTF16 macro.
>
> ---
> libavutil/common.h | 32 ++++++++++++++++++++++++++++++++
> 1 files changed, 32 insertions(+), 0 deletions(-)
>
> diff --git a/libavutil/common.h b/libavutil/common.h
> index c91e658..ba048dd 100644
> --- a/libavutil/common.h
> +++ b/libavutil/common.h
> @@ -335,6 +335,38 @@ static inline av_const int av_ceil_log2(int x)
> }\
> }
>
> +/*!
> + * \def PUT_UTF16(val, tmp, PUT_16BIT, ERROR)
> + * Converts a 32-bit Unicode character to its UTF-16 encoded form (2 or 4 bytes).
> + * \param val is an input-only argument and should be of type uint32_t. It holds
> + * a UCS-4 encoded Unicode character that is to be converted to UTF-16. If
> + * val is given as a function it is executed only once.
doesnt seems so
> + * \param tmp is a temporary variable and should be of type uint16_t. It
> + * represents an intermediate value during conversion that is to be
> + * output by PUT_16BIT.
> + * \param PUT_16BIT writes the converted UTF-16 data to any proper destination
> + * in desired endianness. It could be a function or a statement, and uses tmp
> + * as the input byte. For example, PUT_BYTE could be "*output++ = tmp;"
> + * PUT_BYTE will be executed 1 or 2 times depending on input character.
> + * \param ERROR action that should be taken when val can't be represented in
> + * UTF-16 encoding. It should be a statement that jumps out of the macro,
> + * like exit(), goto, return, break, or continue.
> + */
> +#define PUT_UTF16(val, tmp, PUT_16BIT, ERROR)\
> + if (val < 0x10000) {\
> + tmp = val;\
> + PUT_16BIT\
> + } else if ( val <= 0x10FFFFU ) {\
> + tmp = 0xD800 | ((val - 0x10000) >> 10);\
> + PUT_16BIT\
> + tmp = 0xDC00 | ((val - 0x10000) & 0x3FF);\
> + PUT_16BIT\
> + } else {\
> + ERROR\
> + }\
PUT_UTF8 doesnt handle errors, and you do it quite incompletly
> +
> +
> +
> #include "mem.h"
>
> #ifdef HAVE_AV_CONFIG_H
> --
> 1.6.6.1
>
> libavformat/asfenc.c | 2 +-
> tests/ref/acodec/wmav1 | 2 +-
> tests/ref/acodec/wmav2 | 2 +-
> tests/ref/lavf/asf | 2 +-
> 4 files changed, 4 insertions(+), 4 deletions(-)
> 4495eac8d7428cbddbb190fd4ac6218ab19cf752 0002-asfenc-fix-yet-another-wrong-string-length.patch
> From 5cfe028a96bce36a2292cd175d6f056174cf29ee Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 17:45:14 +0100
> Subject: [PATCH 2/5] asfenc: fix yet another wrong string length.
>
> ---
> libavformat/asfenc.c | 2 +-
> tests/ref/acodec/wmav1 | 2 +-
> tests/ref/acodec/wmav2 | 2 +-
> tests/ref/lavf/asf | 2 +-
> 4 files changed, 4 insertions(+), 4 deletions(-)
ok if you checked all uses of this function against the spec and
wmp
also make sure you bump the version string we store in asf files
[...]
> asf.c | 6 +++---
> asfenc.c | 9 ++-------
> 2 files changed, 5 insertions(+), 10 deletions(-)
> 82be7a855b218246f2ea0ce710ae2c4a8cc39033 0003-asf-don-t-add-WM-prefix-to-all-tags-it-s-only-requir.patch
> From 8d544fa0f591b72544fb44d6251982686b0c96e9 Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 17:17:06 +0100
> Subject: [PATCH 3/5] asf: don't add WM/ prefix to all tags, it's only required for some.
ok
[...]
> asfenc.c | 71 +++++++++++++++++++++++++++++++++++++++++----------------------
> 1 file changed, 47 insertions(+), 24 deletions(-)
> 40fdbbb259951470ed7e054d1d0c931f35b63e96 0004-asfenc-write-tags-in-proper-UTF-16.patch
> From efd7583fe9bba5e965463d657c693b7972745a31 Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 17:57:17 +0100
> Subject: [PATCH 4/5] asfenc: write tags in proper UTF-16.
>
> ---
> libavformat/asfenc.c | 71 +++++++++++++++++++++++++++++++++-----------------
> 1 files changed, 47 insertions(+), 24 deletions(-)
>
> diff --git a/libavformat/asfenc.c b/libavformat/asfenc.c
> index 8d6292d..f610a42 100644
> --- a/libavformat/asfenc.c
> +++ b/libavformat/asfenc.c
> @@ -203,21 +203,37 @@ static void put_guid(ByteIOContext *s, const ff_asf_guid *g)
> put_buffer(s, *g, sizeof(*g));
> }
>
> -static void put_str16_nolen(ByteIOContext *s, const char *tag);
> +static int put_str16_nolen(ByteIOContext *s, const char *tag);
> static void put_str16(ByteIOContext *s, const char *tag)
> {
> - put_le16(s, 2*(strlen(tag) + 1));
> - put_str16_nolen(s, tag);
> + int len;
> + uint8_t *pb;
> + ByteIOContext *dyn_buf;
> + if (url_open_dyn_buf(&dyn_buf) < 0)
> + return;
> +
> + put_str16_nolen(dyn_buf, tag);
> + len = url_close_dyn_buf(dyn_buf, &pb);
> + put_le16(s, len);
> + put_buffer(s, pb, len);
> + av_freep(&pb);
> }
>
> -static void put_str16_nolen(ByteIOContext *s, const char *tag)
> +static int put_str16_nolen(ByteIOContext *s, const char *tag)
> {
> - int c;
> + const uint8_t *q = tag;
> + int ret = 0;
>
> - do{
> - c = (uint8_t)*tag++;
> - put_le16(s, c);
> - }while(c);
> + while (*q) {
> + uint32_t ch;
> + uint16_t tmp;
> +
> + GET_UTF8(ch, *q++, break;)
> + PUT_UTF16(ch, tmp, put_le16(s, tmp);ret += 2;, break;)
> + }
> + put_le16(s, 0);
> + ret += 2;
> + return ret;
> }
>
> static int64_t put_header(ByteIOContext *pb, const ff_asf_guid *g)
> @@ -272,7 +288,7 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data
> {
> ASFContext *asf = s->priv_data;
> ByteIOContext *pb = s->pb;
> - AVMetadataTag *title, *author, *copyright, *comment;
> + AVMetadataTag *tags[5];
unrelated?
> int header_size, n, extra_size, extra_size2, wav_extra_size, file_time;
> int has_title;
> int metadata_count;
> @@ -281,13 +297,14 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size, int64_t data
> int bit_rate;
> int64_t duration;
>
> - title = av_metadata_get(s->metadata, "title" , NULL, 0);
> - author = av_metadata_get(s->metadata, "author" , NULL, 0);
> - copyright = av_metadata_get(s->metadata, "copyright", NULL, 0);
> - comment = av_metadata_get(s->metadata, "comment" , NULL, 0);
> + tags[0] = av_metadata_get(s->metadata, "title" , NULL, 0);
> + tags[1] = av_metadata_get(s->metadata, "author" , NULL, 0);
> + tags[2] = av_metadata_get(s->metadata, "copyright", NULL, 0);
> + tags[3] = av_metadata_get(s->metadata, "comment" , NULL, 0);
> + tags[4] = av_metadata_get(s->metadata, "rating" , NULL, 0);
adding rating is a unrelated change
[...]
> asfdec.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
> 726ff0a32ad06db7fe8719e2e071b0763b1cc5ed 0005-asfdec-read-metadata-as-proper-UTF-16.patch
> From c8beff1c527f43fc4763c4a46db403df8f8da278 Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 21 Feb 2010 18:45:38 +0100
> Subject: [PATCH 5/5] asfdec: read metadata as proper UTF-16.
>
> ---
> libavformat/asfdec.c | 14 +++++++++-----
> 1 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
> index 3ccadeb..de48167 100644
> --- a/libavformat/asfdec.c
> +++ b/libavformat/asfdec.c
> @@ -122,9 +122,12 @@ static void get_str16(ByteIOContext *pb, char *buf, int buf_size)
> static void get_str16_nolen(ByteIOContext *pb, int len, char *buf, int buf_size)
> {
> char* q = buf;
> - for (; len > 1; len -= 2) {
> + while (len > 1) {
> uint8_t tmp;
> - PUT_UTF8(get_le16(pb), tmp, if (q - buf < buf_size - 1) *q++ = tmp;)
> + uint32_t ch;
> +
> + GET_UTF16(ch, (len -= 2) >= 0 ? get_le16(pb) : 0, break;)
> + PUT_UTF8(ch, tmp, if (q - buf < buf_size - 1) *q++ = tmp;)
> }
> if (len > 0)
> url_fskip(pb, len);
> @@ -157,12 +160,12 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len)
> if ((unsigned)len >= UINT_MAX)
> return;
>
> - value = av_malloc(len+1);
> + value = av_malloc(2*len+1);
> if (!value)
> return;
>
> if (type == 0) { // UTF16-LE
> - get_str16_nolen(s->pb, len, value, len);
> + get_str16_nolen(s->pb, len, value, 2*len + 1);
> } else if (type > 1 && type <= 5) { // boolean or DWORD or QWORD or WORD
> uint64_t num = get_value(s->pb, type);
> snprintf(value, len, "%"PRIu64, num);
> @@ -174,7 +177,8 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len)
> }
> if (!strncmp(key, "WM/", 3))
> key += 3;
> - av_metadata_set2(&s->metadata, key, value, AV_METADATA_DONT_STRDUP_VAL);
> + av_metadata_set2(&s->metadata, key, value, 0);
> + av_freep(&value);
why the strdup change?
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
The greatest way to live with honor in this world is to be what we pretend
to be. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20100221/320ffc7e/attachment.pgp>
More information about the ffmpeg-devel
mailing list