[MPlayer-dev-eng] [PATCH] handling UTF-8 mms urls

Fengguang Wu fengguang.wu at gmail.com
Thu Jun 22 05:24:23 CEST 2006


Greetings,

The attached patch enables mplayer to handle UTF-8 mms:// URLs.

It was written following the review feedback on my previous
setlocale patch.

It has worked well in my daily use. Please consider applying it.
Thanks.

Fengguang Wu
-------------- next part --------------
Index: libmpdemux/asf_mmst_streaming.c
===================================================================
--- libmpdemux/asf_mmst_streaming.c	(revision 18766)
+++ libmpdemux/asf_mmst_streaming.c	(working copy)
@@ -25,17 +25,6 @@
 #include <winsock2.h>
 #endif
 
-#ifndef USE_SETLOCALE
-#undef USE_ICONV
-#endif
-
-#ifdef USE_ICONV
-#include <iconv.h>
-#ifdef USE_LANGINFO
-#include <langinfo.h>
-#endif
-#endif
-
 #include "url.h"
 #include "asf.h"
 
@@ -119,40 +108,75 @@
   }
 }
 
-#ifdef USE_ICONV
-static iconv_t url_conv;
-#endif
+static char * utf8to16(char *dest, char *src, int len)
+{
+  unsigned cc;
+  unsigned mask;
+  int n;
+  char *end = src + len;
 
-static void string_utf16(char *dest, char *src, int len) {
-    int i;
-#ifdef USE_ICONV
-    size_t len1, len2;
-    char *ip, *op;
+  while(src < end) {
+    /* first byte */
+    cc = *src++;
 
-    if (url_conv != (iconv_t)(-1))
-    {
-    memset(dest, 0, 1000);
-    len1 = len; len2 = 1000;
-    ip = src; op = dest;
+    for (n = 0, mask = 0x80;
+         cc & mask;
+         n++, mask >>= 1)
+      ;
 
-    iconv(url_conv, &ip, &len1, &op, &len2);
-    }
-    else
-    {
-#endif
-	if (len > 499) len = 499;
-	for (i=0; i<len; i++) {
-	    dest[i*2] = src[i];
-	    dest[i*2+1] = 0;
-        }
-	/* trailing zeroes */
-	dest[i*2] = 0;
-	dest[i*2+1] = 0;
-#ifdef USE_ICONV
-    }
-#endif
+    /* n is number of leading '1' bits */
+    if (n == 1 || n > 6)
+      goto not_utf8;
+
+    /* n = number of following bytes */
+    if (n)
+      n--;
+
+    if (src + n > end)
+      goto not_utf8;
+
+    /* following bytes */
+    for (cc &= mask - 1;
+         n && (*src & 0xc0) == 0x80;
+         n--, src++)
+      cc = (cc << 6) | (*src & 0x3f);
+
+    if (n)
+      goto not_utf8;
+
+    if (cc < 0x10000) {
+      *dest++ = cc;
+      *dest++ = cc >> 8;
+    } else if (cc < 0x10ffff) {
+      unsigned w1, w2;
+      cc -= 0x10000;
+      w1 = 0xd800 | (cc >> 10);
+      w2 = 0xdc00 | (cc & 0x3ff);
+      *dest++ = w1;
+      *dest++ = w1 >> 8;
+      *dest++ = w2;
+      *dest++ = w2 >> 8;
+    } else {
+      fprintf(stderr, "string cannot be UTF-16 encoded!\n");	    
+    }	    
+  }
+  return dest;
+not_utf8:
+  return 0;
 }
 
+static int string_utf16(char *dest, char *src, int len)
+{
+  char *end = utf8to16(dest, src, len);
+
+  if (end)
+    return end - dest;
+
+  /* TODO: try local charset again. */
+  fprintf(stderr, "utf8to16(%s, %d) failed.\n", src, len);
+  return 0;
+}
+
 static void get_answer (int s) 
 {
   char  data[BUF_SIZE];
@@ -550,19 +574,10 @@
   * cmd 1 0x01 
   * */
 
-  /* prepare for the url encoding conversion */
-#ifdef USE_ICONV
-#ifdef USE_LANGINFO
-  url_conv = iconv_open("UTF-16LE",nl_langinfo(CODESET));
-#else
-  url_conv = iconv_open("UTF-16LE", NULL);
-#endif
-#endif
-
   snprintf (str, 1023, "\034\003NSPlayer/7.0.0.1956; {33715801-BAB3-9D85-24E9-03B90328270A}; Host: %s", url1->hostname);
-  string_utf16 (data, str, strlen(str));
+  len = string_utf16 (data, str, strlen(str));
 // send_command(s, commandno ....)
-  send_command (s, 1, 0, 0x0004000b, strlen(str)*2+2, data);
+  send_command (s, 1, 0, 0x0004000b, len, data);
 
   len = recv (s, data, BUF_SIZE, 0) ;
 
@@ -574,18 +589,23 @@
   * cmd 2 0x02
   *  */
 
-  string_utf16 (&data[8], "\002\000\\\\192.168.0.1\\TCP\\1037", 24);
+  len = string_utf16 (&data[8], "\002\000\\\\192.168.0.1\\TCP\\1037", 24);
   memset (data, 0, 8);
-  send_command (s, 2, 0, 0, 24*2+10, data);
+  /*
+   * Note: 9 or 10 are tested ok, so taking the original len+10.
+   * But why does len+8 not work here?
+   */
+  send_command (s, 2, 0, 0, len + 10, data);
 
   len = recv (s, data, BUF_SIZE, 0) ;
 
   /* This command sends file path (at server) and file name request to the server.
   * 0x5 */
 
-  string_utf16 (&data[8], path, strlen(path));
+  len = string_utf16 (&data[8], path, strlen(path));
   memset (data, 0, 8);
-  send_command (s, 5, 0, 0, strlen(path)*2+10, data);
+  /* Note: the original len+10 works only for _some_ files. */
+  send_command (s, 5, 0, 0, len + 8, data);
   free(path);
 
   get_answer (s);
@@ -656,10 +676,5 @@
   packet_length1 = packet_length;
   mp_msg(MSGT_NETWORK,MSGL_INFO,"mmst packet_length = %d\n", packet_length);
 
-#ifdef USE_ICONV
-  if (url_conv != (iconv_t)(-1))
-    iconv_close(url_conv);
-#endif
-
   return 0;
 }


More information about the MPlayer-dev-eng mailing list