[MPlayer-dev-eng] [PATCH] Parsing simplification in vobsub

ubitux ubitux at gmail.com
Wed Apr 14 00:27:53 CEST 2010


On Tue, Apr 13, 2010 at 10:46:10PM +0200, Reimar Döffinger wrote:
> It mixes too many different things, particularly the lang/id changes should
> be separate, reviewing the in one bunch is too error-prone.

Ok, here is 3 patches: one for the timestamp, one for the parse origin and
the last one for id parsing. Each patch has be written in standalone, the
order may not matter.

Also note that the third patch is a bit bigger than the other because I
needed to remove the allocation in order to perform a simpler parsing.

> Also moving all the comments before the function does not really belong in  this
> patch.

Fixed.

> > +/* id: xx, index: n */
> >  static int vobsub_parse_id(vobsub_t *vob, const char *line)
> >  {
> > -    // id: xx, index: n
> > -    size_t idlen;
> > -    const char *p, *q;
> > -    p  = line;
> > -    while (isspace(*p))
> > -        ++p;
> > -    q = p;
> > -    while (isalpha(*q))
> > -        ++q;
> > -    idlen = q - p;
> > -    if (idlen == 0)
> > +    char id[3];
> > +    unsigned int index;
> > +
> > +    if (sscanf(line, " %2s, index: %d", id, &index) != 2)
> >          return -1;
> > -    ++q;
> > -    while (isspace(*q))
> > -        ++q;
> > -    if (strncmp("index:", q, 6))
> > -        return -1;
> > -    q += 6;
> > -    while (isspace(*q))
> > -        ++q;
> > -    if (!isdigit(*q))
> > -        return -1;
> > -    return vobsub_add_id(vob, p, idlen, atoi(q));
> > +    return vobsub_add_id(vob, id, index);
> 
> This does more than just simplify the code, the old code did not
> require a comma, any non-alpha character was accepted.

Yes but it seems to be mandatory. It's like the timestamp/filepos comma
separator. I just checked at Guliverkli/VSFilter source and they do the
same.

> And unless vobsub_add_id had a check for it, it only required
> the id to have length > 0, but not to be exactly 2.

Vobsub can't have id length > 2, it's two reserved bytes. Also, I can't
imagine a lang with a single character, I don't think it is allowed.

> > +/* timestamp: HH:MM:SS.mmm, filepos: 0nnnnnnnnn */
> >  static int vobsub_parse_timestamp(vobsub_t *vob, const char *line)
> >  {
> > -    // timestamp: HH:MM:SS.mmm, filepos: 0nnnnnnnnn
> > -    const char *p;
> >      int h, m, s, ms;
> >      off_t filepos;
> > -    while (isspace(*line))
> > -        ++line;
> > -    p = line;
> > -    while (isdigit(*p))
> > -        ++p;
> > -    if (p - line != 2)
> > +
> > +    if (sscanf(line, " %d:%d:%d:%d , filepos: %lx ", &h, &m, &s, &ms, &filepos) != 5)
> 
> Doesn't match either, old code required time values to have exactly length
> 2, 2, 2 and 3, no whitespace was allowed before the ",", an %lx
> is not generally the correct format specified for off_t (you could just
> use uint64_t and PRIx64).

I added the length limits. It does not check if it's below, but is it a
real problem to tolerate no zero padding?

Allowed whitespaces before the comma is now fixed.

About the specified format, you want me to cast the filepos?

> And the terminating whitespace at best just wastes memory.

Oups, fixed.

> > +/* org: X, Y */
> >  static int vobsub_parse_origin(vobsub_t *vob, const char *line)
> >  {
> > -    // org: X,Y
> > -    char *p;
> > -    while (isspace(*line))
> > -        ++line;
> > -    if (!isdigit(*line))
> > -        return -1;
> > -    vob->origin_x = strtoul(line, &p, 10);
> > -    if (*p != ',')
> > -        return -1;
> > -    ++p;
> > -    vob->origin_y = strtoul(p, NULL, 10);
> > -    return 0;
> > +    unsigned int x, y;
> > +
> > +    if (sscanf(line, " %d , %d", &x, &y) == 2) {
> 
> No whitespace was allowed before the comma originally.

Fixed.

-- 
ubitux
-------------- next part --------------
Index: vobsub.c
===================================================================
--- vobsub.c	(revision 31036)
+++ vobsub.c	(working copy)
@@ -695,54 +695,10 @@
 static int vobsub_parse_timestamp(vobsub_t *vob, const char *line)
 {
     // timestamp: HH:MM:SS.mmm, filepos: 0nnnnnnnnn
-    const char *p;
     int h, m, s, ms;
     off_t filepos;
-    while (isspace(*line))
-        ++line;
-    p = line;
-    while (isdigit(*p))
-        ++p;
-    if (p - line != 2)
+    if (sscanf(line, " %02d:%02d:%02d:%03d, filepos: %lx", &h, &m, &s, &ms, &filepos) != 5)
         return -1;
-    h = atoi(line);
-    if (*p != ':')
-        return -1;
-    line = ++p;
-    while (isdigit(*p))
-        ++p;
-    if (p - line != 2)
-        return -1;
-    m = atoi(line);
-    if (*p != ':')
-        return -1;
-    line = ++p;
-    while (isdigit(*p))
-        ++p;
-    if (p - line != 2)
-        return -1;
-    s = atoi(line);
-    if (*p != ':')
-        return -1;
-    line = ++p;
-    while (isdigit(*p))
-        ++p;
-    if (p - line != 3)
-        return -1;
-    ms = atoi(line);
-    if (*p != ',')
-        return -1;
-    line = p + 1;
-    while (isspace(*line))
-        ++line;
-    if (strncmp("filepos:", line, 8))
-        return -1;
-    line += 8;
-    while (isspace(*line))
-        ++line;
-    if (! isxdigit(*line))
-        return -1;
-    filepos = strtol(line, NULL, 16);
     return vobsub_add_timestamp(vob, filepos, vob->delay + ms + 1000 * (s + 60 * (m + 60 * h)));
 }
 
-------------- next part --------------
Index: vobsub.c
===================================================================
--- vobsub.c	(revision 31036)
+++ vobsub.c	(working copy)
@@ -749,17 +749,14 @@
 static int vobsub_parse_origin(vobsub_t *vob, const char *line)
 {
     // org: X,Y
-    char *p;
-    while (isspace(*line))
-        ++line;
-    if (!isdigit(*line))
-        return -1;
-    vob->origin_x = strtoul(line, &p, 10);
-    if (*p != ',')
-        return -1;
-    ++p;
-    vob->origin_y = strtoul(p, NULL, 10);
-    return 0;
+    unsigned int x, y;
+
+    if (sscanf(line, " %d, %d", &x, &y) == 2) {
+        vob->origin_x = x;
+        vob->origin_y = y;
+        return 0;
+    }
+    return -1;
 }
 
 unsigned int vobsub_palette_to_yuv(unsigned int pal)
-------------- next part --------------
Index: vobsub.c
===================================================================
--- vobsub.c	(revision 31036)
+++ vobsub.c	(working copy)
@@ -484,7 +484,7 @@
 } packet_t;
 
 typedef struct {
-    char *id;
+    char id[3];
     packet_t *packets;
     unsigned int packets_reserve;
     unsigned int packets_size;
@@ -507,7 +507,7 @@
 
 static void packet_queue_construct(packet_queue_t *queue)
 {
-    queue->id = NULL;
+    strcpy(queue->id, "xx");
     queue->packets = NULL;
     queue->packets_reserve = 0;
     queue->packets_size = 0;
@@ -622,26 +622,14 @@
     return 0;
 }
 
-static int vobsub_add_id(vobsub_t *vob, const char *id, size_t idlen,
-                         const unsigned int index)
+static int vobsub_add_id(vobsub_t *vob, const char *id, const unsigned int index)
 {
     if (vobsub_ensure_spu_stream(vob, index) < 0)
         return -1;
-    if (id && idlen) {
-        if (vob->spu_streams[index].id)
-            free(vob->spu_streams[index].id);
-        vob->spu_streams[index].id = malloc(idlen + 1);
-        if (vob->spu_streams[index].id == NULL) {
-            mp_msg(MSGT_VOBSUB, MSGL_FATAL, "vobsub_add_id: malloc failure");
-            return -1;
-        }
-        vob->spu_streams[index].id[idlen] = 0;
-        memcpy(vob->spu_streams[index].id, id, idlen);
-    }
+    strcpy(vob->spu_streams[index].id, id);
     vob->spu_streams_current = index;
     mp_msg(MSGT_IDENTIFY, MSGL_INFO, "ID_VOBSUB_ID=%d\n", index);
-    if (id && idlen)
-        mp_msg(MSGT_IDENTIFY, MSGL_INFO, "ID_VSID_%d_LANG=%s\n", index, vob->spu_streams[index].id);
+    mp_msg(MSGT_IDENTIFY, MSGL_INFO, "ID_VSID_%d_LANG=%s\n", index, vob->spu_streams[index].id);
     mp_msg(MSGT_VOBSUB, MSGL_V, "[vobsub] subtitle (vobsubid): %d language %s\n",
            index, vob->spu_streams[index].id);
     return 0;
@@ -668,28 +656,12 @@
 static int vobsub_parse_id(vobsub_t *vob, const char *line)
 {
     // id: xx, index: n
-    size_t idlen;
-    const char *p, *q;
-    p  = line;
-    while (isspace(*p))
-        ++p;
-    q = p;
-    while (isalpha(*q))
-        ++q;
-    idlen = q - p;
-    if (idlen == 0)
+    char id[3];
+    unsigned int index;
+
+    if (sscanf(line, " %2s, index: %d", id, &index) != 2)
         return -1;
-    ++q;
-    while (isspace(*q))
-        ++q;
-    if (strncmp("index:", q, 6))
-        return -1;
-    q += 6;
-    while (isspace(*q))
-        ++q;
-    if (!isdigit(*q))
-        return -1;
-    return vobsub_add_id(vob, p, idlen, atoi(q));
+    return vobsub_add_id(vob, id, index);
 }
 
 static int vobsub_parse_timestamp(vobsub_t *vob, const char *line)
@@ -1112,7 +1084,7 @@
     vobsub_t *vob= (vobsub_t *) vobhandle;
     while (lang && strlen(lang) >= 2) {
         for (i = 0; i < vob->spu_streams_size; i++)
-            if (vob->spu_streams[i].id)
+            if (strcmp(vob->spu_streams[i].id, "xx") != 0)
                 if ((strncmp(vob->spu_streams[i].id, lang, 2) == 0)) {
                     vobsub_id = i;
                     mp_msg(MSGT_VOBSUB, MSGL_INFO, "Selected VOBSUB language: %d language: %s\n", i, vob->spu_streams[i].id);


More information about the MPlayer-dev-eng mailing list