[FFmpeg-devel] [PATCH v2 2/2] lavf/url: rewrite ff_make_absolute_url() using ff_url_decompose().

Nicolas George george at nsup.org
Wed Aug 5 19:49:57 EEST 2020


Also add and update some tests.

Change the semantic a little, because for filesytem paths
symlinks complicate things.
See the comments in the code for detail.

Signed-off-by: Nicolas George <george at nsup.org>
---
 libavformat/tests/url.c |  60 ++++++++-
 libavformat/url.c       | 261 ++++++++++++++++++++--------------------
 libavformat/url.h       |   4 +-
 tests/ref/fate/url      |  54 ++++++++-
 4 files changed, 245 insertions(+), 134 deletions(-)

diff --git a/libavformat/tests/url.c b/libavformat/tests/url.c
index e7d259ab7d..2440ae08bc 100644
--- a/libavformat/tests/url.c
+++ b/libavformat/tests/url.c
@@ -49,7 +49,13 @@ static void test_decompose(const char *url)
 static void test(const char *base, const char *rel)
 {
     char buf[200], buf2[200];
-    ff_make_absolute_url(buf, sizeof(buf), base, rel);
+    int ret;
+
+    ret = ff_make_absolute_url(buf, sizeof(buf), base, rel);
+    if (ret < 0) {
+        printf("%50s %-20s => error %s\n", base, rel, av_err2str(ret));
+        return;
+    }
     printf("%50s %-20s => %s\n", base, rel, buf);
     if (base) {
         /* Test in-buffer replacement */
@@ -104,6 +110,58 @@ int main(void)
     test("http://server/foo/bar", "/../../../../../other/url");
     test("http://server/foo/bar", "/test/../../../../../other/url");
     test("http://server/foo/bar", "/test/../../test/../../../other/url");
+    test("http://server/foo/bar", "file:../baz/qux");
+    test("http://server/foo//bar/", "../../");
+    test("file:../tmp/foo", "../bar/");
+    test("file:../tmp/foo", "file:../bar/");
+    test("http://server/foo/bar", "./");
+    test("http://server/foo/bar", ".dotfile");
+    test("http://server/foo/bar", "..doubledotfile");
+    test("http://server/foo/bar", "double..dotfile");
+    test("http://server/foo/bar", "doubledotfile..");
+
+    /* From https://tools.ietf.org/html/rfc3986#section-5.4 */
+    test("http://a/b/c/d;p?q", "g:h");           // g:h
+    test("http://a/b/c/d;p?q", "g");             // http://a/b/c/g
+    test("http://a/b/c/d;p?q", "./g");           // http://a/b/c/g
+    test("http://a/b/c/d;p?q", "g/");            // http://a/b/c/g/
+    test("http://a/b/c/d;p?q", "/g");            // http://a/g
+    test("http://a/b/c/d;p?q", "//g");           // http://g
+    test("http://a/b/c/d;p?q", "?y");            // http://a/b/c/d;p?y
+    test("http://a/b/c/d;p?q", "g?y");           // http://a/b/c/g?y
+    test("http://a/b/c/d;p?q", "#s");            // http://a/b/c/d;p?q#s
+    test("http://a/b/c/d;p?q", "g#s");           // http://a/b/c/g#s
+    test("http://a/b/c/d;p?q", "g?y#s");         // http://a/b/c/g?y#s
+    test("http://a/b/c/d;p?q", ";x");            // http://a/b/c/;x
+    test("http://a/b/c/d;p?q", "g;x");           // http://a/b/c/g;x
+    test("http://a/b/c/d;p?q", "g;x?y#s");       // http://a/b/c/g;x?y#s
+    test("http://a/b/c/d;p?q", "");              // http://a/b/c/d;p?q
+    test("http://a/b/c/d;p?q", ".");             // http://a/b/c/
+    test("http://a/b/c/d;p?q", "./");            // http://a/b/c/
+    test("http://a/b/c/d;p?q", "..");            // http://a/b/
+    test("http://a/b/c/d;p?q", "../");           // http://a/b/
+    test("http://a/b/c/d;p?q", "../g");          // http://a/b/g
+    test("http://a/b/c/d;p?q", "../..");         // http://a/
+    test("http://a/b/c/d;p?q", "../../");        // http://a/
+    test("http://a/b/c/d;p?q", "../../g");       // http://a/g
+    test("http://a/b/c/d;p?q", "../../../g");    // http://a/g
+    test("http://a/b/c/d;p?q", "../../../../g"); // http://a/g
+    test("http://a/b/c/d;p?q", "/./g");          // http://a/g
+    test("http://a/b/c/d;p?q", "/../g");         // http://a/g
+    test("http://a/b/c/d;p?q", "g.");            // http://a/b/c/g.
+    test("http://a/b/c/d;p?q", ".g");            // http://a/b/c/.g
+    test("http://a/b/c/d;p?q", "g..");           // http://a/b/c/g..
+    test("http://a/b/c/d;p?q", "..g");           // http://a/b/c/..g
+    test("http://a/b/c/d;p?q", "./../g");        // http://a/b/g
+    test("http://a/b/c/d;p?q", "./g/.");         // http://a/b/c/g/
+    test("http://a/b/c/d;p?q", "g/./h");         // http://a/b/c/g/h
+    test("http://a/b/c/d;p?q", "g/../h");        // http://a/b/c/h
+    test("http://a/b/c/d;p?q", "g;x=1/./y");     // http://a/b/c/g;x=1/y
+    test("http://a/b/c/d;p?q", "g;x=1/../y");    // http://a/b/c/y
+    test("http://a/b/c/d;p?q", "g?y/./x");       // http://a/b/c/g?y/./x
+    test("http://a/b/c/d;p?q", "g?y/../x");      // http://a/b/c/g?y/../x
+    test("http://a/b/c/d;p?q", "g#s/./x");       // http://a/b/c/g#s/./x
+    test("http://a/b/c/d;p?q", "g#s/../x");      // http://a/b/c/g#s/../x
 
     printf("\nTesting av_url_split:\n");
     test2("/foo/bar");
diff --git a/libavformat/url.c b/libavformat/url.c
index abdae49070..1b25169e91 100644
--- a/libavformat/url.c
+++ b/libavformat/url.c
@@ -27,6 +27,7 @@
 #if CONFIG_NETWORK
 #include "network.h"
 #endif
+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 
 /**
@@ -148,146 +149,148 @@ int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
     return 0;
 }
 
-static void trim_double_dot_url(char *buf, const char *rel, int size)
+static int append_path(char *root, char *out_end, char **rout,
+                       const char *in, const char *in_end)
 {
-    const char *p = rel;
-    const char *root = rel;
-    char tmp_path[MAX_URL_SIZE] = {0, };
-    char *sep;
-    char *node;
-
-    /* Get the path root of the url which start by "://" */
-    if (p && (sep = strstr(p, "://"))) {
-        sep += 3;
-        root = strchr(sep, '/');
-        if (!root)
-            return;
-    }
-
-    /* set new current position if the root node is changed */
-    p = root;
-    while (p && (node = strstr(p, ".."))) {
-        av_strlcat(tmp_path, p, node - p + strlen(tmp_path));
-        p = node + 3;
-        sep = strrchr(tmp_path, '/');
-        if (sep)
-            sep[0] = '\0';
-        else
-            tmp_path[0] = '\0';
+    char *out = *rout;
+    const char *d, *next;
+
+    if (in < in_end && *in == '/')
+        in++; /* already taken care of */
+    while (in < in_end) {
+        d = find_delim("/", in, in_end);
+        next = d + (d < in_end && *d == '/');
+        if (d - in == 1 && in[0] == '.') {
+            /* skip */
+        } else if (d - in == 2 && in[0] == '.' && in[1] == '.') {
+            av_assert1(out[-1] == '/');
+            if (out - root > 1)
+                while (out > root && (--out)[-1] != '/');
+        } else {
+            if (out_end - out < next - in)
+                return AVERROR(ENOMEM);
+            memmove(out, in, next - in);
+            out += next - in;
+        }
+        in = next;
     }
-
-    if (!av_stristart(p, "/", NULL) && root != rel)
-        av_strlcat(tmp_path, "/", size);
-
-    av_strlcat(tmp_path, p, size);
-    /* start set buf after temp path process. */
-    av_strlcpy(buf, rel, root - rel + 1);
-
-    if (!av_stristart(tmp_path, "/", NULL) && root != rel)
-        av_strlcat(buf, "/", size);
-
-    av_strlcat(buf, tmp_path, size);
+    *rout = out;
+    return 0;
 }
 
-void ff_make_absolute_url(char *buf, int size, const char *base,
+int ff_make_absolute_url(char *buf, int size, const char *base,
                           const char *rel)
 {
-    char *sep, *path_query;
-    char *root, *p;
-    char tmp_path[MAX_URL_SIZE];
-
-    memset(tmp_path, 0, sizeof(tmp_path));
-    /* Absolute path, relative to the current server */
-    if (base && strstr(base, "://") && rel[0] == '/') {
-        if (base != buf)
-            av_strlcpy(buf, base, size);
-        sep = strstr(buf, "://");
-        if (sep) {
-            /* Take scheme from base url */
-            if (rel[1] == '/') {
-                sep[1] = '\0';
-            } else {
-                /* Take scheme and host from base url */
-                sep += 3;
-                sep = strchr(sep, '/');
-                if (sep)
-                    *sep = '\0';
-            }
-        }
-        av_strlcat(buf, rel, size);
-        trim_double_dot_url(tmp_path, buf, size);
-        memset(buf, 0, size);
-        av_strlcpy(buf, tmp_path, size);
-        return;
+    URLComponents ub, uc;
+    char *out, *out_end, *path;
+    const char *keep, *base_path_end;
+    int use_base_path, simplify_path = 0, ret;
+
+    /* This is tricky.
+       For HTTP, http://server/site/page + ../media/file
+       should resolve into http://server/media/file
+       but for filesystem access, dir/playlist + ../media/file
+       should resolve into dir/../media/file
+       because dir could be a symlink, and .. points to
+       the actual parent of the target directory.
+
+       We'll consider that URLs with an actual scheme and authority,
+       i.e. starting with scheme://, need parent dir simplification,
+       while bare paths or pseudo-URLs starting with proto: without
+       the double slash do not.
+
+       For real URLs, the processing is similar to the algorithm described
+       here:
+       https://tools.ietf.org/html/rfc3986#section-5
+     */
+
+    if (!size)
+        return AVERROR(ENOMEM);
+    out = buf;
+    out_end = buf + size - 1;
+
+    if (!base)
+        base = "";
+    if ((ret = ff_url_decompose(&ub, base, NULL) < 0) ||
+        (ret = ff_url_decompose(&uc, rel,  NULL) < 0))
+        goto error;
+
+    keep = ub.url;
+#define KEEP(component, also) do { \
+        if (uc.url_component_end_##component == uc.url && \
+            ub.url_component_end_##component > keep) { \
+            keep = ub.url_component_end_##component; \
+            also \
+        } \
+    } while (0)
+    KEEP(scheme, );
+    KEEP(authority_full, simplify_path = 1;);
+    KEEP(path,);
+    KEEP(query,);
+    KEEP(fragment,);
+#undef KEEP
+#define COPY(start, end) do { \
+        size_t len = end - start; \
+        if (len > out_end - out) { \
+            ret = AVERROR(ENOMEM);  \
+            goto error; \
+        } \
+        memmove(out, start, len); \
+        out += len; \
+    } while (0)
+    COPY(ub.url, keep);
+    COPY(uc.url, uc.path);
+    if (keep > ub.path)
+        simplify_path = 0;
+    if (URL_COMPONENT_HAVE(uc, scheme))
+        simplify_path = 0;
+    if (URL_COMPONENT_HAVE(uc, authority))
+        simplify_path = 1;
+
+    use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path;
+    if (uc.path > uc.url)
+        use_base_path = 0;
+    if (URL_COMPONENT_HAVE(uc, path) && uc.path[0] == '/')
+        use_base_path = 0;
+    if (use_base_path) {
+        base_path_end = ub.url_component_end_path;
+        if (URL_COMPONENT_HAVE(uc, path))
+            while (base_path_end > ub.path && base_path_end[-1] != '/')
+                base_path_end--;
     }
-    /* If rel actually is an absolute url, just copy it */
-    if (!base || strstr(rel, "://") || rel[0] == '/') {
-        memset(buf, 0, size);
-        trim_double_dot_url(buf, rel, size);
-        return;
-    }
-    if (base != buf)
-        av_strlcpy(buf, base, size);
-
-    /* Strip off any query string from base */
-    path_query = strchr(buf, '?');
-    if (path_query)
-        *path_query = '\0';
-
-    /* Is relative path just a new query part? */
-    if (rel[0] == '?') {
-        av_strlcat(buf, rel, size);
-        trim_double_dot_url(tmp_path, buf, size);
-        memset(buf, 0, size);
-        av_strlcpy(buf, tmp_path, size);
-        return;
-    }
-
-    root = p = buf;
-    /* Get the path root of the url which start by "://" */
-    if (p && strstr(p, "://")) {
-        sep = strstr(p, "://");
-        if (sep) {
-            sep += 3;
-            root = strchr(sep, '/');
-            if (!root)
-                return;
+    if (!use_base_path && !URL_COMPONENT_HAVE(uc, path))
+        simplify_path = 0;
+
+    if (simplify_path) {
+        const char *root = "/";
+        COPY(root, root + 1);
+        path = out;
+        if (use_base_path) {
+            ret = append_path(path, out_end, &out, ub.path, base_path_end);
+            if (ret < 0)
+                goto error;
         }
+        if (URL_COMPONENT_HAVE(uc, path)) {
+            ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path);
+            if (ret < 0)
+                goto error;
+        }
+    } else {
+        if (use_base_path)
+            COPY(ub.path, base_path_end);
+        COPY(uc.path, uc.url_component_end_path);
     }
 
-    /* Remove the file name from the base url */
-    sep = strrchr(buf, '/');
-    if (sep && sep <= root)
-        sep = root;
-
-    if (sep)
-        sep[1] = '\0';
-    else
-        buf[0] = '\0';
-    while (av_strstart(rel, "..", NULL) && sep) {
-        /* Remove the path delimiter at the end */
-        if (sep > root) {
-            sep[0] = '\0';
-            sep = strrchr(buf, '/');
-        }
+    COPY(uc.url_component_end_path, uc.end);
+#undef COPY
+    *out = 0;
+    return 0;
 
-        /* If the next directory name to pop off is "..", break here */
-        if (!strcmp(sep ? &sep[1] : buf, "..")) {
-            /* Readd the slash we just removed */
-            av_strlcat(buf, "/", size);
-            break;
-        }
-        /* Cut off the directory name */
-        if (sep)
-            sep[1] = '\0';
-        else
-            buf[0] = '\0';
-        rel += 3;
-    }
-    av_strlcat(buf, rel, size);
-    trim_double_dot_url(tmp_path, buf, size);
-    memset(buf, 0, size);
-    av_strlcpy(buf, tmp_path, size);
+error:
+    snprintf(buf, size, "invalid:%s",
+             ret == AVERROR(ENOMEM) ? "truncated" :
+             ret == AVERROR(EINVAL) ? "syntax_error" : "");
+    return ret;
 }
 
 AVIODirEntry *ff_alloc_dir_entry(void)
diff --git a/libavformat/url.h b/libavformat/url.h
index ae27da5c73..e33edf0cb9 100644
--- a/libavformat/url.h
+++ b/libavformat/url.h
@@ -312,8 +312,8 @@ int ff_url_join(char *str, int size, const char *proto,
  * @param base the base url, may be equal to buf.
  * @param rel the new url, which is interpreted relative to base
  */
-void ff_make_absolute_url(char *buf, int size, const char *base,
-                          const char *rel);
+int ff_make_absolute_url(char *buf, int size, const char *base,
+                         const char *rel);
 
 /**
  * Allocate directory entry with default values.
diff --git a/tests/ref/fate/url b/tests/ref/fate/url
index 84cf85abdd..7e6395c47b 100644
--- a/tests/ref/fate/url
+++ b/tests/ref/fate/url
@@ -46,9 +46,9 @@ http://[::1]:8080/dev/null =>
 Testing ff_make_absolute_url:
                                             (null) baz                  => baz
                                           /foo/bar baz                  => /foo/baz
-                                          /foo/bar ../baz               => /baz
+                                          /foo/bar ../baz               => /foo/../baz
                                           /foo/bar /baz                 => /baz
-                                          /foo/bar ../../../baz         => /baz
+                                          /foo/bar ../../../baz         => /foo/../../../baz
                                 http://server/foo/ baz                  => http://server/foo/baz
                              http://server/foo/bar baz                  => http://server/foo/baz
                                 http://server/foo/ ../baz               => http://server/baz
@@ -62,6 +62,56 @@ Testing ff_make_absolute_url:
                              http://server/foo/bar /../../../../../other/url => http://server/other/url
                              http://server/foo/bar /test/../../../../../other/url => http://server/other/url
                              http://server/foo/bar /test/../../test/../../../other/url => http://server/other/url
+                             http://server/foo/bar file:../baz/qux      => file:../baz/qux
+                           http://server/foo//bar/ ../../               => http://server/foo/
+                                   file:../tmp/foo ../bar/              => file:../tmp/../bar/
+                                   file:../tmp/foo file:../bar/         => file:../bar/
+                             http://server/foo/bar ./                   => http://server/foo/
+                             http://server/foo/bar .dotfile             => http://server/foo/.dotfile
+                             http://server/foo/bar ..doubledotfile      => http://server/foo/..doubledotfile
+                             http://server/foo/bar double..dotfile      => http://server/foo/double..dotfile
+                             http://server/foo/bar doubledotfile..      => http://server/foo/doubledotfile..
+                                http://a/b/c/d;p?q g:h                  => g:h
+                                http://a/b/c/d;p?q g                    => http://a/b/c/g
+                                http://a/b/c/d;p?q ./g                  => http://a/b/c/g
+                                http://a/b/c/d;p?q g/                   => http://a/b/c/g/
+                                http://a/b/c/d;p?q /g                   => http://a/g
+                                http://a/b/c/d;p?q //g                  => http://g
+                                http://a/b/c/d;p?q ?y                   => http://a/b/c/d;p?y
+                                http://a/b/c/d;p?q g?y                  => http://a/b/c/g?y
+                                http://a/b/c/d;p?q #s                   => http://a/b/c/d;p?q#s
+                                http://a/b/c/d;p?q g#s                  => http://a/b/c/g#s
+                                http://a/b/c/d;p?q g?y#s                => http://a/b/c/g?y#s
+                                http://a/b/c/d;p?q ;x                   => http://a/b/c/;x
+                                http://a/b/c/d;p?q g;x                  => http://a/b/c/g;x
+                                http://a/b/c/d;p?q g;x?y#s              => http://a/b/c/g;x?y#s
+                                http://a/b/c/d;p?q                      => http://a/b/c/d;p?q
+                                http://a/b/c/d;p?q .                    => http://a/b/c/
+                                http://a/b/c/d;p?q ./                   => http://a/b/c/
+                                http://a/b/c/d;p?q ..                   => http://a/b/
+                                http://a/b/c/d;p?q ../                  => http://a/b/
+                                http://a/b/c/d;p?q ../g                 => http://a/b/g
+                                http://a/b/c/d;p?q ../..                => http://a/
+                                http://a/b/c/d;p?q ../../               => http://a/
+                                http://a/b/c/d;p?q ../../g              => http://a/g
+                                http://a/b/c/d;p?q ../../../g           => http://a/g
+                                http://a/b/c/d;p?q ../../../../g        => http://a/g
+                                http://a/b/c/d;p?q /./g                 => http://a/g
+                                http://a/b/c/d;p?q /../g                => http://a/g
+                                http://a/b/c/d;p?q g.                   => http://a/b/c/g.
+                                http://a/b/c/d;p?q .g                   => http://a/b/c/.g
+                                http://a/b/c/d;p?q g..                  => http://a/b/c/g..
+                                http://a/b/c/d;p?q ..g                  => http://a/b/c/..g
+                                http://a/b/c/d;p?q ./../g               => http://a/b/g
+                                http://a/b/c/d;p?q ./g/.                => http://a/b/c/g/
+                                http://a/b/c/d;p?q g/./h                => http://a/b/c/g/h
+                                http://a/b/c/d;p?q g/../h               => http://a/b/c/h
+                                http://a/b/c/d;p?q g;x=1/./y            => http://a/b/c/g;x=1/y
+                                http://a/b/c/d;p?q g;x=1/../y           => http://a/b/c/y
+                                http://a/b/c/d;p?q g?y/./x              => http://a/b/c/g?y/./x
+                                http://a/b/c/d;p?q g?y/../x             => http://a/b/c/g?y/../x
+                                http://a/b/c/d;p?q g#s/./x              => http://a/b/c/g#s/./x
+                                http://a/b/c/d;p?q g#s/../x             => http://a/b/c/g#s/../x
 
 Testing av_url_split:
 /foo/bar                                                     =>                                                    -1 /foo/bar
-- 
2.27.0



More information about the ffmpeg-devel mailing list