"Fossies" - the Fresh Open Source Software Archive

Member "sitecopy-0.16.6/lib/neon/ne_uri.c" (5 Dec 2007, 17549 Bytes) of archive /linux/www/sitecopy-0.16.6.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML, using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ne_uri.c", see the Fossies "Dox" file reference documentation.

    1 /* 
    2    URI manipulation routines.
    3    Copyright (C) 1999-2006, Joe Orton <joe@manyfish.co.uk>
    4 
    5    This library is free software; you can redistribute it and/or
    6    modify it under the terms of the GNU Library General Public
    7    License as published by the Free Software Foundation; either
    8    version 2 of the License, or (at your option) any later version.
    9    
   10    This library is distributed in the hope that it will be useful,
   11    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13    Library General Public License for more details.
   14 
   15    You should have received a copy of the GNU Library General Public
   16    License along with this library; if not, write to the Free
   17    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   18    MA 02111-1307, USA
   19 
   20 */
   21 
   22 #include "config.h"
   23 
   24 #ifdef HAVE_STRING_H
   25 #include <string.h>
   26 #endif
   27 #ifdef HAVE_STRINGS_H
   28 #include <strings.h>
   29 #endif
   30 #ifdef HAVE_UNISTD_H
   31 #include <unistd.h>
   32 #endif
   33 #ifdef HAVE_STDLIB_H
   34 #include <stdlib.h>
   35 #endif
   36 
   37 #include <stdio.h>
   38 
   39 #include <ctype.h>
   40 
   41 #include "ne_string.h" /* for ne_buffer */
   42 #include "ne_alloc.h"
   43 #include "ne_uri.h"
   44 
/* URI ABNF from RFC 3986: */

/* Character-class bit flags.  Every flag below is a distinct single
 * bit, so the URI_* masks further down can OR several classes
 * together and membership can then be tested with one bitwise AND
 * against the flag stored for a character in uri_chars[]. */

#define PS (0x0001) /* "+" */
#define PC (0x0002) /* "%" */
#define DS (0x0004) /* "-" */
#define DT (0x0008) /* "." */
#define US (0x0010) /* "_" */
#define TD (0x0020) /* "~" */
#define FS (0x0040) /* "/" */
#define CL (0x0080) /* ":" */
#define AT (0x0100) /* "@" */
#define QU (0x0200) /* "?" */

#define DG (0x0400) /* DIGIT */
#define AL (0x0800) /* ALPHA */

#define GD (0x1000) /* gen-delims    = "#" / "[" / "]" 
                     * ... except ":", "/", "@", and "?" */

#define SD (0x2000) /* sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
                     *               / "*" / "+" / "," / ";" / "=" 
                     * ... except "+" which is PS */

#define OT (0x4000) /* others */

/* Composite masks, each mirroring one production of the RFC 3986
 * grammar (or a local convenience built from them). */

#define URI_ALPHA (AL)
#define URI_DIGIT (DG)

/* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */
#define URI_UNRESERVED (AL | DG | DS | DT | US | TD)
/* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
#define URI_SCHEME (AL | DG | PS | DS | DT)
/* real sub-delims definition, including "+" */
#define URI_SUBDELIM (PS | SD)
/* real gen-delims definition, including ":", "/", "@" and "?" */
#define URI_GENDELIM (GD | CL | FS | AT | QU)
/* userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) */
#define URI_USERINFO (URI_UNRESERVED | PC | URI_SUBDELIM | CL)
/* pchar = unreserved / pct-encoded / sub-delims / ":" / "@" */
#define URI_PCHAR (URI_UNRESERVED | PC | URI_SUBDELIM | CL | AT)
/* invented: segchar = pchar / "/" */
#define URI_SEGCHAR (URI_PCHAR | FS)
/* query = *( pchar / "/" / "?" ) */
#define URI_QUERY (URI_PCHAR | FS | QU)
/* fragment == query */
#define URI_FRAGMENT URI_QUERY

/* any characters which should be path-escaped: every gen-delim other
 * than "/", every sub-delim, "%" itself, and anything classed as
 * "others". */
#define URI_ESCAPE ((URI_GENDELIM & ~(FS)) | URI_SUBDELIM | OT | PC)
   94 
/* Classification table with one entry per possible byte value: the
 * entry at index N is the single character-class flag assigned to the
 * byte N.  Control characters, space, and all bytes from 0x80 upwards
 * are classed as OT ("others"). */
static const unsigned int uri_chars[256] = {
/* 0xXX    x0      x2      x4      x6      x8      xA      xC      xE     */
/*   0x */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,
/*   1x */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT,
/*   2x */ OT, SD, OT, GD, SD, PC, SD, SD, SD, SD, SD, PS, SD, DS, DT, FS,
/*   3x */ DG, DG, DG, DG, DG, DG, DG, DG, DG, DG, CL, SD, OT, SD, OT, QU,
/*   4x */ AT, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL,
/*   5x */ AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, GD, OT, GD, OT, US,
/*   6x */ OT, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL,
/*   7x */ AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, OT, OT, OT, TD, OT,
/*   8x */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, 
/*   9x */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, 
/*   Ax */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, 
/*   Bx */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, 
/*   Cx */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, 
/*   Dx */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, 
/*   Ex */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, 
/*   Fx */ OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT, OT
};
  114 
  115 #define uri_lookup(ch) (uri_chars[(unsigned char)ch])
  116 
  117 char *ne_path_parent(const char *uri) 
  118 {
  119     size_t len = strlen(uri);
  120     const char *pnt = uri + len - 1;
  121     /* skip trailing slash (parent of "/foo/" is "/") */
  122     if (pnt >= uri && *pnt == '/')
  123     pnt--;
  124     /* find previous slash */
  125     while (pnt > uri && *pnt != '/')
  126     pnt--;
  127     if (pnt < uri || (pnt == uri && *pnt != '/'))
  128     return NULL;
  129     return ne_strndup(uri, pnt - uri + 1);
  130 }
  131 
  132 int ne_path_has_trailing_slash(const char *uri) 
  133 {
  134     size_t len = strlen(uri);
  135     return ((len > 0) &&
  136         (uri[len-1] == '/'));
  137 }
  138 
  139 unsigned int ne_uri_defaultport(const char *scheme)
  140 {
  141     /* RFC2616/3.2.3 says use case-insensitive comparisons here. */
  142     if (ne_strcasecmp(scheme, "http") == 0)
  143     return 80;
  144     else if (ne_strcasecmp(scheme, "https") == 0)
  145     return 443;
  146     else
  147     return 0;
  148 }
  149 
  150 int ne_uri_parse(const char *uri, ne_uri *parsed)
  151 {
  152     const char *p, *s;
  153 
  154     memset(parsed, 0, sizeof *parsed);
  155 
  156     p = s = uri;
  157 
  158     /* => s = p = URI-reference */
  159 
  160     if (uri_lookup(*p) & URI_ALPHA) {
  161         while (uri_lookup(*p) & URI_SCHEME)
  162             p++;
  163         
  164         if (*p == ':') {
  165             parsed->scheme = ne_strndup(uri, p - s);
  166             s = p + 1;
  167         }
  168     }
  169 
  170     /* => s = heir-part, or s = relative-part */
  171 
  172     if (s[0] == '/' && s[1] == '/') {
  173         const char *pa;
  174 
  175         /* => s = "//" authority path-abempty (from expansion of
  176          * either heir-part of relative-part)  */
  177         
  178         /* authority = [ userinfo "@" ] host [ ":" port ] */
  179 
  180         s = pa = s + 2; /* => s = authority */
  181 
  182         while (*pa != '/' && *pa != '\0')
  183             pa++;
  184         /* => pa = path-abempty */
  185         
  186         p = s;
  187         while (p < pa && uri_lookup(*p) & URI_USERINFO)
  188             p++;
  189 
  190         if (*p == '@') {
  191             parsed->userinfo = ne_strndup(s, p - s);
  192             s = p + 1;
  193         }
  194         /* => s = host */
  195 
  196         if (s[0] == '[') {
  197             p = s + 1;
  198 
  199             while (*p != ']' && p < pa)
  200                 p++;
  201 
  202             if (p == pa || (p + 1 != pa && p[1] != ':')) {
  203                 /* Ill-formed IP-literal. */
  204                 return -1;
  205             }
  206 
  207             p++; /* => p = colon */
  208         } else {
  209             /* Find the colon. */
  210             p = pa;
  211             while (*p != ':' && p > s)
  212                 p--;
  213         }
  214 
  215         if (p == s) {
  216             p = pa;
  217             /* No colon; => p = path-abempty */
  218         } else if (p + 1 != pa) {
  219             /* => p = colon */
  220             parsed->port = atoi(p + 1);
  221         }
  222         parsed->host = ne_strndup(s, p - s);
  223         
  224         s = pa;        
  225 
  226         if (*s == '\0') {
  227             s = "/"; /* FIXME: scheme-specific. */
  228         }
  229     }
  230 
  231     /* => s = path-abempty / path-absolute / path-rootless
  232      *      / path-empty / path-noscheme */
  233 
  234     p = s;
  235 
  236     while (uri_lookup(*p) & URI_SEGCHAR)
  237         p++;
  238 
  239     /* => p = [ "?" query ] [ "#" fragment ] */
  240 
  241     parsed->path = ne_strndup(s, p - s);
  242 
  243     if (*p != '\0') {
  244         s = p++;
  245 
  246         while (uri_lookup(*p) & URI_QUERY)
  247             p++;
  248 
  249         /* => p = [ "#" fragment ] */
  250         /* => s = [ "?" query ] [ "#" fragment ] */
  251 
  252         if (*s == '?') {
  253             parsed->query = ne_strndup(s + 1, p - s - 1);
  254             
  255             if (*p != '\0') {
  256                 s = p++;
  257 
  258                 while (uri_lookup(*p) & URI_FRAGMENT)
  259                     p++;
  260             }
  261         }
  262 
  263         /* => p now points to the next character after the
  264          * URI-reference; which should be the NUL byte. */
  265 
  266         if (*s == '#') {
  267             parsed->fragment = ne_strndup(s + 1, p - s - 1);
  268         }
  269         else if (*p || *s != '?') {
  270             return -1;
  271         }
  272     }
  273     
  274     return 0;
  275 }
  276 
  277 /* This function directly implements the "Merge Paths" algorithm
  278  * described in RFC 3986 section 5.2.3. */
  279 static char *merge_paths(const ne_uri *base, const char *path)
  280 {
  281     const char *p;
  282 
  283     if (base->host && base->path[0] == '\0') {
  284         return ne_concat("/", path, NULL);
  285     }
  286     
  287     p = strrchr(base->path, '/');
  288     if (p == NULL) {
  289         return ne_strdup(path);
  290     } else {
  291         size_t len = p - base->path + 1;
  292         char *ret = ne_malloc(strlen(path) + len + 1);
  293 
  294         memcpy(ret, base->path, len);
  295         memcpy(ret + len, path, strlen(path) + 1);
  296         return ret;
  297     }
  298 }
  299 
  300 /* This function directly implements the "Remove Dot Segments"
  301  * algorithm described in RFC 3986 section 5.2.4. */
  302 static char *remove_dot_segments(const char *path)
  303 {
  304     char *in, *inc, *out;
  305 
  306     inc = in = ne_strdup(path);
  307     out = ne_malloc(strlen(path) + 1);
  308     out[0] = '\0';
  309 
  310     while (in[0]) {
  311         /* case 2.A: */
  312         if (strncmp(in, "./", 2) == 0) {
  313             in += 2;
  314         } 
  315         else if (strncmp(in, "../", 3) == 0) {
  316             in += 3;
  317         }
  318 
  319         /* case 2.B: */
  320         else if (strncmp(in, "/./", 3) == 0) {
  321             in += 2;
  322         }
  323         else if (strcmp(in, "/.") == 0) {
  324             in[1] = '\0';
  325         }
  326 
  327         /* case 2.C: */
  328         else if (strncmp(in, "/../", 4) == 0 || strcmp(in, "/..") == 0) {
  329             char *p;
  330 
  331             /* Make the next character in the input buffer a "/": */
  332             if (in[3] == '\0') {
  333                 /* terminating "/.." case */
  334                 in += 2;
  335                 in[0] = '/';
  336             } else {
  337                 /* "/../" prefix case */
  338                 in += 3;
  339             }
  340 
  341             /* Trim the last component from the output buffer, or
  342              * empty it. */
  343             p = strrchr(out, '/');
  344             if (p) {
  345                 *p = '\0';
  346             } else {
  347                 out[0] = '\0';
  348             }
  349         }
  350 
  351         /* case 2.D: */
  352         else if (strcmp(in, ".") == 0 || strcmp(in, "..") == 0) {
  353             in[0] = '\0';
  354         }
  355 
  356         /* case 2.E */
  357         else {
  358             char *p;
  359 
  360             /* Search for the *second* "/" if the leading character is
  361              * already "/": */
  362             p = strchr(in + (in[0] == '/'), '/');
  363             /* Otherwise, copy the whole string */
  364             if (p == NULL) p = strchr(in, '\0');
  365 
  366             strncat(out, in, p - in);
  367             in = p;
  368         }
  369     }
  370 
  371     ne_free(inc);
  372 
  373     return out;
  374 }
  375 
  376 /* Copy authority components from 'src' to 'dest' if defined. */
  377 static void copy_authority(ne_uri *dest, const ne_uri *src)
  378 {
  379     if (src->host) dest->host = ne_strdup(src->host);
  380     dest->port = src->port;
  381     if (src->userinfo) dest->userinfo = ne_strdup(src->userinfo);
  382 }
  383 
  384 /* This function directly implements the "Transform References"
  385  * algorithm described in RFC 3986 section 5.2.2. */
  386 ne_uri *ne_uri_resolve(const ne_uri *base, const ne_uri *relative,
  387                        ne_uri *target)
  388 {
  389     memset(target, 0, sizeof *target);
  390 
  391     if (relative->scheme) {
  392         target->scheme = ne_strdup(relative->scheme);
  393         copy_authority(target, relative);
  394         target->path = remove_dot_segments(relative->path);
  395         if (relative->query) target->query = ne_strdup(relative->query);
  396     } else {
  397         if (relative->host) {
  398             copy_authority(target, relative);
  399             target->path = remove_dot_segments(relative->path);
  400             if (relative->query) target->query = ne_strdup(relative->query);
  401         } else {
  402             if (relative->path[0] == '\0') {
  403                 target->path = ne_strdup(base->path);
  404                 if (relative->query) {
  405                     target->query = ne_strdup(relative->query);
  406                 } else if (base->query) {
  407                     target->query = ne_strdup(base->query);
  408                 }
  409             } else {
  410                 if (relative->path[0] == '/') {
  411                     target->path = remove_dot_segments(relative->path);
  412                 } else {
  413                     char *merged = merge_paths(base, relative->path);
  414                     target->path = remove_dot_segments(merged);
  415                     ne_free(merged);
  416                 }
  417                 if (relative->query) target->query = ne_strdup(relative->query);
  418             }
  419             copy_authority(target, base);
  420         }
  421         if (base->scheme) target->scheme = ne_strdup(base->scheme);
  422     }
  423     
  424     if (relative->fragment) target->fragment = ne_strdup(relative->fragment);
  425 
  426     return target;
  427 }
  428 
  429 ne_uri *ne_uri_copy(ne_uri *dest, const ne_uri *src)
  430 {
  431     memset(dest, 0, sizeof *dest);
  432 
  433     if (src->scheme) dest->scheme = ne_strdup(src->scheme);
  434     copy_authority(dest, src);
  435     if (src->path) dest->path = ne_strdup(src->path);
  436     if (src->query) dest->query = ne_strdup(src->query);
  437     if (src->fragment) dest->fragment = ne_strdup(src->fragment);
  438 
  439     return dest;
  440 }
  441 
  442 void ne_uri_free(ne_uri *u)
  443 {
  444     if (u->host) ne_free(u->host);
  445     if (u->path) ne_free(u->path);
  446     if (u->scheme) ne_free(u->scheme);
  447     if (u->userinfo) ne_free(u->userinfo);
  448     if (u->fragment) ne_free(u->fragment);
  449     if (u->query) ne_free(u->query);
  450     memset(u, 0, sizeof *u);
  451 }
  452 
  453 char *ne_path_unescape(const char *uri) 
  454 {
  455     const char *pnt;
  456     char *ret, *retpos, buf[5] = { "0x00" };
  457     retpos = ret = ne_malloc(strlen(uri) + 1);
  458     for (pnt = uri; *pnt != '\0'; pnt++) {
  459     if (*pnt == '%') {
  460         if (!isxdigit((unsigned char) pnt[1]) || 
  461         !isxdigit((unsigned char) pnt[2])) {
  462         /* Invalid URI */
  463                 ne_free(ret);
  464         return NULL;
  465         }
  466         buf[2] = *++pnt; buf[3] = *++pnt; /* bit faster than memcpy */
  467         *retpos++ = (char)strtol(buf, NULL, 16);
  468     } else {
  469         *retpos++ = *pnt;
  470     }
  471     }
  472     *retpos = '\0';
  473     return ret;
  474 }
  475 
  476 /* CH must be an unsigned char; evaluates to 1 if CH should be
  477  * percent-encoded. */
  478 #define path_escape_ch(ch) (uri_lookup(ch) & URI_ESCAPE)
  479 
  480 char *ne_path_escape(const char *path) 
  481 {
  482     const unsigned char *pnt;
  483     char *ret, *p;
  484     size_t count = 0;
  485 
  486     for (pnt = (const unsigned char *)path; *pnt != '\0'; pnt++) {
  487         count += path_escape_ch(*pnt);
  488     }
  489 
  490     if (count == 0) {
  491     return ne_strdup(path);
  492     }
  493 
  494     p = ret = ne_malloc(strlen(path) + 2 * count + 1);
  495     for (pnt = (const unsigned char *)path; *pnt != '\0'; pnt++) {
  496     if (path_escape_ch(*pnt)) {
  497         /* Escape it - %<hex><hex> */
  498         sprintf(p, "%%%02x", (unsigned char) *pnt);
  499         p += 3;
  500     } else {
  501         *p++ = *pnt;
  502     }
  503     }
  504     *p = '\0';
  505     return ret;
  506 }
  507 
  508 #undef path_escape_ch
  509 
  510 #define CMPWITH(field, func)                    \
  511     do {                                        \
  512         if (u1->field) {                        \
  513             if (!u2->field) return -1;          \
  514             n = func(u1->field, u2->field);     \
  515             if (n) return n;                    \
  516         } else if (u2->field) {                 \
  517             return 1;                           \
  518         }                                       \
  519     } while (0)
  520 
  521 #define CMP(field) CMPWITH(field, strcmp)
  522 #define CASECMP(field) CMPWITH(field, ne_strcasecmp)
  523 
  524 /* As specified by RFC 2616, section 3.2.3. */
  525 int ne_uri_cmp(const ne_uri *u1, const ne_uri *u2)
  526 {
  527     int n;
  528     
  529     CMP(path);
  530     CASECMP(host);
  531     CASECMP(scheme);
  532     CMP(query);
  533     CMP(fragment);
  534     CMP(userinfo);
  535 
  536     return u2->port - u1->port;
  537 }
  538 
  539 #undef CMP
  540 #undef CASECMP
  541 #undef CMPWITH
  542 
  543 /* TODO: implement properly */
  544 int ne_path_compare(const char *a, const char *b) 
  545 {
  546     int ret = ne_strcasecmp(a, b);
  547     if (ret) {
  548     /* This logic says: "If the lengths of the two URIs differ by
  549      * exactly one, and the LONGER of the two URIs has a trailing
  550      * slash and the SHORTER one DOESN'T, then..." */
  551     int traila = ne_path_has_trailing_slash(a),
  552         trailb = ne_path_has_trailing_slash(b),
  553         lena = strlen(a), lenb = strlen(b);
  554     if (traila != trailb && abs(lena - lenb) == 1 &&
  555         ((traila && lena > lenb) || (trailb && lenb > lena))) {
  556         /* Compare them, ignoring the trailing slash on the longer
  557          * URI */
  558         if (strncasecmp(a, b, lena < lenb ? lena : lenb) == 0)
  559         ret = 0;
  560     }
  561     }
  562     return ret;
  563 }
  564 
  565 char *ne_uri_unparse(const ne_uri *uri)
  566 {
  567     ne_buffer *buf = ne_buffer_create();
  568 
  569     if (uri->scheme) {
  570         ne_buffer_concat(buf, uri->scheme, ":", NULL);
  571     }
  572 
  573     if (uri->host) {
  574         ne_buffer_czappend(buf, "//");
  575         if (uri->userinfo) {
  576             ne_buffer_concat(buf, uri->userinfo, "@", NULL);
  577         }
  578         ne_buffer_zappend(buf, uri->host);
  579         
  580         if (uri->port > 0
  581             && (!uri->scheme 
  582                 || ne_uri_defaultport(uri->scheme) != uri->port)) {
  583             char str[20];
  584             ne_snprintf(str, 20, ":%d", uri->port);
  585             ne_buffer_zappend(buf, str);
  586         }
  587     }
  588 
  589     ne_buffer_zappend(buf, uri->path);
  590 
  591     if (uri->query) {
  592         ne_buffer_concat(buf, "?", uri->query, NULL);
  593     }
  594     
  595     if (uri->fragment) {
  596         ne_buffer_concat(buf, "#", uri->fragment, NULL);
  597     }
  598 
  599     return ne_buffer_finish(buf);
  600 }
  601 
  602 /* Give it a path segment, it returns non-zero if child is 
  603  * a child of parent. */
  604 int ne_path_childof(const char *parent, const char *child) 
  605 {
  606     char *root = ne_strdup(child);
  607     int ret;
  608     if (strlen(parent) >= strlen(child)) {
  609     ret = 0;
  610     } else {
  611     /* root is the first of child, equal to length of parent */
  612     root[strlen(parent)] = '\0';
  613     ret = (ne_path_compare(parent, root) == 0);
  614     }
  615     ne_free(root);
  616     return ret;
  617 }