"Fossies" - the Fresh Open Source Software Archive

Member "srg-1.3.6/src/parseURL.cc" (5 Aug 2009, 10255 Bytes) of package /linux/privat/old/srg-1.3.6.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 /*
    2     SRG - Squid Report Generator
    3     Copyright 2005 University of Waikato
    4 
    5     This file is part of SRG.
    6 
    7     SRG is free software; you can redistribute it and/or modify
    8     it under the terms of the GNU General Public License as published by
    9     the Free Software Foundation; either version 2 of the License, or
   10     (at your option) any later version.
   11 
   12     SRG is distributed in the hope that it will be useful,
   13     but WITHOUT ANY WARRANTY; without even the implied warranty of
   14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15     GNU General Public License for more details.
   16 
   17     You should have received a copy of the GNU General Public License
   18     along with SRG; if not, write to the Free Software
   19     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
   20 
   21 */
   22 
   23 #include "srg.h"
   24 
   25 #define STATE_INIT 0
   26 #define STATE_COLON 1
   27 #define STATE_PROTOCOL 2
   28 #define STATE_PASSPORT 3
   29 #define STATE_USERSITE 4
   30 #define STATE_SITE 5
   31 #define STATE_PORT 6
   32 #define STATE_END 7
   33 
   34 /* Parses the specified URL and fills the structure with the different parts
   35  * return 0 on success or non-zero otherwise */
   36 int parseURL(const char*URL, url_request *req) {
   37 
   38     char *start = strdup(URL);
   39     char *tmp = start;
   40     char *t1 = NULL;
   41     char *t2 = NULL;
   42     unsigned int state = STATE_INIT;
   43     
   44     /* Initialise the return structure */
   45     req->protocol = NULL;
   46     req->user = NULL;
   47     req->password = NULL;
   48     req->site = NULL;
   49     req->port = NULL;
   50     req->location = NULL;
   51     
   52     while (*tmp != '\0') {
   53         switch (state) {
   54         case STATE_INIT:
   55             if (*tmp == ':') {
   56                 /* Cannot Start with : */
   57                 if (!t1) {
   58                     free(start);
   59                     return -1;
   60                 }
   61                 *tmp = '\0';
   62                 t1 = start;
   63                 state = STATE_COLON;
   64             } else if (*tmp == '/') {
   65                 /* Cannot Start with / */
   66                 if (!t1) {
   67                     free(start);
   68                     return -1;
   69                 }
   70                 *tmp = '\0';
   71                 req->site = strdup(start);
   72                 t1 = NULL;
   73                 /* Get the location also */
   74                 *tmp = '/';
   75                 req->location = strdup(tmp);
   76                 state = STATE_END;
   77             } else {
   78                 /* Temporary pointer */
   79                 t1 = tmp;           
   80             }
   81             break;
   82         case STATE_COLON:
   83             if (*tmp == '/') {
   84                 state = STATE_PROTOCOL;
   85             } else if(*tmp == '@') {
   86                 /* user:@ (empty password!) */
   87                 *tmp = '\0';
   88                 req->user = strdup(start);
   89                 req->password = strdup("\0");
   90                 t1 = NULL;
   91                 state = STATE_SITE;
   92             } else {
   93                 /* No Protocol, could be password or port */
   94                 t2 = tmp;
   95                 state = STATE_PASSPORT;
   96             } 
   97             break;
   98         case STATE_PROTOCOL:
   99             if (*tmp == '/') {
  100                 /* Valid protocol found */
  101                 req->protocol = strdup(start);
  102                 t1 = NULL;
  103                 /* Could not have a user or a site */
  104                 state = STATE_USERSITE;
  105             } else {
  106                 /* :/ is not a valid string in a URL */
  107                 free(start);
  108                 return -1;
  109             }
  110             break;
  111         case STATE_USERSITE:
  112             if (*tmp == ':') {
  113                 /* Cannot Start with : */
  114                 if (!t1) {
  115                     free(start);
  116                     return -1;
  117                 }
  118                 *tmp = '\0';
  119                 state = STATE_PASSPORT;     
  120             } else if (*tmp == '/') {
  121                 if (!t1) {
  122                     /* /// is not valid in a URL */
  123                     free(start);
  124                     return -1;
  125                 }
  126                 /* No User / Pass, No Port */
  127                 *tmp = '\0';
  128                 req->site = strdup(t1);
  129                 /* Get the location also */
  130                 *tmp = '/';
  131                 req->location = strdup(tmp);
  132                 state = STATE_END;
  133             }
  134             /* Set starting pointer */
  135             if (!t1)
  136                 t1 = tmp;
  137             break;
  138         case STATE_PASSPORT:
  139             if (*tmp == '@') {
  140                 /* We have user/pass */
  141                 *tmp ='\0';
  142                 req->user = strdup(t1);
  143                 if (t2)
  144                     req->password = strdup(t2);
  145                 else
  146                     req->password = strdup("\0");
  147                 t1 = NULL;
  148                 t2 = NULL;
  149                 state = STATE_SITE;
  150             } else if (*tmp == '/') {
  151                 /* We have site / port */
  152                 if (!t2) {
  153                     /* :/ is not valid in a URL */
  154                     free(start);
  155                     return -1;
  156                 }
  157                 *tmp = '\0';
  158                 req->site = strdup(t1);
  159                 req->port = strdup(t2);
  160                 t1 = NULL;
  161                 t2 = NULL;
  162                 /* Get the location also */
  163                 *tmp = '/';
  164                 req->location = strdup(tmp);
  165                 state = STATE_END;
  166             }
  167             /* Set starting pointer */
  168             if (!t2) 
  169                 t2 = tmp;
  170             break;
  171         case STATE_SITE:
  172             if (*tmp == ':') {
  173                 /* Site / Port found */
  174                 if (!t1) {
  175                     /* Cannot have 0 length site */
  176                     free(start);
  177                     return -1;
  178                 }
  179                 *tmp = '\0';
  180                 req->site = strdup(t1);
  181                 t1 = NULL;
  182                 state = STATE_PORT;
  183             } else if (*tmp == '/') {
  184                 /* Site / Location found */
  185                 if (!t1) {
  186                     /* Cannot have 0 length site */
  187                     free(start);
  188                     return -1;
  189                 }
  190                 *tmp = '\0';
  191                 req->site = strdup(t1);
  192                 t1 = NULL;
  193                 /* Get the location also */
  194                 *tmp = '/';
  195                 req->location = strdup(tmp);                
  196                 state = STATE_END;
  197             } else {
  198                 /* Set starting pointer */
  199                 if (!t1)
  200                     t1 = tmp;
  201             }
  202             break;
  203         case STATE_PORT:
  204             if (*tmp == '/') {
  205                 /* Port / Location found */
  206                 if (!t1) {
  207                     /* Cannot have 0 length port */
  208                     free(start);
  209                     return -1;
  210                 }
  211                 *tmp = '\0';
  212                 req->port = strdup(t1);
  213                 t1 = NULL;
  214                 /* And get the location also */
  215                 *tmp = '/';
  216                 req->location = strdup(tmp);                
  217                 state = STATE_END;
  218             }
  219             /* Set starting pointer */
  220             if (!t1)
  221                 t1 = tmp;
  222             break;
  223         case STATE_END:
  224             break;
  225             break;
  226         }
  227         /* Move to next character */
  228         tmp++;
  229     }
  230 
  231     /* Ending cases */
  232     switch (state) {
  233     case STATE_INIT:
  234         /* No : found!, assume form of 'www.google.com', 
  235                  * assign site only 
  236                  */
  237         req->site = strdup(start);
  238         break;
  239     case STATE_COLON:
  240         /* INVALID: Cannot end URL with a : */
  241         free(start);
  242         return -1;
  243         break;
  244     case STATE_PROTOCOL:
  245         /* INVALID: Cannot end URL with :/ */
  246         free(start);
  247         return -1;
  248         break;
  249     case STATE_PASSPORT:
  250         /* We have a URL of the form 'www.google.com:80' */
  251         req->site = strdup(t1);
  252         req->port = strdup(t2);
  253         break;
  254     case STATE_USERSITE:
  255     case STATE_SITE:
  256         /* Assume that we have found a site */
  257         if (!t1) {
  258             /* Must be > 0 length */
  259             free(start);
  260             return -1;
  261         }
  262         req->site = strdup(t1);
  263         break;
  264     case STATE_PORT:
  265         /* Got port but no location, site is already set */
  266         if (!t1) {
  267             /* Cannot have 0 length port */
  268             free(start);
  269             return -1;
  270         }
  271         req->port = strdup(t1);
  272         
  273         break;
  274     case STATE_END:
  275         /* All is Well :) */
  276         break;
  277     }
  278     
  279     /* Always return a valid string for the location */
  280     if (!req->location)
  281         req->location = strdup("\0");
  282 
  283     free(start);
  284 
  285     return 0;
  286 }
  287 
  288 char *asprintURL(url_request *req) {
  289     
  290     char *buffer=NULL;
  291     
  292     /* Check that the parsed string matches the original */
  293     if (req->protocol != NULL) {
  294         if (req->user != NULL && req->password != NULL) {
  295             if (req->port != NULL) {
  296                 if (req->location != NULL) {
  297                     asprintf(&buffer, 
  298                         "%s://%s:%s@%s:%s%s", 
  299                         req->protocol, req->user, 
  300                         req->password, req->site, 
  301                         req->port, req->location);
  302                 } else {
  303                     asprintf(&buffer, 
  304                         "%s://%s:%s@%s:%s", 
  305                         req->protocol, req->user, 
  306                         req->password, req->site, 
  307                         req->port);
  308                 }
  309             } else {
  310                 if (req->location != NULL) {
  311                     asprintf(&buffer, 
  312                         "%s://%s:%s@%s%s", 
  313                         req->protocol, req->user, 
  314                         req->password, req->site, 
  315                         req->location);
  316                 } else {
  317                     asprintf(&buffer, 
  318                         "%s://%s:%s@%s", 
  319                         req->protocol, req->user, 
  320                         req->password, req->site);
  321                 }
  322             }
  323         } else {
  324             if (req->port != NULL) {
  325                 if (req->location != NULL) {
  326                     asprintf(&buffer, 
  327                         "%s://%s:%s%s", 
  328                         req->protocol, req->site, 
  329                         req->port, req->location);
  330                 } else {
  331                     asprintf(&buffer, 
  332                         "%s://%s:%s", 
  333                         req->protocol, req->site, 
  334                         req->port);
  335                 }
  336             } else {
  337                 if (req->location != NULL) {
  338                     asprintf(&buffer, 
  339                         "%s://%s%s", 
  340                         req->protocol, req->site, 
  341                         req->location);
  342                 } else {
  343                     asprintf(&buffer, 
  344                         "%s://%s", req->protocol, 
  345                         req->site);
  346                 }
  347             }
  348         }
  349     } else {
  350         if (req->user != NULL && req->password != NULL) {
  351             if (req->port != NULL) {
  352                 if (req->location != NULL) {
  353                     asprintf(&buffer, 
  354                         "%s:%s@%s:%s%s", req->user, 
  355                         req->password, req->site, 
  356                         req->port, req->location);
  357                 } else {
  358                     asprintf(&buffer, 
  359                         "%s:%s@%s:%s", req->user, 
  360                         req->password, req->site, 
  361                         req->port);
  362                 }
  363             } else {
  364                 if (req->location != NULL) {
  365                     asprintf(&buffer, 
  366                         "%s:%s@%s%s", req->user, 
  367                         req->password, req->site, 
  368                         req->location);
  369                 } else {
  370                     asprintf(&buffer, 
  371                         "%s:%s@%s", req->user, 
  372                         req->password, req->site);
  373                 }
  374             }
  375         } else {
  376             if (req->port != NULL) {
  377                 if (req->location != NULL) {
  378                     asprintf(&buffer, 
  379                         "%s:%s%s", req->site, 
  380                         req->port, req->location);
  381                 } else {
  382                     asprintf(&buffer, 
  383                         "%s:%s", req->site, 
  384                         req->port);
  385                 }
  386             } else {
  387                 if (req->location != NULL) {
  388                     asprintf(&buffer, 
  389                         "%s%s", req->site, 
  390                         req->location);
  391                 } else {
  392                     asprintf(&buffer, 
  393                         "%s", req->site);
  394                 }
  395             }
  396         }       
  397     }
  398 
  399     return buffer;
  400     
  401 }
  402 
  403 void freeURL(url_request *req) {
  404 
  405     /* Free any allocated strings */
  406     if (req->protocol)
  407         free(req->protocol);
  408     if (req->user)
  409         free(req->user);
  410     if (req->password)
  411         free(req->password);
  412     if (req->site)
  413         free(req->site);
  414     if (req->port)
  415         free(req->port);
  416     if (req->location)
  417         free(req->location);
  418 
  419 }
  420 
  421 #ifdef TEST
  422 #include <assert.h>
  423 void testURL(const char *URL, bool is_invalid);
  424 
  425 int main(int argc, char **argv) {
  426 
  427     testURL("www.google.com", false);
  428     testURL("www.google.com:80", false);
  429     testURL("http://www.google.com/", false); 
  430     testURL("http://www.google.com/index.html", false); 
  431     testURL("http://www.google.com:80/", false);
  432     testURL("http://www.google.com:80/index.html", false);
  433     testURL("www.google.com:80/index.html", false);
  434     testURL("www.google.com/index.html", false);
  435     testURL("www.google.com/", false);
  436     testURL("www.google.com:80/", false);
  437     testURL("matt:@www.google.com/index.html", false);
  438     testURL("matt:matt@www.google.com:80/", false);
  439     testURL(":matt@www.google.com:80/", true);
  440     testURL("ftp://www.google.com/", false);
  441 
  442     exit(0);
  443 }
  444 
  445 /* Tests that the specified URL is correctly parsed */
  446 void testURL(const char *URL, bool is_invalid) {
  447 
  448     url_request result;
  449     char *buffer=NULL;
  450 
  451     if (parseURL(URL, &result)!=0) {    
  452         /* Invalid URL, cannot parse */
  453         assert(is_invalid && true);
  454         return;
  455     }
  456     
  457     /* Check that the parsed string matches the original */
  458     buffer = asprintURL(&result);
  459     
  460     int rv = strcasecmp(buffer, URL);
  461     assert(rv==0);
  462     
  463     if (buffer)
  464         free(buffer);
  465     freeURL(&result);
  466 
  467     return;
  468 
  469 }
  470 
  471 #endif
  472