"Fossies" - the Fresh Open Source Software Archive

Member "delegate9.9.13/src/url.c" (14 Aug 2014, 69154 Bytes) of package /linux/misc/old/delegate9.9.13.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "url.c", see the Fossies "Dox" file reference documentation.

    1 /*////////////////////////////////////////////////////////////////////////
    2 Copyright (c) 1994-2000 Yutaka Sato and ETL,AIST,MITI
    3 Copyright (c) 2001-2006 National Institute of Advanced Industrial Science and Technology (AIST)
    4 AIST-Product-ID: 2000-ETL-198715-01, H14PRO-049, H15PRO-165, H18PRO-443
    5 
    6 Permission to use this material for noncommercial and/or evaluation
    7 purpose, copy this material for your own use, and distribute the copies
    8 via publicly accessible on-line media, without fee, is hereby granted
    9 provided that the above copyright notice and this permission notice
   10 appear in all copies.
   11 AIST MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS
   12 MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS
   13 OR IMPLIED WARRANTIES.
   14 /////////////////////////////////////////////////////////////////////////
   15 Content-Type:   program/C; charset=US-ASCII
   16 Program:    url.c (rewrite for relayed-URL in the HTML)
   17 Author:     Yutaka Sato <ysato@etl.go.jp>
   18 Description:
   19 
   20     REWRITING RULE
   21 
   22       Uniform rewriting rule for URLs to be gatewayed by HTTP is:
   23 
   24     N://H:P/F  <=> http://${delegate}/-_-N://H:P/F
   25 
   26       Special rewriting rule for Gopher URL to be gatewayed by Gopher is:
   27 
   28     G://H:P/gF <=> G://${delegate}/g-_-G://H:P/gF
   29 
   30 History:
   31     March94 created
   32     941224  changed the rewriting rule
   33 //////////////////////////////////////////////////////////////////////#*/
   34 #include "delegate.h"
   35 #include "file.h"
   36 #include "url.h"
   37 #include <ctype.h>
   38 
   /* Forward declarations of functions referenced by this file. */
   39 int   reserve_url(Connection *Conn);
   40 const char *CTX_get_modifires(Connection *Conn);
   41 void  CTX_set_modifires(Connection *Conn,PCStr(modifires));
   42 int   hostcmp_lexical(PCStr(h1),PCStr(h2),int cacheonly);
   43 int   scan_CODECONV(PCStr(spec),PCStr(what),int local);
   44 const char *CTX_changeproxy_url_to(Connection*ctx,PCStr(myhostport),PCStr(method),PVStr(url),PVStr(proxy));
   45 
   /* NOTE(review): these two switches are not referenced in the visible
    * part of this file; their exact meaning should be confirmed. */
   46 int ENABLE_ODGU = 0;
   47 int GOPHER_ON_HTTP = 1;
   48 
   /* When non-zero, html_nextTagAttrX() logs every tag/attribute position
    * it examines through sv1log(). */
   49 #define TAGTRACE 0
   50 
   /* Conversion-mask bits defined elsewhere.  html_nextTagAttrX() combines
    * URICONV_ANY and TAGCONV_META with the caller's mask, tests
    * TAGCONV_JAVA and TAGCONV_nKILL to decide whether attribute-less tags
    * are scanned, and uses the address of TAGCONV_TAGEND as a sentinel
    * meaning "positioned at the closing '>' of a tag". */
   51 extern int URICONV_ANY;
   52 extern int URICONV_FULL;
   53 extern int URICONV_MOUNT;
   54 extern int URICONV_NORMAL;
   55 extern int URICONV_PARTIAL;
   56 extern int TAGCONV_nKILL;
   57 extern int TAGCONV_META;
   58 extern int TAGCONV_KILL;
   59 extern int TAGCONV_APPLET;
   60 extern int TAGCONV_JAVA;
   61 extern char TAGCONV_TAGEND[];
   /* Bit set of URL_IN_* flags selecting where URLs are searched for;
    * given its initial value in minit_url(). */
   62 extern int URL_SEARCH;
   63 
   /* Module environment; at present it holds only a placeholder member. */
   64 typedef struct {
   65     int u_dummy;
   66 } UrlEnv;
   /* Allocated on the first call of minit_url(). */
   67 UrlEnv *urlEnv;
   68 void minit_url(){
   69     if( urlEnv == 0 ){
   70         urlEnv = (UrlEnv*)malloc(sizeof(UrlEnv));
   71         URL_SEARCH = URL_IN_HEAD | URL_IN_HTML_TAG;
   72 
   73     }
   74 }
   75 
   76 /*
   77  *  SEARCH URL REFERENCE IN HTML
   78  *  (half done and dangerous X-<)
   79  */
   80 
   /*
    * Return 1 when "tag" starts with "<BASE" (compared without regard to
    * case) and the next character is white space; return 0 otherwise,
    * including when "tag" is NULL.
    */
   static int isBASE(PCStr(tag))
   {
       if( tag == NULL )
           return 0;
       if( strncasecmp(tag,"<BASE",5) != 0 )
           return 0;

       /* <ctype.h> classifiers are only defined for unsigned char values
        * (and EOF); a plain char may be negative for bytes >= 0x80.
        */
       return isspace((unsigned char)tag[5]) ? 1 : 0;
   }
   87 
   88 static void uritrace(PCStr(where),PCStr(tag),PCStr(src))
   89 {   CStr(word,6);
   90     CStr(line,50);
   91 
   92     if( tag )
   93         FStrncpy(word,tag);
   94     else    strcpy(word,"?");
   95     lineScan(src,line);
   96     if( 2 <= LOGLEVEL )
   97     fprintf(stderr,"URL in %s %-6s %s\n",where,word,line);
   98 }
   99 
  /*
   * Character-class helpers used by the tag scanner.  All of them are
   * macros and may evaluate their argument more than once, so do not pass
   * expressions with side effects.  Every parameter is parenthesised so
   * that arguments such as "p+1" or "c|0" expand correctly.
   *
   * iscomtag(p): 1 when p points at '?', 4 when p points at "!--#",
   *              0 otherwise (an older version recognised only "!--#").
   * isAlpha(ch): non-zero for 'a'..'z' and 'A'..'Z'.
   * isSpace(ch): non-zero for space, TAB, CR and LF.
   */
  #define iscomtag(p) \
      ((p)[0]=='?' ? 1 : (((p)[0]=='!'&&(p)[1]=='-'&&(p)[2]=='-'&&(p)[3]=='#') ? 4 : 0))
  #define isAlpha(ch) ( ('a'<=(ch) && (ch)<='z') || ('A'<=(ch) && (ch)<='Z') )
  #define isSpace(ch) ( (ch)==' '||(ch)=='\t'||(ch)=='\r'||(ch)=='\n' )
  107 
  /*
   * Return 1 when "site" looks like a host name: it must start with a
   * letter and, up to the first '/' or the end of the string, consist only
   * of letters, digits, '.' and '-'.  Anything else (including ':')
   * yields 0.
   */
  static int isSite(PCStr(site))
  {   const char *sp;
      unsigned char ch;

      if( !isAlpha(*site) )
          return 0;

      /* "ch" is unsigned char because isdigit() is undefined for negative
       * values, which a plain char holds for bytes >= 0x80.
       */
      for( sp = site+1; (ch = (unsigned char)*sp) != 0; sp++ ){
          if( ch == '/' )
              break;
          if( ch == '.' || ch == '-' )
              continue;
          if( !isdigit(ch) && !isAlpha(ch) )
              return 0;
      }
      return 1;
  }
  /*
   * Return 1 when "path" looks like the first component of a URL path: it
   * must start with a letter and, up to the first '/', '?', quote
   * character or the end of the string, consist only of letters, digits,
   * '.', '-' and '_'.  Anything else yields 0.
   */
  static int isPath(PCStr(path))
  {   const char *pp;
      unsigned char ch;

      if( !isAlpha(*path) )
          return 0;

      /* "ch" is unsigned char because isdigit() is undefined for negative
       * values, which a plain char holds for bytes >= 0x80.
       */
      for( pp = path+1; (ch = (unsigned char)*pp) != 0; pp++ ){
          if( ch == '\'' || ch == '"' || ch == '?' || ch == '/' )
              break;
          if( ch == '.' || ch == '-' || ch == '_' )
              continue;
          if( !isdigit(ch) && !isAlpha(ch) )
              return 0;
      }
      return 1;
  }
  155 
  int isFullURL(PCStr(url));

  /*
   * isAbsFullURL(s): non-zero when s starts something taken to be a URL:
   *   - "//" followed by a host name accepted by isSite(),
   *   - "/" followed by a path component accepted by isPath(),
   *   - "../", or
   *   - a letter for which isFullURL() succeeds.
   * isJSop(ch): ch is one of '(' ',' '=' '+', the characters after which a
   *   quoted URL is looked for in scripts.
   * isCSSu(s): s starts with "url(" (case-insensitive).
   *
   * These are macros that may evaluate their argument several times; the
   * parameters are parenthesised so that compound arguments expand
   * correctly.
   */
  #define isAbsFullURL(s) ( \
         (*(s) == '/' && (((s)[1]=='/' && isSite(&(s)[2])) || isPath(&(s)[1]))) \
      || (*(s) == '.' && (s)[1]=='.' && (s)[2]=='/') \
      || (isAlpha(*(s)) && isFullURL(s)) \
  )

  #define isJSop(ch)  ((ch) == '(' || (ch) == ',' || (ch) == '=' || (ch) == '+')
  #define isCSSu(s)   (strncasecmp((s),"url(",4) == 0)
  165 
  166 static const char *isURLinJavaScript(PCStr(str),char qchp[])
  167 {   char ch;
  168     const char *sp;
  169     const char *tp;
  170 
  171     sp = str;
  172     tp = sp;
  173     ch = *sp;
  174 
  175     if( ch == '(' ){
  176         /* 9.9.8 for ({url:"..." */
  177         if( strncaseeq(sp+1,"{url:",5) ){
  178             ch = sp[1+5];
  179             if( ch == '"' || ch == '\'' ){
  180                 *qchp = ch;
  181                 if( sp[7] == '/' || isAbsFullURL((sp+7)) ){
  182                     return sp+7;
  183                 }
  184             }
  185         }
  186     }
  187     if( isJSop(ch) ){
  188         ch = *++sp;
  189         while( isSpace(ch) )
  190             ch = *++sp;
  191 
  192         if( strneq(sp,"&#39;",5) ){ /* escaped quote char. */
  193             *qchp = ';';
  194             sp += 5;
  195             if( isAbsFullURL(sp) ){
  196                 return sp;
  197             }
  198         }else
  199         if( ch == '"' || ch == '\'' ){
  200             *qchp = ch;
  201             ch = *++sp;
  202             if( isAbsFullURL(sp) ){
  203                 /*
  204                 uritrace("#x# JavaScript","",str);
  205                 */
  206                 return sp;
  207             }
  208         }
  209     }
  210     return 0;
  211 }
  212 
  /* Shorthands for fields of the tag-scanning context.  Each of them
   * expands to a member of Ctx->r_tagctx, so a "Referer *Ctx" must be in
   * scope wherever they are used. */
  213 #define quotech     Ctx->r_tagctx.r_curquote
  214 #define inScript    Ctx->r_tagctx.r_curscript[0]
  215 #define inStyle     Ctx->r_tagctx.r_curstyle[0]
  216 
  217 static void getTagCtx(Referer *Ctx,PCStr(ref),PCStr(p))
  218 {
  219     if( strncasecmp(p,"SCRIPT",6) == 0 ){
  220         char ch;
  221         ch = p[6];
  222         if( isSpace(ch) || ch=='>' ){
  223             if( ref[1] == '/' )
  224                 inScript = 0;
  225             else    inScript = 1;
  226         }
  227     }
  228     if( strncasecmp(p,"STYLE",5) == 0 ){
  229         char ch;
  230         ch = p[5];
  231         if( isSpace(ch) || ch=='>' ){
  232             if( ref[1] == '/' )
  233                 inStyle = 0;
  234             else    inStyle = 1;
  235         }
  236     }
  237 }
  238 static const char *findURLinHTML(Referer *Ctx,PCStr(tag),PCStr(ref))
  239 {   const char *up;
  240     const char *p;
  241     char qch;
  242     char ch;
  243     CStr(word,8);
  244     CStr(line,64);
  245 
  246     up = 0;
  247     qch = 0;
  248 
  249     /*
  250      * Script in Attribute value
  251      * it could be the value of attribute to be converted (attrTobeConv)
  252      */
  253     if( URL_SEARCH & URL_IN_ATTR_STYLE )
  254     if( strncasecmp(ref,"STYLE=",6) == 0 ){
  255         p = ref + 6;
  256         qch = 0;
  257         if( *p == '"' || *p == '\'' )
  258             qch = *p++;
  259         for(; *p && *p != qch; p++ ){
  260             if( isCSSu(p) ){
  261                 up = p+4;
  262                 if( *up == '\'' || *up == '"' )
  263                 {
  264                     qch = *up;
  265                     up++;
  266                 }
  267 
  268                 if( *up != '#' && isAbsFullURL(up) ){
  269                     uritrace("#A# StylesATTR",tag,ref);
  270                     goto FOUND;
  271                 }
  272             }
  273         }
  274     }
  275 
  276     if( URL_SEARCH & URL_IN_ATTR_SCRIPT )
  277     if( tag && strncaseeq(tag,"<INPUT",6) && strncaseeq(ref,"VALUE=",6) ){
  278         uritrace("#B# NOT-ScriptATTR",tag,ref);
  279     }else
  280     for( p = ref; ch = *p; p++ ){
  281         if( isspace(ch) || ch=='"' || ch=='\'' || ch=='>' || ch=='<' )
  282             break;
  283         if( isJSop(ch) ){
  284             if( up = isURLinJavaScript(p,&qch) ){
  285                 uritrace("#B# ScriptATTR",tag,ref);
  286                 goto FOUND;
  287             }
  288         }
  289     }
  290     return 0;
  291 
  292 FOUND:
  293     quotech = qch;
  294     return up;
  295 }
  296 static const char *isURLinEmbeded(Referer *Ctx,PCStr(tag),PCStr(str))
  297 {   const char *up;
  298     char ch;
  299 
  300     ch = *str;
  301     if( URL_SEARCH & URL_IN_HTML_SCRIPT )
  302     if( inScript && isJSop(ch) ){
  303         if( up = isURLinJavaScript(str,&quotech) ){
  304             uritrace("#C# ScriptHTML",tag,str);
  305             return up;
  306         }
  307     }
  308 
  309     if( URL_SEARCH & URL_IN_HTML_STYLE )
  310     if( inStyle && isCSSu(str) ){
  311         up = str+4;
  312         if( *up == '\'' || *up == '"' )
  313             quotech = *up++;
  314 
  315         uritrace("#D# StylesHTML",tag,str);
  316         return up;
  317     }
  318     return 0;
  319 }
  320 static const char *findURLinJavaScript(Referer *Ctx,PCStr(str))
  321 {   char pch;
  322     char ch;
  323     const char *up;
  324     const char *sp;
  325 
  326     pch = 0;
  327     for( sp = str; ch = *sp; ){
  328         if( isJSop(ch) ){
  329             if( up = isURLinJavaScript(sp,&quotech) ){
  330                 uritrace("#E# JavaScript","file",sp);
  331                 return up;
  332             }
  333         }
  334         pch = ch;
  335         if( *sp ) sp++;
  336     }
  337     return 0;
  338 }
  339 static const char *findURLinCSS(Referer *Ctx,PCStr(str))
  340 {   const char *tp;
  341     const char *np;
  342     const char *up;
  343     CStr(line,512);
  344 
  345     for( tp = str; tp && *tp; tp = np ){
  346         lineScan(tp,line);
  347         if( strcasestr(line,"url(") ){
  348             up = strcasestr(tp,"url(") + 4;
  349             if( *up == '"' || *up == '\'' )
  350                 quotech = *up++;
  351 
  352             uritrace("#F# StyleSheet","file",tp);
  353             return up;
  354         }
  355         if( np = strpbrk(tp,"\r\n") ){
  356             while( *np ){
  357                 if( *np != '\r' && *np != '\n' )
  358                     break;
  359                 np++;
  360             }
  361         }
  362     }
  363     return 0;
  364 }
  /* isXMLNS() was once a function of its own (see the commented-out
   * prototype below); it is now an alias of isURLinXMLattr() that takes
   * the scanning context from a "Ctx" variable in the calling scope. */
  365 /*
  366 static const char *isXMLNS(PCStr(tag),PCStr(str))
  367 */
  368 #define isXMLNS(tag,str) isURLinXMLattr(Ctx,tag,str)
  369 
  /*
   * Return the length of "str2" when "str1" begins with "str2", and 0
   * otherwise (also when "str2" is empty).
   *
   * The comparison is case-sensitive, as it has always been in spite of
   * the function name.
   * NOTE(review): confirm whether a case-insensitive match was intended.
   *
   * The previous version advanced both pointers even on the failing
   * comparison and then tested the byte after the mismatch; it thereby
   * accepted a str1 that differed in the last character of str2 and read
   * beyond the terminating NUL of the strings.
   */
  static char strcasestrlen(PCStr(str1),PCStr(str2)){
      int eqlen = 0;

      while( str2[eqlen] != 0 ){
          /* a NUL in str1 differs from the non-NUL str2[eqlen], so the
           * scan never runs past the end of str1
           */
          if( str1[eqlen] != str2[eqlen] )
              return 0;
          eqlen++;
      }
      return (char)eqlen;
  }
  379 /*
  380  *   xmlns[:namespace-prefix]="namespaceURI"
  381  * RDF URI Reference (URIref)
  382  * http://www.w3.org/TR/rdf-primer/
  383  *   rdf:{resource|about|ID|datatype}="URIref"
  384  *   <rdf:xxxx {resource|about|ID|datatype}="URIref" ...>
  385  */
  /* isRDFURI(alen,name): non-zero when "name" begins with one of the RDF
   * attribute names "resource", "about", "ID" or "datatype", as decided by
   * strcasestrlen().  "alen" must be an lvalue; it receives the length of
   * the name that matched, or 0 when none matched. */
  386 #define isRDFURI(alen,name) (\
  387        (alen = strcasestrlen(name,"resource")) \
  388     || (alen = strcasestrlen(name,"about")) \
  389     || (alen = strcasestrlen(name,"ID")) \
  390     || (alen = strcasestrlen(name,"datatype")) \
  391 )
  392 
  393 static const char *isURLinXMLattr(Referer *Ctx,PCStr(tag),PCStr(str))
  394 {   const char *sp;
  395     char ch;
  396     int dlen;
  397     int alen = 0;
  398     int canbeURI = 0;
  399 
  400     sp = str;
  401     while( isspace(*sp) )
  402         sp++;
  403     /*
  404     if( strncasecmp(sp,"xmlns:",6)==0 ){
  405         for( sp = sp+6; ch = *sp; sp++ ){
  406     */
  407     if( dlen = strcasestrlen(sp,"xmlns") ){
  408         if( sp[dlen] == ':' )
  409             dlen++;
  410         canbeURI = 1;
  411     }else
  412     if( (dlen = strcasestrlen(sp,"rdf:")) && isRDFURI(alen,sp+dlen) ){
  413         canbeURI = 2;
  414     }else
  415     if( strcasestrlen(tag,"rdf:") && isRDFURI(alen,sp) ){
  416         dlen = 0;
  417         canbeURI = 3;
  418     }
  419     if( canbeURI ){
  420         if( lURLFIND() ){
  421             CStr(tagb,128);
  422             CStr(line,128);
  423             wordScan(tag,tagb);
  424             lineScan(sp,line);
  425             fprintf(stderr,"{U}XML %d [%-7s] %d+%d %s\n",
  426                 canbeURI,tagb,dlen,alen,line);
  427         }
  428         for( sp = sp+dlen+alen; ch = *sp; sp++ ){
  429             if( ch == '=' )
  430             {
  431                 if( sp[1] == '"' || sp[1] == '\'' ){
  432                     quotech = *++sp;
  433                 }
  434                 return sp+1;
  435             }
  436             if( 0 < alen ){
  437                 break;
  438             }
  439             if( ch == '>' || ch == ' ' )
  440                 break;
  441         }
  442     }
  443     return 0;
  444 }
  445 
  446 int HTML_attrTobeConv(PCStr(attr),PCStr(tag),int *uconvp);
  447 const char *html_nextTagAttrX(void *vBase,PCStr(html),PCStr(ctype),PVStr(rem),const char **tagp,const char **attrp,int *convmaskp)
  448 {   Referer *Base = (Referer*)vBase;
  449     const char *top;
  450     const char *str;
  451     const char *tag;
  452     const char *attr;
  453     const char *ref;
  454     const char *attrtailp;
  455     unsigned char ch;
  456     const char *hp;
  457     int len;
  458     int convmask;
  459     int cvmb;
  460     int isendtag;
  461     const char *atp;
  462     CStr(fname,32);
  463     int NoAttr = 0; /* scan tags without attr. too */
  464 
  465     Referer Ctxb,*Ctx=&Ctxb;
  466     int qconvmask;
  467     int uriconv;
  468     const char *up;
  469 
  470     if( *html == 0 ){
  471         return 0;
  472     }
  473     if( convmaskp && (*convmaskp & TAGCONV_JAVA) ){
  474         NoAttr |= 1;
  475     }
  476     if( TAGCONV_nKILL ){
  477         NoAttr |= 2;
  478     }
  479 
  480     bzero(Ctx,sizeof(Referer));
  481     top = NULL;
  482     tag = NULL;
  483     attr = NULL;
  484     str = html;
  485     if( convmaskp ) convmask = *convmaskp;
  486 
  487     if( convmaskp ){
  488         qconvmask = *convmaskp;
  489     }else{
  490         qconvmask = 0;
  491     }
  492     if( qconvmask == 0 ){
  493         qconvmask = 0xFFFFFFFF;
  494     }
  495     uriconv = qconvmask & (URICONV_ANY|TAGCONV_META);
  496 
  497     if( ctype == 0 || *ctype == 0 ){
  498         if( Base )
  499             ctype = Base->r_cType;
  500         if( ctype == 0 )
  501             ctype = "";
  502     }
  503     if( Base ){
  504         if( Base->r_tagctx.r_curtag[0] ){
  505             tag = Base->r_tagctx.r_curtag;
  506         }
  507         Base->r_tagctx.r_curquote = 0;
  508         Ctx->r_tagctx = Base->r_tagctx;
  509     }else{
  510         inScript = 0;
  511         inStyle = 0;
  512         quotech = 0;
  513     }
  514 
  515 
  516     if( *ctype != 't' && strncasecmp(ctype,"text/",5) != 0 ){
  517     /* not in message body */
  518         if( strncasecmp(str,"WWW-Authenticate:",17) == 0
  519          || strncasecmp(str,"Proxy-Authenticate:",19) == 0 )
  520         if( ref = strcasestr(str,"Realm=<") ){
  521             /* if attrTobeConv() */ {
  522                 attr = str;
  523                 ref += 7;
  524                 top = ref;
  525                 goto exit;
  526             }
  527         }
  528         if( strncasecmp(str,"Location:",9) == 0 
  529         /*
  530          || strncasecmp(str,"Content-Location:",17) == 0 
  531         */
  532          || strncasecmp(str,"URI:",4) == 0 ){
  533             ref = strchr(str,':') + 1;
  534             while( *ref == ' ' )
  535                 ref++;
  536             if( convmaskp ) *convmaskp = convmask;
  537             wordScanY(str,fname,"^:");
  538             if( HTML_attrTobeConv(fname,"Header",convmaskp) ){
  539                 attr = str;
  540                 top = ref;
  541                 goto exit;
  542             }
  543         }
  544     }
  545 
  546     if( strcaseeq(ctype,"text/javascript")
  547      || strcaseeq(ctype,"application/x-javascript")
  548      || strcaseeq(ctype,"text/x-component")
  549     ){
  550         if( uriconv )
  551         if( URL_SEARCH & URL_IN_SCRIPT )
  552         {
  553             if( up = findURLinJavaScript(Ctx,str) ){
  554                 if( convmaskp ) *convmaskp = convmask;
  555                 top = up;
  556                 ref = up;
  557                 attr = up;
  558                 goto exit;
  559             }
  560         }
  561         return 0;
  562     }
  563     if( strcaseeq(ctype,"text/css") ){
  564         if( uriconv )
  565         if( URL_SEARCH & URL_IN_STYLE )
  566         {
  567             if( up = findURLinCSS(Ctx,str) ){
  568                 if( convmaskp ) *convmaskp = convmask;
  569                 top = up;
  570                 ref = up;
  571                 attr = up;
  572                 goto exit;
  573             }
  574         }
  575         return 0;
  576     }
  577     /*
  578     if( strcaseeq(ctype,"text/xml") ){
  579     */
  580     if( strcaseeq(ctype,"text/xml")
  581      || strcaseeq(ctype,"application/xml")
  582      || strcaseeq(ctype,"application/soap+xml")
  583     ){
  584         const char *sp;
  585 
  586         if( uriconv == 0 || (URL_SEARCH & URL_IN_XML) == 0 )
  587             return 0;
  588 
  589         if( tag != NULL ){
  590             if( hp = isXMLNS(tag,str) ){
  591                 top = hp;
  592                 goto exit;
  593             }
  594         }
  595         for( sp = str; ch = *sp; sp++ ){
  596             if( ch == '<' ){
  597             char inattr;
  598             CStr(dom,32);
  599             CStr(name,128);
  600             refQStr(np,name); /**/
  601             const char *nx;
  602 
  603             if( sp[1] == '/' ){
  604                 isendtag = 1;
  605                 sp++;
  606             }else   isendtag = 0;
  607             ++sp;
  608             tag = attr = sp;
  609 
  610             dom[0] = name[0] = 0;
  611             setQStr(np,name,sizeof(name));
  612             nx = name + (sizeof(name)-1);
  613             inattr = 0;
  614             for(; *sp && (ch = *sp) != '>'; sp++ ){
  615                 if( nx <= np )
  616                     break;
  617 
  618                 if( isspace(*sp) ){
  619                     if( hp = isXMLNS(tag,sp) ){
  620                         top = hp;
  621                         goto exit;
  622                     }
  623                     inattr = 1;
  624                 }
  625                 if( inattr == 0 ){
  626                     if( *sp == ':' ){
  627                         strcpy(dom,name);
  628                         np = name;
  629                         attr = (char*)sp+1;
  630                     }else{
  631                         setVStrPtrInc(np,ch);
  632                         setVStrEnd(np,0);
  633                     }
  634                 }
  635             }
  636             if( *sp != '>' )
  637                 return 0;
  638             tag = NULL;
  639             sp++;
  640 
  641             if( isendtag )
  642                 continue;
  643 
  644             while( isspace(*sp) ) sp++;
  645             if( *sp == '<' ){ /* maybe a nested entity */
  646                 sp--;
  647                 continue;
  648             }
  649 
  650             if( strcaseeq(name,"href")
  651              || strcaseeq(name,"src")
  652              || strcaseeq(name,"dst")
  653              || strcaseeq(name,"url")
  654              || strcaseeq(name,"link")
  655             ){
  656                 while( isspace(*sp) )
  657                     sp++;
  658                 top = sp;
  659                 goto exit;
  660             }
  661             }
  662         }
  663         return 0;
  664     }
  665 
  666     /*
  667      * The following code seems to make redundant search for attribute
  668      * even when not in a TAG ...
  669      * Maybe it is to cope with not only TAG but also HTTP header ... 
  670      * or because multiple attributes are in a TAG but "tagp" is not
  671      * restored ? or... most likely,
  672      * just because it did not care TAG when it is created originally...
  673      */
  674     atp = NULL;
  675     for(;;){
  676         isendtag = 0;
  677         if( atp == TAGCONV_TAGEND )
  678             tag = NULL;
  679 
  680 /*
  681 Probably this is obsolete, introduced in 2.8.33 where attribute was
  682 naively searched after any white space, and at the top of each line.
  683 After tag symbols has become to be cared, in 6.1.20, such line
  684 beginning with a tag seems to be excluded.
  685         if( str == html && *str != '<' && *str != '>' )
  686 */
  687         if( 0 )
  688             ref = html;
  689         else{
  690             for( ref = str; ch = *ref; ref++ ){
  691                 if( ch == '<' )
  692                 {
  693                     hp = ref + 1;
  694                     if( *hp == '/' ) hp++;
  695                     if( *hp == 's' || *hp == 'S' )
  696                     getTagCtx(Ctx,ref,hp);
  697 
  698                     if( len = iscomtag(hp) )
  699                         hp += len;
  700                     if( *hp != 0 && !isalpha(*hp) )
  701                         continue; /* not a tag */
  702                     for(; *hp; hp++ )
  703                         if( !isalpha(*hp) )
  704                             break;
  705                     if( *hp == '>' && NoAttr ){
  706                      /* 9.9.2 scan <TAG> without attr. as
  707                       * <EMBED>. Disabled in 7.9.11 by the
  708                       * !isspace() in the following line.
  709                       */
  710                     }else
  711                     if( *hp != 0 && !isspace(*hp) )
  712                         continue; /* not a tag */
  713 
  714                     tag = ref;
  715                     isendtag = ref[1] == '/';
  716                 }
  717                 else
  718                 if( ch == '>' )
  719                 {
  720                     if( tag != NULL ){
  721 /*
  722                         if( isendtag )
  723 */
  724                             break;
  725 
  726                     /* can be bad for begin tags with
  727                      * multiple attributes to be rewriten
  728                      * with "tagp" info. which is not
  729                      * availabe for secondary or after
  730                      * attr. in the current implementation.
  731                      * It must be fixed to make multiple
  732                      * attributes rewriting.
  733                     (6.1.20)
  734                     (7.6.1) this comment (maybe) about
  735                     "isendtag" seems misunderstanding
  736                     thinking the tag is interpreted right
  737                     succeeding SPACE char.)...?
  738                     Interpreting a tag after closing ">" char.
  739                     seems not to affect any attribute
  740                     rewriting.
  741                      */
  742                     }
  743                     tag = NULL;
  744                     isendtag = 0;
  745                 }
  746 
  747                 if( uriconv )
  748                 if( URL_SEARCH & URL_IN_HTML_EMBED )
  749                 if( up = isURLinEmbeded(Ctx,tag,ref) ){
  750                     if( convmaskp ) *convmaskp = convmask;
  751                     top = up;
  752                     ref = up;
  753                     attr = up;
  754                     goto exit;
  755                 }
  756 
  757                 /*
  758                 this should be so, but can be bad for
  759                 TAG-independent attribute rewriting?
  760                  */
  761                 if( tag == NULL )
  762                     continue;
  763 
  764                 if(  isspace(ch) )
  765                     break;
  766                 if( ch == '(' ){
  767                     /* can be a JavaScript function call */
  768                     ref++;
  769                     break;
  770                 }
  771                 if( ch == ';' || ch == '"' || ch == '\'' ){
  772                     ref++;
  773                     break;
  774                 }
  775             }
  776         }
  777             for(; ch = *ref; ref++ )
  778                 if( !isspace(ch) )
  779                     break;
  780 
  781         if( rem != NULL && tag != NULL && *ref == 0 ){
  782             for( hp = tag+1; *hp; hp++ )
  783             {
  784                 if( len = iscomtag(hp) ){
  785                     hp += len - 1;
  786                     continue;
  787                 }
  788                 if( !isalpha(*hp) )
  789                     break;
  790             }
  791             if( *hp == 0 ){
  792                 sv1log("##truncated tag-name:%s\n",tag);
  793                 goto push;
  794             }
  795             while( isspace(*hp) )
  796                 hp++;
  797             if( *hp == 0 ){
  798                 sv1log("##truncated tag-body:%s\n",tag);
  799                 goto push;
  800             }
  801         }
  802         if( *ref == 0 )
  803             break;
  804 
  805         if( *ref == '<' && str < ref ){
  806             str = ref;
  807             continue;
  808         }
  809 
  810         if( *ref == '<' )
  811             tag = ref;
  812 
  813         attr = ref;
  814 
  815         if( *attr == '>' )
  816             atp = TAGCONV_TAGEND;
  817         else    atp = attr;
  818 
  819         if( TAGTRACE ){
  820             CStr(t,9);
  821             CStr(a,13);
  822             FStrncpy(a,atp);
  823             FStrncpy(t,tag?tag:"");
  824             sv1log("## TAG=%8X[%-8s] ATTR=[%-12s]\n",p2i(tag),t,a);
  825         }
  826         attrtailp = 0;
  827         if( rem != NULL && tag != NULL && isalpha(*atp) ){
  828             for( hp = atp+1; *hp; hp++ )
  829                 if( !isalpha(*hp) )
  830                     break;
  831             if( *hp == 0 ){
  832                 sv1log("##truncated attr-name:%s\n",atp);
  833                 goto push;
  834             }
  835             while( isspace(*hp) )
  836                 hp++;
  837             if( *hp == '=' ){
  838                 hp++;
  839                 while( isspace(*hp) )
  840                     hp++;
  841                 quotech = 0;
  842                 if( *hp == '"' || *hp == '\'' ){
  843                     quotech = *hp;
  844                     hp++;
  845                 }
  846                 for(; ch = *hp; hp++ ){
  847                     if( quotech != 0 ){
  848                         if( ch == quotech )
  849                             break;
  850                     }else{
  851                         if( ch == '>' || isspace(ch) )
  852                             break;
  853                     }
  854                 }
  855             }
  856             if( *hp == 0 ){
  857                 sv1log("##truncated attr-value:%s\n",atp);
  858                 goto push;
  859             }
  860             if( *hp == quotech )
  861                 hp++;
  862             while( isspace(*hp) )
  863                 hp++;
  864             attrtailp = hp;
  865         }
  866 
  867         if( convmaskp ) *convmaskp = convmask;
  868         len = HTML_attrTobeConv(atp,tag,convmaskp);
  869         if( len == 0 ){
  870             str = ref + 1;
  871             if( (hp = attrtailp) && *hp == 0 ){
  872                 sv1log("##truncated tag-body:%s\n",str);
  873                 goto push;
  874             }
  875 
  876             if( uriconv )
  877             if( URL_SEARCH & URL_IN_ATTR_EMBED )
  878             if( HTML_attrTobeConv(atp,tag,NULL) ){
  879                 /* attribute to be rewritten but is not the
  880                  * target of current conversion
  881                  * (ex. BASE attribute is not for PARTIAL)
  882                  */
  883             }else
  884             if( up = findURLinHTML(Ctx,tag,ref) ){
  885                 if( convmaskp ) *convmaskp = convmask;
  886                 top = up;
  887                 ref = up;
  888                 attr = up;
  889                 goto exit;
  890             }
  891 
  892             continue;
  893         }
  894         if( atp == TAGCONV_TAGEND ){
  895             top = ref;
  896             goto exit;
  897         }
  898         hp = ref + len;
  899 
  900         while( isspace(*hp) )
  901             hp++;
  902 
  903         switch( *hp ){
  904             case 0:   goto push;
  905             case '=': hp++; break;
  906             default:  str = ref + 1; continue;
  907         }
  908 
  909         while( isspace(*hp) )
  910             hp++;
  911 
  912         if( uriconv )
  913         if( URL_SEARCH & URL_IN_SCRIPTs )
  914         if( inScript && *hp == '\\' && (hp[1]=='\'' || hp[1]=='"') )
  915         {
  916             hp++; /* escaped quote in script */
  917         }
  918 
  919         if( *hp == '"' || *hp == '\'' )
  920         {
  921             quotech = *hp;
  922             hp++;
  923         }
  924 
  925 push:
  926         attrtailp = hp;
  927         if( rem != NULL ){
  928             for( attrtailp = hp; ch = *attrtailp; attrtailp++ ){
  929                 if( quotech != 0 && ch == quotech
  930                  || quotech == 0 && (isspace(ch) || ch == '>')
  931                 ){
  932                     break;
  933                 }
  934             }
  935         }
  936 
  937         if( rem != NULL && *attrtailp == 0 ){
  938             /* pushing a tag fragment from its begining is
  939              * required in recent implementation ...
  940              */
  941             if( tag && strlen(tag) < 1024
  942              && tag != Base->r_tagctx.r_curtag ){
  943                 strcpy(rem,tag);
  944                 *(char*)tag = 0; /* not "const" but fixed */
  945             }else
  946             if( strlen(ref) < 1024 ){
  947             strcpy(rem,ref);
  948             *(char*)ref = 0; /* not "const" but fixed */
  949             }else{
  950 sv1log("#### TOO LONG TO PUSH (%d): %s\n",istrlen(ref),ref);
  951             }
  952             top = NULL;
  953             goto exit;
  954         }
  955         top = hp;
  956         goto exit;
  957     }
  958 exit:
  959     if( tagp != NULL )
  960         *tagp = (char*)tag;
  961     if( attrp != NULL )
  962         *attrp = (char*)attr;
  963     if( Base != NULL ){
  964         Base->r_tagctx = Ctx->r_tagctx;
  965         if( tag ){
  966             wordScan(tag,Base->r_tagctx.r_curtag);
  967         }else{
  968             Base->r_tagctx.r_curtag[0] = 0;
  969         }
  970     }
  971     return top;
  972 }
  973 
  974 #define html_nextTagAttr(h,c,r,t,a,m) html_nextTagAttrX(referer,h,c,r,t,a,m)
      /* Shorthand for html_nextTagAttrX() that supplies the identifier
       * `referer` as the first argument; an object with that name must be
       * in scope wherever this macro is expanded. */
  975 
  976 /*
  977  *  TRANSFORM delegated-URL to NORMAL URL:
  978  *  Delegation information embedded in the URL is removed and parsed.
  979  *  The "url" string passed from the caller will be overwritten.
  980  */
  981 static char *printFlags(Connection *Conn,PVStr(s))
  982 {
  983     return Sprintf(AVStr(s),"=%s=",DELEGATE_FLAGS);
  984 }
  985 const char *endofHOSTPORT = "/? \t\r\n";
      /* Characters treated as the end of a "host:port" part; this file uses
       * the set with strpbrk() and strchr() when stripping delegated URLs. */
  986 
  987 void url_rmprefix(PVStr(proto),PVStr(prefix))
  988 {   const char *p;
  989     int len;
  990     char dch;
  991 
  992     setVStrEnd(prefix,0);
  993     if( strstr(proto,NDGU_MARK) == proto ){
  994         p = proto + strlen(NDGU_MARK);
  995         dch = *p;
  996         if( dch == '=' || dch == '/' ){
  997             for( p++; *p; p++ ){
  998                 if( *p == dch ){
  999                     len = p - proto + 1;
 1000         strncpy(prefix,proto,len); setVStrEnd(prefix,len);
 1001                     strcpy(proto,p+1);
 1002                     break;
 1003                 }
 1004             }
 1005         }
 1006     }
 1007 }
 1008 
 1009 int isLoadableURL(PCStr(url))
 1010 {
 1011     if( strncasecmp(url,"ftp://",6) == 0
 1012      || strncasecmp(url,"file:",5) == 0
 1013      || strncasecmp(url,"data:",5) == 0
 1014      || strncasecmp(url,"enc:",4) == 0
 1015      || strncasecmp(url,"myfile:",7) == 0
 1016      || strncasecmp(url,"builtin:",8) == 0
 1017      || strncasecmp(url,"http://",7) == 0 )
 1018         return 1;
 1019     return 0;
 1020 }
 1021 
 1022 int fromProxyClient(PCStr(url))
 1023 {   int from_proxy = 0;
 1024     const char *sp;
 1025     CStr(proto,32);
 1026 
 1027     if( strncasecmp(url,"http://",  7) == 0 ) return 1;
 1028     if( strncasecmp(url,"nntp://",  7) == 0 ) return 1;
 1029     if( strncasecmp(url,"wais://",  7) == 0 ) return 1;
 1030     if( strncasecmp(url,"ftp://",   6) == 0 ) return 1;
 1031     if( strncasecmp(url,"gopher://",9) == 0 ) return 1;
 1032 
 1033     if( url[0] != '/' ){
 1034         if( sp = scan_URI_scheme(url,AVStr(proto),sizeof(proto)) ){
 1035             if( strncmp(sp,"://",3) == 0 )
 1036                 if( strstr(url,NDGU_MARK) == NULL )
 1037                     from_proxy = 1;
 1038         }
 1039     }
 1040     return from_proxy;
 1041 }
 1042 int is_redirected_url(PCStr(url))
 1043 {
 1044     if( strstr(url,ODGU_MARK) ) return 1;
 1045     if( strstr(url,NDGU_MARK) ) return 1;
 1046     return 0;
 1047 }
 1048 int is_redirected_selector(PCStr(sel))
 1049 {
 1050     if( strncmp(sel,NDGU_MARK,strlen(NDGU_MARK)) == 0 )
 1051         return 1;
 1052     return 0;
 1053 }
 1054 
 1055 static char *scan_flags(char np[],PVStr(tp),PVStr(flags))
 1056 {   const char *fp;
 1057 
 1058     if( *np == '+' || *np == '-' || *np == '=' ){
 1059         if( fp = strchr(np+1,'=') ){
 1060             truncVStr(fp);
 1061             switch( *np ){
 1062                 case '+': onoff_flags(AVStr(flags),np+1,1); break;
 1063                 case '-': onoff_flags(AVStr(flags),np+1,0); break;
 1064                 case '=': wordscanX(np+1,AVStr(flags),64); break;
 1065             }
 1066             strcpy(tp,fp+1);
 1067             np = (char*)tp;
 1068         }
 1069     }
 1070     return np;
 1071 }
 1072 
 1073 static void put_gtype(PVStr(sel),int gtype,int toproxy)
 1074 {   CStr(ssel,URLSZ);
 1075 
 1076     if( !toproxy  || gtype == '7' ){
 1077         if( gtype==' ' || gtype=='\t' || gtype=='\r' || gtype=='\n' )
 1078             gtype = '1';
 1079         strcpy(ssel,sel);
 1080         sprintf(sel,"(:%c:)%s",gtype,ssel);
 1081     }
 1082 }
 1083 int get_gtype(PCStr(gsel),PVStr(sel))
 1084 {   int gtype;
 1085     CStr(path,1024);
 1086 
 1087     if( gsel[0]=='(' && gsel[1]==':' && gsel[3]==':' && gsel[4]==')' ){
 1088         gtype = gsel[2];
 1089         if( sel ) strcpy(sel,gsel+5);
 1090     }else{
 1091         gtype = gsel[0];
 1092         if( gtype=='\n' || gtype=='\r' || gtype=='\t' || gtype==0 )
 1093             gtype = '1';
 1094         else
 1095         if( Xsscanf(gsel,"%s",AVStr(path)) && path[strlen(path)-1] == '/' )
 1096             gtype = '1';
 1097         else
 1098         if( !strchr("0123456789gIT",gtype) )
 1099             gtype = '9';
 1100 
 1101         if( sel ) ovstrcpy((char*)sel,gsel);
 1102     }
 1103     return gtype;
 1104 }
 1105 
 1106 
 1107 static scanListFunc scan_modifier1(PCStr(mod1),PVStr(flags))
 1108 {
 1109     if( strncmp(mod1,"cc.",3) == 0 )
 1110         scan_CODECONV(mod1+3,CCV_TOCL,1);
 1111     else
 1112     if( strncmp(mod1,"cs.",3) == 0 )
 1113         scan_CODECONV(mod1+3,CCV_TOSV,1);
 1114     else
 1115     if( mod1[0] == 'F' )
 1116         strcpy(flags,mod1+1);
 1117     return 0;
 1118 }
 1119 static void scan_modifiers(Connection*ctx,PCStr(mods),PVStr(flags))
 1120 {
 1121     CTX_set_modifires((Connection*)ctx,mods);
 1122     scan_commaList(mods,0,scanListCall scan_modifier1,AVStr(flags));
 1123 }
 1124 
 1125 int scan_protositeport(PCStr(url),PVStr(proto),PVStr(userpasshost),PVStr(port));
 1126 int CTX_url_dereferN(Connection*ctx,PCStr(cproto),PVStr(url),PVStr(modifiers),PVStr(flags),PVStr(proto),PVStr(host),int *iportp,xPVStr(durl),int marklen)
 1127 {   CStr(protob,URLSZ);
 1128     CStr(port,URLSZ);
 1129     CStr(urlh,URLSZ);
 1130     CStr(modb,1024);
 1131     refQStr(pb,modb); /**/
 1132     char ch;
 1133     refQStr(np,durl); /**/
 1134     char gtype;
 1135     int len,ni;
 1136 
 1137     if( durl[marklen] == '/' && durl[marklen+1] != '/' ){
 1138         const char *pp;
 1139         setQStr(pb,modb,sizeof(modb));
 1140         for( pp = durl + marklen + 1; ch = *pp++; ){
 1141             assertVStr(modb,pb+1);
 1142             if( ch == ':' )
 1143                 break;
 1144             if( ch == '/' )
 1145                 break;
 1146             setVStrPtrInc(pb,ch);
 1147             if( isspace(*pp) )
 1148                 break;
 1149         }
 1150         if( ch == '/' ){
 1151             setVStrEnd(pb,0);
 1152             if( modifiers != NULL )
 1153                 strcpy(modifiers,modb);
 1154 
 1155             scan_modifiers(ctx,modb,AVStr(flags));
 1156             Xstrcpy(DVStr(durl,marklen),pp);
 1157         }else{
 1158             ovstrcpy((char*)durl+marklen,durl+marklen+1);
 1159         }
 1160     }else
 1161     if( url < durl ){
 1162         refQStr(pp,durl); /**/
 1163         modb[sizeof(modb)-1] = 0;
 1164         pb = &modb[sizeof(modb)-1];
 1165 
 1166         if( durl[-1] == ')' ){
 1167             for( pp = (char*)durl - 2; url <= pp; pp-- ){
 1168             if( *pp == '(' ){
 1169                 strcpy(pp,durl);
 1170                 durl = pp;
 1171                 break;
 1172             }
 1173             *(char*)--pb = *pp;
 1174             }
 1175         }else{
 1176             for( pp = (char*)durl - 1; url <= pp; pp-- ){
 1177             if( *pp == '/' || isspace(*pp) ){
 1178                 ovstrcpy((char*)pp+1,durl);
 1179                 durl = pp + 1;
 1180                 break;
 1181             }
 1182             *(char*)--pb = *pp;
 1183             }
 1184         }
 1185 
 1186         if( pp = strstr(pb,"-.-") ){
 1187             setVStrEnd(pp,0);
 1188             strcpy(urlh,durl);
 1189             sprintf(durl,"%s:///%s%s",NDGU_MARK,pp+3,urlh);
 1190         }
 1191         if( modifiers != NULL )
 1192             strcpy(modifiers,pb);
 1193         scan_modifiers(ctx,pb,AVStr(flags));
 1194     }
 1195 
 1196     if( &url[1] < durl && strcaseeq(cproto,"http") )
 1197         return 0;
 1198 
 1199     np = (char*)durl + marklen;
 1200     np = scan_flags((char*)np,AVStr(durl),AVStr(flags));
 1201     unescape_specials(np,":","//");
 1202 
 1203     port[0] = 0;
 1204     if( strncmp(np,":///",4) == 0 ){
 1205         protob[0] = 0;
 1206         strcpy(host,"localhost");
 1207         *iportp = SERVER_PORT();
 1208         strcpy(durl,np+4);
 1209         return 1;
 1210     }
 1211     if( strncmp(np,"://",3) == 0 )
 1212         ovstrcpy((char*)np,np+1);
 1213 
 1214     ni = scan_protositeport(np,AVStr(protob),AVStr(host),AVStr(port));
 1215 
 1216     if( ni == 2 || ni == 3 ){
 1217         refQStr(up,urlh); /**/
 1218 
 1219         strcpy(proto,protob);
 1220         up = Sprintf(AVStr(up),"%s://%s",proto,host);
 1221         if( proto[0] == 0 )
 1222             strcpy(proto,cproto);
 1223         if( ni == 2 )
 1224             *iportp = serviceport(proto);
 1225         else{   *iportp = atoi(port);
 1226             up = Sprintf(AVStr(up),":%s",port);
 1227         }
 1228         len = up - urlh;
 1229 
 1230         /* gopher://HP/G-_-gopher://...
 1231          * seems to no more be supported
 1232          */
 1233         gtype = 0;
 1234         /* skip "/Gtype" */
 1235         if( streq(cproto,"gopher") && streq(proto,"gopher") ){
 1236             if( np[len] == '/' ){
 1237                 len++;
 1238                 if( gtype = np[len] )
 1239                 if(strchr(endofHOSTPORT,gtype)==NULL){
 1240                     len++;
 1241                 }
 1242             }
 1243         }
 1244         if( url < durl && durl[-1] == '/' && np[len] == '/' )
 1245             len += 1;
 1246         strcpy(durl,np+len);
 1247         if( gtype )
 1248             put_gtype(AVStr(durl),gtype,0);
 1249         return 1;
 1250     }
 1251     return -1;
 1252 }
 1253 int CTX_url_dereferO(Connection*ctx,PCStr(cproto),PVStr(url),PVStr(modifiers),PVStr(flags),PVStr(proto),PVStr(host),int *iportp,PVStr(durl),int marklen)
 1254 {   const char *hp;
 1255     refQStr(np,durl); /**/
 1256     char gtype;
 1257     int ni;
 1258 
 1259     np = (char*)durl + marklen;
 1260     np = scan_flags((char*)np,AVStr(durl),AVStr(flags));
 1261 
 1262     /*
 1263      *  Gopher      =@=gopher:H:P=Gtype
 1264      *      'Gtype' is used by Gopher/DeleGates who doesn't know
 1265      *      what type the requested infomation is.
 1266      *  Ftp/Gopher  =@=ftp:H:P=Gtype
 1267      *      'Gtype' may be used to determine whether P is a
 1268      *      directory or a flat file.
 1269      *  
 1270      */
 1271     if( (ni = Xsscanf(np,"%[^:]:%[^:]:%d=%c",AVStr(proto),AVStr(host),iportp,&gtype)) == 4
 1272      || (ni = Xsscanf(np,"%[^:]:%[^=]=%c",   AVStr(proto),AVStr(host),&gtype)) == 3 )
 1273     {
 1274         if( ni == 3 )
 1275             *iportp = serviceport(proto);
 1276         if( hp = strpbrk(np+strlen(host),endofHOSTPORT) )
 1277             strcpy(durl,hp);
 1278         put_gtype(AVStr(url),gtype,0);
 1279         return 1;
 1280     }
 1281 
 1282     /*
 1283      *  Genric     =@=proto:H:P
 1284      */
 1285     ni = Xsscanf(np,"%[^:]:%[^:/? \t\r\n]:%d",AVStr(proto),AVStr(host),iportp);
 1286     if( 2 <= ni ){
 1287         if( ni == 2 )
 1288             *iportp = serviceport(proto);
 1289         if( hp = strpbrk(np+strlen(host),endofHOSTPORT) )
 1290             strcpy(durl,hp);
 1291         return 1;
 1292     }
 1293 
 1294     /*
 1295      *  HTTP-Special /=@=:H:P
 1296      */
 1297     ni = Xsscanf(np,":%[^:/? \t\r\n]:%d",AVStr(host),iportp);
 1298     if( 1 <= ni ){
 1299         if( ni == 1 )
 1300             *iportp = serviceport("http");
 1301         if( hp = strpbrk(np+strlen(host),endofHOSTPORT) ){
 1302             if( durl[-1] == '/' && hp[0] == '/' )
 1303                 strcpy(durl,hp+1);
 1304             else    strcpy(durl,hp);
 1305         }
 1306         return 1;
 1307     }
 1308 
 1309     setVStrEnd(host,0);
 1310     setVStrEnd(proto,0);
 1311     return 0;
 1312 }
 1313 int CTX_url_derefer(Connection*ctx,PCStr(cproto),PVStr(url),PVStr(modifiers),PVStr(flags),PVStr(proto),PVStr(host),int *iportp)
 1314 {   refQStr(durl,url); /**/
 1315     int rcode;
 1316 
 1317     if( durl = strstr(url,NDGU_MARK) ){
 1318         rcode = CTX_url_dereferN(ctx,cproto,AVStr(url),AVStr(modifiers),AVStr(flags),AVStr(proto),AVStr(host),iportp,
 1319                 AVStr(durl),strlen(NDGU_MARK));
 1320         if( rcode != -1 )
 1321             return rcode;
 1322     }
 1323 
 1324     if( ENABLE_ODGU )
 1325     if( durl = strstr(url,ODGU_MARK) ){
 1326         rcode = CTX_url_dereferO(ctx,cproto,AVStr(url),AVStr(modifiers),AVStr(flags),AVStr(proto),AVStr(host),iportp,
 1327                 AVStr(durl),strlen(ODGU_MARK));
 1328         if( rcode != -1 )
 1329             return rcode;
 1330     }
 1331     return 0;
 1332 }
 1333 
 1334 /*
 1335  *  site = user:pass@host:port
 1336  *  site = [ [ user [ : pass ] @ ] hostport ]
 1337  *  unreserved = A-Z a-z 0-9 $-_.!~*'(), 
 1338  *  user = *( unreserved | escaped | ;&=+ )
 1339  *  pass = *( unreserved | escaped | ;&=+ )
 1340  */
 1341 char *scan_URI_scheme(PCStr(url),PVStr(scheme),int size)
 1342 {   const char *up;
 1343     unsigned char uc;
 1344     int sx;
 1345 
 1346     sx = 0;
 1347     for( up = url; uc = *up; up++ ){
 1348         if( size <= sx + 1 )
 1349             break;
 1350         if( uc == ':' || isspace(uc) )
 1351             break;
 1352         else    setVStrElemInc(scheme,sx,uc); /**/
 1353     }
 1354     setVStrEnd(scheme,sx); /**/
 1355     return (char*)url + strlen(scheme);
 1356 }
 1357 char *scan_URI_site(PCStr(url),PVStr(site),int size)
 1358 {   CStr(buff,512);
 1359     int len;
 1360 
 1361     if( size == 0 )
 1362         size = 248; /* 7 bytes for :port-# ... */
 1363     len = sizeof(buff);
 1364     if( size < len )
 1365         len = size;
 1366     QStrncpy(buff,url,len);
 1367     setVStrEnd(site,0);
 1368     Xsscanf(buff,"%[-.A-Za-z0-9:@%%$_!~*'(),;&=+#]",AVStr(site));
 1369     url += strlen(site);
 1370     return (char*)url;
 1371 }
 1372 void decomp_URL_site(PCStr(site),PVStr(userpasshost),PVStr(port))
 1373 {   const char *up;
 1374     const char *pp;
 1375 
 1376     setVStrEnd(userpasshost,0);
 1377     setVStrEnd(port,0);
 1378     if( up = strrchr(site,'@') ){
 1379         if( pp = strchr(up,':') ){
 1380             truncVStr(pp); pp++;
 1381             strcpy(port,pp);
 1382         }
 1383         strcpy(userpasshost,site);
 1384     }else{
 1385         Xsscanf(site,"%[^:]:%s",AVStr(userpasshost),AVStr(port));
 1386     }
 1387 }
 1388 void decomp_URL_siteX(PCStr(site),PVStr(userpass),PVStr(user),PVStr(pass),PVStr(hostport),PVStr(host),PVStr(port))
 1389 {   const char *userp;
 1390     const char *passp;
 1391     const char *portp;
 1392 
 1393     strcpy(hostport,site);
 1394     if( userp = strrchr(hostport,'@') ){
 1395         truncVStr(userp); userp++;
 1396         strcpy(userpass,hostport);
 1397         if( passp = strchr(hostport,':') ){
 1398             truncVStr(passp); passp++;
 1399         }else   passp = "";
 1400         nonxalpha_unescape(hostport,AVStr(user),1);
 1401         nonxalpha_unescape(passp,AVStr(pass),1);
 1402         ovstrcpy((char*)hostport,userp);
 1403     }else{
 1404         setVStrEnd(pass,0);
 1405         setVStrEnd(user,0);
 1406         setVStrEnd(userpass,0);
 1407     }
 1408 
 1409     strcpy(host,hostport);
 1410     if( portp = strchr(host,':') ){
 1411         truncVStr(portp); portp++;
 1412         strcpy(port,portp);
 1413     }else{
 1414         setVStrEnd(port,0);
 1415     }
 1416 
 1417 Verbose("S[%s] = UP[%s]U[%s]P[%s] + HP[%s]H[%s]P[%s]\n",
 1418 site, userpass,user,pass, hostport,host,port);
 1419 }
 1420 
 1421 const char *scan_userpassX(PCStr(userpass),AuthInfo *ident);
 1422 const char *scan_url_userpass(PCStr(server),PVStr(user),PVStr(pass),PCStr(dfltuser))
 1423 {   CStr(ub,128);
 1424     CStr(wb,128);
 1425     const char *sp;
 1426     AuthInfo ident;
 1427 
 1428     sp = scan_userpassX(server,&ident);
 1429     wordScan(ident.i_user,ub);
 1430     textScan(ident.i_pass,wb);
 1431     if( *sp != '@' ){
 1432         strcpy(user,dfltuser);
 1433         setVStrEnd(pass,0);
 1434         return server;
 1435     }
 1436     nonxalpha_unescape(ub,AVStr(user),1);
 1437     nonxalpha_unescape(wb,AVStr(pass),1);
 1438     return sp + 1;
 1439 }
 1440 int scan_protositeport(PCStr(url),PVStr(proto),PVStr(userpasshost),PVStr(port))
 1441 {   const char *sp;
 1442     char ch;
 1443     CStr(site,MaxHostNameLen);
 1444 
 1445     sp = url;
 1446     if( *sp != '/' )
 1447         sp = scan_URI_scheme(sp,AVStr(proto),64);
 1448     else    setVStrEnd(proto,0);
 1449     if( *sp == ':' )
 1450         sp++;
 1451 
 1452     if( strncmp(sp,"//",2) == 0 )
 1453         sp += 2;
 1454     else
 1455     if( *sp == '/' )
 1456         sp += 1; /* for IE4.0 */
 1457     else    return 0;
 1458 
 1459     scan_URI_site(sp,AVStr(site),sizeof(site));
 1460     decomp_URL_site(site,AVStr(userpasshost),AVStr(port));
 1461 
 1462     if( *port == 0 )
 1463         return 2;
 1464     else    return 3;
 1465 }
 1466 int url_serviceport(PCStr(url))
 1467 {   CStr(proto,32);
 1468 
 1469     scan_URI_scheme(url,AVStr(proto),sizeof(proto));
 1470     return serviceport(proto);
 1471 }
 1472 
 1473 #define SITEC(c) ((c & 0x80) == 0 && 0x20 < c && c != '/' && c != '?')
 1474 #define PATHC(c) (c != '\r' && c != '\n')
 1475 
 1476 int decomp_absurlX(PCStr(url),PVStr(proto),PVStr(login),PVStr(upath),int ulen,const char **urlpathpp);
 1477 int decomp_absurl(PCStr(url),PVStr(proto),PVStr(login),PVStr(upath),int ulen)
 1478 {
 1479     return decomp_absurlX(url,BVStr(proto),BVStr(login),BVStr(upath),ulen,0);
 1480 }
 1481 int decomp_absurlX(PCStr(url),PVStr(proto),PVStr(login),PVStr(upath),int ulen,const char **urlpathpp)
 1482 {   const char *up = url;
 1483     const char *ux;
 1484     CStr(buf,MaxHostNameLen);
 1485     char *bp; /**/
 1486     const char *bx;
 1487     unsigned char uc;
 1488 
 1489     if( urlpathpp ) *urlpathpp = 0;
 1490     
 1491     if( proto ) setVStrEnd(proto,0);
 1492     if( login ) setVStrEnd(login,0);
 1493     if( upath ) setVStrEnd(upath,0);
 1494 
 1495     bp = (char*)buf;
 1496     bx = bp + 32 - 1;
 1497     while( bp < bx && (uc = *up) && uc != ':' ){ *bp++ = *up++; } *bp = 0;
 1498     if( proto ) strcpy(proto,buf);
 1499     if( *up++ != ':' ) return 0;
 1500     if( *up++ != '/' ) return 1;
 1501     if( *up++ != '/' ) return 1;
 1502 
 1503     bp = (char*)buf;
 1504     ux = up + sizeof(buf) - 1;
 1505     while( up < ux && (uc = *up) && SITEC(uc) ){ *bp++ = *up++; } *bp = 0;
 1506     if( login ) strcpy(login,buf);
 1507     if( urlpathpp ) *urlpathpp = (char*)up;
 1508     if( *up == '?' ) ; else
 1509     if( *up++ != '/' ) return 2;
 1510 
 1511     if( upath == 0 )
 1512         return 3;
 1513     bp = (char*)upath;
 1514     ux = url + (ulen - 1);
 1515     while( up < ux && (uc = *up) && PATHC(uc) ){ *bp++ = *up++; } *bp = 0;
 1516 
 1517     return 3;
 1518 }
 1519 
 1520 int strip_urlhead(PVStr(url),PVStr(proto),PVStr(login))
 1521 {   char rc;
 1522     int ni;
 1523     const char *urlpathp = 0;
 1524 
 1525     ni = decomp_absurlX(url,AVStr(proto),AVStr(login),VStrNULL,0,&urlpathp);
 1526     if( 2 <= ni ){
 1527         if( *urlpathp == '/' )
 1528             ovstrcpy((char*)url,urlpathp);
 1529         else    sprintf(url,"/%s",urlpathp);
 1530     }
 1531     return ni;
 1532 }
 1533 
 1534 const char *scan_userpassX(PCStr(userpass),AuthInfo *ident)
 1535 {   const char *hp;
 1536     const char *xp;
 1537     const char *pp;
 1538     const char *np;
 1539 
 1540     bzero(ident,sizeof(AuthInfo));
 1541     lineScan(userpass,ident->i_user);
 1542     ident->i_pass[0] = 0;
 1543 
 1544     if( xp = strpbrk(ident->i_user,"/?\r\n") )
 1545         truncVStr(xp);
 1546     if( hp = strrchr(ident->i_user,'@') )
 1547         truncVStr(hp);
 1548     if( pp = strchr(ident->i_user,':') ){
 1549         truncVStr(pp);
 1550         wordscanY(pp+1,MVStrSiz(ident->i_pass),"^\r\n");
 1551     }
 1552     if( hp )
 1553         np = &userpass[hp-ident->i_user];
 1554     else    np = &userpass[strlen(userpass)];
 1555     return np;
 1556 }
 1557 #define EOHN    "^:/? \t\r\n\f\""
 1558 int decomp_siteX(PCStr(proto),PCStr(site),AuthInfo *ident)
 1559 {   const char *xp;
 1560     const char *pp;
 1561 
 1562     xp = scan_userpassX(site,ident);
 1563     if( *xp == '@' ){
 1564         site = xp + 1;
 1565     }else{
 1566         ident->i_user[0] = 0;
 1567         ident->i_pass[0] = 0;
 1568     }
 1569 
 1570     pp = wordscanY(site,MVStrSiz(ident->i_Host),EOHN);
 1571     if( *pp == ':' )
 1572         pp++;
 1573     else    pp = "";
 1574     if( pp[0] )
 1575         return ident->i_Port = atoi(pp);
 1576     else    return ident->i_Port = serviceport(proto);
 1577 }
 1578 void site_strippass(PVStr(site))
 1579 {   const char *xp;
 1580     AuthInfo ident;
 1581 
 1582     xp = scan_userpassX(site,&ident);
 1583     if( *xp == '@' ){
 1584         sprintf(site,"%s%s",ident.i_user,xp);
 1585     }
 1586 }
 1587 void url_strippass(PVStr(url))
 1588 {   refQStr(sp,url); /**/
 1589 
 1590     if( sp = strstr(url,"://") )
 1591         site_strippass(QVStr(sp+3,url));
 1592 }
 1593 int scan_hostportX(PCStr(proto),PCStr(hostport),PVStr(host),int hsiz)
 1594 {   int port;
 1595     const char *pp;
 1596 
 1597     port = 0;
 1598     pp = wordscanY(hostport,AVStr(host),hsiz,EOHN);
 1599     if( *pp == ':' )
 1600         port = atoi(pp+1);
 1601     if( port == 0 )
 1602         port = serviceport(proto);
 1603     return port;
 1604 }
 1605 int scan_hostport1X(PCStr(hostport),PVStr(host),int hsiz)
 1606 {   const char *sp;
 1607     char ch;
 1608     refQStr(dp,host); /**/
 1609     const char *xp = &host[hsiz-1];
 1610     const char *pp;
 1611     int port;
 1612 
 1613     port = 0;
 1614     pp = 0;
 1615 
 1616     for( sp = hostport; ch = *sp; sp++ ){
 1617         if( xp <= dp )
 1618             break;
 1619         else
 1620         switch( ch ){
 1621             case '/': case '?':
 1622             case ' ': case '\t': case '\r': case '\n': case '\f':
 1623             goto EXIT;
 1624 
 1625             case ':':
 1626             /* might be one in "user:pass@host" */
 1627             port = atoi(sp+1);
 1628             pp = dp;
 1629             break;
 1630 
 1631             case '@':
 1632             cpyQStr(dp,host);
 1633             port = 0;
 1634             pp = 0;
 1635             break;
 1636 
 1637             default:
 1638             if( (ch & 0x80) || ch <= 0x20 )
 1639                 goto EXIT;
 1640             if( dp != sp )
 1641                 setVStrPtrInc(dp,ch);
 1642             break;
 1643         }
 1644     }
 1645 EXIT:
 1646     if( pp ) truncVStr(pp);
 1647     if( *dp != 0 )
 1648         setVStrEnd(dp,0);
 1649     return port;
 1650 }
 1651 int scan_hostport1pX(PCStr(proto),PCStr(login),PVStr(host),int hsiz)
 1652 {   int port;
 1653 
 1654     port = scan_hostport1X(login,AVStr(host),hsiz);
 1655     if( port == 0 )
 1656         port = serviceport(proto);
 1657     return port;
 1658 }
 1659 int scan_hostport0(PCStr(hostport),PVStr(host))
 1660 {   const char *sp;
 1661     char ch;
 1662     refQStr(dp,host); /**/
 1663     int port;
 1664 
 1665     port = 0;
 1666     for( sp = hostport; ch = *sp; sp++ ){
 1667         assertVStr(host,dp+1);
 1668         if( ch == ':' ){
 1669             port = atoi(sp+1);
 1670             break;
 1671         }
 1672         if( strchr("/ \t\r\n",ch) )
 1673             break;
 1674         setVStrPtrInc(dp,ch);
 1675     }
 1676     setVStrEnd(dp,0);
 1677     return port;
 1678 }
 1679 int scan_hostport(PCStr(proto),PCStr(hostport),PVStr(host))
 1680 {   int iport;
 1681 
 1682     iport = scan_hostport0(hostport,AVStr(host));
 1683     if( iport == 0 ){
 1684         iport = serviceport(proto);
 1685         /*
 1686         if( iport == 0 )
 1687         syslog_ERROR("## standard port for `%s' is unknown\n",proto);
 1688         */
 1689     }
 1690     return iport;
 1691 }
 1692 
 1693 /*
 1694  *  EXPAND PARTIAL HTTP-URL TO FULL SPEC URL:
 1695  *  Absolute path in URL which have no http://H:P should be expanded to
 1696  *  full description of URL, that is with http://HOST:PORT.
 1697  *  Relative path will be expanded with http:H:P in the HTTP clients.
 1698  */
 1699 char *HostPort(PVStr(hostport),PCStr(proto),PCStr(host),int port)
 1700 {
 1701     if( serviceport(proto) != port )
 1702         sprintf(hostport,"%s:%d",host,port);
 1703     else    strcpy(hostport,host);
 1704     return (char*)hostport;
 1705 }
 1706 
 1707 #define isSchemeChar(ch)    (isalnum(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')
 1708 
 1709 int isFullURL(PCStr(url))
 1710 {   const char *up;
 1711     char ch;
 1712     CStr(proto,128);
 1713     refQStr(pp,proto); /**/
 1714 
 1715     if( !isSchemeChar(url[0]) )
 1716         return 0;
 1717 
 1718     for( up = url; isSchemeChar(ch = *up); up++ )
 1719     {
 1720         if( 32 <= pp - proto )
 1721             break;
 1722         setVStrPtrInc(pp,ch);
 1723     }
 1724     setVStrEnd(pp,0);
 1725 
 1726     if( up[0] == ':' ){
 1727         if( up[1] == '/' && up[2] == '/' )
 1728             return 1;
 1729         if( localPathProto(proto) && isFullpath(&up[1]) )
 1730             return 1;
 1731         if( streq(proto,"builtin") )
 1732             return 1;
 1733         if( streq(proto,"news") )
 1734             return 1;
 1735         if( streq(proto,"mailto") )
 1736             return 1;
 1737         if( streq(proto,"data") )
 1738             return 1;
 1739     }
 1740     return 0;
 1741 }
 1742 
 1743 const char *CTX_changeproxy_url(Connection*ctx,PCStr(clif),PCStr(method),PVStr(url),PVStr(proxy))
 1744 {   const char *opt;
 1745     const char *mark;
 1746 
 1747     if( opt = CTX_changeproxy_url_to(ctx,clif,method,AVStr(url),AVStr(proxy)) )
 1748         return opt;
 1749 
 1750     if( url[0] == '/' )
 1751     if( (mark = NDGU_MARK) && strncmp(url+1,mark,strlen(mark)) == 0
 1752      || (mark = ODGU_MARK) && strncmp(url+1,mark,strlen(mark)) == 0 )
 1753         return CTX_changeproxy_url_to(ctx,clif,method,QVStr(url+1+strlen(mark),url),AVStr(proxy));
 1754 
 1755     return NULL;
 1756 }
 1757 
 1758 int url_upathbaselen(PCStr(base),int blen)
 1759 {   const char *sp;
 1760     const char *xp;
 1761     const char *tp;
 1762     char tc;
 1763     int nblen;
 1764 
 1765     sp = 0;
 1766     xp = base + blen;
 1767     for( tp = base; tp < xp; tp++ ){
 1768         tc = *tp;
 1769         if( tc == '?' )
 1770             break;
 1771         if( tc == '/' )
 1772             sp = tp;
 1773     }
 1774     if( sp == 0 )
 1775         nblen = 0;
 1776     else    nblen = (sp+1) - base;
 1777     if( nblen != blen ) 
 1778         sv1vlog("URL BASE = %d/%d [%s]\n",nblen,blen,base);
 1779     return nblen;
 1780 }
 1781 int scan_url1(PCStr(url),PVStr(values));
 1782 static void setBASE(Referer *referer,PCStr(url))
 1783 {   CStr(values,URLSZ);
 1784     const char *av[64]; /**/
 1785     const char *v1;
 1786     refQStr(ap,referer->r_altbuf); /**/
 1787     const char *dp;
 1788     int len;
 1789     CStr(burl,URLSZ);
 1790 
 1791     if( referer->r_altbuf == NULL )
 1792         return;
 1793 
 1794     if( (len = scan_url1(url,AVStr(values))) <= 0 )
 1795         return;
 1796 
 1797     strncpy(burl,url,len); setVStrEnd(burl,len);
 1798     sv1log("<BASE HREF=%s>\n",burl);
 1799 
 1800     stoV(values,64,av,'\n');
 1801     if( v1 = getv(av,"proto")){
 1802         referer->r_sv.u_proto = ap;
 1803         strcpy(ap,v1);
 1804         ap += strlen(ap) + 1;
 1805     }
 1806     if( v1 = getv(av,"host") ){
 1807         referer->r_sv.u_host = ap;
 1808         strcpy(ap,v1);
 1809         ap += strlen(ap) + 1;
 1810     }
 1811     if( v1 = getv(av,"port") )
 1812         referer->r_sv.u_port = atoi(v1);
 1813 
 1814     if( v1 = getv(av,"path") ){
 1815         referer->r_sv.u_path = ap;
 1816         strcpy(ap,v1);
 1817         ap += strlen(ap) + 1;
 1818 
 1819         {
 1820         int blen;
 1821         referer->r_sv.u_base = ap;
 1822         blen = url_upathbaselen(v1,strlen(v1));
 1823         strncpy(ap,v1,blen);
 1824         XsetVStrEnd(AVStr(ap),blen);
 1825         ap += blen + 2;
 1826         }
 1827     }
 1828 }
 1829 static void getBASE(Referer *referer,const char **myhp,const char **proto,const char **hostport,const char **host,int *port,const char **base)
 1830 {
 1831     if( referer->r_qvbase.u_proto ){ /* v9.9.11 new-140810f, VBASE */
 1832         *myhp = referer->r_qvbase.u_hostport;
 1833         *proto = referer->r_qvbase.u_proto;
 1834         *hostport = referer->r_qvbase.u_hostport;
 1835         *host = referer->r_qvbase.u_host;
 1836         *port = referer->r_qvbase.u_port;
 1837         *base = referer->r_qvbase.u_base;
 1838         return;
 1839     }else
 1840     if( referer->r_vb.u_proto )
 1841         *myhp = referer->r_vb.u_hostport;
 1842     else    *myhp = referer->r_my.u_hostport;
 1843 
 1844     *proto = referer->r_sv.u_proto;
 1845     *hostport = referer->r_sv.u_hostport;
 1846     *host = referer->r_sv.u_host;
 1847     *port = referer->r_sv.u_port;
 1848     *base = referer->r_sv.u_base;
 1849 }
 1850 
 1851 void url_relative(PVStr(relurl),PCStr(absurl),PCStr(baseurl))
 1852 {   int ui,nsl;
 1853 
 1854     nsl = 0;
 1855     for( ui = 0; absurl[ui] && baseurl[ui]; ui++ ){
 1856         if( absurl[ui] != baseurl[ui] )
 1857             break;
 1858         if( absurl[ui] == '/' )
 1859             nsl++;
 1860         if( nsl == 3 )
 1861             break;
 1862     }
 1863     if( nsl == 3 ){
 1864         setVStrEnd(relurl,0);
 1865         return;
 1866     }
 1867     strcpy(relurl,absurl);
 1868 }
 1869 
 1870 #define UREND(ch)   (ch=='"' || ch=='>' || isspace(ch) || ch=='\0')
 1871 #define CURDIR(u)   (u[0]=='.' && (u[1]=='/' || UREND(u[1])) ? &u[1] : 0)
 1872 #define UPDIR(u)    (u[0]=='.' ? CURDIR((&u[1])) : 0)
 1873 
 1874 int urlpath_normalize(PCStr(url),PVStr(rurl))
      /*
       * Copy the URL path `url` into `rurl`, dropping "." elements and
       * resolving ".." elements against the output produced so far.
       * Copying stops at '?' or at a character matched by UREND(); the
       * rest of the input from there on is appended as it is.
       * The tests of the form `xp != up` skip the store when the output
       * position is the same address as the input position.
       * Returns non-zero when the final output position differs from the
       * final input position.
       * NOTE(review): the `xp != up` tests suggest that rurl may be the
       * same buffer as url; in that case strcpy(xp,up) below copies
       * between overlapping regions -- confirm against the callers.
       */
 1875 {   const char *up;
 1876     const char *np;
 1877     refQStr(xp,rurl); /**/
 1878     char uc;
 1879     int norm;
 1880 
 1881     up = url;
 1882     norm = 0; /* counts removed "." and ".." elements; not returned */
 1883 
 1884     while( uc = *up ){
 1885         assertVStr(rurl,xp+1);
 1886         /*
 1887          * up points to the top of a URL element
 1888          */
 1889         if( uc == '/' ){
 1890             if( xp != up )
 1891                 setVStrPtrInc(xp,uc);
 1892             else    xp++;
 1893             uc = *++up;
 1894         }
              /* end of the path part: append the rest and finish */
 1895         if( uc == '?' || UREND(uc) ){
 1896             if( xp != up ){
 1897                 strcpy(xp,up);
 1898                 xp += strlen(xp);
 1899             }
 1900             break;
 1901         }
              /* "." element: skip it in the input */
 1902         if( np = CURDIR(up) ){
 1903             norm++;
 1904             if( *np == '/' )
 1905                 np++;
 1906             up = np;
 1907             continue;
 1908         }
              /* ".." element: skip it and back the output up to just
               * after the previous '/' */
 1909         if( np = UPDIR(up) ){
 1910             norm++;
 1911             if( *np == '/' )
 1912                 np++;
 1913             up = np;
 1914             if( rurl < xp )
 1915                 xp--;
 1916             while( rurl < xp ){
 1917                 if( *--xp == '/' ) 
 1918                     break;
 1919             } 
 1920             if( *xp == '/')
 1921                 xp++;
 1922             setVStrEnd(xp,0);
 1923             continue;
 1924         }
 1925 
 1926         /*
 1927          * skip to the top of the next URL element
 1928          */
 1929         while( uc = *up ){
 1930             if( uc == '/' || uc == '?' || UREND(uc) )
 1931                 break;
 1932             if( xp != up )
 1933                 setVStrPtrInc(xp,*up++);
 1934             else{
 1935                 xp++;
 1936                 up++;
 1937             }
 1938         }
 1939     }
          /* terminate the output unless it coincides with the input */
 1940     if( xp != up )
 1941         XsetVStrEnd(AVStr(xp),0);
 1942 
 1943     return xp != up;
 1944 }
 1945 
 1946 /*
 1947  * care an abnormal pointer to outer space of the server ...
 1948  * care only "../" at the top of URL to make the normalization be light weight
 1949  */
 1950 int url_normalize(PCStr(base),PCStr(url),int *blen)
 1951 {   const char *up;
 1952     const char *bp;
 1953     const char *xp;
 1954     int nup,abu;
 1955 
 1956     /*
 1957      * "./" should not be stripped ?
 1958      * when the "base" is not root of the site...
 1959      * if( !UPDIR(url) ){
 1960      */
 1961     if( !CURDIR(url) && !UPDIR(url) ){
 1962         *blen = strlen(base);
 1963         return 0;
 1964     }
 1965 
 1966     bp = base + strlen(base);
 1967     up = url;
 1968     nup = 0;
 1969     abu = 0;
 1970     if( xp = CURDIR(up) ){
 1971         if( *xp != '/' )
 1972             up = xp;
 1973         else    up = xp + 1;
 1974     }
 1975     while( xp = UPDIR(up) ){
 1976         nup++;
 1977         if( bp == base )
 1978             abu = 1;
 1979         else{
 1980             while( base < bp )
 1981                 if( *--bp == '/' )
 1982                     break;
 1983         }
 1984         if( *xp != '/' ){
 1985             up = xp;
 1986             break;
 1987         }else{
 1988             up = xp + 1;
 1989         }
 1990     }
 1991     if( nup ){
 1992         if( bp == base )
 1993             abu = 1;
 1994         else{
 1995             while( base < bp )
 1996                 if( *--bp == '/' )
 1997                     break;
 1998         }
 1999     }
 2000     if( abu && LOG_VERBOSE ){
 2001         CStr(ub,32);
 2002         QStrncpy(ub,url,16);
 2003         Verbose("ABNORMAL-URL: base<%s> url<%s>\n",base,ub);
 2004     }
 2005     *blen = bp - base;
 2006     return up - url;
 2007 }
 2008 
 2009 int java_conv(PCStr(line),PVStr(xline),int uconvs)
 2010 {   int uconv,nconv;
 2011     const char *sp = line;
 2012     const char *np;
 2013     refQStr(xp,xline); /**/
 2014     const char *tagp;
 2015     int len;
 2016     const char *tag;
 2017     CStr(tagb,32);
 2018     const char *tp;
 2019     Referer *referer = 0;
 2020 
 2021     for( nconv = 0; ; nconv++ ){
 2022         uconv = uconvs;
 2023         np = html_nextTagAttr(sp,"",VStrNULL,&tagp,NULL,&uconv);
 2024         if( np == NULL )
 2025             break;
 2026 
 2027         if( (uconv & (TAGCONV_KILL|TAGCONV_JAVA)) == 0 )
 2028             break;
 2029 
 2030         tagb[0] = 0;
 2031         if( tagp != NULL ){
 2032             tp = tagp;
 2033             if( *tp == '<' ){
 2034                 tp++;
 2035                 if( *tp == '/' )
 2036                     tp++;
 2037                 wordScanY(tp,tagb,"^ \t\r\n>");
 2038                 tag = tagb;
 2039             }
 2040         }
 2041 
 2042         if( tagp != NULL ){
 2043             if( (uconv & TAGCONV_KILL) && tagb[0] ){
 2044             }else
 2045             if( strncasecmp(tagp,"</APPLET",8) == 0 ) tag = "APPLET"; else
 2046             if( strncasecmp(tagp,"<APPLET", 7) == 0 ) tag = "APPLET"; else
 2047             if( strncasecmp(tagp,"</OBJECT",8) == 0 ) tag = "OBJECT"; else
 2048             if( strncasecmp(tagp,"<OBJECT", 7) == 0 ) tag = "OBJECT"; else
 2049             if( strncasecmp(tagp,"</EMBED", 7) == 0 ) tag = "EMBED";  else
 2050             if( strncasecmp(tagp,"<EMBED",  6) == 0 ) tag = "EMBED";  else
 2051             {
 2052                 if( TAGTRACE )
 2053                 sv1log("## TAG NOMATCH %s\n",tagp);
 2054                 tagp = NULL;
 2055             }
 2056         }
 2057 
 2058         if( tagp == NULL ){
 2059             len = np - sp;
 2060             Bcopy(sp,xp,len);
 2061             xp += len;
 2062             sp = np;
 2063             continue;
 2064         }
 2065 
 2066         sv1log("## TAG %s -> killed-%s\n",tag,tag);
 2067         len = tagp+1 - sp;
 2068         Bcopy(sp,xp,len);
 2069         xp += len;
 2070         sp = tagp+1;
 2071         XsetVStrEnd(AVStr(xp),0);
 2072 
 2073         if( *sp == '/' ){
 2074             sp += 1;
 2075             setVStrPtrInc(xp,'/');
 2076         }
 2077         sp += strlen(tag);
 2078         sprintf(xp,"killed-%s",tag);
 2079         xp += strlen(xp);
 2080     }
 2081     strcpy(xp,sp);
 2082     return nconv;
 2083 }
 2084 
 2085 int url_unify_ports = 0;
 2086 #define PORT_MARK   "-.-P"
 2087 void url_delport(PVStr(url),int *portp)
 2088 {   const char *dp; /* not "const" but fixed */
 2089     CStr(port,32);
 2090 
 2091     if( dp = strstr(url,PORT_MARK) ){
 2092         wordScanY(dp+4,port,"0123456789");
 2093         *portp = atoi(port);
 2094         ovstrcpy((char*)dp,dp+4+strlen(port));
 2095     }
 2096 }
 2097 #define EOURL   "^ \t\r\n\"'>"
 2098 int url_movport(PCStr(url),PVStr(vurl),int siz)
 2099 {   refQStr(dp,vurl); /**/
 2100     CStr(proto,64);
 2101     CStr(port,32);
 2102     CStr(xport,32);
 2103     int ilen = 0;
 2104 
 2105     if( !url_unify_ports )
 2106         return 0;
 2107 
 2108     wordscanY(url,AVStr(vurl),siz,EOURL);
 2109     if( dp = strstr(vurl,"://") )
 2110     if( dp = strpbrk(dp+3,":/? \t\r\n\"'") )
 2111     if( *dp == ':' ){
 2112         ilen = strlen(vurl);
 2113         wordScanY(dp+1,port,"0123456789");
 2114         if( port[0] ){
 2115             sprintf(xport,"%s%s",PORT_MARK,port);
 2116             ovstrcpy((char*)dp,dp+1+strlen(port));
 2117             if( dp = strpbrk(vurl,"?#") )
 2118                 ;
 2119             else    dp = (char*)vurl + strlen(vurl);
 2120             Strins(AVStr(dp),xport);
 2121         }
 2122     }
 2123     if( ilen == 0 )
 2124         setVStrEnd(vurl,0);
 2125     return ilen;
 2126 }
 2127 
 2128 #define SkipQuoted(where,referer,np,sp,xp) { \
 2129     int qch; \
 2130     int ch; \
 2131     const char *qp; \
 2132     if( qch = referer->r_tagctx.r_curquote ){ \
 2133         for( qp = np; (ch = *qp); qp++ ) \
 2134             if( ch == qch ) \
 2135                 break; \
 2136         if( ch == qch && sp <= qp ){ \
 2137             while( ch = *sp ){ \
 2138                 setVStrPtrInc(xp,*sp++); \
 2139                 if( ch == qch ) \
 2140                     break; \
 2141             } \
 2142         } \
 2143     } \
 2144 }
 2145 
 2146 void url_absoluteS(Referer *referer,PCStr(line),PVStr(xline),PVStr(rem))
 2147 {   const char *myhp;
 2148     const char *proto;
 2149     const char *host;
 2150     int   port;
 2151     const char *base;
 2152     const char *hp;
 2153     CStr(hostportb,MaxHostNameLen);
 2154     const char *sp = line;
 2155     const char *np;
 2156     refQStr(xp,xline); /**/
 2157     int ch;
 2158     const char *tagp;
 2159     int uconv;
 2160 
 2161     getBASE(referer,&myhp,&proto,&hp,&host,&port,&base);
 2162 
 2163     for(;;){
 2164         uconv = URICONV_ANY;
 2165         np = (char*)html_nextTagAttr(sp,"",AVStr(rem),&tagp,NULL,&uconv);
 2166         if( np == NULL )
 2167             break;
 2168 
 2169         /* white spaces in "CDATA" ...
 2170          * http://www.w3.org/TR/html401/types.html#type-cdata
 2171          */
 2172         if( referer->r_tagctx.r_curquote ){
 2173             const char *dp = np;
 2174             while( isspace(*dp) )
 2175                 dp++;
 2176             if( np < dp ){
 2177                 ovstrcpy((char*)np,dp);
 2178             }
 2179             /* space in URL should be cared if it affect MOUNT... */
 2180         }
 2181 
 2182         if( referer->r_altbuf != NULL && tagp != NULL && isBASE(tagp) ){
 2183             setBASE(referer,np);
 2184             getBASE(referer,&myhp,&proto,&hp,&host,&port,&base);
 2185         }
 2186 
 2187         ch = np[0];
 2188         ((char*)np)[0] = 0; /**/
 2189         strcpy(xp,sp);
 2190         xp += strlen(xp);
 2191         ((char*)np)[0] = ch; /**/
 2192         sp = np;
 2193 
 2194         if( strncasecmp(np,"nntp://-.-/",11) == 0 ){
 2195             sp += 11;
 2196             sprintf(xp,"nntp://%s/",myhp);
 2197         }else
 2198         if( strncasecmp(np,"http://-.-/",11) == 0 ){
 2199             sp += 11;
 2200             sprintf(xp,"http://%s/",myhp);
 2201         }else
 2202         if( isFullURL(np) ){
 2203         }else
 2204         if( strncasecmp(np,"http:/",6) == 0 ){
 2205             if( np[6] != '/' ){
 2206             sp += 6;
 2207             HostPort(AVStr(hostportb),"http",host,port);
 2208             sprintf(xp,"http://%s/",hostportb);
 2209             }
 2210         }
 2211         else
 2212         if( ch != '/' && streq(proto,"ftp") )
 2213         {
 2214             /* Relay ftp to the proxy server for non-proxy client,
 2215              * who see current protocol as HTTP, thus will not
 2216              * make automatic expansion of relative URL of ftp type.
 2217              */
 2218             if( ch == '.' && np[1] == '/' )
 2219                 sp += 2;
 2220             strcpy(xp,base);
 2221         }
 2222         else
 2223         if( ch == '/' && np[1] != '/' ){
 2224             /* Absolute path without host:port. This will be cause
 2225              * ignoreing =@=:realhost:realport part in the current
 2226              * page's URL
 2227              */
 2228             sp += 1;
 2229             sprintf(xp,"%s://%s/",proto,hp);
 2230         }
 2231         else
 2232         if( ch == '/' && np[1] == '/' ){ /* with host:port */
 2233             sp += 2;
 2234             sprintf(xp,"%s://",proto);
 2235         }
 2236         else
 2237         if( ch == '$' && strncaseeq(np,"${VBASE}",8) ){ /* v9.9.11 new-140809e */
 2238             sp += 8;
 2239             if( referer->r_qvbase.u_proto ){
 2240                 UrlX *up = &referer->r_qvbase;
 2241                 sprintf(xp,"%s://%s/%s",up->u_proto,
 2242                     up->u_hostport,up->u_path);
 2243             }
 2244         }else
 2245         if( ch == '$' && strncaseeq(np,"${SELF}",7) ){ /* v9.9.11 new-140809h */
 2246             sp += 7;
 2247             if( referer->r_requrl.u_path ){
 2248                 UrlX *up = &referer->r_requrl;
 2249                 sprintf(xp,"%s://%s/%s",up->u_proto,
 2250                     up->u_hostport,up->u_path);
 2251             }
 2252         }else
 2253         if( ch == '?' ){ /* 9.9.11 new-140812a */
 2254             if( (uconv & URICONV_FULL) != 0 ){
 2255             if( (referer->r_flags & UMF_QUERY_FULL) != 0 ){
 2256                 UrlX *ux;
 2257                 if( referer->r_qvbase.u_path ){
 2258                     ux = &referer->r_qvbase;
 2259                 }
 2260                 else
 2261                 if( referer->r_requrl.u_path ){
 2262                     ux = &referer->r_requrl;
 2263                 }
 2264                 else{
 2265                     /* should not happen */
 2266                 }
 2267                 sprintf(xp,"%s://%s/%s",ux->u_proto,
 2268                     ux->u_hostport,ux->u_path);
 2269             }
 2270             }
 2271         }else
 2272         if( ch == '#' ){ /* 9.9.11 new-140727m */
 2273             if( (uconv & URICONV_FULL) != 0 ){
 2274             /*
 2275             sprintf(xp,"%s://%s/%s",proto,hp,referer->r_sv.u_path+1);
 2276             */
 2277             sprintf(xp,"%s://%s/%s",proto,hp,referer->r_sv.u_path);
 2278             /* v9.9.12 fix-140814e, setReferer() is fixed to set
 2279              * r_sv.u_path removing leading '/' of "/upath".  This
 2280              * was bad because setBASE() by <BASE HREF=URL> set
 2281              * r_sv.u_path removing leading '/' thus it did not
 2282              * work by the above "+1".  Anyway, u_path should be
 2283              * without leading '/' by the definition. And
 2284              * r_sv.u_path is used nowhere except here currently,
 2285              * so no side effect will be.
 2286              */
 2287             }
 2288         }else
 2289         if( uconv & (URICONV_FULL|URICONV_NORMAL) ){
 2290             int uplen,blen;
 2291 
 2292 /*
 2293             if( *base == '/' ) base++;
 2294 */
 2295             uplen = url_normalize(base,sp,&blen);
 2296 
 2297             if( *np != '#' )
 2298             if( (uconv & URICONV_FULL) || uplen ){
 2299                 sprintf(xp,"%s://%s/",proto,hp);
 2300                 sp += uplen;
 2301                 if( 0 < blen ){
 2302                     xp += strlen(xp);
 2303                     strncpy(xp,base,blen);
 2304                     XsetVStrEnd(AVStr(xp),blen);
 2305                     if( xp[blen-1] != '/' )
 2306                         Xstrcpy(QVStr(&xp[blen],xline),"/");
 2307                     else    XsetVStrEnd(AVStr(xp),blen);
 2308                 }
 2309             }
 2310         }
 2311         if( url_unify_ports ){
 2312             if( *xp ){
 2313                 if( strncasecmp(xp,"http://",7) == 0 ){
 2314                     CStr(nb,512);
 2315                     refQStr(tp,xline); /**/
 2316                     const char *up;
 2317 
 2318                     tp = (char*)xp + strlen(xp);
 2319                     up = wordscanY(sp,AVStr(tp),256,EOURL);
 2320                     if( url_movport(xp,AVStr(nb),sizeof(nb)) ){
 2321                         strcpy(xp,nb);
 2322                         sp = up;
 2323                     }else   setVStrEnd(tp,0);
 2324                 }
 2325             }else{
 2326                 int ilen;
 2327                 if( strncasecmp(np,"http://",7) == 0 ){
 2328                     if( ilen = url_movport(np,AVStr(xp),256) )
 2329                         sp += ilen;
 2330                 }
 2331             }
 2332         }
 2333         xp += strlen(xp);
 2334         SkipQuoted("absolute",referer,np,sp,xp);
 2335     }
 2336     strcpy(xp,sp);
 2337 }
 2338 void url_absolute(PCStr(myhp),PCStr(proto),PCStr(host),int port,PCStr(base),PCStr(line),PVStr(xline),PVStr(rem))
 2339 {   Referer referer;
 2340     CStr(hostport,128);
 2341 
 2342     bzero(&referer,sizeof(Referer));
 2343     referer.r_my.u_hostport = myhp;
 2344     referer.r_sv.u_hostport = HostPort(AVStr(hostport),proto,host,port);
 2345     referer.r_sv.u_proto = proto;
 2346     referer.r_sv.u_host = host;
 2347     referer.r_sv.u_port = port;
 2348     referer.r_sv.u_base = base;
 2349     setQStr(referer.r_altbuf,NULL,0);
 2350     url_absoluteS(&referer,line,AVStr(xline),AVStr(rem));
 2351 }
 2352 
 2353 /*
 2354  *  TRANSFORM URL TO delegated-URL
 2355  *  This function assumes that URLs in the "line" is in FULL-SPEC
 2356  *  format of URL without omittion of protocol-name nor host-port field.
 2357  */
 2358 void CTX_url_delegateS(Connection*ctx,Referer *referer,PCStr(line),PVStr(xline),/*char *dgrelay*/int dgrelay)
 2359 {   const char *sp = line;
 2360     const char *np;
 2361     refQStr(xp,xline); /**/
 2362     CStr(rurl,URLSZ);
 2363     int ulen;
 2364     int ch;
 2365     int uconv;
 2366     int qch;
 2367 
 2368     UrlX *ux;
 2369     const char *myproto;
 2370     const char *myhost;
 2371     const char *mypath;
 2372     int myport;
 2373 
 2374     if( referer->r_qvbase.u_proto ){ /* v9.9.11 new-140810f, VBASE */
 2375         ux = &referer->r_qvbase;
 2376     }else
 2377     if( referer->r_vb.u_proto )
 2378         ux = &referer->r_vb;
 2379     else    ux = &referer->r_my;
 2380     myproto = ux->u_proto;
 2381     myhost = ux->u_host;
 2382     myport = ux->u_port;
 2383     mypath = ux->u_path;
 2384 
 2385 
 2386     for(;;){
 2387         uconv = URICONV_ANY & ~(URICONV_FULL | URICONV_PARTIAL);
 2388             /* should be URICONV_MOUNT ? */
 2389         np = (char*)html_nextTagAttr(sp,"",VStrNULL,NULL,NULL,&uconv);
 2390         if( np == NULL )
 2391             break;
 2392         qch = referer->r_tagctx.r_curquote;
 2393 
 2394         ch = *np;
 2395         *(char*)np = 0;  /**/
 2396 
 2397         strcpy(xp,sp); xp += strlen(xp);
 2398         *(char*)np = ch; /**/
 2399         sp = np;
 2400 
 2401         if( ulen = CTX_url_rurlX(ctx,qch,np,AVStr(rurl),myproto,myhost,myport,mypath,dgrelay) )
 2402         if( strncmp(sp+ulen,ODGU_MARK,strlen(ODGU_MARK)) != 0 )
 2403         if( strncmp(sp+ulen,NDGU_MARK,strlen(NDGU_MARK)) != 0 )
 2404         {
 2405             strcpy(xp,rurl);
 2406             sp += ulen;
 2407             xp += strlen(xp);
 2408         }
 2409         SkipQuoted("deleate",referer,np,sp,xp);
 2410     }
 2411     strcpy(xp,sp);
 2412 }
 2413 void scan_url(PCStr(line),iFUNCP func,void *arg1,void *arg2)
 2414 {   const char *sp;
 2415     const char *np;
 2416     const char *tp;
 2417     char tc;
 2418     int ulen;
 2419     Referer *referer = 0;
 2420 
 2421     sp = line;
 2422     while( np = html_nextTagAttr(sp,"",VStrNULL,NULL,NULL,NULL) ){
 2423         if( tp = strpbrk(np," \t\r\n\">") ){
 2424             ulen = tp - np;
 2425             tc = *tp;
 2426             *(char*)tp = 0;  /**/
 2427             (*func)((char*)np,arg1,arg2);
 2428             *(char*)tp = tc; /**/
 2429             sp = np + ulen;
 2430         }else   break;
 2431     }
 2432 }
 2433 
 2434 
 2435 /*
 2436  *  delegated-URL SYNTHESIZER
 2437  *  Given "attrs" is a NL-separated list of NAME=VALUEs.  This is a
 2438  *  output format of URL parser in the SLL library.
 2439  */
 2440 
 2441 int callback_it(PCStr(proto));
 2442 static char *delegate_url(Connection*ctx,PVStr(url),PCStr(attrs),PCStr(ourl),int olen,/*char *dgrelay*/int dgrelay)
 2443 {   CStr(abuf,URLSZ);
 2444     const char *av[64]; /**/
 2445     int ac;
 2446     refQStr(up,url); /**/
 2447     const char *proto;
 2448     const char *val;
 2449     const char *hostport;
 2450     const char *delegate;
 2451     const char *dproto;
 2452     const char *path;
 2453     CStr(xpath,URLSZ);
 2454     const char *search;
 2455     const char *gselector;
 2456     CStr(oURLbuf,URLSZ);
 2457     const char *modifiers;
 2458 
 2459     strcpy(abuf,attrs);
 2460     ac = stoV(abuf,64,av,'\n');
 2461 
 2462     proto = getv(av,"proto");
 2463     dproto = getv(av,"dproto");
 2464     delegate = getv(av,"delegate");
 2465     if( delegate == 0 )
 2466         return 0;
 2467     hostport = getv(av,"hostport");
 2468     if( hostport == NULL ) hostport = getv(av,"host");
 2469     if( hostport == NULL /* && inScript */ ){
 2470         hostport = "";
 2471     }
 2472     path = getv(av,"path");
 2473     search = getv(av,"search");
 2474 
 2475 if( CTX_mount_url_fromL(ctx,AVStr(url),proto,hostport,path,search,dproto,delegate) )
 2476 return (char*)url + strlen(url);
 2477 
 2478     if( dgrelay == 0 )
 2479         return 0;
 2480 
 2481     if( proto == 0 )
 2482         return 0;
 2483 
 2484     if( callback_it(proto) == 0 )
 2485         return 0;
 2486 
 2487     if( dproto == NULL )
 2488         dproto = "http";
 2489 
 2490     if( hostport == 0 )
 2491         return 0;
 2492 
 2493     if( streq(proto,"news") )
 2494         return 0;
 2495     if( streq(proto,"telnet") )
 2496         return 0;
 2497 
 2498 /*
 2499     if( !isRELAYABLE(dgrelay,proto,hostport) )
 2500         return 0;
 2501 */
 2502     if( !isREACHABLE(proto,hostport) )
 2503         return 0;
 2504 
 2505     if( streq(proto,dproto) )
 2506     if( delegate && hostport && streq(delegate,hostport) )
 2507         return 0; /* no rewriting is necessary */
 2508 
 2509     if( path && nonxalpha_unescape(path,AVStr(xpath),1) )
 2510         path = xpath;
 2511 
 2512     gselector = 0;
 2513 
 2514     strncpy(oURLbuf,ourl,olen); setVStrEnd(oURLbuf,olen);
 2515 
 2516     cpyQStr(up,url); 
 2517     up = Sprintf(AVStr(up),"%s://",dproto);
 2518 
 2519     if( !GOPHER_ON_HTTP && streq(proto,"gopher") ){
 2520         up = Sprintf(AVStr(url),"gopher://");
 2521         gselector = getv(av,"path");
 2522         if( gselector == 0 || *gselector == 0 )
 2523             gselector = "1";
 2524     }
 2525 
 2526     up = Sprintf(AVStr(up),"%s",delegate);
 2527     if( gselector )
 2528         up = Sprintf(AVStr(up),"/%c",*gselector);
 2529     else    up = Sprintf(AVStr(up),"/");
 2530 
 2531     if( strncmp(ourl,url,strlen(url)) == 0 ){
 2532         /* is this right ?  doesn't it suppress necessary one ? */
 2533         /*Verbose("####### DON'T MAKE DUPLICATE REWRITE: %s\n",url);*/
 2534         return 0;
 2535     }
 2536 
 2537 modifiers = CTX_get_modifires((Connection*)ctx);
 2538 /*
 2539 if( modifiers[0] && up[-1] == '/' )
 2540     up = Sprintf(up,"%s",modifiers);
 2541 else
 2542 if( DELEGATE_FLAGS[0] )
 2543 if( up[-1] == '/' )
 2544     up = Sprintf(up,"F%s",DELEGATE_FLAGS);
 2545 else    up = Sprintf(up,"(F%s)",DELEGATE_FLAGS);
 2546 */
 2547 
 2548     up = Sprintf(AVStr(up),"%s",NDGU_MARK);
 2549 
 2550 if( modifiers[0] )
 2551 up = Sprintf(AVStr(up),"/%s/",modifiers);
 2552 
 2553     up = Sprintf(AVStr(up),"%s",oURLbuf);
 2554     return (char*)up;
 2555 }
 2556 void delegate_selector(Connection *Conn,PVStr(xselector),PCStr(host),int iport,int gtype)
 2557 {   CStr(dgopher,1024);
 2558     CStr(tmp,1024);
 2559     refQStr(dp,dgopher); /**/
 2560 
 2561     dp = Sprintf(AVStr(dgopher),NDGU_MARK);
 2562     if( DELEGATE_FLAGS[0] )
 2563         dp = printFlags(Conn,AVStr(dp));
 2564 
 2565     dp = Sprintf(AVStr(dp),"gopher://%s:%d/%c",host,iport,gtype?gtype:'1');
 2566     strcpy(tmp,xselector);
 2567     sprintf(xselector,"%s%s",dgopher,tmp);
 2568 }
 2569 
 2570 char *file_hostpath(PCStr(url),xPVStr(proto),xPVStr(login))
 2571 {   CStr(protobuf,128);
 2572     CStr(hostbuf,128);
 2573     const char *path;
 2574 
 2575     if( strchr(url,':') == NULL )
 2576         return NULL;
 2577 
 2578     if( proto == NULL )
 2579         setPStr(proto,protobuf,sizeof(protobuf));
 2580 
 2581     setVStrEnd(proto,0);
 2582 
 2583     if( login == NULL )
 2584         setPStr(login,hostbuf,sizeof(hostbuf));
 2585     setVStrEnd(login,0);
 2586 
 2587     Xsscanf(url,"%[a-zA-Z0-9]",AVStr(proto));
 2588     if( !localPathProto(proto) )
 2589         return NULL;
 2590 
 2591     path = url + strlen(proto);
 2592     if( path[0] != ':' )
 2593         return NULL;
 2594     path += 1;
 2595 
 2596     if( strncmp(path,"//",2) == 0 ){
 2597         path += 2;
 2598         if( path[0] == '/' )
 2599             strcpy(login,"localhost");
 2600         else{
 2601             Xsscanf(path,"%[^/]",AVStr(login));
 2602             path += strlen(login);
 2603         }
 2604     }
 2605     return (char*)path;
 2606 }
 2607 
 2608 
 2609 
 2610 /*
 2611  *  SCAN A URL AND EXPANDS IT TO A delegated-URL
 2612  */
 2613 
 2614 #include "SLL.h"
 2615 extern SLLRule URL[];
 2616 
 2617 /*
 2618  * the end of url should be detected by the closing char. "qch"
 2619  * like <"> or <'> when the url[-1] is <"> or <'>
 2620  */
 2621 int SLLparseURL(int qch,PCStr(srca),const char **nsrcp,putvFunc putv,PVStr(vala),int size,char **nvalp)
 2622 {   int rcode;
 2623     char eouc;
 2624     const char *eoup; /* not "const" but fixed */
 2625 
 2626     eoup = 0;
 2627     if( qch != 0 && (eoup = strchr(srca,qch)) ){
 2628         /*
 2629         ... This is not good for rURL*%pat$ matching for attr='URL'
 2630         eoup++;
 2631         */
 2632         eouc = *eoup;
 2633         *(char*)eoup = 0; /**/
 2634     }else
 2635     if( qch == 0 && (eoup = strpbrk(srca," \t\r\n")) ){
 2636         /* multiple attributes in a single tag may include URL,
 2637          * so leave the following attribute to be scanned
 2638          */
 2639         eouc = *eoup;
 2640         *(char*)eoup = 0;
 2641     }
 2642 
 2643     /*
 2644      * SLLparse should be given the length of source string.
 2645      */
 2646     rcode = SLLparse(0,URL,srca,nsrcp,putv,AVStr(vala),size,nvalp);
 2647     if( eoup ){
 2648         *(char*)eoup = eouc; /**/
 2649     }
 2650     return rcode;
 2651 }
 2652 
 2653 int CTX_url_rurlX(Connection*ctx,int qch,PCStr(url),PVStr(rurl),PCStr(dproto),PCStr(dhost),int dport,PCStr(dpath),/*char *dgrelay*/int dgrelay)
 2654 {   const char *nurl;
 2655     CStr(values,URLSZ);
 2656     CStr(hostport,MaxHostNameLen);
 2657     refQStr(vp,values); /**/
 2658     const char *proto;
 2659     const char *rp;
 2660     const char *tail;
 2661     int len;
 2662 
 2663     if( strncmp(url,"!-_-",4) == 0 ){
 2664         strcpy(rurl,url+4);
 2665         return strlen(url);
 2666     }
 2667 
 2668     if( reserve_url((Connection*)ctx) )
 2669         return 0;
 2670 
 2671     nurl = url;
 2672     cpyQStr(vp,values);
 2673     values[0] = 0;
 2674 
 2675     setVStrEnd(rurl,0);
 2676     if( SLLparseURL(qch,url,&nurl,SLL_putval,AVStr(vp),URLSZ,(char**)&vp) == 0 ){
 2677         len = nurl - url;
 2678         if( dproto && dproto[0] )
 2679             vp = Sprintf(AVStr(vp),"dproto=%s\n",dproto);
 2680 
 2681         if( dhost && dhost[0] ){
 2682             if( dproto && dproto[0] )
 2683                 HostPort(AVStr(hostport),dproto,dhost,dport);
 2684             else    sprintf(hostport,"%s:%d",dhost,dport);
 2685             if( *dpath != 0 && *dpath != '/' )
 2686             vp = Sprintf(AVStr(vp),"delegate=%s/%s\n",hostport,dpath);
 2687             else
 2688             vp = Sprintf(AVStr(vp),"delegate=%s%s\n",hostport,dpath);
 2689         }
 2690         if((tail = delegate_url(ctx,AVStr(rurl),values,url,len,dgrelay)) == 0)
 2691             return 0;
 2692         return len;
 2693     }
 2694     return 0;
 2695 }
 2696 
 2697 int url_partializeS(Referer *referer,PCStr(line),PVStr(xline))
 2698 {   const char *myproto;
 2699     const char *myhost;
 2700     int myport;
 2701     const char *sp;
 2702     const char *np;
 2703     refQStr(xp,xline); /**/
 2704     const char *nurl;
 2705     URLStr purl;
 2706     CStr(values,URLSZ);
 2707     const char *av[64]; /**/
 2708     refQStr(vp,values); /**/
 2709     const char *proto;
 2710     const char *host;
 2711     const char *port;
 2712     const char *path;
 2713     const char *search;
 2714     int porti;
 2715     int len;
 2716     int nmod;
 2717     int umask;
 2718     int qch;
 2719 
 2720     myproto = referer->r_my.u_proto;
 2721     myhost = referer->r_my.u_host;
 2722     myport = referer->r_my.u_port;
 2723 
 2724     sp = line;
 2725     nmod = 0;
 2726 
 2727     for(;;){
 2728         umask = URICONV_PARTIAL;
 2729         np = html_nextTagAttr(sp,"",VStrNULL,NULL,NULL,&umask);
 2730         if( np == NULL )
 2731             break;
 2732         qch = referer->r_tagctx.r_curquote;
 2733 
 2734         len = np - sp;
 2735         Bcopy(sp,xp,len); XsetVStrEnd(AVStr(xp),len);
 2736         xp += len;
 2737         sp = np;
 2738         cpyQStr(vp,values);
 2739 
 2740         if( umask & URICONV_FULL ){
 2741             /* conflicting, adopt FULL prior to PARTIAL ... */
 2742         }else
 2743         if( SLLparseURL(qch,np,&nurl,SLL_putval,AVStr(vp),URLSZ,(char**)&vp) == 0 ){
 2744             stoV(values,64,av,'\n');
 2745             if( proto = getv(av,"proto") )
 2746             if( host  = getv(av,"host" ) ){
 2747                 if( port = getv(av,"port") )
 2748                     porti = atoi(port);
 2749                 else    porti = serviceport(proto);
 2750                 path = getv(av,"path");
 2751                 search = getv(av,"search");
 2752 
 2753                 if( porti == myport )
 2754                 if( strcaseeq(proto,myproto) )
 2755                 if( hostcmp_lexical(host,myhost,1) == 0 ){
 2756                     sp += nurl - np;
 2757                     setVStrPtrInc(xp,'/');
 2758                     if( path )
 2759                         strcpy(xp,path);
 2760                     else    setVStrEnd(xp,0);
 2761                     if( search ){
 2762                         xp += strlen(xp);
 2763                         setVStrPtrInc(xp,'?');
 2764                         strcpy(xp,search);
 2765                     }
 2766                     nmod++;
 2767                 }
 2768             }
 2769         }
 2770         xp += strlen(xp);
 2771         SkipQuoted("partialize",referer,np,sp,xp);
 2772     }
 2773     strcpy(xp,sp);
 2774     return nmod;
 2775 }
 2776 
 2777 /*
 2778  *  SCAN A URL-EXTENTION
 2779  */
 2780 extern SLLRule URLX[];
 2781 
 2782 void putv(PCStr(t),PCStr(n),int l,PCStr(vb))
 2783 {   CStr(buf,1024);
 2784 
 2785     strncpy(buf,n,l); setVStrEnd(buf,l);
 2786     printf("%s=%s\n",t,buf);
 2787 }
 2788 
 2789 int scan_url1(PCStr(url),PVStr(values))
 2790 {   const char *nurl;
 2791     refQStr(vp,values); /**/
 2792 
 2793     nurl = url;
 2794     cpyQStr(vp,values);
 2795     setVStrEnd(values,0);
 2796     if( SLLparse(0,URL,url,&nurl, SLL_putval,AVStr(vp),URLSZ,(char**)&vp ) == 0 )
 2797         return nurl - url;
 2798     return 0;
 2799 }
 2800 int scan_urlx(PCStr(urlx),PVStr(values))
 2801 {   const char *nurlx;
 2802     refQStr(vp,values); /**/
 2803 
 2804     nurlx = (char*)urlx;
 2805     cpyQStr(vp,values);
 2806     setVStrEnd(values,0);
 2807     if( SLLparse(0,URLX,urlx,&nurlx, SLL_putval,AVStr(vp),URLSZ,(char**)&vp ) == 0 )
 2808         return nurlx - urlx;
 2809     return 0;
 2810 }
 2811 
 2812 
 2813 /*
 2814  *  URL SYNTAX TABLES FOR SLL LIBRARY
 2815  */
 2816 
 2817 static char DIGIT[] = "0123456789";
 2818 static char ALPHA[] = "\
 2819 abcdefghijklmnopqrstuvwxyz\
 2820 ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 2821 ";
 2822 
 2823 static char ALPHADIGIT[] = "\
 2824 abcdefghijklmnopqrstuvwxyz\
 2825 ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 2826 0123456789\
 2827 ";
 2828 
 2829 static char ALNUM[] = "\
 2830 abcdefghijklmnopqrstuvwxyz\
 2831 ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 2832 0123456789-\
 2833 ";
 2834 
 2835 extern char SLL_OTHERWISE[];
 2836 #define OTHERWISE SLL_OTHERWISE
 2837 
 2838 /*
 2839 static char NALPHA[] = "\
 2840 abcdefghijklmnopqrstuvwxyz\
 2841 ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 2842 0123456789\
 2843 $-_.&+\
 2844 !*'();, \
 2845 ";
 2846 */
 2847 static char NALPHA[] = "\
 2848 abcdefghijklmnopqrstuvwxyz\
 2849 ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 2850 0123456789\
 2851 $-_.&+\
 2852 !*'();,\
 2853 ";
 2854 
 2855 static char XALPHA[] = "\
 2856 abcdefghijklmnopqrstuvwxyz\
 2857 ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 2858 0123456789\
 2859 $-_.&+\
 2860 !*'():;, %\
 2861 ";
 2862 
 2863 static char YALPHA[] = "\
 2864 abcdefghijklmnopqrstuvwxyz\
 2865 ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 2866 0123456789\
 2867 $-_@.&+\
 2868 !~*'():;, %\
 2869 ";
 2870 
 2871 /* "|" is not in "uric" in RFC2396 but usually used in CGI-Counter for ex. */
 2872 static char URIC[] = "\
 2873 abcdefghijklmnopqrstuvwxyz\
 2874 ABCDEFGHIJKLMNOPQRSTUVWXYZ\
 2875 0123456789\
 2876 ;/?:@&=+$,\
 2877 -_.!~*'()\
 2878 %\
 2879 |\
 2880 ";
 2881 
 2882 ISRULE( URL );
 2883 ISRULE( HTTP    );
 2884 ISRULE( GOPHER  );
 2885 ISRULE( FTP );
 2886 ISRULE( FILEP   );
 2887 ISRULE( NEWS    );
 2888 ISRULE( NNTP    );
 2889 ISRULE( WAIS    );
 2890 ISRULE( DATAS   );
 2891 
 2892 /*
 2893 ISRULE( AFS );
 2894 ISRULE( MAILTO  );
 2895 ISRULE( TELNET  );
 2896 ISRULE( GENERIC);
 2897 */
 2898 
 2899 ISRULE( HOSTPORT);
 2900 ISRULE( PATH);
 2901 ISRULE( SEARCH);
 2902 
 /*
  * Top-level rule: choose the scheme-specific rule from the scheme name.
  * Each entry is { label, text to match, rule for the rest, flags }.
  * "https" is listed ahead of "http" on purpose (see the note on that
  * entry), and "sftp" shares the FTP rule.  Entries for afs, mailto,
  * telnet and a generic fallback are present but commented out.
  * NOTE(review): IGNCASE and PUTGATE are SLL flags defined outside this
  * file; IGNCASE presumably requests case-insensitive matching -- confirm.
  */
 2903 ALT(URL)
 2904     { "proto",  "https",    HTTP,       IGNCASE|PUTGATE}, /* must be before http */
 2905     { "proto",  "http",     HTTP,       IGNCASE|PUTGATE},
 2906     { "proto",  "gopher",   GOPHER,     IGNCASE|PUTGATE},
 2907     { "proto",  "ftp",      FTP,        IGNCASE|PUTGATE},
 2908     { "proto",  "sftp",     FTP,        IGNCASE|PUTGATE},
 2909     { "proto",  "file",     FILEP,      IGNCASE|PUTGATE},
 2910     { "proto",  "news",     NEWS,       IGNCASE|PUTGATE},
 2911     { "proto",  "nntp",     NNTP,       IGNCASE|PUTGATE},
 2912     { "proto",  "wais",     WAIS,       IGNCASE|PUTGATE},
 2913     { "proto",  "data",     DATAS,      IGNCASE|PUTGATE},
 2914 /*
 2915     { "proto",  "afs://",   AFS,        IGNCASE|PUTGATE},
 2916     { "proto",  "mailto::", MAILTO,     IGNCASE|PUTGATE},
 2917     { "proto",  "telnet:",  TELNET,     IGNCASE|PUTGATE},
 2918     { "proto",  IMM,        GENERIC,    IGNCASE|PUTGATE},
 2919 */
 2920 END
 2921 
 /*
  * Remainder of an http/https URL after the scheme name (ALT(URL) sends
  * both schemes here): the literal "://", then a host[:port], a "/" path
  * and a "?" search part, each of the last three marked xOPTIONAL.
  * The earlier form of the hostport entry, without xOPTIONAL, is kept
  * commented out.
  * NOTE(review): IMM, NEXT, PUTVAL and xOPTIONAL come from the SLL
  * library; IMM presumably means "no literal to consume" and NEXT
  * "continue with the following entry" -- confirm there.
  */
 2922 SEQ(HTTP)
 2923     { "://",    "://",      NEXT        },
 2924 /*
 2925     { "hostport",   IMM,        HOSTPORT,   PUTVAL},
 2926 */
 2927     { "hostport",   IMM,        HOSTPORT,   xOPTIONAL|PUTVAL},
 2928     { "path",   "/",        PATH,       xOPTIONAL|PUTVAL},
 2929     { "search", "?",        SEARCH,     xOPTIONAL|PUTVAL},
 2930 END
 2931 
 /* Announce the lexical rules (letter/digit runs and domain labels) that
  * the host, path and news rules below refer to before their definitions. */
 2932 ISRULE( IALPHA );
 2933 ISRULE( DIGITS );
 2934 ISRULE( ALPHAS );
 2935 ISRULE( NALPHAS);
 2936 ISRULE( XALPHAS);
 2937 ISRULE( YALPHAS);
 2938 ISRULE( DOMLABEL);
 2939 
 /* Host name: one DOMLABEL, optionally followed by "." and another
  * HOSTNAME (the rule refers to itself), i.e. labels separated by dots. */
 2940 SEQ(HOSTNAME)
 2941     { "name",   IMM,        DOMLABEL    },
 2942     { "name",   ".",        HOSTNAME,   xOPTIONAL},
 2943 END
 /* Numeric host address: four DIGITS groups separated by ".".  None of
  * the entries is optional, and no range check on the groups is expressed
  * in this table. */
 2944 SEQ(HOSTNUMBER)
 2945     { "num1",   IMM,        DIGITS      },
 2946     { "num2",   ".",        DIGITS      },
 2947     { "num3",   ".",        DIGITS      },
 2948     { "num4",   ".",        DIGITS      },
 2949 END
 /* Host: either a numeric address (HOSTNUMBER) or a name (HOSTNAME).
  * The order of the two alternatives is significant, as noted below. */
 2950 /* try HOSTNUMBER first, not to let 123.123.123.123 be matched with HOSTNAME */
 2951 ALT(HOST)
 2952     { "number", IMM,        HOSTNUMBER  },
 2953     { "name",   IMM,        HOSTNAME    },
 2954 END
 /* Port number: a DIGITS run (one or more decimal digits). */
 2955 SEQ(PORT)
 2956     { "number", IMM,        DIGITS      },
 2957 END
 /* host[:port] -- a mandatory HOST followed by an optional ":" PORT. */
 2958 SEQ(HOSTPORT)
 2959     { "host",   IMM,        HOST,       PUTVAL},
 2960     { "port",   ":",        PORT,       xOPTIONAL|PUTVAL},
 2961 END
 2962 
 /*
  * One label of a domain name.
  * DOMLABEL requires a leading character from ALPHADIGIT and then hands
  * over to DOMLABEL2.  DOMLABEL2 either takes another ALPHADIGIT
  * character and repeats, or takes "-" and re-enters DOMLABEL (so, as
  * written, a hyphen has to be followed by a letter or digit), or
  * finishes via the OTHERWISE alternative.
  */
 2963 ALT(DOMLABEL2)
 2964     { "alphadigit", ALPHADIGIT, DOMLABEL2,  CHARSET},
 2965     { "hyphen", "-",        DOMLABEL    },
 2966     { "terminate",  OTHERWISE,  SUCCESS     },
 2967 END
 2968 SEQ(DOMLABEL)
 2969     { "alphadigit", ALPHADIGIT, DOMLABEL2,  CHARSET},
 2970 END
 2971 
 /* Identifier starting with a letter: one ALPHA character followed by an
  * optional NALPHAS run.  Note that the second entry is labelled
  * "xalphas" although the rule it refers to is NALPHAS. */
 2972 SEQ(IALPHA)
 2973     { "alpha",  ALPHA,      NEXT,       CHARSET },
 2974     { "xalphas",    IMM,        NALPHAS,    xOPTIONAL},
 2975 END
 2976 
 2977 /*
 2978 SEQ(SEARCH1)
 2979     { "search", IMM,        XALPHAS,    },
 2980     { "search", "+",        SEARCH,     xOPTIONAL},
 2981 END
 2982 SEQ(SEARCH)
 2983     { "search", IMM,        SEARCH1,    xOPTIONAL},
 2984 END
 2985 */
 /*
  * URICS: one character from URIC, optionally followed by URICS again,
  * i.e. a run of one or more URIC characters.
  * SEARCH: an optional URICS run; this is the rule applied after "?" in
  * the HTTP, NNTP, WAIS and GOPHER rules.  It stands in place of the
  * commented-out SEARCH1/SEARCH pair that precedes it.
  */
 2986 SEQ(URICS)
 2987     { "uric",   URIC,       NEXT,       CHARSET},
 2988     { "uric",   IMM,        URICS,      xOPTIONAL},
 2989 END
 2990 SEQ(SEARCH)
 2991     { "search", IMM,        URICS,      xOPTIONAL},
 2992 END
 2993 
 /*
  * Five rules of identical shape, one per character set: each takes one
  * character from its set and then optionally re-enters itself, giving a
  * run of one or more characters.
  *   ALPHAS  over ALPHA      ALNUMS  over ALNUM     NALPHAS over NALPHA
  *   XALPHAS over XALPHA     YALPHAS over YALPHA
  */
 2994 SEQ(ALPHAS)
 2995     { "alpha",  ALPHA,      NEXT,       CHARSET},
 2996     { "alpha",  IMM,        ALPHAS,     xOPTIONAL},
 2997 END
 2998 SEQ(ALNUMS)
 2999     { "alnum",  ALNUM,      NEXT,       CHARSET},
 3000     { "alnum",  IMM,        ALNUMS,     xOPTIONAL},
 3001 END
 3002 SEQ(NALPHAS)
 3003     { "nalpha", NALPHA,     NEXT,       CHARSET},
 3004     { "nalpha", IMM,        NALPHAS,    xOPTIONAL},
 3005 END
 3006 SEQ(XALPHAS)
 3007     { "xalpha", XALPHA,     NEXT,       CHARSET},
 3008     { "xalpha", IMM,        XALPHAS,    xOPTIONAL},
 3009 END
 3010 SEQ(YALPHAS)
 3011     { "yalpha", YALPHA,     NEXT,       CHARSET},
 3012     { "yalpha", IMM,        YALPHAS,    xOPTIONAL},
 3013 END
 3014 
 /* One step of a path: an optional YALPHAS segment name, then an
  * optional "/" that continues with PATH (PATH refers back to PATH1). */
 3015 SEQ(PATH1)
 3016     { "name",   IMM,        YALPHAS,    xOPTIONAL},
 3017     { "dir",    "/",        PATH,       xOPTIONAL},
 3018 END
 3019 
 /* Path: PATH1 is listed first; the "nullpath" alternative goes straight
  * to SUCCESS, so an empty path is accepted as well. */
 3020 ALT(PATH)
 3021     { "path",   IMM,        PATH1       },
 3022     { "nullpath",   IMM,        SUCCESS     },
 3023 END
 3024 
 3025 
 /*
  * ftp/sftp URLs (ALT(URL) sends both schemes to FTP).
  * USERPASS: a user name (XALPHAS), an optional ":" password (XALPHAS)
  *           and the terminating "@".
  * LOGIN:    an optional USERPASS followed by a mandatory HOSTPORT.
  * FTP:      "://" LOGIN, then an optional "/" PATH.
  */
 3026 SEQ(USERPASS)
 3027     { "user",   IMM,        XALPHAS,    PUTVAL},
 3028     { "pass",   ":",        XALPHAS,    xOPTIONAL|PUTVAL},
 3029     { "@",      "@",        SUCCESS     },
 3030 END
 3031 SEQ(LOGIN)
 3032     { "userpass",   IMM,        USERPASS,   xOPTIONAL|PUTVAL},
 3033     { "hostport",   IMM,        HOSTPORT,   PUTVAL},
 3034 END
 3035 SEQ(FTP)
 3036     { "login",  "://",      LOGIN,      PUTVAL  },
 3037     { "path",   "/",        PATH,       xOPTIONAL|PUTVAL},
 3038 END
 3039 
 /*
  * file URLs.
  * FILEH: an optional HOSTPORT and an optional "/" PATH (an earlier
  *        variant using HOST instead of HOSTPORT is kept commented out).
  * FILEP: two forms -- "://" followed by FILEH, or a bare ":" followed
  *        directly by PATH.
  */
 3040 SEQ(FILEH)
 3041 /*
 3042     { "host",   IMM,        HOST,       xOPTIONAL|PUTVAL    },
 3043 */
 3044     { "host",   IMM,        HOSTPORT,   xOPTIONAL|PUTVAL    },
 3045     { "path",   "/",        PATH,       xOPTIONAL|PUTVAL    },
 3046 END
 3047 ALT(FILEP)
 3048     { "file",   "://",      FILEH       },
 3049     { "path",   ":",        PATH,       PUTVAL  },
 3050 END
 3051 
 /*
  * Newsgroup names and article identifiers.
  * GROUP1:   either "." followed by GROUP1 again, or immediate SUCCESS.
  * GROUPx:   an IALPHA identifier followed by an optional GROUP1.
  * ARTICLE:  an XALPHAS run, "@", then a HOST.
  * GROUPART: the group form is listed before the article form.
  * NOTE(review): as written GROUP1 matches only "." characters; NALPHA
  * already contains ".", so the IALPHA in GROUPx can by itself cover a
  * dotted group name -- confirm whether GROUP1 is still needed.
  */
 3052 ALT(GROUP1)
 3053     {"name",    ".",        GROUP1      },
 3054     {"name",    IMM,        SUCCESS     },
 3055 END
 3056 SEQ(GROUPx)
 3057     {"name",    IMM,        IALPHA      },
 3058     {"name",    IMM,        GROUP1,     xOPTIONAL},
 3059 END
 3060 SEQ(ARTICLE)
 3061     {"serial",  IMM,        XALPHAS     },
 3062     {"domain",  "@",        HOST        },
 3063 END
 3064 ALT(GROUPART)
 3065     {"group",   IMM,        GROUPx,     PUTVAL  },
 3066     {"article", IMM,        ARTICLE,    PUTVAL  },
 3067 END
 /*
  * NEWS: ":" followed by a group name or article id (GROUPART); no host
  *       part appears in this form.
  * NNTP: "://" HOSTPORT, a mandatory "/" GROUPx, and an optional
  *       "?" SEARCH.
  */
 3068 SEQ(NEWS)
 3069     {"groupart",    ":",        GROUPART,   PUTVAL  },
 3070 END
 3071 SEQ(NNTP)
 3072     {"hostport",    "://",      HOSTPORT,   PUTVAL  },
 3073     {"group",   "/",        GROUPx,     PUTVAL  },
 3074     {"search",  "?",        SEARCH,     xOPTIONAL|PUTVAL},
 3075 END
 3076 
 /*
  * DATABASE: an XALPHAS run.
  * WAIS:     "://" HOSTPORT, "/" DATABASE, and an optional "?" SEARCH.
  * DATAS:    for the "data" scheme only the ":" after the scheme name is
  *           matched here; the entry goes straight to SUCCESS, so the
  *           rest of the URL is not described by this table.
  */
 3077 SEQ(DATABASE)
 3078     {"database",    IMM,        XALPHAS,    },
 3079 END
 3080 SEQ(WAIS)
 3081     {"hostport",    "://",      HOSTPORT,   PUTVAL  },
 3082     {"database",    "/",        DATABASE,   PUTVAL  },
 3083     {"search",  "?",        SEARCH,     xOPTIONAL|PUTVAL},
 3084 END
 3085 SEQ(DATAS)
 3086     {"typemaj", ":",        SUCCESS,    },
 3087 END
 3088 
 3089 
 /* Gopher selector string: a single alternative that delegates to PATH. */
 3090 ALT(SELECTOR)
 3091     { "selector",   IMM,        PATH,       },
 3092 END
 3093 
 /* Gopher item type: one character from DIGIT, or nothing at all (the
  * "nullgtype" alternative).  Only decimal digits are accepted as a
  * type character by this table. */
 3094 ALT(GTYPE)
 3095     { "gtype",  DIGIT,      SUCCESS,    CHARSET},
 3096     { "nullgtype",  IMM,        SUCCESS     },
 3097 END
 3098 
 /* Path part of a gopher URL: a GTYPE (possibly empty) followed by an
  * optional SELECTOR. */
 3099 SEQ(GSELECTOR)
 3100     { "gtype",  IMM,        GTYPE,      PUTVAL},
 3101     { "selector",   IMM,        SELECTOR,   xOPTIONAL|PUTVAL},
 3102 END
 3103 
 /* Remainder of a gopher URL: the literal "://" (the entry is labelled
  * "//"), a HOSTPORT that is not marked xOPTIONAL (unlike in SEQ(HTTP)),
  * then an optional "/" GSELECTOR and an optional "?" SEARCH. */
 3104 SEQ(GOPHER)
 3105     { "//",     "://",      NEXT        },
 3106     { "hostport",   IMM,        HOSTPORT,   PUTVAL},
 3107     { "path",   "/",        GSELECTOR,  xOPTIONAL|PUTVAL},
 3108     { "search", "?",        SEARCH,     xOPTIONAL|PUTVAL},
 3109 END
 3110 
 3111 
 /*
  * Run of one or more decimal digits.
  * DIGITS demands a first DIGIT character and continues in DIGITS1;
  * DIGITS1 keeps taking DIGIT characters and otherwise ends with SUCCESS
  * through its "nondigit" alternative.
  */
 3112 ALT(DIGITS1)
 3113     { "digit",  DIGIT,      DIGITS1,    CHARSET },
 3114     { "nondigit",   IMM,        SUCCESS     },
 3115 END
 3116 ALT(DIGITS)
 3117     { "digit",  DIGIT,      DIGITS1,    CHARSET },
 3118 END
 3119 
 3120 /*
 3121  *
 3122  */
 /*
  * Flag field used by URLX: an ALPHAS run between a pair of delimiters.
  *   FLAGS1  "=" ... "="      FLAGS2  "+" ... "="
  *   FLAGS3  "(" ... ")"      FLAGS4  "@" ... "@"
  * FLAGS tries the four forms in the order f1..f4.
  * NOTE(review): FLAGS2 opens with "+" but closes with "=", unlike the
  * other pairs -- confirm that this asymmetry is intended.
  */
 3123 SEQ(FLAGS1)
 3124     { "flags",  "=",        ALPHAS,     },
 3125     { "eoflags",    "=",        SUCCESS     },
 3126 END
 3127 SEQ(FLAGS2)
 3128     { "flags",  "+",        ALPHAS,     },
 3129     { "eoflags",    "=",        SUCCESS     },
 3130 END
 3131 SEQ(FLAGS3)
 3132     { "flags",  "(",        ALPHAS,     },
 3133     { "eoflags",    ")",        SUCCESS     },
 3134 END
 3135 SEQ(FLAGS4)
 3136     { "flags",  "@",        ALPHAS,     },
 3137     { "eoflags",    "@",        SUCCESS     },
 3138 END
 3139 ALT(FLAGS)
 3140     { "f1",     IMM,        FLAGS1      },
 3141     { "f2",     IMM,        FLAGS2      },
 3142     { "f3",     IMM,        FLAGS3      },
 3143     { "f4",     IMM,        FLAGS4      },
 3144 END
 /*
  * Compact form "[flags][proto]:host[:port][=digits]": an optional FLAGS
  * field, an optional ALPHAS protocol name, a mandatory ":" HOSTPORT and
  * an optional "=" DIGITS field labelled "xgtype".
  * NOTE(review): where this form is used is not visible in this part of
  * the file -- check the callers of the URLX rule.
  */
 3145 SEQ(URLX)
 3146     { "xflags", IMM,        FLAGS,      PUTVAL|xOPTIONAL},
 3147     { "xproto", IMM,        ALPHAS,     PUTVAL|xOPTIONAL},
 3148     { "xhostport",  ":",        HOSTPORT,   PUTVAL},
 3149     { "xgtype", "=",        DIGITS,     PUTVAL|xOPTIONAL},
 3150 END
 3151 
 3152 /*
 3153 ISRULE(ROUTE);
 3154 ISRULE(HOSTLIST);
 3155 
 3156 SEQ(ROUTE)
 3157     { "proto",  IMM,        ALPHAS,     PUTVAL},
 3158     { "host",   "://"       HOST,       PURVAL},
 3159     { "port",   ":"     PORT,       PURVAL},
 3160     { "dstlist",    ":"     HOSTLIST,   PURVAL},
 3161     { "dstlist",    ":"     HOSTLIST,   PURVAL|xOPTIONAL},
 3162 END
 3163 
 3164 SEQ(HOSTLIST)
 3165     { "host",   IMM,        HOST,
 3166 END
 3167 */