"Fossies" - the Fresh Open Source Software Archive

Member "TeXmacs-2.1.2-src/src/System/Classes/url.cpp" (5 May 2022, 32462 Bytes) of package /linux/misc/TeXmacs-2.1.2-src.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 
    2 /******************************************************************************
    3 * MODULE     : url.cpp
    4 * DESCRIPTION: unified resource location handling
    5 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
    6 *******************************************************************************
    7 * The url class uses a tree representation for urls.
    8 * This allows us to generalize the concept of an url and allow paths and
    9 * patterns to be regarded as urls too. An url is either a string or a tuple
   10 * of one of the following types:
   11 *   "." -- here
   12 *   ".." -- parent
   13 *   none -- invalid url
   14 *   concat -- a/b/c is represented as (concat "a" (concat "b" "c"));
   15 *   or -- the path a:b/c is represented as (or "a" (concat "b" "c"));
   16 *   root -- the url http://gnu.org yields (concat (root "http") "gnu.org");
   17 *   wildcard -- (wildcard) corresponds to any url, (wildcard "*.tm")
   18 *     to all strings which end with .tm and (wildcard "*.tm" "file")
   19 *     to all TeXmacs files (i.e. discarding directories ending with .tm).
   20 *******************************************************************************
   21 * There are three main types of urls:
   22 *   - rootless urls, like a/b/c. These urls are mainly used in computations.
   23 *     For example, they can be appended to another url.
   24 *   - Standard rooted urls, like file:///usr or https://www.texmacs.org.
   25 *     These are the same as those used on the web.
   26 *   - System urls, characterized by a "default" root.
   27 *     These urls are similar to standard rooted urls, but they behave
   28 *     in a slightly different way with respect to concatenation.
   29 *     For instance https://www.texmacs.org/Web * file:///tmp would yield
   30 *     file:///tmp, where as https://www.texmacs.org/Web * /tmp yields
   31 *     https://www.texmacs.org/tmp
   32 *******************************************************************************
   33 * There are several formats for parsing (and printing) urls:
   34 *   - System format: the usual format on your operating system.
   35 *     On unix systems "/usr/bin:/usr/local/bin" would be a valid url
   36 *     representing a path and on windows systems "c:\windows;c:\TeXmacs"
   37 *     would be OK.
   38 *   - Unix format: this format forces unix-like notation even for
   39 *     other systems like Windows. This is convenient for url's in
   40 *     the source code. Unix environment variables like ~ and $TEXMACS_PATH
   41 *     can also be part of the url.
   42 *   - Standard format: the format which is used on the web.
   43 *     Notice that ftp://www.texmacs.org/pub and ftp://www.texmacs.org/pub/
   44 *     represent different urls. The second one is represented by concating
   45 *     on the right with an empty name.
   46 *******************************************************************************
   47 * When an explicit operation on urls need to be performed,
   48 * like reading a file, the url is first "resolved" into a simple url
   49 * with a unique name (modulo symbolic links) for the resource.
   50 * Next, the url is "concretized" as a file name which is understood
   51 * by the operating system. This may require searching the file from the web.
   52 * Concretized urls should be used quickly and not memorized,
   53 * since such names may be the names of temporary files,
   54 * which may be destroyed soon afterwards.
   55 *******************************************************************************
   56 * This software falls under the GNU general public license version 3 or later.
   57 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
   58 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
   59 ******************************************************************************/
   60 
   61 #include "boot.hpp"
   62 #include "url.hpp"
   63 #include "sys_utils.hpp"
   64 #include "web_files.hpp"
   65 #include "file.hpp"
   66 #include "analyze.hpp"
   67 
   68 #include <ctype.h>
   69 
   70 #ifdef OS_MINGW
   71 #define WINPATHS
   72 #endif
   73 
   74 #ifdef WINPATHS
   75 #define URL_CONCATER  '\\'
   76 #define URL_SEPARATOR ';'
   77 #else
   78 #define URL_CONCATER  '/'
   79 #define URL_SEPARATOR ':'
   80 #endif
   81 
   82 /******************************************************************************
   83 * Unrooted url constructors
   84 ******************************************************************************/
   85 
   86 static url
   87 url_get_atom (string s, int type) {
   88   if (type < URL_STANDARD) {
   89     if (s == "~") return url_system (get_env ("HOME"));
   90     if (starts (s, "$")) {
   91       string val= get_env (s (1, N(s)));
   92       if (val == "") return url_none ();
   93       return unblank (url_system (val));
   94     }
   95   }
   96   if (occurs ("*", s)) return url_wildcard (s);
   97 #ifdef WINPATHS
   98   if (N(s)==2 && ends (s, ":"))
   99     s->resize(1); // remove the ':' after unit letter
  100 #endif
  101   return as_url (tree (s));
  102 }
  103 
  104 static void
  105 skip_ipv6 (string s, int& i) {
  106   i++;
  107   while (i<N(s) && (s[i] == ':' ||
  108                     (s[i] >= '0' && s[i] <= '9') ||
  109                     (s[i] >= 'a' && s[i] <= 'f') ||
  110                     (s[i] >= 'A' && s[i] <= 'F'))) i++;
  111   if (i<N(s) && s[i] == ']') i++;
  112 }
  113 
  114 static url
  115 url_get_name (string s, int type= URL_STANDARD, int i=0) {
  116   char sep= (type == URL_SYSTEM)? URL_CONCATER: '/';
  117   int start= i, n= N(s);
  118   while ((i<n) && (s[i] != sep) && (s[i] != '/')) {
  119     if (s[i] == '[') skip_ipv6 (s, i); else i++; }
  120   url u= url_get_atom (s (start, i), type);
  121   // url u= tree (s (start, i));
  122   if (i == n) return u;
  123   if (start == i) return url_get_name (s, type, i+1);
  124   return u * url_get_name (s, type, i+1);
  125 }
  126 
  127 static url
  128 url_get_path (string s, int type= URL_STANDARD, int i=0) {
  129   char sep= (type == URL_SYSTEM)? URL_SEPARATOR: ':';
  130   int start= i, n= N(s);
  131   if (i == n) return url_none ();
  132   while ((i<n) && (s[i] != sep)) {
  133     if (s[i] == '[') skip_ipv6 (s, i); else i++; }
  134   url u= url_general (s (start, i), type);
  135   if (i == n) return u;
  136   if (start == i) return url_get_path (s, type, i+1);
  137   return u | url_get_path (s, type, i+1);
  138 }
  139 
  140 /******************************************************************************
  141 * Rooted url constructors
  142 ******************************************************************************/
  143 
  144 url
  145 url_root (string protocol) {
  146   return as_url (tuple ("root", protocol));
  147 }
  148 
  149 url
  150 url_ramdisc (string contents) {
  151   return as_url (tuple ("root", "ramdisc", contents));
  152 }
  153 
  154 static url
  155 url_default (string name, int type= URL_SYSTEM) {
  156   url u= url_get_name (name, type);
  157 #ifdef WINPATHS
  158   // FIXME: this hack seems a bit too simple
  159   if (is_concat (u) && (u[2]->t == "")) u= u[1];
  160   // cout << name << " -> " << url_root ("default") * u << "\n";
  161   return url_root ("default") * u;
  162 #else
  163   if (u->t == "") return url_root ("default");
  164   return url_root ("default") * u;
  165 #endif
  166 }
  167 
  168 static url
  169 url_mingw_default (string name, int type) {
  170   string s= name (0, 2) * ":" * name (2, N(name));
  171   return url_root ("default") * url_get_name (s, type);  
  172 }
  173 
  174 static url
  175 url_path (string s, int type= URL_SYSTEM) {
  176   url u= url_get_path (s, type);
  177   return u;
  178 }
  179 
  180 static url
  181 url_local (string name) {
  182   url u= url_get_name (name, URL_SYSTEM);
  183   return reroot (u, "file");
  184 }
  185 
  186 static url
  187 url_file (string name) {
  188   url u= url_get_name (name);
  189   return url_root ("file") * u;
  190 }
  191 
  192 static url
  193 url_http (string name) {
  194   url u= url_get_name (name);
  195   return url_root ("http") * u;
  196 }
  197 
  198 static url
  199 url_https (string name) {
  200   url u= url_get_name (name);
  201   return url_root ("https") * u;
  202 }
  203 
  204 static url
  205 url_ftp (string name) {
  206   url u= url_get_name (name);
  207   return url_root ("ftp") * u;
  208 }
  209 
  210 static url
  211 url_tmfs (string name) {
  212   url u= url_get_name (name);
  213   return url_root ("tmfs") * u;
  214 }
  215 
  216 static url
  217 url_blank (string name) {
  218   url u= url_get_name (name);
  219   return url_root ("blank") * u;
  220 }
  221 
  222 /******************************************************************************
  223 * Generic url constructor
  224 ******************************************************************************/
  225 
  226 static bool
  227 heuristic_is_path (string name, int type) {
  228   char sep= (type==0)? URL_SEPARATOR: ':';
  229   int i= 0, n= N(name);
  230   while (i<n)
  231     if (name[i] == '[') skip_ipv6 (name, i);
  232     else if (name[i] == sep) return true;
  233     else i++;
  234   return false;
  235 }
  236 
  237 static bool
  238 heuristic_is_default (string name, int type) {
  239 #ifdef WINPATHS
  240   // FIXME: we probably should take into account 'type' too
  241   if (N(name) < 2) return false;
  242   if ((name[0] == '\\') && (name[1] == '\\')) return true;
  243   return
  244     isalpha (name[0]) && (name[1] == ':') &&
  245     ((N(name)==2) || (name[2] == '\\') || (name[2] == '/'));
  246 #else
  247   char sep= (type==0)? URL_CONCATER: '/';
  248   return (name != "") && (name[0] == sep);
  249 #endif
  250 }
  251 
  252 static bool
  253 heuristic_is_http (string name) {
  254   return starts (name, "www.");
  255   // FIXME: we might want to recognize some other ones like google.com too
  256 }
  257 
  258 static bool
  259 heuristic_is_ftp (string name) {
  260   return starts (name, "ftp.");
  261 }
  262 
  263 static bool
  264 heuristic_is_mingw_default (string name, int type) {
  265 #ifdef WINPATHS
  266   return type != URL_SYSTEM && N(name) >= 2 &&
  267          name[0] == '/' && is_alpha (name[1]) &&
  268          (N(name) == 2 || name[2] == '/');
  269 #else
  270   (void) name; (void) type; return false;
  271 #endif         
  272 }
  273 
  274 url
  275 url_general (string name, int type= URL_SYSTEM) {
  276   if (starts (name, "local:")) return url_local (name (6, N (name)));
  277   if (starts (name, "file://")) return url_file (name (7, N (name)));
  278   if (starts (name, "http://")) return url_http (name (7, N (name)));
  279   if (starts (name, "https://")) return url_https (name (8, N (name)));
  280   if (starts (name, "ftp://")) return url_ftp (name (6, N (name)));
  281   if (starts (name, "tmfs://")) return url_tmfs (name (7, N (name)));
  282   if (starts (name, "//")) return url_blank (name (2, N (name)));
  283   if (heuristic_is_path (name, type)) return url_path (name, type);
  284   if (heuristic_is_default (name, type)) return url_default (name, type);
  285   if (heuristic_is_mingw_default (name, type)) return url_mingw_default (name, type);
  286   if (type != URL_CLEAN_UNIX) {
  287     if (heuristic_is_http (name)) return url_http (name);
  288     if (heuristic_is_ftp (name)) return url_ftp (name);
  289   }
  290   return url_get_name (name, type);
  291 }
  292 
  293 url
  294 url_unix (string name) {
  295   return url_general (name, URL_UNIX);
  296 }
  297 
  298 url
  299 url_unix (string dir, string name) {
  300   return url_unix (dir) * url_unix (name);
  301 }
  302 
  303 url
  304 url_system (string name) {
  305   return url_general (name, URL_SYSTEM);
  306 }
  307 
  308 url
  309 url_system (string dir, string name) {
  310   return url_system (dir) * url_system (name);
  311 }
  312 
  313 url
  314 url_standard (string name) {
  315   return url_general (name, URL_STANDARD);
  316 }
  317 
  318 url
  319 url_standard (string dir, string name) {
  320   return url_standard (dir) * url_standard (name);
  321 }
  322 
  323 url::url (): rep (tm_new<url_rep> (tuple ("none"))) {}
  324 url::url (const char* name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
  325 url::url (string name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
  326 url::url (string path_name, string name):
  327   rep (tm_new<url_rep> (url_unix (path_name, name)->t)) {}
  328 
  329 /******************************************************************************
  330 * Computational url constructors
  331 ******************************************************************************/
  332 
  333 static bool
  334 is_special_root (url u) {
  335 #ifdef WINPATHS
  336   return is_root (u);
  337 #else
  338   return is_root_web (u);
  339 #endif
  340 }
  341 
  342 static bool
  343 is_semi_root (url u) {
  344   // url u such that u/.. == u (website or windows drive name)
  345   return is_concat (u) && is_special_root (u[1]) && is_atomic (u[2]);
  346 }
  347 
  348 url
  349 operator * (url u1, url u2) {
  350   //cout << "concat " << u1->t << " * " << u2->t << "\n";
  351   if (is_root (u2) || (is_concat (u2) && is_root (u2[1]))) {
  352     if (is_concat (u1) && is_root_web (u1[1])) {
  353       if (is_root (u2, "default") ||
  354           (is_concat (u2) && is_root (u2[1], "default")))
  355         {
  356           url v= u1[2];
  357           while (is_concat (v)) v= v[1];
  358           if (is_root (u2)) return u1[1] * v;
  359           return u1[1] * v * u2[2];
  360         }
  361       if (is_root (u2, "blank") ||
  362           (is_concat (u2) && is_root (u2[1], "blank")))
  363         return reroot (u2, u1[1][1]->t->label);
  364     }
  365     return u2;
  366   }
  367   if (is_here (u1) || (u1->t == "")) return u2;
  368   if (is_here (u2)) return u1;
  369   if (is_none (u1)) return url_none ();
  370   if (is_none (u2)) return url_none ();
  371   if (u2 == url_parent ()) {
  372     if (is_root (u1)) return u1;
  373     if (is_pseudo_atomic (u1) && (!is_parent (u1))) return url_here ();
  374     if (is_semi_root (u1)) return u1;
  375   }
  376   if (is_concat (u2) && (u2[1] == url_parent ())) {
  377     if (is_root (u1)) return u1 * u2[2];
  378     if (is_pseudo_atomic (u1) && (!is_parent (u1))) return u2[2];
  379     if (is_semi_root (u1)) return u1 * u2[2];
  380   }
  381   if (is_concat (u1)) return u1[1] * (u1[2] * u2);
  382   return as_url (tuple ("concat", u1->t, u2->t));
  383 }
  384 
  385 url
  386 operator * (url u1, const char* name) {
  387   return u1 * url (name);
  388 }
  389 
  390 url
  391 operator * (url u1, string name) {
  392   return u1 * url (name);
  393 }
  394 
  395 url
  396 operator | (url u1, url u2) {
  397   if (is_none (u1)) return u2;
  398   if (is_none (u2)) return u1;
  399   if (is_or (u1)) return u1[1] | (u1[2] | u2);
  400   if (u1 == u2) return u2;
  401   if (is_or (u2) && (u1 == u2[1])) return u2;
  402   return as_url (tuple ("or", u1->t, u2->t));
  403 }
  404 
  405 url
  406 url_wildcard () {
  407   return as_url (tuple ("wildcard"));
  408 }
  409 
  410 url
  411 url_wildcard (string name) {
  412   return as_url (tuple ("wildcard", name));
  413 }
  414 
  415 /******************************************************************************
  416 * url predicates
  417 ******************************************************************************/
  418 
  419 bool
  420 is_rooted (url u) {
  421   return
  422     is_root (u) ||
  423     (is_concat (u) && is_rooted (u[1])) ||
  424     (is_or (u) && is_rooted (u[1]) && is_rooted (u[2]));
  425 }
  426 
  427 bool
  428 is_rooted (url u, string protocol) {
  429   return
  430     is_root (u, protocol) ||
  431     (is_concat (u) && is_rooted (u[1], protocol)) ||
  432     (is_or (u) && is_rooted (u[1], protocol) && is_rooted (u[2], protocol));
  433 }
  434 
  435 bool
  436 is_rooted_web (url u) {
  437   return
  438     is_root_web (u) ||
  439     (is_concat (u) && is_rooted_web (u[1])) ||
  440     (is_or (u) && is_rooted_web (u[1]) && is_rooted_web (u[2]));
  441 }
  442 
  443 bool
  444 is_rooted_tmfs (url u) {
  445   return
  446     is_root_tmfs (u) ||
  447     (is_concat (u) && is_rooted_tmfs (u[1])) ||
  448     (is_or (u) && is_rooted_tmfs (u[1]) && is_rooted_tmfs (u[2]));
  449 }
  450 
  451 bool
  452 is_tmfs_protocol (url u, string protocol) {
  453   return
  454     u->t == protocol ||
  455     (is_concat (u) && is_tmfs_protocol (u[1], protocol));
  456 }
  457 
  458 bool
  459 is_rooted_tmfs (url u, string protocol) {
  460   return
  461     (is_concat (u) && is_root_tmfs (u[1]) &&
  462                       is_tmfs_protocol (u[2], protocol)) ||
  463     (is_or (u) && is_rooted_tmfs (u[1], protocol) &&
  464                   is_rooted_tmfs (u[2], protocol));
  465 }
  466 
  467 bool
  468 is_rooted_blank (url u) {
  469   return
  470     is_root_blank (u) ||
  471     (is_concat (u) && is_rooted_blank (u[1])) ||
  472     (is_or (u) && is_rooted_blank (u[1]) && is_rooted_blank (u[2]));
  473 }
  474 
  475 bool
  476 is_name (url u) {
  477   if (is_atomic (u)) return true;
  478   if (!is_concat (u)) return false;
  479   return is_name (u[1]) && is_name (u[2]);
  480 }
  481 
  482 bool
  483 is_rooted_name (url u) {
  484   return is_concat (u) && is_root (u[1]) && is_name (u[2]);
  485 }
  486 
  487 bool
  488 is_name_in_path (url u) {
  489   if (is_name (u)) return true;
  490   return is_concat (u) && is_root (u[1], "default") && is_name (u[2]);
  491 }
  492 
  493 bool
  494 is_ramdisc (url u) {
  495   return is_concat (u) && is_root (u[1], "ramdisc");
  496 }
  497 
  498 /******************************************************************************
  499 * Conversion routines for urls
  500 ******************************************************************************/
  501 
  502 string
  503 as_string (url u, int type) {
  504   // This routine pritty prints an url as a string.
  505   // FIXME: the current algorithm is quadratic in time.
  506   if (is_none (u)) return "{}";
  507   if (is_atomic (u)) return u->t->label;
  508   if (is_concat (u)) {
  509     int stype= type;
  510     if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
  511     string sep= (stype==URL_SYSTEM? string (URL_CONCATER): string ("/"));
  512     string s1 = as_string (u[1], type);
  513     string s2 = as_string (u[2], stype);
  514     if (is_root (u[1], "default")) s1= "";
  515     if ((!is_name (u[1])) && (!is_root (u[1]))) s1= "{" * s1 * "}";
  516     if ((!is_concat (u[2])) && (!is_atomic (u[2])) && (!is_wildcard (u[2], 1)))
  517       s2= "{" * s2 * "}";
  518 #ifdef WINPATHS
  519     if (((is_root (u[1], "default") && type == URL_SYSTEM) ||
  520          is_root (u[1], "file"))) { // have to return the windows format
  521       string root, remain;
  522       if (is_concat (u[2])) {           
  523         root = as_string (u[2][1], type);
  524         // root might be unit letter or hostname. It depends on the length
  525         remain = as_string (u[2][2], type);
  526       }
  527       else {
  528         root = s2;
  529         remain = "";
  530       }
  531       if (is_root (u[1], "default")) {
  532         if (N(root) == 1) return root * ":\\" * remain; // drive letter
  533         else return "\\\\" * root * "\\" * remain;
  534       }
  535       else {
  536         if (N(root) == 1) return s1 * "/" * root * ":/" * remain; // local file
  537         else return s1 * root * "/" * remain; // remote
  538       }
  539     }
  540 #endif
  541     return s1 * sep * s2;
  542   }
  543   if (is_or (u)) {
  544     string s1= as_string (u[1], type);
  545     string s2= as_string (u[2], type);
  546     if (!is_name_in_path (u[1])) s1= "{" * s1 * "}";
  547     if ((!is_or (u[2])) && (!is_name_in_path (u[2]))) s2= "{" * s2 * "}";
  548 #ifdef WINPATHS
  549     if (type == URL_STANDARD) return s1 * ":" * s2;
  550     else return s1 * string (URL_SEPARATOR) * s2;
  551 #else
  552     return s1 * string (URL_SEPARATOR) * s2;
  553 #endif
  554   }
  555 #ifdef WINPATHS
  556   if (is_root (u, "default")) {
  557     int stype= type;
  558     if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
  559         if (stype == URL_SYSTEM) return ""; else return "/";
  560   }
  561 #else
  562   if (is_root (u, "default")) return "/";
  563 #endif
  564   if (is_root (u, "blank")) return "/";
  565   if (is_root (u, "file")) return u[1]->t->label * "://";
  566   if (is_root (u)) return u[1]->t->label * ":/";
  567   if (is_wildcard (u, 0)) return "**";
  568   if (is_wildcard (u, 1)) return u->t[1]->label;
  569   FAILED ("bad url");
  570   return "";
  571 }
  572 
  573 tm_ostream&
  574 operator << (tm_ostream& out, url u) {
  575   return out << as_string (u, URL_SYSTEM);
  576 }
  577 
  578 /******************************************************************************
  579 * Operations on urls
  580 ******************************************************************************/
  581 
  582 url
  583 head (url u) {
  584   return u * url_parent ();
  585 }
  586 
  587 url
  588 tail (url u) {
  589   if (is_concat (u)) {
  590     if (is_root_web (u[1]) && is_atomic (u[2])) return url_here ();
  591     return tail (u[2]);
  592   }
  593   if (is_or (u)) return tail (u[1]) | tail (u[2]);
  594   if (is_root (u)) return url_here ();
  595   return u;
  596 }
  597 
  598 string
  599 suffix (url u) {
  600   u= tail (u);
  601   if (!is_atomic (u)) return "";
  602   string s= as_string (u);
  603   int i, n= N(s);
  604   for (i=n-1; i>=0; i--)
  605     if (s[i]=='.') break;
  606   if ((i>0) && (i<n-1)) {
  607     string r= s (i+1, n);
  608     while ((N(r)>0) && (r[N(r)-1]=='~' || r[N(r)-1]=='#')) r= r(0, N(r)-1);
  609     return locase_all(r);
  610   }
  611   return "";
  612 }
  613 
  614 string
  615 basename (url u, string suf) {
  616   string s= as_string (tail (u));
  617   if (suf != "" && N(s) > N(suf) && suf == s(N(s)-N(suf),N(s)))
  618     return s(0, N(s)-N(suf));
  619   return s;
  620 }
  621 
  622 string
  623 basename (url u) {
  624   string s= suffix (u);
  625   if (N(s) != 0) s= "." * s;
  626   return basename (u, s);
  627 }
  628 
  629 url
  630 glue (url u, string s) {
  631   if (is_atomic (u)) return as_url (tree (u->t->label * s));
  632   if (is_concat (u)) return u[1] * glue (u[2], s);
  633   if (is_or (u)) return glue (u[1], s) | glue (u[2], s);
  634   if (is_none (u)) return u;
  635   failed_error << "u= " << u << "\n";
  636   failed_error << "s= " << s << "\n";
  637   FAILED ("can't glue string to url");
  638   return u;
  639 }
  640 
  641 url
  642 unglue (url u, int nr) {
  643   if (is_atomic (u))
  644     return as_url (tree (u->t->label (0, max (N(u->t->label) - nr, 0))));
  645   if (is_concat (u)) return u[1] * unglue (u[2], nr);
  646   if (is_or (u)) return unglue (u[1], nr) | unglue (u[2], nr);
  647   if (is_none (u)) return u;
  648   failed_error << "u = " << u << "\n";
  649   failed_error << "nr= " << nr << "\n";
  650   FAILED ("can't unglue from url");
  651   return u;
  652 }
  653 
  654 url
  655 unblank (url u) {
  656   if (is_concat (u) && (u[2]->t == "")) return u[1];
  657   if (is_concat (u)) return u[1] * unblank (u[2]);
  658   if (is_or (u)) return unblank (u[1]) | unblank (u[2]);
  659   return u;
  660 }
  661 
  662 url
  663 relative (url base, url u) {
  664   return head (base) * u;
  665 }
  666 
  667 url
  668 delta_sub (url base, url u) {
  669   if (is_atomic (base))
  670     return u;
  671   if (is_concat (base) && is_concat (u) && (base[1] == u[1])) {
  672     if (is_special_root (base[1]) &&
  673         is_concat (base[2]) && is_concat (u[2]) &&
  674         base[2][1] != u[2][1])
  675       return url_none ();
  676     return delta_sub (base[2], u[2]);
  677   }
  678   if (is_concat (base) && !is_semi_root (base))
  679     return url_parent () * delta_sub (head (base), u);
  680   return url_none ();
  681 }
  682 
  683 url
  684 delta (url base, url u) {
  685   if (is_or (u))
  686     return delta (base, u[1]) | delta (base, u[2]);
  687   url res= delta_sub (base, u);
  688   if (is_none (res)) return u;
  689   return res;
  690 }
  691 
  692 static url
  693 expand (url u1, url u2) {
  694   if (is_or (u1)) return expand (u1[1], u2) | expand (u1[2], u2);
  695   if (is_or (u2)) return expand (u1, u2[1]) | expand (u1, u2[2]);
  696   if (is_ancestor (u2)) {
  697     if (is_concat (u1)) return u1 | expand (u1[1], u2);
  698     if (is_special_root (u1)) return u2;
  699     return u1 | u2;
  700   }
  701   if (is_concat (u2) && is_ancestor (u2[1]))
  702     return expand (expand (u1, u2[1]), u2[2]);
  703   return u1 * u2;
  704 }
  705 
  706 url
  707 expand (url u) {
  708   if (is_or (u)) return expand (u[1]) | expand (u[2]);
  709   if (is_concat (u)) return expand (expand (u[1]), expand (u[2]));
  710   return u;
  711 }
  712 
  713 bool
  714 descends (url u, url base) {
  715   if (is_or (base))
  716     return descends (u, base[1]) || descends (u, base[2]);
  717   if (is_or (u))
  718     return descends (u[1], base) && descends (u[2], base);
  719   if (u == base)
  720     return true;
  721   if (is_concat (u) && is_atomic (base))
  722     return u[1] == base;
  723   if (is_concat (u) && is_concat (base))
  724     return u[1] == base[1] && descends (u[2], base[2]);
  725   return false;
  726 }
  727 
  728 bool
  729 is_secure (url u) {
  730   return descends (u, expand (url_path ("$TEXMACS_SECURE_PATH")));
  731 }
  732 
  733 /******************************************************************************
  734 * Url sorting and factorization
  735 ******************************************************************************/
  736 
  737 static bool
  738 operator <= (url u1, url u2) {
  739   if (is_atomic (u1) && is_atomic (u2))
  740     return u1->t->label <= u2->t->label;
  741   if (is_atomic (u1)) return true;
  742   if (is_atomic (u2)) return false;
  743   if (is_concat (u1) && is_concat (u2)) {
  744     if (u1[1] == u2[1]) return u1[2] <= u2[2];
  745     else return u1[1] <= u2[1];
  746   }
  747   if (is_concat (u1)) return true;
  748   if (is_concat (u2)) return false;
  749   return true; // does not matter for sorting
  750 }
  751 
  752 static url
  753 sort_sub (url add, url to) {
  754   if (is_or (to)) {
  755     if (add <= to[1]) return add | to;
  756     return to[1] | sort_sub (add, to[2]);
  757   }
  758   if (add <= to) return add | to;
  759   else return to | add;
  760 }
  761 
  762 url
  763 sort (url u) {
  764   if (is_or (u))
  765     return sort_sub (u[1], sort (u[2]));
  766   else return u;
  767 }
  768 
  769 static url
  770 factor_sorted (url u) {
  771   if (!is_or (u)) return u;
  772   url v= factor_sorted (u[2]);
  773   if (is_concat (u[1])) {
  774     if (is_concat (v) && (u[1][1] == v[1]))
  775       return u[1][1] * (u[1][2] | v[2]);
  776     if (is_or (v) && is_concat (v[1]) && (u[1][1] == v[1][1]))
  777       return (u[1][1] * (u[1][2] | v[1][2])) | v[2];
  778   }
  779   return u[1] | v;
  780 }
  781 
  782 static url
  783 factor_sub (url u) {
  784   if (is_concat (u)) return u[1] * factor (u[2]);
  785   if (is_or (u)) return factor_sub (u[1]) | factor_sub (u[2]);
  786   return u;
  787 }
  788 
  789 url
  790 factor (url u) {
  791   return factor_sub (factor_sorted (sort (u)));
  792 }
  793 
  794 /******************************************************************************
  795 * Url resolution and wildcard expansion
  796 ******************************************************************************/
  797 
  798 url complete (url base, url u, string filter, bool flag);
  799 
  800 string
  801 get_root (url u) {
  802   if (is_concat (u)) return get_root (u[1]);
  803   if (is_or (u)) {
  804     string s1= get_root (u[1]);
  805     string s2= get_root (u[2]);
  806     if (s1 == s2) return s1; else return "";
  807   }
  808   if (is_root (u)) return u[1]->t->label;
  809   return "";
  810 }
  811 
  812 url
  813 unroot (url u) {
  814   if (is_concat (u)) return unroot (u[1]) * u[2];
  815   if (is_or (u)) return unroot (u[1]) | unroot (u[2]);
  816   if (is_root (u)) return url_here ();
  817   return u;
  818 }
  819 
  820 url
  821 reroot (url u, string protocol) {
  822   if (is_concat (u)) return reroot (u[1], protocol) * u[2];
  823   if (is_or (u)) return reroot (u[1], protocol) | reroot (u[2], protocol);
  824   if (is_root (u)) return url_root (protocol);
  825   return u;
  826 }
  827 
  828 static url
  829 complete (url base, url sub, url u, string filter, bool flag) {
  830   if (is_or (sub)) {
  831     url res1= complete (base, sub[1], u, filter, flag);
  832     if ((!is_none (res1)) && flag) return res1;
  833     return res1 | complete (base, sub[2], u, filter, flag);
  834   }
  835   if (is_concat (sub) && is_rooted (sub[1])) {
  836     url res= complete (sub[1], sub[2], u, filter, flag);
  837     return sub[1] * res;
  838   }
  839   return sub * complete (base * sub, u, filter, flag);
  840 }
  841 
  842 url
  843 complete (url base, url u, string filter, bool flag) {
  844   // cout << "complete " << base << " |||| " << u << LF;
  845   if (!is_rooted(u)) {
  846      if (is_none (base)) return base;
  847      if (is_none (u)) return u;
  848      if ((!is_root (base)) && (!is_rooted_name (base))) {
  849        failed_error << "base  = " << base << LF;
  850        failed_error << "u     = " << u << LF;
  851        failed_error << "filter= " << filter << LF;
  852        FAILED ("invalid base url");
  853      }
  854   }
  855   if (is_name (u) || (is_concat (u) && is_root (u[1]) && is_name (u[2]))) {
  856     url comp= base * u;
  857     if (is_rooted (comp, "default") || is_rooted (comp, "file")) {
  858       if (is_of_type (comp, filter)) return reroot (u, "default");
  859       return url_none ();
  860     }
  861     if (is_rooted_web (comp) || is_rooted_tmfs (comp) || is_ramdisc (comp)) {
  862       if (is_of_type (comp, filter)) return u;
  863       return url_none ();
  864     }
  865     failed_error << "base  = " << base << LF;
  866     failed_error << "u     = " << u << LF;
  867     failed_error << "filter= " << filter << LF;
  868     ASSERT (is_rooted (comp), "unrooted url");
  869     FAILED ("bad protocol in url");
  870   }
  871   if (is_root (u)) {
  872     // FIXME: test filter flags here
  873     return u;
  874   }
  875   if (is_concat (u) && is_wildcard (u[1], 0) && is_wildcard (u[2], 1)) {
  876     // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
  877     if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
  878       failed_error << "base  = " << base << LF;
  879       failed_error << "u     = " << u << LF;
  880       failed_error << "filter= " << filter << LF;
  881       FAILED ("wildcards only implemented for files");
  882     }
  883     url ret= url_none ();
  884     bool error_flag;
  885     array<string> dir= read_directory (base, error_flag);
  886     int i, n= N(dir);
  887     for (i=0; i<n; i++) {
  888       if ((!is_none (ret)) && flag) return ret;
  889       if ((dir[i] == ".") || (dir[i] == "..")) continue;
  890       if (starts (dir[i], "http://") ||
  891           starts (dir[i], "https://") ||
  892           starts (dir[i], "ftp://"))
  893         if (is_directory (base * dir[i])) continue;
  894       ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
  895       if (match_wildcard (dir[i], u[2][1]->t->label))
  896         ret= ret | complete (base, dir[i], filter, flag);
  897     }
  898     return ret;
  899   }
  900   if (is_concat (u)) {
  901     url sub= complete (base, u[1], "", false);
  902     // "" should often be faster than the more correct "d" here
  903     return complete (base, sub, u[2], filter, flag);
  904   }
  905   if (is_or (u)) {
  906     url res1= complete (base, u[1], filter, flag);
  907     if ((!is_none (res1)) && flag) return res1;
  908     return res1 | complete (base, u[2], filter, flag);
  909   }
  910   if (is_wildcard (u)) {
  911     // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
  912     if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
  913       failed_error << "base  = " << base << LF;
  914       failed_error << "u     = " << u << LF;
  915       failed_error << "filter= " << filter << LF;
  916       FAILED ("wildcards only implemented for files");
  917     }
  918     url ret= url_none ();
  919     if (is_wildcard (u, 0) && is_of_type (base, filter)) ret= url_here ();
  920     bool error_flag;
  921     array<string> dir= read_directory (base, error_flag);
  922     int i, n= N(dir);
  923     for (i=0; i<n; i++) {
  924       if ((!is_none (ret)) && flag) return ret;
  925       if ((dir[i] == ".") || (dir[i] == "..")) continue;
  926       if (starts (dir[i], "http://") ||
  927           starts (dir[i], "https://") ||
  928           starts (dir[i], "ftp://"))
  929         if (is_directory (base * dir[i])) continue;
  930       if (is_wildcard (u, 0))
  931         ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
  932       else if (match_wildcard (dir[i], u[1]->t->label))
  933         ret= ret | complete (base, dir[i], filter, flag);
  934     }
  935     return ret;
  936   }
  937   failed_error << "url= " << u << LF;
  938   FAILED ("bad url");
  939   return u;
  940 }
  941 
  942 url
  943 complete (url u, string filter, bool flag) {
  944   url home= url_pwd ();
  945   return home * complete (home, u, filter, flag);
  946 }
  947 
  948 url
  949 complete (url u, string filter) {
  950   // This routine can be used in order to find all possible matches
  951   // for the wildcards in an url and replace the wildcards by these matches.
  952   // Moreover, matches are normalized (file root -> default root).
  953   url r =  complete (u, filter, false);
  954   // cout << "complete:" << u << " filter:" << filter << " result:" << r << LF;
  955   return r;
  956 }
  957 
  958 url
  959 resolve (url u, string filter) {
  960   // This routine does the same thing as complete, but it stops at
  961   // the first match. It is particularly useful for finding files in paths.
  962   return complete (u, filter, true);
  963   /*
  964   url res= complete (u, filter, true);
  965   if (is_none (res))
  966     cout << "Failed resolution of " << u << ", " << filter << LF;
  967   return res;
  968   */
  969 }
  970 
  971 url
  972 resolve_in_path (url u) {
  973   if (use_which) {
  974     string name = escape_sh (as_string (u));
  975     string which= var_eval_system ("which " * name * " 2> /dev/null");
  976     if (ends (which, name))
  977       return which;
  978     else if ((which != "") &&
  979              (!occurs ("bin/which: ", which)) &&
  980              (!starts (which, "which: ")) &&
  981              (!starts (which, "no ")))
  982       cout << "TeXmacs] " << which << "\n";
  983   }
  984 #ifdef OS_MINGW
  985   return resolve ((url_path ("$TEXMACS_PATH/bin") | url_path ("$PATH")) * u, "x");
  986 #else
  987   return resolve (url_path ("$PATH") * u, "x");
  988 #endif
  989 }
  990 
  991 bool
  992 exists (url u) {
  993   return !is_none (resolve (u, "r"));
  994 }
  995 
  996 bool
  997 exists_in_path (url u) {
  998 #ifdef OS_MINGW
  999   return !is_none (resolve_in_path (url (as_string (u) * ".bat"))) ||\
 1000          !is_none (resolve_in_path (url (as_string (u) * ".exe"))) ||\
 1001          !is_none (resolve_in_path (url (as_string (u) * ".com")));
 1002 #else
 1003   return !is_none (resolve_in_path (u));
 1004 #endif
 1005 }
 1006 
 1007 bool
 1008 has_permission (url u, string filter) {
 1009   return !is_none (resolve (u, filter));
 1010 }
 1011 
 1012 static url
 1013 descendance_sub (url u) {
 1014   if (is_or (u))
 1015     return descendance_sub (u[1]) | descendance_sub (u[2]);
 1016   return complete (u, url_wildcard (), "r", false);
 1017 }
 1018 
 1019 url
 1020 descendance (url u) {
 1021   // Utility for style and package menus in tm_server.cpp
 1022   // Compute and merge subdirectories of directories in path
 1023   return factor (descendance_sub (u));
 1024 }
 1025 
 1026 url
 1027 subdirectories (url u) {
 1028   if (is_or (u))
 1029     return subdirectories (u[1]) | subdirectories (u[2]);
 1030   else if (is_directory (u)) {
 1031     url ret= u;
 1032     bool error_flag;
 1033     array<string> dir= read_directory (u, error_flag);
 1034     for (int i=0; i<N(dir); i++)
 1035       if (!starts (dir[i], ".") && is_directory (u * dir[i]))
 1036         ret= ret | subdirectories (u * dir[i]);
 1037     return ret;
 1038   }
 1039   else return url_none ();
 1040 }
 1041 
 1042 /******************************************************************************
 1043 * Concretization of resolved urls
 1044 ******************************************************************************/
 1045 
 1046 url
 1047 concretize_url (url u) {
 1048   // This routine transforms a resolved url into a system url.
 1049   // In the case of distant files from the web, a local copy is created.
 1050   if (is_rooted (u, "default") ||
 1051       is_rooted (u, "file") ||
 1052       is_rooted (u, "blank"))
 1053         return reroot (u, "default");
 1054   if (is_rooted_web (u)) return concretize_url (get_from_web (u));
 1055   if (is_rooted_tmfs (u)) return concretize_url (get_from_server (u));
 1056   if (is_ramdisc (u)) return concretize_url (get_from_ramdisc (u));
 1057   if (is_here (u)) return url_pwd ();
 1058   if (is_parent (u)) return url_pwd () * url_parent ();
 1059   return url_none ();
 1060 }
 1061 
 1062 string
 1063 concretize (url u) {
 1064   // This routine transforms a resolved url into a system file name.
 1065   // In the case of distant files from the web, a local copy is created.
 1066   url c= concretize_url (u);
 1067   if (!is_none (c)) return as_string (c);
 1068   if (is_wildcard (u, 1)) return u->t[1]->label;
 1069   std_warning << "Couldn't concretize " << u->t << LF;
 1070   // failed_error << "u= " << u << LF;
 1071   // FAILED ("url has no root");
 1072   return "xxx";
 1073 }
 1074 
 1075 string
 1076 materialize (url u, string filter) {
 1077   // Combines resolve and concretize
 1078   url r= resolve (u, filter);
 1079   if (!(is_rooted (r) || is_here (r) || is_parent (r))) {
 1080     failed_error << "u= " << u << LF;
 1081     FAILED ("url could not be resolved");
 1082   }
 1083   return concretize (r);
 1084 }