"Fossies" - the Fresh Open Source Software Archive

Member "TeXmacs-2.1.2-src/src/System/Classes/url.cpp" (5 May 2022, 32462 Bytes) of package /linux/misc/TeXmacs-2.1.2-src.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "url.cpp" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.1.1_vs_2.1.2.

    1 
    2 /******************************************************************************
    3 * MODULE     : url.cpp
    4 * DESCRIPTION: unified resource location handling
    5 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
    6 *******************************************************************************
    7 * The url class uses a tree representation for urls.
    8 * This allows us to generalize the concept of an url and allow paths and
    9 * patterns to be regarded as urls too. An url is either a string or a tuple
   10 * of one of the following types:
   11 *   "." -- here
   12 *   ".." -- parent
   13 *   none -- invalid url
   14 *   concat -- a/b/c is represented as (concat "a" (concat "b" "c"));
   15 *   or -- the path a:b/c is represented as (or "a" (concat "b" "c"));
   16 *   root -- the url http://gnu.org yields (concat (root "http") "gnu.org");
   17 *   wildcard -- (wildcard) corresponds to any url, (wildcard "*.tm")
   18 *     to all strings which end with .tm and (wildcard "*.tm" "file")
   19 *     to all TeXmacs files (i.e. discarding directories ending with .tm).
   20 *******************************************************************************
   21 * There are three main types of urls:
   22 *   - rootless urls, like a/b/c. These urls are mainly used in computations.
   23 *     For example, they can be appended to another url.
   24 *   - Standard rooted urls, like file:///usr or https://www.texmacs.org.
   25 *     These are the same as those used on the web.
   26 *   - System urls, characterized by a "default" root.
   27 *     These urls are similar to standard rooted urls, but they behave
   28 *     in a slightly different way with respect to concatenation.
   29 *     For instance https://www.texmacs.org/Web * file:///tmp would yield
   30 *     file:///tmp, where as https://www.texmacs.org/Web * /tmp yields
   31 *     https://www.texmacs.org/tmp
   32 *******************************************************************************
   33 * There are several formats for parsing (and printing) urls:
   34 *   - System format: the usual format on your operating system.
   35 *     On unix systems "/usr/bin:/usr/local/bin" would be a valid url
   36 *     representing a path and on windows systems "c:\windows;c:\TeXmacs"
   37 *     would be OK.
   38 *   - Unix format: this format forces unix-like notation even for
   39 *     other systems like Windows. This is convenient for url's in
   40 *     the source code. Unix environment variables like ~ and $TEXMACS_PATH
   41 *     can also be part of the url.
   42 *   - Standard format: the format which is used on the web.
   43 *     Notice that ftp://www.texmacs.org/pub and ftp://www.texmacs.org/pub/
   44 *     represent different urls. The second one is represented by concating
   45 *     on the right with an empty name.
   46 *******************************************************************************
   47 * When an explicit operation on urls need to be performed,
   48 * like reading a file, the url is first "resolved" into a simple url
   49 * with a unique name (modulo symbolic links) for the resource.
   50 * Next, the url is "concretized" as a file name which is understood
   51 * by the operating system. This may require searching the file from the web.
   52 * Concretized urls should be used quickly and not memorized,
   53 * since such names may be the names of temporary files,
   54 * which may be destroyed soon afterwards.
   55 *******************************************************************************
   56 * This software falls under the GNU general public license version 3 or later.
   57 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
   58 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
   59 ******************************************************************************/
   60 
   61 #include "boot.hpp"
   62 #include "url.hpp"
   63 #include "sys_utils.hpp"
   64 #include "web_files.hpp"
   65 #include "file.hpp"
   66 #include "analyze.hpp"
   67 
   68 #include <ctype.h>
   69 
   70 #ifdef OS_MINGW
   71 #define WINPATHS
   72 #endif
   73 
   74 #ifdef WINPATHS
   75 #define URL_CONCATER  '\\'
   76 #define URL_SEPARATOR ';'
   77 #else
   78 #define URL_CONCATER  '/'
   79 #define URL_SEPARATOR ':'
   80 #endif
   81 
   82 /******************************************************************************
   83 * Unrooted url constructors
   84 ******************************************************************************/
   85 
   86 static url
   87 url_get_atom (string s, int type) {
   88   if (type < URL_STANDARD) {
   89     if (s == "~") return url_system (get_env ("HOME"));
   90     if (starts (s, "$")) {
   91       string val= get_env (s (1, N(s)));
   92       if (val == "") return url_none ();
   93       return unblank (url_system (val));
   94     }
   95   }
   96   if (occurs ("*", s)) return url_wildcard (s);
   97 #ifdef WINPATHS
   98   if (N(s)==2 && ends (s, ":"))
   99     s->resize(1); // remove the ':' after unit letter
  100 #endif
  101   return as_url (tree (s));
  102 }
  103 
  104 static void
  105 skip_ipv6 (string s, int& i) {
  106   i++;
  107   while (i<N(s) && (s[i] == ':' ||
  108                     (s[i] >= '0' && s[i] <= '9') ||
  109                     (s[i] >= 'a' && s[i] <= 'f') ||
  110                     (s[i] >= 'A' && s[i] <= 'F'))) i++;
  111   if (i<N(s) && s[i] == ']') i++;
  112 }
  113 
  114 static url
  115 url_get_name (string s, int type= URL_STANDARD, int i=0) {
  116   char sep= (type == URL_SYSTEM)? URL_CONCATER: '/';
  117   int start= i, n= N(s);
  118   while ((i<n) && (s[i] != sep) && (s[i] != '/')) {
  119     if (s[i] == '[') skip_ipv6 (s, i); else i++; }
  120   url u= url_get_atom (s (start, i), type);
  121   // url u= tree (s (start, i));
  122   if (i == n) return u;
  123   if (start == i) return url_get_name (s, type, i+1);
  124   return u * url_get_name (s, type, i+1);
  125 }
  126 
  127 static url
  128 url_get_path (string s, int type= URL_STANDARD, int i=0) {
  129   char sep= (type == URL_SYSTEM)? URL_SEPARATOR: ':';
  130   int start= i, n= N(s);
  131   if (i == n) return url_none ();
  132   while ((i<n) && (s[i] != sep)) {
  133     if (s[i] == '[') skip_ipv6 (s, i); else i++; }
  134   url u= url_general (s (start, i), type);
  135   if (i == n) return u;
  136   if (start == i) return url_get_path (s, type, i+1);
  137   return u | url_get_path (s, type, i+1);
  138 }
  139 
  140 /******************************************************************************
  141 * Rooted url constructors
  142 ******************************************************************************/
  143 
  144 url
  145 url_root (string protocol) {
  146   return as_url (tuple ("root", protocol));
  147 }
  148 
  149 url
  150 url_ramdisc (string contents) {
  151   return as_url (tuple ("root", "ramdisc", contents));
  152 }
  153 
  154 static url
  155 url_default (string name, int type= URL_SYSTEM) {
  156   url u= url_get_name (name, type);
  157 #ifdef WINPATHS
  158   // FIXME: this hack seems a bit too simple
  159   if (is_concat (u) && (u[2]->t == "")) u= u[1];
  160   // cout << name << " -> " << url_root ("default") * u << "\n";
  161   return url_root ("default") * u;
  162 #else
  163   if (u->t == "") return url_root ("default");
  164   return url_root ("default") * u;
  165 #endif
  166 }
  167 
  168 static url
  169 url_mingw_default (string name, int type) {
  170   string s= name (0, 2) * ":" * name (2, N(name));
  171   return url_root ("default") * url_get_name (s, type);  
  172 }
  173 
  174 static url
  175 url_path (string s, int type= URL_SYSTEM) {
  176   url u= url_get_path (s, type);
  177   return u;
  178 }
  179 
  180 static url
  181 url_local (string name) {
  182   url u= url_get_name (name, URL_SYSTEM);
  183   return reroot (u, "file");
  184 }
  185 
  186 static url
  187 url_file (string name) {
  188   url u= url_get_name (name);
  189   return url_root ("file") * u;
  190 }
  191 
  192 static url
  193 url_http (string name) {
  194   url u= url_get_name (name);
  195   return url_root ("http") * u;
  196 }
  197 
  198 static url
  199 url_https (string name) {
  200   url u= url_get_name (name);
  201   return url_root ("https") * u;
  202 }
  203 
  204 static url
  205 url_ftp (string name) {
  206   url u= url_get_name (name);
  207   return url_root ("ftp") * u;
  208 }
  209 
  210 static url
  211 url_tmfs (string name) {
  212   url u= url_get_name (name);
  213   return url_root ("tmfs") * u;
  214 }
  215 
  216 static url
  217 url_blank (string name) {
  218   url u= url_get_name (name);
  219   return url_root ("blank") * u;
  220 }
  221 
  222 /******************************************************************************
  223 * Generic url constructor
  224 ******************************************************************************/
  225 
  226 static bool
  227 heuristic_is_path (string name, int type) {
  228   char sep= (type==0)? URL_SEPARATOR: ':';
  229   int i= 0, n= N(name);
  230   while (i<n)
  231     if (name[i] == '[') skip_ipv6 (name, i);
  232     else if (name[i] == sep) return true;
  233     else i++;
  234   return false;
  235 }
  236 
  237 static bool
  238 heuristic_is_default (string name, int type) {
  239 #ifdef WINPATHS
  240   // FIXME: we probably should take into account 'type' too
  241   if (N(name) < 2) return false;
  242   if ((name[0] == '\\') && (name[1] == '\\')) return true;
  243   return
  244     isalpha (name[0]) && (name[1] == ':') &&
  245     ((N(name)==2) || (name[2] == '\\') || (name[2] == '/'));
  246 #else
  247   char sep= (type==0)? URL_CONCATER: '/';
  248   return (name != "") && (name[0] == sep);
  249 #endif
  250 }
  251 
  252 static bool
  253 heuristic_is_http (string name) {
  254   return starts (name, "www.");
  255   // FIXME: we might want to recognize some other ones like google.com too
  256 }
  257 
  258 static bool
  259 heuristic_is_ftp (string name) {
  260   return starts (name, "ftp.");
  261 }
  262 
  263 static bool
  264 heuristic_is_mingw_default (string name, int type) {
  265 #ifdef WINPATHS
  266   return type != URL_SYSTEM && N(name) >= 2 &&
  267          name[0] == '/' && is_alpha (name[1]) &&
  268          (N(name) == 2 || name[2] == '/');
  269 #else
  270   (void) name; (void) type; return false;
  271 #endif         
  272 }
  273 
  274 url
  275 url_general (string name, int type= URL_SYSTEM) {
  276   if (starts (name, "local:")) return url_local (name (6, N (name)));
  277   if (starts (name, "file://")) return url_file (name (7, N (name)));
  278   if (starts (name, "http://")) return url_http (name (7, N (name)));
  279   if (starts (name, "https://")) return url_https (name (8, N (name)));
  280   if (starts (name, "ftp://")) return url_ftp (name (6, N (name)));
  281   if (starts (name, "tmfs://")) return url_tmfs (name (7, N (name)));
  282   if (starts (name, "//")) return url_blank (name (2, N (name)));
  283   if (heuristic_is_path (name, type)) return url_path (name, type);
  284   if (heuristic_is_default (name, type)) return url_default (name, type);
  285   if (heuristic_is_mingw_default (name, type)) return url_mingw_default (name, type);
  286   if (type != URL_CLEAN_UNIX) {
  287     if (heuristic_is_http (name)) return url_http (name);
  288     if (heuristic_is_ftp (name)) return url_ftp (name);
  289   }
  290   return url_get_name (name, type);
  291 }
  292 
  293 url
  294 url_unix (string name) {
  295   return url_general (name, URL_UNIX);
  296 }
  297 
  298 url
  299 url_unix (string dir, string name) {
  300   return url_unix (dir) * url_unix (name);
  301 }
  302 
  303 url
  304 url_system (string name) {
  305   return url_general (name, URL_SYSTEM);
  306 }
  307 
  308 url
  309 url_system (string dir, string name) {
  310   return url_system (dir) * url_system (name);
  311 }
  312 
  313 url
  314 url_standard (string name) {
  315   return url_general (name, URL_STANDARD);
  316 }
  317 
  318 url
  319 url_standard (string dir, string name) {
  320   return url_standard (dir) * url_standard (name);
  321 }
  322 
  323 url::url (): rep (tm_new<url_rep> (tuple ("none"))) {}
  324 url::url (const char* name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
  325 url::url (string name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
  326 url::url (string path_name, string name):
  327   rep (tm_new<url_rep> (url_unix (path_name, name)->t)) {}
  328 
  329 /******************************************************************************
  330 * Computational url constructors
  331 ******************************************************************************/
  332 
  333 static bool
  334 is_special_root (url u) {
  335 #ifdef WINPATHS
  336   return is_root (u);
  337 #else
  338   return is_root_web (u);
  339 #endif
  340 }
  341 
  342 static bool
  343 is_semi_root (url u) {
  344   // url u such that u/.. == u (website or windows drive name)
  345   return is_concat (u) && is_special_root (u[1]) && is_atomic (u[2]);
  346 }
  347 
  348 url
  349 operator * (url u1, url u2) {
  350   //cout << "concat " << u1->t << " * " << u2->t << "\n";
  351   if (is_root (u2) || (is_concat (u2) && is_root (u2[1]))) {
  352     if (is_concat (u1) && is_root_web (u1[1])) {
  353       if (is_root (u2, "default") ||
  354           (is_concat (u2) && is_root (u2[1], "default")))
  355         {
  356           url v= u1[2];
  357           while (is_concat (v)) v= v[1];
  358           if (is_root (u2)) return u1[1] * v;
  359           return u1[1] * v * u2[2];
  360         }
  361       if (is_root (u2, "blank") ||
  362           (is_concat (u2) && is_root (u2[1], "blank")))
  363         return reroot (u2, u1[1][1]->t->label);
  364     }
  365     return u2;
  366   }
  367   if (is_here (u1) || (u1->t == "")) return u2;
  368   if (is_here (u2)) return u1;
  369   if (is_none (u1)) return url_none ();
  370   if (is_none (u2)) return url_none ();
  371   if (u2 == url_parent ()) {
  372     if (is_root (u1)) return u1;
  373     if (is_pseudo_atomic (u1) && (!is_parent (u1))) return url_here ();
  374     if (is_semi_root (u1)) return u1;
  375   }
  376   if (is_concat (u2) && (u2[1] == url_parent ())) {
  377     if (is_root (u1)) return u1 * u2[2];
  378     if (is_pseudo_atomic (u1) && (!is_parent (u1))) return u2[2];
  379     if (is_semi_root (u1)) return u1 * u2[2];
  380   }
  381   if (is_concat (u1)) return u1[1] * (u1[2] * u2);
  382   return as_url (tuple ("concat", u1->t, u2->t));
  383 }
  384 
  385 url
  386 operator * (url u1, const char* name) {
  387   return u1 * url (name);
  388 }
  389 
  390 url
  391 operator * (url u1, string name) {
  392   return u1 * url (name);
  393 }
  394 
  395 url
  396 operator | (url u1, url u2) {
  397   if (is_none (u1)) return u2;
  398   if (is_none (u2)) return u1;
  399   if (is_or (u1)) return u1[1] | (u1[2] | u2);
  400   if (u1 == u2) return u2;
  401   if (is_or (u2) && (u1 == u2[1])) return u2;
  402   return as_url (tuple ("or", u1->t, u2->t));
  403 }
  404 
  405 url
  406 url_wildcard () {
  407   return as_url (tuple ("wildcard"));
  408 }
  409 
  410 url
  411 url_wildcard (string name) {
  412   return as_url (tuple ("wildcard", name));
  413 }
  414 
  415 /******************************************************************************
  416 * url predicates
  417 ******************************************************************************/
  418 
  419 bool
  420 is_rooted (url u) {
  421   return
  422     is_root (u) ||
  423     (is_concat (u) && is_rooted (u[1])) ||
  424     (is_or (u) && is_rooted (u[1]) && is_rooted (u[2]));
  425 }
  426 
  427 bool
  428 is_rooted (url u, string protocol) {
  429   return
  430     is_root (u, protocol) ||
  431     (is_concat (u) && is_rooted (u[1], protocol)) ||
  432     (is_or (u) && is_rooted (u[1], protocol) && is_rooted (u[2], protocol));
  433 }
  434 
  435 bool
  436 is_rooted_web (url u) {
  437   return
  438     is_root_web (u) ||
  439     (is_concat (u) && is_rooted_web (u[1])) ||
  440     (is_or (u) && is_rooted_web (u[1]) && is_rooted_web (u[2]));
  441 }
  442 
  443 bool
  444 is_rooted_tmfs (url u) {
  445   return
  446     is_root_tmfs (u) ||
  447     (is_concat (u) && is_rooted_tmfs (u[1])) ||
  448     (is_or (u) && is_rooted_tmfs (u[1]) && is_rooted_tmfs (u[2]));
  449 }
  450 
  451 bool
  452 is_tmfs_protocol (url u, string protocol) {
  453   return
  454     u->t == protocol ||
  455     (is_concat (u) && is_tmfs_protocol (u[1], protocol));
  456 }
  457 
  458 bool
  459 is_rooted_tmfs (url u, string protocol) {
  460   return
  461     (is_concat (u) && is_root_tmfs (u[1]) &&
  462                       is_tmfs_protocol (u[2], protocol)) ||
  463     (is_or (u) && is_rooted_tmfs (u[1], protocol) &&
  464                   is_rooted_tmfs (u[2], protocol));
  465 }
  466 
  467 bool
  468 is_rooted_blank (url u) {
  469   return
  470     is_root_blank (u) ||
  471     (is_concat (u) && is_rooted_blank (u[1])) ||
  472     (is_or (u) && is_rooted_blank (u[1]) && is_rooted_blank (u[2]));
  473 }
  474 
  475 bool
  476 is_name (url u) {
  477   if (is_atomic (u)) return true;
  478   if (!is_concat (u)) return false;
  479   return is_name (u[1]) && is_name (u[2]);
  480 }
  481 
  482 bool
  483 is_rooted_name (url u) {
  484   return is_concat (u) && is_root (u[1]) && is_name (u[2]);
  485 }
  486 
  487 bool
  488 is_name_in_path (url u) {
  489   if (is_name (u)) return true;
  490   return is_concat (u) && is_root (u[1], "default") && is_name (u[2]);
  491 }
  492 
  493 bool
  494 is_ramdisc (url u) {
  495   return is_concat (u) && is_root (u[1], "ramdisc");
  496 }
  497 
  498 /******************************************************************************
  499 * Conversion routines for urls
  500 ******************************************************************************/
  501 
  502 string
  503 as_string (url u, int type) {
  504   // This routine pritty prints an url as a string.
  505   // FIXME: the current algorithm is quadratic in time.
  506   if (is_none (u)) return "{}";
  507   if (is_atomic (u)) return u->t->label;
  508   if (is_concat (u)) {
  509     int stype= type;
  510     if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
  511     string sep= (stype==URL_SYSTEM? string (URL_CONCATER): string ("/"));
  512     string s1 = as_string (u[1], type);
  513     string s2 = as_string (u[2], stype);
  514     if (is_root (u[1], "default")) s1= "";
  515     if ((!is_name (u[1])) && (!is_root (u[1]))) s1= "{" * s1 * "}";
  516     if ((!is_concat (u[2])) && (!is_atomic (u[2])) && (!is_wildcard (u[2], 1)))
  517       s2= "{" * s2 * "}";
  518 #ifdef WINPATHS
  519     if (((is_root (u[1], "default") && type == URL_SYSTEM) ||
  520          is_root (u[1], "file"))) { // have to return the windows format
  521       string root, remain;
  522       if (is_concat (u[2])) {           
  523         root = as_string (u[2][1], type);
  524         // root might be unit letter or hostname. It depends on the length
  525         remain = as_string (u[2][2], type);
  526       }
  527       else {
  528         root = s2;
  529         remain = "";
  530       }
  531       if (is_root (u[1], "default")) {
  532         if (N(root) == 1) return root * ":\\" * remain; // drive letter
  533         else return "\\\\" * root * "\\" * remain;
  534       }
  535       else {
  536         if (N(root) == 1) return s1 * "/" * root * ":/" * remain; // local file
  537         else return s1 * root * "/" * remain; // remote
  538       }
  539     }
  540 #endif
  541     return s1 * sep * s2;
  542   }
  543   if (is_or (u)) {
  544     string s1= as_string (u[1], type);
  545     string s2= as_string (u[2], type);
  546     if (!is_name_in_path (u[1])) s1= "{" * s1 * "}";
  547     if ((!is_or (u[2])) && (!is_name_in_path (u[2]))) s2= "{" * s2 * "}";
  548 #ifdef WINPATHS
  549     if (type == URL_STANDARD) return s1 * ":" * s2;
  550     else return s1 * string (URL_SEPARATOR) * s2;
  551 #else
  552     return s1 * string (URL_SEPARATOR) * s2;
  553 #endif
  554   }
  555 #ifdef WINPATHS
  556   if (is_root (u, "default")) {
  557     int stype= type;
  558     if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
  559         if (stype == URL_SYSTEM) return ""; else return "/";
  560   }
  561 #else
  562   if (is_root (u, "default")) return "/";
  563 #endif
  564   if (is_root (u, "blank")) return "/";
  565   if (is_root (u, "file")) return u[1]->t->label * "://";
  566   if (is_root (u)) return u[1]->t->label * ":/";
  567   if (is_wildcard (u, 0)) return "**";
  568   if (is_wildcard (u, 1)) return u->t[1]->label;
  569   FAILED ("bad url");
  570   return "";
  571 }
  572 
  573 tm_ostream&
  574 operator << (tm_ostream& out, url u) {
  575   return out << as_string (u, URL_SYSTEM);
  576 }
  577 
  578 /******************************************************************************
  579 * Operations on urls
  580 ******************************************************************************/
  581 
  582 url
  583 head (url u) {
  584   return u * url_parent ();
  585 }
  586 
  587 url
  588 tail (url u) {
  589   if (is_concat (u)) {
  590     if (is_root_web (u[1]) && is_atomic (u[2])) return url_here ();
  591     return tail (u[2]);
  592   }
  593   if (is_or (u)) return tail (u[1]) | tail (u[2]);
  594   if (is_root (u)) return url_here ();
  595   return u;
  596 }
  597 
  598 string
  599 suffix (url u) {
  600   u= tail (u);
  601   if (!is_atomic (u)) return "";
  602   string s= as_string (u);
  603   int i, n= N(s);
  604   for (i=n-1; i>=0; i--)
  605     if (s[i]=='.') break;
  606   if ((i>0) && (i<n-1)) {
  607     string r= s (i+1, n);
  608     while ((N(r)>0) && (r[N(r)-1]=='~' || r[N(r)-1]=='#')) r= r(0, N(r)-1);
  609     return locase_all(r);
  610   }
  611   return "";
  612 }
  613 
  614 string
  615 basename (url u, string suf) {
  616   string s= as_string (tail (u));
  617   if (suf != "" && N(s) > N(suf) && suf == s(N(s)-N(suf),N(s)))
  618     return s(0, N(s)-N(suf));
  619   return s;
  620 }
  621 
  622 string
  623 basename (url u) {
  624   string s= suffix (u);
  625   if (N(s) != 0) s= "." * s;
  626   return basename (u, s);
  627 }
  628 
  629 url
  630 glue (url u, string s) {
  631   if (is_atomic (u)) return as_url (tree (u->t->label * s));
  632   if (is_concat (u)) return u[1] * glue (u[2], s);
  633   if (is_or (u)) return glue (u[1], s) | glue (u[2], s);
  634   if (is_none (u)) return u;
  635   failed_error << "u= " << u << "\n";
  636   failed_error << "s= " << s << "\n";
  637   FAILED ("can't glue string to url");
  638   return u;
  639 }
  640 
  641 url
  642 unglue (url u, int nr) {
  643   if (is_atomic (u))
  644     return as_url (tree (u->t->label (0, max (N(u->t->label) - nr, 0))));
  645   if (is_concat (u)) return u[1] * unglue (u[2], nr);
  646   if (is_or (u)) return unglue (u[1], nr) | unglue (u[2], nr);
  647   if (is_none (u)) return u;
  648   failed_error << "u = " << u << "\n";
  649   failed_error << "nr= " << nr << "\n";
  650   FAILED ("can't unglue from url");
  651   return u;
  652 }
  653 
  654 url
  655 unblank (url u) {
  656   if (is_concat (u) && (u[2]->t == "")) return u[1];
  657   if (is_concat (u)) return u[1] * unblank (u[2]);
  658   if (is_or (u)) return unblank (u[1]) | unblank (u[2]);
  659   return u;
  660 }
  661 
  662 url
  663 relative (url base, url u) {
  664   return head (base) * u;
  665 }
  666 
  667 url
  668 delta_sub (url base, url u) {
  669   if (is_atomic (base))
  670     return u;
  671   if (is_concat (base) && is_concat (u) && (base[1] == u[1])) {
  672     if (is_special_root (base[1]) &&
  673         is_concat (base[2]) && is_concat (u[2]) &&
  674         base[2][1] != u[2][1])
  675       return url_none ();
  676     return delta_sub (base[2], u[2]);
  677   }
  678   if (is_concat (base) && !is_semi_root (base))
  679     return url_parent () * delta_sub (head (base), u);
  680   return url_none ();
  681 }
  682 
  683 url
  684 delta (url base, url u) {
  685   if (is_or (u))
  686     return delta (base, u[1]) | delta (base, u[2]);
  687   url res= delta_sub (base, u);
  688   if (is_none (res)) return u;
  689   return res;
  690 }
  691 
  692 static url
  693 expand (url u1, url u2) {
  694   if (is_or (u1)) return expand (u1[1], u2) | expand (u1[2], u2);
  695   if (is_or (u2)) return expand (u1, u2[1]) | expand (u1, u2[2]);
  696   if (is_ancestor (u2)) {
  697     if (is_concat (u1)) return u1 | expand (u1[1], u2);
  698     if (is_special_root (u1)) return u2;
  699     return u1 | u2;
  700   }
  701   if (is_concat (u2) && is_ancestor (u2[1]))
  702     return expand (expand (u1, u2[1]), u2[2]);
  703   return u1 * u2;
  704 }
  705 
  706 url
  707 expand (url u) {
  708   if (is_or (u)) return expand (u[1]) | expand (u[2]);
  709   if (is_concat (u)) return expand (expand (u[1]), expand (u[2]));
  710   return u;
  711 }
  712 
  713 bool
  714 descends (url u, url base) {
  715   if (is_or (base))
  716     return descends (u, base[1]) || descends (u, base[2]);
  717   if (is_or (u))
  718     return descends (u[1], base) && descends (u[2], base);
  719   if (u == base)
  720     return true;
  721   if (is_concat (u) && is_atomic (base))
  722     return u[1] == base;
  723   if (is_concat (u) && is_concat (base))
  724     return u[1] == base[1] && descends (u[2], base[2]);
  725   return false;
  726 }
  727 
  728 bool
  729 is_secure (url u) {
  730   return descends (u, expand (url_path ("$TEXMACS_SECURE_PATH")));
  731 }
  732 
  733 /******************************************************************************
  734 * Url sorting and factorization
  735 ******************************************************************************/
  736 
  737 static bool
  738 operator <= (url u1, url u2) {
  739   if (is_atomic (u1) && is_atomic (u2))
  740     return u1->t->label <= u2->t->label;
  741   if (is_atomic (u1)) return true;
  742   if (is_atomic (u2)) return false;
  743   if (is_concat (u1) && is_concat (u2)) {
  744     if (u1[1] == u2[1]) return u1[2] <= u2[2];
  745     else return u1[1] <= u2[1];
  746   }
  747   if (is_concat (u1)) return true;
  748   if (is_concat (u2)) return false;
  749   return true; // does not matter for sorting
  750 }
  751 
  752 static url
  753 sort_sub (url add, url to) {
  754   if (is_or (to)) {
  755     if (add <= to[1]) return add | to;
  756     return to[1] | sort_sub (add, to[2]);
  757   }
  758   if (add <= to) return add | to;
  759   else return to | add;
  760 }
  761 
  762 url
  763 sort (url u) {
  764   if (is_or (u))
  765     return sort_sub (u[1], sort (u[2]));
  766   else return u;
  767 }
  768 
  769 static url
  770 factor_sorted (url u) {
  771   if (!is_or (u)) return u;
  772   url v= factor_sorted (u[2]);
  773   if (is_concat (u[1])) {
  774     if (is_concat (v) && (u[1][1] == v[1]))
  775       return u[1][1] * (u[1][2] | v[2]);
  776     if (is_or (v) && is_concat (v[1]) && (u[1][1] == v[1][1]))
  777       return (u[1][1] * (u[1][2] | v[1][2])) | v[2];
  778   }
  779   return u[1] | v;
  780 }
  781 
  782 static url
  783 factor_sub (url u) {
  784   if (is_concat (u)) return u[1] * factor (u[2]);
  785   if (is_or (u)) return factor_sub (u[1]) | factor_sub (u[2]);
  786   return u;
  787 }
  788 
  789 url
  790 factor (url u) {
  791   return factor_sub (factor_sorted (sort (u)));
  792 }
  793 
  794 /******************************************************************************
  795 * Url resolution and wildcard expansion
  796 ******************************************************************************/
  797 
  798 url complete (url base, url u, string filter, bool flag);
  799 
  800 string
  801 get_root (url u) {
  802   if (is_concat (u)) return get_root (u[1]);
  803   if (is_or (u)) {
  804     string s1= get_root (u[1]);
  805     string s2= get_root (u[2]);
  806     if (s1 == s2) return s1; else return "";
  807   }
  808   if (is_root (u)) return u[1]->t->label;
  809   return "";
  810 }
  811 
  812 url
  813 unroot (url u) {
  814   if (is_concat (u)) return unroot (u[1]) * u[2];
  815   if (is_or (u)) return unroot (u[1]) | unroot (u[2]);
  816   if (is_root (u)) return url_here ();
  817   return u;
  818 }
  819 
  820 url
  821 reroot (url u, string protocol) {
  822   if (is_concat (u)) return reroot (u[1], protocol) * u[2];
  823   if (is_or (u)) return reroot (u[1], protocol) | reroot (u[2], protocol);
  824   if (is_root (u)) return url_root (protocol);
  825   return u;
  826 }
  827 
  828 static url
  829 complete (url base, url sub, url u, string filter, bool flag) {
  830   if (is_or (sub)) {
  831     url res1= complete (base, sub[1], u, filter, flag);
  832     if ((!is_none (res1)) && flag) return res1;
  833     return res1 | complete (base, sub[2], u, filter, flag);
  834   }
  835   if (is_concat (sub) && is_rooted (sub[1])) {
  836     url res= complete (sub[1], sub[2], u, filter, flag);
  837     return sub[1] * res;
  838   }
  839   return sub * complete (base * sub, u, filter, flag);
  840 }
  841 
  842 url
  843 complete (url base, url u, string filter, bool flag) {
  844   // cout << "complete " << base << " |||| " << u << LF;
  845   if (!is_rooted(u)) {
  846      if (is_none (base)) return base;
  847      if (is_none (u)) return u;
  848      if ((!is_root (base)) && (!is_rooted_name (base))) {
  849        failed_error << "base  = " << base << LF;
  850        failed_error << "u     = " << u << LF;
  851        failed_error << "filter= " << filter << LF;
  852        FAILED ("invalid base url");
  853      }
  854   }
  855   if (is_name (u) || (is_concat (u) && is_root (u[1]) && is_name (u[2]))) {
  856     url comp= base * u;
  857     if (is_rooted (comp, "default") || is_rooted (comp, "file")) {
  858       if (is_of_type (comp, filter)) return reroot (u, "default");
  859       return url_none ();
  860     }
  861     if (is_rooted_web (comp) || is_rooted_tmfs (comp) || is_ramdisc (comp)) {
  862       if (is_of_type (comp, filter)) return u;
  863       return url_none ();
  864     }
  865     failed_error << "base  = " << base << LF;
  866     failed_error << "u     = " << u << LF;
  867     failed_error << "filter= " << filter << LF;
  868     ASSERT (is_rooted (comp), "unrooted url");
  869     FAILED ("bad protocol in url");
  870   }
  871   if (is_root (u)) {
  872     // FIXME: test filter flags here
  873     return u;
  874   }
  875   if (is_concat (u) && is_wildcard (u[1], 0) && is_wildcard (u[2], 1)) {
  876     // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
  877     if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
  878       failed_error << "base  = " << base << LF;
  879       failed_error << "u     = " << u << LF;
  880       failed_error << "filter= " << filter << LF;
  881       FAILED ("wildcards only implemented for files");
  882     }
  883     url ret= url_none ();
  884     bool error_flag;
  885     array<string> dir= read_directory (base, error_flag);
  886     int i, n= N(dir);
  887     for (i=0; i<n; i++) {
  888       if ((!is_none (ret)) && flag) return ret;
  889       if ((dir[i] == ".") || (dir[i] == "..")) continue;
  890       if (starts (dir[i], "http://") ||
  891           starts (dir[i], "https://") ||
  892           starts (dir[i], "ftp://"))
  893         if (is_directory (base * dir[i])) continue;
  894       ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
  895       if (match_wildcard (dir[i], u[2][1]->t->label))
  896         ret= ret | complete (base, dir[i], filter, flag);
  897     }
  898     return ret;
  899   }
  900   if (is_concat (u)) {
  901     url sub= complete (base, u[1], "", false);
  902     // "" should often be faster than the more correct "d" here
  903     return complete (base, sub, u[2], filter, flag);
  904   }
  905   if (is_or (u)) {
  906     url res1= complete (base, u[1], filter, flag);
  907     if ((!is_none (res1)) && flag) return res1;
  908     return res1 | complete (base, u[2], filter, flag);
  909   }
  910   if (is_wildcard (u)) {
  911     // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
  912     if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
  913       failed_error << "base  = " << base << LF;
  914       failed_error << "u     = " << u << LF;
  915       failed_error << "filter= " << filter << LF;
  916       FAILED ("wildcards only implemented for files");
  917     }
  918     url ret= url_none ();
  919     if (is_wildcard (u, 0) && is_of_type (base, filter)) ret= url_here ();
  920     bool error_flag;
  921     array<string> dir= read_directory (base, error_flag);
  922     int i, n= N(dir);
  923     for (i=0; i<n; i++) {
  924       if ((!is_none (ret)) && flag) return ret;
  925       if ((dir[i] == ".") || (dir[i] == "..")) continue;
  926       if (starts (dir[i], "http://") ||
  927           starts (dir[i], "https://") ||
  928           starts (dir[i], "ftp://"))
  929         if (is_directory (base * dir[i])) continue;
  930       if (is_wildcard (u, 0))
  931         ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
  932       else if (match_wildcard (dir[i], u[1]->t->label))
  933         ret= ret | complete (base, dir[i], filter, flag);
  934     }
  935     return ret;
  936   }
  937   failed_error << "url= " << u << LF;
  938   FAILED ("bad url");
  939   return u;
  940 }
  941 
  942 url
  943 complete (url u, string filter, bool flag) {
  944   url home= url_pwd ();
  945   return home * complete (home, u, filter, flag);
  946 }
  947 
  948 url
  949 complete (url u, string filter) {
  950   // This routine can be used in order to find all possible matches
  951   // for the wildcards in an url and replace the wildcards by these matches.
  952   // Moreover, matches are normalized (file root -> default root).
  953   url r =  complete (u, filter, false);
  954   // cout << "complete:" << u << " filter:" << filter << " result:" << r << LF;
  955   return r;
  956 }
  957 
  958 url
  959 resolve (url u, string filter) {
  960   // This routine does the same thing as complete, but it stops at
  961   // the first match. It is particularly useful for finding files in paths.
  962   return complete (u, filter, true);
  963   /*
  964   url res= complete (u, filter, true);
  965   if (is_none (res))
  966     cout << "Failed resolution of " << u << ", " << filter << LF;
  967   return res;
  968   */
  969 }
  970 
  971 url
  972 resolve_in_path (url u) {
  973   if (use_which) {
  974     string name = escape_sh (as_string (u));
  975     string which= var_eval_system ("which " * name * " 2> /dev/null");
  976     if (ends (which, name))
  977       return which;
  978     else if ((which != "") &&
  979              (!occurs ("bin/which: ", which)) &&
  980              (!starts (which, "which: ")) &&
  981              (!starts (which, "no ")))
  982       cout << "TeXmacs] " << which << "\n";
  983   }
  984 #ifdef OS_MINGW
  985   return resolve ((url_path ("$TEXMACS_PATH/bin") | url_path ("$PATH")) * u, "x");
  986 #else
  987   return resolve (url_path ("$PATH") * u, "x");
  988 #endif
  989 }
  990 
  991 bool
  992 exists (url u) {
  993   return !is_none (resolve (u, "r"));
  994 }
  995 
  996 bool
  997 exists_in_path (url u) {
  998 #ifdef OS_MINGW
  999   return !is_none (resolve_in_path (url (as_string (u) * ".bat"))) ||\
 1000          !is_none (resolve_in_path (url (as_string (u) * ".exe"))) ||\
 1001          !is_none (resolve_in_path (url (as_string (u) * ".com")));
 1002 #else
 1003   return !is_none (resolve_in_path (u));
 1004 #endif
 1005 }
 1006 
 1007 bool
 1008 has_permission (url u, string filter) {
 1009   return !is_none (resolve (u, filter));
 1010 }
 1011 
 1012 static url
 1013 descendance_sub (url u) {
 1014   if (is_or (u))
 1015     return descendance_sub (u[1]) | descendance_sub (u[2]);
 1016   return complete (u, url_wildcard (), "r", false);
 1017 }
 1018 
 1019 url
 1020 descendance (url u) {
 1021   // Utility for style and package menus in tm_server.cpp
 1022   // Compute and merge subdirectories of directories in path
 1023   return factor (descendance_sub (u));
 1024 }
 1025 
 1026 url
 1027 subdirectories (url u) {
 1028   if (is_or (u))
 1029     return subdirectories (u[1]) | subdirectories (u[2]);
 1030   else if (is_directory (u)) {
 1031     url ret= u;
 1032     bool error_flag;
 1033     array<string> dir= read_directory (u, error_flag);
 1034     for (int i=0; i<N(dir); i++)
 1035       if (!starts (dir[i], ".") && is_directory (u * dir[i]))
 1036         ret= ret | subdirectories (u * dir[i]);
 1037     return ret;
 1038   }
 1039   else return url_none ();
 1040 }
 1041 
 1042 /******************************************************************************
 1043 * Concretization of resolved urls
 1044 ******************************************************************************/
 1045 
 1046 url
 1047 concretize_url (url u) {
 1048   // This routine transforms a resolved url into a system url.
 1049   // In the case of distant files from the web, a local copy is created.
 1050   if (is_rooted (u, "default") ||
 1051       is_rooted (u, "file") ||
 1052       is_rooted (u, "blank"))
 1053         return reroot (u, "default");
 1054   if (is_rooted_web (u)) return concretize_url (get_from_web (u));
 1055   if (is_rooted_tmfs (u)) return concretize_url (get_from_server (u));
 1056   if (is_ramdisc (u)) return concretize_url (get_from_ramdisc (u));
 1057   if (is_here (u)) return url_pwd ();
 1058   if (is_parent (u)) return url_pwd () * url_parent ();
 1059   return url_none ();
 1060 }
 1061 
 1062 string
 1063 concretize (url u) {
 1064   // This routine transforms a resolved url into a system file name.
 1065   // In the case of distant files from the web, a local copy is created.
 1066   url c= concretize_url (u);
 1067   if (!is_none (c)) return as_string (c);
 1068   if (is_wildcard (u, 1)) return u->t[1]->label;
 1069   std_warning << "Couldn't concretize " << u->t << LF;
 1070   // failed_error << "u= " << u << LF;
 1071   // FAILED ("url has no root");
 1072   return "xxx";
 1073 }
 1074 
 1075 string
 1076 materialize (url u, string filter) {
 1077   // Combines resolve and concretize
 1078   url r= resolve (u, filter);
 1079   if (!(is_rooted (r) || is_here (r) || is_parent (r))) {
 1080     failed_error << "u= " << u << LF;
 1081     FAILED ("url could not be resolved");
 1082   }
 1083   return concretize (r);
 1084 }