"Fossies" - the Fresh Open Source Software Archive 
Member "TeXmacs-2.1.2-src/src/System/Classes/url.cpp" (5 May 2022, 32462 Bytes) of package /linux/misc/TeXmacs-2.1.2-src.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "url.cpp" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
2.1.1_vs_2.1.2.
1
2 /******************************************************************************
3 * MODULE : url.cpp
4 * DESCRIPTION: unified resource location handling
5 * COPYRIGHT : (C) 1999 Joris van der Hoeven
6 *******************************************************************************
7 * The url class uses a tree representation for urls.
8 * This allows us to generalize the concept of an url and allow paths and
9 * patterns to be regarded as urls too. An url is either a string or a tuple
10 * of one of the following types:
11 * "." -- here
12 * ".." -- parent
13 * none -- invalid url
14 * concat -- a/b/c is represented as (concat "a" (concat "b" "c"));
15 * or -- the path a:b/c is represented as (or "a" (concat "b" "c"));
16 * root -- the url http://gnu.org yields (concat (root "http") "gnu.org");
17 * wildcard -- (wildcard) corresponds to any url, (wildcard "*.tm")
18 * to all strings which end with .tm and (wildcard "*.tm" "file")
19 * to all TeXmacs files (i.e. discarding directories ending with .tm).
20 *******************************************************************************
21 * There are three main types of urls:
22 * - rootless urls, like a/b/c. These urls are mainly used in computations.
23 * For example, they can be appended to another url.
24 * - Standard rooted urls, like file:///usr or https://www.texmacs.org.
25 * These are the same as those used on the web.
26 * - System urls, characterized by a "default" root.
27 * These urls are similar to standard rooted urls, but they behave
28 * in a slightly different way with respect to concatenation.
29 * For instance https://www.texmacs.org/Web * file:///tmp would yield
30 * file:///tmp, where as https://www.texmacs.org/Web * /tmp yields
31 * https://www.texmacs.org/tmp
32 *******************************************************************************
33 * There are several formats for parsing (and printing) urls:
34 * - System format: the usual format on your operating system.
35 * On unix systems "/usr/bin:/usr/local/bin" would be a valid url
36 * representing a path and on windows systems "c:\windows;c:\TeXmacs"
37 * would be OK.
38 * - Unix format: this format forces unix-like notation even for
39 * other systems like Windows. This is convenient for url's in
40 * the source code. Unix environment variables like ~ and $TEXMACS_PATH
41 * can also be part of the url.
42 * - Standard format: the format which is used on the web.
43 * Notice that ftp://www.texmacs.org/pub and ftp://www.texmacs.org/pub/
44 * represent different urls. The second one is represented by concating
45 * on the right with an empty name.
46 *******************************************************************************
47 * When an explicit operation on urls need to be performed,
48 * like reading a file, the url is first "resolved" into a simple url
49 * with a unique name (modulo symbolic links) for the resource.
50 * Next, the url is "concretized" as a file name which is understood
51 * by the operating system. This may require searching the file from the web.
52 * Concretized urls should be used quickly and not memorized,
53 * since such names may be the names of temporary files,
54 * which may be destroyed soon afterwards.
55 *******************************************************************************
56 * This software falls under the GNU general public license version 3 or later.
57 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
58 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
59 ******************************************************************************/
60
61 #include "boot.hpp"
62 #include "url.hpp"
63 #include "sys_utils.hpp"
64 #include "web_files.hpp"
65 #include "file.hpp"
66 #include "analyze.hpp"
67
68 #include <ctype.h>
69
70 #ifdef OS_MINGW
71 #define WINPATHS
72 #endif
73
74 #ifdef WINPATHS
75 #define URL_CONCATER '\\'
76 #define URL_SEPARATOR ';'
77 #else
78 #define URL_CONCATER '/'
79 #define URL_SEPARATOR ':'
80 #endif
81
82 /******************************************************************************
83 * Unrooted url constructors
84 ******************************************************************************/
85
86 static url
87 url_get_atom (string s, int type) {
88 if (type < URL_STANDARD) {
89 if (s == "~") return url_system (get_env ("HOME"));
90 if (starts (s, "$")) {
91 string val= get_env (s (1, N(s)));
92 if (val == "") return url_none ();
93 return unblank (url_system (val));
94 }
95 }
96 if (occurs ("*", s)) return url_wildcard (s);
97 #ifdef WINPATHS
98 if (N(s)==2 && ends (s, ":"))
99 s->resize(1); // remove the ':' after unit letter
100 #endif
101 return as_url (tree (s));
102 }
103
104 static void
105 skip_ipv6 (string s, int& i) {
106 i++;
107 while (i<N(s) && (s[i] == ':' ||
108 (s[i] >= '0' && s[i] <= '9') ||
109 (s[i] >= 'a' && s[i] <= 'f') ||
110 (s[i] >= 'A' && s[i] <= 'F'))) i++;
111 if (i<N(s) && s[i] == ']') i++;
112 }
113
114 static url
115 url_get_name (string s, int type= URL_STANDARD, int i=0) {
116 char sep= (type == URL_SYSTEM)? URL_CONCATER: '/';
117 int start= i, n= N(s);
118 while ((i<n) && (s[i] != sep) && (s[i] != '/')) {
119 if (s[i] == '[') skip_ipv6 (s, i); else i++; }
120 url u= url_get_atom (s (start, i), type);
121 // url u= tree (s (start, i));
122 if (i == n) return u;
123 if (start == i) return url_get_name (s, type, i+1);
124 return u * url_get_name (s, type, i+1);
125 }
126
127 static url
128 url_get_path (string s, int type= URL_STANDARD, int i=0) {
129 char sep= (type == URL_SYSTEM)? URL_SEPARATOR: ':';
130 int start= i, n= N(s);
131 if (i == n) return url_none ();
132 while ((i<n) && (s[i] != sep)) {
133 if (s[i] == '[') skip_ipv6 (s, i); else i++; }
134 url u= url_general (s (start, i), type);
135 if (i == n) return u;
136 if (start == i) return url_get_path (s, type, i+1);
137 return u | url_get_path (s, type, i+1);
138 }
139
140 /******************************************************************************
141 * Rooted url constructors
142 ******************************************************************************/
143
144 url
145 url_root (string protocol) {
146 return as_url (tuple ("root", protocol));
147 }
148
149 url
150 url_ramdisc (string contents) {
151 return as_url (tuple ("root", "ramdisc", contents));
152 }
153
154 static url
155 url_default (string name, int type= URL_SYSTEM) {
156 url u= url_get_name (name, type);
157 #ifdef WINPATHS
158 // FIXME: this hack seems a bit too simple
159 if (is_concat (u) && (u[2]->t == "")) u= u[1];
160 // cout << name << " -> " << url_root ("default") * u << "\n";
161 return url_root ("default") * u;
162 #else
163 if (u->t == "") return url_root ("default");
164 return url_root ("default") * u;
165 #endif
166 }
167
168 static url
169 url_mingw_default (string name, int type) {
170 string s= name (0, 2) * ":" * name (2, N(name));
171 return url_root ("default") * url_get_name (s, type);
172 }
173
174 static url
175 url_path (string s, int type= URL_SYSTEM) {
176 url u= url_get_path (s, type);
177 return u;
178 }
179
180 static url
181 url_local (string name) {
182 url u= url_get_name (name, URL_SYSTEM);
183 return reroot (u, "file");
184 }
185
186 static url
187 url_file (string name) {
188 url u= url_get_name (name);
189 return url_root ("file") * u;
190 }
191
192 static url
193 url_http (string name) {
194 url u= url_get_name (name);
195 return url_root ("http") * u;
196 }
197
198 static url
199 url_https (string name) {
200 url u= url_get_name (name);
201 return url_root ("https") * u;
202 }
203
204 static url
205 url_ftp (string name) {
206 url u= url_get_name (name);
207 return url_root ("ftp") * u;
208 }
209
210 static url
211 url_tmfs (string name) {
212 url u= url_get_name (name);
213 return url_root ("tmfs") * u;
214 }
215
216 static url
217 url_blank (string name) {
218 url u= url_get_name (name);
219 return url_root ("blank") * u;
220 }
221
222 /******************************************************************************
223 * Generic url constructor
224 ******************************************************************************/
225
226 static bool
227 heuristic_is_path (string name, int type) {
228 char sep= (type==0)? URL_SEPARATOR: ':';
229 int i= 0, n= N(name);
230 while (i<n)
231 if (name[i] == '[') skip_ipv6 (name, i);
232 else if (name[i] == sep) return true;
233 else i++;
234 return false;
235 }
236
237 static bool
238 heuristic_is_default (string name, int type) {
239 #ifdef WINPATHS
240 // FIXME: we probably should take into account 'type' too
241 if (N(name) < 2) return false;
242 if ((name[0] == '\\') && (name[1] == '\\')) return true;
243 return
244 isalpha (name[0]) && (name[1] == ':') &&
245 ((N(name)==2) || (name[2] == '\\') || (name[2] == '/'));
246 #else
247 char sep= (type==0)? URL_CONCATER: '/';
248 return (name != "") && (name[0] == sep);
249 #endif
250 }
251
252 static bool
253 heuristic_is_http (string name) {
254 return starts (name, "www.");
255 // FIXME: we might want to recognize some other ones like google.com too
256 }
257
258 static bool
259 heuristic_is_ftp (string name) {
260 return starts (name, "ftp.");
261 }
262
263 static bool
264 heuristic_is_mingw_default (string name, int type) {
265 #ifdef WINPATHS
266 return type != URL_SYSTEM && N(name) >= 2 &&
267 name[0] == '/' && is_alpha (name[1]) &&
268 (N(name) == 2 || name[2] == '/');
269 #else
270 (void) name; (void) type; return false;
271 #endif
272 }
273
274 url
275 url_general (string name, int type= URL_SYSTEM) {
276 if (starts (name, "local:")) return url_local (name (6, N (name)));
277 if (starts (name, "file://")) return url_file (name (7, N (name)));
278 if (starts (name, "http://")) return url_http (name (7, N (name)));
279 if (starts (name, "https://")) return url_https (name (8, N (name)));
280 if (starts (name, "ftp://")) return url_ftp (name (6, N (name)));
281 if (starts (name, "tmfs://")) return url_tmfs (name (7, N (name)));
282 if (starts (name, "//")) return url_blank (name (2, N (name)));
283 if (heuristic_is_path (name, type)) return url_path (name, type);
284 if (heuristic_is_default (name, type)) return url_default (name, type);
285 if (heuristic_is_mingw_default (name, type)) return url_mingw_default (name, type);
286 if (type != URL_CLEAN_UNIX) {
287 if (heuristic_is_http (name)) return url_http (name);
288 if (heuristic_is_ftp (name)) return url_ftp (name);
289 }
290 return url_get_name (name, type);
291 }
292
293 url
294 url_unix (string name) {
295 return url_general (name, URL_UNIX);
296 }
297
298 url
299 url_unix (string dir, string name) {
300 return url_unix (dir) * url_unix (name);
301 }
302
303 url
304 url_system (string name) {
305 return url_general (name, URL_SYSTEM);
306 }
307
308 url
309 url_system (string dir, string name) {
310 return url_system (dir) * url_system (name);
311 }
312
313 url
314 url_standard (string name) {
315 return url_general (name, URL_STANDARD);
316 }
317
318 url
319 url_standard (string dir, string name) {
320 return url_standard (dir) * url_standard (name);
321 }
322
323 url::url (): rep (tm_new<url_rep> (tuple ("none"))) {}
324 url::url (const char* name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
325 url::url (string name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
326 url::url (string path_name, string name):
327 rep (tm_new<url_rep> (url_unix (path_name, name)->t)) {}
328
329 /******************************************************************************
330 * Computational url constructors
331 ******************************************************************************/
332
333 static bool
334 is_special_root (url u) {
335 #ifdef WINPATHS
336 return is_root (u);
337 #else
338 return is_root_web (u);
339 #endif
340 }
341
342 static bool
343 is_semi_root (url u) {
344 // url u such that u/.. == u (website or windows drive name)
345 return is_concat (u) && is_special_root (u[1]) && is_atomic (u[2]);
346 }
347
348 url
349 operator * (url u1, url u2) {
350 //cout << "concat " << u1->t << " * " << u2->t << "\n";
351 if (is_root (u2) || (is_concat (u2) && is_root (u2[1]))) {
352 if (is_concat (u1) && is_root_web (u1[1])) {
353 if (is_root (u2, "default") ||
354 (is_concat (u2) && is_root (u2[1], "default")))
355 {
356 url v= u1[2];
357 while (is_concat (v)) v= v[1];
358 if (is_root (u2)) return u1[1] * v;
359 return u1[1] * v * u2[2];
360 }
361 if (is_root (u2, "blank") ||
362 (is_concat (u2) && is_root (u2[1], "blank")))
363 return reroot (u2, u1[1][1]->t->label);
364 }
365 return u2;
366 }
367 if (is_here (u1) || (u1->t == "")) return u2;
368 if (is_here (u2)) return u1;
369 if (is_none (u1)) return url_none ();
370 if (is_none (u2)) return url_none ();
371 if (u2 == url_parent ()) {
372 if (is_root (u1)) return u1;
373 if (is_pseudo_atomic (u1) && (!is_parent (u1))) return url_here ();
374 if (is_semi_root (u1)) return u1;
375 }
376 if (is_concat (u2) && (u2[1] == url_parent ())) {
377 if (is_root (u1)) return u1 * u2[2];
378 if (is_pseudo_atomic (u1) && (!is_parent (u1))) return u2[2];
379 if (is_semi_root (u1)) return u1 * u2[2];
380 }
381 if (is_concat (u1)) return u1[1] * (u1[2] * u2);
382 return as_url (tuple ("concat", u1->t, u2->t));
383 }
384
385 url
386 operator * (url u1, const char* name) {
387 return u1 * url (name);
388 }
389
390 url
391 operator * (url u1, string name) {
392 return u1 * url (name);
393 }
394
395 url
396 operator | (url u1, url u2) {
397 if (is_none (u1)) return u2;
398 if (is_none (u2)) return u1;
399 if (is_or (u1)) return u1[1] | (u1[2] | u2);
400 if (u1 == u2) return u2;
401 if (is_or (u2) && (u1 == u2[1])) return u2;
402 return as_url (tuple ("or", u1->t, u2->t));
403 }
404
405 url
406 url_wildcard () {
407 return as_url (tuple ("wildcard"));
408 }
409
410 url
411 url_wildcard (string name) {
412 return as_url (tuple ("wildcard", name));
413 }
414
415 /******************************************************************************
416 * url predicates
417 ******************************************************************************/
418
419 bool
420 is_rooted (url u) {
421 return
422 is_root (u) ||
423 (is_concat (u) && is_rooted (u[1])) ||
424 (is_or (u) && is_rooted (u[1]) && is_rooted (u[2]));
425 }
426
427 bool
428 is_rooted (url u, string protocol) {
429 return
430 is_root (u, protocol) ||
431 (is_concat (u) && is_rooted (u[1], protocol)) ||
432 (is_or (u) && is_rooted (u[1], protocol) && is_rooted (u[2], protocol));
433 }
434
435 bool
436 is_rooted_web (url u) {
437 return
438 is_root_web (u) ||
439 (is_concat (u) && is_rooted_web (u[1])) ||
440 (is_or (u) && is_rooted_web (u[1]) && is_rooted_web (u[2]));
441 }
442
443 bool
444 is_rooted_tmfs (url u) {
445 return
446 is_root_tmfs (u) ||
447 (is_concat (u) && is_rooted_tmfs (u[1])) ||
448 (is_or (u) && is_rooted_tmfs (u[1]) && is_rooted_tmfs (u[2]));
449 }
450
451 bool
452 is_tmfs_protocol (url u, string protocol) {
453 return
454 u->t == protocol ||
455 (is_concat (u) && is_tmfs_protocol (u[1], protocol));
456 }
457
458 bool
459 is_rooted_tmfs (url u, string protocol) {
460 return
461 (is_concat (u) && is_root_tmfs (u[1]) &&
462 is_tmfs_protocol (u[2], protocol)) ||
463 (is_or (u) && is_rooted_tmfs (u[1], protocol) &&
464 is_rooted_tmfs (u[2], protocol));
465 }
466
467 bool
468 is_rooted_blank (url u) {
469 return
470 is_root_blank (u) ||
471 (is_concat (u) && is_rooted_blank (u[1])) ||
472 (is_or (u) && is_rooted_blank (u[1]) && is_rooted_blank (u[2]));
473 }
474
475 bool
476 is_name (url u) {
477 if (is_atomic (u)) return true;
478 if (!is_concat (u)) return false;
479 return is_name (u[1]) && is_name (u[2]);
480 }
481
482 bool
483 is_rooted_name (url u) {
484 return is_concat (u) && is_root (u[1]) && is_name (u[2]);
485 }
486
487 bool
488 is_name_in_path (url u) {
489 if (is_name (u)) return true;
490 return is_concat (u) && is_root (u[1], "default") && is_name (u[2]);
491 }
492
493 bool
494 is_ramdisc (url u) {
495 return is_concat (u) && is_root (u[1], "ramdisc");
496 }
497
498 /******************************************************************************
499 * Conversion routines for urls
500 ******************************************************************************/
501
502 string
503 as_string (url u, int type) {
504 // This routine pritty prints an url as a string.
505 // FIXME: the current algorithm is quadratic in time.
506 if (is_none (u)) return "{}";
507 if (is_atomic (u)) return u->t->label;
508 if (is_concat (u)) {
509 int stype= type;
510 if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
511 string sep= (stype==URL_SYSTEM? string (URL_CONCATER): string ("/"));
512 string s1 = as_string (u[1], type);
513 string s2 = as_string (u[2], stype);
514 if (is_root (u[1], "default")) s1= "";
515 if ((!is_name (u[1])) && (!is_root (u[1]))) s1= "{" * s1 * "}";
516 if ((!is_concat (u[2])) && (!is_atomic (u[2])) && (!is_wildcard (u[2], 1)))
517 s2= "{" * s2 * "}";
518 #ifdef WINPATHS
519 if (((is_root (u[1], "default") && type == URL_SYSTEM) ||
520 is_root (u[1], "file"))) { // have to return the windows format
521 string root, remain;
522 if (is_concat (u[2])) {
523 root = as_string (u[2][1], type);
524 // root might be unit letter or hostname. It depends on the length
525 remain = as_string (u[2][2], type);
526 }
527 else {
528 root = s2;
529 remain = "";
530 }
531 if (is_root (u[1], "default")) {
532 if (N(root) == 1) return root * ":\\" * remain; // drive letter
533 else return "\\\\" * root * "\\" * remain;
534 }
535 else {
536 if (N(root) == 1) return s1 * "/" * root * ":/" * remain; // local file
537 else return s1 * root * "/" * remain; // remote
538 }
539 }
540 #endif
541 return s1 * sep * s2;
542 }
543 if (is_or (u)) {
544 string s1= as_string (u[1], type);
545 string s2= as_string (u[2], type);
546 if (!is_name_in_path (u[1])) s1= "{" * s1 * "}";
547 if ((!is_or (u[2])) && (!is_name_in_path (u[2]))) s2= "{" * s2 * "}";
548 #ifdef WINPATHS
549 if (type == URL_STANDARD) return s1 * ":" * s2;
550 else return s1 * string (URL_SEPARATOR) * s2;
551 #else
552 return s1 * string (URL_SEPARATOR) * s2;
553 #endif
554 }
555 #ifdef WINPATHS
556 if (is_root (u, "default")) {
557 int stype= type;
558 if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
559 if (stype == URL_SYSTEM) return ""; else return "/";
560 }
561 #else
562 if (is_root (u, "default")) return "/";
563 #endif
564 if (is_root (u, "blank")) return "/";
565 if (is_root (u, "file")) return u[1]->t->label * "://";
566 if (is_root (u)) return u[1]->t->label * ":/";
567 if (is_wildcard (u, 0)) return "**";
568 if (is_wildcard (u, 1)) return u->t[1]->label;
569 FAILED ("bad url");
570 return "";
571 }
572
573 tm_ostream&
574 operator << (tm_ostream& out, url u) {
575 return out << as_string (u, URL_SYSTEM);
576 }
577
578 /******************************************************************************
579 * Operations on urls
580 ******************************************************************************/
581
582 url
583 head (url u) {
584 return u * url_parent ();
585 }
586
587 url
588 tail (url u) {
589 if (is_concat (u)) {
590 if (is_root_web (u[1]) && is_atomic (u[2])) return url_here ();
591 return tail (u[2]);
592 }
593 if (is_or (u)) return tail (u[1]) | tail (u[2]);
594 if (is_root (u)) return url_here ();
595 return u;
596 }
597
598 string
599 suffix (url u) {
600 u= tail (u);
601 if (!is_atomic (u)) return "";
602 string s= as_string (u);
603 int i, n= N(s);
604 for (i=n-1; i>=0; i--)
605 if (s[i]=='.') break;
606 if ((i>0) && (i<n-1)) {
607 string r= s (i+1, n);
608 while ((N(r)>0) && (r[N(r)-1]=='~' || r[N(r)-1]=='#')) r= r(0, N(r)-1);
609 return locase_all(r);
610 }
611 return "";
612 }
613
614 string
615 basename (url u, string suf) {
616 string s= as_string (tail (u));
617 if (suf != "" && N(s) > N(suf) && suf == s(N(s)-N(suf),N(s)))
618 return s(0, N(s)-N(suf));
619 return s;
620 }
621
622 string
623 basename (url u) {
624 string s= suffix (u);
625 if (N(s) != 0) s= "." * s;
626 return basename (u, s);
627 }
628
629 url
630 glue (url u, string s) {
631 if (is_atomic (u)) return as_url (tree (u->t->label * s));
632 if (is_concat (u)) return u[1] * glue (u[2], s);
633 if (is_or (u)) return glue (u[1], s) | glue (u[2], s);
634 if (is_none (u)) return u;
635 failed_error << "u= " << u << "\n";
636 failed_error << "s= " << s << "\n";
637 FAILED ("can't glue string to url");
638 return u;
639 }
640
641 url
642 unglue (url u, int nr) {
643 if (is_atomic (u))
644 return as_url (tree (u->t->label (0, max (N(u->t->label) - nr, 0))));
645 if (is_concat (u)) return u[1] * unglue (u[2], nr);
646 if (is_or (u)) return unglue (u[1], nr) | unglue (u[2], nr);
647 if (is_none (u)) return u;
648 failed_error << "u = " << u << "\n";
649 failed_error << "nr= " << nr << "\n";
650 FAILED ("can't unglue from url");
651 return u;
652 }
653
654 url
655 unblank (url u) {
656 if (is_concat (u) && (u[2]->t == "")) return u[1];
657 if (is_concat (u)) return u[1] * unblank (u[2]);
658 if (is_or (u)) return unblank (u[1]) | unblank (u[2]);
659 return u;
660 }
661
662 url
663 relative (url base, url u) {
664 return head (base) * u;
665 }
666
667 url
668 delta_sub (url base, url u) {
669 if (is_atomic (base))
670 return u;
671 if (is_concat (base) && is_concat (u) && (base[1] == u[1])) {
672 if (is_special_root (base[1]) &&
673 is_concat (base[2]) && is_concat (u[2]) &&
674 base[2][1] != u[2][1])
675 return url_none ();
676 return delta_sub (base[2], u[2]);
677 }
678 if (is_concat (base) && !is_semi_root (base))
679 return url_parent () * delta_sub (head (base), u);
680 return url_none ();
681 }
682
683 url
684 delta (url base, url u) {
685 if (is_or (u))
686 return delta (base, u[1]) | delta (base, u[2]);
687 url res= delta_sub (base, u);
688 if (is_none (res)) return u;
689 return res;
690 }
691
692 static url
693 expand (url u1, url u2) {
694 if (is_or (u1)) return expand (u1[1], u2) | expand (u1[2], u2);
695 if (is_or (u2)) return expand (u1, u2[1]) | expand (u1, u2[2]);
696 if (is_ancestor (u2)) {
697 if (is_concat (u1)) return u1 | expand (u1[1], u2);
698 if (is_special_root (u1)) return u2;
699 return u1 | u2;
700 }
701 if (is_concat (u2) && is_ancestor (u2[1]))
702 return expand (expand (u1, u2[1]), u2[2]);
703 return u1 * u2;
704 }
705
706 url
707 expand (url u) {
708 if (is_or (u)) return expand (u[1]) | expand (u[2]);
709 if (is_concat (u)) return expand (expand (u[1]), expand (u[2]));
710 return u;
711 }
712
713 bool
714 descends (url u, url base) {
715 if (is_or (base))
716 return descends (u, base[1]) || descends (u, base[2]);
717 if (is_or (u))
718 return descends (u[1], base) && descends (u[2], base);
719 if (u == base)
720 return true;
721 if (is_concat (u) && is_atomic (base))
722 return u[1] == base;
723 if (is_concat (u) && is_concat (base))
724 return u[1] == base[1] && descends (u[2], base[2]);
725 return false;
726 }
727
728 bool
729 is_secure (url u) {
730 return descends (u, expand (url_path ("$TEXMACS_SECURE_PATH")));
731 }
732
733 /******************************************************************************
734 * Url sorting and factorization
735 ******************************************************************************/
736
737 static bool
738 operator <= (url u1, url u2) {
739 if (is_atomic (u1) && is_atomic (u2))
740 return u1->t->label <= u2->t->label;
741 if (is_atomic (u1)) return true;
742 if (is_atomic (u2)) return false;
743 if (is_concat (u1) && is_concat (u2)) {
744 if (u1[1] == u2[1]) return u1[2] <= u2[2];
745 else return u1[1] <= u2[1];
746 }
747 if (is_concat (u1)) return true;
748 if (is_concat (u2)) return false;
749 return true; // does not matter for sorting
750 }
751
752 static url
753 sort_sub (url add, url to) {
754 if (is_or (to)) {
755 if (add <= to[1]) return add | to;
756 return to[1] | sort_sub (add, to[2]);
757 }
758 if (add <= to) return add | to;
759 else return to | add;
760 }
761
762 url
763 sort (url u) {
764 if (is_or (u))
765 return sort_sub (u[1], sort (u[2]));
766 else return u;
767 }
768
769 static url
770 factor_sorted (url u) {
771 if (!is_or (u)) return u;
772 url v= factor_sorted (u[2]);
773 if (is_concat (u[1])) {
774 if (is_concat (v) && (u[1][1] == v[1]))
775 return u[1][1] * (u[1][2] | v[2]);
776 if (is_or (v) && is_concat (v[1]) && (u[1][1] == v[1][1]))
777 return (u[1][1] * (u[1][2] | v[1][2])) | v[2];
778 }
779 return u[1] | v;
780 }
781
782 static url
783 factor_sub (url u) {
784 if (is_concat (u)) return u[1] * factor (u[2]);
785 if (is_or (u)) return factor_sub (u[1]) | factor_sub (u[2]);
786 return u;
787 }
788
789 url
790 factor (url u) {
791 return factor_sub (factor_sorted (sort (u)));
792 }
793
794 /******************************************************************************
795 * Url resolution and wildcard expansion
796 ******************************************************************************/
797
798 url complete (url base, url u, string filter, bool flag);
799
800 string
801 get_root (url u) {
802 if (is_concat (u)) return get_root (u[1]);
803 if (is_or (u)) {
804 string s1= get_root (u[1]);
805 string s2= get_root (u[2]);
806 if (s1 == s2) return s1; else return "";
807 }
808 if (is_root (u)) return u[1]->t->label;
809 return "";
810 }
811
812 url
813 unroot (url u) {
814 if (is_concat (u)) return unroot (u[1]) * u[2];
815 if (is_or (u)) return unroot (u[1]) | unroot (u[2]);
816 if (is_root (u)) return url_here ();
817 return u;
818 }
819
820 url
821 reroot (url u, string protocol) {
822 if (is_concat (u)) return reroot (u[1], protocol) * u[2];
823 if (is_or (u)) return reroot (u[1], protocol) | reroot (u[2], protocol);
824 if (is_root (u)) return url_root (protocol);
825 return u;
826 }
827
828 static url
829 complete (url base, url sub, url u, string filter, bool flag) {
830 if (is_or (sub)) {
831 url res1= complete (base, sub[1], u, filter, flag);
832 if ((!is_none (res1)) && flag) return res1;
833 return res1 | complete (base, sub[2], u, filter, flag);
834 }
835 if (is_concat (sub) && is_rooted (sub[1])) {
836 url res= complete (sub[1], sub[2], u, filter, flag);
837 return sub[1] * res;
838 }
839 return sub * complete (base * sub, u, filter, flag);
840 }
841
842 url
843 complete (url base, url u, string filter, bool flag) {
844 // cout << "complete " << base << " |||| " << u << LF;
845 if (!is_rooted(u)) {
846 if (is_none (base)) return base;
847 if (is_none (u)) return u;
848 if ((!is_root (base)) && (!is_rooted_name (base))) {
849 failed_error << "base = " << base << LF;
850 failed_error << "u = " << u << LF;
851 failed_error << "filter= " << filter << LF;
852 FAILED ("invalid base url");
853 }
854 }
855 if (is_name (u) || (is_concat (u) && is_root (u[1]) && is_name (u[2]))) {
856 url comp= base * u;
857 if (is_rooted (comp, "default") || is_rooted (comp, "file")) {
858 if (is_of_type (comp, filter)) return reroot (u, "default");
859 return url_none ();
860 }
861 if (is_rooted_web (comp) || is_rooted_tmfs (comp) || is_ramdisc (comp)) {
862 if (is_of_type (comp, filter)) return u;
863 return url_none ();
864 }
865 failed_error << "base = " << base << LF;
866 failed_error << "u = " << u << LF;
867 failed_error << "filter= " << filter << LF;
868 ASSERT (is_rooted (comp), "unrooted url");
869 FAILED ("bad protocol in url");
870 }
871 if (is_root (u)) {
872 // FIXME: test filter flags here
873 return u;
874 }
875 if (is_concat (u) && is_wildcard (u[1], 0) && is_wildcard (u[2], 1)) {
876 // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
877 if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
878 failed_error << "base = " << base << LF;
879 failed_error << "u = " << u << LF;
880 failed_error << "filter= " << filter << LF;
881 FAILED ("wildcards only implemented for files");
882 }
883 url ret= url_none ();
884 bool error_flag;
885 array<string> dir= read_directory (base, error_flag);
886 int i, n= N(dir);
887 for (i=0; i<n; i++) {
888 if ((!is_none (ret)) && flag) return ret;
889 if ((dir[i] == ".") || (dir[i] == "..")) continue;
890 if (starts (dir[i], "http://") ||
891 starts (dir[i], "https://") ||
892 starts (dir[i], "ftp://"))
893 if (is_directory (base * dir[i])) continue;
894 ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
895 if (match_wildcard (dir[i], u[2][1]->t->label))
896 ret= ret | complete (base, dir[i], filter, flag);
897 }
898 return ret;
899 }
900 if (is_concat (u)) {
901 url sub= complete (base, u[1], "", false);
902 // "" should often be faster than the more correct "d" here
903 return complete (base, sub, u[2], filter, flag);
904 }
905 if (is_or (u)) {
906 url res1= complete (base, u[1], filter, flag);
907 if ((!is_none (res1)) && flag) return res1;
908 return res1 | complete (base, u[2], filter, flag);
909 }
910 if (is_wildcard (u)) {
911 // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
912 if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
913 failed_error << "base = " << base << LF;
914 failed_error << "u = " << u << LF;
915 failed_error << "filter= " << filter << LF;
916 FAILED ("wildcards only implemented for files");
917 }
918 url ret= url_none ();
919 if (is_wildcard (u, 0) && is_of_type (base, filter)) ret= url_here ();
920 bool error_flag;
921 array<string> dir= read_directory (base, error_flag);
922 int i, n= N(dir);
923 for (i=0; i<n; i++) {
924 if ((!is_none (ret)) && flag) return ret;
925 if ((dir[i] == ".") || (dir[i] == "..")) continue;
926 if (starts (dir[i], "http://") ||
927 starts (dir[i], "https://") ||
928 starts (dir[i], "ftp://"))
929 if (is_directory (base * dir[i])) continue;
930 if (is_wildcard (u, 0))
931 ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
932 else if (match_wildcard (dir[i], u[1]->t->label))
933 ret= ret | complete (base, dir[i], filter, flag);
934 }
935 return ret;
936 }
937 failed_error << "url= " << u << LF;
938 FAILED ("bad url");
939 return u;
940 }
941
942 url
943 complete (url u, string filter, bool flag) {
944 url home= url_pwd ();
945 return home * complete (home, u, filter, flag);
946 }
947
948 url
949 complete (url u, string filter) {
950 // This routine can be used in order to find all possible matches
951 // for the wildcards in an url and replace the wildcards by these matches.
952 // Moreover, matches are normalized (file root -> default root).
953 url r = complete (u, filter, false);
954 // cout << "complete:" << u << " filter:" << filter << " result:" << r << LF;
955 return r;
956 }
957
958 url
959 resolve (url u, string filter) {
960 // This routine does the same thing as complete, but it stops at
961 // the first match. It is particularly useful for finding files in paths.
962 return complete (u, filter, true);
963 /*
964 url res= complete (u, filter, true);
965 if (is_none (res))
966 cout << "Failed resolution of " << u << ", " << filter << LF;
967 return res;
968 */
969 }
970
971 url
972 resolve_in_path (url u) {
973 if (use_which) {
974 string name = escape_sh (as_string (u));
975 string which= var_eval_system ("which " * name * " 2> /dev/null");
976 if (ends (which, name))
977 return which;
978 else if ((which != "") &&
979 (!occurs ("bin/which: ", which)) &&
980 (!starts (which, "which: ")) &&
981 (!starts (which, "no ")))
982 cout << "TeXmacs] " << which << "\n";
983 }
984 #ifdef OS_MINGW
985 return resolve ((url_path ("$TEXMACS_PATH/bin") | url_path ("$PATH")) * u, "x");
986 #else
987 return resolve (url_path ("$PATH") * u, "x");
988 #endif
989 }
990
991 bool
992 exists (url u) {
993 return !is_none (resolve (u, "r"));
994 }
995
996 bool
997 exists_in_path (url u) {
998 #ifdef OS_MINGW
999 return !is_none (resolve_in_path (url (as_string (u) * ".bat"))) ||\
1000 !is_none (resolve_in_path (url (as_string (u) * ".exe"))) ||\
1001 !is_none (resolve_in_path (url (as_string (u) * ".com")));
1002 #else
1003 return !is_none (resolve_in_path (u));
1004 #endif
1005 }
1006
1007 bool
1008 has_permission (url u, string filter) {
1009 return !is_none (resolve (u, filter));
1010 }
1011
1012 static url
1013 descendance_sub (url u) {
1014 if (is_or (u))
1015 return descendance_sub (u[1]) | descendance_sub (u[2]);
1016 return complete (u, url_wildcard (), "r", false);
1017 }
1018
1019 url
1020 descendance (url u) {
1021 // Utility for style and package menus in tm_server.cpp
1022 // Compute and merge subdirectories of directories in path
1023 return factor (descendance_sub (u));
1024 }
1025
1026 url
1027 subdirectories (url u) {
1028 if (is_or (u))
1029 return subdirectories (u[1]) | subdirectories (u[2]);
1030 else if (is_directory (u)) {
1031 url ret= u;
1032 bool error_flag;
1033 array<string> dir= read_directory (u, error_flag);
1034 for (int i=0; i<N(dir); i++)
1035 if (!starts (dir[i], ".") && is_directory (u * dir[i]))
1036 ret= ret | subdirectories (u * dir[i]);
1037 return ret;
1038 }
1039 else return url_none ();
1040 }
1041
1042 /******************************************************************************
1043 * Concretization of resolved urls
1044 ******************************************************************************/
1045
1046 url
1047 concretize_url (url u) {
1048 // This routine transforms a resolved url into a system url.
1049 // In the case of distant files from the web, a local copy is created.
1050 if (is_rooted (u, "default") ||
1051 is_rooted (u, "file") ||
1052 is_rooted (u, "blank"))
1053 return reroot (u, "default");
1054 if (is_rooted_web (u)) return concretize_url (get_from_web (u));
1055 if (is_rooted_tmfs (u)) return concretize_url (get_from_server (u));
1056 if (is_ramdisc (u)) return concretize_url (get_from_ramdisc (u));
1057 if (is_here (u)) return url_pwd ();
1058 if (is_parent (u)) return url_pwd () * url_parent ();
1059 return url_none ();
1060 }
1061
1062 string
1063 concretize (url u) {
1064 // This routine transforms a resolved url into a system file name.
1065 // In the case of distant files from the web, a local copy is created.
1066 url c= concretize_url (u);
1067 if (!is_none (c)) return as_string (c);
1068 if (is_wildcard (u, 1)) return u->t[1]->label;
1069 std_warning << "Couldn't concretize " << u->t << LF;
1070 // failed_error << "u= " << u << LF;
1071 // FAILED ("url has no root");
1072 return "xxx";
1073 }
1074
1075 string
1076 materialize (url u, string filter) {
1077 // Combines resolve and concretize
1078 url r= resolve (u, filter);
1079 if (!(is_rooted (r) || is_here (r) || is_parent (r))) {
1080 failed_error << "u= " << u << LF;
1081 FAILED ("url could not be resolved");
1082 }
1083 return concretize (r);
1084 }