"Fossies" - the Fresh Open Source Software Archive 
Member "TeXmacs-2.1.2-src/src/System/Classes/url.cpp" (5 May 2022, 32462 Bytes) of package /linux/misc/TeXmacs-2.1.2-src.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1
2 /******************************************************************************
3 * MODULE : url.cpp
4 * DESCRIPTION: unified resource location handling
5 * COPYRIGHT : (C) 1999 Joris van der Hoeven
6 *******************************************************************************
7 * The url class uses a tree representation for urls.
8 * This allows us to generalize the concept of an url and allow paths and
9 * patterns to be regarded as urls too. An url is either a string or a tuple
10 * of one of the following types:
11 * "." -- here
12 * ".." -- parent
13 * none -- invalid url
14 * concat -- a/b/c is represented as (concat "a" (concat "b" "c"));
15 * or -- the path a:b/c is represented as (or "a" (concat "b" "c"));
16 * root -- the url http://gnu.org yields (concat (root "http") "gnu.org");
17 * wildcard -- (wildcard) corresponds to any url, (wildcard "*.tm")
18 * to all strings which end with .tm and (wildcard "*.tm" "file")
19 * to all TeXmacs files (i.e. discarding directories ending with .tm).
20 *******************************************************************************
21 * There are three main types of urls:
22 * - rootless urls, like a/b/c. These urls are mainly used in computations.
23 * For example, they can be appended to another url.
24 * - Standard rooted urls, like file:///usr or https://www.texmacs.org.
25 * These are the same as those used on the web.
26 * - System urls, characterized by a "default" root.
27 * These urls are similar to standard rooted urls, but they behave
28 * in a slightly different way with respect to concatenation.
29 * For instance https://www.texmacs.org/Web * file:///tmp would yield
30 * file:///tmp, where as https://www.texmacs.org/Web * /tmp yields
31 * https://www.texmacs.org/tmp
32 *******************************************************************************
33 * There are several formats for parsing (and printing) urls:
34 * - System format: the usual format on your operating system.
35 * On unix systems "/usr/bin:/usr/local/bin" would be a valid url
36 * representing a path and on windows systems "c:\windows;c:\TeXmacs"
37 * would be OK.
38 * - Unix format: this format forces unix-like notation even for
39 * other systems like Windows. This is convenient for url's in
40 * the source code. Unix environment variables like ~ and $TEXMACS_PATH
41 * can also be part of the url.
42 * - Standard format: the format which is used on the web.
43 * Notice that ftp://www.texmacs.org/pub and ftp://www.texmacs.org/pub/
44 * represent different urls. The second one is represented by concating
45 * on the right with an empty name.
46 *******************************************************************************
47 * When an explicit operation on urls need to be performed,
48 * like reading a file, the url is first "resolved" into a simple url
49 * with a unique name (modulo symbolic links) for the resource.
50 * Next, the url is "concretized" as a file name which is understood
51 * by the operating system. This may require searching the file from the web.
52 * Concretized urls should be used quickly and not memorized,
53 * since such names may be the names of temporary files,
54 * which may be destroyed soon afterwards.
55 *******************************************************************************
56 * This software falls under the GNU general public license version 3 or later.
57 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
58 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
59 ******************************************************************************/
60
61 #include "boot.hpp"
62 #include "url.hpp"
63 #include "sys_utils.hpp"
64 #include "web_files.hpp"
65 #include "file.hpp"
66 #include "analyze.hpp"
67
68 #include <ctype.h>
69
70 #ifdef OS_MINGW
71 #define WINPATHS
72 #endif
73
74 #ifdef WINPATHS
75 #define URL_CONCATER '\\'
76 #define URL_SEPARATOR ';'
77 #else
78 #define URL_CONCATER '/'
79 #define URL_SEPARATOR ':'
80 #endif
81
82 /******************************************************************************
83 * Unrooted url constructors
84 ******************************************************************************/
85
86 static url
87 url_get_atom (string s, int type) {
88 if (type < URL_STANDARD) {
89 if (s == "~") return url_system (get_env ("HOME"));
90 if (starts (s, "$")) {
91 string val= get_env (s (1, N(s)));
92 if (val == "") return url_none ();
93 return unblank (url_system (val));
94 }
95 }
96 if (occurs ("*", s)) return url_wildcard (s);
97 #ifdef WINPATHS
98 if (N(s)==2 && ends (s, ":"))
99 s->resize(1); // remove the ':' after unit letter
100 #endif
101 return as_url (tree (s));
102 }
103
104 static void
105 skip_ipv6 (string s, int& i) {
106 i++;
107 while (i<N(s) && (s[i] == ':' ||
108 (s[i] >= '0' && s[i] <= '9') ||
109 (s[i] >= 'a' && s[i] <= 'f') ||
110 (s[i] >= 'A' && s[i] <= 'F'))) i++;
111 if (i<N(s) && s[i] == ']') i++;
112 }
113
114 static url
115 url_get_name (string s, int type= URL_STANDARD, int i=0) {
116 char sep= (type == URL_SYSTEM)? URL_CONCATER: '/';
117 int start= i, n= N(s);
118 while ((i<n) && (s[i] != sep) && (s[i] != '/')) {
119 if (s[i] == '[') skip_ipv6 (s, i); else i++; }
120 url u= url_get_atom (s (start, i), type);
121 // url u= tree (s (start, i));
122 if (i == n) return u;
123 if (start == i) return url_get_name (s, type, i+1);
124 return u * url_get_name (s, type, i+1);
125 }
126
127 static url
128 url_get_path (string s, int type= URL_STANDARD, int i=0) {
129 char sep= (type == URL_SYSTEM)? URL_SEPARATOR: ':';
130 int start= i, n= N(s);
131 if (i == n) return url_none ();
132 while ((i<n) && (s[i] != sep)) {
133 if (s[i] == '[') skip_ipv6 (s, i); else i++; }
134 url u= url_general (s (start, i), type);
135 if (i == n) return u;
136 if (start == i) return url_get_path (s, type, i+1);
137 return u | url_get_path (s, type, i+1);
138 }
139
140 /******************************************************************************
141 * Rooted url constructors
142 ******************************************************************************/
143
144 url
145 url_root (string protocol) {
146 return as_url (tuple ("root", protocol));
147 }
148
149 url
150 url_ramdisc (string contents) {
151 return as_url (tuple ("root", "ramdisc", contents));
152 }
153
154 static url
155 url_default (string name, int type= URL_SYSTEM) {
156 url u= url_get_name (name, type);
157 #ifdef WINPATHS
158 // FIXME: this hack seems a bit too simple
159 if (is_concat (u) && (u[2]->t == "")) u= u[1];
160 // cout << name << " -> " << url_root ("default") * u << "\n";
161 return url_root ("default") * u;
162 #else
163 if (u->t == "") return url_root ("default");
164 return url_root ("default") * u;
165 #endif
166 }
167
168 static url
169 url_mingw_default (string name, int type) {
170 string s= name (0, 2) * ":" * name (2, N(name));
171 return url_root ("default") * url_get_name (s, type);
172 }
173
174 static url
175 url_path (string s, int type= URL_SYSTEM) {
176 url u= url_get_path (s, type);
177 return u;
178 }
179
180 static url
181 url_local (string name) {
182 url u= url_get_name (name, URL_SYSTEM);
183 return reroot (u, "file");
184 }
185
186 static url
187 url_file (string name) {
188 url u= url_get_name (name);
189 return url_root ("file") * u;
190 }
191
192 static url
193 url_http (string name) {
194 url u= url_get_name (name);
195 return url_root ("http") * u;
196 }
197
198 static url
199 url_https (string name) {
200 url u= url_get_name (name);
201 return url_root ("https") * u;
202 }
203
204 static url
205 url_ftp (string name) {
206 url u= url_get_name (name);
207 return url_root ("ftp") * u;
208 }
209
210 static url
211 url_tmfs (string name) {
212 url u= url_get_name (name);
213 return url_root ("tmfs") * u;
214 }
215
216 static url
217 url_blank (string name) {
218 url u= url_get_name (name);
219 return url_root ("blank") * u;
220 }
221
222 /******************************************************************************
223 * Generic url constructor
224 ******************************************************************************/
225
226 static bool
227 heuristic_is_path (string name, int type) {
228 char sep= (type==0)? URL_SEPARATOR: ':';
229 int i= 0, n= N(name);
230 while (i<n)
231 if (name[i] == '[') skip_ipv6 (name, i);
232 else if (name[i] == sep) return true;
233 else i++;
234 return false;
235 }
236
237 static bool
238 heuristic_is_default (string name, int type) {
239 #ifdef WINPATHS
240 // FIXME: we probably should take into account 'type' too
241 if (N(name) < 2) return false;
242 if ((name[0] == '\\') && (name[1] == '\\')) return true;
243 return
244 isalpha (name[0]) && (name[1] == ':') &&
245 ((N(name)==2) || (name[2] == '\\') || (name[2] == '/'));
246 #else
247 char sep= (type==0)? URL_CONCATER: '/';
248 return (name != "") && (name[0] == sep);
249 #endif
250 }
251
252 static bool
253 heuristic_is_http (string name) {
254 return starts (name, "www.");
255 // FIXME: we might want to recognize some other ones like google.com too
256 }
257
258 static bool
259 heuristic_is_ftp (string name) {
260 return starts (name, "ftp.");
261 }
262
263 static bool
264 heuristic_is_mingw_default (string name, int type) {
265 #ifdef WINPATHS
266 return type != URL_SYSTEM && N(name) >= 2 &&
267 name[0] == '/' && is_alpha (name[1]) &&
268 (N(name) == 2 || name[2] == '/');
269 #else
270 (void) name; (void) type; return false;
271 #endif
272 }
273
274 url
275 url_general (string name, int type= URL_SYSTEM) {
276 if (starts (name, "local:")) return url_local (name (6, N (name)));
277 if (starts (name, "file://")) return url_file (name (7, N (name)));
278 if (starts (name, "http://")) return url_http (name (7, N (name)));
279 if (starts (name, "https://")) return url_https (name (8, N (name)));
280 if (starts (name, "ftp://")) return url_ftp (name (6, N (name)));
281 if (starts (name, "tmfs://")) return url_tmfs (name (7, N (name)));
282 if (starts (name, "//")) return url_blank (name (2, N (name)));
283 if (heuristic_is_path (name, type)) return url_path (name, type);
284 if (heuristic_is_default (name, type)) return url_default (name, type);
285 if (heuristic_is_mingw_default (name, type)) return url_mingw_default (name, type);
286 if (type != URL_CLEAN_UNIX) {
287 if (heuristic_is_http (name)) return url_http (name);
288 if (heuristic_is_ftp (name)) return url_ftp (name);
289 }
290 return url_get_name (name, type);
291 }
292
293 url
294 url_unix (string name) {
295 return url_general (name, URL_UNIX);
296 }
297
298 url
299 url_unix (string dir, string name) {
300 return url_unix (dir) * url_unix (name);
301 }
302
303 url
304 url_system (string name) {
305 return url_general (name, URL_SYSTEM);
306 }
307
308 url
309 url_system (string dir, string name) {
310 return url_system (dir) * url_system (name);
311 }
312
313 url
314 url_standard (string name) {
315 return url_general (name, URL_STANDARD);
316 }
317
318 url
319 url_standard (string dir, string name) {
320 return url_standard (dir) * url_standard (name);
321 }
322
323 url::url (): rep (tm_new<url_rep> (tuple ("none"))) {}
324 url::url (const char* name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
325 url::url (string name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
326 url::url (string path_name, string name):
327 rep (tm_new<url_rep> (url_unix (path_name, name)->t)) {}
328
329 /******************************************************************************
330 * Computational url constructors
331 ******************************************************************************/
332
333 static bool
334 is_special_root (url u) {
335 #ifdef WINPATHS
336 return is_root (u);
337 #else
338 return is_root_web (u);
339 #endif
340 }
341
342 static bool
343 is_semi_root (url u) {
344 // url u such that u/.. == u (website or windows drive name)
345 return is_concat (u) && is_special_root (u[1]) && is_atomic (u[2]);
346 }
347
348 url
349 operator * (url u1, url u2) {
350 //cout << "concat " << u1->t << " * " << u2->t << "\n";
351 if (is_root (u2) || (is_concat (u2) && is_root (u2[1]))) {
352 if (is_concat (u1) && is_root_web (u1[1])) {
353 if (is_root (u2, "default") ||
354 (is_concat (u2) && is_root (u2[1], "default")))
355 {
356 url v= u1[2];
357 while (is_concat (v)) v= v[1];
358 if (is_root (u2)) return u1[1] * v;
359 return u1[1] * v * u2[2];
360 }
361 if (is_root (u2, "blank") ||
362 (is_concat (u2) && is_root (u2[1], "blank")))
363 return reroot (u2, u1[1][1]->t->label);
364 }
365 return u2;
366 }
367 if (is_here (u1) || (u1->t == "")) return u2;
368 if (is_here (u2)) return u1;
369 if (is_none (u1)) return url_none ();
370 if (is_none (u2)) return url_none ();
371 if (u2 == url_parent ()) {
372 if (is_root (u1)) return u1;
373 if (is_pseudo_atomic (u1) && (!is_parent (u1))) return url_here ();
374 if (is_semi_root (u1)) return u1;
375 }
376 if (is_concat (u2) && (u2[1] == url_parent ())) {
377 if (is_root (u1)) return u1 * u2[2];
378 if (is_pseudo_atomic (u1) && (!is_parent (u1))) return u2[2];
379 if (is_semi_root (u1)) return u1 * u2[2];
380 }
381 if (is_concat (u1)) return u1[1] * (u1[2] * u2);
382 return as_url (tuple ("concat", u1->t, u2->t));
383 }
384
385 url
386 operator * (url u1, const char* name) {
387 return u1 * url (name);
388 }
389
390 url
391 operator * (url u1, string name) {
392 return u1 * url (name);
393 }
394
395 url
396 operator | (url u1, url u2) {
397 if (is_none (u1)) return u2;
398 if (is_none (u2)) return u1;
399 if (is_or (u1)) return u1[1] | (u1[2] | u2);
400 if (u1 == u2) return u2;
401 if (is_or (u2) && (u1 == u2[1])) return u2;
402 return as_url (tuple ("or", u1->t, u2->t));
403 }
404
405 url
406 url_wildcard () {
407 return as_url (tuple ("wildcard"));
408 }
409
410 url
411 url_wildcard (string name) {
412 return as_url (tuple ("wildcard", name));
413 }
414
415 /******************************************************************************
416 * url predicates
417 ******************************************************************************/
418
419 bool
420 is_rooted (url u) {
421 return
422 is_root (u) ||
423 (is_concat (u) && is_rooted (u[1])) ||
424 (is_or (u) && is_rooted (u[1]) && is_rooted (u[2]));
425 }
426
427 bool
428 is_rooted (url u, string protocol) {
429 return
430 is_root (u, protocol) ||
431 (is_concat (u) && is_rooted (u[1], protocol)) ||
432 (is_or (u) && is_rooted (u[1], protocol) && is_rooted (u[2], protocol));
433 }
434
435 bool
436 is_rooted_web (url u) {
437 return
438 is_root_web (u) ||
439 (is_concat (u) && is_rooted_web (u[1])) ||
440 (is_or (u) && is_rooted_web (u[1]) && is_rooted_web (u[2]));
441 }
442
443 bool
444 is_rooted_tmfs (url u) {
445 return
446 is_root_tmfs (u) ||
447 (is_concat (u) && is_rooted_tmfs (u[1])) ||
448 (is_or (u) && is_rooted_tmfs (u[1]) && is_rooted_tmfs (u[2]));
449 }
450
451 bool
452 is_tmfs_protocol (url u, string protocol) {
453 return
454 u->t == protocol ||
455 (is_concat (u) && is_tmfs_protocol (u[1], protocol));
456 }
457
458 bool
459 is_rooted_tmfs (url u, string protocol) {
460 return
461 (is_concat (u) && is_root_tmfs (u[1]) &&
462 is_tmfs_protocol (u[2], protocol)) ||
463 (is_or (u) && is_rooted_tmfs (u[1], protocol) &&
464 is_rooted_tmfs (u[2], protocol));
465 }
466
467 bool
468 is_rooted_blank (url u) {
469 return
470 is_root_blank (u) ||
471 (is_concat (u) && is_rooted_blank (u[1])) ||
472 (is_or (u) && is_rooted_blank (u[1]) && is_rooted_blank (u[2]));
473 }
474
475 bool
476 is_name (url u) {
477 if (is_atomic (u)) return true;
478 if (!is_concat (u)) return false;
479 return is_name (u[1]) && is_name (u[2]);
480 }
481
482 bool
483 is_rooted_name (url u) {
484 return is_concat (u) && is_root (u[1]) && is_name (u[2]);
485 }
486
487 bool
488 is_name_in_path (url u) {
489 if (is_name (u)) return true;
490 return is_concat (u) && is_root (u[1], "default") && is_name (u[2]);
491 }
492
493 bool
494 is_ramdisc (url u) {
495 return is_concat (u) && is_root (u[1], "ramdisc");
496 }
497
498 /******************************************************************************
499 * Conversion routines for urls
500 ******************************************************************************/
501
502 string
503 as_string (url u, int type) {
504 // This routine pritty prints an url as a string.
505 // FIXME: the current algorithm is quadratic in time.
506 if (is_none (u)) return "{}";
507 if (is_atomic (u)) return u->t->label;
508 if (is_concat (u)) {
509 int stype= type;
510 if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
511 string sep= (stype==URL_SYSTEM? string (URL_CONCATER): string ("/"));
512 string s1 = as_string (u[1], type);
513 string s2 = as_string (u[2], stype);
514 if (is_root (u[1], "default")) s1= "";
515 if ((!is_name (u[1])) && (!is_root (u[1]))) s1= "{" * s1 * "}";
516 if ((!is_concat (u[2])) && (!is_atomic (u[2])) && (!is_wildcard (u[2], 1)))
517 s2= "{" * s2 * "}";
518 #ifdef WINPATHS
519 if (((is_root (u[1], "default") && type == URL_SYSTEM) ||
520 is_root (u[1], "file"))) { // have to return the windows format
521 string root, remain;
522 if (is_concat (u[2])) {
523 root = as_string (u[2][1], type);
524 // root might be unit letter or hostname. It depends on the length
525 remain = as_string (u[2][2], type);
526 }
527 else {
528 root = s2;
529 remain = "";
530 }
531 if (is_root (u[1], "default")) {
532 if (N(root) == 1) return root * ":\\" * remain; // drive letter
533 else return "\\\\" * root * "\\" * remain;
534 }
535 else {
536 if (N(root) == 1) return s1 * "/" * root * ":/" * remain; // local file
537 else return s1 * root * "/" * remain; // remote
538 }
539 }
540 #endif
541 return s1 * sep * s2;
542 }
543 if (is_or (u)) {
544 string s1= as_string (u[1], type);
545 string s2= as_string (u[2], type);
546 if (!is_name_in_path (u[1])) s1= "{" * s1 * "}";
547 if ((!is_or (u[2])) && (!is_name_in_path (u[2]))) s2= "{" * s2 * "}";
548 #ifdef WINPATHS
549 if (type == URL_STANDARD) return s1 * ":" * s2;
550 else return s1 * string (URL_SEPARATOR) * s2;
551 #else
552 return s1 * string (URL_SEPARATOR) * s2;
553 #endif
554 }
555 #ifdef WINPATHS
556 if (is_root (u, "default")) {
557 int stype= type;
558 if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
559 if (stype == URL_SYSTEM) return ""; else return "/";
560 }
561 #else
562 if (is_root (u, "default")) return "/";
563 #endif
564 if (is_root (u, "blank")) return "/";
565 if (is_root (u, "file")) return u[1]->t->label * "://";
566 if (is_root (u)) return u[1]->t->label * ":/";
567 if (is_wildcard (u, 0)) return "**";
568 if (is_wildcard (u, 1)) return u->t[1]->label;
569 FAILED ("bad url");
570 return "";
571 }
572
573 tm_ostream&
574 operator << (tm_ostream& out, url u) {
575 return out << as_string (u, URL_SYSTEM);
576 }
577
578 /******************************************************************************
579 * Operations on urls
580 ******************************************************************************/
581
582 url
583 head (url u) {
584 return u * url_parent ();
585 }
586
587 url
588 tail (url u) {
589 if (is_concat (u)) {
590 if (is_root_web (u[1]) && is_atomic (u[2])) return url_here ();
591 return tail (u[2]);
592 }
593 if (is_or (u)) return tail (u[1]) | tail (u[2]);
594 if (is_root (u)) return url_here ();
595 return u;
596 }
597
598 string
599 suffix (url u) {
600 u= tail (u);
601 if (!is_atomic (u)) return "";
602 string s= as_string (u);
603 int i, n= N(s);
604 for (i=n-1; i>=0; i--)
605 if (s[i]=='.') break;
606 if ((i>0) && (i<n-1)) {
607 string r= s (i+1, n);
608 while ((N(r)>0) && (r[N(r)-1]=='~' || r[N(r)-1]=='#')) r= r(0, N(r)-1);
609 return locase_all(r);
610 }
611 return "";
612 }
613
614 string
615 basename (url u, string suf) {
616 string s= as_string (tail (u));
617 if (suf != "" && N(s) > N(suf) && suf == s(N(s)-N(suf),N(s)))
618 return s(0, N(s)-N(suf));
619 return s;
620 }
621
622 string
623 basename (url u) {
624 string s= suffix (u);
625 if (N(s) != 0) s= "." * s;
626 return basename (u, s);
627 }
628
629 url
630 glue (url u, string s) {
631 if (is_atomic (u)) return as_url (tree (u->t->label * s));
632 if (is_concat (u)) return u[1] * glue (u[2], s);
633 if (is_or (u)) return glue (u[1], s) | glue (u[2], s);
634 if (is_none (u)) return u;
635 failed_error << "u= " << u << "\n";
636 failed_error << "s= " << s << "\n";
637 FAILED ("can't glue string to url");
638 return u;
639 }
640
641 url
642 unglue (url u, int nr) {
643 if (is_atomic (u))
644 return as_url (tree (u->t->label (0, max (N(u->t->label) - nr, 0))));
645 if (is_concat (u)) return u[1] * unglue (u[2], nr);
646 if (is_or (u)) return unglue (u[1], nr) | unglue (u[2], nr);
647 if (is_none (u)) return u;
648 failed_error << "u = " << u << "\n";
649 failed_error << "nr= " << nr << "\n";
650 FAILED ("can't unglue from url");
651 return u;
652 }
653
654 url
655 unblank (url u) {
656 if (is_concat (u) && (u[2]->t == "")) return u[1];
657 if (is_concat (u)) return u[1] * unblank (u[2]);
658 if (is_or (u)) return unblank (u[1]) | unblank (u[2]);
659 return u;
660 }
661
662 url
663 relative (url base, url u) {
664 return head (base) * u;
665 }
666
667 url
668 delta_sub (url base, url u) {
669 if (is_atomic (base))
670 return u;
671 if (is_concat (base) && is_concat (u) && (base[1] == u[1])) {
672 if (is_special_root (base[1]) &&
673 is_concat (base[2]) && is_concat (u[2]) &&
674 base[2][1] != u[2][1])
675 return url_none ();
676 return delta_sub (base[2], u[2]);
677 }
678 if (is_concat (base) && !is_semi_root (base))
679 return url_parent () * delta_sub (head (base), u);
680 return url_none ();
681 }
682
683 url
684 delta (url base, url u) {
685 if (is_or (u))
686 return delta (base, u[1]) | delta (base, u[2]);
687 url res= delta_sub (base, u);
688 if (is_none (res)) return u;
689 return res;
690 }
691
692 static url
693 expand (url u1, url u2) {
694 if (is_or (u1)) return expand (u1[1], u2) | expand (u1[2], u2);
695 if (is_or (u2)) return expand (u1, u2[1]) | expand (u1, u2[2]);
696 if (is_ancestor (u2)) {
697 if (is_concat (u1)) return u1 | expand (u1[1], u2);
698 if (is_special_root (u1)) return u2;
699 return u1 | u2;
700 }
701 if (is_concat (u2) && is_ancestor (u2[1]))
702 return expand (expand (u1, u2[1]), u2[2]);
703 return u1 * u2;
704 }
705
706 url
707 expand (url u) {
708 if (is_or (u)) return expand (u[1]) | expand (u[2]);
709 if (is_concat (u)) return expand (expand (u[1]), expand (u[2]));
710 return u;
711 }
712
713 bool
714 descends (url u, url base) {
715 if (is_or (base))
716 return descends (u, base[1]) || descends (u, base[2]);
717 if (is_or (u))
718 return descends (u[1], base) && descends (u[2], base);
719 if (u == base)
720 return true;
721 if (is_concat (u) && is_atomic (base))
722 return u[1] == base;
723 if (is_concat (u) && is_concat (base))
724 return u[1] == base[1] && descends (u[2], base[2]);
725 return false;
726 }
727
728 bool
729 is_secure (url u) {
730 return descends (u, expand (url_path ("$TEXMACS_SECURE_PATH")));
731 }
732
733 /******************************************************************************
734 * Url sorting and factorization
735 ******************************************************************************/
736
737 static bool
738 operator <= (url u1, url u2) {
739 if (is_atomic (u1) && is_atomic (u2))
740 return u1->t->label <= u2->t->label;
741 if (is_atomic (u1)) return true;
742 if (is_atomic (u2)) return false;
743 if (is_concat (u1) && is_concat (u2)) {
744 if (u1[1] == u2[1]) return u1[2] <= u2[2];
745 else return u1[1] <= u2[1];
746 }
747 if (is_concat (u1)) return true;
748 if (is_concat (u2)) return false;
749 return true; // does not matter for sorting
750 }
751
752 static url
753 sort_sub (url add, url to) {
754 if (is_or (to)) {
755 if (add <= to[1]) return add | to;
756 return to[1] | sort_sub (add, to[2]);
757 }
758 if (add <= to) return add | to;
759 else return to | add;
760 }
761
762 url
763 sort (url u) {
764 if (is_or (u))
765 return sort_sub (u[1], sort (u[2]));
766 else return u;
767 }
768
769 static url
770 factor_sorted (url u) {
771 if (!is_or (u)) return u;
772 url v= factor_sorted (u[2]);
773 if (is_concat (u[1])) {
774 if (is_concat (v) && (u[1][1] == v[1]))
775 return u[1][1] * (u[1][2] | v[2]);
776 if (is_or (v) && is_concat (v[1]) && (u[1][1] == v[1][1]))
777 return (u[1][1] * (u[1][2] | v[1][2])) | v[2];
778 }
779 return u[1] | v;
780 }
781
782 static url
783 factor_sub (url u) {
784 if (is_concat (u)) return u[1] * factor (u[2]);
785 if (is_or (u)) return factor_sub (u[1]) | factor_sub (u[2]);
786 return u;
787 }
788
789 url
790 factor (url u) {
791 return factor_sub (factor_sorted (sort (u)));
792 }
793
794 /******************************************************************************
795 * Url resolution and wildcard expansion
796 ******************************************************************************/
797
798 url complete (url base, url u, string filter, bool flag);
799
800 string
801 get_root (url u) {
802 if (is_concat (u)) return get_root (u[1]);
803 if (is_or (u)) {
804 string s1= get_root (u[1]);
805 string s2= get_root (u[2]);
806 if (s1 == s2) return s1; else return "";
807 }
808 if (is_root (u)) return u[1]->t->label;
809 return "";
810 }
811
812 url
813 unroot (url u) {
814 if (is_concat (u)) return unroot (u[1]) * u[2];
815 if (is_or (u)) return unroot (u[1]) | unroot (u[2]);
816 if (is_root (u)) return url_here ();
817 return u;
818 }
819
820 url
821 reroot (url u, string protocol) {
822 if (is_concat (u)) return reroot (u[1], protocol) * u[2];
823 if (is_or (u)) return reroot (u[1], protocol) | reroot (u[2], protocol);
824 if (is_root (u)) return url_root (protocol);
825 return u;
826 }
827
828 static url
829 complete (url base, url sub, url u, string filter, bool flag) {
830 if (is_or (sub)) {
831 url res1= complete (base, sub[1], u, filter, flag);
832 if ((!is_none (res1)) && flag) return res1;
833 return res1 | complete (base, sub[2], u, filter, flag);
834 }
835 if (is_concat (sub) && is_rooted (sub[1])) {
836 url res= complete (sub[1], sub[2], u, filter, flag);
837 return sub[1] * res;
838 }
839 return sub * complete (base * sub, u, filter, flag);
840 }
841
842 url
843 complete (url base, url u, string filter, bool flag) {
844 // cout << "complete " << base << " |||| " << u << LF;
845 if (!is_rooted(u)) {
846 if (is_none (base)) return base;
847 if (is_none (u)) return u;
848 if ((!is_root (base)) && (!is_rooted_name (base))) {
849 failed_error << "base = " << base << LF;
850 failed_error << "u = " << u << LF;
851 failed_error << "filter= " << filter << LF;
852 FAILED ("invalid base url");
853 }
854 }
855 if (is_name (u) || (is_concat (u) && is_root (u[1]) && is_name (u[2]))) {
856 url comp= base * u;
857 if (is_rooted (comp, "default") || is_rooted (comp, "file")) {
858 if (is_of_type (comp, filter)) return reroot (u, "default");
859 return url_none ();
860 }
861 if (is_rooted_web (comp) || is_rooted_tmfs (comp) || is_ramdisc (comp)) {
862 if (is_of_type (comp, filter)) return u;
863 return url_none ();
864 }
865 failed_error << "base = " << base << LF;
866 failed_error << "u = " << u << LF;
867 failed_error << "filter= " << filter << LF;
868 ASSERT (is_rooted (comp), "unrooted url");
869 FAILED ("bad protocol in url");
870 }
871 if (is_root (u)) {
872 // FIXME: test filter flags here
873 return u;
874 }
875 if (is_concat (u) && is_wildcard (u[1], 0) && is_wildcard (u[2], 1)) {
876 // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
877 if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
878 failed_error << "base = " << base << LF;
879 failed_error << "u = " << u << LF;
880 failed_error << "filter= " << filter << LF;
881 FAILED ("wildcards only implemented for files");
882 }
883 url ret= url_none ();
884 bool error_flag;
885 array<string> dir= read_directory (base, error_flag);
886 int i, n= N(dir);
887 for (i=0; i<n; i++) {
888 if ((!is_none (ret)) && flag) return ret;
889 if ((dir[i] == ".") || (dir[i] == "..")) continue;
890 if (starts (dir[i], "http://") ||
891 starts (dir[i], "https://") ||
892 starts (dir[i], "ftp://"))
893 if (is_directory (base * dir[i])) continue;
894 ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
895 if (match_wildcard (dir[i], u[2][1]->t->label))
896 ret= ret | complete (base, dir[i], filter, flag);
897 }
898 return ret;
899 }
900 if (is_concat (u)) {
901 url sub= complete (base, u[1], "", false);
902 // "" should often be faster than the more correct "d" here
903 return complete (base, sub, u[2], filter, flag);
904 }
905 if (is_or (u)) {
906 url res1= complete (base, u[1], filter, flag);
907 if ((!is_none (res1)) && flag) return res1;
908 return res1 | complete (base, u[2], filter, flag);
909 }
910 if (is_wildcard (u)) {
911 // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
912 if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
913 failed_error << "base = " << base << LF;
914 failed_error << "u = " << u << LF;
915 failed_error << "filter= " << filter << LF;
916 FAILED ("wildcards only implemented for files");
917 }
918 url ret= url_none ();
919 if (is_wildcard (u, 0) && is_of_type (base, filter)) ret= url_here ();
920 bool error_flag;
921 array<string> dir= read_directory (base, error_flag);
922 int i, n= N(dir);
923 for (i=0; i<n; i++) {
924 if ((!is_none (ret)) && flag) return ret;
925 if ((dir[i] == ".") || (dir[i] == "..")) continue;
926 if (starts (dir[i], "http://") ||
927 starts (dir[i], "https://") ||
928 starts (dir[i], "ftp://"))
929 if (is_directory (base * dir[i])) continue;
930 if (is_wildcard (u, 0))
931 ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
932 else if (match_wildcard (dir[i], u[1]->t->label))
933 ret= ret | complete (base, dir[i], filter, flag);
934 }
935 return ret;
936 }
937 failed_error << "url= " << u << LF;
938 FAILED ("bad url");
939 return u;
940 }
941
942 url
943 complete (url u, string filter, bool flag) {
944 url home= url_pwd ();
945 return home * complete (home, u, filter, flag);
946 }
947
948 url
949 complete (url u, string filter) {
950 // This routine can be used in order to find all possible matches
951 // for the wildcards in an url and replace the wildcards by these matches.
952 // Moreover, matches are normalized (file root -> default root).
953 url r = complete (u, filter, false);
954 // cout << "complete:" << u << " filter:" << filter << " result:" << r << LF;
955 return r;
956 }
957
958 url
959 resolve (url u, string filter) {
960 // This routine does the same thing as complete, but it stops at
961 // the first match. It is particularly useful for finding files in paths.
962 return complete (u, filter, true);
963 /*
964 url res= complete (u, filter, true);
965 if (is_none (res))
966 cout << "Failed resolution of " << u << ", " << filter << LF;
967 return res;
968 */
969 }
970
971 url
972 resolve_in_path (url u) {
973 if (use_which) {
974 string name = escape_sh (as_string (u));
975 string which= var_eval_system ("which " * name * " 2> /dev/null");
976 if (ends (which, name))
977 return which;
978 else if ((which != "") &&
979 (!occurs ("bin/which: ", which)) &&
980 (!starts (which, "which: ")) &&
981 (!starts (which, "no ")))
982 cout << "TeXmacs] " << which << "\n";
983 }
984 #ifdef OS_MINGW
985 return resolve ((url_path ("$TEXMACS_PATH/bin") | url_path ("$PATH")) * u, "x");
986 #else
987 return resolve (url_path ("$PATH") * u, "x");
988 #endif
989 }
990
991 bool
992 exists (url u) {
993 return !is_none (resolve (u, "r"));
994 }
995
996 bool
997 exists_in_path (url u) {
998 #ifdef OS_MINGW
999 return !is_none (resolve_in_path (url (as_string (u) * ".bat"))) ||\
1000 !is_none (resolve_in_path (url (as_string (u) * ".exe"))) ||\
1001 !is_none (resolve_in_path (url (as_string (u) * ".com")));
1002 #else
1003 return !is_none (resolve_in_path (u));
1004 #endif
1005 }
1006
1007 bool
1008 has_permission (url u, string filter) {
1009 return !is_none (resolve (u, filter));
1010 }
1011
1012 static url
1013 descendance_sub (url u) {
1014 if (is_or (u))
1015 return descendance_sub (u[1]) | descendance_sub (u[2]);
1016 return complete (u, url_wildcard (), "r", false);
1017 }
1018
1019 url
1020 descendance (url u) {
1021 // Utility for style and package menus in tm_server.cpp
1022 // Compute and merge subdirectories of directories in path
1023 return factor (descendance_sub (u));
1024 }
1025
1026 url
1027 subdirectories (url u) {
1028 if (is_or (u))
1029 return subdirectories (u[1]) | subdirectories (u[2]);
1030 else if (is_directory (u)) {
1031 url ret= u;
1032 bool error_flag;
1033 array<string> dir= read_directory (u, error_flag);
1034 for (int i=0; i<N(dir); i++)
1035 if (!starts (dir[i], ".") && is_directory (u * dir[i]))
1036 ret= ret | subdirectories (u * dir[i]);
1037 return ret;
1038 }
1039 else return url_none ();
1040 }
1041
1042 /******************************************************************************
1043 * Concretization of resolved urls
1044 ******************************************************************************/
1045
1046 url
1047 concretize_url (url u) {
1048 // This routine transforms a resolved url into a system url.
1049 // In the case of distant files from the web, a local copy is created.
1050 if (is_rooted (u, "default") ||
1051 is_rooted (u, "file") ||
1052 is_rooted (u, "blank"))
1053 return reroot (u, "default");
1054 if (is_rooted_web (u)) return concretize_url (get_from_web (u));
1055 if (is_rooted_tmfs (u)) return concretize_url (get_from_server (u));
1056 if (is_ramdisc (u)) return concretize_url (get_from_ramdisc (u));
1057 if (is_here (u)) return url_pwd ();
1058 if (is_parent (u)) return url_pwd () * url_parent ();
1059 return url_none ();
1060 }
1061
1062 string
1063 concretize (url u) {
1064 // This routine transforms a resolved url into a system file name.
1065 // In the case of distant files from the web, a local copy is created.
1066 url c= concretize_url (u);
1067 if (!is_none (c)) return as_string (c);
1068 if (is_wildcard (u, 1)) return u->t[1]->label;
1069 std_warning << "Couldn't concretize " << u->t << LF;
1070 // failed_error << "u= " << u << LF;
1071 // FAILED ("url has no root");
1072 return "xxx";
1073 }
1074
1075 string
1076 materialize (url u, string filter) {
1077 // Combines resolve and concretize
1078 url r= resolve (u, filter);
1079 if (!(is_rooted (r) || is_here (r) || is_parent (r))) {
1080 failed_error << "u= " << u << LF;
1081 FAILED ("url could not be resolved");
1082 }
1083 return concretize (r);
1084 }