"Fossies" - the Fresh Open Source Software Archive 
Member "xombrero-1.6.4/tldlist.c" (17 Feb 2015, 8591 Bytes) of package /linux/www/old/xombrero-1.6.4.tgz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "tldlist.c" see the
Fossies "Dox" file reference documentation.
1 /*
2 * Copyright (c) 2012 Elias Norberg <xyzzy@kudzu.se>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include <xombrero.h>
18
/* Bits stored in tld_tree_node.flags. */
#define TLD_TREE_END_NODE	1	/* a rule terminates at this node */
#define TLD_TREE_EXCEPTION	2	/* the terminating rule started with '!' */

/*
 * One node of the TLD rule tree.  Nodes on the same level are chained
 * through 'next'; the level below a node hangs off 'child'.
 */
struct tld_tree_node {
	struct tld_tree_node	*next;	/* next sibling, or NULL */
	struct tld_tree_node	*child;	/* first node of the level below, or NULL */
	const char		*lbl;	/* label text this node is compared against */
	char			 flags;	/* TLD_TREE_* bits */
};

/* Root of the rule tree: an empty label with no siblings or children. */
struct tld_tree_node tld_tree_root = {
	.next = NULL,
	.child = NULL,
	.lbl = "",
	.flags = 0
};
30
/*
 * TREE_INSERT_CHILD(n, data)
 *
 * Allocate a new node, store it in n->child and initialise it with
 * no siblings, no children, no flags and 'data' as its label.
 * Any previous value of n->child is overwritten.
 *
 * Wrapped in do { } while (0) so the macro behaves as one statement
 * (safe in an unbraced if/else).  Note that 'n' is evaluated several
 * times, so it must not have side effects.
 */
#define TREE_INSERT_CHILD(n, data)					\
	do {								\
		(n)->child = g_malloc(sizeof *(n)->child);		\
		(n)->child->next = NULL;				\
		(n)->child->child = NULL;				\
		(n)->child->flags = 0;					\
		(n)->child->lbl = (data);				\
	} while (0)
37
/*
 * TREE_INSERT_NEXT(n, data)
 *
 * Allocate a new node, store it in n->next and initialise it with
 * no siblings, no children, no flags and 'data' as its label.
 * Any previous value of n->next is overwritten.
 *
 * Wrapped in do { } while (0) so the macro behaves as one statement
 * (safe in an unbraced if/else).  Note that 'n' is evaluated several
 * times, so it must not have side effects.
 */
#define TREE_INSERT_NEXT(n, data)					\
	do {								\
		(n)->next = g_malloc(sizeof *(n)->next);		\
		(n)->next->next = NULL;					\
		(n)->next->child = NULL;				\
		(n)->next->flags = 0;					\
		(n)->next->lbl = (data);				\
	} while (0)
44
/*
 * Punycode parameters used by adapt() and punycode_encode().
 * The values are the ones given for Punycode in RFC 3492, section 5.
 */
#define P_BASE (36)
#define P_TMIN (1)
#define P_TMAX (26)
#define P_SKEW (38)
#define P_DAMP (700)
#define INITIAL_BIAS (72)
#define INITIAL_N (128)

/*
 * adapt(delta, numpoints, firsttime)
 *
 * Compute the next bias value from the delta that was just encoded.
 *
 * delta      - the delta value that was just written out
 * numpoints  - divisor used to scale delta; must be non-zero
 * firsttime  - non-zero to damp delta by P_DAMP, zero to halve it
 *
 * Returns the new bias.
 */
int
adapt(int delta, int numpoints, int firsttime)
{
	const int	span = P_BASE - P_TMIN;
	int		k = 0;

	delta /= firsttime ? P_DAMP : 2;
	delta += delta / numpoints;

	/* shrink delta until it fits the threshold, counting steps in k */
	for (; delta > (span * P_TMAX) / 2; k += P_BASE)
		delta /= span;

	return (k + ((span + 1) * delta) / (delta + P_SKEW));
}
74
75 int
76 get_minimum_char(char *str, int n)
77 {
78 gunichar ch = 0;
79 gunichar min = 0xffffff;
80
81 for(; *str; str = g_utf8_next_char(str)) {
82 ch = g_utf8_get_char(str);
83 if (ch >= n && ch < min)
84 min = ch;
85 }
86
87 return (min);
88 }
89
/*
 * encode_digit(n)
 *
 * Map a digit value to its output character: values below 26 become
 * 'a' + n, all other values become '0' + (n - 26).
 */
char
encode_digit(int n)
{
	return ((n < 26) ? (n + 'a') : ((n - 26) + '0'));
}
97
98 char *
99 punycode_encode(char *str)
100 {
101 char output[1024];
102 char *s;
103 gunichar c;
104 int need_coding = 0;
105 int l, len, i;
106
107 int n = INITIAL_N;
108 int delta = 0;
109 int bias = INITIAL_BIAS;
110 int h, b, m, k, t, q;
111
112 l = 0;
113 for (s=str; *s; s = g_utf8_next_char(s)) {
114 c = g_utf8_get_char(s);
115 if (c < 128)
116 output[l++] = *s;
117 else
118 need_coding = 1;
119 }
120
121 output[l] = '\0';
122
123 if (!need_coding)
124 return g_strdup(output);
125
126 h = b = strlen(output);
127
128 if (l > 0)
129 output[l++] = '-';
130 output[l] = '\0';
131
132 len = g_utf8_strlen(str, -1);
133 while (h < len) {
134 m = get_minimum_char(str, n);
135 delta += (m - n) * (h + 1);
136 n = m;
137 for (s=str; *s; s = g_utf8_next_char(s)) {
138 c = g_utf8_get_char(s);
139
140 if (c < n) delta ++;
141 if (c == n) {
142 q = delta;
143 for (k=P_BASE;; k+=P_BASE) {
144 if (k <= bias)
145 t = P_TMIN;
146 else if(k >= bias + P_TMAX)
147 t = P_TMAX;
148 else
149 t = k - bias;
150
151 if (q < t)
152 break;
153
154 output[l++] = encode_digit(t+((q-t)%(P_BASE-t)));
155 q = (q - t) / (P_BASE - t);
156 }
157 output[l++] = encode_digit(q);
158 bias = adapt(delta, h + 1, h == b);
159 delta = 0;
160 h ++;
161 }
162 }
163 delta ++;
164 n ++;
165 }
166
167 output[l] = '\0';
168 for (i=l+4;i>=4;i--)
169 output[i] = output[i-4];
170 l += 4;
171 output[0] = 'x';
172 output[1] = 'n';
173 output[2] = '-';
174 output[3] = '-';
175 output[l] = '\0';
176 return g_strdup(output);
177 }
178
/*
 * strrchr2(str, saveptr, ch)
 *
 * Walk backwards through str, one 'ch'-separated step per call.
 * On first call, *saveptr should be set to NULL.
 * On following calls, supply the same str and saveptr.
 *
 * Each call returns a pointer into str: the character right after
 * the 'ch' that was found, or str itself once the beginning of the
 * string has been reached.  The returned pointer therefore names the
 * tail of str starting at that label ("com", then "example.com", ...
 * for "www.example.com" and '.').
 *
 * Returns NULL when the whole string 'str' has been looped through,
 * when str is NULL, or when str is the empty string.
 */
const char *
strrchr2(const char *str, const char **saveptr, int ch)
{
	const char *ptr;

	if (str == NULL)
		return (NULL);

	if (*saveptr == NULL)
		*saveptr = str + strlen(str);

	/*
	 * Nothing left to scan.  This also covers the empty string on
	 * the first call, which would otherwise start reading at str - 1.
	 */
	if (*saveptr == str)
		return (NULL);

	for (ptr = *saveptr - 1; ptr != str && *ptr != ch; ptr--)
		;

	*saveptr = ptr;
	if (ptr != str)
		return (ptr + 1);
	return (ptr);
}
209
210 /*
211 * tld_tree_add(rule)
212 *
213 * Adds a tld-rule to the tree
214 */
215 void
216 tld_tree_add(const char *rule)
217 {
218 struct tld_tree_node *n;
219 const char *lbl;
220 const char *saveptr;
221
222 saveptr = NULL;
223 lbl = strrchr2(rule, &saveptr, '.');
224
225 for (n = &tld_tree_root; lbl != NULL;) {
226
227 if (strcmp(n->lbl, lbl) == 0) {
228 lbl = strrchr2(rule, &saveptr, '.');
229
230 if (!n->child)
231 break;
232
233 n = n->child;
234 continue;
235 }
236
237 if (n->next == NULL) {
238 TREE_INSERT_NEXT(n, lbl);
239 n = n->next;
240
241 lbl = strrchr2(rule, &saveptr, '.');
242 break;
243 }
244 n = n->next;
245 }
246
247 while (lbl) {
248 TREE_INSERT_CHILD(n, lbl);
249
250 lbl = strrchr2(rule, &saveptr, '.');
251 n = n->child;
252 }
253
254 n->flags |= TLD_TREE_END_NODE;
255 if (n->lbl[0] == '!') {
256 n->flags |= TLD_TREE_EXCEPTION;
257 n->lbl ++;
258 }
259 }
260
261 void
262 tld_tree_init()
263 {
264 FILE *fd;
265 char buf[1024];
266 char file[PATH_MAX];
267 char *ptr, *next_lbl;
268 char *enc_lbl;
269 char *rule, *rp;
270 char extra_ch;
271
272 snprintf(file, sizeof file, "%s" PS "tld-rules", resource_dir);
273 fd = fopen(file, "r");
274 if (fd == NULL) {
275 /* a poor replacement for the real list - but it's
276 * better than nothing.
277 */
278 tld_tree_add("*");
279 startpage_add("Could not open %s: this file is required "
280 "to handle TLD whitelisting properly", file);
281 return;
282 }
283
284 for (;;) {
285 ptr = fgets(buf, sizeof buf, fd);
286 if (ptr == NULL || feof(fd))
287 break;
288
289 /* skip comments */
290 if ((ptr = strstr(buf, "//")) != NULL)
291 *ptr = '\0';
292 /* skip anything after space or tab */
293 for (ptr = buf; *ptr; ptr++)
294 if (*ptr == ' ' || *ptr == '\t' ||
295 *ptr == '\n' || *ptr == '\r')
296 break;
297 *ptr = '\0';
298
299 if (!strlen(buf))
300 continue;
301
302 extra_ch = 0;
303 ptr = buf;
304 if (buf[0] == '!' && buf[0] == '*') {
305 extra_ch = buf[0];
306 ptr ++;
307 }
308
309
310 rule = NULL;
311 /* split into labels, and convert them one by one */
312 for (;;) {
313 if ((next_lbl = strchr(ptr, '.')))
314 *next_lbl = '\0';
315
316 enc_lbl = punycode_encode(ptr);
317 if (rule) {
318 rp = rule;
319 rule = g_strdup_printf("%s%s%s", rp, enc_lbl,
320 next_lbl ? "." : "");
321 g_free(rp);
322 g_free(enc_lbl);
323 } else {
324 rule = g_strdup_printf("%.1s%s%s",
325 extra_ch ? buf:"", enc_lbl,
326 next_lbl ? ".":"");
327 g_free(enc_lbl);
328 }
329
330 if (!next_lbl)
331 break;
332 ptr = next_lbl + 1;
333 }
334 tld_tree_add(rule);
335 }
336
337 fclose(fd);
338 }
339
340 /*
341 * tld_get_suffix(domain)
342 *
343 * Find the public suffix for domain.
344 *
345 * Returns a pointer to the suffix position
346 * in domain, or NULL if no public suffix
347 * was found.
348 */
349 char *
350 tld_get_suffix(const char *domain)
351 {
352 struct tld_tree_node *n;
353 const char *suffix;
354 const char *lbl, *saveptr;
355 const char *tmp_saveptr, *tmp_lbl;
356
357 if (domain == NULL)
358 return (NULL);
359 if (domain[0] == '.')
360 return (NULL);
361
362 saveptr = NULL;
363 suffix = NULL;
364 lbl = strrchr2(domain, &saveptr, '.');
365
366 for (n = &tld_tree_root; n != NULL && lbl != NULL;) {
367
368 if (!strlen(n->lbl)) {
369 n = n->next;
370 continue;
371 }
372
373 if (n->lbl[0] == '*') {
374 if (n->flags & TLD_TREE_END_NODE) {
375
376 tmp_saveptr = saveptr;
377 tmp_lbl = lbl;
378
379 lbl = strrchr2(domain, &saveptr, '.');
380
381 /* Save possible public suffix */
382 suffix = lbl;
383 saveptr = tmp_saveptr;
384 lbl = tmp_lbl;
385 }
386
387 n = n->next;
388 continue;
389 }
390
391 if (strcmp(n->lbl, lbl) == 0) {
392 if (n->flags & TLD_TREE_EXCEPTION) {
393 /* We're done looking */
394 suffix = lbl;
395 break;
396 }
397
398 lbl = strrchr2(domain, &saveptr, '.');
399
400 /* Possible public suffix - other rules might
401 * still apply
402 */
403 if (n->flags & TLD_TREE_END_NODE)
404 suffix = lbl;
405
406 /* Domain too short */
407 if (lbl == NULL) {
408 /* Check if we have a child that is '*' */
409 for (n = n->child; n; n = n->next)
410 if (n->lbl[0] == '*')
411 suffix = NULL;
412 break;
413 }
414
415 if (n->child == NULL)
416 break;
417
418 n = n->child;
419 continue;
420 }
421
422 if (n->next == NULL)
423 break;
424 n = n->next;
425 }
426
427 /* If we can't find a matching suffix, it can mean that either
428 * a) the user is surfing a local prefix
429 * b) the list is not properly updated
430 *
431 * In any case - in order not to break stuff while surfing
432 * new TLD's, we return the public suffix as the top 2 labels
433 *
434 * www.abc.xyz therefore has public suffix 'abc.xyz'
435 */
436 if (!suffix) {
437 saveptr = NULL;
438 strrchr2(domain, &saveptr, '.');
439 lbl = strrchr2(domain, &saveptr, '.');
440 suffix = lbl;
441 }
442
443 return ((char*)suffix);
444 }