"Fossies" - the Fresh Open Source Software Archive

Member "xombrero-1.6.4/tldlist.c" (17 Feb 2015, 8591 Bytes) of package /linux/www/old/xombrero-1.6.4.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "tldlist.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (c) 2012 Elias Norberg <xyzzy@kudzu.se>
    3  *
    4  * Permission to use, copy, modify, and distribute this software for any
    5  * purpose with or without fee is hereby granted, provided that the above
    6  * copyright notice and this permission notice appear in all copies.
    7  *
    8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   15  */
   16 
   17 #include <xombrero.h>
   18 
   19 #define TLD_TREE_END_NODE 1
   20 #define TLD_TREE_EXCEPTION 2
   21 
   22 struct tld_tree_node {
   23     struct tld_tree_node    *next;
   24     struct tld_tree_node    *child;
   25     const char      *lbl;
   26     char            flags;
   27 };
   28 
   29 struct tld_tree_node tld_tree_root = { NULL, NULL, "" };
   30 
   31 #define TREE_INSERT_CHILD(n, data) \
   32         n->child = g_malloc(sizeof *n); \
   33         n->child->next = NULL; \
   34         n->child->child = NULL; \
   35         n->child->flags = 0; \
   36         n->child->lbl = data;
   37 
   38 #define TREE_INSERT_NEXT(n, data) \
   39         n->next = g_malloc(sizeof *n); \
   40         n->next->next = NULL; \
   41         n->next->child = NULL; \
   42         n->next->flags = 0; \
   43         n->next->lbl = data;
   44 
   45 #define P_BASE      (36)
   46 #define P_TMIN      (1)
   47 #define P_TMAX      (26)
   48 #define P_SKEW      (38)
   49 #define P_DAMP      (700)
   50 #define INITIAL_BIAS    (72)
   51 #define INITIAL_N   (128)
   52 
   53 int
   54 adapt(int delta, int numpoints, int firsttime)
   55 {
   56     int     k;
   57 
   58     if (firsttime)
   59         delta = delta / P_DAMP;
   60     else
   61         delta = delta / 2;
   62 
   63     delta += (delta / numpoints);
   64 
   65     k = 0;
   66     while (delta > (((P_BASE - P_TMIN) * P_TMAX) / 2)) {
   67         delta = delta / (P_BASE - P_TMIN);
   68         k += P_BASE;
   69     }
   70 
   71     k += (((P_BASE - P_TMIN + 1) * delta) / (delta + P_SKEW));
   72     return (k);
   73 }
   74 
   75 int
   76 get_minimum_char(char *str, int n)
   77 {
   78     gunichar    ch = 0;
   79     gunichar    min = 0xffffff;
   80 
   81     for(; *str; str = g_utf8_next_char(str)) {
   82         ch = g_utf8_get_char(str);
   83         if (ch >= n && ch < min)
   84             min = ch;
   85     }
   86 
   87     return (min);
   88 }
   89 
   90 char
   91 encode_digit(int n)
   92 {
   93     if (n < 26)
   94         return n + 'a';
   95     return (n - 26) + '0';
   96 }
   97 
   98 char *
   99 punycode_encode(char *str)
  100 {
  101     char        output[1024];
  102     char        *s;
  103     gunichar    c;
  104     int     need_coding = 0;
  105     int     l, len, i;
  106 
  107     int     n = INITIAL_N;
  108     int     delta = 0;
  109     int     bias = INITIAL_BIAS;
  110     int     h, b, m, k, t, q;
  111 
  112     l = 0;
  113     for (s=str; *s; s = g_utf8_next_char(s)) {
  114         c = g_utf8_get_char(s);
  115         if (c < 128)
  116             output[l++] = *s;
  117         else
  118             need_coding = 1;
  119     }
  120 
  121     output[l] = '\0';
  122 
  123     if (!need_coding)
  124         return g_strdup(output);
  125 
  126     h = b = strlen(output);
  127 
  128     if (l > 0)
  129         output[l++] = '-';
  130     output[l] = '\0';
  131 
  132     len = g_utf8_strlen(str, -1);
  133     while (h < len) {
  134         m = get_minimum_char(str, n);
  135         delta += (m - n) * (h + 1);
  136         n = m;
  137         for (s=str; *s; s = g_utf8_next_char(s)) {
  138             c = g_utf8_get_char(s);
  139 
  140             if (c < n) delta ++;
  141             if (c == n) {
  142                 q = delta;
  143                 for (k=P_BASE;; k+=P_BASE) {
  144                     if (k <= bias)
  145                         t = P_TMIN;
  146                     else if(k >= bias + P_TMAX)
  147                         t = P_TMAX;
  148                     else
  149                         t = k - bias;
  150 
  151                     if (q < t)
  152                         break;
  153 
  154                     output[l++] = encode_digit(t+((q-t)%(P_BASE-t)));
  155                     q = (q - t) / (P_BASE - t);
  156                 }
  157                 output[l++] = encode_digit(q);
  158                 bias = adapt(delta, h + 1, h == b);
  159                 delta = 0;
  160                 h ++;
  161             }
  162         }
  163         delta ++;
  164         n ++;
  165     }
  166 
  167     output[l] = '\0';
  168     for (i=l+4;i>=4;i--)
  169         output[i] = output[i-4];
  170     l += 4;
  171     output[0] = 'x';
  172     output[1] = 'n';
  173     output[2] = '-';
  174     output[3] = '-';
  175     output[l] = '\0';
  176     return g_strdup(output);
  177 }
  178 
  179 /*
  180  * strrchr2(str, saveptr, ch)
  181  *
  182  * Walk backwards through str, jumping to next 'ch'
  183  * On first call, *saveptr should be set to NULL.
  184  * On following calls, supply the same saveptr.
  185  *
  186  * Returns NULL when the whole string 'str' has been
  187  * looped through. Otherwise returns the position
  188  * before the next 'ch'.
  189  */
  190 const char *
  191 strrchr2(const char *str, const char **saveptr, int ch)
  192 {
  193     const char      *ptr;
  194 
  195     if (str != NULL && *saveptr == NULL) {
  196         *saveptr = str + strlen(str);
  197     } else if (str == *saveptr) {
  198         return (NULL);
  199     }
  200 
  201     for (ptr= *saveptr - 1; ptr != str && *ptr != ch; ptr--)
  202         ;
  203 
  204     *saveptr = ptr;
  205     if (ptr != str)
  206         return (ptr+1);
  207     return (ptr);
  208 }
  209 
  210 /*
  211  * tld_tree_add(rule)
  212  *
  213  * Adds a tld-rule to the tree
  214  */
  215 void
  216 tld_tree_add(const char *rule)
  217 {
  218     struct tld_tree_node    *n;
  219     const char      *lbl;
  220     const char      *saveptr;
  221 
  222     saveptr = NULL;
  223     lbl = strrchr2(rule, &saveptr, '.');
  224 
  225     for (n = &tld_tree_root; lbl != NULL;) {
  226 
  227         if (strcmp(n->lbl, lbl) == 0) {
  228             lbl = strrchr2(rule, &saveptr, '.');
  229 
  230             if (!n->child)
  231                 break;
  232 
  233             n = n->child;
  234             continue;
  235         }
  236 
  237         if (n->next == NULL) {
  238             TREE_INSERT_NEXT(n, lbl);
  239             n = n->next;
  240 
  241             lbl = strrchr2(rule, &saveptr, '.');
  242             break;
  243         }
  244         n = n->next;
  245     }
  246 
  247     while (lbl) {
  248         TREE_INSERT_CHILD(n, lbl);
  249 
  250         lbl = strrchr2(rule, &saveptr, '.');
  251         n = n->child;
  252     }
  253 
  254     n->flags |= TLD_TREE_END_NODE;
  255     if (n->lbl[0] == '!') {
  256         n->flags |= TLD_TREE_EXCEPTION;
  257         n->lbl ++;
  258     }
  259 }
  260 
  261 void
  262 tld_tree_init()
  263 {
  264     FILE        *fd;
  265     char        buf[1024];
  266     char        file[PATH_MAX];
  267     char        *ptr, *next_lbl;
  268     char        *enc_lbl;
  269     char        *rule, *rp;
  270     char        extra_ch;
  271 
  272     snprintf(file, sizeof file, "%s" PS "tld-rules", resource_dir);
  273     fd = fopen(file, "r");
  274     if (fd == NULL) {
  275         /* a poor replacement for the real list - but it's
  276          * better than nothing.
  277          */
  278         tld_tree_add("*");
  279         startpage_add("Could not open %s: this file is required "
  280             "to handle TLD whitelisting properly", file);
  281         return;
  282     }
  283 
  284     for (;;) {
  285         ptr = fgets(buf, sizeof buf, fd);
  286         if (ptr == NULL || feof(fd))
  287             break;
  288 
  289         /* skip comments */
  290         if ((ptr = strstr(buf, "//")) != NULL)
  291             *ptr = '\0';
  292         /* skip anything after space or tab */
  293         for (ptr = buf; *ptr; ptr++)
  294             if (*ptr == ' ' || *ptr == '\t' ||
  295                 *ptr == '\n' || *ptr == '\r')
  296                 break;
  297         *ptr = '\0';
  298 
  299         if (!strlen(buf))
  300             continue;
  301 
  302         extra_ch = 0;
  303         ptr = buf;
  304         if (buf[0] == '!' && buf[0] == '*') {
  305             extra_ch = buf[0];
  306             ptr ++;
  307         }
  308 
  309 
  310         rule = NULL;
  311         /* split into labels, and convert them one by one */
  312         for (;;) {
  313             if ((next_lbl = strchr(ptr, '.')))
  314                 *next_lbl = '\0';
  315 
  316             enc_lbl = punycode_encode(ptr);
  317             if (rule) {
  318                 rp = rule;
  319                 rule = g_strdup_printf("%s%s%s", rp, enc_lbl,
  320                     next_lbl ? "." : "");
  321                 g_free(rp);
  322                 g_free(enc_lbl);
  323             } else {
  324                 rule = g_strdup_printf("%.1s%s%s",
  325                     extra_ch ? buf:"", enc_lbl,
  326                     next_lbl ? ".":"");
  327                 g_free(enc_lbl);
  328             }
  329 
  330             if (!next_lbl)
  331                 break;
  332             ptr = next_lbl + 1;
  333         }
  334         tld_tree_add(rule);
  335     }
  336 
  337     fclose(fd);
  338 }
  339 
  340 /*
  341  * tld_get_suffix(domain)
  342  *
  343  * Find the public suffix for domain.
  344  *
  345  * Returns a pointer to the suffix position
  346  * in domain, or NULL if no public suffix
  347  * was found.
  348  */
  349 char *
  350 tld_get_suffix(const char *domain)
  351 {
  352     struct tld_tree_node    *n;
  353     const char      *suffix;
  354     const char      *lbl, *saveptr;
  355     const char      *tmp_saveptr, *tmp_lbl;
  356 
  357     if (domain == NULL)
  358         return (NULL);
  359     if (domain[0] == '.')
  360         return (NULL);
  361 
  362     saveptr = NULL;
  363     suffix = NULL;
  364     lbl = strrchr2(domain, &saveptr, '.');
  365 
  366     for (n = &tld_tree_root; n != NULL && lbl != NULL;) {
  367 
  368         if (!strlen(n->lbl)) {
  369             n = n->next;
  370             continue;
  371         }
  372 
  373         if (n->lbl[0] == '*') {
  374             if (n->flags & TLD_TREE_END_NODE) {
  375 
  376                 tmp_saveptr = saveptr;
  377                 tmp_lbl = lbl;
  378 
  379                 lbl = strrchr2(domain, &saveptr, '.');
  380 
  381                 /* Save possible public suffix */
  382                 suffix = lbl;
  383                 saveptr = tmp_saveptr;
  384                 lbl = tmp_lbl;
  385             }
  386 
  387             n = n->next;
  388             continue;
  389         }
  390 
  391         if (strcmp(n->lbl, lbl) == 0) {
  392             if (n->flags & TLD_TREE_EXCEPTION) {
  393                 /* We're done looking */
  394                 suffix = lbl;
  395                 break;
  396             }
  397 
  398             lbl = strrchr2(domain, &saveptr, '.');
  399 
  400             /* Possible public suffix - other rules might
  401              * still apply
  402              */
  403             if (n->flags & TLD_TREE_END_NODE)
  404                 suffix = lbl;
  405 
  406             /* Domain too short */
  407             if (lbl == NULL) {
  408                 /* Check if we have a child that is '*' */
  409                 for (n = n->child; n; n = n->next)
  410                     if (n->lbl[0] == '*')
  411                         suffix = NULL;
  412                 break;
  413             }
  414 
  415             if (n->child == NULL)
  416                 break;
  417 
  418             n = n->child;
  419             continue;
  420         }
  421 
  422         if (n->next == NULL)
  423             break;
  424         n = n->next;
  425     }
  426 
  427     /* If we can't find a matching suffix, it can mean that either
  428      * a) the user is surfing a local prefix
  429      * b) the list is not properly updated
  430      *
  431      * In any case - in order not to break stuff while surfing
  432      * new TLD's, we return the public suffix as the top 2 labels
  433      *
  434      * www.abc.xyz therefore has public suffix 'abc.xyz'
  435      */
  436     if (!suffix) {
  437         saveptr = NULL;
  438         strrchr2(domain, &saveptr, '.');
  439         lbl = strrchr2(domain, &saveptr, '.');
  440         suffix = lbl;
  441     }
  442 
  443     return ((char*)suffix);
  444 }