"Fossies" - the Fresh Open Source Software Archive

Member "xapian-core-1.4.14/languages/compiler/analyser.c" (23 Nov 2019, 45222 Bytes) of package /linux/www/xapian-core-1.4.14.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "analyser.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.4.13_vs_1.4.14.

    1 
    2 #include <stdio.h>   /* printf etc */
    3 #include <stdlib.h>  /* exit */
    4 #include <string.h>  /* memmove */
    5 #include "header.h"
    6 
    7 typedef enum {
    8     e_token_omitted = 0,
    9     e_unexpected_token = 1,
   10     e_string_omitted = 2,
   11     e_unexpected_token_in_among = 3,
   12     /* For codes above here, report "after " t->previous_token after the error. */
   13     e_unresolved_substring = 14,
   14     e_not_allowed_inside_reverse = 15,
   15     e_empty_grouping = 16,
   16     e_already_backwards = 17,
   17     e_empty_among = 18,
   18     e_adjacent_bracketed_in_among = 19,
   19     e_substring_preceded_by_substring = 20,
   20     /* For codes below here, tokeniser->b is printed before the error. */
   21     e_redeclared = 30,
   22     e_undeclared = 31,
   23     e_declared_as_different_mode = 32,
   24     e_not_of_type_x = 33,
   25     e_not_of_type_string_or_integer = 34,
   26     e_misplaced = 35,
   27     e_redefined = 36,
   28     e_misused = 37
   29 } error_code;
   30 
   31 /* recursive usage: */
   32 
   33 static void read_program_(struct analyser * a, int terminator);
   34 static struct node * read_C(struct analyser * a);
   35 static struct node * C_style(struct analyser * a, const char * s, int token);
   36 
   37 
   38 static void print_node_(struct node * p, int n, const char * s) {
   39 
   40     int i;
   41     for (i = 0; i < n; i++) fputs(i == n - 1 ? s : "  ", stdout);
   42     printf("%s ", name_of_token(p->type));
   43     if (p->name) report_b(stdout, p->name->b);
   44     if (p->literalstring) {
   45         printf("'");
   46         report_b(stdout, p->literalstring);
   47         printf("'");
   48     }
   49     printf("\n");
   50     if (p->AE) print_node_(p->AE, n+1, "# ");
   51     if (p->left) print_node_(p->left, n+1, "  ");
   52     if (p->aux) print_node_(p->aux, n+1, "@ ");
   53     if (p->right) print_node_(p->right, n, "  ");
   54 }
   55 
   56 extern void print_program(struct analyser * a) {
   57     print_node_(a->program, 0, "  ");
   58 }
   59 
   60 static struct node * new_node(struct analyser * a, int type) {
   61     NEW(node, p);
   62     p->next = a->nodes; a->nodes = p;
   63     p->left = 0;
   64     p->right = 0;
   65     p->aux = 0;
   66     p->AE = 0;
   67     p->name = 0;
   68     p->literalstring = 0;
   69     p->mode = a->mode;
   70     p->line_number = a->tokeniser->line_number;
   71     p->type = type;
   72     return p;
   73 }
   74 
   75 static const char * name_of_mode(int n) {
   76     switch (n) {
   77         case m_backward: return "string backward";
   78         case m_forward:  return "string forward";
   79     /*  case m_integer:  return "integer";  */
   80     }
   81     fprintf(stderr, "Invalid mode %d in name_of_mode()\n", n);
   82     exit(1);
   83 }
   84 
   85 static const char * name_of_type(int n) {
   86     switch (n) {
   87         case 's': return "string";
   88         case 'i': return "integer";
   89         case 'r': return "routine";
   90         case 'R': return "routine or grouping";
   91         case 'g': return "grouping";
   92     }
   93     fprintf(stderr, "Invalid type %d in name_of_type()\n", n);
   94     exit(1);
   95 }
   96 
   97 static const char * name_of_name_type(int code) {
   98     switch (code) {
   99         case t_string: return "string";
  100         case t_boolean: return "boolean";
  101         case t_integer: return "integer";
  102         case t_routine: return "routine";
  103         case t_external: return "external";
  104         case t_grouping: return "grouping";
  105     }
  106     fprintf(stderr, "Invalid type code %d in name_of_name_type()\n", code);
  107     exit(1);
  108 }
  109 
  110 static void count_error(struct analyser * a) {
  111     struct tokeniser * t = a->tokeniser;
  112     if (t->error_count >= 20) { fprintf(stderr, "... etc\n"); exit(1); }
  113     t->error_count++;
  114 }
  115 
  116 static void error2(struct analyser * a, error_code n, int x) {
  117     struct tokeniser * t = a->tokeniser;
  118     count_error(a);
  119     fprintf(stderr, "%s:%d: ", t->file, t->line_number);
  120     if ((int)n >= (int)e_redeclared) report_b(stderr, t->b);
  121     switch (n) {
  122         case e_token_omitted:
  123             fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
  124         case e_unexpected_token_in_among:
  125             fprintf(stderr, "in among(...), ");
  126             /* fall through */
  127         case e_unexpected_token:
  128             fprintf(stderr, "unexpected %s", name_of_token(t->token));
  129             if (t->token == c_number) fprintf(stderr, " %d", t->number);
  130             if (t->token == c_name) {
  131                 fprintf(stderr, " ");
  132                 report_b(stderr, t->b);
  133             } break;
  134         case e_string_omitted:
  135             fprintf(stderr, "string omitted"); break;
  136 
  137         case e_unresolved_substring:
  138             fprintf(stderr, "unresolved substring on line %d", x); break;
  139         case e_not_allowed_inside_reverse:
  140             fprintf(stderr, "%s not allowed inside reverse(...)", name_of_token(t->token)); break;
  141         case e_empty_grouping:
  142             fprintf(stderr, "empty grouping"); break;
  143         case e_already_backwards:
  144             fprintf(stderr, "backwards used when already in this mode"); break;
  145         case e_empty_among:
  146             fprintf(stderr, "empty among(...)"); break;
  147         case e_adjacent_bracketed_in_among:
  148             fprintf(stderr, "two adjacent bracketed expressions in among(...)"); break;
  149         case e_substring_preceded_by_substring:
  150             fprintf(stderr, "substring preceded by another substring on line %d", x); break;
  151 
  152         case e_redeclared:
  153             fprintf(stderr, " re-declared"); break;
  154         case e_undeclared:
  155             fprintf(stderr, " undeclared"); break;
  156         case e_declared_as_different_mode:
  157             fprintf(stderr, " declared as %s mode; used as %s mode",
  158                             name_of_mode(a->mode), name_of_mode(x)); break;
  159         case e_not_of_type_x:
  160             fprintf(stderr, " not of type %s", name_of_type(x)); break;
  161         case e_not_of_type_string_or_integer:
  162             fprintf(stderr, " not of type string or integer"); break;
  163         case e_misplaced:
  164             fprintf(stderr, " misplaced"); break;
  165         case e_redefined:
  166             fprintf(stderr, " redefined"); break;
  167         case e_misused:
  168             fprintf(stderr, " mis-used as %s mode",
  169                             name_of_mode(x)); break;
  170     }
  171     if ((int)n < (int)e_unresolved_substring && t->previous_token > 0)
  172         fprintf(stderr, " after %s", name_of_token(t->previous_token));
  173     fprintf(stderr, "\n");
  174 }
  175 
  176 static void error(struct analyser * a, error_code n) { error2(a, n, 0); }
  177 
  178 static void error4(struct analyser * a, struct name * q) {
  179     count_error(a);
  180     fprintf(stderr, "%s:%d: ", a->tokeniser->file, q->used->line_number);
  181     report_b(stderr, q->b);
  182     fprintf(stderr, " undefined\n");
  183 }
  184 
  185 static void omission_error(struct analyser * a, int n) {
  186     a->tokeniser->omission = n;
  187     error(a, e_token_omitted);
  188 }
  189 
  190 static int check_token(struct analyser * a, int code) {
  191     struct tokeniser * t = a->tokeniser;
  192     if (t->token != code) { omission_error(a, code); return false; }
  193     return true;
  194 }
  195 
  196 static int get_token(struct analyser * a, int code) {
  197     struct tokeniser * t = a->tokeniser;
  198     read_token(t);
  199     {
  200         int x = check_token(a, code);
  201         if (!x) t->token_held = true;
  202         return x;
  203     }
  204 }
  205 
  206 static struct name * look_for_name(struct analyser * a) {
  207     symbol * q = a->tokeniser->b;
  208     struct name * p;
  209     for (p = a->names; p; p = p->next) {
  210         symbol * b = p->b;
  211         int n = SIZE(b);
  212         if (n == SIZE(q) && memcmp(q, b, n * sizeof(symbol)) == 0) {
  213             p->referenced = true;
  214             return p;
  215         }
  216     }
  217     return 0;
  218 }
  219 
  220 static struct name * find_name(struct analyser * a) {
  221     struct name * p = look_for_name(a);
  222     if (p == 0) error(a, e_undeclared);
  223     return p;
  224 }
  225 
  226 static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
  227     if (p->mode < 0) p->mode = mode; else
  228     if (p->mode != mode) error2(a, e_misused, mode);
  229 }
  230 
  231 static void check_name_type(struct analyser * a, struct name * p, int type) {
  232     switch (type) {
  233         case 's':
  234             if (p->type == t_string) return;
  235             break;
  236         case 'i':
  237             if (p->type == t_integer) return;
  238             break;
  239         case 'b':
  240             if (p->type == t_boolean) return;
  241             break;
  242         case 'R':
  243             if (p->type == t_grouping) return;
  244             /* FALLTHRU */
  245         case 'r':
  246             if (p->type == t_routine || p->type == t_external) return;
  247             break;
  248         case 'g':
  249             if (p->type == t_grouping) return;
  250             break;
  251     }
  252     error2(a, e_not_of_type_x, type);
  253 }
  254 
  255 static void read_names(struct analyser * a, int type) {
  256     struct tokeniser * t = a->tokeniser;
  257     if (!get_token(a, c_bra)) return;
  258     while (true) {
  259         int token = read_token(t);
  260         switch (token) {
  261             case c_len: {
  262                 /* Context-sensitive token - once declared as a name, it loses
  263                  * its special meaning, for compatibility with older versions
  264                  * of snowball.
  265                  */
  266                 static const symbol c_len_lit[] = {
  267                     'l', 'e', 'n'
  268                 };
  269                 MOVE_TO_B(t->b, c_len_lit);
  270                 goto handle_as_name;
  271             }
  272             case c_lenof: {
  273                 /* Context-sensitive token - once declared as a name, it loses
  274                  * its special meaning, for compatibility with older versions
  275                  * of snowball.
  276                  */
  277                 static const symbol c_lenof_lit[] = {
  278                     'l', 'e', 'n', 'o', 'f'
  279                 };
  280                 MOVE_TO_B(t->b, c_lenof_lit);
  281                 goto handle_as_name;
  282             }
  283             case c_name:
  284 handle_as_name:
  285                 if (look_for_name(a) != 0) error(a, e_redeclared); else {
  286                     NEW(name, p);
  287                     p->b = copy_b(t->b);
  288                     p->type = type;
  289                     p->mode = -1; /* routines, externals */
  290                     /* We defer assigning counts until after we've eliminated
  291                      * variables whose values are never used. */
  292                     p->count = -1;
  293                     p->referenced = false;
  294                     p->used_in_among = false;
  295                     p->used = 0;
  296                     p->value_used = false;
  297                     p->initialised = false;
  298                     p->used_in_definition = false;
  299                     p->local_to = 0;
  300                     p->grouping = 0;
  301                     p->definition = 0;
  302                     p->declaration_line_number = t->line_number;
  303                     p->next = a->names;
  304                     a->names = p;
  305                     if (token != c_name) {
  306                         disable_token(t, token);
  307                     }
  308                 }
  309                 break;
  310             default:
  311                 if (!check_token(a, c_ket)) t->token_held = true;
  312                 return;
  313         }
  314     }
  315 }
  316 
  317 static symbol * new_literalstring(struct analyser * a) {
  318     NEW(literalstring, p);
  319     p->b = copy_b(a->tokeniser->b);
  320     p->next = a->literalstrings;
  321     a->literalstrings = p;
  322     return p->b;
  323 }
  324 
  325 static int read_AE_test(struct analyser * a) {
  326 
  327     struct tokeniser * t = a->tokeniser;
  328     switch (read_token(t)) {
  329         case c_assign: return c_mathassign;
  330         case c_plusassign:
  331         case c_minusassign:
  332         case c_multiplyassign:
  333         case c_divideassign:
  334         case c_eq:
  335         case c_ne:
  336         case c_gr:
  337         case c_ge:
  338         case c_ls:
  339         case c_le: return t->token;
  340         default: error(a, e_unexpected_token); t->token_held = true; return c_eq;
  341     }
  342 }
  343 
  344 static int binding(int t) {
  345     switch (t) {
  346         case c_plus: case c_minus: return 1;
  347         case c_multiply: case c_divide: return 2;
  348         default: return -2;
  349     }
  350 }
  351 
  352 static void mark_used_in(struct analyser * a, struct name * q, struct node * p) {
  353     if (!q->used) {
  354         q->used = p;
  355         q->local_to = a->program_end->name;
  356     } else if (q->local_to) {
  357         if (q->local_to != a->program_end->name) {
  358             /* Used in more than one routine/external. */
  359             q->local_to = NULL;
  360         }
  361     }
  362 }
  363 
  364 static void name_to_node(struct analyser * a, struct node * p, int type) {
  365     struct name * q = find_name(a);
  366     if (q) {
  367         check_name_type(a, q, type);
  368         mark_used_in(a, q, p);
  369     }
  370     p->name = q;
  371 }
  372 
  373 static struct node * read_AE(struct analyser * a, int B) {
  374     struct tokeniser * t = a->tokeniser;
  375     struct node * p;
  376     struct node * q;
  377     switch (read_token(t)) {
  378         case c_minus: /* monadic */
  379             q = read_AE(a, 100);
  380             if (q->type == c_neg) {
  381                 /* Optimise away double negation, which avoids generators
  382                  * having to worry about generating "--" (decrement operator
  383                  * in many languages).
  384                  */
  385                 p = q->right;
  386                 /* Don't free q, it's in the linked list a->nodes. */
  387                 break;
  388             }
  389             p = new_node(a, c_neg);
  390             p->right = q;
  391             break;
  392         case c_bra:
  393             p = read_AE(a, 0);
  394             get_token(a, c_ket);
  395             break;
  396         case c_name:
  397             p = new_node(a, c_name);
  398             name_to_node(a, p, 'i');
  399             if (p->name) p->name->value_used = true;
  400             break;
  401         case c_maxint:
  402         case c_minint:
  403             a->int_limits_used = true;
  404             /* fall through */
  405         case c_cursor:
  406         case c_limit:
  407         case c_len:
  408         case c_size:
  409             p = new_node(a, t->token);
  410             break;
  411         case c_number:
  412             p = new_node(a, c_number);
  413             p->number = t->number;
  414             break;
  415         case c_lenof:
  416         case c_sizeof:
  417             p = C_style(a, "s", t->token);
  418             break;
  419         default:
  420             error(a, e_unexpected_token);
  421             t->token_held = true;
  422             return 0;
  423     }
  424     while (true) {
  425         int token = read_token(t);
  426         int b = binding(token);
  427         if (binding(token) <= B) {
  428             t->token_held = true;
  429             return p;
  430         }
  431         q = new_node(a, token);
  432         q->left = p;
  433         q->right = read_AE(a, b);
  434         p = q;
  435     }
  436 }
  437 
  438 static struct node * read_C_connection(struct analyser * a, struct node * q, int op) {
  439     struct tokeniser * t = a->tokeniser;
  440     struct node * p = new_node(a, op);
  441     struct node * p_end = q;
  442     p->left = q;
  443     do {
  444         q = read_C(a);
  445         p_end->right = q; p_end = q;
  446     } while (read_token(t) == op);
  447     t->token_held = true;
  448     return p;
  449 }
  450 
  451 static struct node * read_C_list(struct analyser * a) {
  452     struct tokeniser * t = a->tokeniser;
  453     struct node * p = new_node(a, c_bra);
  454     struct node * p_end = 0;
  455     while (true) {
  456         int token = read_token(t);
  457         if (token == c_ket) return p;
  458         if (token < 0) { omission_error(a, c_ket); return p; }
  459         t->token_held = true;
  460         {
  461             struct node * q = read_C(a);
  462             while (true) {
  463                 token = read_token(t);
  464                 if (token != c_and && token != c_or) {
  465                     t->token_held = true;
  466                     break;
  467                 }
  468                 q = read_C_connection(a, q, token);
  469             }
  470             if (p_end == 0) p->left = q; else p_end->right = q;
  471             p_end = q;
  472         }
  473     }
  474 }
  475 
  476 static struct node * C_style(struct analyser * a, const char * s, int token) {
  477     int i;
  478     struct node * p = new_node(a, token);
  479     for (i = 0; s[i] != 0; i++) switch (s[i]) {
  480         case 'C':
  481             p->left = read_C(a); continue;
  482         case 'D':
  483             p->aux = read_C(a); continue;
  484         case 'A':
  485             p->AE = read_AE(a, 0); continue;
  486         case 'f':
  487             get_token(a, c_for); continue;
  488         case 'S':
  489             {
  490                 int str_token = read_token(a->tokeniser);
  491                 if (str_token == c_name) name_to_node(a, p, 's'); else
  492                 if (str_token == c_literalstring) p->literalstring = new_literalstring(a);
  493                 else error(a, e_string_omitted);
  494             }
  495             continue;
  496         case 'b':
  497         case 's':
  498         case 'i':
  499             if (get_token(a, c_name)) name_to_node(a, p, s[i]);
  500             continue;
  501     }
  502     return p;
  503 }
  504 
  505 static struct node * read_literalstring(struct analyser * a) {
  506     struct node * p = new_node(a, c_literalstring);
  507     p->literalstring = new_literalstring(a);
  508     return p;
  509 }
  510 
  511 static void reverse_b(symbol * b) {
  512     int i = 0; int j = SIZE(b) - 1;
  513     while (i < j) {
  514         int ch1 = b[i]; int ch2 = b[j];
  515         b[i++] = ch2; b[j--] = ch1;
  516     }
  517 }
  518 
  519 static int compare_amongvec(const void *pv, const void *qv) {
  520     const struct amongvec * p = (const struct amongvec*)pv;
  521     const struct amongvec * q = (const struct amongvec*)qv;
  522     symbol * b_p = p->b; int p_size = p->size;
  523     symbol * b_q = q->b; int q_size = q->size;
  524     int smaller_size = p_size < q_size ? p_size : q_size;
  525     int i;
  526     for (i = 0; i < smaller_size; i++)
  527         if (b_p[i] != b_q[i]) return b_p[i] - b_q[i];
  528     if (p_size - q_size)
  529         return p_size - q_size;
  530     return p->line_number - q->line_number;
  531 }
  532 
  533 #define PTR_NULL_CHECK(P, Q) do {\
  534         if ((Q) == NULL) {\
  535             if ((P) != NULL) return 1;\
  536         } else {\
  537             if ((P) == NULL) return -1;\
  538         }\
  539     } while (0)
  540 
  541 static int compare_node(const struct node *p, const struct node *q) {
  542     PTR_NULL_CHECK(p, q);
  543     if (q == NULL) {
  544         /* p must be NULL too. */
  545         return 0;
  546     }
  547 
  548     if (p->type != q->type) return p->type > q->type ? 1 : -1;
  549     if (p->mode != q->mode) return p->mode > q->mode ? 1 : -1;
  550     if (p->type == c_number) {
  551         if (p->number != q->number)
  552             return p->number > q->number ? 1 : -1;
  553     }
  554 
  555     PTR_NULL_CHECK(p->left, q->left);
  556     if (p->left) {
  557         int r = compare_node(p->left, q->left);
  558         if (r != 0) return r;
  559     }
  560 
  561     PTR_NULL_CHECK(p->AE, q->AE);
  562     if (p->AE) {
  563         int r = compare_node(p->AE, q->AE);
  564         if (r != 0) return r;
  565     }
  566 
  567     PTR_NULL_CHECK(p->aux, q->aux);
  568     if (p->aux) {
  569         int r = compare_node(p->aux, q->aux);
  570         if (r != 0) return r;
  571     }
  572 
  573     PTR_NULL_CHECK(p->name, q->name);
  574     if (p->name) {
  575         int r;
  576         if (SIZE(p->name->b) != SIZE(q->name->b)) {
  577             return SIZE(p->name->b) - SIZE(q->name->b);
  578         }
  579         r = memcmp(p->name->b, q->name->b,
  580                    SIZE(p->name->b) * sizeof(symbol));
  581         if (r != 0) return r;
  582     }
  583 
  584     PTR_NULL_CHECK(p->literalstring, q->literalstring);
  585     if (p->literalstring) {
  586         int r;
  587         if (SIZE(p->literalstring) != SIZE(q->literalstring)) {
  588             return SIZE(p->literalstring) - SIZE(q->literalstring);
  589         }
  590         r = memcmp(p->literalstring, q->literalstring,
  591                    SIZE(p->literalstring) * sizeof(symbol));
  592         if (r != 0) return r;
  593     }
  594 
  595     return compare_node(p->right, q->right);
  596 }
  597 
  598 static void make_among(struct analyser * a, struct node * p, struct node * substring) {
  599 
  600     NEW(among, x);
  601     NEWVEC(amongvec, v, p->number);
  602     struct node * q = p->left;
  603     struct amongvec * w0 = v;
  604     struct amongvec * w1 = v;
  605     int result = 1;
  606 
  607     int direction = substring != 0 ? substring->mode : p->mode;
  608     int backward = direction == m_backward;
  609 
  610     if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
  611     a->amongs_end = x;
  612     x->next = 0;
  613     x->b = v;
  614     x->number = a->among_count++;
  615     x->function_count = 0;
  616     x->starter = 0;
  617     x->nocommand_count = 0;
  618     x->amongvar_needed = false;
  619 
  620     if (q->type == c_bra) { x->starter = q; q = q->right; }
  621 
  622     while (q) {
  623         if (q->type == c_literalstring) {
  624             symbol * b = q->literalstring;
  625             w1->b = b;           /* pointer to case string */
  626             w1->action = NULL;   /* action gets filled in below */
  627             w1->line_number = q->line_number;
  628             w1->size = SIZE(b);  /* number of characters in string */
  629             w1->i = -1;          /* index of longest substring */
  630             w1->result = -1;     /* number of corresponding case expression */
  631             if (q->left) {
  632                 struct name * function = q->left->name;
  633                 w1->function = function;
  634                 function->used_in_among = true;
  635                 check_routine_mode(a, function, direction);
  636                 x->function_count++;
  637             } else {
  638                 w1->function = 0;
  639             }
  640             w1++;
  641         } else if (q->left == 0) {
  642             /* empty command: () */
  643             w0 = w1;
  644         } else {
  645             /* Check for previous action which is the same as this one and use
  646              * the same action code if we find one.
  647              */
  648             int among_result = -1;
  649             struct amongvec * w;
  650             for (w = v; w < w0; ++w) {
  651                 if (w->action && compare_node(w->action->left, q->left) == 0) {
  652                     if (w->result <= 0) {
  653                         printf("Among code %d isn't positive\n", w->result);
  654                         exit(1);
  655                     }
  656                     among_result = w->result;
  657                     break;
  658                 }
  659             }
  660             if (among_result < 0) {
  661                 among_result = result++;
  662             }
  663 
  664             while (w0 != w1) {
  665                 w0->action = q;
  666                 w0->result = among_result;
  667                 w0++;
  668             }
  669         }
  670         q = q->right;
  671     }
  672     if (w1-v != p->number) { fprintf(stderr, "oh! %d %d\n", (int)(w1-v), p->number); exit(1); }
  673     x->command_count = result - 1;
  674     {
  675         NEWVEC(node*, commands, x->command_count);
  676         memset(commands, 0, x->command_count * sizeof(struct node*));
  677         for (w0 = v; w0 < w1; w0++) {
  678             if (w0->result > 0) {
  679                 /* result == -1 when there's no command. */
  680                 if (w0->result > x->command_count) {
  681                     fprintf(stderr, "More among codes than expected\n");
  682                     exit(1);
  683                 }
  684                 if (!commands[w0->result - 1])
  685                     commands[w0->result - 1] = w0->action;
  686             } else {
  687                 ++x->nocommand_count;
  688             }
  689             if (backward) reverse_b(w0->b);
  690         }
  691         x->commands = commands;
  692     }
  693     qsort(v, w1 - v, sizeof(struct amongvec), compare_amongvec);
  694 
  695     /* the following loop is O(n squared) */
  696     for (w0 = w1 - 1; w0 >= v; w0--) {
  697         symbol * b = w0->b;
  698         int size = w0->size;
  699         struct amongvec * w;
  700 
  701         for (w = w0 - 1; w >= v; w--) {
  702             if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
  703                 w0->i = w - v;  /* fill in index of longest substring */
  704                 break;
  705             }
  706         }
  707     }
  708     if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
  709 
  710     for (w0 = v; w0 < w1 - 1; w0++)
  711         if (w0->size == (w0 + 1)->size &&
  712             memcmp(w0->b, (w0 + 1)->b, w0->size * sizeof(symbol)) == 0) {
  713             count_error(a);
  714             fprintf(stderr, "%s:%d: among(...) has repeated string '",
  715                     a->tokeniser->file, (w0 + 1)->line_number);
  716             report_b(stderr, (w0 + 1)->b);
  717             fprintf(stderr, "'\n");
  718             count_error(a);
  719             fprintf(stderr, "%s:%d: previously seen here\n",
  720                     a->tokeniser->file, w0->line_number);
  721         }
  722 
  723     x->literalstring_count = p->number;
  724     p->among = x;
  725 
  726     x->substring = substring;
  727     if (substring != 0) substring->among = x;
  728     if (x->command_count > 1 ||
  729         (x->command_count == 1 && x->nocommand_count > 0) ||
  730         x->starter != 0) {
  731         /* We need to set among_var rather than just checking if find_among*()
  732          * returns zero or not.
  733          */
  734         x->amongvar_needed = a->amongvar_needed = true;
  735     }
  736 }
  737 
  738 static struct node * read_among(struct analyser * a) {
  739     struct tokeniser * t = a->tokeniser;
  740     struct node * p = new_node(a, c_among);
  741     struct node * p_end = 0;
  742     int previous_token = -1;
  743     struct node * substring = a->substring;
  744 
  745     a->substring = 0;
  746     p->number = 0; /* counts the number of literals */
  747     if (!get_token(a, c_bra)) return p;
  748     while (true) {
  749         struct node * q;
  750         int token = read_token(t);
  751         switch (token) {
  752             case c_literalstring:
  753                 q = read_literalstring(a);
  754                 if (read_token(t) == c_name) {
  755                     struct node * r = new_node(a, c_name);
  756                     name_to_node(a, r, 'r');
  757                     q->left = r;
  758                 }
  759                 else t->token_held = true;
  760                 p->number++; break;
  761             case c_bra:
  762                 if (previous_token == c_bra) error(a, e_adjacent_bracketed_in_among);
  763                 q = read_C_list(a); break;
  764             default:
  765                 error(a, e_unexpected_token_in_among);
  766                 previous_token = token;
  767                 continue;
  768             case c_ket:
  769                 if (p->number == 0) error(a, e_empty_among);
  770                 if (t->error_count == 0) make_among(a, p, substring);
  771                 return p;
  772         }
  773         previous_token = token;
  774         if (p_end == 0) p->left = q; else p_end->right = q;
  775         p_end = q;
  776     }
  777 }
  778 
  779 static struct node * read_substring(struct analyser * a) {
  780 
  781     struct node * p = new_node(a, c_substring);
  782     if (a->substring != 0) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
  783     a->substring = p;
  784     return p;
  785 }
  786 
  787 static void check_modifyable(struct analyser * a) {
  788     if (!a->modifyable) error(a, e_not_allowed_inside_reverse);
  789 }
  790 
  791 static struct node * read_C(struct analyser * a) {
  792     struct tokeniser * t = a->tokeniser;
  793     int token = read_token(t);
  794     switch (token) {
  795         case c_bra:
  796             return read_C_list(a);
  797         case c_backwards:
  798             {
  799                 int mode = a->mode;
  800                 if (a->mode == m_backward) error(a, e_already_backwards); else a->mode = m_backward;
  801                 {   struct node * p = C_style(a, "C", token);
  802                     a->mode = mode;
  803                     return p;
  804                 }
  805             }
  806         case c_reverse:
  807             {
  808                 int mode = a->mode;
  809                 int modifyable = a->modifyable;
  810                 a->modifyable = false;
  811                 a->mode = mode == m_forward ? m_backward : m_forward;
  812                 {
  813                     struct node * p = C_style(a, "C", token);
  814                     a->mode = mode;
  815                     a->modifyable = modifyable;
  816                     return p;
  817                 }
  818             }
  819         case c_not:
  820         case c_try:
  821         case c_fail:
  822         case c_test:
  823         case c_do:
  824         case c_goto:
  825         case c_gopast:
  826         case c_repeat:
  827             return C_style(a, "C", token);
  828         case c_loop:
  829         case c_atleast:
  830             return C_style(a, "AC", token);
  831         case c_setmark: {
  832             struct node * n = C_style(a, "i", token);
  833             if (n->name) n->name->initialised = true;
  834             return n;
  835         }
  836         case c_tomark:
  837         case c_atmark:
  838         case c_hop:
  839             return C_style(a, "A", token);
  840         case c_delete:
  841             check_modifyable(a);
  842             /* fall through */
  843         case c_next:
  844         case c_tolimit:
  845         case c_atlimit:
  846         case c_leftslice:
  847         case c_rightslice:
  848         case c_true:
  849         case c_false:
  850         case c_debug:
  851             return new_node(a, token);
  852         case c_assignto:
  853         case c_sliceto: {
  854             struct node *n;
  855             check_modifyable(a);
  856             n = C_style(a, "s", token);
  857             if (n->name) n->name->initialised = true;
  858             return n;
  859         }
  860         case c_assign:
  861         case c_insert:
  862         case c_attach:
  863         case c_slicefrom: {
  864             struct node *n;
  865             check_modifyable(a);
  866             n = C_style(a, "S", token);
  867             if (n->name) n->name->value_used = true;
  868             return n;
  869         }
  870         case c_setlimit:
  871             return C_style(a, "CfD", token);
  872         case c_set:
  873         case c_unset: {
  874             struct node * n = C_style(a, "b", token);
  875             if (n->name) n->name->initialised = true;
  876             return n;
  877         }
  878         case c_dollar: {
  879             struct tokeniser * t = a->tokeniser;
  880             read_token(t);
  881             if (t->token == c_bra) {
  882                 /* Handle newer $(AE REL_OP AE) syntax. */
  883                 struct node * n = read_AE(a, 0);
  884                 read_token(t);
  885                 switch (t->token) {
  886                     case c_eq:
  887                     case c_ne:
  888                     case c_gr:
  889                     case c_ge:
  890                     case c_ls:
  891                     case c_le: {
  892                         struct node * lhs = n;
  893                         n = new_node(a, t->token);
  894                         n->left = lhs;
  895                         n->AE = read_AE(a, 0);
  896                         get_token(a, c_ket);
  897                         break;
  898                     }
  899                     default:
  900                         error(a, e_unexpected_token);
  901                         t->token_held = true;
  902                         break;
  903                 }
  904                 return n;
  905             }
  906 
  907             if (t->token == c_name) {
  908                 struct node * p;
  909                 struct name * q = find_name(a);
  910                 int mode = a->mode;
  911                 int modifyable = a->modifyable;
  912                 if (q && q->type == t_string) {
  913                     /* Assume for now that $ on string both initialises and
  914                      * uses the string variable.  FIXME: Can we do better?
  915                      */
  916                     q->initialised = true;
  917                     q->value_used = true;
  918                     a->mode = m_forward;
  919                     a->modifyable = true;
  920                     p = new_node(a, c_dollar);
  921                     p->left = read_C(a);
  922                     p->name = q;
  923                 } else {
  924                     if (q && q->type != t_integer) {
  925                         /* If $ is used on an unknown name or a name which
  926                          * isn't a string or an integer then we assume the
  927                          * unknown name is an integer as $ is used more often
  928                          * on integers than strings, so hopefully this it less
  929                          * likely to cause an error avalanche.
  930                          *
  931                          * For an unknown name, we'll already have reported an
  932                          * error.
  933                          */
  934                         error(a, e_not_of_type_string_or_integer);
  935                         q = NULL;
  936                     }
  937                     p = new_node(a, read_AE_test(a));
  938                     p->AE = read_AE(a, 0);
  939 
  940                     if (q) {
  941                         switch (p->type) {
  942                             case c_mathassign:
  943                                 q->initialised = true;
  944                                 p->name = q;
  945                                 break;
  946                             default:
  947                                 /* +=, etc don't "initialise" as they only
  948                                  * amend an existing value.  Similarly, they
  949                                  * don't count as using the value.
  950                                  */
  951                                 p->name = q;
  952                                 break;
  953                             case c_eq:
  954                             case c_ne:
  955                             case c_gr:
  956                             case c_ge:
  957                             case c_ls:
  958                             case c_le:
  959                                 p->left = new_node(a, c_name);
  960                                 p->left->name = q;
  961                                 q->value_used = true;
  962                                 break;
  963                         }
  964                     }
  965                 }
  966                 if (q) mark_used_in(a, q, p);
  967                 a->mode = mode;
  968                 a->modifyable = modifyable;
  969                 return p;
  970             }
  971 
  972             error(a, e_unexpected_token);
  973             t->token_held = true;
  974             return new_node(a, c_dollar);
  975         }
  976         case c_name:
  977             {
  978                 struct name * q = find_name(a);
  979                 struct node * p = new_node(a, c_name);
  980                 if (q) {
  981                     mark_used_in(a, q, p);
  982                     switch (q->type) {
  983                         case t_boolean:
  984                             p->type = c_booltest;
  985                             q->value_used = true;
  986                             break;
  987                         case t_integer:
  988                             error(a, e_misplaced); /* integer name misplaced */
  989                             break;
  990                         case t_string:
  991                             q->value_used = true;
  992                             break;
  993                         case t_routine:
  994                         case t_external:
  995                             p->type = c_call;
  996                             check_routine_mode(a, q, a->mode);
  997                             break;
  998                         case t_grouping:
  999                             p->type = c_grouping; break;
 1000                     }
 1001                 }
 1002                 p->name = q;
 1003                 return p;
 1004             }
 1005         case c_non:
 1006             {
 1007                 struct node * p = new_node(a, token);
 1008                 read_token(t);
 1009                 if (t->token == c_minus) read_token(t);
 1010                 if (!check_token(a, c_name)) { omission_error(a, c_name); return p; }
 1011                 name_to_node(a, p, 'g');
 1012                 return p;
 1013             }
 1014         case c_literalstring:
 1015             return read_literalstring(a);
 1016         case c_among: return read_among(a);
 1017         case c_substring: return read_substring(a);
 1018         default: error(a, e_unexpected_token); return 0;
 1019     }
 1020 }
 1021 
 1022 static int next_symbol(symbol * p, symbol * W, int utf8) {
 1023     if (utf8) {
 1024         int ch;
 1025         int j = get_utf8(p, & ch);
 1026         W[0] = ch; return j;
 1027     } else {
 1028         W[0] = p[0]; return 1;
 1029     }
 1030 }
 1031 
 1032 static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
 1033     int j = 0;
 1034     symbol W[1];
 1035     int width;
 1036     if (style == c_plus) {
 1037         while (j < SIZE(q)) {
 1038             width = next_symbol(q + j, W, utf8);
 1039             p = add_to_b(p, 1, W);
 1040             j += width;
 1041         }
 1042     } else {
 1043         while (j < SIZE(q)) {
 1044             int i;
 1045             width = next_symbol(q + j, W, utf8);
 1046             for (i = 0; i < SIZE(p); i++) {
 1047                 if (p[i] == W[0]) {
 1048                     memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
 1049                     SIZE(p)--;
 1050                 }
 1051             }
 1052             j += width;
 1053         }
 1054     }
 1055     return p;
 1056 }
 1057 
 1058 static void read_define_grouping(struct analyser * a, struct name * q) {
 1059     struct tokeniser * t = a->tokeniser;
 1060     int style = c_plus;
 1061     {
 1062         NEW(grouping, p);
 1063         if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
 1064         a->groupings_end = p;
 1065         if (q) q->grouping = p;
 1066         p->next = 0;
 1067         p->name = q;
 1068         p->line_number = a->tokeniser->line_number;
 1069         p->b = create_b(0);
 1070         while (true) {
 1071             switch (read_token(t)) {
 1072                 case c_name:
 1073                     {
 1074                         struct name * r = find_name(a);
 1075                         if (r) {
 1076                             check_name_type(a, r, 'g');
 1077                             p->b = alter_grouping(p->b, r->grouping->b, style, false);
 1078                             r->used_in_definition = true;
 1079                         }
 1080                     }
 1081                     break;
 1082                 case c_literalstring:
 1083                     p->b = alter_grouping(p->b, t->b, style, (a->encoding == ENC_UTF8));
 1084                     break;
 1085                 default: error(a, e_unexpected_token); return;
 1086             }
 1087             switch (read_token(t)) {
 1088                 case c_plus:
 1089                 case c_minus: style = t->token; break;
 1090                 default: goto label0;
 1091             }
 1092         }
 1093     label0:
 1094         {
 1095             int i;
 1096             int max = 0;
 1097             int min = 1<<16;
 1098             for (i = 0; i < SIZE(p->b); i++) {
 1099                 if (p->b[i] > max) max = p->b[i];
 1100                 if (p->b[i] < min) min = p->b[i];
 1101             }
 1102             p->largest_ch = max;
 1103             p->smallest_ch = min;
 1104             if (min == 1<<16) error(a, e_empty_grouping);
 1105         }
 1106         t->token_held = true; return;
 1107     }
 1108 }
 1109 
 1110 static void read_define_routine(struct analyser * a, struct name * q) {
 1111     struct node * p = new_node(a, c_define);
 1112     a->amongvar_needed = false;
 1113     if (q) {
 1114         check_name_type(a, q, 'R');
 1115         if (q->definition != 0) error(a, e_redefined);
 1116         if (q->mode < 0) q->mode = a->mode; else
 1117         if (q->mode != a->mode) error2(a, e_declared_as_different_mode, q->mode);
 1118     }
 1119     p->name = q;
 1120     if (a->program == 0) a->program = p; else a->program_end->right = p;
 1121     a->program_end = p;
 1122     get_token(a, c_as);
 1123     p->left = read_C(a);
 1124     if (q) q->definition = p->left;
 1125 
 1126     if (a->substring != 0) {
 1127         error2(a, e_unresolved_substring, a->substring->line_number);
 1128         a->substring = 0;
 1129     }
 1130     p->amongvar_needed = a->amongvar_needed;
 1131 }
 1132 
 1133 static void read_define(struct analyser * a) {
 1134     if (get_token(a, c_name)) {
 1135         struct name * q = find_name(a);
 1136         int type;
 1137         if (q) {
 1138             type = q->type;
 1139         } else {
 1140             /* No declaration, so sniff next token - if it is 'as' then parse
 1141              * as a routine, otherwise as a grouping.
 1142              */
 1143             if (read_token(a->tokeniser) == c_as) {
 1144                 type = t_routine;
 1145             } else {
 1146                 type = t_grouping;
 1147             }
 1148             a->tokeniser->token_held = true;
 1149         }
 1150 
 1151         if (type == t_grouping) {
 1152             read_define_grouping(a, q);
 1153         } else {
 1154             read_define_routine(a, q);
 1155         }
 1156     }
 1157 }
 1158 
 1159 static void read_backwardmode(struct analyser * a) {
 1160     int mode = a->mode;
 1161     a->mode = m_backward;
 1162     if (get_token(a, c_bra)) {
 1163         read_program_(a, c_ket);
 1164         check_token(a, c_ket);
 1165     }
 1166     a->mode = mode;
 1167 }
 1168 
 1169 static void read_program_(struct analyser * a, int terminator) {
 1170     struct tokeniser * t = a->tokeniser;
 1171     while (true) {
 1172         switch (read_token(t)) {
 1173             case c_strings:     read_names(a, t_string); break;
 1174             case c_booleans:    read_names(a, t_boolean); break;
 1175             case c_integers:    read_names(a, t_integer); break;
 1176             case c_routines:    read_names(a, t_routine); break;
 1177             case c_externals:   read_names(a, t_external); break;
 1178             case c_groupings:   read_names(a, t_grouping); break;
 1179             case c_define:      read_define(a); break;
 1180             case c_backwardmode:read_backwardmode(a); break;
 1181             case c_ket:
 1182                 if (terminator == c_ket) return;
 1183                 /* fall through */
 1184             default:
 1185                 error(a, e_unexpected_token); break;
 1186             case -1:
 1187                 if (terminator >= 0) omission_error(a, c_ket);
 1188                 return;
 1189         }
 1190     }
 1191 }
 1192 
 1193 static void remove_dead_assignments(struct node * p, struct name * q) {
 1194     if (p->name == q) {
 1195         switch (p->type) {
 1196             case c_assignto:
 1197             case c_sliceto:
 1198             case c_mathassign:
 1199             case c_plusassign:
 1200             case c_minusassign:
 1201             case c_multiplyassign:
 1202             case c_divideassign:
 1203             case c_setmark:
 1204             case c_set:
 1205             case c_unset:
 1206             case c_dollar:
 1207                 /* c_true is a no-op. */
 1208                 p->type = c_true;
 1209                 break;
 1210             default:
 1211                 /* There are no read accesses to this variable, so any
 1212                  * references must be assignments.
 1213                  */
 1214                 fprintf(stderr, "Unhandled type of dead assignment via %s\n",
 1215                         name_of_token(p->type));
 1216                 exit(1);
 1217         }
 1218     }
 1219     if (p->AE) remove_dead_assignments(p->AE, q);
 1220     if (p->left) remove_dead_assignments(p->left, q);
 1221     if (p->aux) remove_dead_assignments(p->aux, q);
 1222     if (p->right) remove_dead_assignments(p->right, q);
 1223 }
 1224 
 1225 extern void read_program(struct analyser * a) {
 1226     read_program_(a, -1);
 1227     {
 1228         struct name * q = a->names;
 1229         while (q) {
 1230             switch (q->type) {
 1231                 case t_external: case t_routine:
 1232                     if (q->used && q->definition == 0) error4(a, q);
 1233                     break;
 1234                 case t_grouping:
 1235                     if (q->used && q->grouping == 0) error4(a, q);
 1236                     break;
 1237             }
 1238             q = q->next;
 1239         }
 1240     }
 1241 
 1242     if (a->tokeniser->error_count == 0) {
 1243         struct name * q = a->names;
 1244         struct name ** ptr = &(a->names);
 1245         while (q) {
 1246             if (!q->referenced) {
 1247                 fprintf(stderr, "%s:%d: warning: %s '",
 1248                         a->tokeniser->file,
 1249                         q->declaration_line_number,
 1250                         name_of_name_type(q->type));
 1251                 report_b(stderr, q->b);
 1252                 if (q->type == t_routine ||
 1253                     q->type == t_external ||
 1254                     q->type == t_grouping) {
 1255                     fprintf(stderr, "' declared but not defined\n");
 1256                 } else {
 1257                     fprintf(stderr, "' defined but not used\n");
 1258                     q = q->next;
 1259                     *ptr = q;
 1260                     continue;
 1261                 }
 1262             } else if (q->type == t_routine || q->type == t_grouping) {
 1263                 /* It's OK to define a grouping but only use it to define other
 1264                  * groupings.
 1265                  */
 1266                 if (!q->used && !q->used_in_definition) {
 1267                     int line_num;
 1268                     if (q->type == t_routine) {
 1269                         line_num = q->definition->line_number;
 1270                     } else {
 1271                         line_num = q->grouping->line_number;
 1272                     }
 1273                     fprintf(stderr, "%s:%d: warning: %s '",
 1274                             a->tokeniser->file,
 1275                             line_num,
 1276                             name_of_name_type(q->type));
 1277                     report_b(stderr, q->b);
 1278                     fprintf(stderr, "' defined but not used\n");
 1279                 }
 1280             } else if (q->type == t_external) {
 1281                 /* Unused is OK. */
 1282             } else if (!q->initialised) {
 1283                 fprintf(stderr, "%s:%d: warning: %s '",
 1284                         a->tokeniser->file,
 1285                         q->declaration_line_number,
 1286                         name_of_name_type(q->type));
 1287                 report_b(stderr, q->b);
 1288                 fprintf(stderr, "' is never initialised\n");
 1289             } else if (!q->value_used) {
 1290                 fprintf(stderr, "%s:%d: warning: %s '",
 1291                         a->tokeniser->file,
 1292                         q->declaration_line_number,
 1293                         name_of_name_type(q->type));
 1294                 report_b(stderr, q->b);
 1295                 fprintf(stderr, "' is set but never used\n");
 1296                 remove_dead_assignments(a->program, q);
 1297                 q = q->next;
 1298                 *ptr = q;
 1299                 continue;
 1300             }
 1301             ptr = &(q->next);
 1302             q = q->next;
 1303         }
 1304 
 1305         {
 1306             /* Now we've eliminated variables whose values are never used we
 1307              * can number the variables, which is used by some generators.
 1308              */
 1309             int * name_count = a->name_count;
 1310             struct name * n;
 1311             for (n = a->names; n; n = n->next) {
 1312                 n->count = name_count[n->type]++;
 1313             }
 1314         }
 1315     }
 1316 }
 1317 
 1318 extern struct analyser * create_analyser(struct tokeniser * t) {
 1319     NEW(analyser, a);
 1320     a->tokeniser = t;
 1321     a->nodes = 0;
 1322     a->names = 0;
 1323     a->literalstrings = 0;
 1324     a->program = 0;
 1325     a->amongs = 0;
 1326     a->among_count = 0;
 1327     a->groupings = 0;
 1328     a->mode = m_forward;
 1329     a->modifyable = true;
 1330     { int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
 1331     a->substring = 0;
 1332     a->int_limits_used = false;
 1333     return a;
 1334 }
 1335 
 1336 extern void close_analyser(struct analyser * a) {
 1337     {
 1338         struct node * q = a->nodes;
 1339         while (q) {
 1340             struct node * q_next = q->next;
 1341             FREE(q);
 1342             q = q_next;
 1343         }
 1344     }
 1345     {
 1346         struct name * q = a->names;
 1347         while (q) {
 1348             struct name * q_next = q->next;
 1349             lose_b(q->b); FREE(q);
 1350             q = q_next;
 1351         }
 1352     }
 1353     {
 1354         struct literalstring * q = a->literalstrings;
 1355         while (q) {
 1356             struct literalstring * q_next = q->next;
 1357             lose_b(q->b); FREE(q);
 1358             q = q_next;
 1359         }
 1360     }
 1361     {
 1362         struct among * q = a->amongs;
 1363         while (q) {
 1364             struct among * q_next = q->next;
 1365             FREE(q->b);
 1366             FREE(q->commands);
 1367             FREE(q);
 1368             q = q_next;
 1369         }
 1370     }
 1371     {
 1372         struct grouping * q = a->groupings;
 1373         while (q) {
 1374             struct grouping * q_next = q->next;
 1375             lose_b(q->b); FREE(q);
 1376             q = q_next;
 1377         }
 1378     }
 1379     FREE(a);
 1380 }