"Fossies" - the Fresh Open Source Software Archive

Member "gawk-5.1.0/cint_array.c" (6 Feb 2020, 30851 Bytes) of package /linux/misc/gawk-5.1.0.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "cint_array.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.0.1_vs_5.1.0.

    1 /*
    2  * cint_array.c - routines for arrays of (mostly) consecutive positive integer indices.
    3  */
    4 
    5 /*
    6  * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2019, 2020,
    7  * the Free Software Foundation, Inc.
    8  *
    9  * This file is part of GAWK, the GNU implementation of the
   10  * AWK Programming Language.
   11  *
   12  * GAWK is free software; you can redistribute it and/or modify
   13  * it under the terms of the GNU General Public License as published by
   14  * the Free Software Foundation; either version 3 of the License, or
   15  * (at your option) any later version.
   16  *
   17  * GAWK is distributed in the hope that it will be useful,
   18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   20  * GNU General Public License for more details.
   21  *
   22  * You should have received a copy of the GNU General Public License
   23  * along with this program; if not, write to the Free Software
   24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
   25  */
   26 
   27 #include "awk.h"
   28 
   29 #define INT32_BIT 32
   30 
   31 extern FILE *output_fp;
   32 extern void indent(int indent_level);
   33 extern NODE **is_integer(NODE *symbol, NODE *subs);
   34 
   35 /*
   36  * NHAT         ---  maximum size of a leaf array (2^NHAT).
   37  * THRESHOLD    ---  Maximum capacity waste; THRESHOLD >= 2^(NHAT + 1).
   38  */
   39 
   40 static int NHAT = 10;
   41 static long THRESHOLD;
   42 
   43 /*
   44  * What is the optimium NHAT ? timing results suggest that 10 is a good choice,
   45  * although differences aren't that significant for > 10.
   46  */
   47 
   48 
   49 static NODE **cint_array_init(NODE *symbol, NODE *subs);
   50 static NODE **is_uinteger(NODE *symbol, NODE *subs);
   51 static NODE **cint_lookup(NODE *symbol, NODE *subs);
   52 static NODE **cint_exists(NODE *symbol, NODE *subs);
   53 static NODE **cint_clear(NODE *symbol, NODE *subs);
   54 static NODE **cint_remove(NODE *symbol, NODE *subs);
   55 static NODE **cint_list(NODE *symbol, NODE *t);
   56 static NODE **cint_copy(NODE *symbol, NODE *newsymb);
   57 static NODE **cint_dump(NODE *symbol, NODE *ndump);
   58 #ifdef ARRAYDEBUG
   59 static void cint_print(NODE *symbol);
   60 #endif
   61 
   62 const array_funcs_t cint_array_func = {
   63     "cint",
   64     cint_array_init,
   65     is_uinteger,
   66     cint_lookup,
   67     cint_exists,
   68     cint_clear,
   69     cint_remove,
   70     cint_list,
   71     cint_copy,
   72     cint_dump,
   73     (afunc_t) 0,
   74 };
   75 
   76 
   77 static NODE **argv_store(NODE *symbol, NODE *subs);
   78 
   79 /* special case for ARGV in sandbox mode */
   80 const array_funcs_t argv_array_func = {
   81     "argv",
   82     cint_array_init,
   83     is_uinteger,
   84     cint_lookup,
   85     cint_exists,
   86     cint_clear,
   87     cint_remove,
   88     cint_list,
   89     cint_copy,
   90     cint_dump,
   91     argv_store,
   92 };
   93 
   94 static inline int cint_hash(long k);
   95 static inline NODE **cint_find(NODE *symbol, long k, int h1);
   96 
   97 static inline NODE *make_node(NODETYPE type);
   98 
   99 static NODE **tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base);
  100 static NODE **tree_exists(NODE *tree, long k);
  101 static void tree_clear(NODE *tree);
  102 static int tree_remove(NODE *symbol, NODE *tree, long k);
  103 static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree);
  104 static long tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind);
  105 static inline NODE **tree_find(NODE *tree, long k, int i);
  106 static void tree_info(NODE *tree, NODE *ndump, const char *aname);
  107 static size_t tree_kilobytes(NODE *tree);
  108 #ifdef ARRAYDEBUG
  109 static void tree_print(NODE *tree, size_t bi, int indent_level);
  110 #endif
  111 
  112 static inline NODE **leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base);
  113 static inline NODE **leaf_exists(NODE *array, long k);
  114 static void leaf_clear(NODE *array);
  115 static int leaf_remove(NODE *symbol, NODE *array, long k);
  116 static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray);
  117 static long leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind);
  118 static void leaf_info(NODE *array, NODE *ndump, const char *aname);
  119 #ifdef ARRAYDEBUG
  120 static void leaf_print(NODE *array, size_t bi, int indent_level);
  121 #endif
  122 
  123 /* powers of 2 table upto 2^30 */
  124 static const long power_two_table[] = {
  125     1, 2, 4, 8, 16, 32, 64,
  126     128, 256, 512, 1024, 2048, 4096,
  127     8192, 16384, 32768, 65536, 131072, 262144,
  128     524288, 1048576, 2097152, 4194304, 8388608, 16777216,
  129     33554432, 67108864, 134217728, 268435456, 536870912, 1073741824
  130 };
  131 
  132 
  133 #define ISUINT(a, s)    ((((s)->flags & NUMINT) != 0 || is_integer(a, s) != NULL) \
  134                                     && (s)->numbr >= 0)
  135 
  136 /*
  137  * To store 2^n integers, allocate top-level array of size n, elements
  138  * of which are 1-Dimensional (leaf-array) of geometrically increasing
  139  * size (power of 2).
  140  *
  141  *  [0]   -->  [ 0 ]
  142  *  [1]   -->  [ 1 ]
  143  *  |2|   -->  [ 2 | 3 ]
  144  *  |3|   -->  [ 4 | 5 | 6 | 7 ]
  145  *  |.|
  146  *  |k|   -->  [ 2^(k - 1)| ...  | 2^k - 1 ]
  147  *  ...
  148  *
  149  * For a given integer n (> 0), the leaf-array is at 1 + floor(log2(n)).
  150  *
  151  * The idea for the geometrically increasing array sizes is from:
  152  *  Fast Functional Lists, Hash-Lists, Deques and Variable Length Arrays.
  153  *  Bagwell, Phil (2002).
  154  *  http://infoscience.epfl.ch/record/64410/files/techlists.pdf
  155  *
  156  * Disadvantage:
  157  * Worst case memory waste > 99% and will happen when each of the
  158  * leaf arrays contains only a single element. Even with consecutive
  159  * integers, memory waste can be as high as 50%.
  160  *
  161  * Solution: Hashed Array Trees (HATs).
  162  *
  163  */
  164 
  165 /* cint_array_init ---  array initialization routine */
  166 
  167 static NODE **
  168 cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
  169 {
  170     if (symbol == NULL) {
  171         long newval;
  172         size_t nelems = (sizeof(power_two_table) / sizeof(power_two_table[0]));
  173 
  174         /* check relevant environment variables */
  175         if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT)
  176             NHAT = newval;
  177         /* don't allow overflow off the end of the table */
  178         if (NHAT > nelems - 2)
  179             NHAT = nelems - 2;
  180         THRESHOLD = power_two_table[NHAT + 1];
  181     } else
  182         null_array(symbol);
  183 
  184     return & success_node;
  185 }
  186 
  187 
  188 /* is_uinteger --- test if the subscript is an integer >= 0 */
  189 
  190 NODE **
  191 is_uinteger(NODE *symbol, NODE *subs)
  192 {
  193     if (is_integer(symbol, subs) != NULL && subs->numbr >= 0)
  194         return & success_node;
  195     return NULL;
  196 }
  197 
  198 
  199 /* cint_lookup --- Find the subscript in the array; Install it if it isn't there. */
  200 
  201 static NODE **
  202 cint_lookup(NODE *symbol, NODE *subs)
  203 {
  204     NODE **lhs;
  205     long k;
  206     int h1 = -1, m, li;
  207     NODE *tn, *xn;
  208     long cint_size, capacity;
  209 
  210     k = -1;
  211     if (ISUINT(symbol, subs)) {
  212         k = subs->numbr;    /* k >= 0 */
  213         h1 = cint_hash(k);  /* h1 >= NHAT */
  214         if ((lhs = cint_find(symbol, k, h1)) != NULL)
  215             return lhs;
  216     }
  217     xn = symbol->xarray;
  218     if (xn != NULL && (lhs = xn->aexists(xn, subs)) != NULL)
  219         return lhs;
  220 
  221     /* It's not there, install it */
  222 
  223     if (k < 0)
  224         goto xinstall;
  225 
  226     m = h1 - 1; /* m >= (NHAT- 1) */
  227 
  228     /* Estimate capacity upper bound.
  229      * capacity upper bound = current capacity + leaf array size.
  230      */
  231     li = m > NHAT ? m : NHAT;
  232     while (li >= NHAT) {
  233         /* leaf-array of a HAT */
  234         li = (li + 1) / 2;
  235     }
  236     capacity = symbol->array_capacity + power_two_table[li];
  237 
  238     cint_size = (xn == NULL) ? symbol->table_size
  239                 : (symbol->table_size - xn->table_size);
  240     assert(cint_size >= 0);
  241     if ((capacity - cint_size) > THRESHOLD)
  242         goto xinstall;
  243 
  244     if (symbol->nodes == NULL) {
  245         symbol->array_capacity = 0;
  246         assert(symbol->table_size == 0);
  247 
  248         /* nodes[0] .. nodes[NHAT- 1] not used */
  249         ezalloc(symbol->nodes, NODE **, INT32_BIT * sizeof(NODE *), "cint_lookup");
  250     }
  251 
  252     symbol->table_size++;   /* one more element in array */
  253 
  254     tn = symbol->nodes[h1];
  255     if (tn == NULL) {
  256         tn = make_node(Node_array_tree);
  257         symbol->nodes[h1] = tn;
  258     }
  259 
  260     if (m < NHAT)
  261         return tree_lookup(symbol, tn, k, NHAT, 0);
  262     return tree_lookup(symbol, tn, k, m, power_two_table[m]);
  263 
  264 xinstall:
  265 
  266     symbol->table_size++;
  267     if (xn == NULL) {
  268         xn = symbol->xarray = make_array();
  269         xn->vname = symbol->vname;  /* shallow copy */
  270 
  271         /*
  272          * Avoid using assoc_lookup(xn, subs) which may lead
  273          * to infinite recursion.
  274          */
  275 
  276         if (is_integer(xn, subs))
  277             xn->array_funcs = & int_array_func;
  278         else
  279             xn->array_funcs = & str_array_func;
  280         xn->flags |= XARRAY;
  281     }
  282     return xn->alookup(xn, subs);
  283 }
  284 
  285 
  286 /* cint_exists --- test whether an index is in the array or not. */
  287 
  288 static NODE **
  289 cint_exists(NODE *symbol, NODE *subs)
  290 {
  291     NODE *xn;
  292 
  293     if (ISUINT(symbol, subs)) {
  294         long k = subs->numbr;
  295         NODE **lhs;
  296         if ((lhs = cint_find(symbol, k, cint_hash(k))) != NULL)
  297             return lhs;
  298     }
  299     if ((xn = symbol->xarray) == NULL)
  300         return NULL;
  301     return xn->aexists(xn, subs);
  302 }
  303 
  304 
  305 /* cint_clear --- flush all the values in symbol[] */
  306 
  307 static NODE **
  308 cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
  309 {
  310     size_t i;
  311     NODE *tn;
  312 
  313     assert(symbol->nodes != NULL);
  314 
  315     if (symbol->xarray != NULL) {
  316         NODE *xn = symbol->xarray;
  317         assoc_clear(xn);
  318         freenode(xn);
  319         symbol->xarray = NULL;
  320     }
  321 
  322     for (i = NHAT; i < INT32_BIT; i++) {
  323         tn = symbol->nodes[i];
  324         if (tn != NULL) {
  325             tree_clear(tn);
  326             freenode(tn);
  327         }
  328     }
  329 
  330     efree(symbol->nodes);
  331     symbol->ainit(symbol, NULL);    /* re-initialize symbol */
  332     return NULL;
  333 }
  334 
  335 
  336 /* cint_remove --- remove an index from the array */
  337 
  338 static NODE **
  339 cint_remove(NODE *symbol, NODE *subs)
  340 {
  341     long k;
  342     int h1;
  343     NODE *tn, *xn = symbol->xarray;
  344 
  345     if (symbol->table_size == 0)
  346         return NULL;
  347 
  348     if (! ISUINT(symbol, subs))
  349         goto xremove;
  350 
  351     assert(symbol->nodes != NULL);
  352 
  353     k = subs->numbr;
  354     h1 = cint_hash(k);
  355     tn = symbol->nodes[h1];
  356     if (tn == NULL || ! tree_remove(symbol, tn, k))
  357         goto xremove;
  358 
  359     if (tn->table_size == 0) {
  360         freenode(tn);
  361         symbol->nodes[h1] = NULL;
  362     }
  363 
  364     symbol->table_size--;
  365 
  366     if (xn == NULL && symbol->table_size == 0) {
  367         efree(symbol->nodes);
  368         symbol->ainit(symbol, NULL);    /* re-initialize array 'symbol' */
  369     } else if(xn != NULL && symbol->table_size == xn->table_size) {
  370         /* promote xn to symbol */
  371 
  372         xn->flags &= ~XARRAY;
  373         xn->parent_array = symbol->parent_array;
  374         efree(symbol->nodes);
  375         *symbol = *xn;
  376         freenode(xn);
  377     }
  378 
  379     return & success_node;
  380 
  381 xremove:
  382     xn = symbol->xarray;
  383     if (xn == NULL || xn->aremove(xn, subs) == NULL)
  384         return NULL;
  385     if (xn->table_size == 0) {
  386         freenode(xn);
  387         symbol->xarray = NULL;
  388     }
  389     symbol->table_size--;
  390     assert(symbol->table_size > 0);
  391 
  392     return & success_node;
  393 }
  394 
  395 
  396 /* cint_copy --- duplicate input array "symbol" */
  397 
  398 static NODE **
  399 cint_copy(NODE *symbol, NODE *newsymb)
  400 {
  401     NODE **old, **new;
  402     size_t i;
  403 
  404     assert(symbol->nodes != NULL);
  405 
  406     /* allocate new table */
  407     ezalloc(new, NODE **, INT32_BIT * sizeof(NODE *), "cint_copy");
  408 
  409     old = symbol->nodes;
  410     for (i = NHAT; i < INT32_BIT; i++) {
  411         if (old[i] == NULL)
  412             continue;
  413         new[i] = make_node(Node_array_tree);
  414         tree_copy(newsymb, old[i], new[i]);
  415     }
  416 
  417     if (symbol->xarray != NULL) {
  418         NODE *xn, *n;
  419         xn = symbol->xarray;
  420         n = make_array();
  421         n->vname = newsymb->vname;
  422         (void) xn->acopy(xn, n);
  423         newsymb->xarray = n;
  424     } else
  425         newsymb->xarray = NULL;
  426 
  427     newsymb->nodes = new;
  428     newsymb->table_size = symbol->table_size;
  429     newsymb->array_capacity = symbol->array_capacity;
  430     newsymb->flags = symbol->flags;
  431 
  432     return NULL;
  433 }
  434 
  435 
  436 /* cint_list --- return a list of items */
  437 
  438 static NODE**
  439 cint_list(NODE *symbol, NODE *t)
  440 {
  441     NODE **list = NULL;
  442     NODE *tn, *xn;
  443     unsigned long k = 0, num_elems, list_size;
  444     size_t j, ja, jd;
  445     int elem_size = 1;
  446     assoc_kind_t assoc_kind;
  447 
  448     num_elems = symbol->table_size;
  449     if (num_elems == 0)
  450         return NULL;
  451     assoc_kind = (assoc_kind_t) t->flags;
  452     if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
  453         num_elems = 1;
  454 
  455     if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
  456         elem_size = 2;
  457     list_size = num_elems * elem_size;
  458 
  459     if (symbol->xarray != NULL) {
  460         xn = symbol->xarray;
  461         list = xn->alist(xn, t);
  462         assert(list != NULL);
  463         assoc_kind &= ~(AASC|ADESC);
  464         t->flags = (unsigned int) assoc_kind;
  465         if (num_elems == 1 || num_elems == xn->table_size)
  466             return list;
  467         erealloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
  468         k = elem_size * xn->table_size;
  469     } else
  470         emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
  471 
  472     if ((assoc_kind & AINUM) == 0) {
  473         /* not sorting by "index num" */
  474         assoc_kind &= ~(AASC|ADESC);
  475         t->flags = (unsigned int) assoc_kind;
  476     }
  477 
  478     /* populate it with index in ascending or descending order */
  479 
  480     for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) {
  481         j = (assoc_kind & ADESC) != 0 ? jd-- : ja++;
  482         tn = symbol->nodes[j];
  483         if (tn == NULL)
  484             continue;
  485         k += tree_list(tn, list + k, assoc_kind);
  486         if (k >= list_size)
  487             return list;
  488     }
  489     return list;
  490 }
  491 
  492 
  493 /* cint_dump --- dump array info */
  494 
  495 static NODE **
  496 cint_dump(NODE *symbol, NODE *ndump)
  497 {
  498     NODE *tn, *xn = NULL;
  499     int indent_level;
  500     size_t i;
  501     long cint_size = 0, xsize = 0;
  502     AWKNUM kb = 0;
  503     extern AWKNUM int_kilobytes(NODE *symbol);
  504     extern AWKNUM str_kilobytes(NODE *symbol);
  505 
  506     indent_level = ndump->alevel;
  507 
  508     if (symbol->xarray != NULL) {
  509         xn = symbol->xarray;
  510         xsize = xn->table_size;
  511     }
  512     cint_size = symbol->table_size - xsize;
  513 
  514     if ((symbol->flags & XARRAY) == 0)
  515         fprintf(output_fp, "%s `%s'\n",
  516             (symbol->parent_array == NULL) ? "array" : "sub-array",
  517             array_vname(symbol));
  518     indent_level++;
  519     indent(indent_level);
  520     fprintf(output_fp, "array_func: cint_array_func\n");
  521     if (symbol->flags != 0) {
  522         indent(indent_level);
  523         fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
  524     }
  525     indent(indent_level);
  526     fprintf(output_fp, "NHAT: %d\n", NHAT);
  527     indent(indent_level);
  528     fprintf(output_fp, "THRESHOLD: %ld\n", THRESHOLD);
  529     indent(indent_level);
  530     fprintf(output_fp, "table_size: %ld (total), %ld (cint), %ld (int + str)\n",
  531                 symbol->table_size, cint_size, xsize);
  532     indent(indent_level);
  533     fprintf(output_fp, "array_capacity: %lu\n", (unsigned long) symbol->array_capacity);
  534     indent(indent_level);
  535     fprintf(output_fp, "Load Factor: %.2g\n", (AWKNUM) cint_size / symbol->array_capacity);
  536 
  537     for (i = NHAT; i < INT32_BIT; i++) {
  538         tn = symbol->nodes[i];
  539         if (tn == NULL)
  540             continue;
  541         /* Node_array_tree  + HAT */
  542         kb += (sizeof(NODE) + tree_kilobytes(tn)) / 1024.0;
  543     }
  544     kb += (INT32_BIT * sizeof(NODE *)) / 1024.0;    /* symbol->nodes */
  545     kb += (symbol->array_capacity * sizeof(NODE *)) / 1024.0;   /* value nodes in Node_array_leaf(s) */
  546     if (xn != NULL) {
  547         if (xn->array_funcs == & int_array_func)
  548             kb += int_kilobytes(xn);
  549         else
  550             kb += str_kilobytes(xn);
  551     }
  552 
  553     indent(indent_level);
  554     fprintf(output_fp, "memory: %.2g kB (total)\n", kb);
  555 
  556     /* dump elements */
  557 
  558     if (ndump->adepth >= 0) {
  559         const char *aname;
  560 
  561         fprintf(output_fp, "\n");
  562         aname = make_aname(symbol);
  563         for (i = NHAT; i < INT32_BIT; i++) {
  564             tn = symbol->nodes[i];
  565             if (tn != NULL)
  566                 tree_info(tn, ndump, aname);
  567         }
  568     }
  569 
  570     if (xn != NULL) {
  571         fprintf(output_fp, "\n");
  572         xn->adump(xn, ndump);
  573     }
  574 
  575 #ifdef ARRAYDEBUG
  576     if (ndump->adepth < -999)
  577         cint_print(symbol);
  578 #endif
  579 
  580     return NULL;
  581 }
  582 
  583 
  584 /* cint_hash --- locate the HAT for a given number 'k' */
  585 
  586 static inline int
  587 cint_hash(long k)
  588 {
  589     uint32_t num, r, shift;
  590 
  591     assert(k >= 0);
  592     if (k == 0)
  593         return NHAT;
  594     num = k;
  595 
  596     /* Find the Floor(log base 2 of 32-bit integer) */
  597 
  598     /*
  599      * Warren Jr., Henry S. (2002). Hacker's Delight.
  600      * Addison Wesley. pp. pp. 215. ISBN 978-0201914658.
  601      *
  602      *  r = 0;
  603      *  if (num >= 1<<16) { num >>= 16; r += 16; }
  604      *  if (num >= 1<< 8) { num >>=  8; r +=  8; }
  605      *  if (num >= 1<< 4) { num >>=  4; r +=  4; }
  606      *  if (num >= 1<< 2) { num >>=  2; r +=  2; }
  607      *  if (num >= 1<< 1) {     r +=  1; }
  608      */
  609 
  610 
  611     /*
  612      * Slightly different code copied from:
  613      *
  614      * http://www-graphics.stanford.edu/~seander/bithacks.html
  615      * Bit Twiddling Hacks
  616      * By Sean Eron Anderson
  617      * seander@cs.stanford.edu
  618      * Individually, the code snippets here are in the public domain
  619      * (unless otherwise noted) --- feel free to use them however you please.
  620      * The aggregate collection and descriptions are (C) 1997-2005
  621      * Sean Eron Anderson. The code and descriptions are distributed in the
  622      * hope that they will be useful, but WITHOUT ANY WARRANTY and without
  623      * even the implied warranty of merchantability or fitness for a particular
  624      * purpose.
  625      *
  626      */
  627 
  628     r = (num > 0xFFFF) << 4; num >>= r;
  629     shift = (num > 0xFF) << 3; num >>= shift; r |= shift;
  630     shift = (num > 0x0F) << 2; num >>= shift; r |= shift;
  631     shift = (num > 0x03) << 1; num >>= shift; r |= shift;
  632     r |= (num >> 1);
  633 
  634     /* We use a single HAT for 0 <= num < 2^NHAT */
  635     if (r < NHAT)
  636         return NHAT;
  637 
  638     return (1 + r);
  639 }
  640 
  641 
  642 /* cint_find --- locate the integer subscript */
  643 
  644 static inline NODE **
  645 cint_find(NODE *symbol, long k, int h1)
  646 {
  647     NODE *tn;
  648 
  649     if (symbol->nodes == NULL || (tn = symbol->nodes[h1]) == NULL)
  650         return NULL;
  651     return tree_exists(tn, k);
  652 }
  653 
  654 
  655 #ifdef ARRAYDEBUG
  656 
  657 /* cint_print --- print structural info */
  658 
  659 static void
  660 cint_print(NODE *symbol)
  661 {
  662     NODE *tn;
  663     size_t i;
  664 
  665     fprintf(output_fp, "I[%4lu:%-4lu]\n", (unsigned long) INT32_BIT,
  666                 (unsigned long) symbol->table_size);
  667     for (i = NHAT; i < INT32_BIT; i++) {
  668         tn = symbol->nodes[i];
  669         if (tn == NULL)
  670             continue;
  671         tree_print(tn, i, 1);
  672     }
  673 }
  674 
  675 #endif
  676 
  677 
  678 /*------------------------ Hashed Array Trees -----------------------------*/
  679 
  680 /*
  681  * HATs: Hashed Array Trees
  682  * Fast variable-length arrays
  683  * Edward Sitarski
  684  * http://www.drdobbs.com/architecture-and-design/184409965
  685  *
  686  *  HAT has a top-level array containing a power of two
  687  *  number of leaf arrays. All leaf arrays are the same size as the
  688  *  top-level array. A full HAT can hold n^2 elements,
  689  *  where n (some power of 2) is the size of each leaf array.
  690  *  [i/n][i & (n - 1)] locates the `i th' element in a HAT.
  691  *
  692  */
  693 
  694 /*
  695  *  A half HAT is defined here as a HAT with a top-level array of size n^2/2
  696  *  and holds the first n^2/2 elements.
  697  *
  698  *   1. 2^8 elements can be stored in a full HAT of size 2^4.
  699  *   2. 2^9 elements can be stored in a half HAT of size 2^5.
  700  *   3. When the number of elements is some power of 2, it
  701  *      can be stored in a full or a half HAT.
  702  *   4. When the number of elements is some power of 2, it
  703  *      can be stored in a HAT (full or half) with HATs as leaf elements
  704  *      (full or half),  and so on (e.g. 2^8 elements in a HAT of size 2^4 (top-level
  705  *      array dimension) with each leaf array being a HAT of size 2^2).
  706  *
  707  *  IMPLEMENTATION DETAILS:
  708  *    1. A HAT of 2^12 elements needs 2^6 house-keeping NODEs
  709  *       of Node_array_leaf.
  710  *
  711  *    2. A HAT of HATS of 2^12 elements needs
  712  *       2^6 * (1 Node_array_tree + 2^3 Node_array_leaf)
  713  *       ~ 2^9 house-keeping NODEs.
  714  *
  715  *    3. When a leaf array (or leaf HAT) becomes empty, the memory
  716  *       is deallocated, and when there is no leaf array (or leaf HAT) left,
  717  *       the HAT is deleted.
  718  *
  719  *    4. A HAT stores the base (first) element, and locates the leaf array/HAT
  720  *       for the `i th' element using integer division
  721  *       (i - base)/n where n is the size of the top-level array.
  722  *
  723  */
  724 
  725 /* make_node --- initialize a NODE */
  726 
  727 static inline NODE *
  728 make_node(NODETYPE type)
  729 {
  730     NODE *n;
  731     getnode(n);
  732     memset(n, '\0', sizeof(NODE));
  733     n->type = type;
  734     return n;
  735 }
  736 
  737 
  738 /* tree_lookup --- Find an integer subscript in a HAT; Install it if it isn't there */
  739 
  740 static NODE **
  741 tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base)
  742 {
  743     NODE **lhs;
  744     NODE *tn;
  745     int i, n;
  746     size_t size;
  747     long num = k;
  748 
  749     /*
  750      * HAT size (size of Top & Leaf array) = 2^n
  751      * where n = Floor ((m + 1)/2). For an odd value of m,
  752      * only the first half of the HAT is needed.
  753      */
  754 
  755     n = (m + 1) / 2;
  756 
  757     if (tree->table_size == 0) {
  758         size_t actual_size;
  759         NODE **table;
  760 
  761         assert(tree->nodes == NULL);
  762 
  763         /* initialize top-level array */
  764         size = actual_size = power_two_table[n];
  765         tree->array_base = base;
  766         tree->array_size = size;
  767         tree->table_size = 0;   /* # of elements in the array */
  768         if (n > m/2) {
  769             /* only first half of the array used */
  770             actual_size /= 2;
  771             tree->flags |= HALFHAT;
  772         }
  773         ezalloc(table, NODE **, actual_size * sizeof(NODE *), "tree_lookup");
  774         tree->nodes = table;
  775     } else
  776         size = tree->array_size;
  777 
  778     num -= tree->array_base;
  779     i = num / size; /* top-level array index */
  780     assert(i >= 0);
  781 
  782     if ((lhs = tree_find(tree, k, i)) != NULL)
  783         return lhs;
  784 
  785     /* It's not there, install it */
  786 
  787     tree->table_size++;
  788     base += (size * i);
  789     tn = tree->nodes[i];
  790     if (n > NHAT) {
  791         if (tn == NULL)
  792             tn = tree->nodes[i] = make_node(Node_array_tree);
  793         return tree_lookup(symbol, tn, k, n, base);
  794     } else {
  795         if (tn == NULL)
  796             tn = tree->nodes[i] = make_node(Node_array_leaf);
  797         return leaf_lookup(symbol, tn, k, size, base);
  798     }
  799 }
  800 
  801 
  802 /* tree_exists --- test whether integer subscript `k' exists or not */
  803 
  804 static NODE **
  805 tree_exists(NODE *tree, long k)
  806 {
  807     int i;
  808     NODE *tn;
  809 
  810     i = (k - tree->array_base) / tree->array_size;
  811     assert(i >= 0);
  812     tn = tree->nodes[i];
  813     if (tn == NULL)
  814         return NULL;
  815     if (tn->type == Node_array_tree)
  816         return tree_exists(tn, k);
  817     return leaf_exists(tn, k);
  818 }
  819 
  820 /* tree_clear --- flush all the values */
  821 
  822 static void
  823 tree_clear(NODE *tree)
  824 {
  825     NODE *tn;
  826     size_t  j, hsize;
  827 
  828     hsize = tree->array_size;
  829     if ((tree->flags & HALFHAT) != 0)
  830         hsize /= 2;
  831 
  832     for (j = 0; j < hsize; j++) {
  833         tn = tree->nodes[j];
  834         if (tn == NULL)
  835             continue;
  836         if (tn->type == Node_array_tree)
  837             tree_clear(tn);
  838         else
  839             leaf_clear(tn);
  840         freenode(tn);
  841     }
  842 
  843     efree(tree->nodes);
  844     memset(tree, '\0', sizeof(NODE));
  845     tree->type = Node_array_tree;
  846 }
  847 
  848 
  849 /* tree_remove --- If the integer subscript is in the HAT, remove it */
  850 
  851 static int
  852 tree_remove(NODE *symbol, NODE *tree, long k)
  853 {
  854     int i;
  855     NODE *tn;
  856 
  857     i = (k - tree->array_base) / tree->array_size;
  858     assert(i >= 0);
  859     tn = tree->nodes[i];
  860     if (tn == NULL)
  861         return false;
  862 
  863     if (tn->type == Node_array_tree
  864             && ! tree_remove(symbol, tn, k))
  865         return false;
  866     else if (tn->type == Node_array_leaf
  867             && ! leaf_remove(symbol, tn, k))
  868         return false;
  869 
  870     if (tn->table_size == 0) {
  871         freenode(tn);
  872         tree->nodes[i] = NULL;
  873     }
  874 
  875     /* one less item in array */
  876     if (--tree->table_size == 0) {
  877         efree(tree->nodes);
  878         memset(tree, '\0', sizeof(NODE));
  879         tree->type = Node_array_tree;
  880     }
  881     return true;
  882 }
  883 
  884 
  885 /* tree_find --- locate an interger subscript in the HAT */
  886 
  887 static inline NODE **
  888 tree_find(NODE *tree, long k, int i)
  889 {
  890     NODE *tn;
  891 
  892     assert(tree->nodes != NULL);
  893     tn = tree->nodes[i];
  894     if (tn != NULL) {
  895         if (tn->type == Node_array_tree)
  896             return tree_exists(tn, k);
  897         return leaf_exists(tn, k);
  898     }
  899     return NULL;
  900 }
  901 
  902 
  903 /* tree_list --- return a list of items in the HAT */
  904 
  905 static long
  906 tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind)
  907 {
  908     NODE *tn;
  909     size_t j, cj, hsize;
  910     long k = 0;
  911 
  912     assert(list != NULL);
  913 
  914     hsize = tree->array_size;
  915     if ((tree->flags & HALFHAT) != 0)
  916         hsize /= 2;
  917 
  918     for (j = 0; j < hsize; j++) {
  919         cj = (assoc_kind & ADESC) != 0 ? (hsize - 1 - j) : j;
  920         tn = tree->nodes[cj];
  921         if (tn == NULL)
  922             continue;
  923         if (tn->type == Node_array_tree)
  924             k += tree_list(tn, list + k, assoc_kind);
  925         else
  926             k += leaf_list(tn, list + k, assoc_kind);
  927         if ((assoc_kind & ADELETE) != 0 && k >= 1)
  928             return k;
  929     }
  930     return k;
  931 }
  932 
  933 
  934 /* tree_copy --- duplicate a HAT */
  935 
  936 static void
  937 tree_copy(NODE *newsymb, NODE *tree, NODE *newtree)
  938 {
  939     NODE **old, **new;
  940     size_t j, hsize;
  941 
  942     hsize = tree->array_size;
  943     if ((tree->flags & HALFHAT) != 0)
  944         hsize /= 2;
  945 
  946     ezalloc(new, NODE **, hsize * sizeof(NODE *), "tree_copy");
  947     newtree->nodes = new;
  948     newtree->array_base = tree->array_base;
  949     newtree->array_size = tree->array_size;
  950     newtree->table_size = tree->table_size;
  951     newtree->flags = tree->flags;
  952 
  953     old = tree->nodes;
  954     for (j = 0; j < hsize; j++) {
  955         if (old[j] == NULL)
  956             continue;
  957         if (old[j]->type == Node_array_tree) {
  958             new[j] = make_node(Node_array_tree);
  959             tree_copy(newsymb, old[j], new[j]);
  960         } else {
  961             new[j] = make_node(Node_array_leaf);
  962             leaf_copy(newsymb, old[j], new[j]);
  963         }
  964     }
  965 }
  966 
  967 
  968 /* tree_info --- print index, value info */
  969 
  970 static void
  971 tree_info(NODE *tree, NODE *ndump, const char *aname)
  972 {
  973     NODE *tn;
  974     size_t j, hsize;
  975 
  976     hsize = tree->array_size;
  977     if ((tree->flags & HALFHAT) != 0)
  978         hsize /= 2;
  979 
  980     for (j = 0; j < hsize; j++) {
  981         tn = tree->nodes[j];
  982         if (tn == NULL)
  983             continue;
  984         if (tn->type == Node_array_tree)
  985             tree_info(tn, ndump, aname);
  986         else
  987             leaf_info(tn, ndump, aname);
  988     }
  989 }
  990 
  991 
  992 /* tree_kilobytes --- calculate memory consumption of a HAT */
  993 
  994 static size_t
  995 tree_kilobytes(NODE *tree)
  996 {
  997     NODE *tn;
  998     size_t j, hsize;
  999     size_t sz = 0;
 1000 
 1001     hsize = tree->array_size;
 1002     if ((tree->flags & HALFHAT) != 0)
 1003         hsize /= 2;
 1004     for (j = 0; j < hsize; j++) {
 1005         tn = tree->nodes[j];
 1006         if (tn == NULL)
 1007             continue;
 1008         sz += sizeof(NODE); /* Node_array_tree or Node_array_leaf */
 1009         if (tn->type == Node_array_tree)
 1010             sz += tree_kilobytes(tn);
 1011     }
 1012     sz += hsize * sizeof(NODE *);   /* tree->nodes */
 1013     return sz;
 1014 }
 1015 
 1016 #ifdef ARRAYDEBUG
 1017 
 1018 /* tree_print --- print the HAT structures */
 1019 
 1020 static void
 1021 tree_print(NODE *tree, size_t bi, int indent_level)
 1022 {
 1023     NODE *tn;
 1024     size_t j, hsize;
 1025 
 1026     indent(indent_level);
 1027 
 1028     hsize = tree->array_size;
 1029     if ((tree->flags & HALFHAT) != 0)
 1030         hsize /= 2;
 1031     fprintf(output_fp, "%4lu:%s[%4lu:%-4lu]\n",
 1032             (unsigned long) bi,
 1033             (tree->flags & HALFHAT) != 0 ? "HH" : "H",
 1034             (unsigned long) hsize, (unsigned long) tree->table_size);
 1035 
 1036     for (j = 0; j < hsize; j++) {
 1037         tn = tree->nodes[j];
 1038         if (tn == NULL)
 1039             continue;
 1040         if (tn->type == Node_array_tree)
 1041             tree_print(tn, j, indent_level + 1);
 1042         else
 1043             leaf_print(tn, j, indent_level + 1);
 1044     }
 1045 }
 1046 #endif
 1047 
 1048 /*--------------------- leaf (linear 1-D) array --------------------*/
 1049 
 1050 /*
 1051  * leaf_lookup --- find an integer subscript in the array; Install it if
 1052  *  it isn't there.
 1053  */
 1054 
 1055 static inline NODE **
 1056 leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base)
 1057 {
 1058     NODE **lhs;
 1059 
 1060     if (array->nodes == NULL) {
 1061         array->table_size = 0;  /* sanity */
 1062         array->array_size = size;
 1063         array->array_base = base;
 1064         ezalloc(array->nodes, NODE **, size * sizeof(NODE *), "leaf_lookup");
 1065         symbol->array_capacity += size;
 1066     }
 1067 
 1068     lhs = array->nodes + (k - base); /* leaf element */
 1069     if (*lhs == NULL) {
 1070         array->table_size++;    /* one more element in leaf array */
 1071         *lhs = dupnode(Nnull_string);
 1072     }
 1073     return lhs;
 1074 }
 1075 
 1076 
 1077 /* leaf_exists --- check if the array contains an integer subscript */
 1078 
 1079 static inline NODE **
 1080 leaf_exists(NODE *array, long k)
 1081 {
 1082     NODE **lhs;
 1083     lhs = array->nodes + (k - array->array_base);
 1084     return (*lhs != NULL) ? lhs : NULL;
 1085 }
 1086 
 1087 
 1088 /* leaf_clear --- flush all values in the array */
 1089 
 1090 static void
 1091 leaf_clear(NODE *array)
 1092 {
 1093     long i, size = array->array_size;
 1094     NODE *r;
 1095 
 1096     for (i = 0; i < size; i++) {
 1097         r = array->nodes[i];
 1098         if (r == NULL)
 1099             continue;
 1100         if (r->type == Node_var_array) {
 1101             assoc_clear(r);     /* recursively clear all sub-arrays */
 1102             efree(r->vname);
 1103             freenode(r);
 1104         } else
 1105             unref(r);
 1106     }
 1107     efree(array->nodes);
 1108     array->nodes = NULL;
 1109     array->array_size = array->table_size = 0;
 1110 }
 1111 
 1112 
 1113 /* leaf_remove --- remove an integer subscript from the array */
 1114 
 1115 static int
 1116 leaf_remove(NODE *symbol, NODE *array, long k)
 1117 {
 1118     NODE **lhs;
 1119 
 1120     lhs = array->nodes + (k - array->array_base);
 1121     if (*lhs == NULL)
 1122         return false;
 1123     *lhs = NULL;
 1124     if (--array->table_size == 0) {
 1125         efree(array->nodes);
 1126         array->nodes = NULL;
 1127         symbol->array_capacity -= array->array_size;
 1128         array->array_size = 0;  /* sanity */
 1129     }
 1130     return true;
 1131 }
 1132 
 1133 
 1134 /* leaf_copy --- duplicate a leaf array */
 1135 
 1136 static void
 1137 leaf_copy(NODE *newsymb, NODE *array, NODE *newarray)
 1138 {
 1139     NODE **old, **new;
 1140     long size, i;
 1141 
 1142     size = array->array_size;
 1143     ezalloc(new, NODE **, size * sizeof(NODE *), "leaf_copy");
 1144     newarray->nodes = new;
 1145     newarray->array_size = size;
 1146     newarray->array_base = array->array_base;
 1147     newarray->flags = array->flags;
 1148     newarray->table_size = array->table_size;
 1149 
 1150     old = array->nodes;
 1151     for (i = 0; i < size; i++) {
 1152         if (old[i] == NULL)
 1153             continue;
 1154         if (old[i]->type == Node_val)
 1155             new[i] = dupnode(old[i]);
 1156         else {
 1157             NODE *r;
 1158             r = make_array();
 1159             r->vname = estrdup(old[i]->vname, strlen(old[i]->vname));
 1160             r->parent_array = newsymb;
 1161             new[i] = assoc_copy(old[i], r);
 1162         }
 1163     }
 1164 }
 1165 
 1166 
 1167 /* leaf_list --- return a list of items */
 1168 
 1169 static long
 1170 leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind)
 1171 {
 1172     NODE *r, *subs;
 1173     long num, i, ci, k = 0;
 1174     long size = array->array_size;
 1175     static char buf[100];
 1176 
 1177     for (i = 0; i < size; i++) {
 1178         ci = (assoc_kind & ADESC) != 0 ? (size - 1 - i) : i;
 1179         r = array->nodes[ci];
 1180         if (r == NULL)
 1181             continue;
 1182 
 1183         /* index */
 1184         num = array->array_base + ci;
 1185         if ((assoc_kind & AISTR) != 0) {
 1186             sprintf(buf, "%ld", num);
 1187             subs = make_string(buf, strlen(buf));
 1188             subs->numbr = num;
 1189             subs->flags |= (NUMCUR|NUMINT);
 1190         } else {
 1191             subs = make_number((AWKNUM) num);
 1192             subs->flags |= (INTIND|NUMINT);
 1193         }
 1194         list[k++] = subs;
 1195 
 1196         /* value */
 1197         if ((assoc_kind & AVALUE) != 0) {
 1198             if (r->type == Node_val) {
 1199                 if ((assoc_kind & AVNUM) != 0)
 1200                     (void) force_number(r);
 1201                 else if ((assoc_kind & AVSTR) != 0)
 1202                     r = force_string(r);
 1203             }
 1204             list[k++] = r;
 1205         }
 1206         if ((assoc_kind & ADELETE) != 0 && k >= 1)
 1207             return k;
 1208     }
 1209 
 1210     return k;
 1211 }
 1212 
 1213 
 1214 /* leaf_info --- print index, value info */
 1215 
 1216 static void
 1217 leaf_info(NODE *array, NODE *ndump, const char *aname)
 1218 {
 1219     NODE *subs, *val;
 1220     size_t i, size;
 1221 
 1222     size = array->array_size;
 1223 
 1224     subs = make_number((AWKNUM) 0.0);
 1225     subs->flags |= (INTIND|NUMINT);
 1226     for (i = 0; i < size; i++) {
 1227         val = array->nodes[i];
 1228         if (val == NULL)
 1229             continue;
 1230         subs->numbr = array->array_base + i;
 1231         assoc_info(subs, val, ndump, aname);
 1232     }
 1233     unref(subs);
 1234 }
 1235 
 1236 #ifdef ARRAYDEBUG
 1237 
 1238 /* leaf_print --- print the leaf-array structure */
 1239 
 1240 
 1241 static void
 1242 leaf_print(NODE *array, size_t bi, int indent_level)
 1243 {
 1244     indent(indent_level);
 1245     fprintf(output_fp, "%4lu:L[%4lu:%-4lu]\n",
 1246             (unsigned long) bi,
 1247             (unsigned long) array->array_size,
 1248             (unsigned long) array->table_size);
 1249 }
 1250 #endif
 1251 
 1252 static NODE *argv_shadow_array = NULL;
 1253 
 1254 /* argv_store --- post assign function for ARGV in sandbox mode */
 1255 
 1256 static NODE **
 1257 argv_store(NODE *symbol, NODE *subs)
 1258 {
 1259     NODE **val = cint_exists(symbol, subs);
 1260     NODE *newval = *val;
 1261     char *cp;
 1262 
 1263     if (newval->stlen == 0) // empty strings in ARGV are OK
 1264         return val;
 1265 
 1266     if ((cp = strchr(newval->stptr, '=')) == NULL) {
 1267         if (! in_array(argv_shadow_array, newval))
 1268             fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"),
 1269                 (int) newval->stlen, newval->stptr);
 1270     } else {
 1271         // check if it's a valid variable assignment
 1272         bool badvar = false;
 1273         char *arg = newval->stptr;
 1274         char *cp2;
 1275 
 1276         *cp = '\0'; // temporarily
 1277 
 1278         if (! is_letter((unsigned char) arg[0]))
 1279             badvar = true;
 1280         else
 1281             for (cp2 = arg+1; *cp2; cp2++)
 1282                 if (! is_identchar((unsigned char) *cp2) && *cp2 != ':') {
 1283                     badvar = true;
 1284                     break;
 1285                 }
 1286 
 1287         // further checks
 1288         if (! badvar) {
 1289             char *cp = strchr(arg, ':');
 1290             if (cp && (cp[1] != ':' || strchr(cp + 2, ':') != NULL))
 1291                 badvar = true;
 1292         }
 1293         *cp = '=';  // restore the '='
 1294 
 1295         if (badvar && ! in_array(argv_shadow_array, newval))
 1296             fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"),
 1297                 (int) newval->stlen, newval->stptr);
 1298 
 1299         // otherwise, badvar is false, let it through as variable assignment
 1300     }
 1301     return val;
 1302 }
 1303 
 1304 /* init_argv_array --- set up the pointers for ARGV in sandbox mode. A bit hacky. */
 1305 
 1306 void
 1307 init_argv_array(NODE *argv_node, NODE *shadow_node)
 1308 {
 1309     /* If POSIX simply don't reset the vtable and things work as before */
 1310     if (! do_sandbox)
 1311         return;
 1312 
 1313     argv_node->array_funcs = & argv_array_func;
 1314     argv_shadow_array = shadow_node;
 1315 }